mirror of
https://github.com/xmrig/xmrig.git
synced 2025-12-06 23:52:38 -05:00
Compare commits
61 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
718c7e0fc1 | ||
|
|
ef7951b91d | ||
|
|
214b1f021b | ||
|
|
81b18c0741 | ||
|
|
8e83f72456 | ||
|
|
c2ae625032 | ||
|
|
60566dc84c | ||
|
|
4ea8fe694d | ||
|
|
669d1ab008 | ||
|
|
e87d5111a2 | ||
|
|
56158779de | ||
|
|
efb322df66 | ||
|
|
e673d541c1 | ||
|
|
a98db529fb | ||
|
|
1a9eaaad8f | ||
|
|
be5fbca9b6 | ||
|
|
2feb264375 | ||
|
|
00990f2649 | ||
|
|
d78713be48 | ||
|
|
77367abe13 | ||
|
|
cd046f6fd0 | ||
|
|
63b7ec2887 | ||
|
|
a1e8f1c3e5 | ||
|
|
6db480a1ab | ||
|
|
a7acd9de6d | ||
|
|
a64f4d1870 | ||
|
|
9bfe59b630 | ||
|
|
1a4bf16521 | ||
|
|
a4d5d0a75a | ||
|
|
c40f1f9f66 | ||
|
|
15e5052dd0 | ||
|
|
f9f7963453 | ||
|
|
02240eff8c | ||
|
|
d64c963e5e | ||
|
|
c6292ce9ee | ||
|
|
cd652e2644 | ||
|
|
6f5ef0fe0f | ||
|
|
01fa968763 | ||
|
|
8e6f3ad99e | ||
|
|
b1f2479ec1 | ||
|
|
ecceba8ecd | ||
|
|
cb5f4a9c17 | ||
|
|
3a8ebfdcb6 | ||
|
|
0dcafeb571 | ||
|
|
a1d7ee4c6b | ||
|
|
03e70ba2ed | ||
|
|
19ef8c5d65 | ||
|
|
63baa9e263 | ||
|
|
1248bd5859 | ||
|
|
5c951ddb8a | ||
|
|
4ab0ad928d | ||
|
|
e67eb47796 | ||
|
|
a6656a8c49 | ||
|
|
a903d0a5bd | ||
|
|
ceaebfd877 | ||
|
|
5156ff11a8 | ||
|
|
e0143a92a8 | ||
|
|
f682d9a2e9 | ||
|
|
3bece0ff40 | ||
|
|
e6c456a970 | ||
|
|
923d1d712f |
27
CHANGELOG.md
27
CHANGELOG.md
@@ -1,3 +1,30 @@
|
||||
# v6.16.2
|
||||
- [#2751](https://github.com/xmrig/xmrig/pull/2751) Fixed crash on CPUs supporting VAES and running GCC-compiled xmrig.
|
||||
- [#2761](https://github.com/xmrig/xmrig/pull/2761) Fixed broken auto-tuning in GCC Windows build.
|
||||
- [#2771](https://github.com/xmrig/xmrig/issues/2771) Fixed environment variables support for GhostRider and KawPow.
|
||||
- [#2769](https://github.com/xmrig/xmrig/pull/2769) Performance fixes:
|
||||
- Fixed several performance bottlenecks introduced in v6.16.1.
|
||||
- Fixed overall performance of GCC-compiled builds; they now run at the same speed as MSVC builds.
|
||||
- **Linux builds are up to 10% faster now compared to v6.16.0 GCC build.**
|
||||
- **Windows builds are up to 5% faster now compared to v6.16.0 MSVC build.**
|
||||
|
||||
# v6.16.1
|
||||
- [#2729](https://github.com/xmrig/xmrig/pull/2729) GhostRider fixes:
|
||||
- Added average hashrate display.
|
||||
- Fixed the number of threads shown at startup.
|
||||
- Fixed the `--threads` (`-t`) command line option (using `--cpu-max-threads-hint` instead is still recommended).
|
||||
- [#2738](https://github.com/xmrig/xmrig/pull/2738) GhostRider fixes:
|
||||
- Fixed "difficulty is not a number" error when diff is high on some pools.
|
||||
- Fixed GhostRider compilation when `WITH_KAWPOW=OFF`.
|
||||
- [#2740](https://github.com/xmrig/xmrig/pull/2740) Added VAES support for Cryptonight variants: **+4% speedup on Zen3**.
|
||||
- VAES instructions are available on Intel Ice Lake/AMD Zen3 and newer CPUs.
|
||||
- +4% speedup on Ryzen 5 5600X.
|
||||
|
||||
# v6.16.0
|
||||
- [#2712](https://github.com/xmrig/xmrig/pull/2712) **GhostRider algorithm (Raptoreum) support**: read the [RELEASE NOTES](src/crypto/ghostrider/README.md) for quick start guide and performance comparisons.
|
||||
- [#2682](https://github.com/xmrig/xmrig/pull/2682) Fixed: use cn-heavy optimization only for Vermeer CPUs.
|
||||
- [#2684](https://github.com/xmrig/xmrig/pull/2684) MSR mod: fix for error 183.
|
||||
|
||||
# v6.15.3
|
||||
- [#2614](https://github.com/xmrig/xmrig/pull/2614) OpenCL fixes for non-AMD platforms.
|
||||
- [#2623](https://github.com/xmrig/xmrig/pull/2623) Fixed compiling without kawpow.
|
||||
|
||||
@@ -10,6 +10,7 @@ option(WITH_RANDOMX "Enable RandomX algorithms family" ON)
|
||||
option(WITH_ARGON2 "Enable Argon2 algorithms family" ON)
|
||||
option(WITH_ASTROBWT "Enable AstroBWT algorithms family" ON)
|
||||
option(WITH_KAWPOW "Enable KawPow algorithms family" ON)
|
||||
option(WITH_GHOSTRIDER "Enable GhostRider algorithm" ON)
|
||||
option(WITH_HTTP "Enable HTTP protocol support (client/server)" ON)
|
||||
option(WITH_DEBUG_LOG "Enable debug log output" OFF)
|
||||
option(WITH_TLS "Enable OpenSSL support" ON)
|
||||
@@ -27,6 +28,7 @@ option(WITH_STRICT_CACHE "Enable strict checks for OpenCL cache" ON)
|
||||
option(WITH_INTERLEAVE_DEBUG_LOG "Enable debug log for threads interleave" OFF)
|
||||
option(WITH_PROFILING "Enable profiling for developers" OFF)
|
||||
option(WITH_SSE4_1 "Enable SSE 4.1 for Blake2" ON)
|
||||
option(WITH_VAES "Enable VAES instructions for Cryptonight" ON)
|
||||
option(WITH_BENCHMARK "Enable builtin RandomX benchmark and stress test" ON)
|
||||
option(WITH_SECURE_JIT "Enable secure access to JIT memory" OFF)
|
||||
option(WITH_DMI "Enable DMI/SMBIOS reader" ON)
|
||||
@@ -128,6 +130,19 @@ set(SOURCES_CRYPTO
|
||||
src/crypto/common/VirtualMemory.cpp
|
||||
)
|
||||
|
||||
if (CMAKE_C_COMPILER_ID MATCHES GNU)
|
||||
set_source_files_properties(src/crypto/cn/CnHash.cpp PROPERTIES COMPILE_FLAGS "-Ofast -fno-tree-vectorize")
|
||||
endif()
|
||||
|
||||
if (WITH_VAES)
|
||||
add_definitions(-DXMRIG_VAES)
|
||||
set(HEADERS_CRYPTO "${HEADERS_CRYPTO}" src/crypto/cn/CryptoNight_x86_vaes.h)
|
||||
set(SOURCES_CRYPTO "${SOURCES_CRYPTO}" src/crypto/cn/CryptoNight_x86_vaes.cpp)
|
||||
if (CMAKE_C_COMPILER_ID MATCHES GNU OR CMAKE_C_COMPILER_ID MATCHES Clang)
|
||||
set_source_files_properties(src/crypto/cn/CryptoNight_x86_vaes.cpp PROPERTIES COMPILE_FLAGS "-Ofast -fno-tree-vectorize -mavx2 -mvaes")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (WITH_HWLOC)
|
||||
list(APPEND HEADERS_CRYPTO
|
||||
src/crypto/common/NUMAMemoryPool.h
|
||||
@@ -186,6 +201,7 @@ include(cmake/randomx.cmake)
|
||||
include(cmake/argon2.cmake)
|
||||
include(cmake/astrobwt.cmake)
|
||||
include(cmake/kawpow.cmake)
|
||||
include(cmake/ghostrider.cmake)
|
||||
include(cmake/OpenSSL.cmake)
|
||||
include(cmake/asm.cmake)
|
||||
|
||||
@@ -221,7 +237,7 @@ if (WITH_DEBUG_LOG)
|
||||
endif()
|
||||
|
||||
add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${TLS_SOURCES} ${XMRIG_ASM_SOURCES})
|
||||
target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${EXTRA_LIBS} ${CPUID_LIB} ${ARGON2_LIBRARY} ${ETHASH_LIBRARY})
|
||||
target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${EXTRA_LIBS} ${CPUID_LIB} ${ARGON2_LIBRARY} ${ETHASH_LIBRARY} ${GHOSTRIDER_LIBRARY})
|
||||
|
||||
if (WIN32)
|
||||
add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/bin/WinRing0/WinRing0x64.sys" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>)
|
||||
@@ -229,6 +245,7 @@ if (WIN32)
|
||||
add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/scripts/benchmark_10M.cmd" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>)
|
||||
add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/scripts/pool_mine_example.cmd" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>)
|
||||
add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/scripts/solo_mine_example.cmd" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>)
|
||||
add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/scripts/rtm_ghostrider_example.cmd" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>)
|
||||
endif()
|
||||
|
||||
if (CMAKE_CXX_COMPILER_ID MATCHES Clang AND CMAKE_BUILD_TYPE STREQUAL Release AND NOT CMAKE_GENERATOR STREQUAL Xcode)
|
||||
|
||||
@@ -9,10 +9,23 @@ if (NOT CMAKE_SYSTEM_PROCESSOR)
|
||||
message(WARNING "CMAKE_SYSTEM_PROCESSOR not defined")
|
||||
endif()
|
||||
|
||||
include(CheckCXXCompilerFlag)
|
||||
|
||||
if (CMAKE_CXX_COMPILER_ID MATCHES MSVC)
|
||||
set(VAES_SUPPORTED ON)
|
||||
else()
|
||||
CHECK_CXX_COMPILER_FLAG("-mavx2 -mvaes" VAES_SUPPORTED)
|
||||
endif()
|
||||
|
||||
if (NOT VAES_SUPPORTED)
|
||||
set(WITH_VAES OFF)
|
||||
endif()
|
||||
|
||||
if (XMRIG_64_BIT AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64)$")
|
||||
add_definitions(-DRAPIDJSON_SSE2)
|
||||
else()
|
||||
set(WITH_SSE4_1 OFF)
|
||||
set(WITH_VAES OFF)
|
||||
endif()
|
||||
|
||||
if (NOT ARM_TARGET)
|
||||
@@ -29,8 +42,6 @@ if (ARM_TARGET AND ARM_TARGET GREATER 6)
|
||||
|
||||
message(STATUS "Use ARM_TARGET=${ARM_TARGET} (${CMAKE_SYSTEM_PROCESSOR})")
|
||||
|
||||
include(CheckCXXCompilerFlag)
|
||||
|
||||
if (ARM_TARGET EQUAL 8)
|
||||
CHECK_CXX_COMPILER_FLAG(-march=armv8-a+crypto XMRIG_ARM_CRYPTO)
|
||||
|
||||
|
||||
8
cmake/ghostrider.cmake
Normal file
8
cmake/ghostrider.cmake
Normal file
@@ -0,0 +1,8 @@
|
||||
# Optional GhostRider algorithm support, switched by the WITH_GHOSTRIDER option.
# Sets GHOSTRIDER_LIBRARY for the top-level target_link_libraries() call.
if (NOT WITH_GHOSTRIDER)
    # Disabled: make sure the feature macro is not defined and link nothing.
    remove_definitions(/DXMRIG_ALGO_GHOSTRIDER)
    set(GHOSTRIDER_LIBRARY "")
else()
    # Enabled: define the feature macro, pull in the GhostRider sources and
    # expose the resulting library name to the caller.
    add_definitions(/DXMRIG_ALGO_GHOSTRIDER)
    add_subdirectory(src/crypto/ghostrider)
    set(GHOSTRIDER_LIBRARY ghostrider)
endif()
|
||||
20
scripts/rtm_ghostrider_example.cmd
Normal file
20
scripts/rtm_ghostrider_example.cmd
Normal file
@@ -0,0 +1,20 @@
|
||||
:: Example batch file for mining Raptoreum at a pool
|
||||
::
|
||||
:: Format:
|
||||
:: xmrig.exe -a gr -o <pool address>:<pool port> -u <pool username/wallet> -p <pool password>
|
||||
::
|
||||
:: Fields:
|
||||
:: pool address The host name of the pool stratum or its IP address, for example raptoreumemporium.com
|
||||
:: pool port The port of the pool's stratum to connect to, for example 3333. Check your pool's getting started page.
|
||||
:: pool username/wallet For most pools, this is the wallet address you want to mine to. Some pools require a username
|
||||
:: pool password For most pools this can be just 'x'. For pools using usernames, you may need to provide a password as configured on the pool.
|
||||
::
|
||||
:: List of Raptoreum mining pools:
|
||||
:: https://miningpoolstats.stream/raptoreum
|
||||
::
|
||||
:: Choose pools outside of the top 5 to help the Raptoreum network be more decentralized!
|
||||
:: Smaller pools also often have smaller fees/payout limits.
|
||||
|
||||
:: Run from the directory that contains this script so xmrig.exe is found.
:: /d also switches the drive letter; the quotes keep paths with spaces or
:: special characters (such as &) intact.
cd /d "%~dp0"
:: Replace WALLET_ADDRESS (and the pool host/port if needed) before running.
xmrig.exe -a gr -o raptoreumemporium.com:3008 -u WALLET_ADDRESS -p x
:: Keep the console window open so any output can be read.
pause
|
||||
@@ -53,6 +53,9 @@ xmrig::Hashrate::Hashrate(size_t threads) :
|
||||
m_timestamps[i] = new uint64_t[kBucketSize]();
|
||||
m_top[i] = 0;
|
||||
}
|
||||
|
||||
m_earliestTimestamp = std::numeric_limits<uint64_t>::max();
|
||||
m_totalCount = 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -66,6 +69,14 @@ xmrig::Hashrate::~Hashrate()
|
||||
delete [] m_counts;
|
||||
delete [] m_timestamps;
|
||||
delete [] m_top;
|
||||
|
||||
}
|
||||
|
||||
|
||||
double xmrig::Hashrate::average() const
|
||||
{
|
||||
const uint64_t ts = Chrono::steadyMSecs();
|
||||
return (ts > m_earliestTimestamp) ? (m_totalCount * 1e3 / (ts - m_earliestTimestamp)) : 0.0;
|
||||
}
|
||||
|
||||
|
||||
@@ -167,4 +178,11 @@ void xmrig::Hashrate::addData(size_t index, uint64_t count, uint64_t timestamp)
|
||||
m_timestamps[index][top] = timestamp;
|
||||
|
||||
m_top[index] = (top + 1) & kBucketMask;
|
||||
|
||||
if (index == 0) {
|
||||
if (m_earliestTimestamp == std::numeric_limits<uint64_t>::max()) {
|
||||
m_earliestTimestamp = timestamp;
|
||||
}
|
||||
m_totalCount = count;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -53,6 +53,8 @@ public:
|
||||
inline void add(size_t threadId, uint64_t count, uint64_t timestamp) { addData(threadId + 1U, count, timestamp); }
|
||||
inline void add(uint64_t count, uint64_t timestamp) { addData(0U, count, timestamp); }
|
||||
|
||||
double average() const;
|
||||
|
||||
static const char *format(double h, char *buf, size_t size);
|
||||
static rapidjson::Value normalize(double d);
|
||||
|
||||
@@ -72,6 +74,9 @@ private:
|
||||
uint32_t* m_top;
|
||||
uint64_t** m_counts;
|
||||
uint64_t** m_timestamps;
|
||||
|
||||
uint64_t m_earliestTimestamp;
|
||||
uint64_t m_totalCount;
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -31,6 +31,8 @@ class Worker : public IWorker
|
||||
public:
|
||||
Worker(size_t id, int64_t affinity, int priority);
|
||||
|
||||
size_t threads() const override { return 1; }
|
||||
|
||||
protected:
|
||||
inline int64_t affinity() const { return m_affinity; }
|
||||
inline size_t id() const override { return m_id; }
|
||||
|
||||
@@ -239,6 +239,9 @@ xmrig::IWorker *xmrig::Workers<CpuLaunchData>::create(Thread<CpuLaunchData> *han
|
||||
|
||||
case 5:
|
||||
return new CpuWorker<5>(handle->id(), handle->config());
|
||||
|
||||
case 8:
|
||||
return new CpuWorker<8>(handle->id(), handle->config());
|
||||
}
|
||||
|
||||
return nullptr;
|
||||
|
||||
@@ -46,6 +46,7 @@ public:
|
||||
virtual const VirtualMemory *memory() const = 0;
|
||||
virtual size_t id() const = 0;
|
||||
virtual size_t intensity() const = 0;
|
||||
virtual size_t threads() const = 0;
|
||||
virtual void hashrateData(uint64_t &hashCount, uint64_t &timeStamp, uint64_t &rawHashes) const = 0;
|
||||
virtual void jobEarlyNotification(const Job &job) = 0;
|
||||
virtual void start() = 0;
|
||||
|
||||
@@ -88,6 +88,7 @@ public:
|
||||
{
|
||||
if (ready) {
|
||||
m_started++;
|
||||
m_totalStarted += worker->threads();
|
||||
|
||||
if (m_workersMemory.insert(worker->memory()).second) {
|
||||
m_hugePages += worker->memory()->hugePages();
|
||||
@@ -112,7 +113,7 @@ public:
|
||||
LOG_INFO("%s" GREEN_BOLD(" READY") " threads %s%zu/%zu (%zu)" CLEAR " huge pages %s%1.0f%% %zu/%zu" CLEAR " memory " CYAN_BOLD("%zu KB") BLACK_BOLD(" (%" PRIu64 " ms)"),
|
||||
Tags::cpu(),
|
||||
m_errors == 0 ? CYAN_BOLD_S : YELLOW_BOLD_S,
|
||||
m_started, m_threads, m_ways,
|
||||
m_totalStarted, std::max(m_totalStarted, m_threads), m_ways,
|
||||
(m_hugePages.isFullyAllocated() ? GREEN_BOLD_S : (m_hugePages.allocated == 0 ? RED_BOLD_S : YELLOW_BOLD_S)),
|
||||
m_hugePages.percent(),
|
||||
m_hugePages.allocated, m_hugePages.total,
|
||||
@@ -127,6 +128,7 @@ private:
|
||||
size_t m_errors = 0;
|
||||
size_t m_memory = 0;
|
||||
size_t m_started = 0;
|
||||
size_t m_totalStarted = 0;
|
||||
size_t m_threads = 0;
|
||||
size_t m_ways = 0;
|
||||
uint64_t m_ts = 0;
|
||||
|
||||
@@ -122,8 +122,15 @@ std::vector<xmrig::CpuLaunchData> xmrig::CpuConfig::get(const Miner *miner, cons
|
||||
const size_t count = threads.count();
|
||||
out.reserve(count);
|
||||
|
||||
std::vector<int64_t> affinities;
|
||||
affinities.reserve(count);
|
||||
|
||||
for (const auto& thread : threads.data()) {
|
||||
affinities.emplace_back(thread.affinity());
|
||||
}
|
||||
|
||||
for (const auto &thread : threads.data()) {
|
||||
out.emplace_back(miner, algorithm, *this, thread, count);
|
||||
out.emplace_back(miner, algorithm, *this, thread, count, affinities);
|
||||
}
|
||||
|
||||
return out;
|
||||
@@ -200,6 +207,7 @@ void xmrig::CpuConfig::generate()
|
||||
count += xmrig::generate<Algorithm::RANDOM_X>(m_threads, m_limit);
|
||||
count += xmrig::generate<Algorithm::ARGON2>(m_threads, m_limit);
|
||||
count += xmrig::generate<Algorithm::ASTROBWT>(m_threads, m_limit);
|
||||
count += xmrig::generate<Algorithm::GHOSTRIDER>(m_threads, m_limit);
|
||||
|
||||
m_shouldSave |= count > 0;
|
||||
}
|
||||
|
||||
@@ -161,6 +161,15 @@ size_t inline generate<Algorithm::ASTROBWT>(Threads<CpuThreads>& threads, uint32
|
||||
}
|
||||
#endif
|
||||
|
||||
#ifdef XMRIG_ALGO_GHOSTRIDER
// GhostRider specialization of generate<>(): forwards to the generic
// generate() overload, passing the kGHOSTRIDER constant and GHOSTRIDER_RTM,
// and returns that call's result unchanged.
template<>
size_t inline generate<Algorithm::GHOSTRIDER>(Threads<CpuThreads>& threads, uint32_t limit)
{
    const size_t generated = generate(Algorithm::kGHOSTRIDER, threads, Algorithm::GHOSTRIDER_RTM, limit);

    return generated;
}
#endif
|
||||
|
||||
|
||||
} /* namespace xmrig */
|
||||
|
||||
|
||||
|
||||
@@ -32,7 +32,7 @@
|
||||
#include <algorithm>
|
||||
|
||||
|
||||
xmrig::CpuLaunchData::CpuLaunchData(const Miner *miner, const Algorithm &algorithm, const CpuConfig &config, const CpuThread &thread, size_t threads) :
|
||||
xmrig::CpuLaunchData::CpuLaunchData(const Miner *miner, const Algorithm &algorithm, const CpuConfig &config, const CpuThread &thread, size_t threads, const std::vector<int64_t>& affinities) :
|
||||
algorithm(algorithm),
|
||||
assembly(config.assembly()),
|
||||
astrobwtAVX2(config.astrobwtAVX2()),
|
||||
@@ -44,7 +44,8 @@ xmrig::CpuLaunchData::CpuLaunchData(const Miner *miner, const Algorithm &algorit
|
||||
affinity(thread.affinity()),
|
||||
miner(miner),
|
||||
threads(threads),
|
||||
intensity(std::min<uint32_t>(thread.intensity(), algorithm.maxIntensity()))
|
||||
intensity(std::max<uint32_t>(std::min<uint32_t>(thread.intensity(), algorithm.maxIntensity()), algorithm.minIntensity())),
|
||||
affinities(affinities)
|
||||
{
|
||||
}
|
||||
|
||||
|
||||
@@ -44,7 +44,7 @@ class Miner;
|
||||
class CpuLaunchData
|
||||
{
|
||||
public:
|
||||
CpuLaunchData(const Miner *miner, const Algorithm &algorithm, const CpuConfig &config, const CpuThread &thread, size_t threads);
|
||||
CpuLaunchData(const Miner *miner, const Algorithm &algorithm, const CpuConfig &config, const CpuThread &thread, size_t threads, const std::vector<int64_t>& affinities);
|
||||
|
||||
bool isEqual(const CpuLaunchData &other) const;
|
||||
CnHash::AlgoVariant av() const;
|
||||
@@ -68,6 +68,7 @@ public:
|
||||
const Miner *miner;
|
||||
const size_t threads;
|
||||
const uint32_t intensity;
|
||||
const std::vector<int64_t> affinities;
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@ public:
|
||||
CpuThread(const rapidjson::Value &value);
|
||||
|
||||
inline bool isEqual(const CpuThread &other) const { return other.m_affinity == m_affinity && other.m_intensity == m_intensity; }
|
||||
inline bool isValid() const { return m_intensity <= 5; }
|
||||
inline bool isValid() const { return m_intensity <= 8; }
|
||||
inline int64_t affinity() const { return m_affinity; }
|
||||
inline uint32_t intensity() const { return m_intensity == 0 ? 1 : m_intensity; }
|
||||
|
||||
|
||||
@@ -34,6 +34,7 @@
|
||||
#include "crypto/rx/Rx.h"
|
||||
#include "crypto/rx/RxDataset.h"
|
||||
#include "crypto/rx/RxVm.h"
|
||||
#include "crypto/ghostrider/ghostrider.h"
|
||||
#include "net/JobResults.h"
|
||||
|
||||
|
||||
@@ -82,7 +83,8 @@ xmrig::CpuWorker<N>::CpuWorker(size_t id, const CpuLaunchData &data) :
|
||||
{
|
||||
# ifdef XMRIG_ALGO_CN_HEAVY
|
||||
// cn-heavy optimization for Zen3 CPUs
|
||||
if ((N == 1) && (m_av == CnHash::AV_SINGLE) && (m_algorithm.family() == Algorithm::CN_HEAVY) && (m_assembly != Assembly::NONE) && (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3)) {
|
||||
const bool is_vermeer = (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3) && (Cpu::info()->model() == 0x21);
|
||||
if ((N == 1) && (m_av == CnHash::AV_SINGLE) && (m_algorithm.family() == Algorithm::CN_HEAVY) && (m_assembly != Assembly::NONE) && is_vermeer) {
|
||||
std::lock_guard<std::mutex> lock(cn_heavyZen3MemoryMutex);
|
||||
if (!cn_heavyZen3Memory) {
|
||||
// Round up number of threads to the multiple of 8
|
||||
@@ -96,6 +98,10 @@ xmrig::CpuWorker<N>::CpuWorker(size_t id, const CpuLaunchData &data) :
|
||||
{
|
||||
m_memory = new VirtualMemory(m_algorithm.l3() * N, data.hugePages, false, true, node());
|
||||
}
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
m_ghHelper = ghostrider::create_helper_thread(affinity(), data.affinities);
|
||||
# endif
|
||||
}
|
||||
|
||||
|
||||
@@ -114,6 +120,10 @@ xmrig::CpuWorker<N>::~CpuWorker()
|
||||
{
|
||||
delete m_memory;
|
||||
}
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
ghostrider::destroy_helper_thread(m_ghHelper);
|
||||
# endif
|
||||
}
|
||||
|
||||
|
||||
@@ -153,6 +163,12 @@ bool xmrig::CpuWorker<N>::selfTest()
|
||||
|
||||
allocateCnCtx();
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if (m_algorithm.family() == Algorithm::GHOSTRIDER) {
|
||||
return (N == 8) && verify(Algorithm::GHOSTRIDER_RTM, test_output_gr);
|
||||
}
|
||||
# endif
|
||||
|
||||
if (m_algorithm.family() == Algorithm::CN) {
|
||||
const bool rc = verify(Algorithm::CN_0, test_output_v0) &&
|
||||
verify(Algorithm::CN_1, test_output_v1) &&
|
||||
@@ -299,16 +315,30 @@ void xmrig::CpuWorker<N>::start()
|
||||
else
|
||||
# endif
|
||||
{
|
||||
switch (job.algorithm().family()) {
|
||||
|
||||
# ifdef XMRIG_ALGO_ASTROBWT
|
||||
if (job.algorithm().family() == Algorithm::ASTROBWT) {
|
||||
case Algorithm::ASTROBWT:
|
||||
if (!astrobwt::astrobwt_dero(m_job.blob(), job.size(), m_ctx[0]->memory, m_hash, m_astrobwtMaxSize, m_astrobwtAVX2)) {
|
||||
valid = false;
|
||||
}
|
||||
}
|
||||
else
|
||||
break;
|
||||
# endif
|
||||
{
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
case Algorithm::GHOSTRIDER:
|
||||
if (N == 8) {
|
||||
ghostrider::hash_octa(m_job.blob(), job.size(), m_hash, m_ctx, m_ghHelper);
|
||||
}
|
||||
else {
|
||||
valid = false;
|
||||
}
|
||||
break;
|
||||
# endif
|
||||
|
||||
default:
|
||||
fn(job.algorithm())(m_job.blob(), job.size(), m_hash, m_ctx, job.height());
|
||||
break;
|
||||
}
|
||||
|
||||
if (!nextRound()) {
|
||||
@@ -367,6 +397,37 @@ bool xmrig::CpuWorker<N>::nextRound()
|
||||
template<size_t N>
|
||||
bool xmrig::CpuWorker<N>::verify(const Algorithm &algorithm, const uint8_t *referenceValue)
|
||||
{
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if (algorithm == Algorithm::GHOSTRIDER_RTM) {
|
||||
uint8_t blob[N * 80] = {};
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
blob[i * 80 + 0] = static_cast<uint8_t>(i);
|
||||
blob[i * 80 + 4] = 0x10;
|
||||
blob[i * 80 + 5] = 0x02;
|
||||
}
|
||||
|
||||
uint8_t hash1[N * 32] = {};
|
||||
ghostrider::hash_octa(blob, 80, hash1, m_ctx, 0, false);
|
||||
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
blob[i * 80 + 0] = static_cast<uint8_t>(i);
|
||||
blob[i * 80 + 4] = 0x43;
|
||||
blob[i * 80 + 5] = 0x05;
|
||||
}
|
||||
|
||||
uint8_t hash2[N * 32] = {};
|
||||
ghostrider::hash_octa(blob, 80, hash2, m_ctx, 0, false);
|
||||
|
||||
for (size_t i = 0; i < N * 32; ++i) {
|
||||
if ((hash1[i] ^ hash2[i]) != referenceValue[i]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
# endif
|
||||
|
||||
cn_hash_fun func = fn(algorithm);
|
||||
if (!func) {
|
||||
return false;
|
||||
@@ -483,6 +544,7 @@ template class CpuWorker<2>;
|
||||
template class CpuWorker<3>;
|
||||
template class CpuWorker<4>;
|
||||
template class CpuWorker<5>;
|
||||
template class CpuWorker<8>;
|
||||
|
||||
} // namespace xmrig
|
||||
|
||||
|
||||
@@ -38,6 +38,11 @@ namespace xmrig {
|
||||
class RxVm;
|
||||
|
||||
|
||||
#ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
namespace ghostrider { struct HelperThread; }
|
||||
#endif
|
||||
|
||||
|
||||
template<size_t N>
|
||||
class CpuWorker : public Worker
|
||||
{
|
||||
@@ -47,6 +52,15 @@ public:
|
||||
CpuWorker(size_t id, const CpuLaunchData &data);
|
||||
~CpuWorker() override;
|
||||
|
||||
size_t threads() const override
|
||||
{
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
return ((m_algorithm.family() == Algorithm::GHOSTRIDER) && m_ghHelper) ? 2 : 1;
|
||||
# else
|
||||
return 1;
|
||||
# endif
|
||||
}
|
||||
|
||||
protected:
|
||||
bool selfTest() override;
|
||||
void hashrateData(uint64_t &hashCount, uint64_t &timeStamp, uint64_t &rawHashes) const override;
|
||||
@@ -87,6 +101,10 @@ private:
|
||||
randomx_vm *m_vm = nullptr;
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
ghostrider::HelperThread* m_ghHelper = nullptr;
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_FEATURE_BENCHMARK
|
||||
uint32_t m_benchSize = 0;
|
||||
# endif
|
||||
@@ -102,6 +120,7 @@ extern template class CpuWorker<2>;
|
||||
extern template class CpuWorker<3>;
|
||||
extern template class CpuWorker<4>;
|
||||
extern template class CpuWorker<5>;
|
||||
extern template class CpuWorker<8>;
|
||||
|
||||
|
||||
} // namespace xmrig
|
||||
|
||||
@@ -61,6 +61,7 @@ public:
|
||||
|
||||
enum Flag : uint32_t {
|
||||
FLAG_AES,
|
||||
FLAG_VAES,
|
||||
FLAG_AVX,
|
||||
FLAG_AVX2,
|
||||
FLAG_AVX512F,
|
||||
@@ -90,6 +91,7 @@ public:
|
||||
virtual Assembly::Id assembly() const = 0;
|
||||
virtual bool has(Flag feature) const = 0;
|
||||
virtual bool hasAES() const = 0;
|
||||
virtual bool hasVAES() const = 0;
|
||||
virtual bool hasAVX() const = 0;
|
||||
virtual bool hasAVX2() const = 0;
|
||||
virtual bool hasBMI2() const = 0;
|
||||
@@ -111,6 +113,7 @@ public:
|
||||
virtual size_t packages() const = 0;
|
||||
virtual size_t threads() const = 0;
|
||||
virtual Vendor vendor() const = 0;
|
||||
virtual uint32_t model() const = 0;
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -30,6 +30,12 @@
|
||||
#endif
|
||||
|
||||
|
||||
#include "crypto/cn/CryptoNight_monero.h"
|
||||
#ifdef XMRIG_VAES
|
||||
# include "crypto/cn/CryptoNight_x86_vaes.h"
|
||||
#endif
|
||||
|
||||
|
||||
#include "backend/cpu/platform/BasicCpuInfo.h"
|
||||
#include "3rdparty/rapidjson/document.h"
|
||||
#include "crypto/common/Assembly.h"
|
||||
@@ -52,8 +58,8 @@
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
constexpr size_t kCpuFlagsSize = 14;
|
||||
static const std::array<const char *, kCpuFlagsSize> flagNames = { "aes", "avx", "avx2", "avx512f", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "sse4.1", "xop", "popcnt", "cat_l3", "vm" };
|
||||
constexpr size_t kCpuFlagsSize = 15;
|
||||
static const std::array<const char *, kCpuFlagsSize> flagNames = { "aes", "vaes", "avx", "avx2", "avx512f", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "sse4.1", "xop", "popcnt", "cat_l3", "vm" };
|
||||
static_assert(kCpuFlagsSize == ICpuInfo::FLAG_MAX, "kCpuFlagsSize and FLAG_MAX mismatch");
|
||||
|
||||
|
||||
@@ -140,6 +146,7 @@ static inline bool has_osxsave() { return has_feature(PROCESSOR_INFO,
|
||||
static inline bool has_aes_ni() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 25); }
|
||||
static inline bool has_avx() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 28) && has_osxsave() && has_xcr_avx(); }
|
||||
static inline bool has_avx2() { return has_feature(EXTENDED_FEATURES, EBX_Reg, 1 << 5) && has_osxsave() && has_xcr_avx(); }
|
||||
static inline bool has_vaes() { return has_feature(EXTENDED_FEATURES, ECX_Reg, 1 << 9) && has_osxsave() && has_xcr_avx(); }
|
||||
static inline bool has_avx512f() { return has_feature(EXTENDED_FEATURES, EBX_Reg, 1 << 16) && has_osxsave() && has_xcr_avx512(); }
|
||||
static inline bool has_bmi2() { return has_feature(EXTENDED_FEATURES, EBX_Reg, 1 << 8); }
|
||||
static inline bool has_pdpe1gb() { return has_feature(PROCESSOR_EXT_INFO, EDX_Reg, 1 << 26); }
|
||||
@@ -178,6 +185,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
|
||||
m_flags.set(FLAG_AES, has_aes_ni());
|
||||
m_flags.set(FLAG_AVX, has_avx());
|
||||
m_flags.set(FLAG_AVX2, has_avx2());
|
||||
m_flags.set(FLAG_VAES, has_vaes());
|
||||
m_flags.set(FLAG_AVX512F, has_avx512f());
|
||||
m_flags.set(FLAG_BMI2, has_bmi2());
|
||||
m_flags.set(FLAG_OSXSAVE, has_osxsave());
|
||||
@@ -292,6 +300,9 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
cn_sse41_enabled = has(FLAG_SSE41);
|
||||
cn_vaes_enabled = has(FLAG_VAES);
|
||||
}
|
||||
|
||||
|
||||
@@ -361,6 +372,12 @@ xmrig::CpuThreads xmrig::BasicCpuInfo::threads(const Algorithm &algorithm, uint3
|
||||
}
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if (f == Algorithm::GHOSTRIDER) {
|
||||
return CpuThreads(std::max<size_t>(count / 2, 1), 8);
|
||||
}
|
||||
# endif
|
||||
|
||||
return CpuThreads(std::max<size_t>(count / 2, 1), 1);
|
||||
}
|
||||
|
||||
|
||||
@@ -44,6 +44,7 @@ protected:
|
||||
inline Assembly::Id assembly() const override { return m_assembly; }
|
||||
inline bool has(Flag flag) const override { return m_flags.test(flag); }
|
||||
inline bool hasAES() const override { return has(FLAG_AES); }
|
||||
inline bool hasVAES() const override { return has(FLAG_VAES); }
|
||||
inline bool hasAVX() const override { return has(FLAG_AVX); }
|
||||
inline bool hasAVX2() const override { return has(FLAG_AVX2); }
|
||||
inline bool hasBMI2() const override { return has(FLAG_BMI2); }
|
||||
@@ -62,6 +63,14 @@ protected:
|
||||
inline size_t packages() const override { return 1; }
|
||||
inline size_t threads() const override { return m_threads; }
|
||||
inline Vendor vendor() const override { return m_vendor; }
|
||||
inline uint32_t model() const override
|
||||
{
|
||||
# ifndef XMRIG_ARM
|
||||
return m_model;
|
||||
# else
|
||||
return 0;
|
||||
# endif
|
||||
}
|
||||
|
||||
protected:
|
||||
Arch m_arch = ARCH_UNKNOWN;
|
||||
|
||||
@@ -99,8 +99,14 @@ const char *xmrig::BasicCpuInfo::backend() const
|
||||
}
|
||||
|
||||
|
||||
xmrig::CpuThreads xmrig::BasicCpuInfo::threads(const Algorithm &, uint32_t) const
|
||||
xmrig::CpuThreads xmrig::BasicCpuInfo::threads(const Algorithm &algorithm, uint32_t) const
|
||||
{
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if (algorithm.family() == Algorithm::GHOSTRIDER) {
|
||||
return CpuThreads(threads(), 8);
|
||||
}
|
||||
# endif
|
||||
|
||||
return CpuThreads(threads());
|
||||
}
|
||||
|
||||
|
||||
@@ -269,8 +269,10 @@ xmrig::CpuThreads xmrig::HwlocCpuInfo::allThreads(const Algorithm &algorithm, ui
|
||||
CpuThreads threads;
|
||||
threads.reserve(m_threads);
|
||||
|
||||
const uint32_t intensity = (algorithm.family() == Algorithm::GHOSTRIDER) ? 8 : 0;
|
||||
|
||||
for (const int32_t pu : m_units) {
|
||||
threads.add(pu, 0);
|
||||
threads.add(pu, intensity);
|
||||
}
|
||||
|
||||
if (threads.isEmpty()) {
|
||||
@@ -296,6 +298,18 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
|
||||
cores.reserve(m_cores);
|
||||
findByType(cache, HWLOC_OBJ_CORE, [&cores](hwloc_obj_t found) { cores.emplace_back(found); });
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if ((algorithm == Algorithm::GHOSTRIDER_RTM) && (PUs > cores.size()) && (PUs < cores.size() * 2)) {
|
||||
// Don't use E-cores on Alder Lake
|
||||
cores.erase(std::remove_if(cores.begin(), cores.end(), [](hwloc_obj_t c) { return hwloc_bitmap_weight(c->cpuset) == 1; }), cores.end());
|
||||
|
||||
// This shouldn't happen, but check it anyway
|
||||
if (cores.empty()) {
|
||||
findByType(cache, HWLOC_OBJ_CORE, [&cores](hwloc_obj_t found) { cores.emplace_back(found); });
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
size_t L3 = cache->attr->cache.size;
|
||||
const bool L3_exclusive = isCacheExclusive(cache);
|
||||
size_t L2 = 0;
|
||||
@@ -351,6 +365,15 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
|
||||
cacheHashes = std::min(cacheHashes, limit);
|
||||
}
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if (algorithm == Algorithm::GHOSTRIDER_RTM) {
|
||||
// GhostRider implementation runs 8 hashes at a time
|
||||
intensity = 8;
|
||||
// Always 1 thread per core (it uses additional helper thread when possible)
|
||||
cacheHashes = std::min(cacheHashes, cores.size());
|
||||
}
|
||||
# endif
|
||||
|
||||
if (cacheHashes >= PUs) {
|
||||
for (hwloc_obj_t core : cores) {
|
||||
const std::vector<hwloc_obj_t> units = findByType(core, HWLOC_OBJ_PU);
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
static const char astrobwt_cl[12493] = {
|
||||
static const char astrobwt_cl[12489] = {
|
||||
0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x75,0x63,0x68,0x61,0x72,0x20,0x75,0x69,0x6e,0x74,0x38,0x5f,0x74,0x3b,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x75,
|
||||
0x73,0x68,0x6f,0x72,0x74,0x20,0x75,0x69,0x6e,0x74,0x31,0x36,0x5f,0x74,0x3b,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x75,0x69,0x6e,0x74,0x20,0x75,0x69,0x6e,
|
||||
0x74,0x33,0x32,0x5f,0x74,0x3b,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x3b,0x0a,0x74,
|
||||
@@ -260,140 +260,140 @@ static const char astrobwt_cl[12493] = {
|
||||
0x6f,0x66,0x66,0x73,0x65,0x74,0x2b,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x29,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x33,
|
||||
0x32,0x5f,0x74,0x29,0x5d,0x20,0x26,0x3d,0x20,0x30,0x78,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x55,0x3e,0x3e,0x28,0x28,0x34,0x2d,0x28,0x6f,0x75,0x74,0x70,0x75,
|
||||
0x74,0x5f,0x73,0x69,0x7a,0x65,0x26,0x33,0x29,0x29,0x3c,0x3c,0x33,0x29,0x3b,0x0a,0x7d,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x52,0x4f,0x55,0x4e,0x44,0x53,
|
||||
0x20,0x32,0x34,0x20,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x52,0x36,0x34,0x28,0x61,0x2c,0x62,0x2c,0x63,0x29,0x20,0x28,0x28,0x28,0x61,0x29,0x20,0x3c,0x3c,
|
||||
0x20,0x62,0x29,0x20,0x7c,0x20,0x28,0x28,0x61,0x29,0x20,0x3e,0x3e,0x20,0x63,0x29,0x29,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,
|
||||
0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x72,0x63,0x5b,0x32,0x5d,0x5b,0x52,0x4f,0x55,0x4e,0x44,0x53,0x5d,0x3d,0x7b,0x0a,0x7b,0x30,0x78,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x31,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
|
||||
0x38,0x30,0x38,0x32,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x38,0x41,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x38,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
|
||||
0x38,0x30,0x38,0x42,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x31,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x38,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x38,0x31,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
|
||||
0x38,0x30,0x30,0x39,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x41,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x38,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,
|
||||
0x38,0x30,0x30,0x39,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x41,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x38,0x42,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
|
||||
0x30,0x30,0x38,0x42,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x38,0x39,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x38,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x33,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
|
||||
0x38,0x30,0x30,0x32,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x41,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,
|
||||
0x30,0x30,0x30,0x41,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x38,0x31,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x38,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x38,0x30,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,
|
||||
0x30,0x30,0x30,0x31,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x30,0x38,0x55,0x4c,0x7d,0x2c,0x0a,0x7b,0x30,
|
||||
0x20,0x32,0x34,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x52,0x36,0x34,0x28,0x61,0x2c,0x62,0x2c,0x63,0x29,0x20,0x28,0x28,0x28,0x61,0x29,0x20,0x3c,0x3c,0x20,
|
||||
0x62,0x29,0x20,0x7c,0x20,0x28,0x28,0x61,0x29,0x20,0x3e,0x3e,0x20,0x63,0x29,0x29,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,0x73,
|
||||
0x74,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x72,0x63,0x5b,0x32,0x5d,0x5b,0x52,0x4f,0x55,0x4e,0x44,0x53,0x5d,0x3d,0x7b,0x0a,0x7b,0x30,0x78,0x30,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x31,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,
|
||||
0x30,0x38,0x32,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x38,0x41,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x38,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,
|
||||
0x30,0x38,0x42,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x31,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x38,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x38,0x31,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,
|
||||
0x30,0x30,0x39,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x41,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x30,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x38,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,
|
||||
0x30,0x30,0x39,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x41,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x30,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x38,0x42,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
|
||||
0x30,0x38,0x42,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x38,0x39,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x38,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x33,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,
|
||||
0x30,0x30,0x32,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x30,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x41,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x30,
|
||||
0x30,0x30,0x41,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x38,0x31,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x38,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x38,0x30,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x30,
|
||||
0x30,0x30,0x31,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x30,0x38,0x55,0x4c,0x7d,0x2c,0x0a,0x7b,0x30,0x55,
|
||||
0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x0a,0x30,
|
||||
0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x0a,
|
||||
0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,
|
||||
0x0a,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,
|
||||
0x7d,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x72,0x6f,0x5b,0x32,0x35,0x5d,
|
||||
0x5b,0x32,0x5d,0x3d,0x7b,0x0a,0x7b,0x20,0x30,0x2c,0x36,0x34,0x7d,0x2c,0x7b,0x34,0x34,0x2c,0x32,0x30,0x7d,0x2c,0x7b,0x34,0x33,0x2c,0x32,0x31,0x7d,0x2c,0x7b,0x32,
|
||||
0x31,0x2c,0x34,0x33,0x7d,0x2c,0x7b,0x31,0x34,0x2c,0x35,0x30,0x7d,0x2c,0x0a,0x7b,0x20,0x31,0x2c,0x36,0x33,0x7d,0x2c,0x7b,0x20,0x36,0x2c,0x35,0x38,0x7d,0x2c,0x7b,
|
||||
0x32,0x35,0x2c,0x33,0x39,0x7d,0x2c,0x7b,0x20,0x38,0x2c,0x35,0x36,0x7d,0x2c,0x7b,0x31,0x38,0x2c,0x34,0x36,0x7d,0x2c,0x0a,0x7b,0x36,0x32,0x2c,0x32,0x7d,0x2c,0x7b,
|
||||
0x35,0x35,0x2c,0x39,0x7d,0x2c,0x7b,0x33,0x39,0x2c,0x32,0x35,0x7d,0x2c,0x7b,0x34,0x31,0x2c,0x32,0x33,0x7d,0x2c,0x7b,0x20,0x32,0x2c,0x36,0x32,0x7d,0x2c,0x0a,0x7b,
|
||||
0x32,0x38,0x2c,0x33,0x36,0x7d,0x2c,0x7b,0x32,0x30,0x2c,0x34,0x34,0x7d,0x2c,0x7b,0x20,0x33,0x2c,0x36,0x31,0x7d,0x2c,0x7b,0x34,0x35,0x2c,0x31,0x39,0x7d,0x2c,0x7b,
|
||||
0x36,0x31,0x2c,0x33,0x7d,0x2c,0x0a,0x7b,0x32,0x37,0x2c,0x33,0x37,0x7d,0x2c,0x7b,0x33,0x36,0x2c,0x32,0x38,0x7d,0x2c,0x7b,0x31,0x30,0x2c,0x35,0x34,0x7d,0x2c,0x7b,
|
||||
0x31,0x35,0x2c,0x34,0x39,0x7d,0x2c,0x7b,0x35,0x36,0x2c,0x38,0x7d,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,0x73,
|
||||
0x74,0x20,0x69,0x6e,0x74,0x20,0x61,0x5b,0x32,0x35,0x5d,0x3d,0x7b,0x0a,0x30,0x2c,0x36,0x2c,0x31,0x32,0x2c,0x31,0x38,0x2c,0x32,0x34,0x2c,0x0a,0x31,0x2c,0x37,0x2c,
|
||||
0x31,0x33,0x2c,0x31,0x39,0x2c,0x32,0x30,0x2c,0x0a,0x32,0x2c,0x38,0x2c,0x31,0x34,0x2c,0x31,0x35,0x2c,0x32,0x31,0x2c,0x0a,0x33,0x2c,0x39,0x2c,0x31,0x30,0x2c,0x31,
|
||||
0x36,0x2c,0x32,0x32,0x2c,0x0a,0x34,0x2c,0x35,0x2c,0x31,0x31,0x2c,0x31,0x37,0x2c,0x32,0x33,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,
|
||||
0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x5b,0x32,0x35,0x5d,0x3d,0x7b,0x0a,0x30,0x2c,0x31,0x2c,0x32,0x2c,0x33,0x2c,0x34,0x2c,0x0a,0x31,0x2c,
|
||||
0x32,0x2c,0x33,0x2c,0x34,0x2c,0x30,0x2c,0x0a,0x32,0x2c,0x33,0x2c,0x34,0x2c,0x30,0x2c,0x31,0x2c,0x0a,0x33,0x2c,0x34,0x2c,0x30,0x2c,0x31,0x2c,0x32,0x2c,0x0a,0x34,
|
||||
0x2c,0x30,0x2c,0x31,0x2c,0x32,0x2c,0x33,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,
|
||||
0x20,0x63,0x5b,0x32,0x35,0x5d,0x5b,0x33,0x5d,0x3d,0x7b,0x0a,0x7b,0x20,0x30,0x2c,0x31,0x2c,0x32,0x7d,0x2c,0x7b,0x20,0x31,0x2c,0x32,0x2c,0x33,0x7d,0x2c,0x7b,0x20,
|
||||
0x32,0x2c,0x33,0x2c,0x34,0x7d,0x2c,0x7b,0x20,0x33,0x2c,0x34,0x2c,0x30,0x7d,0x2c,0x7b,0x20,0x34,0x2c,0x30,0x2c,0x31,0x7d,0x2c,0x0a,0x7b,0x20,0x35,0x2c,0x36,0x2c,
|
||||
0x37,0x7d,0x2c,0x7b,0x20,0x36,0x2c,0x37,0x2c,0x38,0x7d,0x2c,0x7b,0x20,0x37,0x2c,0x38,0x2c,0x39,0x7d,0x2c,0x7b,0x20,0x38,0x2c,0x39,0x2c,0x35,0x7d,0x2c,0x7b,0x20,
|
||||
0x39,0x2c,0x35,0x2c,0x36,0x7d,0x2c,0x0a,0x7b,0x31,0x30,0x2c,0x31,0x31,0x2c,0x31,0x32,0x7d,0x2c,0x7b,0x31,0x31,0x2c,0x31,0x32,0x2c,0x31,0x33,0x7d,0x2c,0x7b,0x31,
|
||||
0x32,0x2c,0x31,0x33,0x2c,0x31,0x34,0x7d,0x2c,0x7b,0x31,0x33,0x2c,0x31,0x34,0x2c,0x31,0x30,0x7d,0x2c,0x7b,0x31,0x34,0x2c,0x31,0x30,0x2c,0x31,0x31,0x7d,0x2c,0x0a,
|
||||
0x7b,0x31,0x35,0x2c,0x31,0x36,0x2c,0x31,0x37,0x7d,0x2c,0x7b,0x31,0x36,0x2c,0x31,0x37,0x2c,0x31,0x38,0x7d,0x2c,0x7b,0x31,0x37,0x2c,0x31,0x38,0x2c,0x31,0x39,0x7d,
|
||||
0x2c,0x7b,0x31,0x38,0x2c,0x31,0x39,0x2c,0x31,0x35,0x7d,0x2c,0x7b,0x31,0x39,0x2c,0x31,0x35,0x2c,0x31,0x36,0x7d,0x2c,0x0a,0x7b,0x32,0x30,0x2c,0x32,0x31,0x2c,0x32,
|
||||
0x32,0x7d,0x2c,0x7b,0x32,0x31,0x2c,0x32,0x32,0x2c,0x32,0x33,0x7d,0x2c,0x7b,0x32,0x32,0x2c,0x32,0x33,0x2c,0x32,0x34,0x7d,0x2c,0x7b,0x32,0x33,0x2c,0x32,0x34,0x2c,
|
||||
0x32,0x30,0x7d,0x2c,0x7b,0x32,0x34,0x2c,0x32,0x30,0x2c,0x32,0x31,0x7d,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,
|
||||
0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x64,0x5b,0x32,0x35,0x5d,0x3d,0x7b,0x0a,0x30,0x2c,0x31,0x2c,0x32,0x2c,0x33,0x2c,0x34,0x2c,0x0a,0x31,0x30,0x2c,0x31,0x31,0x2c,
|
||||
0x31,0x32,0x2c,0x31,0x33,0x2c,0x31,0x34,0x2c,0x0a,0x32,0x30,0x2c,0x32,0x31,0x2c,0x32,0x32,0x2c,0x32,0x33,0x2c,0x32,0x34,0x2c,0x0a,0x35,0x2c,0x36,0x2c,0x37,0x2c,
|
||||
0x38,0x2c,0x39,0x2c,0x0a,0x31,0x35,0x2c,0x31,0x36,0x2c,0x31,0x37,0x2c,0x31,0x38,0x2c,0x31,0x39,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x61,0x74,0x74,0x72,0x69,0x62,0x75,
|
||||
0x74,0x65,0x5f,0x5f,0x28,0x28,0x72,0x65,0x71,0x64,0x5f,0x77,0x6f,0x72,0x6b,0x5f,0x67,0x72,0x6f,0x75,0x70,0x5f,0x73,0x69,0x7a,0x65,0x28,0x33,0x32,0x2c,0x31,0x2c,
|
||||
0x31,0x29,0x29,0x29,0x0a,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x73,0x68,0x61,0x33,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,
|
||||
0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x38,0x5f,0x74,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x73,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,
|
||||
0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x73,0x2c,0x75,0x69,0x6e,0x74,
|
||||
0x33,0x32,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x74,0x72,0x69,0x64,0x65,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,
|
||||
0x34,0x5f,0x74,0x2a,0x20,0x68,0x61,0x73,0x68,0x65,0x73,0x29,0x0a,0x7b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x74,0x3d,
|
||||
0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,
|
||||
0x67,0x3d,0x67,0x65,0x74,0x5f,0x67,0x72,0x6f,0x75,0x70,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x69,0x66,0x28,0x74,0x3e,0x3d,0x32,0x35,0x29,0x0a,0x72,0x65,0x74,
|
||||
0x75,0x72,0x6e,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x73,0x3d,0x74,0x20,0x25,0x20,0x35,0x3b,0x0a,0x63,0x6f,0x6e,
|
||||
0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x3d,0x28,0x28,0x75,0x69,0x6e,0x74,0x36,
|
||||
0x34,0x5f,0x74,0x29,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x74,0x72,0x69,0x64,0x65,0x29,0x2a,0x67,0x3b,0x0a,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,
|
||||
0x6e,0x74,0x36,0x34,0x5f,0x74,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x3d,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,
|
||||
0x2a,0x29,0x28,0x69,0x6e,0x70,0x75,0x74,0x73,0x2b,0x69,0x6e,0x70,0x75,0x74,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,
|
||||
0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x3d,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x73,0x5b,0x67,
|
||||
0x5d,0x2b,0x31,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x41,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x5f,0x5f,0x6c,
|
||||
0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x43,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,
|
||||
0x74,0x36,0x34,0x5f,0x74,0x20,0x44,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x41,0x5b,0x74,0x5d,0x3d,0x30,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,
|
||||
0x32,0x5f,0x74,0x20,0x77,0x6f,0x72,0x64,0x73,0x3d,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,
|
||||
0x36,0x34,0x5f,0x74,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x74,0x61,0x69,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x3d,
|
||||
0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x20,0x25,0x20,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x3b,0x0a,0x75,
|
||||
0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x3d,0x30,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x33,0x32,
|
||||
0x5f,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x77,0x6f,0x72,0x64,0x73,0x3b,0x20,0x2b,0x2b,0x69,0x2c,0x2b,0x2b,0x69,0x6e,0x70,0x75,0x74,0x29,0x0a,0x7b,0x0a,
|
||||
0x41,0x5b,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x2a,0x69,0x6e,0x70,0x75,0x74,0x3b,0x0a,0x2b,0x2b,0x77,0x6f,0x72,0x64,0x49,0x6e,
|
||||
0x64,0x65,0x78,0x3b,0x0a,0x69,0x66,0x28,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x3d,0x3d,0x31,0x37,0x29,0x0a,0x7b,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,
|
||||
0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x20,0x52,0x4f,0x55,0x4e,0x44,0x53,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x52,
|
||||
0x4f,0x55,0x4e,0x44,0x53,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x0a,0x7b,0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x41,0x5b,0x73,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x35,0x5d,0x5e,0x41,
|
||||
0x5b,0x73,0x2b,0x31,0x30,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x31,0x35,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x32,0x30,0x5d,0x3b,0x0a,0x44,0x5b,0x74,0x5d,0x3d,0x43,0x5b,0x62,
|
||||
0x5b,0x32,0x30,0x2b,0x73,0x5d,0x5d,0x5e,0x52,0x36,0x34,0x28,0x43,0x5b,0x62,0x5b,0x35,0x2b,0x73,0x5d,0x5d,0x2c,0x31,0x2c,0x36,0x33,0x29,0x3b,0x0a,0x43,0x5b,0x74,
|
||||
0x5d,0x3d,0x52,0x36,0x34,0x28,0x41,0x5b,0x61,0x5b,0x74,0x5d,0x5d,0x5e,0x44,0x5b,0x62,0x5b,0x74,0x5d,0x5d,0x2c,0x72,0x6f,0x5b,0x74,0x5d,0x5b,0x30,0x5d,0x2c,0x72,
|
||||
0x6f,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x29,0x3b,0x0a,0x41,0x5b,0x64,0x5b,0x74,0x5d,0x5d,0x3d,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x30,0x5d,0x5d,0x5e,0x28,0x28,0x7e,
|
||||
0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x5d,0x29,0x26,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x32,0x5d,0x5d,0x29,0x3b,0x0a,0x41,0x5b,0x74,0x5d,0x20,0x5e,0x3d,
|
||||
0x20,0x72,0x63,0x5b,0x28,0x74,0x3d,0x3d,0x30,0x29,0x3f,0x30,0x3a,0x31,0x5d,0x5b,0x69,0x5d,0x3b,0x20,0x0a,0x7d,0x0a,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,
|
||||
0x3d,0x30,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x74,0x61,0x69,0x6c,0x3d,0x30,0x3b,0x0a,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,
|
||||
0x6c,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x38,0x5f,0x74,0x2a,0x20,0x70,0x3d,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x63,0x6f,0x6e,
|
||||
0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x38,0x5f,0x74,0x2a,0x29,0x69,0x6e,0x70,0x75,0x74,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,
|
||||
0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x74,0x61,0x69,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x0a,0x7b,0x0a,0x74,0x61,0x69,0x6c,0x7c,0x3d,
|
||||
0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x28,0x70,0x5b,0x69,0x5d,0x29,0x3c,0x3c,0x28,0x69,0x2a,0x38,0x29,0x3b,0x0a,0x7d,0x0a,0x41,0x5b,0x77,0x6f,0x72,
|
||||
0x64,0x49,0x6e,0x64,0x65,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x74,0x61,0x69,0x6c,0x5e,0x28,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x28,0x28,0x28,0x75,0x69,
|
||||
0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x28,0x30,0x78,0x30,0x32,0x7c,0x28,0x31,0x3c,0x3c,0x32,0x29,0x29,0x29,0x3c,0x3c,0x28,0x74,0x61,0x69,0x6c,0x5f,0x73,0x69,0x7a,
|
||||
0x65,0x2a,0x38,0x29,0x29,0x29,0x3b,0x0a,0x41,0x5b,0x31,0x36,0x5d,0x20,0x5e,0x3d,0x20,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
|
||||
0x30,0x30,0x30,0x55,0x4c,0x3b,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x20,0x31,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,
|
||||
0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x52,0x4f,0x55,0x4e,0x44,0x53,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x0a,0x7b,0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x41,0x5b,0x73,0x5d,
|
||||
0x5e,0x41,0x5b,0x73,0x2b,0x35,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x31,0x30,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x31,0x35,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x32,0x30,0x5d,0x3b,
|
||||
0x0a,0x44,0x5b,0x74,0x5d,0x3d,0x43,0x5b,0x62,0x5b,0x32,0x30,0x2b,0x73,0x5d,0x5d,0x5e,0x52,0x36,0x34,0x28,0x43,0x5b,0x62,0x5b,0x35,0x2b,0x73,0x5d,0x5d,0x2c,0x31,
|
||||
0x2c,0x36,0x33,0x29,0x3b,0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x52,0x36,0x34,0x28,0x41,0x5b,0x61,0x5b,0x74,0x5d,0x5d,0x5e,0x44,0x5b,0x62,0x5b,0x74,0x5d,0x5d,0x2c,0x72,
|
||||
0x6f,0x5b,0x74,0x5d,0x5b,0x30,0x5d,0x2c,0x72,0x6f,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x29,0x3b,0x0a,0x41,0x5b,0x64,0x5b,0x74,0x5d,0x5d,0x3d,0x43,0x5b,0x63,0x5b,0x74,
|
||||
0x5d,0x5b,0x30,0x5d,0x5d,0x5e,0x28,0x28,0x7e,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x5d,0x29,0x26,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x32,0x5d,0x5d,0x29,
|
||||
0x3b,0x0a,0x41,0x5b,0x74,0x5d,0x20,0x5e,0x3d,0x20,0x72,0x63,0x5b,0x28,0x74,0x3d,0x3d,0x30,0x29,0x3f,0x30,0x3a,0x31,0x5d,0x5b,0x69,0x5d,0x3b,0x20,0x0a,0x7d,0x0a,
|
||||
0x69,0x66,0x28,0x74,0x3c,0x34,0x29,0x0a,0x7b,0x0a,0x68,0x61,0x73,0x68,0x65,0x73,0x2b,0x3d,0x67,0x2a,0x28,0x33,0x32,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,
|
||||
0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x29,0x3b,0x0a,0x68,0x61,0x73,0x68,0x65,0x73,0x5b,0x74,0x5d,0x3d,0x41,0x5b,0x74,0x5d,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x5f,
|
||||
0x5f,0x61,0x74,0x74,0x72,0x69,0x62,0x75,0x74,0x65,0x5f,0x5f,0x28,0x28,0x72,0x65,0x71,0x64,0x5f,0x77,0x6f,0x72,0x6b,0x5f,0x67,0x72,0x6f,0x75,0x70,0x5f,0x73,0x69,
|
||||
0x7a,0x65,0x28,0x33,0x32,0x2c,0x31,0x2c,0x31,0x29,0x29,0x29,0x0a,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x73,0x68,0x61,0x33,0x5f,
|
||||
0x69,0x6e,0x69,0x74,0x69,0x61,0x6c,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x38,0x5f,0x74,0x2a,0x20,
|
||||
0x69,0x6e,0x70,0x75,0x74,0x5f,0x64,0x61,0x74,0x61,0x2c,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x2c,0x75,
|
||||
0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x6e,0x6f,0x6e,0x63,0x65,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x2a,
|
||||
0x20,0x68,0x61,0x73,0x68,0x65,0x73,0x29,0x0a,0x7b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x74,0x3d,0x67,0x65,0x74,0x5f,
|
||||
0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x67,0x3d,0x67,0x65,
|
||||
0x74,0x5f,0x67,0x72,0x6f,0x75,0x70,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x69,0x66,0x28,0x74,0x3e,0x3d,0x32,0x35,0x29,0x0a,0x72,0x65,0x74,0x75,0x72,0x6e,0x3b,
|
||||
0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x73,0x3d,0x74,0x20,0x25,0x20,0x35,0x3b,0x0a,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,
|
||||
0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x3d,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,
|
||||
0x36,0x34,0x5f,0x74,0x2a,0x29,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x64,0x61,0x74,0x61,0x29,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,
|
||||
0x36,0x34,0x5f,0x74,0x20,0x41,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x43,0x5b,0x32,
|
||||
0x35,0x5d,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x44,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x41,0x5b,0x74,0x5d,
|
||||
0x3d,0x28,0x74,0x3c,0x31,0x36,0x29,0x3f,0x69,0x6e,0x70,0x75,0x74,0x5b,0x74,0x5d,0x3a,0x30,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,
|
||||
0x33,0x32,0x5f,0x74,0x2a,0x20,0x6e,0x6f,0x6e,0x63,0x65,0x5f,0x70,0x6f,0x73,0x3d,0x28,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,
|
||||
0x74,0x2a,0x29,0x28,0x41,0x29,0x2b,0x39,0x3b,0x0a,0x6e,0x6f,0x6e,0x63,0x65,0x2b,0x3d,0x67,0x3b,0x0a,0x6e,0x6f,0x6e,0x63,0x65,0x5f,0x70,0x6f,0x73,0x5b,0x30,0x5d,
|
||||
0x3d,0x28,0x6e,0x6f,0x6e,0x63,0x65,0x5f,0x70,0x6f,0x73,0x5b,0x30,0x5d,0x26,0x30,0x78,0x46,0x46,0x46,0x46,0x46,0x46,0x55,0x29,0x7c,0x28,0x28,0x6e,0x6f,0x6e,0x63,
|
||||
0x65,0x26,0x30,0x78,0x46,0x46,0x29,0x3c,0x3c,0x32,0x34,0x29,0x3b,0x0a,0x6e,0x6f,0x6e,0x63,0x65,0x5f,0x70,0x6f,0x73,0x5b,0x31,0x5d,0x3d,0x28,0x6e,0x6f,0x6e,0x63,
|
||||
0x65,0x5f,0x70,0x6f,0x73,0x5b,0x31,0x5d,0x26,0x30,0x78,0x46,0x46,0x30,0x30,0x30,0x30,0x30,0x30,0x55,0x29,0x7c,0x28,0x6e,0x6f,0x6e,0x63,0x65,0x3e,0x3e,0x38,0x29,
|
||||
0x3b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x3d,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x2f,
|
||||
0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,
|
||||
0x20,0x74,0x61,0x69,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x3d,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x20,0x25,0x20,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,
|
||||
0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x3b,0x0a,0x41,0x5b,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x28,0x75,0x69,0x6e,0x74,0x36,
|
||||
0x34,0x5f,0x74,0x29,0x28,0x28,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x28,0x30,0x78,0x30,0x32,0x7c,0x28,0x31,0x3c,0x3c,0x32,0x29,0x29,0x29,0x3c,0x3c,
|
||||
0x28,0x74,0x61,0x69,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x2a,0x38,0x29,0x29,0x3b,0x0a,0x41,0x5b,0x31,0x36,0x5d,0x20,0x5e,0x3d,0x20,0x30,0x78,0x38,0x30,0x30,0x30,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x55,0x4c,0x3b,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x20,0x52,0x4f,
|
||||
0x55,0x4e,0x44,0x53,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x52,0x4f,0x55,0x4e,0x44,0x53,0x3b,0x20,0x2b,0x2b,0x69,
|
||||
0x29,0x0a,0x7b,0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x41,0x5b,0x73,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x35,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x31,0x30,0x5d,0x5e,0x41,0x5b,0x73,
|
||||
0x2b,0x31,0x35,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x32,0x30,0x5d,0x3b,0x0a,0x44,0x5b,0x74,0x5d,0x3d,0x43,0x5b,0x62,0x5b,0x32,0x30,0x2b,0x73,0x5d,0x5d,0x5e,0x52,0x36,
|
||||
0x34,0x28,0x43,0x5b,0x62,0x5b,0x35,0x2b,0x73,0x5d,0x5d,0x2c,0x31,0x2c,0x36,0x33,0x29,0x3b,0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x52,0x36,0x34,0x28,0x41,0x5b,0x61,0x5b,
|
||||
0x74,0x5d,0x5d,0x5e,0x44,0x5b,0x62,0x5b,0x74,0x5d,0x5d,0x2c,0x72,0x6f,0x5b,0x74,0x5d,0x5b,0x30,0x5d,0x2c,0x72,0x6f,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x29,0x3b,0x0a,
|
||||
0x41,0x5b,0x64,0x5b,0x74,0x5d,0x5d,0x3d,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x30,0x5d,0x5d,0x5e,0x28,0x28,0x7e,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x5d,
|
||||
0x29,0x26,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x32,0x5d,0x5d,0x29,0x3b,0x0a,0x41,0x5b,0x74,0x5d,0x20,0x5e,0x3d,0x20,0x72,0x63,0x5b,0x28,0x74,0x3d,0x3d,0x30,0x29,
|
||||
0x3f,0x30,0x3a,0x31,0x5d,0x5b,0x69,0x5d,0x3b,0x20,0x0a,0x7d,0x0a,0x69,0x66,0x28,0x74,0x3c,0x34,0x29,0x0a,0x7b,0x0a,0x68,0x61,0x73,0x68,0x65,0x73,0x2b,0x3d,0x67,
|
||||
0x2a,0x28,0x33,0x32,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x29,0x3b,0x0a,0x68,0x61,0x73,0x68,0x65,0x73,0x5b,0x74,
|
||||
0x5d,0x3d,0x41,0x5b,0x74,0x5d,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x00
|
||||
0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x7d,
|
||||
0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x72,0x6f,0x5b,0x32,0x35,0x5d,0x5b,
|
||||
0x32,0x5d,0x3d,0x7b,0x0a,0x7b,0x20,0x30,0x2c,0x36,0x34,0x7d,0x2c,0x7b,0x34,0x34,0x2c,0x32,0x30,0x7d,0x2c,0x7b,0x34,0x33,0x2c,0x32,0x31,0x7d,0x2c,0x7b,0x32,0x31,
|
||||
0x2c,0x34,0x33,0x7d,0x2c,0x7b,0x31,0x34,0x2c,0x35,0x30,0x7d,0x2c,0x0a,0x7b,0x20,0x31,0x2c,0x36,0x33,0x7d,0x2c,0x7b,0x20,0x36,0x2c,0x35,0x38,0x7d,0x2c,0x7b,0x32,
|
||||
0x35,0x2c,0x33,0x39,0x7d,0x2c,0x7b,0x20,0x38,0x2c,0x35,0x36,0x7d,0x2c,0x7b,0x31,0x38,0x2c,0x34,0x36,0x7d,0x2c,0x0a,0x7b,0x36,0x32,0x2c,0x32,0x7d,0x2c,0x7b,0x35,
|
||||
0x35,0x2c,0x39,0x7d,0x2c,0x7b,0x33,0x39,0x2c,0x32,0x35,0x7d,0x2c,0x7b,0x34,0x31,0x2c,0x32,0x33,0x7d,0x2c,0x7b,0x20,0x32,0x2c,0x36,0x32,0x7d,0x2c,0x0a,0x7b,0x32,
|
||||
0x38,0x2c,0x33,0x36,0x7d,0x2c,0x7b,0x32,0x30,0x2c,0x34,0x34,0x7d,0x2c,0x7b,0x20,0x33,0x2c,0x36,0x31,0x7d,0x2c,0x7b,0x34,0x35,0x2c,0x31,0x39,0x7d,0x2c,0x7b,0x36,
|
||||
0x31,0x2c,0x33,0x7d,0x2c,0x0a,0x7b,0x32,0x37,0x2c,0x33,0x37,0x7d,0x2c,0x7b,0x33,0x36,0x2c,0x32,0x38,0x7d,0x2c,0x7b,0x31,0x30,0x2c,0x35,0x34,0x7d,0x2c,0x7b,0x31,
|
||||
0x35,0x2c,0x34,0x39,0x7d,0x2c,0x7b,0x35,0x36,0x2c,0x38,0x7d,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,0x73,0x74,
|
||||
0x20,0x69,0x6e,0x74,0x20,0x61,0x5b,0x32,0x35,0x5d,0x3d,0x7b,0x0a,0x30,0x2c,0x36,0x2c,0x31,0x32,0x2c,0x31,0x38,0x2c,0x32,0x34,0x2c,0x0a,0x31,0x2c,0x37,0x2c,0x31,
|
||||
0x33,0x2c,0x31,0x39,0x2c,0x32,0x30,0x2c,0x0a,0x32,0x2c,0x38,0x2c,0x31,0x34,0x2c,0x31,0x35,0x2c,0x32,0x31,0x2c,0x0a,0x33,0x2c,0x39,0x2c,0x31,0x30,0x2c,0x31,0x36,
|
||||
0x2c,0x32,0x32,0x2c,0x0a,0x34,0x2c,0x35,0x2c,0x31,0x31,0x2c,0x31,0x37,0x2c,0x32,0x33,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,
|
||||
0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x5b,0x32,0x35,0x5d,0x3d,0x7b,0x0a,0x30,0x2c,0x31,0x2c,0x32,0x2c,0x33,0x2c,0x34,0x2c,0x0a,0x31,0x2c,0x32,
|
||||
0x2c,0x33,0x2c,0x34,0x2c,0x30,0x2c,0x0a,0x32,0x2c,0x33,0x2c,0x34,0x2c,0x30,0x2c,0x31,0x2c,0x0a,0x33,0x2c,0x34,0x2c,0x30,0x2c,0x31,0x2c,0x32,0x2c,0x0a,0x34,0x2c,
|
||||
0x30,0x2c,0x31,0x2c,0x32,0x2c,0x33,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,
|
||||
0x63,0x5b,0x32,0x35,0x5d,0x5b,0x33,0x5d,0x3d,0x7b,0x0a,0x7b,0x20,0x30,0x2c,0x31,0x2c,0x32,0x7d,0x2c,0x7b,0x20,0x31,0x2c,0x32,0x2c,0x33,0x7d,0x2c,0x7b,0x20,0x32,
|
||||
0x2c,0x33,0x2c,0x34,0x7d,0x2c,0x7b,0x20,0x33,0x2c,0x34,0x2c,0x30,0x7d,0x2c,0x7b,0x20,0x34,0x2c,0x30,0x2c,0x31,0x7d,0x2c,0x0a,0x7b,0x20,0x35,0x2c,0x36,0x2c,0x37,
|
||||
0x7d,0x2c,0x7b,0x20,0x36,0x2c,0x37,0x2c,0x38,0x7d,0x2c,0x7b,0x20,0x37,0x2c,0x38,0x2c,0x39,0x7d,0x2c,0x7b,0x20,0x38,0x2c,0x39,0x2c,0x35,0x7d,0x2c,0x7b,0x20,0x39,
|
||||
0x2c,0x35,0x2c,0x36,0x7d,0x2c,0x0a,0x7b,0x31,0x30,0x2c,0x31,0x31,0x2c,0x31,0x32,0x7d,0x2c,0x7b,0x31,0x31,0x2c,0x31,0x32,0x2c,0x31,0x33,0x7d,0x2c,0x7b,0x31,0x32,
|
||||
0x2c,0x31,0x33,0x2c,0x31,0x34,0x7d,0x2c,0x7b,0x31,0x33,0x2c,0x31,0x34,0x2c,0x31,0x30,0x7d,0x2c,0x7b,0x31,0x34,0x2c,0x31,0x30,0x2c,0x31,0x31,0x7d,0x2c,0x0a,0x7b,
|
||||
0x31,0x35,0x2c,0x31,0x36,0x2c,0x31,0x37,0x7d,0x2c,0x7b,0x31,0x36,0x2c,0x31,0x37,0x2c,0x31,0x38,0x7d,0x2c,0x7b,0x31,0x37,0x2c,0x31,0x38,0x2c,0x31,0x39,0x7d,0x2c,
|
||||
0x7b,0x31,0x38,0x2c,0x31,0x39,0x2c,0x31,0x35,0x7d,0x2c,0x7b,0x31,0x39,0x2c,0x31,0x35,0x2c,0x31,0x36,0x7d,0x2c,0x0a,0x7b,0x32,0x30,0x2c,0x32,0x31,0x2c,0x32,0x32,
|
||||
0x7d,0x2c,0x7b,0x32,0x31,0x2c,0x32,0x32,0x2c,0x32,0x33,0x7d,0x2c,0x7b,0x32,0x32,0x2c,0x32,0x33,0x2c,0x32,0x34,0x7d,0x2c,0x7b,0x32,0x33,0x2c,0x32,0x34,0x2c,0x32,
|
||||
0x30,0x7d,0x2c,0x7b,0x32,0x34,0x2c,0x32,0x30,0x2c,0x32,0x31,0x7d,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,0x73,
|
||||
0x74,0x20,0x69,0x6e,0x74,0x20,0x64,0x5b,0x32,0x35,0x5d,0x3d,0x7b,0x0a,0x30,0x2c,0x31,0x2c,0x32,0x2c,0x33,0x2c,0x34,0x2c,0x0a,0x31,0x30,0x2c,0x31,0x31,0x2c,0x31,
|
||||
0x32,0x2c,0x31,0x33,0x2c,0x31,0x34,0x2c,0x0a,0x32,0x30,0x2c,0x32,0x31,0x2c,0x32,0x32,0x2c,0x32,0x33,0x2c,0x32,0x34,0x2c,0x0a,0x35,0x2c,0x36,0x2c,0x37,0x2c,0x38,
|
||||
0x2c,0x39,0x2c,0x0a,0x31,0x35,0x2c,0x31,0x36,0x2c,0x31,0x37,0x2c,0x31,0x38,0x2c,0x31,0x39,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x61,0x74,0x74,0x72,0x69,0x62,0x75,0x74,
|
||||
0x65,0x5f,0x5f,0x28,0x28,0x72,0x65,0x71,0x64,0x5f,0x77,0x6f,0x72,0x6b,0x5f,0x67,0x72,0x6f,0x75,0x70,0x5f,0x73,0x69,0x7a,0x65,0x28,0x33,0x32,0x2c,0x31,0x2c,0x31,
|
||||
0x29,0x29,0x29,0x0a,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x73,0x68,0x61,0x33,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,
|
||||
0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x38,0x5f,0x74,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x73,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x63,
|
||||
0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x73,0x2c,0x75,0x69,0x6e,0x74,0x33,
|
||||
0x32,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x74,0x72,0x69,0x64,0x65,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,
|
||||
0x5f,0x74,0x2a,0x20,0x68,0x61,0x73,0x68,0x65,0x73,0x29,0x0a,0x7b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x74,0x3d,0x67,
|
||||
0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x67,
|
||||
0x3d,0x67,0x65,0x74,0x5f,0x67,0x72,0x6f,0x75,0x70,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x69,0x66,0x28,0x74,0x3e,0x3d,0x32,0x35,0x29,0x0a,0x72,0x65,0x74,0x75,
|
||||
0x72,0x6e,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x73,0x3d,0x74,0x20,0x25,0x20,0x35,0x3b,0x0a,0x63,0x6f,0x6e,0x73,
|
||||
0x74,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x3d,0x28,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,
|
||||
0x5f,0x74,0x29,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x74,0x72,0x69,0x64,0x65,0x29,0x2a,0x67,0x3b,0x0a,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,
|
||||
0x74,0x36,0x34,0x5f,0x74,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x3d,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x2a,
|
||||
0x29,0x28,0x69,0x6e,0x70,0x75,0x74,0x73,0x2b,0x69,0x6e,0x70,0x75,0x74,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,
|
||||
0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x3d,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x73,0x5b,0x67,0x5d,
|
||||
0x2b,0x31,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x41,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,
|
||||
0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x43,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,
|
||||
0x36,0x34,0x5f,0x74,0x20,0x44,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x41,0x5b,0x74,0x5d,0x3d,0x30,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,
|
||||
0x5f,0x74,0x20,0x77,0x6f,0x72,0x64,0x73,0x3d,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x36,
|
||||
0x34,0x5f,0x74,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x74,0x61,0x69,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x3d,0x69,
|
||||
0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x20,0x25,0x20,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x3b,0x0a,0x75,0x69,
|
||||
0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x3d,0x30,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,
|
||||
0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x77,0x6f,0x72,0x64,0x73,0x3b,0x20,0x2b,0x2b,0x69,0x2c,0x2b,0x2b,0x69,0x6e,0x70,0x75,0x74,0x29,0x0a,0x7b,0x0a,0x41,
|
||||
0x5b,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x2a,0x69,0x6e,0x70,0x75,0x74,0x3b,0x0a,0x2b,0x2b,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,
|
||||
0x65,0x78,0x3b,0x0a,0x69,0x66,0x28,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x3d,0x3d,0x31,0x37,0x29,0x0a,0x7b,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,
|
||||
0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x20,0x52,0x4f,0x55,0x4e,0x44,0x53,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x52,0x4f,
|
||||
0x55,0x4e,0x44,0x53,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x0a,0x7b,0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x41,0x5b,0x73,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x35,0x5d,0x5e,0x41,0x5b,
|
||||
0x73,0x2b,0x31,0x30,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x31,0x35,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x32,0x30,0x5d,0x3b,0x0a,0x44,0x5b,0x74,0x5d,0x3d,0x43,0x5b,0x62,0x5b,
|
||||
0x32,0x30,0x2b,0x73,0x5d,0x5d,0x5e,0x52,0x36,0x34,0x28,0x43,0x5b,0x62,0x5b,0x35,0x2b,0x73,0x5d,0x5d,0x2c,0x31,0x2c,0x36,0x33,0x29,0x3b,0x0a,0x43,0x5b,0x74,0x5d,
|
||||
0x3d,0x52,0x36,0x34,0x28,0x41,0x5b,0x61,0x5b,0x74,0x5d,0x5d,0x5e,0x44,0x5b,0x62,0x5b,0x74,0x5d,0x5d,0x2c,0x72,0x6f,0x5b,0x74,0x5d,0x5b,0x30,0x5d,0x2c,0x72,0x6f,
|
||||
0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x29,0x3b,0x0a,0x41,0x5b,0x64,0x5b,0x74,0x5d,0x5d,0x3d,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x30,0x5d,0x5d,0x5e,0x28,0x28,0x7e,0x43,
|
||||
0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x5d,0x29,0x26,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x32,0x5d,0x5d,0x29,0x3b,0x0a,0x41,0x5b,0x74,0x5d,0x20,0x5e,0x3d,0x20,
|
||||
0x72,0x63,0x5b,0x28,0x74,0x3d,0x3d,0x30,0x29,0x3f,0x30,0x3a,0x31,0x5d,0x5b,0x69,0x5d,0x3b,0x0a,0x7d,0x0a,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x3d,0x30,
|
||||
0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x74,0x61,0x69,0x6c,0x3d,0x30,0x3b,0x0a,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,
|
||||
0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x38,0x5f,0x74,0x2a,0x20,0x70,0x3d,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x63,0x6f,0x6e,0x73,0x74,
|
||||
0x20,0x75,0x69,0x6e,0x74,0x38,0x5f,0x74,0x2a,0x29,0x69,0x6e,0x70,0x75,0x74,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x69,
|
||||
0x3d,0x30,0x3b,0x20,0x69,0x3c,0x74,0x61,0x69,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x0a,0x7b,0x0a,0x74,0x61,0x69,0x6c,0x7c,0x3d,0x28,0x75,
|
||||
0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x28,0x70,0x5b,0x69,0x5d,0x29,0x3c,0x3c,0x28,0x69,0x2a,0x38,0x29,0x3b,0x0a,0x7d,0x0a,0x41,0x5b,0x77,0x6f,0x72,0x64,0x49,
|
||||
0x6e,0x64,0x65,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x74,0x61,0x69,0x6c,0x5e,0x28,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x28,0x28,0x28,0x75,0x69,0x6e,0x74,
|
||||
0x36,0x34,0x5f,0x74,0x29,0x28,0x30,0x78,0x30,0x32,0x7c,0x28,0x31,0x3c,0x3c,0x32,0x29,0x29,0x29,0x3c,0x3c,0x28,0x74,0x61,0x69,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x2a,
|
||||
0x38,0x29,0x29,0x29,0x3b,0x0a,0x41,0x5b,0x31,0x36,0x5d,0x20,0x5e,0x3d,0x20,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
|
||||
0x30,0x55,0x4c,0x3b,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x20,0x31,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,
|
||||
0x3d,0x30,0x3b,0x20,0x69,0x3c,0x52,0x4f,0x55,0x4e,0x44,0x53,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x0a,0x7b,0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x41,0x5b,0x73,0x5d,0x5e,0x41,
|
||||
0x5b,0x73,0x2b,0x35,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x31,0x30,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x31,0x35,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x32,0x30,0x5d,0x3b,0x0a,0x44,
|
||||
0x5b,0x74,0x5d,0x3d,0x43,0x5b,0x62,0x5b,0x32,0x30,0x2b,0x73,0x5d,0x5d,0x5e,0x52,0x36,0x34,0x28,0x43,0x5b,0x62,0x5b,0x35,0x2b,0x73,0x5d,0x5d,0x2c,0x31,0x2c,0x36,
|
||||
0x33,0x29,0x3b,0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x52,0x36,0x34,0x28,0x41,0x5b,0x61,0x5b,0x74,0x5d,0x5d,0x5e,0x44,0x5b,0x62,0x5b,0x74,0x5d,0x5d,0x2c,0x72,0x6f,0x5b,
|
||||
0x74,0x5d,0x5b,0x30,0x5d,0x2c,0x72,0x6f,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x29,0x3b,0x0a,0x41,0x5b,0x64,0x5b,0x74,0x5d,0x5d,0x3d,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,
|
||||
0x30,0x5d,0x5d,0x5e,0x28,0x28,0x7e,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x5d,0x29,0x26,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x32,0x5d,0x5d,0x29,0x3b,0x0a,
|
||||
0x41,0x5b,0x74,0x5d,0x20,0x5e,0x3d,0x20,0x72,0x63,0x5b,0x28,0x74,0x3d,0x3d,0x30,0x29,0x3f,0x30,0x3a,0x31,0x5d,0x5b,0x69,0x5d,0x3b,0x0a,0x7d,0x0a,0x69,0x66,0x28,
|
||||
0x74,0x3c,0x34,0x29,0x0a,0x7b,0x0a,0x68,0x61,0x73,0x68,0x65,0x73,0x2b,0x3d,0x67,0x2a,0x28,0x33,0x32,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,
|
||||
0x36,0x34,0x5f,0x74,0x29,0x29,0x3b,0x0a,0x68,0x61,0x73,0x68,0x65,0x73,0x5b,0x74,0x5d,0x3d,0x41,0x5b,0x74,0x5d,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x5f,0x5f,0x61,0x74,
|
||||
0x74,0x72,0x69,0x62,0x75,0x74,0x65,0x5f,0x5f,0x28,0x28,0x72,0x65,0x71,0x64,0x5f,0x77,0x6f,0x72,0x6b,0x5f,0x67,0x72,0x6f,0x75,0x70,0x5f,0x73,0x69,0x7a,0x65,0x28,
|
||||
0x33,0x32,0x2c,0x31,0x2c,0x31,0x29,0x29,0x29,0x0a,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x73,0x68,0x61,0x33,0x5f,0x69,0x6e,0x69,
|
||||
0x74,0x69,0x61,0x6c,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x38,0x5f,0x74,0x2a,0x20,0x69,0x6e,0x70,
|
||||
0x75,0x74,0x5f,0x64,0x61,0x74,0x61,0x2c,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x2c,0x75,0x69,0x6e,0x74,
|
||||
0x33,0x32,0x5f,0x74,0x20,0x6e,0x6f,0x6e,0x63,0x65,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x2a,0x20,0x68,0x61,
|
||||
0x73,0x68,0x65,0x73,0x29,0x0a,0x7b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x74,0x3d,0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,
|
||||
0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x67,0x3d,0x67,0x65,0x74,0x5f,0x67,
|
||||
0x72,0x6f,0x75,0x70,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x69,0x66,0x28,0x74,0x3e,0x3d,0x32,0x35,0x29,0x0a,0x72,0x65,0x74,0x75,0x72,0x6e,0x3b,0x0a,0x63,0x6f,
|
||||
0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x73,0x3d,0x74,0x20,0x25,0x20,0x35,0x3b,0x0a,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,
|
||||
0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x3d,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,
|
||||
0x74,0x2a,0x29,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x64,0x61,0x74,0x61,0x29,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,
|
||||
0x74,0x20,0x41,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x43,0x5b,0x32,0x35,0x5d,0x3b,
|
||||
0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x44,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x41,0x5b,0x74,0x5d,0x3d,0x28,0x74,
|
||||
0x3c,0x31,0x36,0x29,0x3f,0x69,0x6e,0x70,0x75,0x74,0x5b,0x74,0x5d,0x3a,0x30,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,
|
||||
0x74,0x2a,0x20,0x6e,0x6f,0x6e,0x63,0x65,0x5f,0x70,0x6f,0x73,0x3d,0x28,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x2a,0x29,
|
||||
0x28,0x41,0x29,0x2b,0x39,0x3b,0x0a,0x6e,0x6f,0x6e,0x63,0x65,0x2b,0x3d,0x67,0x3b,0x0a,0x6e,0x6f,0x6e,0x63,0x65,0x5f,0x70,0x6f,0x73,0x5b,0x30,0x5d,0x3d,0x28,0x6e,
|
||||
0x6f,0x6e,0x63,0x65,0x5f,0x70,0x6f,0x73,0x5b,0x30,0x5d,0x26,0x30,0x78,0x46,0x46,0x46,0x46,0x46,0x46,0x55,0x29,0x7c,0x28,0x28,0x6e,0x6f,0x6e,0x63,0x65,0x26,0x30,
|
||||
0x78,0x46,0x46,0x29,0x3c,0x3c,0x32,0x34,0x29,0x3b,0x0a,0x6e,0x6f,0x6e,0x63,0x65,0x5f,0x70,0x6f,0x73,0x5b,0x31,0x5d,0x3d,0x28,0x6e,0x6f,0x6e,0x63,0x65,0x5f,0x70,
|
||||
0x6f,0x73,0x5b,0x31,0x5d,0x26,0x30,0x78,0x46,0x46,0x30,0x30,0x30,0x30,0x30,0x30,0x55,0x29,0x7c,0x28,0x6e,0x6f,0x6e,0x63,0x65,0x3e,0x3e,0x38,0x29,0x3b,0x0a,0x75,
|
||||
0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x3d,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x2f,0x73,0x69,0x7a,
|
||||
0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x74,0x61,
|
||||
0x69,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x3d,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x20,0x25,0x20,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,
|
||||
0x36,0x34,0x5f,0x74,0x29,0x3b,0x0a,0x41,0x5b,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,
|
||||
0x29,0x28,0x28,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x28,0x30,0x78,0x30,0x32,0x7c,0x28,0x31,0x3c,0x3c,0x32,0x29,0x29,0x29,0x3c,0x3c,0x28,0x74,0x61,
|
||||
0x69,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x2a,0x38,0x29,0x29,0x3b,0x0a,0x41,0x5b,0x31,0x36,0x5d,0x20,0x5e,0x3d,0x20,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x55,0x4c,0x3b,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x20,0x52,0x4f,0x55,0x4e,0x44,
|
||||
0x53,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x52,0x4f,0x55,0x4e,0x44,0x53,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x0a,0x7b,
|
||||
0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x41,0x5b,0x73,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x35,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x31,0x30,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x31,0x35,
|
||||
0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x32,0x30,0x5d,0x3b,0x0a,0x44,0x5b,0x74,0x5d,0x3d,0x43,0x5b,0x62,0x5b,0x32,0x30,0x2b,0x73,0x5d,0x5d,0x5e,0x52,0x36,0x34,0x28,0x43,
|
||||
0x5b,0x62,0x5b,0x35,0x2b,0x73,0x5d,0x5d,0x2c,0x31,0x2c,0x36,0x33,0x29,0x3b,0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x52,0x36,0x34,0x28,0x41,0x5b,0x61,0x5b,0x74,0x5d,0x5d,
|
||||
0x5e,0x44,0x5b,0x62,0x5b,0x74,0x5d,0x5d,0x2c,0x72,0x6f,0x5b,0x74,0x5d,0x5b,0x30,0x5d,0x2c,0x72,0x6f,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x29,0x3b,0x0a,0x41,0x5b,0x64,
|
||||
0x5b,0x74,0x5d,0x5d,0x3d,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x30,0x5d,0x5d,0x5e,0x28,0x28,0x7e,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x5d,0x29,0x26,0x43,
|
||||
0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x32,0x5d,0x5d,0x29,0x3b,0x0a,0x41,0x5b,0x74,0x5d,0x20,0x5e,0x3d,0x20,0x72,0x63,0x5b,0x28,0x74,0x3d,0x3d,0x30,0x29,0x3f,0x30,0x3a,
|
||||
0x31,0x5d,0x5b,0x69,0x5d,0x3b,0x0a,0x7d,0x0a,0x69,0x66,0x28,0x74,0x3c,0x34,0x29,0x0a,0x7b,0x0a,0x68,0x61,0x73,0x68,0x65,0x73,0x2b,0x3d,0x67,0x2a,0x28,0x33,0x32,
|
||||
0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x29,0x3b,0x0a,0x68,0x61,0x73,0x68,0x65,0x73,0x5b,0x74,0x5d,0x3d,0x41,0x5b,
|
||||
0x74,0x5d,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x00
|
||||
};
|
||||
|
||||
} // namespace xmrig
|
||||
|
||||
@@ -132,6 +132,7 @@ set(SOURCES_BASE
|
||||
src/base/net/tools/LineReader.cpp
|
||||
src/base/net/tools/NetBuffer.cpp
|
||||
src/base/tools/Arguments.cpp
|
||||
src/base/tools/Chrono.cpp
|
||||
src/base/tools/cryptonote/BlockTemplate.cpp
|
||||
src/base/tools/cryptonote/crypto-ops-data.c
|
||||
src/base/tools/cryptonote/crypto-ops.c
|
||||
@@ -244,7 +245,7 @@ else()
|
||||
endif()
|
||||
|
||||
|
||||
if (WITH_KAWPOW)
|
||||
if (WITH_KAWPOW OR WITH_GHOSTRIDER)
|
||||
list(APPEND HEADERS_BASE
|
||||
src/base/net/stratum/AutoClient.h
|
||||
src/base/net/stratum/EthStratumClient.h
|
||||
|
||||
@@ -101,6 +101,11 @@ const char *Algorithm::kKAWPOW = "kawpow";
|
||||
const char *Algorithm::kKAWPOW_RVN = "kawpow";
|
||||
#endif
|
||||
|
||||
#ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
const char* Algorithm::kGHOSTRIDER = "ghostrider";
|
||||
const char* Algorithm::kGHOSTRIDER_RTM = "ghostrider";
|
||||
#endif
|
||||
|
||||
|
||||
#define ALGO_NAME(ALGO) { Algorithm::ALGO, Algorithm::k##ALGO }
|
||||
#define ALGO_ALIAS(ALGO, NAME) { NAME, Algorithm::ALGO }
|
||||
@@ -163,6 +168,10 @@ static const std::map<uint32_t, const char *> kAlgorithmNames = {
|
||||
# ifdef XMRIG_ALGO_KAWPOW
|
||||
ALGO_NAME(KAWPOW_RVN),
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
ALGO_NAME(GHOSTRIDER_RTM),
|
||||
# endif
|
||||
};
|
||||
|
||||
|
||||
@@ -278,6 +287,11 @@ static const std::map<const char *, Algorithm::Id, aliasCompare> kAlgorithmAlias
|
||||
# ifdef XMRIG_ALGO_KAWPOW
|
||||
ALGO_ALIAS_AUTO(KAWPOW_RVN), ALGO_ALIAS(KAWPOW_RVN, "kawpow/rvn"),
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
ALGO_ALIAS_AUTO(GHOSTRIDER_RTM), ALGO_ALIAS(GHOSTRIDER_RTM, "ghostrider/rtm"),
|
||||
ALGO_ALIAS(GHOSTRIDER_RTM, "gr"),
|
||||
# endif
|
||||
};
|
||||
|
||||
|
||||
@@ -352,7 +366,8 @@ std::vector<xmrig::Algorithm> xmrig::Algorithm::all(const std::function<bool(con
|
||||
RX_0, RX_WOW, RX_ARQ, RX_GRAFT, RX_SFX, RX_KEVA,
|
||||
AR2_CHUKWA, AR2_CHUKWA_V2, AR2_WRKZ,
|
||||
ASTROBWT_DERO,
|
||||
KAWPOW_RVN
|
||||
KAWPOW_RVN,
|
||||
GHOSTRIDER_RTM
|
||||
};
|
||||
|
||||
Algorithms out;
|
||||
|
||||
@@ -65,6 +65,13 @@ public:
|
||||
CN_PICO_0 = 0x63120200, // "cn-pico" CryptoNight-Pico
|
||||
CN_PICO_TLO = 0x63120274, // "cn-pico/tlo" CryptoNight-Pico (TLO)
|
||||
CN_UPX2 = 0x63110200, // "cn/upx2" Uplexa (UPX2)
|
||||
CN_GR_0 = 0x63130100, // "cn/dark" GhostRider
|
||||
CN_GR_1 = 0x63130101, // "cn/dark-lite" GhostRider
|
||||
CN_GR_2 = 0x63150102, // "cn/fast" GhostRider
|
||||
CN_GR_3 = 0x63140103, // "cn/lite" GhostRider
|
||||
CN_GR_4 = 0x63120104, // "cn/turtle" GhostRider
|
||||
CN_GR_5 = 0x63120105, // "cn/turtle-lite" GhostRider
|
||||
GHOSTRIDER_RTM = 0x6c150000, // "ghostrider" GhostRider
|
||||
RX_0 = 0x72151200, // "rx/0" RandomX (reference configuration).
|
||||
RX_WOW = 0x72141177, // "rx/wow" RandomWOW (Wownero).
|
||||
RX_ARQ = 0x72121061, // "rx/arq" RandomARQ (Arqma).
|
||||
@@ -89,7 +96,8 @@ public:
|
||||
RANDOM_X = 0x72000000,
|
||||
ARGON2 = 0x61000000,
|
||||
ASTROBWT = 0x41000000,
|
||||
KAWPOW = 0x6b000000
|
||||
KAWPOW = 0x6b000000,
|
||||
GHOSTRIDER = 0x6c000000
|
||||
};
|
||||
|
||||
static const char *kINVALID;
|
||||
@@ -157,6 +165,11 @@ public:
|
||||
static const char *kKAWPOW_RVN;
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
static const char* kGHOSTRIDER;
|
||||
static const char* kGHOSTRIDER_RTM;
|
||||
# endif
|
||||
|
||||
inline Algorithm() = default;
|
||||
inline Algorithm(const char *algo) : m_id(parse(algo)) {}
|
||||
inline Algorithm(Id id) : m_id(id) {}
|
||||
@@ -176,7 +189,8 @@ public:
|
||||
inline Id id() const { return m_id; }
|
||||
inline size_t l2() const { return l2(m_id); }
|
||||
inline uint32_t family() const { return family(m_id); }
|
||||
inline uint32_t maxIntensity() const { return isCN() ? 5 : 1; };
|
||||
inline uint32_t minIntensity() const { return ((m_id == GHOSTRIDER_RTM) ? 8 : 1); };
|
||||
inline uint32_t maxIntensity() const { return isCN() ? 5 : ((m_id == GHOSTRIDER_RTM) ? 8 : 1); };
|
||||
|
||||
inline size_t l3() const
|
||||
{
|
||||
|
||||
@@ -87,6 +87,7 @@ public:
|
||||
SpendSecretKey = 1055,
|
||||
DaemonZMQPortKey = 1056,
|
||||
HugePagesJitKey = 1057,
|
||||
RotationKey = 1058,
|
||||
|
||||
// xmrig common
|
||||
CPUPriorityKey = 1021,
|
||||
|
||||
@@ -50,7 +50,7 @@ bool xmrig::AutoClient::parseLogin(const rapidjson::Value &result, int *code)
|
||||
}
|
||||
|
||||
const Algorithm algo(Json::getString(result, "algo"));
|
||||
if (algo.family() != Algorithm::KAWPOW) {
|
||||
if (algo.family() != Algorithm::KAWPOW && algo.family() != Algorithm::GHOSTRIDER) {
|
||||
*code = 6;
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -34,6 +34,16 @@
|
||||
#include "base/kernel/interfaces/IClientListener.h"
|
||||
#include "net/JobResult.h"
|
||||
|
||||
#ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
#include <cmath>
|
||||
|
||||
extern "C" {
|
||||
#include "crypto/ghostrider/sph_sha2.h"
|
||||
}
|
||||
|
||||
#include "base/tools/Cvt.h"
|
||||
#endif
|
||||
|
||||
|
||||
|
||||
xmrig::EthStratumClient::EthStratumClient(int id, const char *agent, IClientListener *listener) :
|
||||
@@ -63,32 +73,56 @@ int64_t xmrig::EthStratumClient::submit(const JobResult& result)
|
||||
auto& allocator = doc.GetAllocator();
|
||||
|
||||
Value params(kArrayType);
|
||||
params.PushBack(m_pool.user().toJSON(), allocator);
|
||||
params.PushBack(m_user.toJSON(), allocator);
|
||||
params.PushBack(result.jobId.toJSON(), allocator);
|
||||
|
||||
std::stringstream s;
|
||||
s << "0x" << std::hex << std::setw(16) << std::setfill('0') << result.nonce;
|
||||
params.PushBack(Value(s.str().c_str(), allocator), allocator);
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if (m_pool.algorithm().id() == Algorithm::GHOSTRIDER_RTM) {
|
||||
params.PushBack(Value("00000000000000000000000000000000", static_cast<uint32_t>(m_extraNonce2Size * 2)), allocator);
|
||||
params.PushBack(Value(m_ntime.data(), allocator), allocator);
|
||||
|
||||
s.str(std::string());
|
||||
s << "0x";
|
||||
for (size_t i = 0; i < 32; ++i) {
|
||||
const uint32_t k = result.headerHash()[i];
|
||||
s << std::hex << std::setw(2) << std::setfill('0') << k;
|
||||
std::stringstream s;
|
||||
s << std::hex << std::setw(8) << std::setfill('0') << result.nonce;
|
||||
params.PushBack(Value(s.str().c_str(), allocator), allocator);
|
||||
}
|
||||
params.PushBack(Value(s.str().c_str(), allocator), allocator);
|
||||
else
|
||||
# endif
|
||||
{
|
||||
std::stringstream s;
|
||||
s << "0x" << std::hex << std::setw(16) << std::setfill('0') << result.nonce;
|
||||
params.PushBack(Value(s.str().c_str(), allocator), allocator);
|
||||
|
||||
s.str(std::string());
|
||||
s << "0x";
|
||||
for (size_t i = 0; i < 32; ++i) {
|
||||
const uint32_t k = result.mixHash()[i];
|
||||
s << std::hex << std::setw(2) << std::setfill('0') << k;
|
||||
s.str(std::string());
|
||||
s << "0x";
|
||||
for (size_t i = 0; i < 32; ++i) {
|
||||
const uint32_t k = result.headerHash()[i];
|
||||
s << std::hex << std::setw(2) << std::setfill('0') << k;
|
||||
}
|
||||
params.PushBack(Value(s.str().c_str(), allocator), allocator);
|
||||
|
||||
s.str(std::string());
|
||||
s << "0x";
|
||||
for (size_t i = 0; i < 32; ++i) {
|
||||
const uint32_t k = result.mixHash()[i];
|
||||
s << std::hex << std::setw(2) << std::setfill('0') << k;
|
||||
}
|
||||
params.PushBack(Value(s.str().c_str(), allocator), allocator);
|
||||
}
|
||||
params.PushBack(Value(s.str().c_str(), allocator), allocator);
|
||||
|
||||
JsonRequest::create(doc, m_sequence, "mining.submit", params);
|
||||
|
||||
uint64_t actual_diff = ethash_swap_u64(*((uint64_t*)result.result()));
|
||||
uint64_t actual_diff;
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if (result.algorithm == Algorithm::GHOSTRIDER_RTM) {
|
||||
actual_diff = reinterpret_cast<const uint64_t*>(result.result())[3];
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
actual_diff = ethash_swap_u64(*((uint64_t*)result.result()));
|
||||
}
|
||||
|
||||
actual_diff = actual_diff ? (uint64_t(-1) / actual_diff) : 0;
|
||||
|
||||
# ifdef XMRIG_PROXY_PROJECT
|
||||
@@ -161,6 +195,34 @@ void xmrig::EthStratumClient::parseNotification(const char *method, const rapidj
|
||||
setExtraNonce(arr[0]);
|
||||
}
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if (strcmp(method, "mining.set_difficulty") == 0) {
|
||||
if (!params.IsArray()) {
|
||||
LOG_ERR("%s " RED("invalid mining.set_difficulty notification: params is not an array"), tag());
|
||||
return;
|
||||
}
|
||||
|
||||
if (m_pool.algorithm().id() != Algorithm::GHOSTRIDER_RTM) {
|
||||
return;
|
||||
}
|
||||
|
||||
auto arr = params.GetArray();
|
||||
|
||||
if (arr.Empty()) {
|
||||
LOG_ERR("%s " RED("invalid mining.set_difficulty notification: params array is empty"), tag());
|
||||
return;
|
||||
}
|
||||
|
||||
if (!arr[0].IsDouble() && !arr[0].IsUint64()) {
|
||||
LOG_ERR("%s " RED("invalid mining.set_difficulty notification: difficulty is not a number"), tag());
|
||||
return;
|
||||
}
|
||||
|
||||
const double diff = arr[0].IsDouble() ? arr[0].GetDouble() : arr[0].GetUint64();
|
||||
m_nextDifficulty = static_cast<uint64_t>(ceil(diff * 65536.0));
|
||||
}
|
||||
# endif
|
||||
|
||||
if (strcmp(method, "mining.notify") == 0) {
|
||||
if (!params.IsArray()) {
|
||||
LOG_ERR("%s " RED("invalid mining.notify notification: params is not an array"), tag());
|
||||
@@ -169,44 +231,152 @@ void xmrig::EthStratumClient::parseNotification(const char *method, const rapidj
|
||||
|
||||
auto arr = params.GetArray();
|
||||
|
||||
if (arr.Size() < 6) {
|
||||
auto algo = m_pool.algorithm();
|
||||
if (!algo.isValid()) {
|
||||
algo = m_pool.coin().algorithm();
|
||||
}
|
||||
|
||||
const size_t min_arr_size = (algo.id() == Algorithm::GHOSTRIDER_RTM) ? 8 : 6;
|
||||
|
||||
if (arr.Size() < min_arr_size) {
|
||||
LOG_ERR("%s " RED("invalid mining.notify notification: params array has wrong size"), tag());
|
||||
return;
|
||||
}
|
||||
|
||||
if (!arr[0].IsString()) {
|
||||
LOG_ERR("%s " RED("invalid mining.notify notification: invalid job id"), tag());
|
||||
return;
|
||||
}
|
||||
|
||||
Job job;
|
||||
job.setId(arr[0].GetString());
|
||||
|
||||
auto algo = m_pool.algorithm();
|
||||
if (!algo.isValid()) {
|
||||
algo = m_pool.coin().algorithm();
|
||||
}
|
||||
|
||||
job.setAlgorithm(algo);
|
||||
job.setExtraNonce(m_extraNonce.second);
|
||||
|
||||
std::stringstream s;
|
||||
|
||||
// header hash (32 bytes)
|
||||
s << arr[1].GetString();
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if (algo.id() == Algorithm::GHOSTRIDER_RTM) {
|
||||
// Raptoreum uses Bitcoin's Stratum protocol
|
||||
// https://en.bitcoinwiki.org/wiki/Stratum_mining_protocol#mining.notify
|
||||
|
||||
// nonce template (8 bytes)
|
||||
for (uint64_t i = 0, k = m_extraNonce.first; i < sizeof(m_extraNonce.first); ++i, k >>= 8) {
|
||||
s << std::hex << std::setw(2) << std::setfill('0') << (k & 0xFF);
|
||||
if (!arr[1].IsString() || !arr[2].IsString() || !arr[3].IsString() || !arr[4].IsArray() || !arr[5].IsString() || !arr[6].IsString() || !arr[7].IsString()) {
|
||||
LOG_ERR("%s " RED("invalid mining.notify notification: invalid param array"), tag());
|
||||
return;
|
||||
}
|
||||
|
||||
// Version
|
||||
s << arr[5].GetString();
|
||||
|
||||
// Previous block hash
|
||||
s << arr[1].GetString();
|
||||
|
||||
// Merkle tree root
|
||||
std::string blob = arr[2].GetString();
|
||||
blob += m_extraNonce.second;
|
||||
blob.append(m_extraNonce2Size * 2, '0');
|
||||
blob += arr[3].GetString();
|
||||
|
||||
uint8_t merkle_root[64];
|
||||
|
||||
Buffer buf = Cvt::fromHex(blob.c_str(), blob.length());
|
||||
|
||||
// Get height from coinbase
|
||||
{
|
||||
uint8_t* p = buf.data() + 32;
|
||||
uint8_t* m = p + 128;
|
||||
|
||||
while ((p < m) && (*p != 0xff)) ++p;
|
||||
while ((p < m) && (*p == 0xff)) ++p;
|
||||
|
||||
if ((p < m) && (*(p - 1) == 0xff) && (*(p - 2) == 0xff)) {
|
||||
uint32_t height = *reinterpret_cast<uint16_t*>(p + 2);
|
||||
switch (*(p + 1)) {
|
||||
case 4:
|
||||
height += *reinterpret_cast<uint16_t*>(p + 4) * 0x10000UL;
|
||||
break;
|
||||
case 3:
|
||||
height += *(p + 4) * 0x10000UL;
|
||||
break;
|
||||
}
|
||||
job.setHeight(height);
|
||||
}
|
||||
else {
|
||||
job.setHeight(0);
|
||||
}
|
||||
}
|
||||
|
||||
sha256d(merkle_root, buf.data(), static_cast<int>(buf.size()));
|
||||
|
||||
auto merkle_branches = arr[4].GetArray();
|
||||
for (int i = 0, n = merkle_branches.Size(); i < n; ++i) {
|
||||
auto& b = merkle_branches[i];
|
||||
buf = b.IsString() ? Cvt::fromHex(b.GetString(), b.GetStringLength()) : Buffer();
|
||||
if (buf.size() != 32) {
|
||||
LOG_ERR("%s " RED("invalid mining.notify notification: param 4 is invalid"), tag());
|
||||
return;
|
||||
}
|
||||
memcpy(merkle_root + 32, buf.data(), 32);
|
||||
sha256d(merkle_root, merkle_root, 64);
|
||||
}
|
||||
|
||||
s << Cvt::toHex(merkle_root, 32);
|
||||
|
||||
// ntime
|
||||
m_ntime = arr[7].GetString();
|
||||
s << m_ntime;
|
||||
|
||||
// nbits
|
||||
s << arr[6].GetString();
|
||||
|
||||
blob = s.str();
|
||||
|
||||
if (blob.size() != 76 * 2) {
|
||||
LOG_ERR("%s " RED("invalid mining.notify notification: invalid blob size"), tag());
|
||||
return;
|
||||
}
|
||||
|
||||
// zeros up to 80 bytes
|
||||
blob.resize(80 * 2, '0');
|
||||
|
||||
// Invert byte order (no idea why, but it's done in Bitcoin's Stratum)
|
||||
buf = Cvt::fromHex(blob.c_str(), blob.length());
|
||||
for (size_t i = 0; i < 80; i += sizeof(uint32_t)) {
|
||||
uint32_t& k = *reinterpret_cast<uint32_t*>(buf.data() + i);
|
||||
if ((i < 36) || (i >= 68)) {
|
||||
k = ethash_swap_u32(k);
|
||||
}
|
||||
}
|
||||
blob = Cvt::toHex(buf.data(), buf.size());
|
||||
|
||||
job.setBlob(blob.c_str());
|
||||
job.setDiff(m_nextDifficulty);
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
// header hash (32 bytes)
|
||||
s << arr[1].GetString();
|
||||
|
||||
std::string blob = s.str();
|
||||
// nonce template (8 bytes)
|
||||
for (uint64_t i = 0, k = m_extraNonce.first; i < sizeof(m_extraNonce.first); ++i, k >>= 8) {
|
||||
s << std::hex << std::setw(2) << std::setfill('0') << (k & 0xFF);
|
||||
}
|
||||
|
||||
// zeros up to 76 bytes
|
||||
blob.resize(76 * 2, '0');
|
||||
job.setBlob(blob.c_str());
|
||||
std::string blob = s.str();
|
||||
|
||||
std::string target_str = arr[3].GetString();
|
||||
target_str.resize(16, '0');
|
||||
const uint64_t target = strtoull(target_str.c_str(), nullptr, 16);
|
||||
job.setDiff(Job::toDiff(target));
|
||||
// zeros up to 76 bytes
|
||||
blob.resize(76 * 2, '0');
|
||||
job.setBlob(blob.c_str());
|
||||
|
||||
job.setHeight(arr[5].GetUint64());
|
||||
std::string target_str = arr[3].GetString();
|
||||
target_str.resize(16, '0');
|
||||
const uint64_t target = strtoull(target_str.c_str(), nullptr, 16);
|
||||
job.setDiff(Job::toDiff(target));
|
||||
|
||||
job.setHeight(arr[5].GetUint64());
|
||||
}
|
||||
|
||||
bool ok = true;
|
||||
m_listener->onVerifyAlgorithm(this, algo, &ok);
|
||||
@@ -301,8 +471,8 @@ void xmrig::EthStratumClient::authorize()
|
||||
auto &allocator = doc.GetAllocator();
|
||||
|
||||
Value params(kArrayType);
|
||||
params.PushBack(m_pool.user().toJSON(), allocator);
|
||||
params.PushBack(m_pool.password().toJSON(), allocator);
|
||||
params.PushBack(m_user.toJSON(), allocator);
|
||||
params.PushBack(m_password.toJSON(), allocator);
|
||||
|
||||
JsonRequest::create(doc, m_sequence, "mining.authorize", params);
|
||||
|
||||
@@ -356,11 +526,19 @@ void xmrig::EthStratumClient::onSubscribeResponse(const rapidjson::Value &result
|
||||
throw std::runtime_error("invalid mining.subscribe response: result is not an array");
|
||||
}
|
||||
|
||||
if (result.GetArray().Size() <= 1) {
|
||||
auto arr = result.GetArray();
|
||||
|
||||
if (arr.Size() <= 1) {
|
||||
throw std::runtime_error("invalid mining.subscribe response: result array is too short");
|
||||
}
|
||||
|
||||
setExtraNonce(result.GetArray()[1]);
|
||||
setExtraNonce(arr[1]);
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if ((arr.Size() > 2) && (arr[2].IsUint())) {
|
||||
m_extraNonce2Size = arr[2].GetUint();
|
||||
}
|
||||
# endif
|
||||
|
||||
if (m_pool.isNicehash()) {
|
||||
using namespace rapidjson;
|
||||
|
||||
@@ -57,6 +57,12 @@ private:
|
||||
|
||||
bool m_authorized = false;
|
||||
std::pair<uint64_t, String> m_extraNonce{};
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
uint64_t m_extraNonce2Size = 0;
|
||||
uint64_t m_nextDifficulty = 0;
|
||||
String m_ntime;
|
||||
# endif
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -47,7 +47,7 @@ xmrig::Job::Job(bool nicehash, const Algorithm &algorithm, const String &clientI
|
||||
|
||||
bool xmrig::Job::isEqual(const Job &other) const
|
||||
{
|
||||
return m_id == other.m_id && m_clientId == other.m_clientId && memcmp(m_blob, other.m_blob, sizeof(m_blob)) == 0;
|
||||
return m_id == other.m_id && m_clientId == other.m_clientId && memcmp(m_blob, other.m_blob, sizeof(m_blob)) == 0 && m_target == other.m_target;
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -76,7 +76,7 @@ public:
|
||||
inline const String &poolWallet() const { return m_poolWallet; }
|
||||
inline const uint32_t *nonce() const { return reinterpret_cast<const uint32_t*>(m_blob + nonceOffset()); }
|
||||
inline const uint8_t *blob() const { return m_blob; }
|
||||
inline int32_t nonceOffset() const { return (algorithm().family() == Algorithm::KAWPOW) ? 32 : 39; }
|
||||
inline int32_t nonceOffset() const { auto f = algorithm().family(); return (f == Algorithm::KAWPOW) ? 32 : ((f == Algorithm::GHOSTRIDER) ? 76 : 39); }
|
||||
inline size_t nonceSize() const { return (algorithm().family() == Algorithm::KAWPOW) ? 8 : 4; }
|
||||
inline size_t size() const { return m_size; }
|
||||
inline uint32_t *nonce() { return reinterpret_cast<uint32_t*>(m_blob + nonceOffset()); }
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
#include "base/kernel/Platform.h"
|
||||
#include "base/net/stratum/Client.h"
|
||||
|
||||
#ifdef XMRIG_ALGO_KAWPOW
|
||||
#if defined XMRIG_ALGO_KAWPOW || defined XMRIG_ALGO_GHOSTRIDER
|
||||
# include "base/net/stratum/AutoClient.h"
|
||||
# include "base/net/stratum/EthStratumClient.h"
|
||||
#endif
|
||||
@@ -218,8 +218,9 @@ xmrig::IClient *xmrig::Pool::createClient(int id, IClientListener *listener) con
|
||||
IClient *client = nullptr;
|
||||
|
||||
if (m_mode == MODE_POOL) {
|
||||
# ifdef XMRIG_ALGO_KAWPOW
|
||||
if ((m_algorithm.family() == Algorithm::KAWPOW) || (m_coin == Coin::RAVEN)) {
|
||||
# if defined XMRIG_ALGO_KAWPOW || defined XMRIG_ALGO_GHOSTRIDER
|
||||
const uint32_t f = m_algorithm.family();
|
||||
if ((f == Algorithm::KAWPOW) || (f == Algorithm::GHOSTRIDER) || (m_coin == Coin::RAVEN)) {
|
||||
client = new EthStratumClient(id, Platform::userAgent(), listener);
|
||||
}
|
||||
else
|
||||
@@ -236,7 +237,7 @@ xmrig::IClient *xmrig::Pool::createClient(int id, IClientListener *listener) con
|
||||
client = new SelfSelectClient(id, Platform::userAgent(), listener, m_submitToOrigin);
|
||||
}
|
||||
# endif
|
||||
# ifdef XMRIG_ALGO_KAWPOW
|
||||
# if defined XMRIG_ALGO_KAWPOW || defined XMRIG_ALGO_GHOSTRIDER
|
||||
else if (m_mode == MODE_AUTO_ETH) {
|
||||
client = new AutoClient(id, Platform::userAgent(), listener);
|
||||
}
|
||||
|
||||
@@ -48,6 +48,40 @@ xmrig::BenchClient::BenchClient(const std::shared_ptr<BenchConfig> &benchmark, I
|
||||
std::vector<char> blob(112 * 2 + 1, '0');
|
||||
blob.back() = '\0';
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if (m_benchmark->algorithm() == Algorithm::GHOSTRIDER_RTM) {
|
||||
const uint32_t q = (benchmark->rotation() / 20) & 1;
|
||||
const uint32_t r = benchmark->rotation() % 20;
|
||||
|
||||
static constexpr uint32_t indices[20][3] = {
|
||||
{ 0, 1, 2 },
|
||||
{ 0, 1, 3 },
|
||||
{ 0, 1, 4 },
|
||||
{ 0, 1, 5 },
|
||||
{ 0, 2, 3 },
|
||||
{ 0, 2, 4 },
|
||||
{ 0, 2, 5 },
|
||||
{ 0, 3, 4 },
|
||||
{ 0, 3, 5 },
|
||||
{ 0, 4, 5 },
|
||||
{ 1, 2, 3 },
|
||||
{ 1, 2, 4 },
|
||||
{ 1, 2, 5 },
|
||||
{ 1, 3, 4 },
|
||||
{ 1, 3, 5 },
|
||||
{ 1, 4, 5 },
|
||||
{ 2, 3, 4 },
|
||||
{ 2, 3, 5 },
|
||||
{ 2, 4, 5 },
|
||||
{ 3, 4, 5 },
|
||||
};
|
||||
|
||||
blob[ 8] = '0' + indices[r][q ? 2 : 1];
|
||||
blob[ 9] = '0' + indices[r][0];
|
||||
blob[11] = '0' + indices[r][q ? 1 : 2];
|
||||
}
|
||||
# endif
|
||||
|
||||
m_job.setAlgorithm(m_benchmark->algorithm());
|
||||
m_job.setBlob(blob.data());
|
||||
m_job.setDiff(std::numeric_limits<uint64_t>::max());
|
||||
@@ -60,7 +94,7 @@ xmrig::BenchClient::BenchClient(const std::shared_ptr<BenchConfig> &benchmark, I
|
||||
BenchState::init(this, m_benchmark->size());
|
||||
|
||||
# ifdef XMRIG_FEATURE_HTTP
|
||||
if (m_benchmark->isSubmit()) {
|
||||
if (m_benchmark->isSubmit() && (m_benchmark->algorithm().family() == Algorithm::RANDOM_X)) {
|
||||
m_mode = ONLINE_BENCH;
|
||||
m_token = m_benchmark->token();
|
||||
|
||||
|
||||
@@ -39,6 +39,7 @@ const char *BenchConfig::kHash = "hash";
|
||||
const char *BenchConfig::kId = "id";
|
||||
const char *BenchConfig::kSeed = "seed";
|
||||
const char *BenchConfig::kSize = "size";
|
||||
const char *BenchConfig::kRotation = "rotation";
|
||||
const char *BenchConfig::kSubmit = "submit";
|
||||
const char *BenchConfig::kToken = "token";
|
||||
const char *BenchConfig::kUser = "user";
|
||||
@@ -53,7 +54,7 @@ const char *BenchConfig::kApiHost = "127.0.0.1";
|
||||
} // namespace xmrig
|
||||
|
||||
|
||||
xmrig::BenchConfig::BenchConfig(uint32_t size, const String &id, const rapidjson::Value &object, bool dmi) :
|
||||
xmrig::BenchConfig::BenchConfig(uint32_t size, const String &id, const rapidjson::Value &object, bool dmi, uint32_t rotation) :
|
||||
m_algorithm(Json::getString(object, kAlgo)),
|
||||
m_dmi(dmi),
|
||||
m_submit(Json::getBool(object, kSubmit)),
|
||||
@@ -61,9 +62,15 @@ xmrig::BenchConfig::BenchConfig(uint32_t size, const String &id, const rapidjson
|
||||
m_seed(Json::getString(object, kSeed)),
|
||||
m_token(Json::getString(object, kToken)),
|
||||
m_user(Json::getString(object, kUser)),
|
||||
m_size(size)
|
||||
m_size(size),
|
||||
m_rotation(rotation)
|
||||
{
|
||||
if (!m_algorithm.isValid() || m_algorithm.family() != Algorithm::RANDOM_X) {
|
||||
auto f = m_algorithm.family();
|
||||
if (!m_algorithm.isValid() || (f != Algorithm::RANDOM_X
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
&& f != Algorithm::GHOSTRIDER
|
||||
# endif
|
||||
)) {
|
||||
m_algorithm = Algorithm::RX_0;
|
||||
}
|
||||
|
||||
@@ -80,14 +87,17 @@ xmrig::BenchConfig *xmrig::BenchConfig::create(const rapidjson::Value &object, b
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
const uint32_t size = getSize(Json::getString(object, kSize));
|
||||
const String id = Json::getString(object, kVerify);
|
||||
const uint32_t size = getSize(Json::getString(object, kSize));
|
||||
const String id = Json::getString(object, kVerify);
|
||||
|
||||
const char* rotation_str = Json::getString(object, kRotation);
|
||||
const uint32_t rotation = rotation_str ? strtoul(rotation_str, nullptr, 10) : 0;
|
||||
|
||||
if (size == 0 && id.isEmpty()) {
|
||||
return nullptr;
|
||||
}
|
||||
|
||||
return new BenchConfig(size, id, object, dmi);
|
||||
return new BenchConfig(size, id, object, dmi, rotation);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -37,6 +37,7 @@ public:
|
||||
static const char *kId;
|
||||
static const char *kSeed;
|
||||
static const char *kSize;
|
||||
static const char* kRotation;
|
||||
static const char *kSubmit;
|
||||
static const char *kToken;
|
||||
static const char *kUser;
|
||||
@@ -50,7 +51,7 @@ public:
|
||||
static constexpr const uint16_t kApiPort = 18805;
|
||||
# endif
|
||||
|
||||
BenchConfig(uint32_t size, const String &id, const rapidjson::Value &object, bool dmi);
|
||||
BenchConfig(uint32_t size, const String &id, const rapidjson::Value &object, bool dmi, uint32_t rotation);
|
||||
|
||||
static BenchConfig *create(const rapidjson::Value &object, bool dmi);
|
||||
|
||||
@@ -63,6 +64,7 @@ public:
|
||||
inline const String &user() const { return m_user; }
|
||||
inline uint32_t size() const { return m_size; }
|
||||
inline uint64_t hash() const { return m_hash; }
|
||||
// Returns m_rotation, i.e. the `rotation` value handed to the constructor.
inline uint32_t rotation() const { return m_rotation; }
|
||||
|
||||
rapidjson::Value toJSON(rapidjson::Document &doc) const;
|
||||
|
||||
@@ -77,6 +79,7 @@ private:
|
||||
String m_token;
|
||||
String m_user;
|
||||
uint32_t m_size;
|
||||
uint32_t m_rotation;
|
||||
uint64_t m_hash = 0;
|
||||
};
|
||||
|
||||
|
||||
44
src/base/tools/Chrono.cpp
Normal file
44
src/base/tools/Chrono.cpp
Normal file
@@ -0,0 +1,44 @@
|
||||
/* XMRig
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "Chrono.h"
|
||||
|
||||
|
||||
#ifdef XMRIG_OS_WIN
|
||||
# include <Windows.h>
|
||||
#endif
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
// Returns a high-resolution timestamp in milliseconds as a double, so
// sub-millisecond precision is kept in the fractional part.
//  - XMRIG_OS_WIN: performance-counter ticks scaled by 1e3 / counter frequency.
//  - otherwise:    std::chrono::high_resolution_clock time since its epoch,
//                  taken in nanoseconds and divided by 1e6.
// NOTE(review): the two branches read different clocks, so the absolute value
// is presumably only meaningful as a difference between two calls — confirm
// against callers.
double Chrono::highResolutionMSecs()
|
||||
{
|
||||
# ifdef XMRIG_OS_WIN
|
||||
LARGE_INTEGER f, t;
|
||||
// The frequency is queried on every call rather than cached.
QueryPerformanceFrequency(&f);
|
||||
QueryPerformanceCounter(&t);
|
||||
// ticks * 1000 / (ticks per second) == milliseconds
return static_cast<double>(t.QuadPart) * 1e3 / f.QuadPart;
|
||||
# else
|
||||
using namespace std::chrono;
|
||||
// The nanosecond count is cast to uint64_t first; dividing by the double 1e6 then yields fractional milliseconds.
return static_cast<uint64_t>(duration_cast<nanoseconds>(high_resolution_clock::now().time_since_epoch()).count()) / 1e6;
|
||||
# endif
|
||||
}
|
||||
|
||||
|
||||
} /* namespace xmrig */
|
||||
@@ -29,12 +29,7 @@ namespace xmrig {
|
||||
class Chrono
|
||||
{
|
||||
public:
|
||||
static inline uint64_t highResolutionMSecs()
|
||||
{
|
||||
using namespace std::chrono;
|
||||
|
||||
return static_cast<uint64_t>(time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count());
|
||||
}
|
||||
static double highResolutionMSecs();
|
||||
|
||||
|
||||
static inline uint64_t steadyMSecs()
|
||||
|
||||
@@ -67,6 +67,11 @@
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
# include "crypto/ghostrider/ghostrider.h"
|
||||
#endif
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
@@ -282,10 +287,12 @@ public:
|
||||
|
||||
void printHashrate(bool details)
|
||||
{
|
||||
char num[16 * 4] = { 0 };
|
||||
char num[16 * 5] = { 0 };
|
||||
double speed[3] = { 0.0 };
|
||||
uint32_t count = 0;
|
||||
|
||||
double avg_hashrate = 0.0;
|
||||
|
||||
for (auto backend : backends) {
|
||||
const auto hashrate = backend->hashrate();
|
||||
if (hashrate) {
|
||||
@@ -294,6 +301,8 @@ public:
|
||||
speed[0] += hashrate->calc(Hashrate::ShortInterval);
|
||||
speed[1] += hashrate->calc(Hashrate::MediumInterval);
|
||||
speed[2] += hashrate->calc(Hashrate::LargeInterval);
|
||||
|
||||
avg_hashrate += hashrate->average();
|
||||
}
|
||||
|
||||
backend->printHashrate(details);
|
||||
@@ -313,12 +322,22 @@ public:
|
||||
h = "MH/s";
|
||||
}
|
||||
|
||||
LOG_INFO("%s " WHITE_BOLD("speed") " 10s/60s/15m " CYAN_BOLD("%s") CYAN(" %s %s ") CYAN_BOLD("%s") " max " CYAN_BOLD("%s %s"),
|
||||
char avg_hashrate_buf[64];
|
||||
avg_hashrate_buf[0] = '\0';
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if (algorithm.family() == Algorithm::GHOSTRIDER) {
|
||||
snprintf(avg_hashrate_buf, sizeof(avg_hashrate_buf), " avg " CYAN_BOLD("%s %s"), Hashrate::format(avg_hashrate * scale, num + 16 * 4, 16), h);
|
||||
}
|
||||
# endif
|
||||
|
||||
LOG_INFO("%s " WHITE_BOLD("speed") " 10s/60s/15m " CYAN_BOLD("%s") CYAN(" %s %s ") CYAN_BOLD("%s") " max " CYAN_BOLD("%s %s") "%s",
|
||||
Tags::miner(),
|
||||
Hashrate::format(speed[0] * scale, num, sizeof(num) / 4),
|
||||
Hashrate::format(speed[1] * scale, num + 16, sizeof(num) / 4),
|
||||
Hashrate::format(speed[2] * scale, num + 16 * 2, sizeof(num) / 4), h,
|
||||
Hashrate::format(maxHashrate[algorithm] * scale, num + 16 * 3, sizeof(num) / 4), h
|
||||
Hashrate::format(speed[0] * scale, num, 16),
|
||||
Hashrate::format(speed[1] * scale, num + 16, 16),
|
||||
Hashrate::format(speed[2] * scale, num + 16 * 2, 16), h,
|
||||
Hashrate::format(maxHashrate[algorithm] * scale, num + 16 * 3, 16), h,
|
||||
avg_hashrate_buf
|
||||
);
|
||||
|
||||
# ifdef XMRIG_FEATURE_BENCHMARK
|
||||
@@ -334,6 +353,11 @@ public:
|
||||
# endif
|
||||
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
// Thin forwarder to ghostrider::benchmark(); takes no arguments and touches no members.
// NOTE(review): what benchmark() actually does is not visible here — see crypto/ghostrider/ghostrider.h.
inline void initGhostRider() const { ghostrider::benchmark(); }
|
||||
# endif
|
||||
|
||||
|
||||
Algorithm algorithm;
|
||||
Algorithms algorithms;
|
||||
bool active = false;
|
||||
@@ -553,6 +577,12 @@ void xmrig::Miner::setJob(const Job &job, bool donate)
|
||||
constexpr const bool ready = true;
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if (job.algorithm().family() == Algorithm::GHOSTRIDER) {
|
||||
d_ptr->initGhostRider();
|
||||
}
|
||||
# endif
|
||||
|
||||
mutex.unlock();
|
||||
|
||||
d_ptr->active = true;
|
||||
|
||||
@@ -269,6 +269,7 @@ void xmrig::ConfigTransform::transform(rapidjson::Document &doc, int key, const
|
||||
case IConfig::BenchSeedKey: /* --seed */
|
||||
case IConfig::BenchHashKey: /* --hash */
|
||||
case IConfig::UserKey: /* --user */
|
||||
case IConfig::RotationKey: /* --rotation */
|
||||
return transformBenchmark(doc, key, arg);
|
||||
# endif
|
||||
|
||||
@@ -358,6 +359,9 @@ void xmrig::ConfigTransform::transformBenchmark(rapidjson::Document &doc, int ke
|
||||
case IConfig::UserKey: /* --user */
|
||||
return set(doc, BenchConfig::kBenchmark, BenchConfig::kUser, arg);
|
||||
|
||||
case IConfig::RotationKey: /* --rotation */
|
||||
return set(doc, BenchConfig::kBenchmark, BenchConfig::kRotation, arg);
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -71,6 +71,7 @@ static const option options[] = {
|
||||
{ "hugepage-size", 1, nullptr, IConfig::HugePageSizeKey },
|
||||
{ "huge-pages-jit", 0, nullptr, IConfig::HugePagesJitKey },
|
||||
{ "hugepages-jit", 0, nullptr, IConfig::HugePagesJitKey },
|
||||
{ "rotation", 1, nullptr, IConfig::RotationKey },
|
||||
{ "pass", 1, nullptr, IConfig::PasswordKey },
|
||||
{ "print-time", 1, nullptr, IConfig::PrintTimeKey },
|
||||
{ "retries", 1, nullptr, IConfig::RetriesKey },
|
||||
|
||||
@@ -43,6 +43,7 @@ public:
|
||||
constexpr inline size_t memory() const { static_assert(Algorithm::isCN(ALGO), "invalid CRYPTONIGHT algorithm"); return Algorithm::l3(ALGO); }
|
||||
constexpr inline uint32_t iterations() const { static_assert(Algorithm::isCN(ALGO), "invalid CRYPTONIGHT algorithm"); return CN_ITER; }
|
||||
// Generic mask: (memory() - 1) rounded down to a multiple of 16, narrowed to uint32_t.
// Individual algorithms may replace this through explicit specialisations of CnAlgo<...>::mask().
constexpr inline uint32_t mask() const { return static_cast<uint32_t>(((memory() - 1) / 16) * 16); }
|
||||
// Yields 1 when mask() is smaller than half of memory(), otherwise 0
// (the boolean comparison result is converted to the uint32_t return type).
constexpr inline uint32_t half_mem() const { return mask() < memory() / 2; }
|
||||
|
||||
inline static uint32_t iterations(Algorithm::Id algo)
|
||||
{
|
||||
@@ -108,6 +109,16 @@ public:
|
||||
}
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if (algo == Algorithm::CN_GR_1) {
|
||||
return 0x3FFF0;
|
||||
}
|
||||
|
||||
if (algo == Algorithm::CN_GR_5) {
|
||||
return 0x1FFF0;
|
||||
}
|
||||
# endif
|
||||
|
||||
return ((Algorithm::l3(algo) - 1) / 16) * 16;
|
||||
}
|
||||
|
||||
@@ -136,6 +147,18 @@ template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_UPX2>::iterations() co
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_PICO_0>::mask() const { return 0x1FFF0; }
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_UPX2>::mask() const { return 0x1FFF0; }
|
||||
|
||||
// Compile-time parameters for the CN_GR_* variants, only built when
// XMRIG_ALGO_GHOSTRIDER is defined.
// Iteration counts are fractions of CN_ITER:
//   CN_GR_0, CN_GR_1 -> CN_ITER / 4
//   CN_GR_2, CN_GR_3 -> CN_ITER / 2
//   CN_GR_4, CN_GR_5 -> CN_ITER / 8
// Only CN_GR_1 (0x3FFF0) and CN_GR_5 (0x1FFF0) override mask(); the other
// variants have no mask specialisation in this block.
#ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GR_0>::iterations() const { return CN_ITER / 4; }
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GR_1>::iterations() const { return CN_ITER / 4; }
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GR_2>::iterations() const { return CN_ITER / 2; }
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GR_3>::iterations() const { return CN_ITER / 2; }
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GR_4>::iterations() const { return CN_ITER / 8; }
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GR_5>::iterations() const { return CN_ITER / 8; }
|
||||
|
||||
// Same constants as the runtime mask lookup returns for CN_GR_1 / CN_GR_5.
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GR_1>::mask() const { return 0x3FFF0; }
|
||||
template<> constexpr inline uint32_t CnAlgo<Algorithm::CN_GR_5>::mask() const { return 0x1FFF0; }
|
||||
#endif
|
||||
|
||||
|
||||
} /* namespace xmrig */
|
||||
|
||||
|
||||
@@ -55,6 +55,10 @@
|
||||
} while (0)
|
||||
|
||||
|
||||
bool cn_sse41_enabled = false;
|
||||
bool cn_vaes_enabled = false;
|
||||
|
||||
|
||||
#ifdef XMRIG_FEATURE_ASM
|
||||
# define ADD_FN_ASM(algo) do { \
|
||||
m_map[algo]->data[AV_SINGLE][Assembly::INTEL] = cryptonight_single_hash_asm<algo, Assembly::INTEL>; \
|
||||
@@ -97,6 +101,27 @@ cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm = nullptr;
|
||||
cn_mainloop_fun cn_upx2_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_upx2_double_mainloop_asm = nullptr;
|
||||
|
||||
cn_mainloop_fun cn_gr0_single_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr1_single_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr2_single_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr3_single_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr4_single_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr5_single_mainloop_asm = nullptr;
|
||||
|
||||
cn_mainloop_fun cn_gr0_double_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr1_double_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr2_double_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr3_double_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr4_double_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr5_double_mainloop_asm = nullptr;
|
||||
|
||||
cn_mainloop_fun cn_gr0_quad_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr1_quad_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr2_quad_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr3_quad_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr4_quad_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr5_quad_mainloop_asm = nullptr;
|
||||
|
||||
|
||||
template<Algorithm::Id SOURCE_ALGO = Algorithm::CN_2, typename T, typename U>
|
||||
static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t mask = CnAlgo<Algorithm::CN_HALF>().mask())
|
||||
@@ -136,7 +161,7 @@ static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t ma
|
||||
|
||||
static void patchAsmVariants()
|
||||
{
|
||||
const int allocation_size = 131072;
|
||||
constexpr size_t allocation_size = 0x20000;
|
||||
auto base = static_cast<uint8_t *>(VirtualMemory::allocateExecutableMemory(allocation_size, false));
|
||||
|
||||
cn_half_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x0000);
|
||||
@@ -173,6 +198,29 @@ static void patchAsmVariants()
|
||||
cn_upx2_double_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x15000);
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
cn_gr0_single_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x16000);
|
||||
cn_gr1_single_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x16800);
|
||||
cn_gr2_single_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x17000);
|
||||
cn_gr3_single_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x17800);
|
||||
cn_gr4_single_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x18000);
|
||||
cn_gr5_single_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x18800);
|
||||
|
||||
cn_gr0_double_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x19000);
|
||||
cn_gr1_double_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x19800);
|
||||
cn_gr2_double_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1A000);
|
||||
cn_gr3_double_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1A800);
|
||||
cn_gr4_double_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1B000);
|
||||
cn_gr5_double_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1B800);
|
||||
|
||||
cn_gr0_quad_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1C000);
|
||||
cn_gr1_quad_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1C800);
|
||||
cn_gr2_quad_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1D000);
|
||||
cn_gr3_quad_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1D800);
|
||||
cn_gr4_quad_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1E000);
|
||||
cn_gr5_quad_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1E800);
|
||||
# endif
|
||||
|
||||
{
|
||||
constexpr uint32_t ITER = CnAlgo<Algorithm::CN_HALF>().iterations();
|
||||
|
||||
@@ -230,7 +278,30 @@ static void patchAsmVariants()
|
||||
patchCode<Algorithm::CN_RWZ>(cn_upx2_mainloop_asm, cnv2_rwz_mainloop_asm, ITER, MASK);
|
||||
patchCode<Algorithm::CN_RWZ>(cn_upx2_double_mainloop_asm, cnv2_rwz_double_mainloop_asm, ITER, MASK);
|
||||
}
|
||||
#endif
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
patchCode<Algorithm::CN_1>(cn_gr0_single_mainloop_asm, cnv1_single_mainloop_asm, CnAlgo<Algorithm::CN_GR_0>().iterations(), CnAlgo<Algorithm::CN_GR_0>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr1_single_mainloop_asm, cnv1_single_mainloop_asm, CnAlgo<Algorithm::CN_GR_1>().iterations(), CnAlgo<Algorithm::CN_GR_1>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr2_single_mainloop_asm, cnv1_single_mainloop_asm, CnAlgo<Algorithm::CN_GR_2>().iterations(), CnAlgo<Algorithm::CN_GR_2>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr3_single_mainloop_asm, cnv1_single_mainloop_asm, CnAlgo<Algorithm::CN_GR_3>().iterations(), CnAlgo<Algorithm::CN_GR_3>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr4_single_mainloop_asm, cnv1_single_mainloop_asm, CnAlgo<Algorithm::CN_GR_4>().iterations(), CnAlgo<Algorithm::CN_GR_4>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr5_single_mainloop_asm, cnv1_single_mainloop_asm, CnAlgo<Algorithm::CN_GR_5>().iterations(), CnAlgo<Algorithm::CN_GR_5>().mask());
|
||||
|
||||
patchCode<Algorithm::CN_1>(cn_gr0_double_mainloop_asm, cnv1_double_mainloop_asm, CnAlgo<Algorithm::CN_GR_0>().iterations(), CnAlgo<Algorithm::CN_GR_0>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr1_double_mainloop_asm, cnv1_double_mainloop_asm, CnAlgo<Algorithm::CN_GR_1>().iterations(), CnAlgo<Algorithm::CN_GR_1>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr2_double_mainloop_asm, cnv1_double_mainloop_asm, CnAlgo<Algorithm::CN_GR_2>().iterations(), CnAlgo<Algorithm::CN_GR_2>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr3_double_mainloop_asm, cnv1_double_mainloop_asm, CnAlgo<Algorithm::CN_GR_3>().iterations(), CnAlgo<Algorithm::CN_GR_3>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr4_double_mainloop_asm, cnv1_double_mainloop_asm, CnAlgo<Algorithm::CN_GR_4>().iterations(), CnAlgo<Algorithm::CN_GR_4>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr5_double_mainloop_asm, cnv1_double_mainloop_asm, CnAlgo<Algorithm::CN_GR_5>().iterations(), CnAlgo<Algorithm::CN_GR_5>().mask());
|
||||
|
||||
patchCode<Algorithm::CN_1>(cn_gr0_quad_mainloop_asm, cnv1_quad_mainloop_asm, CnAlgo<Algorithm::CN_GR_0>().iterations(), CnAlgo<Algorithm::CN_GR_0>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr1_quad_mainloop_asm, cnv1_quad_mainloop_asm, CnAlgo<Algorithm::CN_GR_1>().iterations(), CnAlgo<Algorithm::CN_GR_1>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr2_quad_mainloop_asm, cnv1_quad_mainloop_asm, CnAlgo<Algorithm::CN_GR_2>().iterations(), CnAlgo<Algorithm::CN_GR_2>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr3_quad_mainloop_asm, cnv1_quad_mainloop_asm, CnAlgo<Algorithm::CN_GR_3>().iterations(), CnAlgo<Algorithm::CN_GR_3>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr4_quad_mainloop_asm, cnv1_quad_mainloop_asm, CnAlgo<Algorithm::CN_GR_4>().iterations(), CnAlgo<Algorithm::CN_GR_4>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr5_quad_mainloop_asm, cnv1_quad_mainloop_asm, CnAlgo<Algorithm::CN_GR_5>().iterations(), CnAlgo<Algorithm::CN_GR_5>().mask());
|
||||
# endif
|
||||
|
||||
VirtualMemory::protectRX(base, allocation_size);
|
||||
VirtualMemory::flushInstructionCache(base, allocation_size);
|
||||
@@ -310,6 +381,15 @@ xmrig::CnHash::CnHash()
|
||||
m_map[Algorithm::ASTROBWT_DERO]->data[AV_SINGLE_SOFT][Assembly::NONE] = astrobwt::single_hash<Algorithm::ASTROBWT_DERO>;
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
ADD_FN(Algorithm::CN_GR_0);
|
||||
ADD_FN(Algorithm::CN_GR_1);
|
||||
ADD_FN(Algorithm::CN_GR_2);
|
||||
ADD_FN(Algorithm::CN_GR_3);
|
||||
ADD_FN(Algorithm::CN_GR_4);
|
||||
ADD_FN(Algorithm::CN_GR_5);
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_FEATURE_ASM
|
||||
patchAsmVariants();
|
||||
# endif
|
||||
@@ -339,7 +419,7 @@ xmrig::cn_hash_fun xmrig::CnHash::fn(const Algorithm &algorithm, AlgoVariant av,
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_HEAVY
|
||||
// cn-heavy optimization for Zen3 CPUs
|
||||
if ((av == AV_SINGLE) && (assembly != Assembly::NONE) && (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3)) {
|
||||
if ((av == AV_SINGLE) && (assembly != Assembly::NONE) && (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3) && (Cpu::info()->model() == 0x21)) {
|
||||
switch (algorithm.id()) {
|
||||
case Algorithm::CN_HEAVY_0:
|
||||
return cryptonight_single_hash<Algorithm::CN_HEAVY_0, false, 3>;
|
||||
|
||||
@@ -52,12 +52,17 @@ struct cryptonight_r_data {
|
||||
// Per-hash working context for the CryptoNight implementations.
// NOTE(review): this is a rendered diff hunk, so both `unused[40]` and
// `unused[24]` are listed; presumably the latter replaces the former.
struct cryptonight_ctx {
|
||||
alignas(16) uint8_t state[224];
|
||||
alignas(16) uint8_t *memory;
|
||||
// Pointer to a table of 32-bit tweak values.
const uint32_t* tweak1_table;
|
||||
uint64_t tweak1_2;
|
||||
|
||||
uint8_t unused[40];
|
||||
// NOTE(review): 16 bytes smaller than unused[40]; that matches the pointer + uint64_t
// listed above on a 64-bit target, presumably so the members that follow keep their offsets — confirm.
uint8_t unused[24];
|
||||
const uint32_t *saes_table;
|
||||
|
||||
cn_mainloop_fun_ms_abi generated_code;
|
||||
cryptonight_r_data generated_code_data;
|
||||
|
||||
// 128-byte, 16-byte-aligned scratch area plus a flag.
// NOTE(review): intended use is not visible in this hunk.
alignas(16) uint8_t save_state[128];
|
||||
bool first_half;
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -349,6 +349,9 @@ static inline __m128i aes_round_tweak_div(const __m128i &in, const __m128i &key)
|
||||
}
|
||||
|
||||
|
||||
// Precomputed form of the variant-1 tweak, indexed by a single byte x.
// Entry x equals ((0x7531 >> i) & 0x3) << 28 with i = (((x >> 3) & 6) | (x & 1)) << 1,
// so every value is 0, 1 << 28 (268435456) or 3 << 28 (805306368).
// Only bits 0, 4 and 5 of x take part, hence the pattern repeats every 64 entries.
// cryptonight_monero_tweak() XORs tweak1_table[(uint8_t)(vh >> 24)] into vh.
alignas(64) static const uint32_t tweak1_table[256] = { 268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,805306368,268435456,805306368,268435456,805306368,268435
456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456 };
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
@@ -368,12 +371,7 @@ static inline void cryptonight_monero_tweak(const uint8_t* l, uint64_t idx, __m1
|
||||
|
||||
uint64_t vh = vgetq_lane_u64(tmp, 1);
|
||||
|
||||
uint8_t x = vh >> 24;
|
||||
static const uint16_t table = 0x7531;
|
||||
const uint8_t index = (((x >> (3)) & 6) | (x & 1)) << 1;
|
||||
vh ^= ((table >> index) & 0x3) << 28;
|
||||
|
||||
mem_out[1] = vh;
|
||||
mem_out[1] = vh ^ tweak1_table[static_cast<uint8_t>(vh >> 24)];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -204,4 +204,7 @@
|
||||
v4_random_math(code##part, r##part); \
|
||||
}
|
||||
|
||||
extern bool cn_sse41_enabled;
|
||||
extern bool cn_vaes_enabled;
|
||||
|
||||
#endif /* XMRIG_CRYPTONIGHT_MONERO_H */
|
||||
|
||||
@@ -100,7 +100,7 @@ const static uint8_t test_output_r[] = {
|
||||
|
||||
|
||||
// "cn/0"
|
||||
const static uint8_t test_output_v0[160] = {
|
||||
const static uint8_t test_output_v0[256] = {
|
||||
0x1A, 0x3F, 0xFB, 0xEE, 0x90, 0x9B, 0x42, 0x0D, 0x91, 0xF7, 0xBE, 0x6E, 0x5F, 0xB5, 0x6D, 0xB7,
|
||||
0x1B, 0x31, 0x10, 0xD8, 0x86, 0x01, 0x1E, 0x87, 0x7E, 0xE5, 0x78, 0x6A, 0xFD, 0x08, 0x01, 0x00,
|
||||
0x1B, 0x60, 0x6A, 0x3F, 0x4A, 0x07, 0xD6, 0x48, 0x9A, 0x1B, 0xCD, 0x07, 0x69, 0x7B, 0xD1, 0x66,
|
||||
@@ -115,7 +115,7 @@ const static uint8_t test_output_v0[160] = {
|
||||
|
||||
|
||||
// "cn/1" Cryptonight variant 1 (Monero v7)
|
||||
const static uint8_t test_output_v1[160] = {
|
||||
const static uint8_t test_output_v1[256] = {
|
||||
0xF2, 0x2D, 0x3D, 0x62, 0x03, 0xD2, 0xA0, 0x8B, 0x41, 0xD9, 0x02, 0x72, 0x78, 0xD8, 0xBC, 0xC9,
|
||||
0x83, 0xAC, 0xAD, 0xA9, 0xB6, 0x8E, 0x52, 0xE3, 0xC6, 0x89, 0x69, 0x2A, 0x50, 0xE9, 0x21, 0xD9,
|
||||
0xC9, 0xFA, 0xE8, 0x42, 0x5D, 0x86, 0x88, 0xDC, 0x23, 0x6B, 0xCD, 0xBC, 0x42, 0xFD, 0xB4, 0x2D,
|
||||
@@ -130,7 +130,7 @@ const static uint8_t test_output_v1[160] = {
|
||||
|
||||
|
||||
// "cn/2" Cryptonight variant 2 (Monero v8)
|
||||
const static uint8_t test_output_v2[160] = {
|
||||
const static uint8_t test_output_v2[256] = {
|
||||
0x97, 0x37, 0x82, 0x82, 0xCF, 0x10, 0xE7, 0xAD, 0x03, 0x3F, 0x7B, 0x80, 0x74, 0xC4, 0x0E, 0x14,
|
||||
0xD0, 0x6E, 0x7F, 0x60, 0x9D, 0xDD, 0xDA, 0x78, 0x76, 0x80, 0xB5, 0x8C, 0x05, 0xF4, 0x3D, 0x21,
|
||||
0x87, 0x1F, 0xCD, 0x68, 0x23, 0xF6, 0xA8, 0x79, 0xBB, 0x3F, 0x33, 0x95, 0x1C, 0x8E, 0x8E, 0x89,
|
||||
@@ -145,7 +145,7 @@ const static uint8_t test_output_v2[160] = {
|
||||
|
||||
|
||||
// "cn/half"
|
||||
const static uint8_t test_output_half[160] = {
|
||||
const static uint8_t test_output_half[256] = {
|
||||
0x5D, 0x4F, 0xBC, 0x35, 0x60, 0x97, 0xEA, 0x64, 0x40, 0xB0, 0x88, 0x8E, 0xDE, 0xB6, 0x35, 0xDD,
|
||||
0xC8, 0x4A, 0x0E, 0x39, 0x7C, 0x86, 0x84, 0x56, 0x89, 0x5C, 0x3F, 0x29, 0xBE, 0x73, 0x12, 0xA7,
|
||||
0x02, 0xE6, 0x1D, 0x2B, 0xBC, 0x84, 0xB6, 0x71, 0x96, 0x71, 0xD5, 0x0C, 0xAC, 0x76, 0x0E, 0x6B,
|
||||
@@ -160,7 +160,7 @@ const static uint8_t test_output_half[160] = {
|
||||
|
||||
|
||||
// "cn/msr" Masari (MSR)
|
||||
const static uint8_t test_output_msr[160] = {
|
||||
const static uint8_t test_output_msr[256] = {
|
||||
0x3C, 0x7A, 0x61, 0x08, 0x4C, 0x5E, 0xB8, 0x65, 0xB4, 0x98, 0xAB, 0x2F, 0x5A, 0x1A, 0xC5, 0x2C,
|
||||
0x49, 0xC1, 0x77, 0xC2, 0xD0, 0x13, 0x34, 0x42, 0xD6, 0x5E, 0xD5, 0x14, 0x33, 0x5C, 0x82, 0xC5,
|
||||
0x69, 0xDF, 0x38, 0x51, 0x1B, 0xB3, 0xEB, 0x7D, 0xE7, 0x6B, 0x08, 0x8E, 0xB6, 0x7E, 0xB7, 0x1C,
|
||||
@@ -175,7 +175,7 @@ const static uint8_t test_output_msr[160] = {
|
||||
|
||||
|
||||
// "cn/xao" Alloy (XAO)
|
||||
const static uint8_t test_output_xao[160] = {
|
||||
const static uint8_t test_output_xao[256] = {
|
||||
0x9A, 0x29, 0xD0, 0xC4, 0xAF, 0xDC, 0x63, 0x9B, 0x65, 0x53, 0xB1, 0xC8, 0x37, 0x35, 0x11, 0x4C,
|
||||
0x5D, 0x77, 0x16, 0x21, 0x42, 0x97, 0x5C, 0xB8, 0x50, 0xC0, 0xA5, 0x1F, 0x64, 0x07, 0xBD, 0x33,
|
||||
0xF1, 0xC9, 0x98, 0x40, 0x42, 0xDE, 0x39, 0xD1, 0xBA, 0x2D, 0xAD, 0xEC, 0xFE, 0xEA, 0xD8, 0x46,
|
||||
@@ -190,7 +190,7 @@ const static uint8_t test_output_xao[160] = {
|
||||
|
||||
|
||||
// "cn/rto" Arto (RTO)
|
||||
const static uint8_t test_output_rto[160] = {
|
||||
const static uint8_t test_output_rto[256] = {
|
||||
0x82, 0x66, 0x1E, 0x1C, 0x6E, 0x64, 0x36, 0x66, 0x84, 0x06, 0x32, 0x7A, 0x9B, 0xB1, 0x13, 0x19,
|
||||
0xA5, 0x56, 0x16, 0x15, 0xDF, 0xEC, 0x1C, 0x9E, 0xE3, 0x88, 0x4A, 0x6C, 0x1C, 0xEB, 0x76, 0xA5,
|
||||
0xB3, 0xFB, 0xF4, 0x3F, 0x2B, 0x6A, 0x3A, 0x39, 0xA3, 0x6E, 0x08, 0x33, 0x67, 0x90, 0x31, 0xB9,
|
||||
@@ -204,7 +204,7 @@ const static uint8_t test_output_rto[160] = {
|
||||
};
|
||||
|
||||
// "cn/rwz"
|
||||
const static uint8_t test_output_rwz[160] = {
|
||||
const static uint8_t test_output_rwz[256] = {
|
||||
0x5f, 0x56, 0xc6, 0xb0, 0x99, 0x6b, 0xa2, 0x3e, 0x0b, 0xba, 0x07, 0x29, 0xc9, 0x90, 0x74, 0x85,
|
||||
0x5a, 0x10, 0xe3, 0x08, 0x7f, 0xdb, 0xfe, 0x94, 0x75, 0x33, 0x54, 0x73, 0x76, 0xf0, 0x75, 0xb8,
|
||||
0x8b, 0x70, 0x43, 0x9a, 0xfc, 0xf5, 0xeb, 0x15, 0xbb, 0xf9, 0xad, 0x9d, 0x2a, 0xbd, 0x72, 0x52,
|
||||
@@ -218,7 +218,7 @@ const static uint8_t test_output_rwz[160] = {
|
||||
};
|
||||
|
||||
// "cn/zls"
|
||||
const static uint8_t test_output_zls[160] = {
|
||||
const static uint8_t test_output_zls[256] = {
|
||||
0x51, 0x6E, 0x33, 0xC6, 0xE4, 0x46, 0xAB, 0xBC, 0xCD, 0xAD, 0x18, 0xC0, 0x4C, 0xD9, 0xA2, 0x5E,
|
||||
0x64, 0x10, 0x28, 0x53, 0xB2, 0x0A, 0x42, 0xDF, 0xDE, 0xAA, 0x8B, 0x59, 0x9E, 0xCF, 0x40, 0xE2,
|
||||
0x0D, 0x62, 0x5B, 0x42, 0x18, 0xE2, 0x76, 0xAD, 0xD0, 0x74, 0x90, 0x60, 0x8D, 0xC4, 0xC7, 0x80,
|
||||
@@ -232,7 +232,7 @@ const static uint8_t test_output_zls[160] = {
|
||||
};
|
||||
|
||||
// "cn/ccx"
|
||||
const static uint8_t test_output_ccx[160] = {
|
||||
const static uint8_t test_output_ccx[256] = {
|
||||
0xB3, 0xA1, 0x67, 0x86, 0xD2, 0xC9, 0x85, 0xEC, 0xAD, 0xC4, 0x5F, 0x91, 0x05, 0x27, 0xC7, 0xA1,
|
||||
0x96, 0xF0, 0xE1, 0xE9, 0x7C, 0x87, 0x09, 0x38, 0x1D, 0x7D, 0x41, 0x93, 0x35, 0xF8, 0x16, 0x72,
|
||||
0xC3, 0xBD, 0x8D, 0xE8, 0xD5, 0xAE, 0xB8, 0x59, 0x0A, 0x6C, 0xCB, 0x7B, 0x41, 0x30, 0xF7, 0x04,
|
||||
@@ -246,7 +246,7 @@ const static uint8_t test_output_ccx[160] = {
|
||||
};
|
||||
|
||||
// "cn/double"
|
||||
const static uint8_t test_output_double[160] = {
|
||||
const static uint8_t test_output_double[256] = {
|
||||
0xAE, 0xFB, 0xB3, 0xF0, 0xCC, 0x88, 0x04, 0x6D, 0x11, 0x9F, 0x6C, 0x54, 0xB9, 0x6D, 0x90, 0xC9,
|
||||
0xE8, 0x84, 0xEA, 0x3B, 0x59, 0x83, 0xA6, 0x0D, 0x50, 0xA4, 0x2D, 0x7D, 0x3E, 0xBE, 0x48, 0x21,
|
||||
0x49, 0xCE, 0x8E, 0xF3, 0xBC, 0x8A, 0x36, 0xBF, 0x86, 0x37, 0x89, 0x55, 0x09, 0xBA, 0x22, 0xF8,
|
||||
@@ -261,7 +261,7 @@ const static uint8_t test_output_double[160] = {
|
||||
|
||||
#ifdef XMRIG_ALGO_CN_LITE
|
||||
// "cn-lite/0"
|
||||
const static uint8_t test_output_v0_lite[160] = {
|
||||
const static uint8_t test_output_v0_lite[256] = {
|
||||
0x36, 0x95, 0xB4, 0xB5, 0x3B, 0xB0, 0x03, 0x58, 0xB0, 0xAD, 0x38, 0xDC, 0x16, 0x0F, 0xEB, 0x9E,
|
||||
0x00, 0x4E, 0xEC, 0xE0, 0x9B, 0x83, 0xA7, 0x2E, 0xF6, 0xBA, 0x98, 0x64, 0xD3, 0x51, 0x0C, 0x88,
|
||||
0x28, 0xA2, 0x2B, 0xAD, 0x3F, 0x93, 0xD1, 0x40, 0x8F, 0xCA, 0x47, 0x2E, 0xB5, 0xAD, 0x1C, 0xBE,
|
||||
@@ -276,7 +276,7 @@ const static uint8_t test_output_v0_lite[160] = {
|
||||
|
||||
|
||||
// "cn-lite/1" AEON v7
|
||||
const static uint8_t test_output_v1_lite[160] = {
|
||||
const static uint8_t test_output_v1_lite[256] = {
|
||||
0x6D, 0x8C, 0xDC, 0x44, 0x4E, 0x9B, 0xBB, 0xFD, 0x68, 0xFC, 0x43, 0xFC, 0xD4, 0x85, 0x5B, 0x22,
|
||||
0x8C, 0x8A, 0x1B, 0xD9, 0x1D, 0x9D, 0x00, 0x28, 0x5B, 0xEC, 0x02, 0xB7, 0xCA, 0x2D, 0x67, 0x41,
|
||||
0x87, 0xC4, 0xE5, 0x70, 0x65, 0x3E, 0xB4, 0xC2, 0xB4, 0x2B, 0x7A, 0x0D, 0x54, 0x65, 0x59, 0x45,
|
||||
@@ -293,7 +293,7 @@ const static uint8_t test_output_v1_lite[160] = {
|
||||
|
||||
#ifdef XMRIG_ALGO_CN_HEAVY
|
||||
// "cn-heavy/0"
|
||||
const static uint8_t test_output_v0_heavy[160] = {
|
||||
const static uint8_t test_output_v0_heavy[256] = {
|
||||
0x99, 0x83, 0xF2, 0x1B, 0xDF, 0x20, 0x10, 0xA8, 0xD7, 0x07, 0xBB, 0x2F, 0x14, 0xD7, 0x86, 0x64,
|
||||
0xBB, 0xE1, 0x18, 0x7F, 0x55, 0x01, 0x4B, 0x39, 0xE5, 0xF3, 0xD6, 0x93, 0x28, 0xE4, 0x8F, 0xC2,
|
||||
0x4D, 0x94, 0x7D, 0xD6, 0xDB, 0x6E, 0x07, 0x48, 0x26, 0x4A, 0x51, 0x2E, 0xAC, 0xF3, 0x25, 0x4A,
|
||||
@@ -308,7 +308,7 @@ const static uint8_t test_output_v0_heavy[160] = {
|
||||
|
||||
|
||||
// "cn-heavy/xhv"
|
||||
const static uint8_t test_output_xhv_heavy[160] = {
|
||||
const static uint8_t test_output_xhv_heavy[256] = {
|
||||
0x5A, 0xC3, 0xF7, 0x85, 0xC4, 0x90, 0xC5, 0x85, 0x50, 0xEC, 0x95, 0xD2, 0x72, 0x65, 0x63, 0x57,
|
||||
0x7E, 0x7C, 0x1C, 0x21, 0x2D, 0x0C, 0xDE, 0x59, 0x12, 0x73, 0x20, 0x1E, 0x44, 0xFD, 0xD5, 0xB6,
|
||||
0x1F, 0x4E, 0xB2, 0x0A, 0x36, 0x51, 0x4B, 0xF5, 0x4D, 0xC9, 0xE0, 0x90, 0x2C, 0x16, 0x47, 0x3F,
|
||||
@@ -323,7 +323,7 @@ const static uint8_t test_output_xhv_heavy[160] = {
|
||||
|
||||
|
||||
// "cn-heavy/tube"
|
||||
const static uint8_t test_output_tube_heavy[160] = {
|
||||
const static uint8_t test_output_tube_heavy[256] = {
|
||||
0xFE, 0x53, 0x35, 0x20, 0x76, 0xEA, 0xE6, 0x89, 0xFA, 0x3B, 0x4F, 0xDA, 0x61, 0x46, 0x34, 0xCF,
|
||||
0xC3, 0x12, 0xEE, 0x0C, 0x38, 0x7D, 0xF2, 0xB8, 0xB7, 0x4D, 0xA2, 0xA1, 0x59, 0x74, 0x12, 0x35,
|
||||
0xCD, 0x3F, 0x29, 0xDF, 0x07, 0x4A, 0x14, 0xAD, 0x0B, 0x98, 0x99, 0x37, 0xCA, 0x14, 0x68, 0xA3,
|
||||
@@ -340,7 +340,7 @@ const static uint8_t test_output_tube_heavy[160] = {
|
||||
|
||||
#ifdef XMRIG_ALGO_CN_PICO
|
||||
// "cn-pico/trtl"
|
||||
const static uint8_t test_output_pico_trtl[160] = {
|
||||
const static uint8_t test_output_pico_trtl[256] = {
|
||||
0x08, 0xF4, 0x21, 0xD7, 0x83, 0x31, 0x17, 0x30, 0x0E, 0xDA, 0x66, 0xE9, 0x8F, 0x4A, 0x25, 0x69,
|
||||
0x09, 0x3D, 0xF3, 0x00, 0x50, 0x01, 0x73, 0x94, 0x4E, 0xFC, 0x40, 0x1E, 0x9A, 0x4A, 0x17, 0xAF,
|
||||
0xB2, 0x17, 0x2E, 0xC9, 0x46, 0x6E, 0x1A, 0xEE, 0x70, 0xEC, 0x85, 0x72, 0xA1, 0x4C, 0x23, 0x3E,
|
||||
@@ -355,7 +355,7 @@ const static uint8_t test_output_pico_trtl[160] = {
|
||||
|
||||
|
||||
// "cn-pico/tlo"
|
||||
const static uint8_t test_output_pico_tlo[160] = {
|
||||
const static uint8_t test_output_pico_tlo[256] = {
|
||||
0x99, 0x75, 0xF2, 0xC1, 0xB3, 0xB4, 0x54, 0x34, 0xA4, 0x93, 0x86, 0x21, 0x30, 0x97, 0xF3, 0x1B,
|
||||
0xB4, 0xB9, 0xA6, 0x58, 0x6A, 0x7E, 0x81, 0xF4, 0x42, 0x9F, 0x6D, 0x5F, 0x65, 0xC3, 0x8D, 0x1A,
|
||||
0xFC, 0x67, 0xDF, 0xCC, 0xB5, 0xFC, 0x90, 0xD7, 0x85, 0x5A, 0xE9, 0x03, 0x36, 0x1E, 0xAB, 0xD7,
|
||||
@@ -372,7 +372,7 @@ const static uint8_t test_output_pico_tlo[160] = {
|
||||
|
||||
#ifdef XMRIG_ALGO_CN_FEMTO
|
||||
// "cn/upx2"
|
||||
const static uint8_t test_output_femto_upx2[160] = {
|
||||
const static uint8_t test_output_femto_upx2[256] = {
|
||||
0xAA, 0xBB, 0xB8, 0xED, 0x14, 0xA8, 0x35, 0xFA, 0x22, 0xCF, 0xB1, 0xB5, 0xDE, 0xA8, 0x72, 0xB0,
|
||||
0xA1, 0xD6, 0xCB, 0xD8, 0x46, 0xF4, 0x39, 0x1C, 0x0F, 0x01, 0xF3, 0x87, 0x5E, 0x3A, 0x37, 0x61,
|
||||
0x38, 0x59, 0x15, 0x72, 0xF8, 0x20, 0xD4, 0xDE, 0x25, 0x3C, 0xF5, 0x5A, 0x21, 0x92, 0xB6, 0x22,
|
||||
@@ -389,7 +389,7 @@ const static uint8_t test_output_femto_upx2[160] = {
|
||||
|
||||
#ifdef XMRIG_ALGO_ARGON2
|
||||
// "argon2/chukwa"
|
||||
const static uint8_t argon2_chukwa_test_out[160] = {
|
||||
const static uint8_t argon2_chukwa_test_out[256] = {
|
||||
0xC1, 0x58, 0xA1, 0x05, 0xAE, 0x75, 0xC7, 0x56, 0x1C, 0xFD, 0x02, 0x90, 0x83, 0xA4, 0x7A, 0x87,
|
||||
0x65, 0x3D, 0x51, 0xF9, 0x14, 0x12, 0x8E, 0x21, 0xC1, 0x97, 0x1D, 0x8B, 0x10, 0xC4, 0x90, 0x34,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
@@ -403,7 +403,7 @@ const static uint8_t argon2_chukwa_test_out[160] = {
|
||||
};
|
||||
|
||||
// "argon2/chukwav2"
|
||||
const static uint8_t argon2_chukwa_v2_test_out[160] = {
|
||||
const static uint8_t argon2_chukwa_v2_test_out[256] = {
|
||||
0x77, 0xCF, 0x69, 0x58, 0xB3, 0x53, 0x6E, 0x1F, 0x9F, 0x0D, 0x1E, 0xA1, 0x65, 0xF2, 0x28, 0x11,
|
||||
0xCA, 0x7B, 0xC4, 0x87, 0xEA, 0x9F, 0x52, 0x03, 0x0B, 0x50, 0x50, 0xC1, 0x7F, 0xCD, 0xD8, 0xF5,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
@@ -417,7 +417,7 @@ const static uint8_t argon2_chukwa_v2_test_out[160] = {
|
||||
};
|
||||
|
||||
// "argon2/wrkz"
|
||||
const static uint8_t argon2_wrkz_test_out[160] = {
|
||||
const static uint8_t argon2_wrkz_test_out[256] = {
|
||||
0x35, 0xE0, 0x83, 0xD4, 0xB9, 0xC6, 0x4C, 0x2A, 0x68, 0x82, 0x0A, 0x43, 0x1F, 0x61, 0x31, 0x19,
|
||||
0x98, 0xA8, 0xCD, 0x18, 0x64, 0xDB, 0xA4, 0x07, 0x7E, 0x25, 0xB7, 0xF1, 0x21, 0xD5, 0x4B, 0xD1,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
@@ -434,7 +434,7 @@ const static uint8_t argon2_wrkz_test_out[160] = {
|
||||
|
||||
#ifdef XMRIG_ALGO_ASTROBWT
|
||||
// "astrobwt"
|
||||
const static uint8_t astrobwt_dero_test_out[160] = {
|
||||
const static uint8_t astrobwt_dero_test_out[256] = {
|
||||
0x7E, 0x88, 0x44, 0xF2, 0xD6, 0xB7, 0xA4, 0x34, 0x98, 0xFE, 0x6D, 0x22, 0x65, 0x27, 0x68, 0x90,
|
||||
0x23, 0xDA, 0x8A, 0x52, 0xF9, 0xFC, 0x4E, 0xC6, 0x9E, 0x5A, 0xAA, 0xA6, 0x3E, 0xDC, 0xE1, 0xC1,
|
||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
||||
@@ -450,6 +450,29 @@ const static uint8_t astrobwt_dero_test_out[160] = {
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
// "GhostRider"
|
||||
const static uint8_t test_output_gr[256] = {
|
||||
0x42, 0x17, 0x0C, 0xC1, 0x85, 0xE6, 0x76, 0x3C, 0xC7, 0xCB, 0x27, 0xC4, 0x17, 0x39, 0x2D, 0xE2,
|
||||
0x29, 0x6B, 0x40, 0x66, 0x85, 0xA4, 0xE3, 0xD3, 0x8C, 0xE9, 0xA5, 0x8F, 0x10, 0xFC, 0x81, 0xE4,
|
||||
0x90, 0x56, 0xF2, 0x9E, 0x00, 0xD0, 0xF8, 0xA1, 0x88, 0x82, 0x86, 0xC0, 0x86, 0x04, 0x6B, 0x0E,
|
||||
0x9A, 0xDB, 0xDB, 0xFD, 0x23, 0x16, 0x77, 0x94, 0xFE, 0x58, 0x93, 0x05, 0x10, 0x3F, 0x27, 0x75,
|
||||
0x51, 0x44, 0xF3, 0x5F, 0xE2, 0xF9, 0x61, 0xBE, 0xC0, 0x30, 0xB5, 0x8E, 0xB1, 0x1B, 0xA1, 0xF7,
|
||||
0x06, 0x4E, 0xF1, 0x6A, 0xFD, 0xA5, 0x44, 0x8E, 0x64, 0x47, 0x8C, 0x67, 0x51, 0xE2, 0x5C, 0x55,
|
||||
0x3E, 0x39, 0xA6, 0xA5, 0xF7, 0xB8, 0xD0, 0x5E, 0xE2, 0xBF, 0x92, 0x44, 0xD9, 0xAA, 0x76, 0x22,
|
||||
0xE3, 0x3E, 0x15, 0x96, 0xD8, 0x6A, 0x78, 0x2D, 0xA9, 0x77, 0x24, 0x1A, 0x4B, 0xE7, 0x5A, 0x2E,
|
||||
0x89, 0x77, 0xAE, 0x92, 0xE4, 0xA4, 0x2D, 0xAF, 0x0B, 0x27, 0x09, 0xB2, 0x5F, 0x95, 0x61, 0xA9,
|
||||
0xA8, 0xBE, 0x5D, 0x39, 0xBE, 0x41, 0x5F, 0x9C, 0x67, 0x28, 0x48, 0x4F, 0xAE, 0x2A, 0x50, 0x2B,
|
||||
0xB8, 0xC7, 0x42, 0x73, 0x51, 0x60, 0x59, 0xD8, 0x9C, 0xBA, 0x22, 0x2F, 0x8E, 0x34, 0xDE, 0xC8,
|
||||
0x1B, 0xAE, 0x9E, 0xBD, 0xF7, 0xE8, 0xFD, 0x8A, 0x97, 0xBE, 0xF0, 0x47, 0xAC, 0x27, 0xDD, 0x28,
|
||||
0xC9, 0x28, 0xA8, 0x7B, 0x2A, 0xB8, 0x90, 0x3E, 0xCA, 0xB4, 0x78, 0x44, 0xCE, 0xCD, 0x91, 0xEC,
|
||||
0xC2, 0x5A, 0x17, 0x59, 0x7C, 0x14, 0xF8, 0x95, 0x28, 0x14, 0xC3, 0xAD, 0xC4, 0xE1, 0x13, 0x5A,
|
||||
0xC4, 0xA7, 0xC7, 0x77, 0xAD, 0xF8, 0x09, 0x61, 0x16, 0xBB, 0xAA, 0x7E, 0xAB, 0xC3, 0x00, 0x25,
|
||||
0xBA, 0xA8, 0x97, 0xC7, 0x7D, 0x38, 0x46, 0x0E, 0x59, 0xAC, 0xCB, 0xAE, 0xFE, 0x3C, 0x6F, 0x01
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
} // namespace xmrig
|
||||
|
||||
|
||||
|
||||
@@ -43,6 +43,11 @@
|
||||
#include "crypto/cn/soft_aes.h"
|
||||
|
||||
|
||||
#ifdef XMRIG_VAES
|
||||
# include "crypto/cn/CryptoNight_x86_vaes.h"
|
||||
#endif
|
||||
|
||||
|
||||
extern "C"
|
||||
{
|
||||
#include "crypto/cn/c_groestl.h"
|
||||
@@ -285,23 +290,48 @@ inline constexpr uint64_t interleaved_index<0>(uint64_t k)
|
||||
|
||||
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
|
||||
static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
|
||||
static NOINLINE void cn_explode_scratchpad(cryptonight_ctx *ctx)
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!SOFT_AES && !props.isHeavy() && cn_vaes_enabled) {
|
||||
cn_explode_scratchpad_vaes(ctx, props.memory(), props.half_mem());
|
||||
return;
|
||||
}
|
||||
# endif
|
||||
|
||||
constexpr size_t N = (props.memory() / sizeof(__m128i)) / (props.half_mem() ? 2 : 1);
|
||||
|
||||
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
|
||||
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
|
||||
|
||||
const __m128i* input = reinterpret_cast<const __m128i*>(ctx->state);
|
||||
__m128i* output = reinterpret_cast<__m128i*>(ctx->memory);
|
||||
|
||||
aes_genkey<SOFT_AES>(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
|
||||
|
||||
xin0 = _mm_load_si128(input + 4);
|
||||
xin1 = _mm_load_si128(input + 5);
|
||||
xin2 = _mm_load_si128(input + 6);
|
||||
xin3 = _mm_load_si128(input + 7);
|
||||
xin4 = _mm_load_si128(input + 8);
|
||||
xin5 = _mm_load_si128(input + 9);
|
||||
xin6 = _mm_load_si128(input + 10);
|
||||
xin7 = _mm_load_si128(input + 11);
|
||||
if (props.half_mem() && !ctx->first_half) {
|
||||
const __m128i* p = reinterpret_cast<const __m128i*>(ctx->save_state);
|
||||
xin0 = _mm_load_si128(p + 0);
|
||||
xin1 = _mm_load_si128(p + 1);
|
||||
xin2 = _mm_load_si128(p + 2);
|
||||
xin3 = _mm_load_si128(p + 3);
|
||||
xin4 = _mm_load_si128(p + 4);
|
||||
xin5 = _mm_load_si128(p + 5);
|
||||
xin6 = _mm_load_si128(p + 6);
|
||||
xin7 = _mm_load_si128(p + 7);
|
||||
}
|
||||
else {
|
||||
xin0 = _mm_load_si128(input + 4);
|
||||
xin1 = _mm_load_si128(input + 5);
|
||||
xin2 = _mm_load_si128(input + 6);
|
||||
xin3 = _mm_load_si128(input + 7);
|
||||
xin4 = _mm_load_si128(input + 8);
|
||||
xin5 = _mm_load_si128(input + 9);
|
||||
xin6 = _mm_load_si128(input + 10);
|
||||
xin7 = _mm_load_si128(input + 11);
|
||||
}
|
||||
|
||||
if (props.isHeavy()) {
|
||||
for (size_t i = 0; i < 16; i++) {
|
||||
@@ -320,50 +350,80 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < props.memory() / sizeof(__m128i); i += 8) {
|
||||
if (interleave > 0) {
|
||||
_mm_prefetch((const char*)(output), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(output + (64 << interleave) / sizeof(__m128i)), _MM_HINT_T0);
|
||||
}
|
||||
constexpr int output_increment = (64 << interleave) / sizeof(__m128i);
|
||||
constexpr int prefetch_dist = 2048 / sizeof(__m128i);
|
||||
|
||||
aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
__m128i* e = output + (N << interleave) - prefetch_dist;
|
||||
__m128i* prefetch_ptr = output + prefetch_dist;
|
||||
|
||||
_mm_store_si128(output + 0, xin0);
|
||||
_mm_store_si128(output + 1, xin1);
|
||||
_mm_store_si128(output + 2, xin2);
|
||||
_mm_store_si128(output + 3, xin3);
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
do {
|
||||
_mm_prefetch((const char*)(prefetch_ptr), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(prefetch_ptr + output_increment), _MM_HINT_T0);
|
||||
|
||||
constexpr int output_increment = (64 << interleave) / sizeof(__m128i);
|
||||
aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k3, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k4, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k5, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k6, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k7, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
|
||||
|
||||
_mm_store_si128(output + output_increment + 0, xin4);
|
||||
_mm_store_si128(output + output_increment + 1, xin5);
|
||||
_mm_store_si128(output + output_increment + 2, xin6);
|
||||
_mm_store_si128(output + output_increment + 3, xin7);
|
||||
_mm_store_si128(output + 0, xin0);
|
||||
_mm_store_si128(output + 1, xin1);
|
||||
_mm_store_si128(output + 2, xin2);
|
||||
_mm_store_si128(output + 3, xin3);
|
||||
|
||||
output += output_increment * 2;
|
||||
_mm_store_si128(output + output_increment + 0, xin4);
|
||||
_mm_store_si128(output + output_increment + 1, xin5);
|
||||
_mm_store_si128(output + output_increment + 2, xin6);
|
||||
_mm_store_si128(output + output_increment + 3, xin7);
|
||||
|
||||
output += output_increment * 2;
|
||||
prefetch_ptr += output_increment * 2;
|
||||
} while (output < e);
|
||||
e += prefetch_dist;
|
||||
prefetch_ptr = output;
|
||||
}
|
||||
|
||||
if (props.half_mem() && ctx->first_half) {
|
||||
__m128i* p = reinterpret_cast<__m128i*>(ctx->save_state);
|
||||
_mm_store_si128(p + 0, xin0);
|
||||
_mm_store_si128(p + 1, xin1);
|
||||
_mm_store_si128(p + 2, xin2);
|
||||
_mm_store_si128(p + 3, xin3);
|
||||
_mm_store_si128(p + 4, xin4);
|
||||
_mm_store_si128(p + 5, xin5);
|
||||
_mm_store_si128(p + 6, xin6);
|
||||
_mm_store_si128(p + 7, xin7);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
|
||||
static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
|
||||
static NOINLINE void cn_implode_scratchpad(cryptonight_ctx *ctx)
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!SOFT_AES && !props.isHeavy() && cn_vaes_enabled) {
|
||||
cn_implode_scratchpad_vaes(ctx, props.memory(), props.half_mem());
|
||||
return;
|
||||
}
|
||||
# endif
|
||||
|
||||
constexpr bool IS_HEAVY = props.isHeavy();
|
||||
constexpr size_t N = (props.memory() / sizeof(__m128i)) / (props.half_mem() ? 2 : 1);
|
||||
|
||||
__m128i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
|
||||
__m128i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
|
||||
|
||||
const __m128i *input = reinterpret_cast<const __m128i*>(ctx->memory);
|
||||
__m128i *output = reinterpret_cast<__m128i*>(ctx->state);
|
||||
|
||||
aes_genkey<SOFT_AES>(output + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
|
||||
|
||||
xout0 = _mm_load_si128(output + 4);
|
||||
@@ -376,46 +436,54 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
|
||||
xout7 = _mm_load_si128(output + 11);
|
||||
|
||||
const __m128i* input_begin = input;
|
||||
for (size_t i = 0; i < props.memory() / sizeof(__m128i);) {
|
||||
xout0 = _mm_xor_si128(_mm_load_si128(input + 0), xout0);
|
||||
xout1 = _mm_xor_si128(_mm_load_si128(input + 1), xout1);
|
||||
xout2 = _mm_xor_si128(_mm_load_si128(input + 2), xout2);
|
||||
xout3 = _mm_xor_si128(_mm_load_si128(input + 3), xout3);
|
||||
|
||||
constexpr int input_increment = (64 << interleave) / sizeof(__m128i);
|
||||
|
||||
xout4 = _mm_xor_si128(_mm_load_si128(input + input_increment + 0), xout4);
|
||||
xout5 = _mm_xor_si128(_mm_load_si128(input + input_increment + 1), xout5);
|
||||
xout6 = _mm_xor_si128(_mm_load_si128(input + input_increment + 2), xout6);
|
||||
xout7 = _mm_xor_si128(_mm_load_si128(input + input_increment + 3), xout7);
|
||||
|
||||
input += input_increment * 2;
|
||||
i += 8;
|
||||
|
||||
if ((interleave > 0) && (i < props.memory() / sizeof(__m128i))) {
|
||||
_mm_prefetch((const char*)(input), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(input + (64 << interleave) / sizeof(__m128i)), _MM_HINT_T0);
|
||||
for (size_t part = 0; part < (props.half_mem() ? 2 : 1); ++part) {
|
||||
if (props.half_mem() && (part == 1)) {
|
||||
input = input_begin;
|
||||
ctx->first_half = false;
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, interleave>(ctx);
|
||||
}
|
||||
|
||||
aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
for (size_t i = 0; i < N;) {
|
||||
xout0 = _mm_xor_si128(_mm_load_si128(input + 0), xout0);
|
||||
xout1 = _mm_xor_si128(_mm_load_si128(input + 1), xout1);
|
||||
xout2 = _mm_xor_si128(_mm_load_si128(input + 2), xout2);
|
||||
xout3 = _mm_xor_si128(_mm_load_si128(input + 3), xout3);
|
||||
|
||||
if (IS_HEAVY) {
|
||||
mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
|
||||
constexpr int input_increment = (64 << interleave) / sizeof(__m128i);
|
||||
|
||||
xout4 = _mm_xor_si128(_mm_load_si128(input + input_increment + 0), xout4);
|
||||
xout5 = _mm_xor_si128(_mm_load_si128(input + input_increment + 1), xout5);
|
||||
xout6 = _mm_xor_si128(_mm_load_si128(input + input_increment + 2), xout6);
|
||||
xout7 = _mm_xor_si128(_mm_load_si128(input + input_increment + 3), xout7);
|
||||
|
||||
input += input_increment * 2;
|
||||
i += 8;
|
||||
|
||||
if (i < N) {
|
||||
_mm_prefetch((const char*)(input), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(input + input_increment), _MM_HINT_T0);
|
||||
}
|
||||
|
||||
aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k2, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k3, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k4, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k5, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k6, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k7, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k8, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
aes_round<SOFT_AES>(k9, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
|
||||
|
||||
if (IS_HEAVY) {
|
||||
mix_and_propagate(xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (IS_HEAVY) {
|
||||
input = input_begin;
|
||||
for (size_t i = 0; i < props.memory() / sizeof(__m128i);) {
|
||||
for (size_t i = 0; i < N;) {
|
||||
xout0 = _mm_xor_si128(_mm_load_si128(input + 0), xout0);
|
||||
xout1 = _mm_xor_si128(_mm_load_si128(input + 1), xout1);
|
||||
xout2 = _mm_xor_si128(_mm_load_si128(input + 2), xout2);
|
||||
@@ -523,6 +591,9 @@ static inline __m128i int_sqrt_v2(const uint64_t n0)
|
||||
void v4_soft_aes_compile_code(const V4_Instruction *code, int code_size, void *machine_code, xmrig::Assembly ASM);
|
||||
|
||||
|
||||
alignas(64) static const uint32_t tweak1_table[256] = { 268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,268435456,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,805306368,0,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,268435456,805306368,268435456,805306368,268435456,805306368,268435
456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456,805306368,268435456 };
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
@@ -541,12 +612,7 @@ static inline void cryptonight_monero_tweak(uint64_t *mem_out, const uint8_t *l,
|
||||
tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
|
||||
uint64_t vh = _mm_cvtsi128_si64(tmp);
|
||||
|
||||
uint8_t x = static_cast<uint8_t>(vh >> 24);
|
||||
static const uint16_t table = 0x7531;
|
||||
const uint8_t index = (((x >> (3)) & 6) | (x & 1)) << 1;
|
||||
vh ^= ((table >> index) & 0x3) << 28;
|
||||
|
||||
mem_out[1] = vh;
|
||||
mem_out[1] = vh ^ tweak1_table[static_cast<uint32_t>(vh) >> 24];
|
||||
}
|
||||
}
|
||||
|
||||
@@ -568,9 +634,35 @@ static inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var)
|
||||
cx = _mm_xor_si128(cx, _mm_cvttps_epi32(nc));
|
||||
}
|
||||
|
||||
#ifdef XMRIG_FEATURE_ASM
|
||||
template<Algorithm::Id ALGO>
|
||||
static void cryptonight_single_hash_gr_sse41(const uint8_t* __restrict__ input, size_t size, uint8_t* __restrict__ output, cryptonight_ctx** __restrict__ ctx, uint64_t height);
|
||||
#endif
|
||||
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
|
||||
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
||||
{
|
||||
# ifdef XMRIG_FEATURE_ASM
|
||||
if (!SOFT_AES) {
|
||||
switch (ALGO) {
|
||||
case Algorithm::CN_GR_0:
|
||||
case Algorithm::CN_GR_1:
|
||||
case Algorithm::CN_GR_2:
|
||||
case Algorithm::CN_GR_3:
|
||||
case Algorithm::CN_GR_4:
|
||||
case Algorithm::CN_GR_5:
|
||||
if (cn_sse41_enabled) {
|
||||
cryptonight_single_hash_gr_sse41<ALGO>(input, size, output, ctx, height);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
constexpr size_t MASK = props.mask();
|
||||
constexpr Algorithm::Id BASE = props.base();
|
||||
@@ -587,7 +679,11 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
||||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, interleave>(reinterpret_cast<const __m128i *>(ctx[0]->state), reinterpret_cast<__m128i *>(ctx[0]->memory));
|
||||
|
||||
if (props.half_mem()) {
|
||||
ctx[0]->first_half = true;
|
||||
}
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, interleave>(ctx[0]);
|
||||
|
||||
uint64_t *h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
|
||||
uint8_t *l0 = ctx[0]->memory;
|
||||
@@ -742,7 +838,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
||||
}
|
||||
# endif
|
||||
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, interleave>(reinterpret_cast<const __m128i *>(ctx[0]->memory), reinterpret_cast<__m128i *>(ctx[0]->state));
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, interleave>(ctx[0]);
|
||||
keccakf(h0, 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
@@ -752,13 +848,16 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
||||
|
||||
|
||||
#ifdef XMRIG_FEATURE_ASM
|
||||
extern "C" void cnv1_single_mainloop_asm(cryptonight_ctx * *ctx);
|
||||
extern "C" void cnv1_double_mainloop_asm(cryptonight_ctx **ctx);
|
||||
extern "C" void cnv1_quad_mainloop_asm(cryptonight_ctx **ctx);
|
||||
extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx **ctx);
|
||||
extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx **ctx);
|
||||
extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx **ctx);
|
||||
extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx **ctx);
|
||||
extern "C" void cnv2_rwz_mainloop_asm(cryptonight_ctx **ctx);
|
||||
extern "C" void cnv2_rwz_double_mainloop_asm(cryptonight_ctx **ctx);
|
||||
extern "C" void cnv2_upx_double_mainloop_zen3_asm(cryptonight_ctx * *ctx);
|
||||
extern "C" void cnv2_upx_double_mainloop_zen3_asm(cryptonight_ctx **ctx);
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
@@ -795,6 +894,28 @@ extern cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm;
|
||||
extern cn_mainloop_fun cn_upx2_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_upx2_double_mainloop_asm;
|
||||
|
||||
extern cn_mainloop_fun cn_gr0_single_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr1_single_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr2_single_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr3_single_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr4_single_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr5_single_mainloop_asm;
|
||||
|
||||
extern cn_mainloop_fun cn_gr0_double_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr1_double_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr2_double_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr3_double_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr4_double_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr5_double_mainloop_asm;
|
||||
|
||||
extern cn_mainloop_fun cn_gr0_quad_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr1_quad_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr2_quad_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr3_quad_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr4_quad_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr5_quad_mainloop_asm;
|
||||
|
||||
|
||||
} // namespace xmrig
|
||||
|
||||
|
||||
@@ -833,7 +954,11 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
|
||||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state);
|
||||
cn_explode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
|
||||
|
||||
if (props.half_mem()) {
|
||||
ctx[0]->first_half = true;
|
||||
}
|
||||
cn_explode_scratchpad<ALGO, false, 0>(ctx[0]);
|
||||
|
||||
if (ALGO == Algorithm::CN_2) {
|
||||
if (ASM == Assembly::INTEL) {
|
||||
@@ -915,7 +1040,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
|
||||
ctx[0]->generated_code(ctx);
|
||||
}
|
||||
|
||||
cn_implode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
|
||||
cn_implode_scratchpad<ALGO, false, 0>(ctx[0]);
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
@@ -937,8 +1062,21 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
|
||||
keccak(input, size, ctx[0]->state);
|
||||
keccak(input + size, size, ctx[1]->state);
|
||||
|
||||
cn_explode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
|
||||
cn_explode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[1]->state), reinterpret_cast<__m128i*>(ctx[1]->memory));
|
||||
if (props.half_mem()) {
|
||||
ctx[0]->first_half = true;
|
||||
ctx[1]->first_half = true;
|
||||
}
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!props.isHeavy() && cn_vaes_enabled) {
|
||||
cn_explode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
cn_explode_scratchpad<ALGO, false, 0>(ctx[0]);
|
||||
cn_explode_scratchpad<ALGO, false, 0>(ctx[1]);
|
||||
}
|
||||
|
||||
if (ALGO == Algorithm::CN_2) {
|
||||
cnv2_double_mainloop_sandybridge_asm(ctx);
|
||||
@@ -977,8 +1115,16 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
|
||||
ctx[0]->generated_code(ctx);
|
||||
}
|
||||
|
||||
cn_implode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
|
||||
cn_implode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[1]->memory), reinterpret_cast<__m128i*>(ctx[1]->state));
|
||||
# ifdef XMRIG_VAES
|
||||
if (!props.isHeavy() && cn_vaes_enabled) {
|
||||
cn_implode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
cn_implode_scratchpad<ALGO, false, 0>(ctx[0]);
|
||||
cn_implode_scratchpad<ALGO, false, 0>(ctx[1]);
|
||||
}
|
||||
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[1]->state), 24);
|
||||
@@ -995,9 +1141,130 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
#ifdef XMRIG_FEATURE_ASM
|
||||
template<Algorithm::Id ALGO>
|
||||
static NOINLINE void cryptonight_single_hash_gr_sse41(const uint8_t* __restrict__ input, size_t size, uint8_t* __restrict__ output, cryptonight_ctx** __restrict__ ctx, uint64_t height)
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
constexpr Algorithm::Id BASE = props.base();
|
||||
|
||||
if (BASE == Algorithm::CN_1 && size < 43) {
|
||||
memset(output, 0, 32);
|
||||
return;
|
||||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state);
|
||||
|
||||
if (props.half_mem()) {
|
||||
ctx[0]->first_half = true;
|
||||
}
|
||||
cn_explode_scratchpad<ALGO, false, 0>(ctx[0]);
|
||||
|
||||
VARIANT1_INIT(0);
|
||||
ctx[0]->tweak1_2 = tweak1_2_0;
|
||||
ctx[0]->tweak1_table = tweak1_table;
|
||||
if (ALGO == Algorithm::CN_GR_0) cn_gr0_single_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_1) cn_gr1_single_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_2) cn_gr2_single_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_3) cn_gr3_single_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_4) cn_gr4_single_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_5) cn_gr5_single_mainloop_asm(ctx);
|
||||
|
||||
cn_implode_scratchpad<ALGO, false, 0>(ctx[0]);
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
||||
|
||||
template<Algorithm::Id ALGO>
|
||||
static NOINLINE void cryptonight_double_hash_gr_sse41(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
constexpr Algorithm::Id BASE = props.base();
|
||||
|
||||
if (BASE == Algorithm::CN_1 && size < 43) {
|
||||
memset(output, 0, 64);
|
||||
return;
|
||||
}
|
||||
|
||||
keccak(input, size, ctx[0]->state);
|
||||
keccak(input + size, size, ctx[1]->state);
|
||||
|
||||
if (props.half_mem()) {
|
||||
ctx[0]->first_half = true;
|
||||
ctx[1]->first_half = true;
|
||||
}
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!props.isHeavy() && cn_vaes_enabled) {
|
||||
cn_explode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
cn_explode_scratchpad<ALGO, false, 0>(ctx[0]);
|
||||
cn_explode_scratchpad<ALGO, false, 0>(ctx[1]);
|
||||
}
|
||||
|
||||
VARIANT1_INIT(0);
|
||||
VARIANT1_INIT(1);
|
||||
|
||||
ctx[0]->tweak1_2 = tweak1_2_0;
|
||||
ctx[1]->tweak1_2 = tweak1_2_1;
|
||||
|
||||
ctx[0]->tweak1_table = tweak1_table;
|
||||
|
||||
if (ALGO == Algorithm::CN_GR_0) cn_gr0_double_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_1) cn_gr1_double_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_2) cn_gr2_double_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_3) cn_gr3_double_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_4) cn_gr4_double_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_5) cn_gr5_double_mainloop_asm(ctx);
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!props.isHeavy() && cn_vaes_enabled) {
|
||||
cn_implode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
cn_implode_scratchpad<ALGO, false, 0>(ctx[0]);
|
||||
cn_implode_scratchpad<ALGO, false, 0>(ctx[1]);
|
||||
}
|
||||
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[1]->state), 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES>
|
||||
inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
||||
{
|
||||
# ifdef XMRIG_FEATURE_ASM
|
||||
if (!SOFT_AES) {
|
||||
switch (ALGO) {
|
||||
case Algorithm::CN_GR_0:
|
||||
case Algorithm::CN_GR_1:
|
||||
case Algorithm::CN_GR_2:
|
||||
case Algorithm::CN_GR_3:
|
||||
case Algorithm::CN_GR_4:
|
||||
case Algorithm::CN_GR_5:
|
||||
if (cn_sse41_enabled) {
|
||||
cryptonight_double_hash_gr_sse41<ALGO>(input, size, output, ctx, height);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
constexpr size_t MASK = props.mask();
|
||||
constexpr Algorithm::Id BASE = props.base();
|
||||
@@ -1029,8 +1296,21 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||
VARIANT4_RANDOM_MATH_INIT(0);
|
||||
VARIANT4_RANDOM_MATH_INIT(1);
|
||||
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(h0), reinterpret_cast<__m128i *>(l0));
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(h1), reinterpret_cast<__m128i *>(l1));
|
||||
if (props.half_mem()) {
|
||||
ctx[0]->first_half = true;
|
||||
ctx[1]->first_half = true;
|
||||
}
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!SOFT_AES && !props.isHeavy() && cn_vaes_enabled) {
|
||||
cn_explode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
|
||||
}
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
@@ -1225,8 +1505,16 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||
bx10 = cx1;
|
||||
}
|
||||
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(l0), reinterpret_cast<__m128i *>(h0));
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(l1), reinterpret_cast<__m128i *>(h1));
|
||||
# ifdef XMRIG_VAES
|
||||
if (!SOFT_AES && !props.isHeavy() && cn_vaes_enabled) {
|
||||
cn_implode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
|
||||
}
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
@@ -1236,6 +1524,85 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||
}
|
||||
|
||||
|
||||
#ifdef XMRIG_FEATURE_ASM
|
||||
template<Algorithm::Id ALGO>
|
||||
static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__ input, size_t size, uint8_t* __restrict__ output, cryptonight_ctx** __restrict__ ctx, uint64_t height)
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
constexpr Algorithm::Id BASE = props.base();
|
||||
|
||||
if (BASE == Algorithm::CN_1 && size < 43) {
|
||||
memset(output, 0, 32 * 4);
|
||||
return;
|
||||
}
|
||||
|
||||
keccak(input + size * 0, size, ctx[0]->state);
|
||||
keccak(input + size * 1, size, ctx[1]->state);
|
||||
keccak(input + size * 2, size, ctx[2]->state);
|
||||
keccak(input + size * 3, size, ctx[3]->state);
|
||||
|
||||
if (props.half_mem()) {
|
||||
ctx[0]->first_half = true;
|
||||
ctx[1]->first_half = true;
|
||||
ctx[2]->first_half = true;
|
||||
ctx[3]->first_half = true;
|
||||
}
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!props.isHeavy() && cn_vaes_enabled) {
|
||||
cn_explode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
|
||||
cn_explode_scratchpad_vaes_double(ctx[2], ctx[3], props.memory(), props.half_mem());
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
cn_explode_scratchpad<ALGO, false, 0>(ctx[0]);
|
||||
cn_explode_scratchpad<ALGO, false, 0>(ctx[1]);
|
||||
cn_explode_scratchpad<ALGO, false, 0>(ctx[2]);
|
||||
cn_explode_scratchpad<ALGO, false, 0>(ctx[3]);
|
||||
}
|
||||
|
||||
VARIANT1_INIT(0); ctx[0]->tweak1_2 = tweak1_2_0;
|
||||
VARIANT1_INIT(1); ctx[1]->tweak1_2 = tweak1_2_1;
|
||||
VARIANT1_INIT(2); ctx[2]->tweak1_2 = tweak1_2_2;
|
||||
VARIANT1_INIT(3); ctx[3]->tweak1_2 = tweak1_2_3;
|
||||
|
||||
ctx[0]->tweak1_table = tweak1_table;
|
||||
|
||||
if (ALGO == Algorithm::CN_GR_0) cn_gr0_quad_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_1) cn_gr1_quad_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_2) cn_gr2_quad_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_3) cn_gr3_quad_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_4) cn_gr4_quad_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_5) cn_gr5_quad_mainloop_asm(ctx);
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!props.isHeavy() && cn_vaes_enabled) {
|
||||
cn_implode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
|
||||
cn_implode_scratchpad_vaes_double(ctx[2], ctx[3], props.memory(), props.half_mem());
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
cn_implode_scratchpad<ALGO, false, 0>(ctx[0]);
|
||||
cn_implode_scratchpad<ALGO, false, 0>(ctx[1]);
|
||||
cn_implode_scratchpad<ALGO, false, 0>(ctx[2]);
|
||||
cn_implode_scratchpad<ALGO, false, 0>(ctx[3]);
|
||||
}
|
||||
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[1]->state), 24);
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[2]->state), 24);
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[3]->state), 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
extra_hashes[ctx[2]->state[0] & 3](ctx[2]->state, 200, output + 64);
|
||||
extra_hashes[ctx[3]->state[0] & 3](ctx[3]->state, 200, output + 96);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#define CN_STEP1(a, b0, b1, c, l, ptr, idx, conc_var) \
|
||||
ptr = reinterpret_cast<__m128i*>(&l[idx & MASK]); \
|
||||
c = _mm_load_si128(ptr); \
|
||||
@@ -1371,7 +1738,10 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
|
||||
|
||||
for (size_t i = 0; i < 3; i++) {
|
||||
keccak(input + size * i, size, ctx[i]->state);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
|
||||
if (props.half_mem()) {
|
||||
ctx[i]->first_half = true;
|
||||
}
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[i]);
|
||||
}
|
||||
|
||||
uint8_t* l0 = ctx[0]->memory;
|
||||
@@ -1416,7 +1786,7 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < 3; i++) {
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[i]);
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
|
||||
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
|
||||
}
|
||||
@@ -1426,6 +1796,27 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES>
|
||||
inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
||||
{
|
||||
# ifdef XMRIG_FEATURE_ASM
|
||||
if (!SOFT_AES) {
|
||||
switch (ALGO) {
|
||||
case Algorithm::CN_GR_0:
|
||||
case Algorithm::CN_GR_1:
|
||||
case Algorithm::CN_GR_2:
|
||||
case Algorithm::CN_GR_3:
|
||||
case Algorithm::CN_GR_4:
|
||||
case Algorithm::CN_GR_5:
|
||||
if (cn_sse41_enabled) {
|
||||
cryptonight_quad_hash_gr_sse41<ALGO>(input, size, output, ctx, height);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
constexpr size_t MASK = props.mask();
|
||||
constexpr Algorithm::Id BASE = props.base();
|
||||
@@ -1445,7 +1836,23 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
|
||||
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
keccak(input + size * i, size, ctx[i]->state);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
|
||||
if (props.half_mem()) {
|
||||
ctx[i]->first_half = true;
|
||||
}
|
||||
}
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!SOFT_AES && !props.isHeavy() && cn_vaes_enabled) {
|
||||
cn_explode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
|
||||
cn_explode_scratchpad_vaes_double(ctx[2], ctx[3], props.memory(), props.half_mem());
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
|
||||
}
|
||||
|
||||
uint8_t* l0 = ctx[0]->memory;
|
||||
@@ -1497,8 +1904,21 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
|
||||
CN_STEP4(3, ax3, bx30, bx31, cx3, l3, mc3, ptr3, idx3);
|
||||
}
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!SOFT_AES && !props.isHeavy() && cn_vaes_enabled) {
|
||||
cn_implode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
|
||||
cn_implode_scratchpad_vaes_double(ctx[2], ctx[3], props.memory(), props.half_mem());
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
|
||||
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
|
||||
}
|
||||
@@ -1527,7 +1947,10 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
|
||||
|
||||
for (size_t i = 0; i < 5; i++) {
|
||||
keccak(input + size * i, size, ctx[i]->state);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
|
||||
if (props.half_mem()) {
|
||||
ctx[i]->first_half = true;
|
||||
}
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[i]);
|
||||
}
|
||||
|
||||
uint8_t* l0 = ctx[0]->memory;
|
||||
@@ -1588,7 +2011,7 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < 5; i++) {
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[i]);
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
|
||||
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
|
||||
}
|
||||
|
||||
478
src/crypto/cn/CryptoNight_x86_vaes.cpp
Normal file
478
src/crypto/cn/CryptoNight_x86_vaes.cpp
Normal file
@@ -0,0 +1,478 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "CryptoNight_x86_vaes.h"
|
||||
#include "CryptoNight_monero.h"
|
||||
#include "CryptoNight.h"
|
||||
|
||||
|
||||
#ifdef __GNUC__
|
||||
# include <x86intrin.h>
|
||||
#if !defined(__clang__) && !defined(__ICC) && __GNUC__ < 10
|
||||
static inline __m256i
|
||||
__attribute__((__always_inline__))
|
||||
_mm256_loadu2_m128i(const __m128i* const hiaddr, const __m128i* const loaddr)
|
||||
{
|
||||
return _mm256_inserti128_si256(
|
||||
_mm256_castsi128_si256(_mm_loadu_si128(loaddr)), _mm_loadu_si128(hiaddr), 1);
|
||||
}
|
||||
|
||||
static inline void
|
||||
__attribute__((__always_inline__))
|
||||
_mm256_storeu2_m128i(__m128i* const hiaddr, __m128i* const loaddr, const __m256i a)
|
||||
{
|
||||
_mm_storeu_si128(loaddr, _mm256_castsi256_si128(a));
|
||||
_mm_storeu_si128(hiaddr, _mm256_extracti128_si256(a, 1));
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
# include <intrin.h>
|
||||
#endif
|
||||
|
||||
|
||||
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
|
||||
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
|
||||
static FORCEINLINE __m128i sl_xor(__m128i tmp1)
{
    // Copies of the input byte-shifted left by 4, 8 and 12 bytes.
    const __m128i by4  = _mm_slli_si128(tmp1, 0x04);
    const __m128i by8  = _mm_slli_si128(by4, 0x04);
    const __m128i by12 = _mm_slli_si128(by8, 0x04);

    // Fold the three shifted copies into the original value.
    return _mm_xor_si128(_mm_xor_si128(_mm_xor_si128(tmp1, by4), by8), by12);
}
|
||||
|
||||
|
||||
template<uint8_t rcon>
|
||||
static FORCEINLINE void aes_genkey_sub(__m128i* xout0, __m128i* xout2)
|
||||
{
|
||||
__m128i xout1 = _mm_aeskeygenassist_si128(*xout2, rcon);
|
||||
xout1 = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem
|
||||
*xout0 = sl_xor(*xout0);
|
||||
*xout0 = _mm_xor_si128(*xout0, xout1);
|
||||
xout1 = _mm_aeskeygenassist_si128(*xout0, 0x00);
|
||||
xout1 = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem
|
||||
*xout2 = sl_xor(*xout2);
|
||||
*xout2 = _mm_xor_si128(*xout2, xout1);
|
||||
}
|
||||
|
||||
|
||||
// Expands the 32 bytes at `memory` (two aligned 128-bit loads) into ten round
// keys. Each 128-bit key is duplicated into both halves of its 256-bit output,
// so one VAES instruction applies the same key to two blocks.
static NOINLINE void vaes_genkey(const __m128i* memory, __m256i* k0, __m256i* k1, __m256i* k2, __m256i* k3, __m256i* k4, __m256i* k5, __m256i* k6, __m256i* k7, __m256i* k8, __m256i* k9)
{
    __m128i even_key = _mm_load_si128(memory);
    __m128i odd_key  = _mm_load_si128(memory + 1);

    // k0/k1 are the raw key material.
    *k0 = _mm256_set_m128i(even_key, even_key);
    *k1 = _mm256_set_m128i(odd_key, odd_key);

    // Every schedule step yields the next even/odd pair of keys.
    aes_genkey_sub<0x01>(&even_key, &odd_key);
    *k2 = _mm256_set_m128i(even_key, even_key);
    *k3 = _mm256_set_m128i(odd_key, odd_key);

    aes_genkey_sub<0x02>(&even_key, &odd_key);
    *k4 = _mm256_set_m128i(even_key, even_key);
    *k5 = _mm256_set_m128i(odd_key, odd_key);

    aes_genkey_sub<0x04>(&even_key, &odd_key);
    *k6 = _mm256_set_m128i(even_key, even_key);
    *k7 = _mm256_set_m128i(odd_key, odd_key);

    aes_genkey_sub<0x08>(&even_key, &odd_key);
    *k8 = _mm256_set_m128i(even_key, even_key);
    *k9 = _mm256_set_m128i(odd_key, odd_key);
}
|
||||
|
||||
|
||||
// Expands two independent 32-byte keys into ten 256-bit round keys. In every
// output the low 128 bits hold the key derived from `memory1` and the high
// 128 bits the key derived from `memory2`.
static NOINLINE void vaes_genkey_double(const __m128i* memory1, const __m128i* memory2, __m256i* k0, __m256i* k1, __m256i* k2, __m256i* k3, __m256i* k4, __m256i* k5, __m256i* k6, __m256i* k7, __m256i* k8, __m256i* k9)
{
    // "a" is the schedule for memory1, "b" the schedule for memory2.
    __m128i a_even = _mm_load_si128(memory1);
    __m128i a_odd  = _mm_load_si128(memory1 + 1);
    __m128i b_even = _mm_load_si128(memory2);
    __m128i b_odd  = _mm_load_si128(memory2 + 1);

    // k0/k1 are the raw key material.
    *k0 = _mm256_set_m128i(b_even, a_even);
    *k1 = _mm256_set_m128i(b_odd, a_odd);

    // Every step advances both schedules and emits the next even/odd key pair.
    aes_genkey_sub<0x01>(&a_even, &a_odd);
    aes_genkey_sub<0x01>(&b_even, &b_odd);
    *k2 = _mm256_set_m128i(b_even, a_even);
    *k3 = _mm256_set_m128i(b_odd, a_odd);

    aes_genkey_sub<0x02>(&a_even, &a_odd);
    aes_genkey_sub<0x02>(&b_even, &b_odd);
    *k4 = _mm256_set_m128i(b_even, a_even);
    *k5 = _mm256_set_m128i(b_odd, a_odd);

    aes_genkey_sub<0x04>(&a_even, &a_odd);
    aes_genkey_sub<0x04>(&b_even, &b_odd);
    *k6 = _mm256_set_m128i(b_even, a_even);
    *k7 = _mm256_set_m128i(b_odd, a_odd);

    aes_genkey_sub<0x08>(&a_even, &a_odd);
    aes_genkey_sub<0x08>(&b_even, &b_odd);
    *k8 = _mm256_set_m128i(b_even, a_even);
    *k9 = _mm256_set_m128i(b_odd, a_odd);
}
|
||||
|
||||
|
||||
// Applies one AES encryption round (_mm256_aesenc_epi128) with the same `key`
// to each of the four 256-bit registers, updating them in place.
static FORCEINLINE void vaes_round(__m256i key, __m256i& x01, __m256i& x23, __m256i& x45, __m256i& x67)
|
||||
{
|
||||
x01 = _mm256_aesenc_epi128(x01, key);
|
||||
x23 = _mm256_aesenc_epi128(x23, key);
|
||||
x45 = _mm256_aesenc_epi128(x45, key);
|
||||
x67 = _mm256_aesenc_epi128(x67, key);
|
||||
}
|
||||
|
||||
|
||||
// Eight-register overload: applies one AES encryption round
// (_mm256_aesenc_epi128) with the same `key` to x0..x7, updating them in place.
static FORCEINLINE void vaes_round(__m256i key, __m256i& x0, __m256i& x1, __m256i& x2, __m256i& x3, __m256i& x4, __m256i& x5, __m256i& x6, __m256i& x7)
|
||||
{
|
||||
x0 = _mm256_aesenc_epi128(x0, key);
|
||||
x1 = _mm256_aesenc_epi128(x1, key);
|
||||
x2 = _mm256_aesenc_epi128(x2, key);
|
||||
x3 = _mm256_aesenc_epi128(x3, key);
|
||||
x4 = _mm256_aesenc_epi128(x4, key);
|
||||
x5 = _mm256_aesenc_epi128(x5, key);
|
||||
x6 = _mm256_aesenc_epi128(x6, key);
|
||||
x7 = _mm256_aesenc_epi128(x7, key);
|
||||
}
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
// Fills ctx->memory with the AES-expanded scratchpad for a single context.
//   memory   - scratchpad size in bytes
//   half_mem - when true a call generates only half of that size; the first-half
//              call stores its final blocks in ctx->save_state and a later call
//              with ctx->first_half == false resumes from them.
NOINLINE void cn_explode_scratchpad_vaes(cryptonight_ctx* ctx, size_t memory, bool half_mem)
{
    // Amount of output, counted in 32-byte vectors.
    const size_t N = (memory / sizeof(__m256i)) / (half_mem ? 2 : 1);

    const __m128i* input = reinterpret_cast<const __m128i*>(ctx->state);
    __m256i* output      = reinterpret_cast<__m256i*>(ctx->memory);

    // Round keys come from the first 32 bytes of the hash state.
    __m256i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
    vaes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);

    // Starting blocks: the saved blocks when resuming the second half,
    // otherwise the 128 bytes of state starting at byte offset 64.
    const bool resume   = half_mem && !ctx->first_half;
    const __m256i* seed = resume ? reinterpret_cast<const __m256i*>(ctx->save_state)
                                 : reinterpret_cast<const __m256i*>(input + 4);

    __m256i xin01 = _mm256_loadu_si256(seed + 0);
    __m256i xin23 = _mm256_loadu_si256(seed + 1);
    __m256i xin45 = _mm256_loadu_si256(seed + 2);
    __m256i xin67 = _mm256_loadu_si256(seed + 3);

    constexpr int output_increment = 64 / sizeof(__m256i);
    constexpr int prefetch_dist    = 2048 / sizeof(__m256i);

    __m256i* stop  = output + N - prefetch_dist;
    __m256i* ahead = output + prefetch_dist;

    // Pass 0 prefetches 2048 bytes ahead and stops that far before the end.
    // Pass 1 writes the remaining tail and prefetches only the two 64-byte
    // lines it is about to store.
    for (int pass = 0; pass < 2; ++pass) {
        do {
            _mm_prefetch((const char*)(ahead), _MM_HINT_T0);
            _mm_prefetch((const char*)(ahead + output_increment), _MM_HINT_T0);

            vaes_round(k0, xin01, xin23, xin45, xin67);
            vaes_round(k1, xin01, xin23, xin45, xin67);
            vaes_round(k2, xin01, xin23, xin45, xin67);
            vaes_round(k3, xin01, xin23, xin45, xin67);
            vaes_round(k4, xin01, xin23, xin45, xin67);
            vaes_round(k5, xin01, xin23, xin45, xin67);
            vaes_round(k6, xin01, xin23, xin45, xin67);
            vaes_round(k7, xin01, xin23, xin45, xin67);
            vaes_round(k8, xin01, xin23, xin45, xin67);
            vaes_round(k9, xin01, xin23, xin45, xin67);

            // 128 bytes per iteration, written as two 64-byte lines.
            _mm256_store_si256(output + 0, xin01);
            _mm256_store_si256(output + 1, xin23);

            _mm256_store_si256(output + output_increment + 0, xin45);
            _mm256_store_si256(output + output_increment + 1, xin67);

            output += output_increment * 2;
            ahead  += output_increment * 2;
        } while (output < stop);

        stop += prefetch_dist;
        ahead = output;
    }

    // Keep the final blocks so the second half can continue from them.
    if (half_mem && ctx->first_half) {
        __m256i* saved = reinterpret_cast<__m256i*>(ctx->save_state);
        _mm256_storeu_si256(saved + 0, xin01);
        _mm256_storeu_si256(saved + 1, xin23);
        _mm256_storeu_si256(saved + 2, xin45);
        _mm256_storeu_si256(saved + 3, xin67);
    }

    _mm256_zeroupper();
}
|
||||
|
||||
|
||||
// Fills the scratchpads of two contexts at once. Every 256-bit register holds
// one 128-bit block of ctx1 in its low half and the matching block of ctx2 in
// its high half.
//   memory   - scratchpad size in bytes (per context)
//   half_mem - when true a call generates only half of that size; the final
//              blocks are stored in save_state while both contexts have
//              first_half set, and are reloaded once both have it cleared.
NOINLINE void cn_explode_scratchpad_vaes_double(cryptonight_ctx* ctx1, cryptonight_ctx* ctx2, size_t memory, bool half_mem)
{
    // Amount of output per context, counted in 16-byte blocks.
    const size_t N = (memory / sizeof(__m128i)) / (half_mem ? 2 : 1);

    const __m128i* input1 = reinterpret_cast<const __m128i*>(ctx1->state);
    const __m128i* input2 = reinterpret_cast<const __m128i*>(ctx2->state);

    __m128i* output1 = reinterpret_cast<__m128i*>(ctx1->memory);
    __m128i* output2 = reinterpret_cast<__m128i*>(ctx2->memory);

    // Per-context round keys, taken from the first 32 bytes of each state.
    __m256i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
    vaes_genkey_double(input1, input2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);

    // Starting blocks: the saved blocks when both contexts resume the second
    // half, otherwise the eight state blocks starting at byte offset 64.
    const bool resume  = half_mem && !ctx1->first_half && !ctx2->first_half;
    const __m128i* src1 = resume ? reinterpret_cast<const __m128i*>(ctx1->save_state) : (input1 + 4);
    const __m128i* src2 = resume ? reinterpret_cast<const __m128i*>(ctx2->save_state) : (input2 + 4);

    __m256i xin0 = _mm256_loadu2_m128i(src2 + 0, src1 + 0);
    __m256i xin1 = _mm256_loadu2_m128i(src2 + 1, src1 + 1);
    __m256i xin2 = _mm256_loadu2_m128i(src2 + 2, src1 + 2);
    __m256i xin3 = _mm256_loadu2_m128i(src2 + 3, src1 + 3);
    __m256i xin4 = _mm256_loadu2_m128i(src2 + 4, src1 + 4);
    __m256i xin5 = _mm256_loadu2_m128i(src2 + 5, src1 + 5);
    __m256i xin6 = _mm256_loadu2_m128i(src2 + 6, src1 + 6);
    __m256i xin7 = _mm256_loadu2_m128i(src2 + 7, src1 + 7);

    constexpr int output_increment = 64 / sizeof(__m128i);
    constexpr int prefetch_dist    = 2048 / sizeof(__m128i);

    __m128i* stop   = output1 + N - prefetch_dist;
    __m128i* ahead1 = output1 + prefetch_dist;
    __m128i* ahead2 = output2 + prefetch_dist;

    // Pass 0 prefetches 2048 bytes ahead and stops that far before the end.
    // Pass 1 writes the remaining tail and prefetches only the lines it is
    // about to store. Progress is tracked on ctx1's pointer only.
    for (int pass = 0; pass < 2; ++pass) {
        do {
            _mm_prefetch((const char*)(ahead1), _MM_HINT_T0);
            _mm_prefetch((const char*)(ahead1 + output_increment), _MM_HINT_T0);
            _mm_prefetch((const char*)(ahead2), _MM_HINT_T0);
            _mm_prefetch((const char*)(ahead2 + output_increment), _MM_HINT_T0);

            vaes_round(k0, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
            vaes_round(k1, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
            vaes_round(k2, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
            vaes_round(k3, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
            vaes_round(k4, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
            vaes_round(k5, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
            vaes_round(k6, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
            vaes_round(k7, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
            vaes_round(k8, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
            vaes_round(k9, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);

            // First 64-byte line of each scratchpad.
            _mm256_storeu2_m128i(output2 + 0, output1 + 0, xin0);
            _mm256_storeu2_m128i(output2 + 1, output1 + 1, xin1);
            _mm256_storeu2_m128i(output2 + 2, output1 + 2, xin2);
            _mm256_storeu2_m128i(output2 + 3, output1 + 3, xin3);

            // Second 64-byte line of each scratchpad.
            _mm256_storeu2_m128i(output2 + output_increment + 0, output1 + output_increment + 0, xin4);
            _mm256_storeu2_m128i(output2 + output_increment + 1, output1 + output_increment + 1, xin5);
            _mm256_storeu2_m128i(output2 + output_increment + 2, output1 + output_increment + 2, xin6);
            _mm256_storeu2_m128i(output2 + output_increment + 3, output1 + output_increment + 3, xin7);

            output1 += output_increment * 2;
            ahead1  += output_increment * 2;
            output2 += output_increment * 2;
            ahead2  += output_increment * 2;
        } while (output1 < stop);

        stop  += prefetch_dist;
        ahead1 = output1;
        ahead2 = output2;
    }

    // Keep the final blocks so the second half can continue from them.
    if (half_mem && ctx1->first_half && ctx2->first_half) {
        __m128i* saved1 = reinterpret_cast<__m128i*>(ctx1->save_state);
        __m128i* saved2 = reinterpret_cast<__m128i*>(ctx2->save_state);
        _mm256_storeu2_m128i(saved2 + 0, saved1 + 0, xin0);
        _mm256_storeu2_m128i(saved2 + 1, saved1 + 1, xin1);
        _mm256_storeu2_m128i(saved2 + 2, saved1 + 2, xin2);
        _mm256_storeu2_m128i(saved2 + 3, saved1 + 3, xin3);
        _mm256_storeu2_m128i(saved2 + 4, saved1 + 4, xin4);
        _mm256_storeu2_m128i(saved2 + 5, saved1 + 5, xin5);
        _mm256_storeu2_m128i(saved2 + 6, saved1 + 6, xin6);
        _mm256_storeu2_m128i(saved2 + 7, saved1 + 7, xin7);
    }

    _mm256_zeroupper();
}
|
||||
|
||||
|
||||
// Folds the scratchpad in ctx->memory back into ctx->state for one context.
//   memory   - scratchpad size in bytes
//   half_mem - when true the buffer holds only half of the scratchpad at a
//              time: after the first half is absorbed, first_half is cleared
//              and the second half is regenerated into the same buffer.
NOINLINE void cn_implode_scratchpad_vaes(cryptonight_ctx* ctx, size_t memory, bool half_mem)
{
    // Amount of input per part, counted in 32-byte vectors.
    const size_t N     = (memory / sizeof(__m256i)) / (half_mem ? 2 : 1);
    const size_t parts = half_mem ? 2 : 1;

    const __m256i* input = reinterpret_cast<const __m256i*>(ctx->memory);
    __m256i* output      = reinterpret_cast<__m256i*>(ctx->state);

    // Round keys come from state bytes 32..63.
    __m256i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
    vaes_genkey(reinterpret_cast<__m128i*>(output) + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);

    // Accumulators start from the 128 bytes of state at byte offset 64.
    __m256i xout01 = _mm256_loadu_si256(output + 2);
    __m256i xout23 = _mm256_loadu_si256(output + 3);
    __m256i xout45 = _mm256_loadu_si256(output + 4);
    __m256i xout67 = _mm256_loadu_si256(output + 5);

    constexpr int input_increment = 64 / sizeof(__m256i);

    const __m256i* const input_begin = input;

    for (size_t part = 0; part < parts; ++part) {
        if (half_mem && (part == 1)) {
            // Rewind and regenerate the second half into the same buffer.
            input = input_begin;
            ctx->first_half = false;
            cn_explode_scratchpad_vaes(ctx, memory, half_mem);
        }

        size_t i = 0;
        while (i < N) {
            // Absorb 128 bytes of scratchpad (two 64-byte lines).
            xout01 = _mm256_xor_si256(xout01, input[0]);
            xout23 = _mm256_xor_si256(xout23, input[1]);

            xout45 = _mm256_xor_si256(xout45, input[input_increment]);
            xout67 = _mm256_xor_si256(xout67, input[input_increment + 1]);

            input += input_increment * 2;
            i += 4;

            // Prefetch the next two lines unless this was the last iteration.
            if (i < N) {
                _mm_prefetch((const char*)(input), _MM_HINT_T0);
                _mm_prefetch((const char*)(input + input_increment), _MM_HINT_T0);
            }

            vaes_round(k0, xout01, xout23, xout45, xout67);
            vaes_round(k1, xout01, xout23, xout45, xout67);
            vaes_round(k2, xout01, xout23, xout45, xout67);
            vaes_round(k3, xout01, xout23, xout45, xout67);
            vaes_round(k4, xout01, xout23, xout45, xout67);
            vaes_round(k5, xout01, xout23, xout45, xout67);
            vaes_round(k6, xout01, xout23, xout45, xout67);
            vaes_round(k7, xout01, xout23, xout45, xout67);
            vaes_round(k8, xout01, xout23, xout45, xout67);
            vaes_round(k9, xout01, xout23, xout45, xout67);
        }
    }

    // Write the accumulators back to the same state bytes they were read from.
    _mm256_storeu_si256(output + 2, xout01);
    _mm256_storeu_si256(output + 3, xout23);
    _mm256_storeu_si256(output + 4, xout45);
    _mm256_storeu_si256(output + 5, xout67);

    _mm256_zeroupper();
}
|
||||
|
||||
|
||||
// Folds the scratchpads of two contexts into their hash states in one pass,
// using 256-bit registers so that both contexts are processed side by side.
//
// ctx1, ctx2 - contexts whose `memory` is read and whose `state` is updated.
// memory     - scratchpad size in bytes; the block count N is derived from it.
// half_mem   - when true, N is halved and the loop runs twice over the same
//              buffer, calling cn_explode_scratchpad_vaes_double() in between.
//
// NOTE(review): register layout assumes _mm256_loadu2_m128i(hi, lo) /
// _mm256_storeu2_m128i(hi, lo, v) argument order as in the Intel intrinsics,
// i.e. ctx1 data sits in the low 128-bit lane and ctx2 data in the high lane.
NOINLINE void cn_implode_scratchpad_vaes_double(cryptonight_ctx* ctx1, cryptonight_ctx* ctx2, size_t memory, bool half_mem)
|
||||
{
|
||||
// Number of 16-byte blocks consumed per pass (halved in half_mem mode).
const size_t N = (memory / sizeof(__m128i)) / (half_mem ? 2 : 1);
|
||||
|
||||
// Eight accumulators, each pairing one 128-bit block from ctx1 and one from ctx2.
__m256i xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7;
|
||||
// Ten round keys filled in by vaes_genkey_double() below.
__m256i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
|
||||
|
||||
const __m128i* input1 = reinterpret_cast<const __m128i*>(ctx1->memory);
|
||||
const __m128i* input2 = reinterpret_cast<const __m128i*>(ctx2->memory);
|
||||
|
||||
__m128i* output1 = reinterpret_cast<__m128i*>(ctx1->state);
|
||||
__m128i* output2 = reinterpret_cast<__m128i*>(ctx2->state);
|
||||
|
||||
// Key material is taken from each state starting at element 2 (byte offset 32).
// NOTE(review): the exact key schedule lives in vaes_genkey_double(), not visible here.
vaes_genkey_double(output1 + 2, output2 + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
|
||||
|
||||
// Seed the accumulators from state elements 4..11 (byte offsets 64..191) of both contexts.
xout0 = _mm256_loadu2_m128i(output2 + 4, output1 + 4);
|
||||
xout1 = _mm256_loadu2_m128i(output2 + 5, output1 + 5);
|
||||
xout2 = _mm256_loadu2_m128i(output2 + 6, output1 + 6);
|
||||
xout3 = _mm256_loadu2_m128i(output2 + 7, output1 + 7);
|
||||
xout4 = _mm256_loadu2_m128i(output2 + 8, output1 + 8);
|
||||
xout5 = _mm256_loadu2_m128i(output2 + 9, output1 + 9);
|
||||
xout6 = _mm256_loadu2_m128i(output2 + 10, output1 + 10);
|
||||
xout7 = _mm256_loadu2_m128i(output2 + 11, output1 + 11);
|
||||
|
||||
// Remember the scratchpad start so the second half_mem pass can rewind.
const __m128i* input_begin1 = input1;
|
||||
const __m128i* input_begin2 = input2;
|
||||
// One pass normally, two passes in half_mem mode.
for (size_t part = 0; part < (half_mem ? 2 : 1); ++part) {
|
||||
// Before the second pass: rewind, clear first_half and refill the buffer.
// NOTE(review): presumably explode then writes the second half of the
// scratchpad into the same memory - confirm against cn_explode_scratchpad_vaes_double.
if (half_mem && (part == 1)) {
|
||||
input1 = input_begin1;
|
||||
input2 = input_begin2;
|
||||
ctx1->first_half = false;
|
||||
ctx2->first_half = false;
|
||||
cn_explode_scratchpad_vaes_double(ctx1, ctx2, memory, half_mem);
|
||||
}
|
||||
|
||||
// Each iteration consumes 8 blocks (128 bytes) from each scratchpad; i is advanced in the body.
for (size_t i = 0; i < N;) {
|
||||
// XOR the first 64 bytes of each scratchpad chunk into accumulators 0..3.
xout0 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + 0, input1 + 0), xout0);
|
||||
xout1 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + 1, input1 + 1), xout1);
|
||||
xout2 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + 2, input1 + 2), xout2);
|
||||
xout3 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + 3, input1 + 3), xout3);
|
||||
|
||||
// Number of __m128i elements in 64 bytes (4).
constexpr int input_increment = 64 / sizeof(__m128i);
|
||||
|
||||
// XOR the following 64 bytes into accumulators 4..7.
xout4 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + input_increment + 0, input1 + input_increment + 0), xout4);
|
||||
xout5 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + input_increment + 1, input1 + input_increment + 1), xout5);
|
||||
xout6 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + input_increment + 2, input1 + input_increment + 2), xout6);
|
||||
xout7 = _mm256_xor_si256(_mm256_loadu2_m128i(input2 + input_increment + 3, input1 + input_increment + 3), xout7);
|
||||
|
||||
// Advance both read pointers by 128 bytes and count the 8 blocks just consumed.
input1 += input_increment * 2;
|
||||
input2 += input_increment * 2;
|
||||
i += 8;
|
||||
|
||||
// Prefetch the next 128 bytes of each scratchpad, but only if another iteration follows.
if (i < N) {
|
||||
_mm_prefetch((const char*)(input1), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(input1 + input_increment), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(input2), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(input2 + input_increment), _MM_HINT_T0);
|
||||
}
|
||||
|
||||
// Apply vaes_round once per round key (k0..k9) to all eight accumulators.
vaes_round(k0, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
|
||||
vaes_round(k1, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
|
||||
vaes_round(k2, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
|
||||
vaes_round(k3, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
|
||||
vaes_round(k4, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
|
||||
vaes_round(k5, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
|
||||
vaes_round(k6, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
|
||||
vaes_round(k7, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
|
||||
vaes_round(k8, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
|
||||
vaes_round(k9, xout0, xout1, xout2, xout3, xout4, xout5, xout6, xout7);
|
||||
}
|
||||
}
|
||||
|
||||
// Write the accumulators back to state elements 4..11 of both contexts.
_mm256_storeu2_m128i(output2 + 4, output1 + 4, xout0);
|
||||
_mm256_storeu2_m128i(output2 + 5, output1 + 5, xout1);
|
||||
_mm256_storeu2_m128i(output2 + 6, output1 + 6, xout2);
|
||||
_mm256_storeu2_m128i(output2 + 7, output1 + 7, xout3);
|
||||
_mm256_storeu2_m128i(output2 + 8, output1 + 8, xout4);
|
||||
_mm256_storeu2_m128i(output2 + 9, output1 + 9, xout5);
|
||||
_mm256_storeu2_m128i(output2 + 10, output1 + 10, xout6);
|
||||
_mm256_storeu2_m128i(output2 + 11, output1 + 11, xout7);
|
||||
|
||||
// Clear the upper halves of the YMM registers before returning to non-AVX code.
_mm256_zeroupper();
|
||||
}
|
||||
|
||||
|
||||
} // xmrig
|
||||
48
src/crypto/cn/CryptoNight_x86_vaes.h
Normal file
48
src/crypto/cn/CryptoNight_x86_vaes.h
Normal file
@@ -0,0 +1,48 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_CRYPTONIGHT_X86_VAES_H
|
||||
#define XMRIG_CRYPTONIGHT_X86_VAES_H
|
||||
|
||||
|
||||
#include "crypto/cn/CnAlgo.h"
|
||||
|
||||
|
||||
struct cryptonight_ctx;
|
||||
|
||||
|
||||
// VAES-accelerated scratchpad helpers. All four take the scratchpad size in
// bytes (`memory`) and a `half_mem` flag; the *_double variants operate on two
// contexts in the same call.
namespace xmrig {
|
||||
|
||||
|
||||
// NOTE(review): presumably fills ctx's scratchpad from its hash state - the
// definition is not part of this header; confirm against the implementation.
void cn_explode_scratchpad_vaes(cryptonight_ctx* ctx, size_t memory, bool half_mem);
|
||||
// Two-context counterpart of cn_explode_scratchpad_vaes().
void cn_explode_scratchpad_vaes_double(cryptonight_ctx* ctx1, cryptonight_ctx* ctx2, size_t memory, bool half_mem);
|
||||
// NOTE(review): presumably folds ctx's scratchpad back into its hash state -
// confirm against the implementation.
void cn_implode_scratchpad_vaes(cryptonight_ctx* ctx, size_t memory, bool half_mem);
|
||||
// Two-context counterpart of cn_implode_scratchpad_vaes().
void cn_implode_scratchpad_vaes_double(cryptonight_ctx* ctx1, cryptonight_ctx* ctx2, size_t memory, bool half_mem);
|
||||
|
||||
|
||||
} // xmrig
|
||||
|
||||
|
||||
#endif /* XMRIG_CRYPTONIGHT_X86_VAES_H */
|
||||
132
src/crypto/cn/asm/cn1/cnv1_double_main_loop.inc
Normal file
132
src/crypto/cn/asm/cn1/cnv1_double_main_loop.inc
Normal file
@@ -0,0 +1,132 @@
|
||||
mov QWORD PTR [rsp+8], rbx
|
||||
mov QWORD PTR [rsp+16], rbp
|
||||
mov QWORD PTR [rsp+24], rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 32
|
||||
mov rdx, QWORD PTR [rcx]
|
||||
mov r8, QWORD PTR [rcx+8]
|
||||
mov r12d, 524288
|
||||
movaps XMMWORD PTR [rsp+16], xmm6
|
||||
mov rbx, QWORD PTR [rdx+32]
|
||||
xor rbx, QWORD PTR [rdx]
|
||||
mov rsi, QWORD PTR [rdx+40]
|
||||
mov r10, rbx
|
||||
xor rsi, QWORD PTR [rdx+8]
|
||||
and r10d, 2097136
|
||||
mov rdi, QWORD PTR [r8+32]
|
||||
xor rdi, QWORD PTR [r8]
|
||||
movq xmm3, rbx
|
||||
mov rbp, QWORD PTR [r8+40]
|
||||
mov r9, rdi
|
||||
xor rbp, QWORD PTR [r8+8]
|
||||
movq xmm0, rsi
|
||||
mov rcx, QWORD PTR [rdx+56]
|
||||
and r9d, 2097136
|
||||
xor rcx, QWORD PTR [rdx+24]
|
||||
movq xmm4, rdi
|
||||
mov rax, QWORD PTR [rdx+48]
|
||||
xor rax, QWORD PTR [rdx+16]
|
||||
mov r14, QWORD PTR [rdx+224]
|
||||
mov r13, QWORD PTR [rdx+232]
|
||||
mov r15, QWORD PTR [r8+224]
|
||||
punpcklqdq xmm3, xmm0
|
||||
movq xmm0, rbp
|
||||
movq xmm5, rax
|
||||
punpcklqdq xmm4, xmm0
|
||||
mov rax, QWORD PTR [r8+48]
|
||||
movq xmm0, rcx
|
||||
xor rax, QWORD PTR [r8+16]
|
||||
mov rcx, QWORD PTR [r8+56]
|
||||
xor rcx, QWORD PTR [r8+24]
|
||||
movdqu xmm1, XMMWORD PTR [r14+r10]
|
||||
movq xmm6, rax
|
||||
punpcklqdq xmm5, xmm0
|
||||
mov rax, QWORD PTR [rdx+240]
|
||||
movq xmm0, rcx
|
||||
movdqu xmm2, XMMWORD PTR [r15+r9]
|
||||
mov QWORD PTR [rsp], rax
|
||||
mov rax, QWORD PTR [r8+240]
|
||||
mov QWORD PTR [rsp+8], rax
|
||||
punpcklqdq xmm6, xmm0
|
||||
|
||||
ALIGN(64)
|
||||
main_loop_cnv1_double:
|
||||
aesenc xmm1, xmm3
|
||||
aesenc xmm2, xmm4
|
||||
movdqa xmm0, xmm1
|
||||
movq r11, xmm2
|
||||
pxor xmm0, xmm5
|
||||
movdqa xmm5, xmm1
|
||||
movq QWORD PTR [r14+r10], xmm0
|
||||
pextrq rcx, xmm0, 1
|
||||
mov eax, ecx
|
||||
movdqa xmm0, xmm2
|
||||
shr rax, 24
|
||||
pxor xmm0, xmm6
|
||||
movdqa xmm6, xmm2
|
||||
mov eax, DWORD PTR [r13+rax*4]
|
||||
xor rax, rcx
|
||||
mov QWORD PTR [r14+r10+8], rax
|
||||
movq QWORD PTR [r15+r9], xmm0
|
||||
pextrq rcx, xmm0, 1
|
||||
mov eax, ecx
|
||||
shr rax, 24
|
||||
mov eax, DWORD PTR [r13+rax*4]
|
||||
xor rax, rcx
|
||||
movq rcx, xmm1
|
||||
mov QWORD PTR [r15+r9+8], rax
|
||||
mov r9, rcx
|
||||
and r9d, 2097136
|
||||
mov r10, QWORD PTR [r14+r9]
|
||||
mov r8, QWORD PTR [r14+r9+8]
|
||||
mov rax, r10
|
||||
mul rcx
|
||||
add rsi, rax
|
||||
add rbx, rdx
|
||||
mov rax, QWORD PTR [rsp]
|
||||
mov QWORD PTR [r14+r9], rbx
|
||||
xor rax, rsi
|
||||
mov QWORD PTR [r14+r9+8], rax
|
||||
xor rsi, r8
|
||||
xor rbx, r10
|
||||
mov r8, r11
|
||||
and r8d, 2097136
|
||||
mov r10, rbx
|
||||
and r10d, 2097136
|
||||
movq xmm3, rbx
|
||||
pinsrq xmm3, rsi, 1
|
||||
mov r9, QWORD PTR [r15+r8]
|
||||
mov rcx, QWORD PTR [r15+r8+8]
|
||||
mov rax, r9
|
||||
movdqu xmm1, XMMWORD PTR [r14+r10]
|
||||
mul r11
|
||||
add rbp, rax
|
||||
add rdi, rdx
|
||||
mov rax, QWORD PTR [rsp+8]
|
||||
mov QWORD PTR [r15+r8], rdi
|
||||
xor rax, rbp
|
||||
xor rdi, r9
|
||||
mov QWORD PTR [r15+r8+8], rax
|
||||
mov r9, rdi
|
||||
xor rbp, rcx
|
||||
and r9d, 2097136
|
||||
movq xmm4, rdi
|
||||
pinsrq xmm4, rbp, 1
|
||||
movdqu xmm2, XMMWORD PTR [r15+r9]
|
||||
sub r12, 1
|
||||
jne main_loop_cnv1_double
|
||||
|
||||
mov rbx, QWORD PTR [rsp+80]
|
||||
mov rbp, QWORD PTR [rsp+88]
|
||||
mov rsi, QWORD PTR [rsp+96]
|
||||
movaps xmm6, XMMWORD PTR [rsp+16]
|
||||
add rsp, 32
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
263
src/crypto/cn/asm/cn1/cnv1_quad_main_loop.inc
Normal file
263
src/crypto/cn/asm/cn1/cnv1_quad_main_loop.inc
Normal file
@@ -0,0 +1,263 @@
|
||||
mov rax, rsp
|
||||
mov QWORD PTR [rax+8], rbx
|
||||
mov QWORD PTR [rax+16], rbp
|
||||
mov QWORD PTR [rax+24], rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 144
|
||||
mov r8, QWORD PTR [rcx]
|
||||
mov r9, QWORD PTR [rcx+8]
|
||||
mov r10, QWORD PTR [rcx+16]
|
||||
mov r11, QWORD PTR [rcx+24]
|
||||
mov rbp, QWORD PTR [r8+224]
|
||||
mov r13, QWORD PTR [r8+232]
|
||||
mov r14, QWORD PTR [r9+224]
|
||||
mov r15, QWORD PTR [r10+224]
|
||||
mov r12, QWORD PTR [r11+224]
|
||||
mov rcx, QWORD PTR [r8+40]
|
||||
xor rcx, QWORD PTR [r8+8]
|
||||
mov rbx, QWORD PTR [r8+32]
|
||||
xor rbx, QWORD PTR [r8]
|
||||
mov rdi, QWORD PTR [r9+32]
|
||||
xor rdi, QWORD PTR [r9]
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [r9+40]
|
||||
xor rcx, QWORD PTR [r9+8]
|
||||
movq xmm1, rbx
|
||||
movaps XMMWORD PTR [rax-56], xmm6
|
||||
movaps XMMWORD PTR [rax-72], xmm7
|
||||
movaps XMMWORD PTR [rax-88], xmm8
|
||||
movaps XMMWORD PTR [rax-104], xmm9
|
||||
movaps XMMWORD PTR [rax-120], xmm10
|
||||
movaps XMMWORD PTR [rsp+48], xmm11
|
||||
movaps XMMWORD PTR [rsp+32], xmm12
|
||||
and ebx, 2097136
|
||||
mov rsi, QWORD PTR [r10+32]
|
||||
movq xmm2, rdi
|
||||
mov rax, QWORD PTR [r8+240]
|
||||
and edi, 2097136
|
||||
xor rsi, QWORD PTR [r10]
|
||||
mov rdx, QWORD PTR [r8+56]
|
||||
xor rdx, QWORD PTR [r8+24]
|
||||
mov QWORD PTR [rsp], rax
|
||||
mov rax, QWORD PTR [r9+240]
|
||||
movq xmm3, rsi
|
||||
mov QWORD PTR [rsp+8], rax
|
||||
and esi, 2097136
|
||||
mov rax, QWORD PTR [r10+240]
|
||||
punpcklqdq xmm1, xmm0
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [r10+40]
|
||||
xor rcx, QWORD PTR [r10+8]
|
||||
mov QWORD PTR [rsp+16], rax
|
||||
mov rax, QWORD PTR [r11+240]
|
||||
punpcklqdq xmm2, xmm0
|
||||
movq xmm0, rcx
|
||||
mov QWORD PTR [rsp+24], rax
|
||||
mov rcx, QWORD PTR [r11+40]
|
||||
xor rcx, QWORD PTR [r11+8]
|
||||
mov rax, QWORD PTR [r11+32]
|
||||
xor rax, QWORD PTR [r11]
|
||||
punpcklqdq xmm3, xmm0
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [r8+48]
|
||||
xor rcx, QWORD PTR [r8+16]
|
||||
movq xmm4, rax
|
||||
and eax, 2097136
|
||||
punpcklqdq xmm4, xmm0
|
||||
movq xmm0, rdx
|
||||
mov rdx, QWORD PTR [r9+56]
|
||||
xor rdx, QWORD PTR [r9+24]
|
||||
movq xmm5, rcx
|
||||
mov rcx, QWORD PTR [r9+48]
|
||||
xor rcx, QWORD PTR [r9+16]
|
||||
punpcklqdq xmm5, xmm0
|
||||
movq xmm0, rdx
|
||||
mov rdx, QWORD PTR [r10+56]
|
||||
xor rdx, QWORD PTR [r10+24]
|
||||
movq xmm6, rcx
|
||||
mov rcx, QWORD PTR [r10+48]
|
||||
xor rcx, QWORD PTR [r10+16]
|
||||
punpcklqdq xmm6, xmm0
|
||||
movq xmm0, rdx
|
||||
mov rdx, QWORD PTR [r11+56]
|
||||
movq xmm7, rcx
|
||||
punpcklqdq xmm7, xmm0
|
||||
xor rdx, QWORD PTR [r11+24]
|
||||
mov rcx, QWORD PTR [r11+48]
|
||||
xor rcx, QWORD PTR [r11+16]
|
||||
mov r11d, 524288
|
||||
movdqu xmm9, XMMWORD PTR [rbp+rbx]
|
||||
movdqu xmm10, XMMWORD PTR [r14+rdi]
|
||||
movq xmm0, rdx
|
||||
movdqu xmm11, XMMWORD PTR [r15+rsi]
|
||||
movdqu xmm12, XMMWORD PTR [r12+rax]
|
||||
movq xmm8, rcx
|
||||
punpcklqdq xmm8, xmm0
|
||||
|
||||
ALIGN(64)
|
||||
main_loop_cnv1_quad:
|
||||
aesenc xmm9, xmm1
|
||||
aesenc xmm10, xmm2
|
||||
aesenc xmm11, xmm3
|
||||
aesenc xmm12, xmm4
|
||||
movd ecx, xmm9
|
||||
and ecx, 2097136
|
||||
prefetcht0 BYTE PTR [rcx+rbp]
|
||||
movd ecx, xmm10
|
||||
and ecx, 2097136
|
||||
prefetcht0 BYTE PTR [rcx+r14]
|
||||
movd ecx, xmm11
|
||||
and ecx, 2097136
|
||||
prefetcht0 BYTE PTR [rcx+r15]
|
||||
movd ecx, xmm12
|
||||
and ecx, 2097136
|
||||
prefetcht0 BYTE PTR [rcx+r12]
|
||||
movdqa xmm0, xmm9
|
||||
pxor xmm0, xmm5
|
||||
movdqa xmm5, xmm9
|
||||
movq QWORD PTR [rbp+rbx], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov ecx, edx
|
||||
movdqa xmm0, xmm10
|
||||
shr rcx, 24
|
||||
pxor xmm0, xmm6
|
||||
mov ecx, DWORD PTR [r13+rcx*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [rbp+rbx+8], rcx
|
||||
movq rbx, xmm1
|
||||
movq QWORD PTR [r14+rdi], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov ecx, edx
|
||||
movdqa xmm0, xmm11
|
||||
shr rcx, 24
|
||||
pxor xmm0, xmm7
|
||||
mov ecx, DWORD PTR [r13+rcx*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [r14+rdi+8], rcx
|
||||
movq rdi, xmm2
|
||||
movq QWORD PTR [r15+rsi], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov ecx, edx
|
||||
movdqa xmm0, xmm12
|
||||
shr rcx, 24
|
||||
pxor xmm0, xmm8
|
||||
mov ecx, DWORD PTR [r13+rcx*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [r15+rsi+8], rcx
|
||||
movq QWORD PTR [r12+rax], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov ecx, edx
|
||||
shr rcx, 24
|
||||
mov ecx, DWORD PTR [r13+rcx*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [r12+rax+8], rcx
|
||||
movq rcx, xmm9
|
||||
mov r8, rcx
|
||||
and r8d, 2097136
|
||||
mov r9, QWORD PTR [rbp+r8]
|
||||
mov r10, QWORD PTR [rbp+r8+8]
|
||||
mov rax, r9
|
||||
mul rcx
|
||||
pextrq rcx, xmm1, 1
|
||||
add rcx, rax
|
||||
add rbx, rdx
|
||||
mov rax, QWORD PTR [rsp]
|
||||
mov QWORD PTR [rbp+r8], rbx
|
||||
xor rax, rcx
|
||||
mov QWORD PTR [rbp+r8+8], rax
|
||||
xor rcx, r10
|
||||
xor rbx, r9
|
||||
movq xmm1, rbx
|
||||
and ebx, 2097136
|
||||
pinsrq xmm1, rcx, 1
|
||||
movq rcx, xmm10
|
||||
mov r8, rcx
|
||||
and r8d, 2097136
|
||||
movdqu xmm9, XMMWORD PTR [rbp+rbx]
|
||||
mov r9, QWORD PTR [r14+r8]
|
||||
mov r10, QWORD PTR [r14+r8+8]
|
||||
mov rax, r9
|
||||
mul rcx
|
||||
pextrq rcx, xmm2, 1
|
||||
add rcx, rax
|
||||
add rdi, rdx
|
||||
mov rax, QWORD PTR [rsp+8]
|
||||
mov QWORD PTR [r14+r8], rdi
|
||||
xor rax, rcx
|
||||
xor rdi, r9
|
||||
mov QWORD PTR [r14+r8+8], rax
|
||||
xor rcx, r10
|
||||
movq xmm2, rdi
|
||||
and edi, 2097136
|
||||
pinsrq xmm2, rcx, 1
|
||||
movq rcx, xmm11
|
||||
movq rsi, xmm3
|
||||
mov r8, rcx
|
||||
and r8d, 2097136
|
||||
movdqa xmm6, xmm10
|
||||
movdqa xmm7, xmm11
|
||||
movdqa xmm8, xmm12
|
||||
movdqu xmm10, XMMWORD PTR [r14+rdi]
|
||||
mov r9, QWORD PTR [r15+r8]
|
||||
mov r10, QWORD PTR [r15+r8+8]
|
||||
mov rax, r9
|
||||
mul rcx
|
||||
pextrq rcx, xmm3, 1
|
||||
add rcx, rax
|
||||
add rsi, rdx
|
||||
mov rax, QWORD PTR [rsp+16]
|
||||
xor rax, rcx
|
||||
mov QWORD PTR [r15+r8], rsi
|
||||
mov QWORD PTR [r15+r8+8], rax
|
||||
xor rcx, r10
|
||||
xor rsi, r9
|
||||
movq xmm3, rsi
|
||||
and esi, 2097136
|
||||
pinsrq xmm3, rcx, 1
|
||||
movq rcx, xmm12
|
||||
mov r8, rcx
|
||||
and r8d, 2097136
|
||||
movdqu xmm11, XMMWORD PTR [r15+rsi]
|
||||
mov r9, QWORD PTR [r12+r8]
|
||||
mov r10, QWORD PTR [r12+r8+8]
|
||||
mov rax, r9
|
||||
mul rcx
|
||||
mov rcx, rax
|
||||
movq rax, xmm4
|
||||
add rax, rdx
|
||||
mov QWORD PTR [r12+r8], rax
|
||||
xor rax, r9
|
||||
pextrq rdx, xmm4, 1
|
||||
add rdx, rcx
|
||||
mov rcx, QWORD PTR [rsp+24]
|
||||
xor rcx, rdx
|
||||
xor rdx, r10
|
||||
movq xmm4, rax
|
||||
mov QWORD PTR [r12+r8+8], rcx
|
||||
and eax, 2097136
|
||||
pinsrq xmm4, rdx, 1
|
||||
movdqu xmm12, XMMWORD PTR [r12+rax]
|
||||
sub r11, 1
|
||||
jne main_loop_cnv1_quad
|
||||
|
||||
movaps xmm7, XMMWORD PTR [rsp+112]
|
||||
lea r11, QWORD PTR [rsp+144]
|
||||
mov rbx, QWORD PTR [r11+48]
|
||||
mov rbp, QWORD PTR [r11+56]
|
||||
mov rsi, QWORD PTR [r11+64]
|
||||
movaps xmm6, XMMWORD PTR [r11-16]
|
||||
movaps xmm8, XMMWORD PTR [r11-48]
|
||||
movaps xmm9, XMMWORD PTR [r11-64]
|
||||
movaps xmm10, XMMWORD PTR [r11-80]
|
||||
movaps xmm11, XMMWORD PTR [r11-96]
|
||||
movaps xmm12, XMMWORD PTR [r11-112]
|
||||
mov rsp, r11
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
66
src/crypto/cn/asm/cn1/cnv1_single_main_loop.inc
Normal file
66
src/crypto/cn/asm/cn1/cnv1_single_main_loop.inc
Normal file
@@ -0,0 +1,66 @@
|
||||
mov QWORD PTR [rsp+8], rbx
|
||||
mov QWORD PTR [rsp+16], rbp
|
||||
mov QWORD PTR [rsp+24], rsi
|
||||
mov QWORD PTR [rsp+32], rdi
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
mov rdx, QWORD PTR [rcx]
|
||||
mov esi, 524288
|
||||
mov r11, QWORD PTR [rdx+32]
|
||||
xor r11, QWORD PTR [rdx]
|
||||
mov rdi, QWORD PTR [rdx+224]
|
||||
mov rbx, QWORD PTR [rdx+40]
|
||||
xor rbx, QWORD PTR [rdx+8]
|
||||
mov rcx, QWORD PTR [rdx+56]
|
||||
xor rcx, QWORD PTR [rdx+24]
|
||||
mov rax, QWORD PTR [rdx+48]
|
||||
xor rax, QWORD PTR [rdx+16]
|
||||
mov rbp, QWORD PTR [rdx+240]
|
||||
mov r14, QWORD PTR [rdx+232]
|
||||
movq xmm2, rax
|
||||
pinsrq xmm2, rcx, 1
|
||||
|
||||
ALIGN(64)
|
||||
main_loop_cnv1_single:
|
||||
mov r8, r11
|
||||
and r8d, 2097136
|
||||
movdqu xmm1, XMMWORD PTR [rdi+r8]
|
||||
movq xmm0, r11
|
||||
pinsrq xmm0, rbx, 1
|
||||
aesenc xmm1, xmm0
|
||||
movq r15, xmm1
|
||||
mov r9, r15
|
||||
and r9d, 2097136
|
||||
movdqa xmm0, xmm1
|
||||
pxor xmm0, xmm2
|
||||
movdqa xmm2, xmm1
|
||||
movq QWORD PTR [rdi+r8], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov eax, edx
|
||||
shr rax, 24
|
||||
mov ecx, DWORD PTR [r14+rax*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [rdi+r8+8], rcx
|
||||
mov r10, QWORD PTR [rdi+r9]
|
||||
mov r8, QWORD PTR [rdi+r9+8]
|
||||
mov rax, r10
|
||||
mul r15
|
||||
add rbx, rax
|
||||
add r11, rdx
|
||||
mov QWORD PTR [rdi+r9], r11
|
||||
mov rax, rbx
|
||||
xor rbx, r8
|
||||
xor r11, r10
|
||||
xor rax, rbp
|
||||
mov QWORD PTR [rdi+r9+8], rax
|
||||
sub rsi, 1
|
||||
jne main_loop_cnv1_single
|
||||
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
mov rbx, QWORD PTR [rsp+8]
|
||||
mov rbp, QWORD PTR [rsp+16]
|
||||
mov rsi, QWORD PTR [rsp+24]
|
||||
mov rdi, QWORD PTR [rsp+32]
|
||||
@@ -11,6 +11,9 @@
|
||||
# define FN_PREFIX(fn) fn
|
||||
.section .text
|
||||
#endif
|
||||
.global FN_PREFIX(cnv1_single_mainloop_asm)
|
||||
.global FN_PREFIX(cnv1_double_mainloop_asm)
|
||||
.global FN_PREFIX(cnv1_quad_mainloop_asm)
|
||||
.global FN_PREFIX(cnv2_mainloop_ivybridge_asm)
|
||||
.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
|
||||
.global FN_PREFIX(cnv2_mainloop_bulldozer_asm)
|
||||
@@ -19,6 +22,33 @@
|
||||
.global FN_PREFIX(cnv2_rwz_double_mainloop_asm)
|
||||
.global FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm)
|
||||
|
||||
/* Entry point for the single-hash CNv1 main loop; the loop itself is the included .inc body. */
ALIGN(64)
|
||||
FN_PREFIX(cnv1_single_mainloop_asm):
|
||||
/* Reserve 48 bytes; the included body stores rbx/rbp/rsi/rdi at [rsp+8]..[rsp+32]. */
sub rsp, 48
|
||||
/* The included body dereferences rcx for its argument, so copy it over from rdi. */
/* NOTE(review): presumably rdi is the first argument under the System V ABI - confirm. */
mov rcx, rdi
|
||||
#include "cn1/cnv1_single_main_loop.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
/* Placed after ret and has no label. 3735929054 is 0xDEADC0DE. */
/* NOTE(review): presumably an end-of-function marker for code that scans this routine - confirm. */
mov eax, 3735929054
|
||||
|
||||
/* Entry point for the double-hash CNv1 main loop; the loop itself is the included .inc body. */
ALIGN(64)
|
||||
FN_PREFIX(cnv1_double_mainloop_asm):
|
||||
/* Reserve 48 bytes; the included body stores rbx/rbp/rsi at [rsp+8]..[rsp+24] on entry. */
sub rsp, 48
|
||||
/* The included body dereferences rcx for its argument, so copy it over from rdi. */
/* NOTE(review): presumably rdi is the first argument under the System V ABI - confirm. */
mov rcx, rdi
|
||||
#include "cn1/cnv1_double_main_loop.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
/* Placed after ret and has no label. 3735929054 is 0xDEADC0DE. */
/* NOTE(review): presumably an end-of-function marker for code that scans this routine - confirm. */
mov eax, 3735929054
|
||||
|
||||
/* Entry point for the quad-hash CNv1 main loop; the loop itself is the included .inc body. */
ALIGN(64)
|
||||
FN_PREFIX(cnv1_quad_mainloop_asm):
|
||||
/* Reserve 48 bytes; the included body stores rbx/rbp/rsi at entry rsp+8..rsp+24. */
sub rsp, 48
|
||||
/* The included body dereferences rcx for its argument, so copy it over from rdi. */
/* NOTE(review): presumably rdi is the first argument under the System V ABI - confirm. */
mov rcx, rdi
|
||||
#include "cn1/cnv1_quad_main_loop.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
/* Placed after ret and has no label. 3735929054 is 0xDEADC0DE. */
/* NOTE(review): presumably an end-of-function marker for code that scans this routine - confirm. */
mov eax, 3735929054
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(cnv2_mainloop_ivybridge_asm):
|
||||
sub rsp, 48
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE
|
||||
PUBLIC cnv1_single_mainloop_asm
|
||||
PUBLIC cnv1_double_mainloop_asm
|
||||
PUBLIC cnv1_quad_mainloop_asm
|
||||
PUBLIC cnv2_mainloop_ivybridge_asm
|
||||
PUBLIC cnv2_mainloop_ryzen_asm
|
||||
PUBLIC cnv2_mainloop_bulldozer_asm
|
||||
@@ -6,6 +9,27 @@ PUBLIC cnv2_double_mainloop_sandybridge_asm
|
||||
PUBLIC cnv2_rwz_mainloop_asm
|
||||
PUBLIC cnv2_rwz_double_mainloop_asm
|
||||
|
||||
; Single-hash CNv1 main loop: the procedure body is the included .inc file,
; which reads its argument through rcx.
ALIGN(64)
|
||||
cnv1_single_mainloop_asm PROC
|
||||
INCLUDE cn1/cnv1_single_main_loop.inc
|
||||
ret 0
|
||||
; Placed after ret and has no label. 3735929054 is 0DEADC0DEh.
; NOTE(review): presumably an end-of-function marker - confirm.
mov eax, 3735929054
|
||||
cnv1_single_mainloop_asm ENDP
|
||||
|
||||
; Double-hash CNv1 main loop: the procedure body is the included .inc file,
; which reads its argument through rcx.
ALIGN(64)
|
||||
cnv1_double_mainloop_asm PROC
|
||||
INCLUDE cn1/cnv1_double_main_loop.inc
|
||||
ret 0
|
||||
; Placed after ret and has no label. 3735929054 is 0DEADC0DEh.
; NOTE(review): presumably an end-of-function marker - confirm.
mov eax, 3735929054
|
||||
cnv1_double_mainloop_asm ENDP
|
||||
|
||||
; Quad-hash CNv1 main loop: the procedure body is the included .inc file,
; which reads its argument through rcx.
ALIGN(64)
|
||||
cnv1_quad_mainloop_asm PROC
|
||||
INCLUDE cn1/cnv1_quad_main_loop.inc
|
||||
ret 0
|
||||
; Placed after ret and has no label. 3735929054 is 0DEADC0DEh.
; NOTE(review): presumably an end-of-function marker - confirm.
mov eax, 3735929054
|
||||
cnv1_quad_mainloop_asm ENDP
|
||||
|
||||
ALIGN(64)
|
||||
cnv2_mainloop_ivybridge_asm PROC
|
||||
INCLUDE cn2/cnv2_main_loop_ivybridge.inc
|
||||
|
||||
132
src/crypto/cn/asm/win64/cn1/cnv1_double_main_loop.inc
Normal file
132
src/crypto/cn/asm/win64/cn1/cnv1_double_main_loop.inc
Normal file
@@ -0,0 +1,132 @@
|
||||
mov QWORD PTR [rsp+8], rbx
|
||||
mov QWORD PTR [rsp+16], rbp
|
||||
mov QWORD PTR [rsp+24], rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 32
|
||||
mov rdx, QWORD PTR [rcx]
|
||||
mov r8, QWORD PTR [rcx+8]
|
||||
mov r12d, 524288
|
||||
movaps XMMWORD PTR [rsp+16], xmm6
|
||||
mov rbx, QWORD PTR [rdx+32]
|
||||
xor rbx, QWORD PTR [rdx]
|
||||
mov rsi, QWORD PTR [rdx+40]
|
||||
mov r10, rbx
|
||||
xor rsi, QWORD PTR [rdx+8]
|
||||
and r10d, 2097136
|
||||
mov rdi, QWORD PTR [r8+32]
|
||||
xor rdi, QWORD PTR [r8]
|
||||
movd xmm3, rbx
|
||||
mov rbp, QWORD PTR [r8+40]
|
||||
mov r9, rdi
|
||||
xor rbp, QWORD PTR [r8+8]
|
||||
movd xmm0, rsi
|
||||
mov rcx, QWORD PTR [rdx+56]
|
||||
and r9d, 2097136
|
||||
xor rcx, QWORD PTR [rdx+24]
|
||||
movd xmm4, rdi
|
||||
mov rax, QWORD PTR [rdx+48]
|
||||
xor rax, QWORD PTR [rdx+16]
|
||||
mov r14, QWORD PTR [rdx+224]
|
||||
mov r13, QWORD PTR [rdx+232]
|
||||
mov r15, QWORD PTR [r8+224]
|
||||
punpcklqdq xmm3, xmm0
|
||||
movd xmm0, rbp
|
||||
movd xmm5, rax
|
||||
punpcklqdq xmm4, xmm0
|
||||
mov rax, QWORD PTR [r8+48]
|
||||
movd xmm0, rcx
|
||||
xor rax, QWORD PTR [r8+16]
|
||||
mov rcx, QWORD PTR [r8+56]
|
||||
xor rcx, QWORD PTR [r8+24]
|
||||
movdqu xmm1, XMMWORD PTR [r14+r10]
|
||||
movd xmm6, rax
|
||||
punpcklqdq xmm5, xmm0
|
||||
mov rax, QWORD PTR [rdx+240]
|
||||
movd xmm0, rcx
|
||||
movdqu xmm2, XMMWORD PTR [r15+r9]
|
||||
mov QWORD PTR [rsp], rax
|
||||
mov rax, QWORD PTR [r8+240]
|
||||
mov QWORD PTR [rsp+8], rax
|
||||
punpcklqdq xmm6, xmm0
|
||||
|
||||
ALIGN(64)
|
||||
main_loop_cnv1_double:
|
||||
aesenc xmm1, xmm3
|
||||
aesenc xmm2, xmm4
|
||||
movdqa xmm0, xmm1
|
||||
movd r11, xmm2
|
||||
pxor xmm0, xmm5
|
||||
movdqa xmm5, xmm1
|
||||
movd QWORD PTR [r14+r10], xmm0
|
||||
pextrq rcx, xmm0, 1
|
||||
mov eax, ecx
|
||||
movdqa xmm0, xmm2
|
||||
shr rax, 24
|
||||
pxor xmm0, xmm6
|
||||
movdqa xmm6, xmm2
|
||||
mov eax, DWORD PTR [r13+rax*4]
|
||||
xor rax, rcx
|
||||
mov QWORD PTR [r14+r10+8], rax
|
||||
movd QWORD PTR [r15+r9], xmm0
|
||||
pextrq rcx, xmm0, 1
|
||||
mov eax, ecx
|
||||
shr rax, 24
|
||||
mov eax, DWORD PTR [r13+rax*4]
|
||||
xor rax, rcx
|
||||
movd rcx, xmm1
|
||||
mov QWORD PTR [r15+r9+8], rax
|
||||
mov r9, rcx
|
||||
and r9d, 2097136
|
||||
mov r10, QWORD PTR [r14+r9]
|
||||
mov r8, QWORD PTR [r14+r9+8]
|
||||
mov rax, r10
|
||||
mul rcx
|
||||
add rsi, rax
|
||||
add rbx, rdx
|
||||
mov rax, QWORD PTR [rsp]
|
||||
mov QWORD PTR [r14+r9], rbx
|
||||
xor rax, rsi
|
||||
mov QWORD PTR [r14+r9+8], rax
|
||||
xor rsi, r8
|
||||
xor rbx, r10
|
||||
mov r8, r11
|
||||
and r8d, 2097136
|
||||
mov r10, rbx
|
||||
and r10d, 2097136
|
||||
movd xmm3, rbx
|
||||
pinsrq xmm3, rsi, 1
|
||||
mov r9, QWORD PTR [r15+r8]
|
||||
mov rcx, QWORD PTR [r15+r8+8]
|
||||
mov rax, r9
|
||||
movdqu xmm1, XMMWORD PTR [r14+r10]
|
||||
mul r11
|
||||
add rbp, rax
|
||||
add rdi, rdx
|
||||
mov rax, QWORD PTR [rsp+8]
|
||||
mov QWORD PTR [r15+r8], rdi
|
||||
xor rax, rbp
|
||||
xor rdi, r9
|
||||
mov QWORD PTR [r15+r8+8], rax
|
||||
mov r9, rdi
|
||||
xor rbp, rcx
|
||||
and r9d, 2097136
|
||||
movd xmm4, rdi
|
||||
pinsrq xmm4, rbp, 1
|
||||
movdqu xmm2, XMMWORD PTR [r15+r9]
|
||||
sub r12, 1
|
||||
jne main_loop_cnv1_double
|
||||
|
||||
mov rbx, QWORD PTR [rsp+80]
|
||||
mov rbp, QWORD PTR [rsp+88]
|
||||
mov rsi, QWORD PTR [rsp+96]
|
||||
movaps xmm6, XMMWORD PTR [rsp+16]
|
||||
add rsp, 32
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
263
src/crypto/cn/asm/win64/cn1/cnv1_quad_main_loop.inc
Normal file
263
src/crypto/cn/asm/win64/cn1/cnv1_quad_main_loop.inc
Normal file
@@ -0,0 +1,263 @@
|
||||
mov rax, rsp
|
||||
mov QWORD PTR [rax+8], rbx
|
||||
mov QWORD PTR [rax+16], rbp
|
||||
mov QWORD PTR [rax+24], rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 144
|
||||
mov r8, QWORD PTR [rcx]
|
||||
mov r9, QWORD PTR [rcx+8]
|
||||
mov r10, QWORD PTR [rcx+16]
|
||||
mov r11, QWORD PTR [rcx+24]
|
||||
mov rbp, QWORD PTR [r8+224]
|
||||
mov r13, QWORD PTR [r8+232]
|
||||
mov r14, QWORD PTR [r9+224]
|
||||
mov r15, QWORD PTR [r10+224]
|
||||
mov r12, QWORD PTR [r11+224]
|
||||
mov rcx, QWORD PTR [r8+40]
|
||||
xor rcx, QWORD PTR [r8+8]
|
||||
mov rbx, QWORD PTR [r8+32]
|
||||
xor rbx, QWORD PTR [r8]
|
||||
mov rdi, QWORD PTR [r9+32]
|
||||
xor rdi, QWORD PTR [r9]
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [r9+40]
|
||||
xor rcx, QWORD PTR [r9+8]
|
||||
movd xmm1, rbx
|
||||
movaps XMMWORD PTR [rax-56], xmm6
|
||||
movaps XMMWORD PTR [rax-72], xmm7
|
||||
movaps XMMWORD PTR [rax-88], xmm8
|
||||
movaps XMMWORD PTR [rax-104], xmm9
|
||||
movaps XMMWORD PTR [rax-120], xmm10
|
||||
movaps XMMWORD PTR [rsp+48], xmm11
|
||||
movaps XMMWORD PTR [rsp+32], xmm12
|
||||
and ebx, 2097136
|
||||
mov rsi, QWORD PTR [r10+32]
|
||||
movd xmm2, rdi
|
||||
mov rax, QWORD PTR [r8+240]
|
||||
and edi, 2097136
|
||||
xor rsi, QWORD PTR [r10]
|
||||
mov rdx, QWORD PTR [r8+56]
|
||||
xor rdx, QWORD PTR [r8+24]
|
||||
mov QWORD PTR [rsp], rax
|
||||
mov rax, QWORD PTR [r9+240]
|
||||
movd xmm3, rsi
|
||||
mov QWORD PTR [rsp+8], rax
|
||||
and esi, 2097136
|
||||
mov rax, QWORD PTR [r10+240]
|
||||
punpcklqdq xmm1, xmm0
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [r10+40]
|
||||
xor rcx, QWORD PTR [r10+8]
|
||||
mov QWORD PTR [rsp+16], rax
|
||||
mov rax, QWORD PTR [r11+240]
|
||||
punpcklqdq xmm2, xmm0
|
||||
movd xmm0, rcx
|
||||
mov QWORD PTR [rsp+24], rax
|
||||
mov rcx, QWORD PTR [r11+40]
|
||||
xor rcx, QWORD PTR [r11+8]
|
||||
mov rax, QWORD PTR [r11+32]
|
||||
xor rax, QWORD PTR [r11]
|
||||
punpcklqdq xmm3, xmm0
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [r8+48]
|
||||
xor rcx, QWORD PTR [r8+16]
|
||||
movd xmm4, rax
|
||||
and eax, 2097136
|
||||
punpcklqdq xmm4, xmm0
|
||||
movd xmm0, rdx
|
||||
mov rdx, QWORD PTR [r9+56]
|
||||
xor rdx, QWORD PTR [r9+24]
|
||||
movd xmm5, rcx
|
||||
mov rcx, QWORD PTR [r9+48]
|
||||
xor rcx, QWORD PTR [r9+16]
|
||||
punpcklqdq xmm5, xmm0
|
||||
movd xmm0, rdx
|
||||
mov rdx, QWORD PTR [r10+56]
|
||||
xor rdx, QWORD PTR [r10+24]
|
||||
movd xmm6, rcx
|
||||
mov rcx, QWORD PTR [r10+48]
|
||||
xor rcx, QWORD PTR [r10+16]
|
||||
punpcklqdq xmm6, xmm0
|
||||
movd xmm0, rdx
|
||||
mov rdx, QWORD PTR [r11+56]
|
||||
movd xmm7, rcx
|
||||
punpcklqdq xmm7, xmm0
|
||||
xor rdx, QWORD PTR [r11+24]
|
||||
mov rcx, QWORD PTR [r11+48]
|
||||
xor rcx, QWORD PTR [r11+16]
|
||||
mov r11d, 524288
|
||||
movdqu xmm9, XMMWORD PTR [rbp+rbx]
|
||||
movdqu xmm10, XMMWORD PTR [r14+rdi]
|
||||
movd xmm0, rdx
|
||||
movdqu xmm11, XMMWORD PTR [r15+rsi]
|
||||
movdqu xmm12, XMMWORD PTR [r12+rax]
|
||||
movd xmm8, rcx
|
||||
punpcklqdq xmm8, xmm0
|
||||
|
||||
ALIGN(64)
|
||||
main_loop_cnv1_quad:
|
||||
aesenc xmm9, xmm1
|
||||
aesenc xmm10, xmm2
|
||||
aesenc xmm11, xmm3
|
||||
aesenc xmm12, xmm4
|
||||
movd ecx, xmm9
|
||||
and ecx, 2097136
|
||||
prefetcht0 BYTE PTR [rcx+rbp]
|
||||
movd ecx, xmm10
|
||||
and ecx, 2097136
|
||||
prefetcht0 BYTE PTR [rcx+r14]
|
||||
movd ecx, xmm11
|
||||
and ecx, 2097136
|
||||
prefetcht0 BYTE PTR [rcx+r15]
|
||||
movd ecx, xmm12
|
||||
and ecx, 2097136
|
||||
prefetcht0 BYTE PTR [rcx+r12]
|
||||
movdqa xmm0, xmm9
|
||||
pxor xmm0, xmm5
|
||||
movdqa xmm5, xmm9
|
||||
movd QWORD PTR [rbp+rbx], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov ecx, edx
|
||||
movdqa xmm0, xmm10
|
||||
shr rcx, 24
|
||||
pxor xmm0, xmm6
|
||||
mov ecx, DWORD PTR [r13+rcx*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [rbp+rbx+8], rcx
|
||||
movd rbx, xmm1
|
||||
movd QWORD PTR [r14+rdi], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov ecx, edx
|
||||
movdqa xmm0, xmm11
|
||||
shr rcx, 24
|
||||
pxor xmm0, xmm7
|
||||
mov ecx, DWORD PTR [r13+rcx*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [r14+rdi+8], rcx
|
||||
movd rdi, xmm2
|
||||
movd QWORD PTR [r15+rsi], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov ecx, edx
|
||||
movdqa xmm0, xmm12
|
||||
shr rcx, 24
|
||||
pxor xmm0, xmm8
|
||||
mov ecx, DWORD PTR [r13+rcx*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [r15+rsi+8], rcx
|
||||
movd QWORD PTR [r12+rax], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov ecx, edx
|
||||
shr rcx, 24
|
||||
mov ecx, DWORD PTR [r13+rcx*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [r12+rax+8], rcx
|
||||
movd rcx, xmm9
|
||||
mov r8, rcx
|
||||
and r8d, 2097136
|
||||
mov r9, QWORD PTR [rbp+r8]
|
||||
mov r10, QWORD PTR [rbp+r8+8]
|
||||
mov rax, r9
|
||||
mul rcx
|
||||
pextrq rcx, xmm1, 1
|
||||
add rcx, rax
|
||||
add rbx, rdx
|
||||
mov rax, QWORD PTR [rsp]
|
||||
mov QWORD PTR [rbp+r8], rbx
|
||||
xor rax, rcx
|
||||
mov QWORD PTR [rbp+r8+8], rax
|
||||
xor rcx, r10
|
||||
xor rbx, r9
|
||||
movd xmm1, rbx
|
||||
and ebx, 2097136
|
||||
pinsrq xmm1, rcx, 1
|
||||
movd rcx, xmm10
|
||||
mov r8, rcx
|
||||
and r8d, 2097136
|
||||
movdqu xmm9, XMMWORD PTR [rbp+rbx]
|
||||
mov r9, QWORD PTR [r14+r8]
|
||||
mov r10, QWORD PTR [r14+r8+8]
|
||||
mov rax, r9
|
||||
mul rcx
|
||||
pextrq rcx, xmm2, 1
|
||||
add rcx, rax
|
||||
add rdi, rdx
|
||||
mov rax, QWORD PTR [rsp+8]
|
||||
mov QWORD PTR [r14+r8], rdi
|
||||
xor rax, rcx
|
||||
xor rdi, r9
|
||||
mov QWORD PTR [r14+r8+8], rax
|
||||
xor rcx, r10
|
||||
movd xmm2, rdi
|
||||
and edi, 2097136
|
||||
pinsrq xmm2, rcx, 1
|
||||
movd rcx, xmm11
|
||||
movd rsi, xmm3
|
||||
mov r8, rcx
|
||||
and r8d, 2097136
|
||||
movdqa xmm6, xmm10
|
||||
movdqa xmm7, xmm11
|
||||
movdqa xmm8, xmm12
|
||||
movdqu xmm10, XMMWORD PTR [r14+rdi]
|
||||
mov r9, QWORD PTR [r15+r8]
|
||||
mov r10, QWORD PTR [r15+r8+8]
|
||||
mov rax, r9
|
||||
mul rcx
|
||||
pextrq rcx, xmm3, 1
|
||||
add rcx, rax
|
||||
add rsi, rdx
|
||||
mov rax, QWORD PTR [rsp+16]
|
||||
xor rax, rcx
|
||||
mov QWORD PTR [r15+r8], rsi
|
||||
mov QWORD PTR [r15+r8+8], rax
|
||||
xor rcx, r10
|
||||
xor rsi, r9
|
||||
movd xmm3, rsi
|
||||
and esi, 2097136
|
||||
pinsrq xmm3, rcx, 1
|
||||
movd rcx, xmm12
|
||||
mov r8, rcx
|
||||
and r8d, 2097136
|
||||
movdqu xmm11, XMMWORD PTR [r15+rsi]
|
||||
mov r9, QWORD PTR [r12+r8]
|
||||
mov r10, QWORD PTR [r12+r8+8]
|
||||
mov rax, r9
|
||||
mul rcx
|
||||
mov rcx, rax
|
||||
movd rax, xmm4
|
||||
add rax, rdx
|
||||
mov QWORD PTR [r12+r8], rax
|
||||
xor rax, r9
|
||||
pextrq rdx, xmm4, 1
|
||||
add rdx, rcx
|
||||
mov rcx, QWORD PTR [rsp+24]
|
||||
xor rcx, rdx
|
||||
xor rdx, r10
|
||||
movd xmm4, rax
|
||||
mov QWORD PTR [r12+r8+8], rcx
|
||||
and eax, 2097136
|
||||
pinsrq xmm4, rdx, 1
|
||||
movdqu xmm12, XMMWORD PTR [r12+rax]
|
||||
sub r11, 1
|
||||
jne main_loop_cnv1_quad
|
||||
|
||||
movaps xmm7, XMMWORD PTR [rsp+112]
|
||||
lea r11, QWORD PTR [rsp+144]
|
||||
mov rbx, QWORD PTR [r11+48]
|
||||
mov rbp, QWORD PTR [r11+56]
|
||||
mov rsi, QWORD PTR [r11+64]
|
||||
movaps xmm6, XMMWORD PTR [r11-16]
|
||||
movaps xmm8, XMMWORD PTR [r11-48]
|
||||
movaps xmm9, XMMWORD PTR [r11-64]
|
||||
movaps xmm10, XMMWORD PTR [r11-80]
|
||||
movaps xmm11, XMMWORD PTR [r11-96]
|
||||
movaps xmm12, XMMWORD PTR [r11-112]
|
||||
mov rsp, r11
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
66
src/crypto/cn/asm/win64/cn1/cnv1_single_main_loop.inc
Normal file
66
src/crypto/cn/asm/win64/cn1/cnv1_single_main_loop.inc
Normal file
@@ -0,0 +1,66 @@
|
||||
mov QWORD PTR [rsp+8], rbx
|
||||
mov QWORD PTR [rsp+16], rbp
|
||||
mov QWORD PTR [rsp+24], rsi
|
||||
mov QWORD PTR [rsp+32], rdi
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
mov rdx, QWORD PTR [rcx]
|
||||
mov esi, 524288
|
||||
mov r11, QWORD PTR [rdx+32]
|
||||
xor r11, QWORD PTR [rdx]
|
||||
mov rdi, QWORD PTR [rdx+224]
|
||||
mov rbx, QWORD PTR [rdx+40]
|
||||
xor rbx, QWORD PTR [rdx+8]
|
||||
mov rcx, QWORD PTR [rdx+56]
|
||||
xor rcx, QWORD PTR [rdx+24]
|
||||
mov rax, QWORD PTR [rdx+48]
|
||||
xor rax, QWORD PTR [rdx+16]
|
||||
mov rbp, QWORD PTR [rdx+240]
|
||||
mov r14, QWORD PTR [rdx+232]
|
||||
movd xmm2, rax
|
||||
pinsrq xmm2, rcx, 1
|
||||
|
||||
ALIGN(64)
|
||||
main_loop_cnv1_single:
|
||||
mov r8, r11
|
||||
and r8d, 2097136
|
||||
movdqu xmm1, XMMWORD PTR [rdi+r8]
|
||||
movd xmm0, r11
|
||||
pinsrq xmm0, rbx, 1
|
||||
aesenc xmm1, xmm0
|
||||
movd r15, xmm1
|
||||
mov r9, r15
|
||||
and r9d, 2097136
|
||||
movdqa xmm0, xmm1
|
||||
pxor xmm0, xmm2
|
||||
movdqa xmm2, xmm1
|
||||
movd QWORD PTR [rdi+r8], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov eax, edx
|
||||
shr rax, 24
|
||||
mov ecx, DWORD PTR [r14+rax*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [rdi+r8+8], rcx
|
||||
mov r10, QWORD PTR [rdi+r9]
|
||||
mov r8, QWORD PTR [rdi+r9+8]
|
||||
mov rax, r10
|
||||
mul r15
|
||||
add rbx, rax
|
||||
add r11, rdx
|
||||
mov QWORD PTR [rdi+r9], r11
|
||||
mov rax, rbx
|
||||
xor rbx, r8
|
||||
xor r11, r10
|
||||
xor rax, rbp
|
||||
mov QWORD PTR [rdi+r9+8], rax
|
||||
sub rsi, 1
|
||||
jne main_loop_cnv1_single
|
||||
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
mov rbx, QWORD PTR [rsp+8]
|
||||
mov rbp, QWORD PTR [rsp+16]
|
||||
mov rsi, QWORD PTR [rsp+24]
|
||||
mov rdi, QWORD PTR [rsp+32]
|
||||
@@ -1,6 +1,9 @@
|
||||
#define ALIGN(x) .align 64
|
||||
.intel_syntax noprefix
|
||||
.section .text
|
||||
.global cnv1_single_mainloop_asm
|
||||
.global cnv1_double_mainloop_asm
|
||||
.global cnv1_quad_mainloop_asm
|
||||
.global cnv2_mainloop_ivybridge_asm
|
||||
.global cnv2_mainloop_ryzen_asm
|
||||
.global cnv2_mainloop_bulldozer_asm
|
||||
@@ -9,6 +12,24 @@
|
||||
.global cnv2_rwz_double_mainloop_asm
|
||||
.global cnv2_upx_double_mainloop_zen3_asm
|
||||
|
||||
ALIGN(64)
|
||||
cnv1_single_mainloop_asm:
|
||||
#include "../cn1/cnv1_single_main_loop.inc"
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
|
||||
ALIGN(64)
|
||||
cnv1_double_mainloop_asm:
|
||||
#include "../cn1/cnv1_double_main_loop.inc"
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
|
||||
ALIGN(64)
|
||||
cnv1_quad_mainloop_asm:
|
||||
#include "../cn1/cnv1_quad_main_loop.inc"
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
|
||||
ALIGN(64)
|
||||
cnv2_mainloop_ivybridge_asm:
|
||||
#include "../cn2/cnv2_main_loop_ivybridge.inc"
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE
|
||||
PUBLIC cnv1_single_mainloop_asm
|
||||
PUBLIC cnv1_double_mainloop_asm
|
||||
PUBLIC cnv1_quad_mainloop_asm
|
||||
PUBLIC cnv2_mainloop_ivybridge_asm
|
||||
PUBLIC cnv2_mainloop_ryzen_asm
|
||||
PUBLIC cnv2_mainloop_bulldozer_asm
|
||||
@@ -6,28 +9,49 @@ PUBLIC cnv2_double_mainloop_sandybridge_asm
|
||||
PUBLIC cnv2_rwz_mainloop_asm
|
||||
PUBLIC cnv2_rwz_double_mainloop_asm
|
||||
|
||||
ALIGN 64
|
||||
ALIGN(64)
|
||||
cnv1_single_mainloop_asm PROC
|
||||
INCLUDE cn1/cnv1_single_main_loop.inc
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
cnv1_single_mainloop_asm ENDP
|
||||
|
||||
ALIGN(64)
|
||||
cnv1_double_mainloop_asm PROC
|
||||
INCLUDE cn1/cnv1_double_main_loop.inc
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
cnv1_double_mainloop_asm ENDP
|
||||
|
||||
ALIGN(64)
|
||||
cnv1_quad_mainloop_asm PROC
|
||||
INCLUDE cn1/cnv1_quad_main_loop.inc
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
cnv1_quad_mainloop_asm ENDP
|
||||
|
||||
ALIGN(64)
|
||||
cnv2_mainloop_ivybridge_asm PROC
|
||||
INCLUDE cn2/cnv2_main_loop_ivybridge.inc
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
cnv2_mainloop_ivybridge_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
ALIGN(64)
|
||||
cnv2_mainloop_ryzen_asm PROC
|
||||
INCLUDE cn2/cnv2_main_loop_ryzen.inc
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
cnv2_mainloop_ryzen_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
ALIGN(64)
|
||||
cnv2_mainloop_bulldozer_asm PROC
|
||||
INCLUDE cn2/cnv2_main_loop_bulldozer.inc
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
cnv2_mainloop_bulldozer_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
ALIGN(64)
|
||||
cnv2_double_mainloop_sandybridge_asm PROC
|
||||
INCLUDE cn2/cnv2_double_main_loop_sandybridge.inc
|
||||
ret 0
|
||||
|
||||
85
src/crypto/ghostrider/CMakeLists.txt
Normal file
85
src/crypto/ghostrider/CMakeLists.txt
Normal file
@@ -0,0 +1,85 @@
|
||||
cmake_minimum_required(VERSION 2.8.12)
|
||||
project(GhostRider)
|
||||
|
||||
set(HEADERS
|
||||
sph_types.h
|
||||
sph_blake.h
|
||||
sph_bmw.h
|
||||
sph_cubehash.h
|
||||
sph_echo.h
|
||||
sph_fugue.h
|
||||
sph_groestl.h
|
||||
sph_hamsi.h
|
||||
sph_jh.h
|
||||
sph_keccak.h
|
||||
sph_luffa.h
|
||||
sph_sha2.h
|
||||
sph_shabal.h
|
||||
sph_shavite.h
|
||||
sph_simd.h
|
||||
sph_skein.h
|
||||
sph_whirlpool.h
|
||||
ghostrider.h
|
||||
)
|
||||
|
||||
set(SOURCES
|
||||
sph_blake.c
|
||||
sph_bmw.c
|
||||
sph_cubehash.c
|
||||
sph_echo.c
|
||||
sph_fugue.c
|
||||
sph_groestl.c
|
||||
sph_hamsi.c
|
||||
sph_jh.c
|
||||
sph_keccak.c
|
||||
sph_luffa.c
|
||||
sph_shabal.c
|
||||
sph_shavite.c
|
||||
sph_simd.c
|
||||
sph_sha2.c
|
||||
sph_skein.c
|
||||
sph_whirlpool.c
|
||||
ghostrider.cpp
|
||||
)
|
||||
|
||||
if (CMAKE_C_COMPILER_ID MATCHES MSVC)
|
||||
set_source_files_properties(sph_blake.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_bmw.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_cubehash.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_echo.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_fugue.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_groestl.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_hamsi.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_jh.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_keccak.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_luffa.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_shabal.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_shavite.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_simd.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_sha2.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_skein.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_whirlpool.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
elseif (CMAKE_C_COMPILER_ID MATCHES GNU OR CMAKE_C_COMPILER_ID MATCHES Clang)
|
||||
set_source_files_properties(sph_blake.c PROPERTIES COMPILE_FLAGS "-Os")
|
||||
set_source_files_properties(sph_bmw.c PROPERTIES COMPILE_FLAGS "-Os")
|
||||
set_source_files_properties(sph_cubehash.c PROPERTIES COMPILE_FLAGS "-Os")
|
||||
set_source_files_properties(sph_echo.c PROPERTIES COMPILE_FLAGS "-Os")
|
||||
set_source_files_properties(sph_fugue.c PROPERTIES COMPILE_FLAGS "-Os")
|
||||
set_source_files_properties(sph_groestl.c PROPERTIES COMPILE_FLAGS "-Os")
|
||||
set_source_files_properties(sph_hamsi.c PROPERTIES COMPILE_FLAGS "-Os")
|
||||
set_source_files_properties(sph_jh.c PROPERTIES COMPILE_FLAGS "-Os -fno-tree-vrp")
|
||||
set_source_files_properties(sph_keccak.c PROPERTIES COMPILE_FLAGS "-Os")
|
||||
set_source_files_properties(sph_luffa.c PROPERTIES COMPILE_FLAGS "-Os -Wno-unused-const-variable")
|
||||
set_source_files_properties(sph_shabal.c PROPERTIES COMPILE_FLAGS "-Os")
|
||||
set_source_files_properties(sph_shavite.c PROPERTIES COMPILE_FLAGS "-Os")
|
||||
set_source_files_properties(sph_simd.c PROPERTIES COMPILE_FLAGS "-Os")
|
||||
set_source_files_properties(sph_sha2.c PROPERTIES COMPILE_FLAGS "-Os")
|
||||
set_source_files_properties(sph_skein.c PROPERTIES COMPILE_FLAGS "-Os")
|
||||
set_source_files_properties(sph_whirlpool.c PROPERTIES COMPILE_FLAGS "-Os")
|
||||
endif()
|
||||
|
||||
include_directories(.)
|
||||
include_directories(../..)
|
||||
include_directories(${UV_INCLUDE_DIR})
|
||||
|
||||
add_library(ghostrider STATIC ${HEADERS} ${SOURCES})
|
||||
39
src/crypto/ghostrider/README.md
Normal file
39
src/crypto/ghostrider/README.md
Normal file
@@ -0,0 +1,39 @@
|
||||
# GhostRider (Raptoreum) release notes
|
||||
|
||||
**XMRig** supports GhostRider algorithm starting from version **v6.16.0**.
|
||||
|
||||
No tuning is required - auto-config works well on most CPUs!
|
||||
|
||||
### Sample command line (non-SSL port)
|
||||
```
|
||||
xmrig -a gr -o raptoreumemporium.com:3008 -u WALLET_ADDRESS
|
||||
```
|
||||
|
||||
### Sample command line (SSL port)
|
||||
```
|
||||
xmrig -a gr -o us.flockpool.com:5555 --tls -u WALLET_ADDRESS
|
||||
```
|
||||
|
||||
You can use **rtm_ghostrider_example.cmd** as a template and put pool URL and your wallet address there. The general XMRig documentation is available [here](https://xmrig.com/docs/miner).
|
||||
|
||||
**Using the `--threads` or `-t` option is NOT recommended because it turns off the advanced built-in config.** If you want to tweak the number of threads used for GhostRider, it's recommended to start using config.json instead of the command line. The most suitable command-line option for this is `--cpu-max-threads-hint=N`, where N can be between 0 and 100.
|
||||
|
||||
## Performance
|
||||
|
||||
While individual algorithm implementations are a bit unoptimized, XMRig achieves higher hashrates by employing better auto-config and more fine-grained thread scheduling: it can calculate a single batch of hashes using 2 threads for parts that don't require much cache. For example, on a typical Intel CPU (2 MB cache per core) it will use 1 thread per core for cn/fast, and 2 threads per core for other Cryptonight variants while calculating the same batch of hashes, always achieving more than 50% CPU load.
|
||||
|
||||
For the same reason, XMRig can sometimes use less than 100% CPU on Ryzen 3000/5000 CPUs if it finds that running 1 thread per core is faster for some Cryptonight variants on your system.
|
||||
|
||||
**Windows** (detailed results [here](https://imgur.com/a/0njIVVW))
|
||||
CPU|cpuminer-gr-avx2 1.2.4.1 (tuned), h/s|XMRig v6.16.2 (MSVC build), h/s|Speedup
|
||||
-|-|-|-
|
||||
AMD Ryzen 7 4700U|632.6|733.1|+15.89%
|
||||
Intel Core i7-2600|496.4|554.6|+11.72%
|
||||
AMD Ryzen 7 3700X @ 4.1 GHz|2453.0|2496.5|+1.77%
|
||||
AMD Ryzen 5 5600X @ 4.65 GHz|2112.6|2337.5|+10.65%
|
||||
|
||||
**Linux (outdated)** (tested by **Delgon**, detailed results [here](https://cdn.discordapp.com/attachments/604375870236524574/913167614749048872/unknown.png))
|
||||
CPU|cpuminer-gr-avx2 1.2.4.1 (tuned), h/s|XMRig v6.16.0 (GCC build), h/s|Speedup
|
||||
-|-|-|-
|
||||
AMD Ryzen 9 3900X|3746.51|3604.89|-3.78%
|
||||
2xIntel Xeon E5-2698v3|2563.4|2638.38|+2.925%
|
||||
392
src/crypto/ghostrider/aes_helper.c
Normal file
392
src/crypto/ghostrider/aes_helper.c
Normal file
@@ -0,0 +1,392 @@
|
||||
/* $Id: aes_helper.c 220 2010-06-09 09:21:50Z tp $ */
|
||||
/*
|
||||
* AES tables. This file is not meant to be compiled by itself; it
|
||||
* is included by some hash function implementations. It contains
|
||||
* the precomputed tables and helper macros for evaluating an AES
|
||||
* round, optionally with a final XOR with a subkey.
|
||||
*
|
||||
* By default, this file defines the tables and macros for little-endian
|
||||
* processing (i.e. it is assumed that the input bytes have been read
|
||||
* from memory and assembled with the little-endian convention). If
|
||||
* the 'AES_BIG_ENDIAN' macro is defined (to a non-zero integer value)
|
||||
* when this file is included, then the tables and macros for big-endian
|
||||
* processing are defined instead. The big-endian tables and macros have
|
||||
* names distinct from the little-endian tables and macros, hence it is
|
||||
* possible to have both simultaneously, by including this file twice
|
||||
* (with and without the AES_BIG_ENDIAN macro).
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#include "sph_types.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
#if AES_BIG_ENDIAN
|
||||
|
||||
#define AESx(x) ( ((SPH_C32(x) >> 24) & SPH_C32(0x000000FF)) \
|
||||
| ((SPH_C32(x) >> 8) & SPH_C32(0x0000FF00)) \
|
||||
| ((SPH_C32(x) << 8) & SPH_C32(0x00FF0000)) \
|
||||
| ((SPH_C32(x) << 24) & SPH_C32(0xFF000000)))
|
||||
|
||||
#define AES0 AES0_BE
|
||||
#define AES1 AES1_BE
|
||||
#define AES2 AES2_BE
|
||||
#define AES3 AES3_BE
|
||||
|
||||
#define AES_ROUND_BE(X0, X1, X2, X3, K0, K1, K2, K3, Y0, Y1, Y2, Y3) do { \
|
||||
(Y0) = AES0[((X0) >> 24) & 0xFF] \
|
||||
^ AES1[((X1) >> 16) & 0xFF] \
|
||||
^ AES2[((X2) >> 8) & 0xFF] \
|
||||
^ AES3[(X3) & 0xFF] ^ (K0); \
|
||||
(Y1) = AES0[((X1) >> 24) & 0xFF] \
|
||||
^ AES1[((X2) >> 16) & 0xFF] \
|
||||
^ AES2[((X3) >> 8) & 0xFF] \
|
||||
^ AES3[(X0) & 0xFF] ^ (K1); \
|
||||
(Y2) = AES0[((X2) >> 24) & 0xFF] \
|
||||
^ AES1[((X3) >> 16) & 0xFF] \
|
||||
^ AES2[((X0) >> 8) & 0xFF] \
|
||||
^ AES3[(X1) & 0xFF] ^ (K2); \
|
||||
(Y3) = AES0[((X3) >> 24) & 0xFF] \
|
||||
^ AES1[((X0) >> 16) & 0xFF] \
|
||||
^ AES2[((X1) >> 8) & 0xFF] \
|
||||
^ AES3[(X2) & 0xFF] ^ (K3); \
|
||||
} while (0)
|
||||
|
||||
#define AES_ROUND_NOKEY_BE(X0, X1, X2, X3, Y0, Y1, Y2, Y3) \
|
||||
AES_ROUND_BE(X0, X1, X2, X3, 0, 0, 0, 0, Y0, Y1, Y2, Y3)
|
||||
|
||||
#else
|
||||
|
||||
#define AESx(x) SPH_C32(x)
|
||||
#define AES0 AES0_LE
|
||||
#define AES1 AES1_LE
|
||||
#define AES2 AES2_LE
|
||||
#define AES3 AES3_LE
|
||||
|
||||
#define AES_ROUND_LE(X0, X1, X2, X3, K0, K1, K2, K3, Y0, Y1, Y2, Y3) do { \
|
||||
(Y0) = AES0[(X0) & 0xFF] \
|
||||
^ AES1[((X1) >> 8) & 0xFF] \
|
||||
^ AES2[((X2) >> 16) & 0xFF] \
|
||||
^ AES3[((X3) >> 24) & 0xFF] ^ (K0); \
|
||||
(Y1) = AES0[(X1) & 0xFF] \
|
||||
^ AES1[((X2) >> 8) & 0xFF] \
|
||||
^ AES2[((X3) >> 16) & 0xFF] \
|
||||
^ AES3[((X0) >> 24) & 0xFF] ^ (K1); \
|
||||
(Y2) = AES0[(X2) & 0xFF] \
|
||||
^ AES1[((X3) >> 8) & 0xFF] \
|
||||
^ AES2[((X0) >> 16) & 0xFF] \
|
||||
^ AES3[((X1) >> 24) & 0xFF] ^ (K2); \
|
||||
(Y3) = AES0[(X3) & 0xFF] \
|
||||
^ AES1[((X0) >> 8) & 0xFF] \
|
||||
^ AES2[((X1) >> 16) & 0xFF] \
|
||||
^ AES3[((X2) >> 24) & 0xFF] ^ (K3); \
|
||||
} while (0)
|
||||
|
||||
#define AES_ROUND_NOKEY_LE(X0, X1, X2, X3, Y0, Y1, Y2, Y3) \
|
||||
AES_ROUND_LE(X0, X1, X2, X3, 0, 0, 0, 0, Y0, Y1, Y2, Y3)
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The AES*[] tables allow us to perform a fast evaluation of an AES
|
||||
* round; table AESi[] combines SubBytes for a byte at row i, and
|
||||
* MixColumns for the column where that byte goes after ShiftRows.
|
||||
*/
|
||||
|
||||
static const sph_u32 AES0[256] = {
|
||||
AESx(0xA56363C6), AESx(0x847C7CF8), AESx(0x997777EE), AESx(0x8D7B7BF6),
|
||||
AESx(0x0DF2F2FF), AESx(0xBD6B6BD6), AESx(0xB16F6FDE), AESx(0x54C5C591),
|
||||
AESx(0x50303060), AESx(0x03010102), AESx(0xA96767CE), AESx(0x7D2B2B56),
|
||||
AESx(0x19FEFEE7), AESx(0x62D7D7B5), AESx(0xE6ABAB4D), AESx(0x9A7676EC),
|
||||
AESx(0x45CACA8F), AESx(0x9D82821F), AESx(0x40C9C989), AESx(0x877D7DFA),
|
||||
AESx(0x15FAFAEF), AESx(0xEB5959B2), AESx(0xC947478E), AESx(0x0BF0F0FB),
|
||||
AESx(0xECADAD41), AESx(0x67D4D4B3), AESx(0xFDA2A25F), AESx(0xEAAFAF45),
|
||||
AESx(0xBF9C9C23), AESx(0xF7A4A453), AESx(0x967272E4), AESx(0x5BC0C09B),
|
||||
AESx(0xC2B7B775), AESx(0x1CFDFDE1), AESx(0xAE93933D), AESx(0x6A26264C),
|
||||
AESx(0x5A36366C), AESx(0x413F3F7E), AESx(0x02F7F7F5), AESx(0x4FCCCC83),
|
||||
AESx(0x5C343468), AESx(0xF4A5A551), AESx(0x34E5E5D1), AESx(0x08F1F1F9),
|
||||
AESx(0x937171E2), AESx(0x73D8D8AB), AESx(0x53313162), AESx(0x3F15152A),
|
||||
AESx(0x0C040408), AESx(0x52C7C795), AESx(0x65232346), AESx(0x5EC3C39D),
|
||||
AESx(0x28181830), AESx(0xA1969637), AESx(0x0F05050A), AESx(0xB59A9A2F),
|
||||
AESx(0x0907070E), AESx(0x36121224), AESx(0x9B80801B), AESx(0x3DE2E2DF),
|
||||
AESx(0x26EBEBCD), AESx(0x6927274E), AESx(0xCDB2B27F), AESx(0x9F7575EA),
|
||||
AESx(0x1B090912), AESx(0x9E83831D), AESx(0x742C2C58), AESx(0x2E1A1A34),
|
||||
AESx(0x2D1B1B36), AESx(0xB26E6EDC), AESx(0xEE5A5AB4), AESx(0xFBA0A05B),
|
||||
AESx(0xF65252A4), AESx(0x4D3B3B76), AESx(0x61D6D6B7), AESx(0xCEB3B37D),
|
||||
AESx(0x7B292952), AESx(0x3EE3E3DD), AESx(0x712F2F5E), AESx(0x97848413),
|
||||
AESx(0xF55353A6), AESx(0x68D1D1B9), AESx(0x00000000), AESx(0x2CEDEDC1),
|
||||
AESx(0x60202040), AESx(0x1FFCFCE3), AESx(0xC8B1B179), AESx(0xED5B5BB6),
|
||||
AESx(0xBE6A6AD4), AESx(0x46CBCB8D), AESx(0xD9BEBE67), AESx(0x4B393972),
|
||||
AESx(0xDE4A4A94), AESx(0xD44C4C98), AESx(0xE85858B0), AESx(0x4ACFCF85),
|
||||
AESx(0x6BD0D0BB), AESx(0x2AEFEFC5), AESx(0xE5AAAA4F), AESx(0x16FBFBED),
|
||||
AESx(0xC5434386), AESx(0xD74D4D9A), AESx(0x55333366), AESx(0x94858511),
|
||||
AESx(0xCF45458A), AESx(0x10F9F9E9), AESx(0x06020204), AESx(0x817F7FFE),
|
||||
AESx(0xF05050A0), AESx(0x443C3C78), AESx(0xBA9F9F25), AESx(0xE3A8A84B),
|
||||
AESx(0xF35151A2), AESx(0xFEA3A35D), AESx(0xC0404080), AESx(0x8A8F8F05),
|
||||
AESx(0xAD92923F), AESx(0xBC9D9D21), AESx(0x48383870), AESx(0x04F5F5F1),
|
||||
AESx(0xDFBCBC63), AESx(0xC1B6B677), AESx(0x75DADAAF), AESx(0x63212142),
|
||||
AESx(0x30101020), AESx(0x1AFFFFE5), AESx(0x0EF3F3FD), AESx(0x6DD2D2BF),
|
||||
AESx(0x4CCDCD81), AESx(0x140C0C18), AESx(0x35131326), AESx(0x2FECECC3),
|
||||
AESx(0xE15F5FBE), AESx(0xA2979735), AESx(0xCC444488), AESx(0x3917172E),
|
||||
AESx(0x57C4C493), AESx(0xF2A7A755), AESx(0x827E7EFC), AESx(0x473D3D7A),
|
||||
AESx(0xAC6464C8), AESx(0xE75D5DBA), AESx(0x2B191932), AESx(0x957373E6),
|
||||
AESx(0xA06060C0), AESx(0x98818119), AESx(0xD14F4F9E), AESx(0x7FDCDCA3),
|
||||
AESx(0x66222244), AESx(0x7E2A2A54), AESx(0xAB90903B), AESx(0x8388880B),
|
||||
AESx(0xCA46468C), AESx(0x29EEEEC7), AESx(0xD3B8B86B), AESx(0x3C141428),
|
||||
AESx(0x79DEDEA7), AESx(0xE25E5EBC), AESx(0x1D0B0B16), AESx(0x76DBDBAD),
|
||||
AESx(0x3BE0E0DB), AESx(0x56323264), AESx(0x4E3A3A74), AESx(0x1E0A0A14),
|
||||
AESx(0xDB494992), AESx(0x0A06060C), AESx(0x6C242448), AESx(0xE45C5CB8),
|
||||
AESx(0x5DC2C29F), AESx(0x6ED3D3BD), AESx(0xEFACAC43), AESx(0xA66262C4),
|
||||
AESx(0xA8919139), AESx(0xA4959531), AESx(0x37E4E4D3), AESx(0x8B7979F2),
|
||||
AESx(0x32E7E7D5), AESx(0x43C8C88B), AESx(0x5937376E), AESx(0xB76D6DDA),
|
||||
AESx(0x8C8D8D01), AESx(0x64D5D5B1), AESx(0xD24E4E9C), AESx(0xE0A9A949),
|
||||
AESx(0xB46C6CD8), AESx(0xFA5656AC), AESx(0x07F4F4F3), AESx(0x25EAEACF),
|
||||
AESx(0xAF6565CA), AESx(0x8E7A7AF4), AESx(0xE9AEAE47), AESx(0x18080810),
|
||||
AESx(0xD5BABA6F), AESx(0x887878F0), AESx(0x6F25254A), AESx(0x722E2E5C),
|
||||
AESx(0x241C1C38), AESx(0xF1A6A657), AESx(0xC7B4B473), AESx(0x51C6C697),
|
||||
AESx(0x23E8E8CB), AESx(0x7CDDDDA1), AESx(0x9C7474E8), AESx(0x211F1F3E),
|
||||
AESx(0xDD4B4B96), AESx(0xDCBDBD61), AESx(0x868B8B0D), AESx(0x858A8A0F),
|
||||
AESx(0x907070E0), AESx(0x423E3E7C), AESx(0xC4B5B571), AESx(0xAA6666CC),
|
||||
AESx(0xD8484890), AESx(0x05030306), AESx(0x01F6F6F7), AESx(0x120E0E1C),
|
||||
AESx(0xA36161C2), AESx(0x5F35356A), AESx(0xF95757AE), AESx(0xD0B9B969),
|
||||
AESx(0x91868617), AESx(0x58C1C199), AESx(0x271D1D3A), AESx(0xB99E9E27),
|
||||
AESx(0x38E1E1D9), AESx(0x13F8F8EB), AESx(0xB398982B), AESx(0x33111122),
|
||||
AESx(0xBB6969D2), AESx(0x70D9D9A9), AESx(0x898E8E07), AESx(0xA7949433),
|
||||
AESx(0xB69B9B2D), AESx(0x221E1E3C), AESx(0x92878715), AESx(0x20E9E9C9),
|
||||
AESx(0x49CECE87), AESx(0xFF5555AA), AESx(0x78282850), AESx(0x7ADFDFA5),
|
||||
AESx(0x8F8C8C03), AESx(0xF8A1A159), AESx(0x80898909), AESx(0x170D0D1A),
|
||||
AESx(0xDABFBF65), AESx(0x31E6E6D7), AESx(0xC6424284), AESx(0xB86868D0),
|
||||
AESx(0xC3414182), AESx(0xB0999929), AESx(0x772D2D5A), AESx(0x110F0F1E),
|
||||
AESx(0xCBB0B07B), AESx(0xFC5454A8), AESx(0xD6BBBB6D), AESx(0x3A16162C)
|
||||
};
|
||||
|
||||
static const sph_u32 AES1[256] = {
|
||||
AESx(0x6363C6A5), AESx(0x7C7CF884), AESx(0x7777EE99), AESx(0x7B7BF68D),
|
||||
AESx(0xF2F2FF0D), AESx(0x6B6BD6BD), AESx(0x6F6FDEB1), AESx(0xC5C59154),
|
||||
AESx(0x30306050), AESx(0x01010203), AESx(0x6767CEA9), AESx(0x2B2B567D),
|
||||
AESx(0xFEFEE719), AESx(0xD7D7B562), AESx(0xABAB4DE6), AESx(0x7676EC9A),
|
||||
AESx(0xCACA8F45), AESx(0x82821F9D), AESx(0xC9C98940), AESx(0x7D7DFA87),
|
||||
AESx(0xFAFAEF15), AESx(0x5959B2EB), AESx(0x47478EC9), AESx(0xF0F0FB0B),
|
||||
AESx(0xADAD41EC), AESx(0xD4D4B367), AESx(0xA2A25FFD), AESx(0xAFAF45EA),
|
||||
AESx(0x9C9C23BF), AESx(0xA4A453F7), AESx(0x7272E496), AESx(0xC0C09B5B),
|
||||
AESx(0xB7B775C2), AESx(0xFDFDE11C), AESx(0x93933DAE), AESx(0x26264C6A),
|
||||
AESx(0x36366C5A), AESx(0x3F3F7E41), AESx(0xF7F7F502), AESx(0xCCCC834F),
|
||||
AESx(0x3434685C), AESx(0xA5A551F4), AESx(0xE5E5D134), AESx(0xF1F1F908),
|
||||
AESx(0x7171E293), AESx(0xD8D8AB73), AESx(0x31316253), AESx(0x15152A3F),
|
||||
AESx(0x0404080C), AESx(0xC7C79552), AESx(0x23234665), AESx(0xC3C39D5E),
|
||||
AESx(0x18183028), AESx(0x969637A1), AESx(0x05050A0F), AESx(0x9A9A2FB5),
|
||||
AESx(0x07070E09), AESx(0x12122436), AESx(0x80801B9B), AESx(0xE2E2DF3D),
|
||||
AESx(0xEBEBCD26), AESx(0x27274E69), AESx(0xB2B27FCD), AESx(0x7575EA9F),
|
||||
AESx(0x0909121B), AESx(0x83831D9E), AESx(0x2C2C5874), AESx(0x1A1A342E),
|
||||
AESx(0x1B1B362D), AESx(0x6E6EDCB2), AESx(0x5A5AB4EE), AESx(0xA0A05BFB),
|
||||
AESx(0x5252A4F6), AESx(0x3B3B764D), AESx(0xD6D6B761), AESx(0xB3B37DCE),
|
||||
AESx(0x2929527B), AESx(0xE3E3DD3E), AESx(0x2F2F5E71), AESx(0x84841397),
|
||||
AESx(0x5353A6F5), AESx(0xD1D1B968), AESx(0x00000000), AESx(0xEDEDC12C),
|
||||
AESx(0x20204060), AESx(0xFCFCE31F), AESx(0xB1B179C8), AESx(0x5B5BB6ED),
|
||||
AESx(0x6A6AD4BE), AESx(0xCBCB8D46), AESx(0xBEBE67D9), AESx(0x3939724B),
|
||||
AESx(0x4A4A94DE), AESx(0x4C4C98D4), AESx(0x5858B0E8), AESx(0xCFCF854A),
|
||||
AESx(0xD0D0BB6B), AESx(0xEFEFC52A), AESx(0xAAAA4FE5), AESx(0xFBFBED16),
|
||||
AESx(0x434386C5), AESx(0x4D4D9AD7), AESx(0x33336655), AESx(0x85851194),
|
||||
AESx(0x45458ACF), AESx(0xF9F9E910), AESx(0x02020406), AESx(0x7F7FFE81),
|
||||
AESx(0x5050A0F0), AESx(0x3C3C7844), AESx(0x9F9F25BA), AESx(0xA8A84BE3),
|
||||
AESx(0x5151A2F3), AESx(0xA3A35DFE), AESx(0x404080C0), AESx(0x8F8F058A),
|
||||
AESx(0x92923FAD), AESx(0x9D9D21BC), AESx(0x38387048), AESx(0xF5F5F104),
|
||||
AESx(0xBCBC63DF), AESx(0xB6B677C1), AESx(0xDADAAF75), AESx(0x21214263),
|
||||
AESx(0x10102030), AESx(0xFFFFE51A), AESx(0xF3F3FD0E), AESx(0xD2D2BF6D),
|
||||
AESx(0xCDCD814C), AESx(0x0C0C1814), AESx(0x13132635), AESx(0xECECC32F),
|
||||
AESx(0x5F5FBEE1), AESx(0x979735A2), AESx(0x444488CC), AESx(0x17172E39),
|
||||
AESx(0xC4C49357), AESx(0xA7A755F2), AESx(0x7E7EFC82), AESx(0x3D3D7A47),
|
||||
AESx(0x6464C8AC), AESx(0x5D5DBAE7), AESx(0x1919322B), AESx(0x7373E695),
|
||||
AESx(0x6060C0A0), AESx(0x81811998), AESx(0x4F4F9ED1), AESx(0xDCDCA37F),
|
||||
AESx(0x22224466), AESx(0x2A2A547E), AESx(0x90903BAB), AESx(0x88880B83),
|
||||
AESx(0x46468CCA), AESx(0xEEEEC729), AESx(0xB8B86BD3), AESx(0x1414283C),
|
||||
AESx(0xDEDEA779), AESx(0x5E5EBCE2), AESx(0x0B0B161D), AESx(0xDBDBAD76),
|
||||
AESx(0xE0E0DB3B), AESx(0x32326456), AESx(0x3A3A744E), AESx(0x0A0A141E),
|
||||
AESx(0x494992DB), AESx(0x06060C0A), AESx(0x2424486C), AESx(0x5C5CB8E4),
|
||||
AESx(0xC2C29F5D), AESx(0xD3D3BD6E), AESx(0xACAC43EF), AESx(0x6262C4A6),
|
||||
AESx(0x919139A8), AESx(0x959531A4), AESx(0xE4E4D337), AESx(0x7979F28B),
|
||||
AESx(0xE7E7D532), AESx(0xC8C88B43), AESx(0x37376E59), AESx(0x6D6DDAB7),
|
||||
AESx(0x8D8D018C), AESx(0xD5D5B164), AESx(0x4E4E9CD2), AESx(0xA9A949E0),
|
||||
AESx(0x6C6CD8B4), AESx(0x5656ACFA), AESx(0xF4F4F307), AESx(0xEAEACF25),
|
||||
AESx(0x6565CAAF), AESx(0x7A7AF48E), AESx(0xAEAE47E9), AESx(0x08081018),
|
||||
AESx(0xBABA6FD5), AESx(0x7878F088), AESx(0x25254A6F), AESx(0x2E2E5C72),
|
||||
AESx(0x1C1C3824), AESx(0xA6A657F1), AESx(0xB4B473C7), AESx(0xC6C69751),
|
||||
AESx(0xE8E8CB23), AESx(0xDDDDA17C), AESx(0x7474E89C), AESx(0x1F1F3E21),
|
||||
AESx(0x4B4B96DD), AESx(0xBDBD61DC), AESx(0x8B8B0D86), AESx(0x8A8A0F85),
|
||||
AESx(0x7070E090), AESx(0x3E3E7C42), AESx(0xB5B571C4), AESx(0x6666CCAA),
|
||||
AESx(0x484890D8), AESx(0x03030605), AESx(0xF6F6F701), AESx(0x0E0E1C12),
|
||||
AESx(0x6161C2A3), AESx(0x35356A5F), AESx(0x5757AEF9), AESx(0xB9B969D0),
|
||||
AESx(0x86861791), AESx(0xC1C19958), AESx(0x1D1D3A27), AESx(0x9E9E27B9),
|
||||
AESx(0xE1E1D938), AESx(0xF8F8EB13), AESx(0x98982BB3), AESx(0x11112233),
|
||||
AESx(0x6969D2BB), AESx(0xD9D9A970), AESx(0x8E8E0789), AESx(0x949433A7),
|
||||
AESx(0x9B9B2DB6), AESx(0x1E1E3C22), AESx(0x87871592), AESx(0xE9E9C920),
|
||||
AESx(0xCECE8749), AESx(0x5555AAFF), AESx(0x28285078), AESx(0xDFDFA57A),
|
||||
AESx(0x8C8C038F), AESx(0xA1A159F8), AESx(0x89890980), AESx(0x0D0D1A17),
|
||||
AESx(0xBFBF65DA), AESx(0xE6E6D731), AESx(0x424284C6), AESx(0x6868D0B8),
|
||||
AESx(0x414182C3), AESx(0x999929B0), AESx(0x2D2D5A77), AESx(0x0F0F1E11),
|
||||
AESx(0xB0B07BCB), AESx(0x5454A8FC), AESx(0xBBBB6DD6), AESx(0x16162C3A)
|
||||
};
|
||||
|
||||
static const sph_u32 AES2[256] = {
|
||||
AESx(0x63C6A563), AESx(0x7CF8847C), AESx(0x77EE9977), AESx(0x7BF68D7B),
|
||||
AESx(0xF2FF0DF2), AESx(0x6BD6BD6B), AESx(0x6FDEB16F), AESx(0xC59154C5),
|
||||
AESx(0x30605030), AESx(0x01020301), AESx(0x67CEA967), AESx(0x2B567D2B),
|
||||
AESx(0xFEE719FE), AESx(0xD7B562D7), AESx(0xAB4DE6AB), AESx(0x76EC9A76),
|
||||
AESx(0xCA8F45CA), AESx(0x821F9D82), AESx(0xC98940C9), AESx(0x7DFA877D),
|
||||
AESx(0xFAEF15FA), AESx(0x59B2EB59), AESx(0x478EC947), AESx(0xF0FB0BF0),
|
||||
AESx(0xAD41ECAD), AESx(0xD4B367D4), AESx(0xA25FFDA2), AESx(0xAF45EAAF),
|
||||
AESx(0x9C23BF9C), AESx(0xA453F7A4), AESx(0x72E49672), AESx(0xC09B5BC0),
|
||||
AESx(0xB775C2B7), AESx(0xFDE11CFD), AESx(0x933DAE93), AESx(0x264C6A26),
|
||||
AESx(0x366C5A36), AESx(0x3F7E413F), AESx(0xF7F502F7), AESx(0xCC834FCC),
|
||||
AESx(0x34685C34), AESx(0xA551F4A5), AESx(0xE5D134E5), AESx(0xF1F908F1),
|
||||
AESx(0x71E29371), AESx(0xD8AB73D8), AESx(0x31625331), AESx(0x152A3F15),
|
||||
AESx(0x04080C04), AESx(0xC79552C7), AESx(0x23466523), AESx(0xC39D5EC3),
|
||||
AESx(0x18302818), AESx(0x9637A196), AESx(0x050A0F05), AESx(0x9A2FB59A),
|
||||
AESx(0x070E0907), AESx(0x12243612), AESx(0x801B9B80), AESx(0xE2DF3DE2),
|
||||
AESx(0xEBCD26EB), AESx(0x274E6927), AESx(0xB27FCDB2), AESx(0x75EA9F75),
|
||||
AESx(0x09121B09), AESx(0x831D9E83), AESx(0x2C58742C), AESx(0x1A342E1A),
|
||||
AESx(0x1B362D1B), AESx(0x6EDCB26E), AESx(0x5AB4EE5A), AESx(0xA05BFBA0),
|
||||
AESx(0x52A4F652), AESx(0x3B764D3B), AESx(0xD6B761D6), AESx(0xB37DCEB3),
|
||||
AESx(0x29527B29), AESx(0xE3DD3EE3), AESx(0x2F5E712F), AESx(0x84139784),
|
||||
AESx(0x53A6F553), AESx(0xD1B968D1), AESx(0x00000000), AESx(0xEDC12CED),
|
||||
AESx(0x20406020), AESx(0xFCE31FFC), AESx(0xB179C8B1), AESx(0x5BB6ED5B),
|
||||
AESx(0x6AD4BE6A), AESx(0xCB8D46CB), AESx(0xBE67D9BE), AESx(0x39724B39),
|
||||
AESx(0x4A94DE4A), AESx(0x4C98D44C), AESx(0x58B0E858), AESx(0xCF854ACF),
|
||||
AESx(0xD0BB6BD0), AESx(0xEFC52AEF), AESx(0xAA4FE5AA), AESx(0xFBED16FB),
|
||||
AESx(0x4386C543), AESx(0x4D9AD74D), AESx(0x33665533), AESx(0x85119485),
|
||||
AESx(0x458ACF45), AESx(0xF9E910F9), AESx(0x02040602), AESx(0x7FFE817F),
|
||||
AESx(0x50A0F050), AESx(0x3C78443C), AESx(0x9F25BA9F), AESx(0xA84BE3A8),
|
||||
AESx(0x51A2F351), AESx(0xA35DFEA3), AESx(0x4080C040), AESx(0x8F058A8F),
|
||||
AESx(0x923FAD92), AESx(0x9D21BC9D), AESx(0x38704838), AESx(0xF5F104F5),
|
||||
AESx(0xBC63DFBC), AESx(0xB677C1B6), AESx(0xDAAF75DA), AESx(0x21426321),
|
||||
AESx(0x10203010), AESx(0xFFE51AFF), AESx(0xF3FD0EF3), AESx(0xD2BF6DD2),
|
||||
AESx(0xCD814CCD), AESx(0x0C18140C), AESx(0x13263513), AESx(0xECC32FEC),
|
||||
AESx(0x5FBEE15F), AESx(0x9735A297), AESx(0x4488CC44), AESx(0x172E3917),
|
||||
AESx(0xC49357C4), AESx(0xA755F2A7), AESx(0x7EFC827E), AESx(0x3D7A473D),
|
||||
AESx(0x64C8AC64), AESx(0x5DBAE75D), AESx(0x19322B19), AESx(0x73E69573),
|
||||
AESx(0x60C0A060), AESx(0x81199881), AESx(0x4F9ED14F), AESx(0xDCA37FDC),
|
||||
AESx(0x22446622), AESx(0x2A547E2A), AESx(0x903BAB90), AESx(0x880B8388),
|
||||
AESx(0x468CCA46), AESx(0xEEC729EE), AESx(0xB86BD3B8), AESx(0x14283C14),
|
||||
AESx(0xDEA779DE), AESx(0x5EBCE25E), AESx(0x0B161D0B), AESx(0xDBAD76DB),
|
||||
AESx(0xE0DB3BE0), AESx(0x32645632), AESx(0x3A744E3A), AESx(0x0A141E0A),
|
||||
AESx(0x4992DB49), AESx(0x060C0A06), AESx(0x24486C24), AESx(0x5CB8E45C),
|
||||
AESx(0xC29F5DC2), AESx(0xD3BD6ED3), AESx(0xAC43EFAC), AESx(0x62C4A662),
|
||||
AESx(0x9139A891), AESx(0x9531A495), AESx(0xE4D337E4), AESx(0x79F28B79),
|
||||
AESx(0xE7D532E7), AESx(0xC88B43C8), AESx(0x376E5937), AESx(0x6DDAB76D),
|
||||
AESx(0x8D018C8D), AESx(0xD5B164D5), AESx(0x4E9CD24E), AESx(0xA949E0A9),
|
||||
AESx(0x6CD8B46C), AESx(0x56ACFA56), AESx(0xF4F307F4), AESx(0xEACF25EA),
|
||||
AESx(0x65CAAF65), AESx(0x7AF48E7A), AESx(0xAE47E9AE), AESx(0x08101808),
|
||||
AESx(0xBA6FD5BA), AESx(0x78F08878), AESx(0x254A6F25), AESx(0x2E5C722E),
|
||||
AESx(0x1C38241C), AESx(0xA657F1A6), AESx(0xB473C7B4), AESx(0xC69751C6),
|
||||
AESx(0xE8CB23E8), AESx(0xDDA17CDD), AESx(0x74E89C74), AESx(0x1F3E211F),
|
||||
AESx(0x4B96DD4B), AESx(0xBD61DCBD), AESx(0x8B0D868B), AESx(0x8A0F858A),
|
||||
AESx(0x70E09070), AESx(0x3E7C423E), AESx(0xB571C4B5), AESx(0x66CCAA66),
|
||||
AESx(0x4890D848), AESx(0x03060503), AESx(0xF6F701F6), AESx(0x0E1C120E),
|
||||
AESx(0x61C2A361), AESx(0x356A5F35), AESx(0x57AEF957), AESx(0xB969D0B9),
|
||||
AESx(0x86179186), AESx(0xC19958C1), AESx(0x1D3A271D), AESx(0x9E27B99E),
|
||||
AESx(0xE1D938E1), AESx(0xF8EB13F8), AESx(0x982BB398), AESx(0x11223311),
|
||||
AESx(0x69D2BB69), AESx(0xD9A970D9), AESx(0x8E07898E), AESx(0x9433A794),
|
||||
AESx(0x9B2DB69B), AESx(0x1E3C221E), AESx(0x87159287), AESx(0xE9C920E9),
|
||||
AESx(0xCE8749CE), AESx(0x55AAFF55), AESx(0x28507828), AESx(0xDFA57ADF),
|
||||
AESx(0x8C038F8C), AESx(0xA159F8A1), AESx(0x89098089), AESx(0x0D1A170D),
|
||||
AESx(0xBF65DABF), AESx(0xE6D731E6), AESx(0x4284C642), AESx(0x68D0B868),
|
||||
AESx(0x4182C341), AESx(0x9929B099), AESx(0x2D5A772D), AESx(0x0F1E110F),
|
||||
AESx(0xB07BCBB0), AESx(0x54A8FC54), AESx(0xBB6DD6BB), AESx(0x162C3A16)
|
||||
};
|
||||
|
||||
/*
 * 256-entry lookup table. For the rows that can be compared with the table
 * defined just above, each entry equals the corresponding entry of that table
 * rotated left by 8 bits (e.g. 0xB07BCBB0 -> 0x7BCBB0B0 in the last row).
 * AESx() is defined outside this excerpt.
 * NOTE(review): presumably one of the AES round T-tables - confirm against
 * the header comment of this file.
 */
static const sph_u32 AES3[256] = {
|
||||
AESx(0xC6A56363), AESx(0xF8847C7C), AESx(0xEE997777), AESx(0xF68D7B7B),
|
||||
AESx(0xFF0DF2F2), AESx(0xD6BD6B6B), AESx(0xDEB16F6F), AESx(0x9154C5C5),
|
||||
AESx(0x60503030), AESx(0x02030101), AESx(0xCEA96767), AESx(0x567D2B2B),
|
||||
AESx(0xE719FEFE), AESx(0xB562D7D7), AESx(0x4DE6ABAB), AESx(0xEC9A7676),
|
||||
AESx(0x8F45CACA), AESx(0x1F9D8282), AESx(0x8940C9C9), AESx(0xFA877D7D),
|
||||
AESx(0xEF15FAFA), AESx(0xB2EB5959), AESx(0x8EC94747), AESx(0xFB0BF0F0),
|
||||
AESx(0x41ECADAD), AESx(0xB367D4D4), AESx(0x5FFDA2A2), AESx(0x45EAAFAF),
|
||||
AESx(0x23BF9C9C), AESx(0x53F7A4A4), AESx(0xE4967272), AESx(0x9B5BC0C0),
|
||||
AESx(0x75C2B7B7), AESx(0xE11CFDFD), AESx(0x3DAE9393), AESx(0x4C6A2626),
|
||||
AESx(0x6C5A3636), AESx(0x7E413F3F), AESx(0xF502F7F7), AESx(0x834FCCCC),
|
||||
AESx(0x685C3434), AESx(0x51F4A5A5), AESx(0xD134E5E5), AESx(0xF908F1F1),
|
||||
AESx(0xE2937171), AESx(0xAB73D8D8), AESx(0x62533131), AESx(0x2A3F1515),
|
||||
AESx(0x080C0404), AESx(0x9552C7C7), AESx(0x46652323), AESx(0x9D5EC3C3),
|
||||
AESx(0x30281818), AESx(0x37A19696), AESx(0x0A0F0505), AESx(0x2FB59A9A),
|
||||
AESx(0x0E090707), AESx(0x24361212), AESx(0x1B9B8080), AESx(0xDF3DE2E2),
|
||||
AESx(0xCD26EBEB), AESx(0x4E692727), AESx(0x7FCDB2B2), AESx(0xEA9F7575),
|
||||
AESx(0x121B0909), AESx(0x1D9E8383), AESx(0x58742C2C), AESx(0x342E1A1A),
|
||||
AESx(0x362D1B1B), AESx(0xDCB26E6E), AESx(0xB4EE5A5A), AESx(0x5BFBA0A0),
|
||||
AESx(0xA4F65252), AESx(0x764D3B3B), AESx(0xB761D6D6), AESx(0x7DCEB3B3),
|
||||
AESx(0x527B2929), AESx(0xDD3EE3E3), AESx(0x5E712F2F), AESx(0x13978484),
|
||||
AESx(0xA6F55353), AESx(0xB968D1D1), AESx(0x00000000), AESx(0xC12CEDED),
|
||||
AESx(0x40602020), AESx(0xE31FFCFC), AESx(0x79C8B1B1), AESx(0xB6ED5B5B),
|
||||
AESx(0xD4BE6A6A), AESx(0x8D46CBCB), AESx(0x67D9BEBE), AESx(0x724B3939),
|
||||
AESx(0x94DE4A4A), AESx(0x98D44C4C), AESx(0xB0E85858), AESx(0x854ACFCF),
|
||||
AESx(0xBB6BD0D0), AESx(0xC52AEFEF), AESx(0x4FE5AAAA), AESx(0xED16FBFB),
|
||||
AESx(0x86C54343), AESx(0x9AD74D4D), AESx(0x66553333), AESx(0x11948585),
|
||||
AESx(0x8ACF4545), AESx(0xE910F9F9), AESx(0x04060202), AESx(0xFE817F7F),
|
||||
AESx(0xA0F05050), AESx(0x78443C3C), AESx(0x25BA9F9F), AESx(0x4BE3A8A8),
|
||||
AESx(0xA2F35151), AESx(0x5DFEA3A3), AESx(0x80C04040), AESx(0x058A8F8F),
|
||||
AESx(0x3FAD9292), AESx(0x21BC9D9D), AESx(0x70483838), AESx(0xF104F5F5),
|
||||
AESx(0x63DFBCBC), AESx(0x77C1B6B6), AESx(0xAF75DADA), AESx(0x42632121),
|
||||
AESx(0x20301010), AESx(0xE51AFFFF), AESx(0xFD0EF3F3), AESx(0xBF6DD2D2),
|
||||
AESx(0x814CCDCD), AESx(0x18140C0C), AESx(0x26351313), AESx(0xC32FECEC),
|
||||
AESx(0xBEE15F5F), AESx(0x35A29797), AESx(0x88CC4444), AESx(0x2E391717),
|
||||
AESx(0x9357C4C4), AESx(0x55F2A7A7), AESx(0xFC827E7E), AESx(0x7A473D3D),
|
||||
AESx(0xC8AC6464), AESx(0xBAE75D5D), AESx(0x322B1919), AESx(0xE6957373),
|
||||
AESx(0xC0A06060), AESx(0x19988181), AESx(0x9ED14F4F), AESx(0xA37FDCDC),
|
||||
AESx(0x44662222), AESx(0x547E2A2A), AESx(0x3BAB9090), AESx(0x0B838888),
|
||||
AESx(0x8CCA4646), AESx(0xC729EEEE), AESx(0x6BD3B8B8), AESx(0x283C1414),
|
||||
AESx(0xA779DEDE), AESx(0xBCE25E5E), AESx(0x161D0B0B), AESx(0xAD76DBDB),
|
||||
AESx(0xDB3BE0E0), AESx(0x64563232), AESx(0x744E3A3A), AESx(0x141E0A0A),
|
||||
AESx(0x92DB4949), AESx(0x0C0A0606), AESx(0x486C2424), AESx(0xB8E45C5C),
|
||||
AESx(0x9F5DC2C2), AESx(0xBD6ED3D3), AESx(0x43EFACAC), AESx(0xC4A66262),
|
||||
AESx(0x39A89191), AESx(0x31A49595), AESx(0xD337E4E4), AESx(0xF28B7979),
|
||||
AESx(0xD532E7E7), AESx(0x8B43C8C8), AESx(0x6E593737), AESx(0xDAB76D6D),
|
||||
AESx(0x018C8D8D), AESx(0xB164D5D5), AESx(0x9CD24E4E), AESx(0x49E0A9A9),
|
||||
AESx(0xD8B46C6C), AESx(0xACFA5656), AESx(0xF307F4F4), AESx(0xCF25EAEA),
|
||||
AESx(0xCAAF6565), AESx(0xF48E7A7A), AESx(0x47E9AEAE), AESx(0x10180808),
|
||||
AESx(0x6FD5BABA), AESx(0xF0887878), AESx(0x4A6F2525), AESx(0x5C722E2E),
|
||||
AESx(0x38241C1C), AESx(0x57F1A6A6), AESx(0x73C7B4B4), AESx(0x9751C6C6),
|
||||
AESx(0xCB23E8E8), AESx(0xA17CDDDD), AESx(0xE89C7474), AESx(0x3E211F1F),
|
||||
AESx(0x96DD4B4B), AESx(0x61DCBDBD), AESx(0x0D868B8B), AESx(0x0F858A8A),
|
||||
AESx(0xE0907070), AESx(0x7C423E3E), AESx(0x71C4B5B5), AESx(0xCCAA6666),
|
||||
AESx(0x90D84848), AESx(0x06050303), AESx(0xF701F6F6), AESx(0x1C120E0E),
|
||||
AESx(0xC2A36161), AESx(0x6A5F3535), AESx(0xAEF95757), AESx(0x69D0B9B9),
|
||||
AESx(0x17918686), AESx(0x9958C1C1), AESx(0x3A271D1D), AESx(0x27B99E9E),
|
||||
AESx(0xD938E1E1), AESx(0xEB13F8F8), AESx(0x2BB39898), AESx(0x22331111),
|
||||
AESx(0xD2BB6969), AESx(0xA970D9D9), AESx(0x07898E8E), AESx(0x33A79494),
|
||||
AESx(0x2DB69B9B), AESx(0x3C221E1E), AESx(0x15928787), AESx(0xC920E9E9),
|
||||
AESx(0x8749CECE), AESx(0xAAFF5555), AESx(0x50782828), AESx(0xA57ADFDF),
|
||||
AESx(0x038F8C8C), AESx(0x59F8A1A1), AESx(0x09808989), AESx(0x1A170D0D),
|
||||
AESx(0x65DABFBF), AESx(0xD731E6E6), AESx(0x84C64242), AESx(0xD0B86868),
|
||||
AESx(0x82C34141), AESx(0x29B09999), AESx(0x5A772D2D), AESx(0x1E110F0F),
|
||||
AESx(0x7BCBB0B0), AESx(0xA8FC5454), AESx(0x6DD6BBBB), AESx(0x2C3A1616)
|
||||
};
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
832
src/crypto/ghostrider/ghostrider.cpp
Normal file
832
src/crypto/ghostrider/ghostrider.cpp
Normal file
@@ -0,0 +1,832 @@
|
||||
/* XMRig
|
||||
* Copyright 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
|
||||
#include "ghostrider.h"
|
||||
#include "sph_blake.h"
|
||||
#include "sph_bmw.h"
|
||||
#include "sph_groestl.h"
|
||||
#include "sph_jh.h"
|
||||
#include "sph_keccak.h"
|
||||
#include "sph_skein.h"
|
||||
#include "sph_luffa.h"
|
||||
#include "sph_cubehash.h"
|
||||
#include "sph_shavite.h"
|
||||
#include "sph_simd.h"
|
||||
#include "sph_echo.h"
|
||||
#include "sph_hamsi.h"
|
||||
#include "sph_fugue.h"
|
||||
#include "sph_shabal.h"
|
||||
#include "sph_whirlpool.h"
|
||||
|
||||
#include "base/io/log/Log.h"
|
||||
#include "base/io/log/Tags.h"
|
||||
#include "base/tools/Chrono.h"
|
||||
#include "backend/cpu/Cpu.h"
|
||||
#include "crypto/cn/CnHash.h"
|
||||
#include "crypto/cn/CnCtx.h"
|
||||
#include "crypto/cn/CryptoNight.h"
|
||||
#include "crypto/common/VirtualMemory.h"
|
||||
|
||||
#include <thread>
|
||||
#include <atomic>
|
||||
#include <uv.h>
|
||||
|
||||
#ifdef XMRIG_FEATURE_HWLOC
|
||||
#include "base/kernel/Platform.h"
|
||||
#include "backend/cpu/platform/HwlocCpuInfo.h"
|
||||
#include <hwloc.h>
|
||||
#endif
|
||||
|
||||
#if defined(XMRIG_ARM)
|
||||
# include "crypto/cn/sse2neon.h"
|
||||
#elif defined(__GNUC__)
|
||||
# include <x86intrin.h>
|
||||
#else
|
||||
# include <intrin.h>
|
||||
#endif
|
||||
|
||||
// Generates a static one-shot wrapper h<i>(data, size, output) around the sph_<x> hash:
// a fresh context is initialised, fed `size` bytes from `data`, and closed, with the
// close call writing the digest to `output`. All wrappers share one signature so they
// can be stored in the core_hash dispatch table below.
#define CORE_HASH(i, x) static void h##i(const uint8_t* data, size_t size, uint8_t* output) \
|
||||
{ \
|
||||
sph_##x##_context ctx; \
|
||||
sph_##x##_init(&ctx); \
|
||||
sph_##x(&ctx, data, size); \
|
||||
sph_##x##_close(&ctx, output); \
|
||||
}
|
||||
|
||||
CORE_HASH( 0, blake512 );
|
||||
CORE_HASH( 1, bmw512 );
|
||||
CORE_HASH( 2, groestl512 );
|
||||
CORE_HASH( 3, jh512 );
|
||||
CORE_HASH( 4, keccak512 );
|
||||
CORE_HASH( 5, skein512 );
|
||||
CORE_HASH( 6, luffa512 );
|
||||
CORE_HASH( 7, cubehash512);
|
||||
CORE_HASH( 8, shavite512 );
|
||||
CORE_HASH( 9, simd512 );
|
||||
CORE_HASH(10, echo512 );
|
||||
CORE_HASH(11, hamsi512 );
|
||||
CORE_HASH(12, fugue512 );
|
||||
CORE_HASH(13, shabal512 );
|
||||
CORE_HASH(14, whirlpool );
|
||||
|
||||
#undef CORE_HASH
|
||||
|
||||
typedef void (*core_hash_func)(const uint8_t* data, size_t size, uint8_t* output);
|
||||
// Dispatch table for the 15 core hashes generated by CORE_HASH above. hash_octa()
// indexes it with the values produced by select_indices() (0..14).
static const core_hash_func core_hash[15] = { h0, h1, h2, h3, h4, h5, h6, h7, h8, h9, h10, h11, h12, h13, h14 };
|
||||
|
||||
namespace xmrig
|
||||
{
|
||||
|
||||
|
||||
// The six CryptoNight variants used by GhostRider. cn_hash, cn_names and cn_sizes
// below are parallel arrays: the same index (0..5, as produced by select_indices())
// selects the algorithm id, its display name and the scratchpad size used per hash.
static constexpr Algorithm::Id cn_hash[6] = {
|
||||
Algorithm::CN_GR_0,
|
||||
Algorithm::CN_GR_1,
|
||||
Algorithm::CN_GR_2,
|
||||
Algorithm::CN_GR_3,
|
||||
Algorithm::CN_GR_4,
|
||||
Algorithm::CN_GR_5,
|
||||
};
|
||||
|
||||
static constexpr const char* cn_names[6] = {
|
||||
"cn/dark (512 KB)",
|
||||
"cn/dark-lite (256 KB)",
|
||||
"cn/fast (2 MB)",
|
||||
"cn/lite (1 MB)",
|
||||
"cn/turtle (256 KB)",
|
||||
"cn/turtle-lite (128 KB)",
|
||||
};
|
||||
|
||||
static constexpr size_t cn_sizes[6] = {
|
||||
Algorithm::l3(Algorithm::CN_GR_0), // 512 KB
|
||||
Algorithm::l3(Algorithm::CN_GR_1) / 2, // 256 KB
|
||||
Algorithm::l3(Algorithm::CN_GR_2), // 2 MB
|
||||
Algorithm::l3(Algorithm::CN_GR_3), // 1 MB
|
||||
Algorithm::l3(Algorithm::CN_GR_4), // 256 KB
|
||||
Algorithm::l3(Algorithm::CN_GR_5) / 2, // 128 KB
|
||||
};
|
||||
|
||||
// Maps "hashes computed per call" (1..4) to the matching CnHash variant, for CPUs with
// hardware AES (av_hw_aes) and without it (av_soft_aes, next line). Entry 0 duplicates
// the single-hash variant so that the count can be used directly as the array index
// (callers use av[step] with step = 1, 2 or 4).
static constexpr CnHash::AlgoVariant av_hw_aes[5] = { CnHash::AV_SINGLE, CnHash::AV_SINGLE, CnHash::AV_DOUBLE, CnHash::AV_TRIPLE, CnHash::AV_QUAD };
|
||||
static constexpr CnHash::AlgoVariant av_soft_aes[5] = { CnHash::AV_SINGLE_SOFT, CnHash::AV_SINGLE_SOFT, CnHash::AV_DOUBLE_SOFT, CnHash::AV_TRIPLE_SOFT, CnHash::AV_QUAD_SOFT };
|
||||
|
||||
// Fills `indices` with a permutation of 0..N-1 derived from a 32-byte seed.
//
// The seed is read as 64 nibbles (low nibble of a byte first, then its high nibble).
// Each nibble is reduced modulo N; the first occurrence of every value is appended
// to `indices` in the order it appears. If the seed does not produce all N values,
// the missing ones are appended afterwards in ascending order.
template<size_t N>
static inline void select_indices(uint32_t (&indices)[N], const uint8_t* seed)
{
    bool taken[N] = {};
    uint32_t filled = 0;

    for (uint32_t pos = 0; pos < 64; ++pos) {
        const uint8_t byte = seed[pos >> 1];
        const uint8_t nibble = (pos & 1) ? (byte >> 4) : (byte & 0xF);
        const uint8_t candidate = nibble % N;

        if (taken[candidate]) {
            continue;
        }

        taken[candidate] = true;
        indices[filled++] = candidate;

        // Every value has been placed, nothing left to do
        if (filled >= N) {
            return;
        }
    }

    // Append the values the seed never produced
    for (uint32_t value = 0; value < N; ++value) {
        if (!taken[value]) {
            indices[filled++] = value;
        }
    }
}
|
||||
|
||||
|
||||
namespace ghostrider
|
||||
{
|
||||
|
||||
|
||||
#ifdef XMRIG_FEATURE_HWLOC
|
||||
|
||||
|
||||
// Best configuration found by benchmark() for one CryptoNight variant (arrays are
// indexed like cn_hash): `step` is the number of hashes computed per call, `threads`
// is 1 or 2 and `hashrate` is the measured rate for that combination.
// tuneDefault only accepts configurations whose total scratchpad size is below 8 MB;
// tune8MB accepts anything up to the maximum scratchpad size chosen by benchmark().
static struct AlgoTune
|
||||
{
|
||||
double hashrate = 0.0;
|
||||
uint32_t step = 1;
|
||||
uint32_t threads = 1;
|
||||
} tuneDefault[6], tune8MB[6];
|
||||
|
||||
|
||||
struct HelperThread
|
||||
{
|
||||
HelperThread(hwloc_bitmap_t cpu_set, bool is8MB) : m_cpuSet(cpu_set), m_is8MB(is8MB)
|
||||
{
|
||||
uv_mutex_init(&m_mutex);
|
||||
uv_cond_init(&m_cond);
|
||||
|
||||
m_thread = new std::thread(&HelperThread::run, this);
|
||||
do {
|
||||
std::this_thread::sleep_for(std::chrono::milliseconds(1));
|
||||
} while (!m_ready);
|
||||
}
|
||||
|
||||
~HelperThread()
|
||||
{
|
||||
uv_mutex_lock(&m_mutex);
|
||||
m_finished = true;
|
||||
uv_cond_signal(&m_cond);
|
||||
uv_mutex_unlock(&m_mutex);
|
||||
|
||||
m_thread->join();
|
||||
delete m_thread;
|
||||
|
||||
uv_mutex_destroy(&m_mutex);
|
||||
uv_cond_destroy(&m_cond);
|
||||
|
||||
hwloc_bitmap_free(m_cpuSet);
|
||||
}
|
||||
|
||||
struct TaskBase
|
||||
{
|
||||
virtual ~TaskBase() {}
|
||||
virtual void run() = 0;
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
struct Task : TaskBase
|
||||
{
|
||||
inline Task(T&& task) : m_task(std::move(task))
|
||||
{
|
||||
static_assert(sizeof(Task) <= 128, "Task struct is too large");
|
||||
}
|
||||
|
||||
void run() override
|
||||
{
|
||||
m_task();
|
||||
this->~Task();
|
||||
}
|
||||
|
||||
T m_task;
|
||||
};
|
||||
|
||||
template<typename T>
|
||||
inline void launch_task(T&& task)
|
||||
{
|
||||
uv_mutex_lock(&m_mutex);
|
||||
new (&m_tasks[m_numTasks++]) Task<T>(std::move(task));
|
||||
uv_cond_signal(&m_cond);
|
||||
uv_mutex_unlock(&m_mutex);
|
||||
}
|
||||
|
||||
inline void wait() const
|
||||
{
|
||||
while (m_numTasks) {
|
||||
_mm_pause();
|
||||
}
|
||||
}
|
||||
|
||||
void run()
|
||||
{
|
||||
if (hwloc_bitmap_weight(m_cpuSet) > 0) {
|
||||
hwloc_topology_t topology = reinterpret_cast<HwlocCpuInfo*>(Cpu::info())->topology();
|
||||
if (hwloc_set_cpubind(topology, m_cpuSet, HWLOC_CPUBIND_THREAD | HWLOC_CPUBIND_STRICT) < 0) {
|
||||
hwloc_set_cpubind(topology, m_cpuSet, HWLOC_CPUBIND_THREAD);
|
||||
}
|
||||
}
|
||||
|
||||
uv_mutex_lock(&m_mutex);
|
||||
m_ready = true;
|
||||
|
||||
do {
|
||||
uv_cond_wait(&m_cond, &m_mutex);
|
||||
|
||||
const uint32_t n = m_numTasks;
|
||||
if (n > 0) {
|
||||
for (uint32_t i = 0; i < n; ++i) {
|
||||
reinterpret_cast<TaskBase*>(&m_tasks[i])->run();
|
||||
}
|
||||
std::atomic_thread_fence(std::memory_order_seq_cst);
|
||||
m_numTasks = 0;
|
||||
}
|
||||
} while (!m_finished);
|
||||
|
||||
uv_mutex_unlock(&m_mutex);
|
||||
}
|
||||
|
||||
uv_mutex_t m_mutex;
|
||||
uv_cond_t m_cond;
|
||||
|
||||
alignas(16) uint8_t m_tasks[4][128] = {};
|
||||
volatile uint32_t m_numTasks = 0;
|
||||
volatile bool m_ready = false;
|
||||
volatile bool m_finished = false;
|
||||
hwloc_bitmap_t m_cpuSet = {};
|
||||
bool m_is8MB = false;
|
||||
|
||||
std::thread* m_thread = nullptr;
|
||||
};
|
||||
|
||||
|
||||
void benchmark()
|
||||
{
|
||||
#ifndef XMRIG_ARM
|
||||
static std::atomic<int> done{ 0 };
|
||||
if (done.exchange(1)) {
|
||||
return;
|
||||
}
|
||||
|
||||
std::thread t([]() {
|
||||
// Try to avoid CPU core 0 because many system threads use it and can interfere
|
||||
uint32_t thread_index1 = (Cpu::info()->threads() > 2) ? 2 : 0;
|
||||
|
||||
hwloc_topology_t topology = reinterpret_cast<HwlocCpuInfo*>(Cpu::info())->topology();
|
||||
hwloc_obj_t pu = hwloc_get_pu_obj_by_os_index(topology, thread_index1);
|
||||
hwloc_obj_t pu2;
|
||||
hwloc_get_closest_objs(topology, pu, &pu2, 1);
|
||||
uint32_t thread_index2 = pu2->os_index;
|
||||
|
||||
if (thread_index2 < thread_index1) {
|
||||
std::swap(thread_index1, thread_index2);
|
||||
}
|
||||
|
||||
Platform::setThreadAffinity(thread_index1);
|
||||
|
||||
constexpr uint32_t N = 1U << 21;
|
||||
|
||||
VirtualMemory::init(0, N);
|
||||
VirtualMemory* memory = new VirtualMemory(N * 8, true, false, false);
|
||||
|
||||
// 2 MB cache per core by default
|
||||
size_t max_scratchpad_size = 1U << 21;
|
||||
|
||||
if ((Cpu::info()->L3() >> 22) > Cpu::info()->cores()) {
|
||||
// At least 1 core can run with 8 MB cache
|
||||
max_scratchpad_size = 1U << 23;
|
||||
}
|
||||
else if ((Cpu::info()->L3() >> 22) >= Cpu::info()->cores()) {
|
||||
// All cores can run with 4 MB cache
|
||||
max_scratchpad_size = 1U << 22;
|
||||
}
|
||||
|
||||
LOG_VERBOSE("Running GhostRider benchmark on logical CPUs %u and %u (max scratchpad size %zu MB, huge pages %s)", thread_index1, thread_index2, max_scratchpad_size >> 20, memory->isHugePages() ? "on" : "off");
|
||||
|
||||
cryptonight_ctx* ctx[8];
|
||||
CnCtx::create(ctx, memory->scratchpad(), N, 8);
|
||||
|
||||
const CnHash::AlgoVariant* av = Cpu::info()->hasAES() ? av_hw_aes : av_soft_aes;
|
||||
|
||||
uint8_t buf[80];
|
||||
uint8_t hash[32 * 8];
|
||||
|
||||
LOG_VERBOSE("%24s | N | Hashrate", "Algorithm");
|
||||
LOG_VERBOSE("-------------------------|-----|-------------");
|
||||
|
||||
for (uint32_t algo = 0; algo < 6; ++algo) {
|
||||
for (uint64_t step : { 1, 2, 4}) {
|
||||
const size_t cur_scratchpad_size = cn_sizes[algo] * step;
|
||||
if (cur_scratchpad_size > max_scratchpad_size) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto f = CnHash::fn(cn_hash[algo], av[step], Assembly::AUTO);
|
||||
|
||||
double start_time = Chrono::highResolutionMSecs();
|
||||
|
||||
double min_dt = 1e10;
|
||||
for (uint32_t iter = 0;; ++iter) {
|
||||
double t1 = Chrono::highResolutionMSecs();
|
||||
|
||||
// Stop after 15 milliseconds, but only if at least 10 iterations were done
|
||||
if ((iter >= 10) && (t1 - start_time >= 15.0)) {
|
||||
break;
|
||||
}
|
||||
|
||||
f(buf, sizeof(buf), hash, ctx, 0);
|
||||
|
||||
const double dt = Chrono::highResolutionMSecs() - t1;
|
||||
if (dt < min_dt) {
|
||||
min_dt = dt;
|
||||
}
|
||||
}
|
||||
|
||||
const double hashrate = step * 1e3 / min_dt;
|
||||
LOG_VERBOSE("%24s | %" PRIu64 "x1 | %.2f h/s", cn_names[algo], step, hashrate);
|
||||
|
||||
if (hashrate > tune8MB[algo].hashrate) {
|
||||
tune8MB[algo].hashrate = hashrate;
|
||||
tune8MB[algo].step = static_cast<uint32_t>(step);
|
||||
tune8MB[algo].threads = 1;
|
||||
}
|
||||
|
||||
if ((cur_scratchpad_size < (1U << 23)) && (hashrate > tuneDefault[algo].hashrate)) {
|
||||
tuneDefault[algo].hashrate = hashrate;
|
||||
tuneDefault[algo].step = static_cast<uint32_t>(step);
|
||||
tuneDefault[algo].threads = 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
hwloc_bitmap_t helper_set = hwloc_bitmap_alloc();
|
||||
hwloc_bitmap_set(helper_set, thread_index2);
|
||||
HelperThread* helper = new HelperThread(helper_set, false);
|
||||
|
||||
for (uint32_t algo = 0; algo < 6; ++algo) {
|
||||
for (uint64_t step : { 1, 2, 4}) {
|
||||
const size_t cur_scratchpad_size = cn_sizes[algo] * step * 2;
|
||||
if (cur_scratchpad_size > max_scratchpad_size) {
|
||||
continue;
|
||||
}
|
||||
|
||||
auto f = CnHash::fn(cn_hash[algo], av[step], Assembly::AUTO);
|
||||
|
||||
double start_time = Chrono::highResolutionMSecs();
|
||||
|
||||
double min_dt = 1e10;
|
||||
for (uint32_t iter = 0;; ++iter) {
|
||||
double t1 = Chrono::highResolutionMSecs();
|
||||
|
||||
// Stop after 30 milliseconds, but only if at least 10 iterations were done
|
||||
if ((iter >= 10) && (t1 - start_time >= 30.0)) {
|
||||
break;
|
||||
}
|
||||
|
||||
helper->launch_task([&f, &buf, &hash, &ctx, &step]() { f(buf, sizeof(buf), hash + step * 32, ctx + step, 0); });
|
||||
f(buf, sizeof(buf), hash, ctx, 0);
|
||||
helper->wait();
|
||||
|
||||
const double dt = Chrono::highResolutionMSecs() - t1;
|
||||
if (dt < min_dt) {
|
||||
min_dt = dt;
|
||||
}
|
||||
}
|
||||
|
||||
const double hashrate = step * 2e3 / min_dt * 1.0075;
|
||||
LOG_VERBOSE("%24s | %" PRIu64 "x2 | %.2f h/s", cn_names[algo], step, hashrate);
|
||||
|
||||
if (hashrate > tune8MB[algo].hashrate) {
|
||||
tune8MB[algo].hashrate = hashrate;
|
||||
tune8MB[algo].step = static_cast<uint32_t>(step);
|
||||
tune8MB[algo].threads = 2;
|
||||
}
|
||||
|
||||
if ((cur_scratchpad_size < (1U << 23)) && (hashrate > tuneDefault[algo].hashrate)) {
|
||||
tuneDefault[algo].hashrate = hashrate;
|
||||
tuneDefault[algo].step = static_cast<uint32_t>(step);
|
||||
tuneDefault[algo].threads = 2;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
delete helper;
|
||||
|
||||
CnCtx::release(ctx, 8);
|
||||
delete memory;
|
||||
});
|
||||
|
||||
t.join();
|
||||
|
||||
LOG_VERBOSE("---------------------------------------------");
|
||||
LOG_VERBOSE("| GhostRider tuning results |");
|
||||
LOG_VERBOSE("---------------------------------------------");
|
||||
|
||||
for (int algo = 0; algo < 6; ++algo) {
|
||||
LOG_VERBOSE("%24s | %ux%u | %.2f h/s", cn_names[algo], tuneDefault[algo].step, tuneDefault[algo].threads, tuneDefault[algo].hashrate);
|
||||
if ((tune8MB[algo].step != tuneDefault[algo].step) || (tune8MB[algo].threads != tuneDefault[algo].threads)) {
|
||||
LOG_VERBOSE("%24s | %ux%u | %.2f h/s", cn_names[algo], tune8MB[algo].step, tune8MB[algo].threads, tune8MB[algo].hashrate);
|
||||
}
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
template <typename func>
|
||||
static inline bool findByType(hwloc_obj_t obj, hwloc_obj_type_t type, func lambda)
|
||||
{
|
||||
for (size_t i = 0; i < obj->arity; i++) {
|
||||
if (obj->children[i]->type == type) {
|
||||
if (lambda(obj->children[i])) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (findByType(obj->children[i], type, lambda)) {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// Creates a helper thread for the main thread running on logical CPU `cpu_index`.
//
// `affinities` lists the logical CPUs used by main threads (negative entries are
// ignored); the helper is never placed on any of them. Candidate CPUs are looked up
// in order of closeness to `cpu_index`: same core, then same L1, L2 and L3 cache.
// The helper is also told whether its core is one of those that can use 8 MB of the
// L3 cache it shares with `cpu_index`.
//
// Returns nullptr if `cpu_index` is negative, if no free logical CPU was found, or
// on ARM builds. A non-null result must be released with destroy_helper_thread().
HelperThread* create_helper_thread(int64_t cpu_index, const std::vector<int64_t>& affinities)
{
    HelperThread* helper = nullptr;

#ifndef XMRIG_ARM
    if (cpu_index < 0) {
        return nullptr;
    }

    hwloc_bitmap_t helper_cpu_set = hwloc_bitmap_alloc();
    hwloc_bitmap_t main_threads_set = hwloc_bitmap_alloc();

    for (int64_t i : affinities) {
        if (i >= 0) {
            hwloc_bitmap_set(main_threads_set, i);
        }
    }

    hwloc_topology_t topology = reinterpret_cast<HwlocCpuInfo*>(Cpu::info())->topology();
    hwloc_obj_t root = hwloc_get_root_obj(topology);

    bool is8MB = false;

    // Find the L3 cache that contains cpu_index and check how much of it each core can use
    findByType(root, HWLOC_OBJ_L3CACHE, [cpu_index, &is8MB](hwloc_obj_t obj) {
        if (!hwloc_bitmap_isset(obj->cpuset, cpu_index)) {
            return false;
        }

        uint32_t num_cores = 0;
        findByType(obj, HWLOC_OBJ_CORE, [&num_cores](hwloc_obj_t) { ++num_cores; return false; });

        // Cache size is counted in 4 MB units: every unit above one per core lets
        // one more core (in hwloc enumeration order) use 8 MB
        if ((obj->attr->cache.size >> 22) > num_cores) {
            uint32_t num_8MB_cores = (obj->attr->cache.size >> 22) - num_cores;

            is8MB = findByType(obj, HWLOC_OBJ_CORE, [cpu_index, &num_8MB_cores](hwloc_obj_t obj2) {
                if (num_8MB_cores > 0) {
                    --num_8MB_cores;
                    if (hwloc_bitmap_isset(obj2->cpuset, cpu_index)) {
                        return true;
                    }
                }
                return false;
            });
        }
        return true;
    });

    for (auto obj_type : { HWLOC_OBJ_CORE, HWLOC_OBJ_L1CACHE, HWLOC_OBJ_L2CACHE, HWLOC_OBJ_L3CACHE }) {
        // helper_cpu_set = CPUs of the object containing cpu_index, minus main threads' CPUs
        findByType(root, obj_type, [cpu_index, helper_cpu_set, main_threads_set](hwloc_obj_t obj) {
            const hwloc_cpuset_t& s = obj->cpuset;
            if (hwloc_bitmap_isset(s, cpu_index)) {
                hwloc_bitmap_andnot(helper_cpu_set, s, main_threads_set);
                if (hwloc_bitmap_weight(helper_cpu_set) > 0) {
                    return true;
                }
            }
            return false;
        });

        if (hwloc_bitmap_weight(helper_cpu_set) > 0) {
            // The helper takes ownership of helper_cpu_set and frees it in its destructor
            helper = new HelperThread(helper_cpu_set, is8MB);
            break;
        }
    }

    // Release the temporary bitmaps on every path
    hwloc_bitmap_free(main_threads_set);
    if (!helper) {
        hwloc_bitmap_free(helper_cpu_set);
    }
#endif

    return helper;
}
|
||||
|
||||
|
||||
// Destroys a helper returned by create_helper_thread(). The HelperThread destructor
// tells the thread to finish and joins it before releasing its resources.
// Passing nullptr is allowed and does nothing.
void destroy_helper_thread(HelperThread* t)
|
||||
{
|
||||
delete t;
|
||||
}
|
||||
|
||||
|
||||
// Computes 8 GhostRider hashes in one call.
//
// `data` holds 8 consecutive inputs of `size` bytes each (lane j is read from
// data + j * size); 32 bytes per lane are written to output + j * 32.
// The seed at data + 4 selects, via select_indices(), the order of the 15 core hashes
// and of the 6 CryptoNight variants. The hash then runs 3 parts, each one being
// 5 core hashes followed by one CryptoNight variant.
//
// When `helper` is non-null and the tuning tables ask for 2 threads, lanes 4..7 are
// processed on the helper thread: for all 3 parts in one task if every selected
// variant is tuned for 2 threads, otherwise one task per part that needs it.
// ctx[i]->memory is repointed while hashing so that the scratchpads of the lanes hashed
// together are packed next to each other, and is restored before returning.
// With `verbose` set, the selected CryptoNight variants are logged whenever they change.
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread* helper, bool verbose)
|
||||
{
|
||||
enum { N = 8 };
|
||||
|
||||
uint8_t* ctx_memory[N];
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
ctx_memory[i] = ctx[i]->memory;
|
||||
}
|
||||
|
||||
// PrevBlockHash (GhostRider's seed) is stored in bytes [4; 36)
|
||||
uint32_t core_indices[15];
|
||||
select_indices(core_indices, data + 4);
|
||||
|
||||
uint32_t cn_indices[6];
|
||||
select_indices(cn_indices, data + 4);
|
||||
|
||||
if (verbose) {
|
||||
static uint32_t prev_indices[3];
|
||||
if (memcmp(cn_indices, prev_indices, sizeof(prev_indices)) != 0) {
|
||||
memcpy(prev_indices, cn_indices, sizeof(prev_indices));
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
LOG_INFO("%s GhostRider algo %d: %s", Tags::cpu(), i + 1, cn_names[cn_indices[i]]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const CnHash::AlgoVariant* av = Cpu::info()->hasAES() ? av_hw_aes : av_soft_aes;
|
||||
const AlgoTune* tune = (helper && helper->m_is8MB) ? tune8MB : tuneDefault;
|
||||
|
||||
uint8_t tmp[64 * N];
|
||||
|
||||
if (helper && (tune[cn_indices[0]].threads == 2) && (tune[cn_indices[1]].threads == 2) && (tune[cn_indices[2]].threads == 2)) {
|
||||
const size_t n = N / 2;
|
||||
|
||||
helper->launch_task([n, av, data, size, &ctx_memory, ctx, &cn_indices, &core_indices, &tmp, output, tune]() {
|
||||
const uint8_t* input = data;
|
||||
size_t input_size = size;
|
||||
|
||||
for (size_t part = 0; part < 3; ++part) {
|
||||
const AlgoTune& t = tune[cn_indices[part]];
|
||||
|
||||
// Allocate scratchpads
|
||||
{
|
||||
uint8_t* p = ctx_memory[4];
|
||||
|
||||
for (size_t i = n, k = 4; i < N; ++i) {
|
||||
if ((i % t.step) == 0) {
|
||||
k = 4;
|
||||
p = ctx_memory[4];
|
||||
}
|
||||
else if (p - ctx_memory[k] >= (1 << 21)) {
|
||||
++k;
|
||||
p = ctx_memory[k];
|
||||
}
|
||||
ctx[i]->memory = p;
|
||||
p += cn_sizes[cn_indices[part]];
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < 5; ++i) {
|
||||
for (size_t j = n; j < N; ++j) {
|
||||
core_hash[core_indices[part * 5 + i]](input + j * input_size, input_size, tmp + j * 64);
|
||||
}
|
||||
input = tmp;
|
||||
input_size = 64;
|
||||
}
|
||||
|
||||
auto f = CnHash::fn(cn_hash[cn_indices[part]], av[t.step], Assembly::AUTO);
|
||||
for (size_t j = n; j < N; j += t.step) {
|
||||
f(tmp + j * 64, 64, output + j * 32, ctx + n, 0);
|
||||
}
|
||||
|
||||
for (size_t j = n; j < N; ++j) {
|
||||
memcpy(tmp + j * 64, output + j * 32, 32);
|
||||
memset(tmp + j * 64 + 32, 0, 32);
|
||||
}
|
||||
}
|
||||
});
|
||||
|
||||
const uint8_t* input = data;
|
||||
size_t input_size = size;
|
||||
|
||||
for (size_t part = 0; part < 3; ++part) {
|
||||
const AlgoTune& t = tune[cn_indices[part]];
|
||||
|
||||
// Allocate scratchpads
|
||||
{
|
||||
uint8_t* p = ctx_memory[0];
|
||||
|
||||
for (size_t i = 0, k = 0; i < n; ++i) {
|
||||
if ((i % t.step) == 0) {
|
||||
k = 0;
|
||||
p = ctx_memory[0];
|
||||
}
|
||||
else if (p - ctx_memory[k] >= (1 << 21)) {
|
||||
++k;
|
||||
p = ctx_memory[k];
|
||||
}
|
||||
ctx[i]->memory = p;
|
||||
p += cn_sizes[cn_indices[part]];
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < 5; ++i) {
|
||||
for (size_t j = 0; j < n; ++j) {
|
||||
core_hash[core_indices[part * 5 + i]](input + j * input_size, input_size, tmp + j * 64);
|
||||
}
|
||||
input = tmp;
|
||||
input_size = 64;
|
||||
}
|
||||
|
||||
auto f = CnHash::fn(cn_hash[cn_indices[part]], av[t.step], Assembly::AUTO);
|
||||
for (size_t j = 0; j < n; j += t.step) {
|
||||
f(tmp + j * 64, 64, output + j * 32, ctx, 0);
|
||||
}
|
||||
|
||||
for (size_t j = 0; j < n; ++j) {
|
||||
memcpy(tmp + j * 64, output + j * 32, 32);
|
||||
memset(tmp + j * 64 + 32, 0, 32);
|
||||
}
|
||||
}
|
||||
|
||||
helper->wait();
|
||||
}
|
||||
else {
|
||||
for (size_t part = 0; part < 3; ++part) {
|
||||
const AlgoTune& t = tune[cn_indices[part]];
|
||||
|
||||
// Allocate scratchpads
|
||||
{
|
||||
uint8_t* p = ctx_memory[0];
|
||||
const size_t n = N / t.threads;
|
||||
|
||||
// Thread 1
|
||||
for (size_t i = 0, k = 0; i < n; ++i) {
|
||||
if ((i % t.step) == 0) {
|
||||
k = 0;
|
||||
p = ctx_memory[0];
|
||||
}
|
||||
else if (p - ctx_memory[k] >= (1 << 21)) {
|
||||
++k;
|
||||
p = ctx_memory[k];
|
||||
}
|
||||
ctx[i]->memory = p;
|
||||
p += cn_sizes[cn_indices[part]];
|
||||
}
|
||||
|
||||
// Thread 2
|
||||
for (size_t i = n, k = 4; i < N; ++i) {
|
||||
if ((i % t.step) == 0) {
|
||||
k = 4;
|
||||
p = ctx_memory[4];
|
||||
}
|
||||
else if (p - ctx_memory[k] >= (1 << 21)) {
|
||||
++k;
|
||||
p = ctx_memory[k];
|
||||
}
|
||||
ctx[i]->memory = p;
|
||||
p += cn_sizes[cn_indices[part]];
|
||||
}
|
||||
}
|
||||
|
||||
size_t n = N;
|
||||
|
||||
if (helper && (t.threads == 2)) {
|
||||
n = N / 2;
|
||||
|
||||
helper->launch_task([data, size, n, &cn_indices, &core_indices, part, &tmp, av, &t, output, ctx]() {
|
||||
const uint8_t* input = data;
|
||||
size_t input_size = size;
|
||||
|
||||
for (size_t i = 0; i < 5; ++i) {
|
||||
for (size_t j = n; j < N; ++j) {
|
||||
core_hash[core_indices[part * 5 + i]](input + j * input_size, input_size, tmp + j * 64);
|
||||
}
|
||||
input = tmp;
|
||||
input_size = 64;
|
||||
}
|
||||
|
||||
auto f = CnHash::fn(cn_hash[cn_indices[part]], av[t.step], Assembly::AUTO);
|
||||
for (size_t j = n; j < N; j += t.step) {
|
||||
f(tmp + j * 64, 64, output + j * 32, ctx + n, 0);
|
||||
}
|
||||
|
||||
for (size_t j = n; j < N; ++j) {
|
||||
memcpy(tmp + j * 64, output + j * 32, 32);
|
||||
memset(tmp + j * 64 + 32, 0, 32);
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < 5; ++i) {
|
||||
for (size_t j = 0; j < n; ++j) {
|
||||
core_hash[core_indices[part * 5 + i]](data + j * size, size, tmp + j * 64);
|
||||
}
|
||||
data = tmp;
|
||||
size = 64;
|
||||
}
|
||||
|
||||
auto f = CnHash::fn(cn_hash[cn_indices[part]], av[t.step], Assembly::AUTO);
|
||||
for (size_t j = 0; j < n; j += t.step) {
|
||||
f(tmp + j * 64, 64, output + j * 32, ctx, 0);
|
||||
}
|
||||
|
||||
for (size_t j = 0; j < n; ++j) {
|
||||
memcpy(tmp + j * 64, output + j * 32, 32);
|
||||
memset(tmp + j * 64 + 32, 0, 32);
|
||||
}
|
||||
|
||||
if (helper && (t.threads == 2)) {
|
||||
helper->wait();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
ctx[i]->memory = ctx_memory[i];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#else // XMRIG_FEATURE_HWLOC
|
||||
|
||||
|
||||
// Build without hwloc: there is no auto-tuning and there are no helper threads.
// benchmark() and destroy_helper_thread() do nothing, create_helper_thread()
// always returns nullptr.
void benchmark() {}
|
||||
HelperThread* create_helper_thread(int64_t, const std::vector<int64_t>&) { return nullptr; }
|
||||
void destroy_helper_thread(HelperThread*) {}
|
||||
|
||||
|
||||
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread*, bool verbose)
|
||||
{
|
||||
constexpr uint32_t N = 8;
|
||||
|
||||
// PrevBlockHash (GhostRider's seed) is stored in bytes [4; 36)
|
||||
const uint8_t* seed = data + 4;
|
||||
|
||||
uint32_t core_indices[15];
|
||||
select_indices(core_indices, seed);
|
||||
|
||||
uint32_t cn_indices[6];
|
||||
select_indices(cn_indices, seed);
|
||||
|
||||
#ifdef XMRIG_ARM
|
||||
uint32_t step[6] = { 1, 1, 1, 1, 1, 1 };
|
||||
#else
|
||||
uint32_t step[6] = { 4, 4, 1, 2, 4, 4 };
|
||||
#endif
|
||||
|
||||
if (verbose) {
|
||||
static uint32_t prev_indices[3];
|
||||
if (memcmp(cn_indices, prev_indices, sizeof(prev_indices)) != 0) {
|
||||
memcpy(prev_indices, cn_indices, sizeof(prev_indices));
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
LOG_INFO("%s GhostRider algo %d: %s", Tags::cpu(), i + 1, cn_names[cn_indices[i]]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const CnHash::AlgoVariant* av = Cpu::info()->hasAES() ? av_hw_aes : av_soft_aes;
|
||||
|
||||
const cn_hash_fun f[3] = {
|
||||
CnHash::fn(cn_hash[cn_indices[0]], av[step[cn_indices[0]]], Assembly::AUTO),
|
||||
CnHash::fn(cn_hash[cn_indices[1]], av[step[cn_indices[1]]], Assembly::AUTO),
|
||||
CnHash::fn(cn_hash[cn_indices[2]], av[step[cn_indices[2]]], Assembly::AUTO),
|
||||
};
|
||||
|
||||
uint8_t tmp[64 * N];
|
||||
|
||||
for (uint64_t part = 0; part < 3; ++part) {
|
||||
for (uint64_t i = 0; i < 5; ++i) {
|
||||
for (uint64_t j = 0; j < N; ++j) {
|
||||
core_hash[core_indices[part * 5 + i]](data + j * size, size, tmp + j * 64);
|
||||
data = tmp;
|
||||
size = 64;
|
||||
}
|
||||
}
|
||||
for (uint64_t j = 0, k = step[cn_indices[part]]; j < N; j += k) {
|
||||
f[part](tmp + j * 64, 64, output + j * 32, ctx, 0);
|
||||
}
|
||||
for (uint64_t j = 0; j < N; ++j) {
|
||||
memcpy(tmp + j * 64, output + j * 32, 32);
|
||||
memset(tmp + j * 64 + 32, 0, 32);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
#endif // XMRIG_FEATURE_HWLOC
|
||||
|
||||
|
||||
} // namespace ghostrider
|
||||
|
||||
|
||||
} // namespace xmrig
|
||||
52
src/crypto/ghostrider/ghostrider.h
Normal file
52
src/crypto/ghostrider/ghostrider.h
Normal file
@@ -0,0 +1,52 @@
|
||||
/* XMRig
|
||||
* Copyright 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_GR_HASH_H
|
||||
#define XMRIG_GR_HASH_H
|
||||
|
||||
|
||||
#include <cstddef>
|
||||
#include <cstdint>
|
||||
#include <vector>
|
||||
|
||||
|
||||
struct cryptonight_ctx;
|
||||
|
||||
|
||||
namespace xmrig
|
||||
{
|
||||
|
||||
|
||||
namespace ghostrider
|
||||
{
|
||||
|
||||
|
||||
// Opaque helper-thread handle; only forward-declared in this header.
struct HelperThread;
|
||||
|
||||
// NOTE(review): behavior is not visible from this header; takes no arguments and returns nothing.
void benchmark();
|
||||
// Returns a HelperThread handle for the given cpu_index / affinities list.
// NOTE(review): presumably the result must be released with destroy_helper_thread() - confirm.
HelperThread* create_helper_thread(int64_t cpu_index, const std::vector<int64_t>& affinities);
|
||||
// Counterpart of create_helper_thread() for a handle `t`.
void destroy_helper_thread(HelperThread* t);
|
||||
// Hashes `size`-byte input(s) at `data` into `output`, using the supplied
// cryptonight contexts and an optional helper thread; `verbose` defaults to true.
// NOTE(review): the name suggests 8 inputs per call and the buffer layout is not
// shown here - confirm against the implementation.
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread* helper, bool verbose = true);
|
||||
|
||||
|
||||
} // namespace ghostrider
|
||||
|
||||
|
||||
} // namespace xmrig
|
||||
|
||||
#endif // XMRIG_GR_HASH_H
|
||||
346
src/crypto/ghostrider/md_helper.c
Normal file
346
src/crypto/ghostrider/md_helper.c
Normal file
@@ -0,0 +1,346 @@
|
||||
/* $Id: md_helper.c 216 2010-06-08 09:46:57Z tp $ */
|
||||
/*
|
||||
* This file contains some functions which implement the external data
|
||||
* handling and padding for Merkle-Damgard hash functions which follow
|
||||
* the conventions set out by MD4 (little-endian) or SHA-1 (big-endian).
|
||||
*
|
||||
* API: this file is meant to be included, not compiled as a stand-alone
|
||||
* file. Some macros must be defined:
|
||||
* RFUN name for the round function
|
||||
* HASH "short name" for the hash function
|
||||
* BE32 defined for big-endian, 32-bit based (e.g. SHA-1)
|
||||
* LE32 defined for little-endian, 32-bit based (e.g. MD5)
|
||||
* BE64 defined for big-endian, 64-bit based (e.g. SHA-512)
|
||||
* LE64 defined for little-endian, 64-bit based (no example yet)
|
||||
* PW01 if defined, append 0x01 instead of 0x80 (for Tiger)
|
||||
* BLEN if defined, length of a message block (in bytes)
|
||||
* PLW1 if defined, length is defined on one 64-bit word only (for Tiger)
|
||||
* PLW4 if defined, length is defined on four 64-bit words (for WHIRLPOOL)
|
||||
* SVAL if defined, reference to the context state information
|
||||
*
|
||||
* BLEN is used when a message block is not 16 (32-bit or 64-bit) words:
|
||||
* this is used for instance for Tiger, which works on 64-bit words but
|
||||
* uses 512-bit message blocks (eight 64-bit words). PLW1 and PLW4 are
|
||||
* ignored if 32-bit words are used; if 64-bit words are used and PLW1 is
|
||||
* set, then only one word (64 bits) will be used to encode the input
|
||||
* message length (in bits), otherwise two words will be used (as in
|
||||
* SHA-384 and SHA-512). If 64-bit words are used and PLW4 is defined (but
|
||||
* not PLW1), four 64-bit words will be used to encode the message length
|
||||
* (in bits). Note that regardless of those settings, only 64-bit message
|
||||
* lengths are supported (in bits): messages longer than 2 Exabytes will be
|
||||
* improperly hashed (this is unlikely to happen soon: 2 Exabytes is about
|
||||
* 2 million Terabytes, which is huge).
|
||||
*
|
||||
* If CLOSE_ONLY is defined, then this file defines only the sph_XXX_close()
|
||||
* function. This is used for Tiger2, which is identical to Tiger except
|
||||
* when it comes to the padding (Tiger2 uses the standard 0x80 byte instead
|
||||
* of the 0x01 from original Tiger).
|
||||
*
|
||||
* The RFUN function is invoked with two arguments, the first pointing to
|
||||
* aligned data (as a "const void *"), the second being state information
|
||||
* from the context structure. By default, this state information is the
|
||||
* "val" field from the context, and this field is assumed to be an array
|
||||
* of words ("sph_u32" or "sph_u64", depending on BE32/LE32/BE64/LE64).
|
||||
* The "val" field can have any type, except
|
||||
* for the output encoding which assumes that it is an array of "sph_u32"
|
||||
* values. By defining NO_OUTPUT, this last step is deactivated; the
|
||||
* includer code is then responsible for writing out the hash result. When
|
||||
* NO_OUTPUT is defined, the third parameter to the "close()" function is
|
||||
* ignored.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
#undef SPH_XCAT
|
||||
#define SPH_XCAT(a, b) SPH_XCAT_(a, b)
|
||||
#undef SPH_XCAT_
|
||||
#define SPH_XCAT_(a, b) a ## b
|
||||
|
||||
#undef SPH_BLEN
|
||||
#undef SPH_WLEN
|
||||
#if defined BE64 || defined LE64
|
||||
#define SPH_BLEN 128U
|
||||
#define SPH_WLEN 8U
|
||||
#else
|
||||
#define SPH_BLEN 64U
|
||||
#define SPH_WLEN 4U
|
||||
#endif
|
||||
|
||||
#ifdef BLEN
|
||||
#undef SPH_BLEN
|
||||
#define SPH_BLEN BLEN
|
||||
#endif
|
||||
|
||||
#undef SPH_MAXPAD
|
||||
#if defined PLW1
|
||||
#define SPH_MAXPAD (SPH_BLEN - SPH_WLEN)
|
||||
#elif defined PLW4
|
||||
#define SPH_MAXPAD (SPH_BLEN - (SPH_WLEN << 2))
|
||||
#else
|
||||
#define SPH_MAXPAD (SPH_BLEN - (SPH_WLEN << 1))
|
||||
#endif
|
||||
|
||||
#undef SPH_VAL
|
||||
#undef SPH_NO_OUTPUT
|
||||
#ifdef SVAL
|
||||
#define SPH_VAL SVAL
|
||||
#define SPH_NO_OUTPUT 1
|
||||
#else
|
||||
#define SPH_VAL sc->val
|
||||
#endif
|
||||
|
||||
#ifndef CLOSE_ONLY
|
||||
|
||||
#ifdef SPH_UPTR
|
||||
static void
|
||||
SPH_XCAT(HASH, _short)(void *cc, const void *data, size_t len)
|
||||
#else
|
||||
void
|
||||
SPH_XCAT(sph_, HASH)(void *cc, const void *data, size_t len)
|
||||
#endif
|
||||
{
|
||||
SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc;
|
||||
size_t current;
|
||||
|
||||
sc = cc;
|
||||
#if SPH_64
|
||||
current = (unsigned)sc->count & (SPH_BLEN - 1U);
|
||||
#else
|
||||
current = (unsigned)sc->count_low & (SPH_BLEN - 1U);
|
||||
#endif
|
||||
while (len > 0) {
|
||||
size_t clen;
|
||||
#if !SPH_64
|
||||
sph_u32 clow, clow2;
|
||||
#endif
|
||||
|
||||
clen = SPH_BLEN - current;
|
||||
if (clen > len)
|
||||
clen = len;
|
||||
memcpy(sc->buf + current, data, clen);
|
||||
data = (const unsigned char *)data + clen;
|
||||
current += clen;
|
||||
len -= clen;
|
||||
if (current == SPH_BLEN) {
|
||||
RFUN(sc->buf, SPH_VAL);
|
||||
current = 0;
|
||||
}
|
||||
#if SPH_64
|
||||
sc->count += clen;
|
||||
#else
|
||||
clow = sc->count_low;
|
||||
clow2 = SPH_T32(clow + clen);
|
||||
sc->count_low = clow2;
|
||||
if (clow2 < clow)
|
||||
sc->count_high ++;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef SPH_UPTR
/*
 * Public update entry point when SPH_UPTR is available. Short inputs
 * (less than two blocks) and, unless SPH_UNALIGNED is set, inputs that
 * are not word-aligned are delegated to <HASH>_short(). Otherwise the
 * pending partial block is completed first, then whole blocks are fed
 * to RFUN directly from the caller's memory, and the remaining tail is
 * stored in the context buffer.
 */
void
SPH_XCAT(sph_, HASH)(void *cc, const void *data, size_t len)
{
	SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc;
	unsigned pending;
	size_t bulk_len;
#if !SPH_64
	sph_u32 before, after;
#endif

	if (len < (2 * SPH_BLEN)) {
		SPH_XCAT(HASH, _short)(cc, data, len);
		return;
	}

	sc = cc;
#if SPH_64
	pending = (unsigned)sc->count & (SPH_BLEN - 1U);
#else
	pending = (unsigned)sc->count_low & (SPH_BLEN - 1U);
#endif
	if (pending > 0) {
		/* Complete the partially filled block through the slow path. */
		unsigned gap = SPH_BLEN - pending;

		SPH_XCAT(HASH, _short)(cc, data, gap);
		data = (const unsigned char *)data + gap;
		len -= gap;
	}
#if !SPH_UNALIGNED
	if (((SPH_UPTR)data & (SPH_WLEN - 1U)) != 0) {
		SPH_XCAT(HASH, _short)(cc, data, len);
		return;
	}
#endif

	/* Remember the length now: the counter is updated after the loop. */
	bulk_len = len;
	for (; len >= SPH_BLEN; len -= SPH_BLEN) {
		RFUN(data, SPH_VAL);
		data = (const unsigned char *)data + SPH_BLEN;
	}
	if (len > 0)
		memcpy(sc->buf, data, len);

#if SPH_64
	sc->count += (sph_u64)bulk_len;
#else
	before = sc->count_low;
	after = SPH_T32(before + bulk_len);
	sc->count_low = after;
	if (after < before)
		sc->count_high ++;
	/*
	 * Handles the improbable case where "size_t" is wider than 32 bits
	 * while no 64-bit type is available. The shift by 32 is split into
	 * three smaller shifts (12 + 10 + 10).
	 */
	bulk_len >>= 12;
	bulk_len >>= 10;
	bulk_len >>= 10;
	sc->count_high += bulk_len;
#endif
}
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Perform padding and produce result. The context is NOT reinitialized
|
||||
* by this function.
|
||||
*/
|
||||
static void
|
||||
SPH_XCAT(HASH, _addbits_and_close)(void *cc,
|
||||
unsigned ub, unsigned n, void *dst, unsigned rnum)
|
||||
{
|
||||
SPH_XCAT(sph_, SPH_XCAT(HASH, _context)) *sc;
|
||||
unsigned current, u;
|
||||
#if !SPH_64
|
||||
sph_u32 low, high;
|
||||
#endif
|
||||
|
||||
sc = cc;
|
||||
#if SPH_64
|
||||
current = (unsigned)sc->count & (SPH_BLEN - 1U);
|
||||
#else
|
||||
current = (unsigned)sc->count_low & (SPH_BLEN - 1U);
|
||||
#endif
|
||||
#ifdef PW01
|
||||
sc->buf[current ++] = (0x100 | (ub & 0xFF)) >> (8 - n);
|
||||
#else
|
||||
{
|
||||
unsigned z;
|
||||
|
||||
z = 0x80 >> n;
|
||||
sc->buf[current ++] = ((ub & -z) | z) & 0xFF;
|
||||
}
|
||||
#endif
|
||||
if (current > SPH_MAXPAD) {
|
||||
memset(sc->buf + current, 0, SPH_BLEN - current);
|
||||
RFUN(sc->buf, SPH_VAL);
|
||||
memset(sc->buf, 0, SPH_MAXPAD);
|
||||
} else {
|
||||
memset(sc->buf + current, 0, SPH_MAXPAD - current);
|
||||
}
|
||||
#if defined BE64
|
||||
#if defined PLW1
|
||||
sph_enc64be_aligned(sc->buf + SPH_MAXPAD,
|
||||
SPH_T64(sc->count << 3) + (sph_u64)n);
|
||||
#elif defined PLW4
|
||||
memset(sc->buf + SPH_MAXPAD, 0, 2 * SPH_WLEN);
|
||||
sph_enc64be_aligned(sc->buf + SPH_MAXPAD + 2 * SPH_WLEN,
|
||||
sc->count >> 61);
|
||||
sph_enc64be_aligned(sc->buf + SPH_MAXPAD + 3 * SPH_WLEN,
|
||||
SPH_T64(sc->count << 3) + (sph_u64)n);
|
||||
#else
|
||||
sph_enc64be_aligned(sc->buf + SPH_MAXPAD, sc->count >> 61);
|
||||
sph_enc64be_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN,
|
||||
SPH_T64(sc->count << 3) + (sph_u64)n);
|
||||
#endif
|
||||
#elif defined LE64
|
||||
#if defined PLW1
|
||||
sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
|
||||
SPH_T64(sc->count << 3) + (sph_u64)n);
|
||||
#elif defined PLW1
|
||||
sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
|
||||
SPH_T64(sc->count << 3) + (sph_u64)n);
|
||||
sph_enc64le_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, sc->count >> 61);
|
||||
memset(sc->buf + SPH_MAXPAD + 2 * SPH_WLEN, 0, 2 * SPH_WLEN);
|
||||
#else
|
||||
sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
|
||||
SPH_T64(sc->count << 3) + (sph_u64)n);
|
||||
sph_enc64le_aligned(sc->buf + SPH_MAXPAD + SPH_WLEN, sc->count >> 61);
|
||||
#endif
|
||||
#else
|
||||
#if SPH_64
|
||||
#ifdef BE32
|
||||
sph_enc64be_aligned(sc->buf + SPH_MAXPAD,
|
||||
SPH_T64(sc->count << 3) + (sph_u64)n);
|
||||
#else
|
||||
sph_enc64le_aligned(sc->buf + SPH_MAXPAD,
|
||||
SPH_T64(sc->count << 3) + (sph_u64)n);
|
||||
#endif
|
||||
#else
|
||||
low = sc->count_low;
|
||||
high = SPH_T32((sc->count_high << 3) | (low >> 29));
|
||||
low = SPH_T32(low << 3) + (sph_u32)n;
|
||||
#ifdef BE32
|
||||
sph_enc32be(sc->buf + SPH_MAXPAD, high);
|
||||
sph_enc32be(sc->buf + SPH_MAXPAD + SPH_WLEN, low);
|
||||
#else
|
||||
sph_enc32le(sc->buf + SPH_MAXPAD, low);
|
||||
sph_enc32le(sc->buf + SPH_MAXPAD + SPH_WLEN, high);
|
||||
#endif
|
||||
#endif
|
||||
#endif
|
||||
RFUN(sc->buf, SPH_VAL);
|
||||
#ifdef SPH_NO_OUTPUT
|
||||
(void)dst;
|
||||
(void)rnum;
|
||||
(void)u;
|
||||
#else
|
||||
for (u = 0; u < rnum; u ++) {
|
||||
#if defined BE64
|
||||
sph_enc64be((unsigned char *)dst + 8 * u, sc->val[u]);
|
||||
#elif defined LE64
|
||||
sph_enc64le((unsigned char *)dst + 8 * u, sc->val[u]);
|
||||
#elif defined BE32
|
||||
sph_enc32be((unsigned char *)dst + 4 * u, sc->val[u]);
|
||||
#else
|
||||
sph_enc32le((unsigned char *)dst + 4 * u, sc->val[u]);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
SPH_XCAT(HASH, _close)(void *cc, void *dst, unsigned rnum)
|
||||
{
|
||||
SPH_XCAT(HASH, _addbits_and_close)(cc, 0, 0, dst, rnum);
|
||||
}
|
||||
1132
src/crypto/ghostrider/sph_blake.c
Normal file
1132
src/crypto/ghostrider/sph_blake.c
Normal file
File diff suppressed because it is too large
Load Diff
327
src/crypto/ghostrider/sph_blake.h
Normal file
327
src/crypto/ghostrider/sph_blake.h
Normal file
@@ -0,0 +1,327 @@
|
||||
/* $Id: sph_blake.h 252 2011-06-07 17:55:14Z tp $ */
|
||||
/**
|
||||
* BLAKE interface. BLAKE is a family of functions which differ by their
|
||||
* output size; this implementation defines BLAKE for output sizes 224,
|
||||
* 256, 384 and 512 bits. This implementation conforms to the "third
|
||||
* round" specification.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_blake.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_BLAKE_H__
|
||||
#define SPH_BLAKE_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include "sph_types.h"
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BLAKE-224.
|
||||
*/
|
||||
#define SPH_SIZE_blake224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BLAKE-256.
|
||||
*/
|
||||
#define SPH_SIZE_blake256 256
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BLAKE-384.
|
||||
*/
|
||||
#define SPH_SIZE_blake384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BLAKE-512.
|
||||
*/
|
||||
#define SPH_SIZE_blake512 512
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* This structure is a context for BLAKE-224 and BLAKE-256 computations:
|
||||
* it contains the intermediate values and some data from the last
|
||||
* entered block. Once a BLAKE computation has been performed, the
|
||||
* context can be reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running BLAKE
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
sph_u32 H[8];
|
||||
sph_u32 S[4];
|
||||
sph_u32 T0, T1;
|
||||
#endif
|
||||
} sph_blake_small_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for BLAKE-224 computations. It is
|
||||
* identical to the common <code>sph_blake_small_context</code>.
|
||||
*/
|
||||
typedef sph_blake_small_context sph_blake224_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for BLAKE-256 computations. It is
|
||||
* identical to the common <code>sph_blake_small_context</code>.
|
||||
*/
|
||||
typedef sph_blake_small_context sph_blake256_context;
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* This structure is a context for BLAKE-384 and BLAKE-512 computations:
|
||||
* it contains the intermediate values and some data from the last
|
||||
* entered block. Once a BLAKE computation has been performed, the
|
||||
* context can be reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running BLAKE
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[128]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
sph_u64 H[8];
|
||||
sph_u64 S[4];
|
||||
sph_u64 T0, T1;
|
||||
#endif
|
||||
} sph_blake_big_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for BLAKE-384 computations. It is
|
||||
* identical to the common <code>sph_blake_big_context</code>.
|
||||
*/
|
||||
typedef sph_blake_big_context sph_blake384_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for BLAKE-512 computations. It is
|
||||
* identical to the common <code>sph_blake_big_context</code>.
|
||||
*/
|
||||
typedef sph_blake_big_context sph_blake512_context;
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Initialize a BLAKE-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the BLAKE-224 context (pointer to a
|
||||
* <code>sph_blake224_context</code>)
|
||||
*/
|
||||
void sph_blake224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the BLAKE-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_blake224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current BLAKE-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the BLAKE-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_blake224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the BLAKE-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_blake224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a BLAKE-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the BLAKE-256 context (pointer to a
|
||||
* <code>sph_blake256_context</code>)
|
||||
*/
|
||||
void sph_blake256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the BLAKE-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_blake256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current BLAKE-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the BLAKE-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_blake256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the BLAKE-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_blake256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* Initialize a BLAKE-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the BLAKE-384 context (pointer to a
|
||||
* <code>sph_blake384_context</code>)
|
||||
*/
|
||||
void sph_blake384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the BLAKE-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_blake384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current BLAKE-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the BLAKE-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_blake384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the BLAKE-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_blake384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a BLAKE-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the BLAKE-512 context (pointer to a
|
||||
* <code>sph_blake512_context</code>)
|
||||
*/
|
||||
void sph_blake512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the BLAKE-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_blake512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current BLAKE-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the BLAKE-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_blake512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the BLAKE-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_blake512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
986
src/crypto/ghostrider/sph_bmw.c
Normal file
986
src/crypto/ghostrider/sph_bmw.c
Normal file
@@ -0,0 +1,986 @@
|
||||
/* $Id: bmw.c 227 2010-06-16 17:28:38Z tp $ */
|
||||
/*
|
||||
* BMW implementation.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include "sph_bmw.h"
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_BMW
|
||||
#define SPH_SMALL_FOOTPRINT_BMW 1
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
#if !defined(__AVX2__)
|
||||
|
||||
static const sph_u32 IV224[] = {
|
||||
SPH_C32(0x00010203), SPH_C32(0x04050607),
|
||||
SPH_C32(0x08090A0B), SPH_C32(0x0C0D0E0F),
|
||||
SPH_C32(0x10111213), SPH_C32(0x14151617),
|
||||
SPH_C32(0x18191A1B), SPH_C32(0x1C1D1E1F),
|
||||
SPH_C32(0x20212223), SPH_C32(0x24252627),
|
||||
SPH_C32(0x28292A2B), SPH_C32(0x2C2D2E2F),
|
||||
SPH_C32(0x30313233), SPH_C32(0x34353637),
|
||||
SPH_C32(0x38393A3B), SPH_C32(0x3C3D3E3F)
|
||||
};
|
||||
|
||||
static const sph_u32 IV256[] = {
|
||||
SPH_C32(0x40414243), SPH_C32(0x44454647),
|
||||
SPH_C32(0x48494A4B), SPH_C32(0x4C4D4E4F),
|
||||
SPH_C32(0x50515253), SPH_C32(0x54555657),
|
||||
SPH_C32(0x58595A5B), SPH_C32(0x5C5D5E5F),
|
||||
SPH_C32(0x60616263), SPH_C32(0x64656667),
|
||||
SPH_C32(0x68696A6B), SPH_C32(0x6C6D6E6F),
|
||||
SPH_C32(0x70717273), SPH_C32(0x74757677),
|
||||
SPH_C32(0x78797A7B), SPH_C32(0x7C7D7E7F)
|
||||
};
|
||||
|
||||
#endif // !AVX2
|
||||
|
||||
#if SPH_64
|
||||
|
||||
static const sph_u64 IV384[] = {
|
||||
SPH_C64(0x0001020304050607), SPH_C64(0x08090A0B0C0D0E0F),
|
||||
SPH_C64(0x1011121314151617), SPH_C64(0x18191A1B1C1D1E1F),
|
||||
SPH_C64(0x2021222324252627), SPH_C64(0x28292A2B2C2D2E2F),
|
||||
SPH_C64(0x3031323334353637), SPH_C64(0x38393A3B3C3D3E3F),
|
||||
SPH_C64(0x4041424344454647), SPH_C64(0x48494A4B4C4D4E4F),
|
||||
SPH_C64(0x5051525354555657), SPH_C64(0x58595A5B5C5D5E5F),
|
||||
SPH_C64(0x6061626364656667), SPH_C64(0x68696A6B6C6D6E6F),
|
||||
SPH_C64(0x7071727374757677), SPH_C64(0x78797A7B7C7D7E7F)
|
||||
};
|
||||
|
||||
static const sph_u64 IV512[] = {
|
||||
SPH_C64(0x8081828384858687), SPH_C64(0x88898A8B8C8D8E8F),
|
||||
SPH_C64(0x9091929394959697), SPH_C64(0x98999A9B9C9D9E9F),
|
||||
SPH_C64(0xA0A1A2A3A4A5A6A7), SPH_C64(0xA8A9AAABACADAEAF),
|
||||
SPH_C64(0xB0B1B2B3B4B5B6B7), SPH_C64(0xB8B9BABBBCBDBEBF),
|
||||
SPH_C64(0xC0C1C2C3C4C5C6C7), SPH_C64(0xC8C9CACBCCCDCECF),
|
||||
SPH_C64(0xD0D1D2D3D4D5D6D7), SPH_C64(0xD8D9DADBDCDDDEDF),
|
||||
SPH_C64(0xE0E1E2E3E4E5E6E7), SPH_C64(0xE8E9EAEBECEDEEEF),
|
||||
SPH_C64(0xF0F1F2F3F4F5F6F7), SPH_C64(0xF8F9FAFBFCFDFEFF)
|
||||
};
|
||||
|
||||
#endif
|
||||
|
||||
#define XCAT(x, y) XCAT_(x, y)
|
||||
#define XCAT_(x, y) x ## y
|
||||
|
||||
#define LPAR (
|
||||
|
||||
#define I16_16 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
|
||||
#define I16_17 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16
|
||||
#define I16_18 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
|
||||
#define I16_19 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18
|
||||
#define I16_20 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19
|
||||
#define I16_21 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20
|
||||
#define I16_22 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21
|
||||
#define I16_23 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22
|
||||
#define I16_24 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23
|
||||
#define I16_25 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24
|
||||
#define I16_26 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25
|
||||
#define I16_27 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26
|
||||
#define I16_28 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27
|
||||
#define I16_29 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28
|
||||
#define I16_30 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29
|
||||
#define I16_31 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30
|
||||
|
||||
/*
 * M16_n (n = 16..31): the seven arguments
 *     j0m, j1m, j3m, j4m, j7m, j10m, j11m
 * consumed by add_elt_s / add_elt_b for expansion step n. There, j0m, j3m
 * and j10m select message words, j1m, j4m and j11m are the rotation
 * counts applied to those words, and j7m selects the chaining word.
 */
#define M16_16     0,  1,  3,  4,  7, 10, 11
#define M16_17     1,  2,  4,  5,  8, 11, 12
#define M16_18     2,  3,  5,  6,  9, 12, 13
#define M16_19     3,  4,  6,  7, 10, 13, 14
#define M16_20     4,  5,  7,  8, 11, 14, 15
#define M16_21     5,  6,  8,  9, 12, 15, 16
#define M16_22     6,  7,  9, 10, 13,  0,  1
#define M16_23     7,  8, 10, 11, 14,  1,  2
#define M16_24     8,  9, 11, 12, 15,  2,  3
#define M16_25     9, 10, 12, 13,  0,  3,  4
#define M16_26    10, 11, 13, 14,  1,  4,  5
#define M16_27    11, 12, 14, 15,  2,  5,  6
#define M16_28    12, 13, 15, 16,  3,  6,  7
#define M16_29    13, 14,  0,  1,  4,  7,  8
#define M16_30    14, 15,  1,  2,  5,  8,  9
#define M16_31    15, 16,  2,  3,  6,  9, 10
|
||||
|
||||
#if !defined(__AVX2__)
|
||||
|
||||
/*
 * 32-bit building blocks of the BMW-224/256 message expansion.
 *   ss0..ss3  XOR of one right shift, one left shift and two
 *             SPH_ROTL32 rotations of x
 *   ss4, ss5  x XOR (x >> 1) and x XOR (x >> 2)
 *   rs1..rs7  SPH_ROTL32 rotations by 3, 7, 13, 16, 19, 23 and 27
 *   Ks(j)     j multiplied by the constant 0x05555555
 */
#define ss0(x)    (((x) >> 1) ^ SPH_T32((x) << 3) \
                  ^ SPH_ROTL32(x,  4) ^ SPH_ROTL32(x, 19))
#define ss1(x)    (((x) >> 1) ^ SPH_T32((x) << 2) \
                  ^ SPH_ROTL32(x,  8) ^ SPH_ROTL32(x, 23))
#define ss2(x)    (((x) >> 2) ^ SPH_T32((x) << 1) \
                  ^ SPH_ROTL32(x, 12) ^ SPH_ROTL32(x, 25))
#define ss3(x)    (((x) >> 2) ^ SPH_T32((x) << 2) \
                  ^ SPH_ROTL32(x, 15) ^ SPH_ROTL32(x, 29))
#define ss4(x)    (((x) >> 1) ^ (x))
#define ss5(x)    (((x) >> 2) ^ (x))

#define rs1(x)    SPH_ROTL32(x,  3)
#define rs2(x)    SPH_ROTL32(x,  7)
#define rs3(x)    SPH_ROTL32(x, 13)
#define rs4(x)    SPH_ROTL32(x, 16)
#define rs5(x)    SPH_ROTL32(x, 19)
#define rs6(x)    SPH_ROTL32(x, 23)
#define rs7(x)    SPH_ROTL32(x, 27)

#define Ks(j)     SPH_T32((sph_u32)(j) * SPH_C32(0x05555555))
|
||||
|
||||
/*
 * 32-bit "add element" term of the message expansion:
 *
 *   (rot(mf(j0m), j1m) + rot(mf(j3m), j4m) - rot(mf(j10m), j11m) + Ks(j16))
 *       ^ hf(j7m)
 *
 * mf and hf are accessor macros for the message and chaining words; the
 * seven index/rotation arguments come from the M16_xx lists and j16 is
 * the index of the word being produced.
 */
#define add_elt_s(mf, hf, j0m, j1m, j3m, j4m, j7m, j10m, j11m, j16) \
    (SPH_T32(SPH_ROTL32(mf(j0m), j1m) \
        + SPH_ROTL32(mf(j3m), j4m) \
        - SPH_ROTL32(mf(j10m), j11m) \
        + Ks(j16)) ^ hf(j7m))
|
||||
|
||||
/*
 * First-kind 32-bit expansion step.
 *
 * expand1s_inner sums ss1/ss2/ss3/ss0 (in that repeating order) applied
 * to the sixteen words qf(i0)..qf(i15) and adds the add_elt_s term.
 *
 * expand1s(qf, mf, hf, i16) is the entry point: it pastes i16 onto
 * "I16_" and "M16_" to pick the two index lists and hands them to
 * expand1s_, which uses LPAR so that the lists are expanded into separate
 * arguments before expand1s_inner is invoked.
 */
#define expand1s_inner(qf, mf, hf, i16, \
        i0, i1, i2, i3, i4, i5, i6, i7, i8, \
        i9, i10, i11, i12, i13, i14, i15, \
        i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
    SPH_T32(ss1(qf(i0)) + ss2(qf(i1)) \
        + ss3(qf(i2)) + ss0(qf(i3)) \
        + ss1(qf(i4)) + ss2(qf(i5)) \
        + ss3(qf(i6)) + ss0(qf(i7)) \
        + ss1(qf(i8)) + ss2(qf(i9)) \
        + ss3(qf(i10)) + ss0(qf(i11)) \
        + ss1(qf(i12)) + ss2(qf(i13)) \
        + ss3(qf(i14)) + ss0(qf(i15)) \
        + add_elt_s(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))

#define expand1s_(qf, mf, hf, i16, ix, iy) \
    expand1s_inner LPAR qf, mf, hf, i16, ix, iy)

#define expand1s(qf, mf, hf, i16) \
    expand1s_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
|
||||
|
||||
/*
 * Second-kind 32-bit expansion step.
 *
 * expand2s_inner adds the even-position words qf(i0), qf(i2), ... qf(i12)
 * unchanged, the odd-position words qf(i1), qf(i3), ... qf(i13) through
 * rs1..rs7, then ss4(qf(i14)), ss5(qf(i15)) and the add_elt_s term.
 *
 * expand2s / expand2s_ select and unpack the I16_xx / M16_xx lists in the
 * same way as expand1s / expand1s_.
 */
#define expand2s_inner(qf, mf, hf, i16, \
        i0, i1, i2, i3, i4, i5, i6, i7, i8, \
        i9, i10, i11, i12, i13, i14, i15, \
        i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
    SPH_T32(qf(i0) + rs1(qf(i1)) \
        + qf(i2) + rs2(qf(i3)) \
        + qf(i4) + rs3(qf(i5)) \
        + qf(i6) + rs4(qf(i7)) \
        + qf(i8) + rs5(qf(i9)) \
        + qf(i10) + rs6(qf(i11)) \
        + qf(i12) + rs7(qf(i13)) \
        + ss4(qf(i14)) + ss5(qf(i15)) \
        + add_elt_s(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))

#define expand2s_(qf, mf, hf, i16, ix, iy) \
    expand2s_inner LPAR qf, mf, hf, i16, ix, iy)

#define expand2s(qf, mf, hf, i16) \
    expand2s_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
|
||||
|
||||
#endif // !AVX2
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/*
 * 64-bit building blocks of the BMW-384/512 message expansion.
 *   sb0..sb3  XOR of one right shift, one left shift and two
 *             SPH_ROTL64 rotations of x
 *   sb4, sb5  x XOR (x >> 1) and x XOR (x >> 2)
 *   rb1..rb7  SPH_ROTL64 rotations by 5, 11, 27, 32, 37, 43 and 53
 *   Kb(j)     j multiplied by the constant 0x0555555555555555
 */
#define sb0(x)    (((x) >> 1) ^ SPH_T64((x) << 3) \
                  ^ SPH_ROTL64(x,  4) ^ SPH_ROTL64(x, 37))
#define sb1(x)    (((x) >> 1) ^ SPH_T64((x) << 2) \
                  ^ SPH_ROTL64(x, 13) ^ SPH_ROTL64(x, 43))
#define sb2(x)    (((x) >> 2) ^ SPH_T64((x) << 1) \
                  ^ SPH_ROTL64(x, 19) ^ SPH_ROTL64(x, 53))
#define sb3(x)    (((x) >> 2) ^ SPH_T64((x) << 2) \
                  ^ SPH_ROTL64(x, 28) ^ SPH_ROTL64(x, 59))
#define sb4(x)    (((x) >> 1) ^ (x))
#define sb5(x)    (((x) >> 2) ^ (x))

#define rb1(x)    SPH_ROTL64(x,  5)
#define rb2(x)    SPH_ROTL64(x, 11)
#define rb3(x)    SPH_ROTL64(x, 27)
#define rb4(x)    SPH_ROTL64(x, 32)
#define rb5(x)    SPH_ROTL64(x, 37)
#define rb6(x)    SPH_ROTL64(x, 43)
#define rb7(x)    SPH_ROTL64(x, 53)

#define Kb(j)     SPH_T64((sph_u64)(j) * SPH_C64(0x0555555555555555))
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT_BMW
|
||||
|
||||
static const sph_u64 Kb_tab[] = {
|
||||
Kb(16), Kb(17), Kb(18), Kb(19), Kb(20), Kb(21), Kb(22), Kb(23),
|
||||
Kb(24), Kb(25), Kb(26), Kb(27), Kb(28), Kb(29), Kb(30), Kb(31)
|
||||
};
|
||||
|
||||
/*
 * Small-footprint 64-bit "add element" term, computed from a run-time
 * step number j (0..15) instead of the M16_xx lists.
 *
 * rol_off(mf, j, off) takes message word (j + off) mod 16 and rotates it
 * by that index plus one. add_elt_b combines the words at offsets 0, 3
 * and 10 with Kb_tab[j] and XORs in chaining word (j + 7) mod 16.
 */
#define rol_off(mf, j, off) \
    SPH_ROTL64(mf(((j) + (off)) & 15), (((j) + (off)) & 15) + 1)

#define add_elt_b(mf, hf, j) \
    (SPH_T64(rol_off(mf, j, 0) \
        + rol_off(mf, j, 3) \
        - rol_off(mf, j, 10) \
        + Kb_tab[j]) ^ hf(((j) + 7) & 15))
|
||||
|
||||
/*
 * Small-footprint first-kind 64-bit expansion step for word i.
 * Sums sb1/sb2/sb3/sb0 (in that repeating order) applied to
 * qf(i - 16) .. qf(i - 1) and adds the add_elt_b term for step i - 16.
 */
#define expand1b(qf, mf, hf, i) \
    SPH_T64(sb1(qf((i) - 16)) + sb2(qf((i) - 15)) \
        + sb3(qf((i) - 14)) + sb0(qf((i) - 13)) \
        + sb1(qf((i) - 12)) + sb2(qf((i) - 11)) \
        + sb3(qf((i) - 10)) + sb0(qf((i) -  9)) \
        + sb1(qf((i) -  8)) + sb2(qf((i) -  7)) \
        + sb3(qf((i) -  6)) + sb0(qf((i) -  5)) \
        + sb1(qf((i) -  4)) + sb2(qf((i) -  3)) \
        + sb3(qf((i) -  2)) + sb0(qf((i) -  1)) \
        + add_elt_b(mf, hf, (i) - 16))
|
||||
|
||||
/*
 * Small-footprint second-kind 64-bit expansion step for word i.
 * Adds qf(i - 16), qf(i - 14), ... qf(i - 4) unchanged, the words in
 * between through rb1..rb7, then sb4(qf(i - 2)), sb5(qf(i - 1)) and the
 * add_elt_b term for step i - 16.
 */
#define expand2b(qf, mf, hf, i) \
    SPH_T64(qf((i) - 16) + rb1(qf((i) - 15)) \
        + qf((i) - 14) + rb2(qf((i) - 13)) \
        + qf((i) - 12) + rb3(qf((i) - 11)) \
        + qf((i) - 10) + rb4(qf((i) -  9)) \
        + qf((i) -  8) + rb5(qf((i) -  7)) \
        + qf((i) -  6) + rb6(qf((i) -  5)) \
        + qf((i) -  4) + rb7(qf((i) -  3)) \
        + sb4(qf((i) -  2)) + sb5(qf((i) -  1)) \
        + add_elt_b(mf, hf, (i) - 16))
|
||||
|
||||
#else
|
||||
|
||||
/*
 * 64-bit "add element" term of the message expansion (table-driven
 * build):
 *
 *   (rot(mf(j0m), j1m) + rot(mf(j3m), j4m) - rot(mf(j10m), j11m) + Kb(j16))
 *       ^ hf(j7m)
 *
 * The seven index/rotation arguments come from the M16_xx lists and j16
 * is the index of the word being produced.
 */
#define add_elt_b(mf, hf, j0m, j1m, j3m, j4m, j7m, j10m, j11m, j16) \
    (SPH_T64(SPH_ROTL64(mf(j0m), j1m) \
        + SPH_ROTL64(mf(j3m), j4m) \
        - SPH_ROTL64(mf(j10m), j11m) \
        + Kb(j16)) ^ hf(j7m))
|
||||
|
||||
/*
 * First-kind 64-bit expansion step (table-driven build).
 *
 * expand1b_inner sums sb1/sb2/sb3/sb0 (in that repeating order) applied
 * to qf(i0)..qf(i15) and adds the add_elt_b term. expand1b pastes i16
 * onto "I16_" and "M16_" to select the index lists; expand1b_ uses LPAR
 * so the lists are expanded into separate arguments before
 * expand1b_inner is invoked.
 */
#define expand1b_inner(qf, mf, hf, i16, \
        i0, i1, i2, i3, i4, i5, i6, i7, i8, \
        i9, i10, i11, i12, i13, i14, i15, \
        i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
    SPH_T64(sb1(qf(i0)) + sb2(qf(i1)) \
        + sb3(qf(i2)) + sb0(qf(i3)) \
        + sb1(qf(i4)) + sb2(qf(i5)) \
        + sb3(qf(i6)) + sb0(qf(i7)) \
        + sb1(qf(i8)) + sb2(qf(i9)) \
        + sb3(qf(i10)) + sb0(qf(i11)) \
        + sb1(qf(i12)) + sb2(qf(i13)) \
        + sb3(qf(i14)) + sb0(qf(i15)) \
        + add_elt_b(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))

#define expand1b_(qf, mf, hf, i16, ix, iy) \
    expand1b_inner LPAR qf, mf, hf, i16, ix, iy)

#define expand1b(qf, mf, hf, i16) \
    expand1b_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
|
||||
|
||||
/*
 * Second-kind 64-bit expansion step (table-driven build).
 *
 * expand2b_inner adds the even-position words qf(i0) .. qf(i12)
 * unchanged, the odd-position words qf(i1) .. qf(i13) through rb1..rb7,
 * then sb4(qf(i14)), sb5(qf(i15)) and the add_elt_b term. expand2b /
 * expand2b_ select and unpack the I16_xx / M16_xx lists in the same way
 * as expand1b / expand1b_.
 */
#define expand2b_inner(qf, mf, hf, i16, \
        i0, i1, i2, i3, i4, i5, i6, i7, i8, \
        i9, i10, i11, i12, i13, i14, i15, \
        i0m, i1m, i3m, i4m, i7m, i10m, i11m) \
    SPH_T64(qf(i0) + rb1(qf(i1)) \
        + qf(i2) + rb2(qf(i3)) \
        + qf(i4) + rb3(qf(i5)) \
        + qf(i6) + rb4(qf(i7)) \
        + qf(i8) + rb5(qf(i9)) \
        + qf(i10) + rb6(qf(i11)) \
        + qf(i12) + rb7(qf(i13)) \
        + sb4(qf(i14)) + sb5(qf(i15)) \
        + add_elt_b(mf, hf, i0m, i1m, i3m, i4m, i7m, i10m, i11m, i16))

#define expand2b_(qf, mf, hf, i16, ix, iy) \
    expand2b_inner LPAR qf, mf, hf, i16, ix, iy)

#define expand2b(qf, mf, hf, i16) \
    expand2b_(qf, mf, hf, i16, I16_ ## i16, M16_ ## i16)
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
/*
 * Builds one W word. For each of the five indices i0..i4 it forms
 * M(i) ^ H(i), joins the five values with the operators op01, op12, op23
 * and op34 (each "+" or "-" at the call sites in this file) and passes
 * the result through tt. M() and H() must be defined where the macro is
 * expanded.
 */
#define MAKE_W(tt, i0, op01, i1, op12, i2, op23, i3, op34, i4) \
    tt((M(i0) ^ H(i0)) \
        op01 (M(i1) ^ H(i1)) \
        op12 (M(i2) ^ H(i2)) \
        op23 (M(i3) ^ H(i3)) \
        op34 (M(i4) ^ H(i4)))
|
||||
|
||||
#if !defined(__AVX2__)
|
||||
|
||||
/*
 * The sixteen 32-bit W words: each one is a MAKE_W combination of five
 * (M ^ H) terms, reduced with SPH_T32.
 */
#define Ws0     MAKE_W(SPH_T32,  5, -,  7, +, 10, +, 13, +, 14)
#define Ws1     MAKE_W(SPH_T32,  6, -,  8, +, 11, +, 14, -, 15)
#define Ws2     MAKE_W(SPH_T32,  0, +,  7, +,  9, -, 12, +, 15)
#define Ws3     MAKE_W(SPH_T32,  0, -,  1, +,  8, -, 10, +, 13)
#define Ws4     MAKE_W(SPH_T32,  1, +,  2, +,  9, -, 11, -, 14)
#define Ws5     MAKE_W(SPH_T32,  3, -,  2, +, 10, -, 12, +, 15)
#define Ws6     MAKE_W(SPH_T32,  4, -,  0, -,  3, -, 11, +, 13)
#define Ws7     MAKE_W(SPH_T32,  1, -,  4, -,  5, -, 12, -, 14)
#define Ws8     MAKE_W(SPH_T32,  2, -,  5, -,  6, +, 13, -, 15)
#define Ws9     MAKE_W(SPH_T32,  0, -,  3, +,  6, -,  7, +, 14)
#define Ws10    MAKE_W(SPH_T32,  8, -,  1, -,  4, -,  7, +, 15)
#define Ws11    MAKE_W(SPH_T32,  8, -,  0, -,  2, -,  5, +,  9)
#define Ws12    MAKE_W(SPH_T32,  1, +,  3, -,  6, -,  9, +, 10)
#define Ws13    MAKE_W(SPH_T32,  2, +,  4, +,  7, +, 10, +, 11)
#define Ws14    MAKE_W(SPH_T32,  3, -,  5, +,  8, -, 11, -, 12)
#define Ws15    MAKE_W(SPH_T32, 12, -,  4, -,  6, -,  9, +, 13)
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT_BMW
|
||||
|
||||
/*
 * Small-footprint build: fills qt[0..15].
 * The sixteen W words are first stored in a local array; the loop then
 * handles entries 0..14 in three groups of five, applying ss0..ss4 in
 * turn and adding the next chaining word H(i + 1). Entry 15 uses ss0 and
 * wraps around to H(0).
 */
#define MAKE_Qas   do { \
        unsigned u; \
        sph_u32 Ws[16]; \
        \
        Ws[ 0] = Ws0;  Ws[ 1] = Ws1;  Ws[ 2] = Ws2;  Ws[ 3] = Ws3; \
        Ws[ 4] = Ws4;  Ws[ 5] = Ws5;  Ws[ 6] = Ws6;  Ws[ 7] = Ws7; \
        Ws[ 8] = Ws8;  Ws[ 9] = Ws9;  Ws[10] = Ws10; Ws[11] = Ws11; \
        Ws[12] = Ws12; Ws[13] = Ws13; Ws[14] = Ws14; Ws[15] = Ws15; \
        for (u = 0; u < 15; u += 5) { \
            qt[u + 0] = SPH_T32(ss0(Ws[u + 0]) + H(u + 1)); \
            qt[u + 1] = SPH_T32(ss1(Ws[u + 1]) + H(u + 2)); \
            qt[u + 2] = SPH_T32(ss2(Ws[u + 2]) + H(u + 3)); \
            qt[u + 3] = SPH_T32(ss3(Ws[u + 3]) + H(u + 4)); \
            qt[u + 4] = SPH_T32(ss4(Ws[u + 4]) + H(u + 5)); \
        } \
        qt[15] = SPH_T32(ss0(Ws[15]) + H(0)); \
    } while (0)
|
||||
|
||||
/*
 * Small-footprint build: fills qt[16..31].
 * Words 16 and 17 use the first-kind step (expand1s), words 18..31 the
 * second-kind step (expand2s). The index must be a literal because the
 * expand macros paste it onto table names.
 */
#define MAKE_Qbs   do { \
        qt[16] = expand1s(Qs, M, H, 16); \
        qt[17] = expand1s(Qs, M, H, 17); \
        \
        qt[18] = expand2s(Qs, M, H, 18); \
        qt[19] = expand2s(Qs, M, H, 19); \
        qt[20] = expand2s(Qs, M, H, 20); \
        qt[21] = expand2s(Qs, M, H, 21); \
        qt[22] = expand2s(Qs, M, H, 22); \
        qt[23] = expand2s(Qs, M, H, 23); \
        qt[24] = expand2s(Qs, M, H, 24); \
        qt[25] = expand2s(Qs, M, H, 25); \
        qt[26] = expand2s(Qs, M, H, 26); \
        qt[27] = expand2s(Qs, M, H, 27); \
        qt[28] = expand2s(Qs, M, H, 28); \
        qt[29] = expand2s(Qs, M, H, 29); \
        qt[30] = expand2s(Qs, M, H, 30); \
        qt[31] = expand2s(Qs, M, H, 31); \
    } while (0)
|
||||
|
||||
#else
|
||||
|
||||
/*
 * Fully unrolled build: fills qt[0..15].
 * Entry i applies ss0..ss4 (cycling with i) to W word i and adds the next
 * chaining word H(i + 1); entry 15 wraps around to H(0).
 */
#define MAKE_Qas   do { \
        qt[ 0] = SPH_T32(ss0(Ws0)  + H( 1)); \
        qt[ 1] = SPH_T32(ss1(Ws1)  + H( 2)); \
        qt[ 2] = SPH_T32(ss2(Ws2)  + H( 3)); \
        qt[ 3] = SPH_T32(ss3(Ws3)  + H( 4)); \
        qt[ 4] = SPH_T32(ss4(Ws4)  + H( 5)); \
        \
        qt[ 5] = SPH_T32(ss0(Ws5)  + H( 6)); \
        qt[ 6] = SPH_T32(ss1(Ws6)  + H( 7)); \
        qt[ 7] = SPH_T32(ss2(Ws7)  + H( 8)); \
        qt[ 8] = SPH_T32(ss3(Ws8)  + H( 9)); \
        qt[ 9] = SPH_T32(ss4(Ws9)  + H(10)); \
        \
        qt[10] = SPH_T32(ss0(Ws10) + H(11)); \
        qt[11] = SPH_T32(ss1(Ws11) + H(12)); \
        qt[12] = SPH_T32(ss2(Ws12) + H(13)); \
        qt[13] = SPH_T32(ss3(Ws13) + H(14)); \
        qt[14] = SPH_T32(ss4(Ws14) + H(15)); \
        \
        qt[15] = SPH_T32(ss0(Ws15) + H( 0)); \
    } while (0)
|
||||
|
||||
/*
 * Fully unrolled build: fills qt[16..31].
 * Words 16 and 17 use the first-kind step (expand1s), words 18..31 the
 * second-kind step (expand2s). The index must be a literal because the
 * expand macros paste it onto table names.
 */
#define MAKE_Qbs   do { \
        qt[16] = expand1s(Qs, M, H, 16); \
        qt[17] = expand1s(Qs, M, H, 17); \
        \
        qt[18] = expand2s(Qs, M, H, 18); \
        qt[19] = expand2s(Qs, M, H, 19); \
        qt[20] = expand2s(Qs, M, H, 20); \
        qt[21] = expand2s(Qs, M, H, 21); \
        qt[22] = expand2s(Qs, M, H, 22); \
        qt[23] = expand2s(Qs, M, H, 23); \
        qt[24] = expand2s(Qs, M, H, 24); \
        qt[25] = expand2s(Qs, M, H, 25); \
        qt[26] = expand2s(Qs, M, H, 26); \
        qt[27] = expand2s(Qs, M, H, 27); \
        qt[28] = expand2s(Qs, M, H, 28); \
        qt[29] = expand2s(Qs, M, H, 29); \
        qt[30] = expand2s(Qs, M, H, 30); \
        qt[31] = expand2s(Qs, M, H, 31); \
    } while (0)
|
||||
|
||||
#endif
|
||||
|
||||
/*
 * MAKE_Qs fills the whole 32-word qt[] array for the 32-bit variants:
 * first qt[0..15] (MAKE_Qas), then qt[16..31] (MAKE_Qbs), which reads the
 * words produced before it.
 * Qs(j) is the accessor for word j of qt[].
 */
#define MAKE_Qs   do { \
        MAKE_Qas; \
        MAKE_Qbs; \
    } while (0)

#define Qs(j)     (qt[j])
|
||||
|
||||
#endif // !AVX2
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/*
 * The sixteen 64-bit W words: each one is a MAKE_W combination of five
 * (M ^ H) terms, reduced with SPH_T64. The index/operator pattern is the
 * same as for the 32-bit Ws0..Ws15.
 */
#define Wb0     MAKE_W(SPH_T64,  5, -,  7, +, 10, +, 13, +, 14)
#define Wb1     MAKE_W(SPH_T64,  6, -,  8, +, 11, +, 14, -, 15)
#define Wb2     MAKE_W(SPH_T64,  0, +,  7, +,  9, -, 12, +, 15)
#define Wb3     MAKE_W(SPH_T64,  0, -,  1, +,  8, -, 10, +, 13)
#define Wb4     MAKE_W(SPH_T64,  1, +,  2, +,  9, -, 11, -, 14)
#define Wb5     MAKE_W(SPH_T64,  3, -,  2, +, 10, -, 12, +, 15)
#define Wb6     MAKE_W(SPH_T64,  4, -,  0, -,  3, -, 11, +, 13)
#define Wb7     MAKE_W(SPH_T64,  1, -,  4, -,  5, -, 12, -, 14)
#define Wb8     MAKE_W(SPH_T64,  2, -,  5, -,  6, +, 13, -, 15)
#define Wb9     MAKE_W(SPH_T64,  0, -,  3, +,  6, -,  7, +, 14)
#define Wb10    MAKE_W(SPH_T64,  8, -,  1, -,  4, -,  7, +, 15)
#define Wb11    MAKE_W(SPH_T64,  8, -,  0, -,  2, -,  5, +,  9)
#define Wb12    MAKE_W(SPH_T64,  1, +,  3, -,  6, -,  9, +, 10)
#define Wb13    MAKE_W(SPH_T64,  2, +,  4, +,  7, +, 10, +, 11)
#define Wb14    MAKE_W(SPH_T64,  3, -,  5, +,  8, -, 11, -, 12)
#define Wb15    MAKE_W(SPH_T64, 12, -,  4, -,  6, -,  9, +, 13)
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT_BMW
|
||||
|
||||
/*
 * Small-footprint build: fills qt[0..15] for the 64-bit variants.
 * The sixteen W words are first stored in a local array; the loop then
 * handles entries 0..14 in three groups of five, applying sb0..sb4 in
 * turn and adding the next chaining word H(i + 1). Entry 15 uses sb0 and
 * wraps around to H(0).
 */
#define MAKE_Qab   do { \
        unsigned u; \
        sph_u64 Wb[16]; \
        \
        Wb[ 0] = Wb0;  Wb[ 1] = Wb1;  Wb[ 2] = Wb2;  Wb[ 3] = Wb3; \
        Wb[ 4] = Wb4;  Wb[ 5] = Wb5;  Wb[ 6] = Wb6;  Wb[ 7] = Wb7; \
        Wb[ 8] = Wb8;  Wb[ 9] = Wb9;  Wb[10] = Wb10; Wb[11] = Wb11; \
        Wb[12] = Wb12; Wb[13] = Wb13; Wb[14] = Wb14; Wb[15] = Wb15; \
        for (u = 0; u < 15; u += 5) { \
            qt[u + 0] = SPH_T64(sb0(Wb[u + 0]) + H(u + 1)); \
            qt[u + 1] = SPH_T64(sb1(Wb[u + 1]) + H(u + 2)); \
            qt[u + 2] = SPH_T64(sb2(Wb[u + 2]) + H(u + 3)); \
            qt[u + 3] = SPH_T64(sb3(Wb[u + 3]) + H(u + 4)); \
            qt[u + 4] = SPH_T64(sb4(Wb[u + 4]) + H(u + 5)); \
        } \
        qt[15] = SPH_T64(sb0(Wb[15]) + H(0)); \
    } while (0)
|
||||
|
||||
/*
 * Small-footprint build: fills qt[16..31] for the 64-bit variants with
 * run-time loops. Words 16 and 17 use the first-kind step (expand1b),
 * words 18..31 the second-kind step (expand2b).
 */
#define MAKE_Qbb   do { \
        unsigned u; \
        \
        for (u = 16; u < 18; u ++) { \
            qt[u] = expand1b(Qb, M, H, u); \
        } \
        for (u = 18; u < 32; u ++) { \
            qt[u] = expand2b(Qb, M, H, u); \
        } \
    } while (0)
|
||||
|
||||
#else
|
||||
|
||||
/*
 * Fully unrolled build: fills qt[0..15] for the 64-bit variants.
 * Entry i applies sb0..sb4 (cycling with i) to W word i and adds the next
 * chaining word H(i + 1); entry 15 wraps around to H(0).
 */
#define MAKE_Qab   do { \
        qt[ 0] = SPH_T64(sb0(Wb0)  + H( 1)); \
        qt[ 1] = SPH_T64(sb1(Wb1)  + H( 2)); \
        qt[ 2] = SPH_T64(sb2(Wb2)  + H( 3)); \
        qt[ 3] = SPH_T64(sb3(Wb3)  + H( 4)); \
        qt[ 4] = SPH_T64(sb4(Wb4)  + H( 5)); \
        \
        qt[ 5] = SPH_T64(sb0(Wb5)  + H( 6)); \
        qt[ 6] = SPH_T64(sb1(Wb6)  + H( 7)); \
        qt[ 7] = SPH_T64(sb2(Wb7)  + H( 8)); \
        qt[ 8] = SPH_T64(sb3(Wb8)  + H( 9)); \
        qt[ 9] = SPH_T64(sb4(Wb9)  + H(10)); \
        \
        qt[10] = SPH_T64(sb0(Wb10) + H(11)); \
        qt[11] = SPH_T64(sb1(Wb11) + H(12)); \
        qt[12] = SPH_T64(sb2(Wb12) + H(13)); \
        qt[13] = SPH_T64(sb3(Wb13) + H(14)); \
        qt[14] = SPH_T64(sb4(Wb14) + H(15)); \
        \
        qt[15] = SPH_T64(sb0(Wb15) + H( 0)); \
    } while (0)
|
||||
|
||||
/*
 * Fully unrolled build: fills qt[16..31] for the 64-bit variants.
 * Words 16 and 17 use the first-kind step (expand1b), words 18..31 the
 * second-kind step (expand2b). The index must be a literal because the
 * expand macros paste it onto table names.
 */
#define MAKE_Qbb   do { \
        qt[16] = expand1b(Qb, M, H, 16); \
        qt[17] = expand1b(Qb, M, H, 17); \
        \
        qt[18] = expand2b(Qb, M, H, 18); \
        qt[19] = expand2b(Qb, M, H, 19); \
        qt[20] = expand2b(Qb, M, H, 20); \
        qt[21] = expand2b(Qb, M, H, 21); \
        qt[22] = expand2b(Qb, M, H, 22); \
        qt[23] = expand2b(Qb, M, H, 23); \
        qt[24] = expand2b(Qb, M, H, 24); \
        qt[25] = expand2b(Qb, M, H, 25); \
        qt[26] = expand2b(Qb, M, H, 26); \
        qt[27] = expand2b(Qb, M, H, 27); \
        qt[28] = expand2b(Qb, M, H, 28); \
        qt[29] = expand2b(Qb, M, H, 29); \
        qt[30] = expand2b(Qb, M, H, 30); \
        qt[31] = expand2b(Qb, M, H, 31); \
    } while (0)
|
||||
|
||||
#endif
|
||||
|
||||
/*
 * MAKE_Qb fills the whole 32-word qt[] array for the 64-bit variants:
 * first qt[0..15] (MAKE_Qab), then qt[16..31] (MAKE_Qbb), which reads the
 * words produced before it.
 * Qb(j) is the accessor for word j of qt[].
 */
#define MAKE_Qb   do { \
        MAKE_Qab; \
        MAKE_Qbb; \
    } while (0)

#define Qb(j)     (qt[j])
|
||||
|
||||
#endif
|
||||
|
||||
/*
 * Generic body of the compression function, shared by the 32-bit and
 * 64-bit variants.
 *
 *   type  word type of the local qt[] array and of xl / xh
 *   mkQ   statement that fills qt[0..31]
 *   tt    word-truncation macro
 *   rol   rotation macro
 *   mf    accessor for message word i
 *   qf    accessor for qt[] word i
 *   dhf   accessor (lvalue) for output word i
 *
 * xl is the XOR of qt[16..23]; xh additionally XORs in qt[24..31].
 * Output words 0..7 are written first; words 8..15 each read one of the
 * already written words 0..7 (rotated), so the statement order matters.
 */
#define FOLD(type, mkQ, tt, rol, mf, qf, dhf)   do { \
        type qt[32], xl, xh; \
        \
        mkQ; \
        xl = qf(16) ^ qf(17) ^ qf(18) ^ qf(19) \
            ^ qf(20) ^ qf(21) ^ qf(22) ^ qf(23); \
        xh = xl ^ qf(24) ^ qf(25) ^ qf(26) ^ qf(27) \
            ^ qf(28) ^ qf(29) ^ qf(30) ^ qf(31); \
        \
        dhf( 0) = tt(((xh <<  5) ^ (qf(16) >>  5) ^ mf( 0)) \
            + (xl ^ qf(24) ^ qf( 0))); \
        dhf( 1) = tt(((xh >>  7) ^ (qf(17) <<  8) ^ mf( 1)) \
            + (xl ^ qf(25) ^ qf( 1))); \
        dhf( 2) = tt(((xh >>  5) ^ (qf(18) <<  5) ^ mf( 2)) \
            + (xl ^ qf(26) ^ qf( 2))); \
        dhf( 3) = tt(((xh >>  1) ^ (qf(19) <<  5) ^ mf( 3)) \
            + (xl ^ qf(27) ^ qf( 3))); \
        dhf( 4) = tt(((xh >>  3) ^ (qf(20) <<  0) ^ mf( 4)) \
            + (xl ^ qf(28) ^ qf( 4))); \
        dhf( 5) = tt(((xh <<  6) ^ (qf(21) >>  6) ^ mf( 5)) \
            + (xl ^ qf(29) ^ qf( 5))); \
        dhf( 6) = tt(((xh >>  4) ^ (qf(22) <<  6) ^ mf( 6)) \
            + (xl ^ qf(30) ^ qf( 6))); \
        dhf( 7) = tt(((xh >> 11) ^ (qf(23) <<  2) ^ mf( 7)) \
            + (xl ^ qf(31) ^ qf( 7))); \
        \
        dhf( 8) = tt(rol(dhf(4),  9) + (xh ^ qf(24) ^ mf( 8)) \
            + ((xl << 8) ^ qf(23) ^ qf( 8))); \
        dhf( 9) = tt(rol(dhf(5), 10) + (xh ^ qf(25) ^ mf( 9)) \
            + ((xl >> 6) ^ qf(16) ^ qf( 9))); \
        dhf(10) = tt(rol(dhf(6), 11) + (xh ^ qf(26) ^ mf(10)) \
            + ((xl << 6) ^ qf(17) ^ qf(10))); \
        dhf(11) = tt(rol(dhf(7), 12) + (xh ^ qf(27) ^ mf(11)) \
            + ((xl << 4) ^ qf(18) ^ qf(11))); \
        dhf(12) = tt(rol(dhf(0), 13) + (xh ^ qf(28) ^ mf(12)) \
            + ((xl >> 3) ^ qf(19) ^ qf(12))); \
        dhf(13) = tt(rol(dhf(1), 14) + (xh ^ qf(29) ^ mf(13)) \
            + ((xl >> 4) ^ qf(20) ^ qf(13))); \
        dhf(14) = tt(rol(dhf(2), 15) + (xh ^ qf(30) ^ mf(14)) \
            + ((xl >> 7) ^ qf(21) ^ qf(14))); \
        dhf(15) = tt(rol(dhf(3), 16) + (xh ^ qf(31) ^ mf(15)) \
            + ((xl >> 2) ^ qf(22) ^ qf(15))); \
    } while (0)
|
||||
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/*
 * 64-bit instantiation of FOLD: sph_u64 words, MAKE_Qb to fill qt[],
 * message words through M(), output words through dH().
 */
#define FOLDb \
    FOLD(sph_u64, MAKE_Qb, SPH_T64, SPH_ROTL64, M, Qb, dH)
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__AVX2__)
|
||||
|
||||
/*
 * 32-bit instantiation of FOLD: sph_u32 words, MAKE_Qs to fill qt[],
 * message words through M(), output words through dH().
 */
#define FOLDs \
    FOLD(sph_u32, MAKE_Qs, SPH_T32, SPH_ROTL32, M, Qs, dH)
|
||||
|
||||
static void
|
||||
compress_small(const unsigned char *data, const sph_u32 h[16], sph_u32 dh[16])
|
||||
{
|
||||
#if SPH_LITTLE_FAST
|
||||
#define M(x) sph_dec32le_aligned(data + 4 * (x))
|
||||
#else
|
||||
sph_u32 mv[16];
|
||||
|
||||
mv[ 0] = sph_dec32le_aligned(data + 0);
|
||||
mv[ 1] = sph_dec32le_aligned(data + 4);
|
||||
mv[ 2] = sph_dec32le_aligned(data + 8);
|
||||
mv[ 3] = sph_dec32le_aligned(data + 12);
|
||||
mv[ 4] = sph_dec32le_aligned(data + 16);
|
||||
mv[ 5] = sph_dec32le_aligned(data + 20);
|
||||
mv[ 6] = sph_dec32le_aligned(data + 24);
|
||||
mv[ 7] = sph_dec32le_aligned(data + 28);
|
||||
mv[ 8] = sph_dec32le_aligned(data + 32);
|
||||
mv[ 9] = sph_dec32le_aligned(data + 36);
|
||||
mv[10] = sph_dec32le_aligned(data + 40);
|
||||
mv[11] = sph_dec32le_aligned(data + 44);
|
||||
mv[12] = sph_dec32le_aligned(data + 48);
|
||||
mv[13] = sph_dec32le_aligned(data + 52);
|
||||
mv[14] = sph_dec32le_aligned(data + 56);
|
||||
mv[15] = sph_dec32le_aligned(data + 60);
|
||||
#define M(x) (mv[x])
|
||||
#endif
|
||||
#define H(x) (h[x])
|
||||
#define dH(x) (dh[x])
|
||||
|
||||
FOLDs;
|
||||
|
||||
#undef M
|
||||
#undef H
|
||||
#undef dH
|
||||
}
|
||||
|
||||
static const sph_u32 final_s[16] = {
|
||||
SPH_C32(0xaaaaaaa0), SPH_C32(0xaaaaaaa1), SPH_C32(0xaaaaaaa2),
|
||||
SPH_C32(0xaaaaaaa3), SPH_C32(0xaaaaaaa4), SPH_C32(0xaaaaaaa5),
|
||||
SPH_C32(0xaaaaaaa6), SPH_C32(0xaaaaaaa7), SPH_C32(0xaaaaaaa8),
|
||||
SPH_C32(0xaaaaaaa9), SPH_C32(0xaaaaaaaa), SPH_C32(0xaaaaaaab),
|
||||
SPH_C32(0xaaaaaaac), SPH_C32(0xaaaaaaad), SPH_C32(0xaaaaaaae),
|
||||
SPH_C32(0xaaaaaaaf)
|
||||
};
|
||||
|
||||
static void
|
||||
bmw32_init(sph_bmw_small_context *sc, const sph_u32 *iv)
|
||||
{
|
||||
memcpy(sc->H, iv, sizeof sc->H);
|
||||
sc->ptr = 0;
|
||||
#if SPH_64
|
||||
sc->bit_count = 0;
|
||||
#else
|
||||
sc->bit_count_high = 0;
|
||||
sc->bit_count_low = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
bmw32(sph_bmw_small_context *sc, const void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
size_t ptr;
|
||||
sph_u32 htmp[16];
|
||||
sph_u32 *h1, *h2;
|
||||
#if !SPH_64
|
||||
sph_u32 tmp;
|
||||
#endif
|
||||
|
||||
#if SPH_64
|
||||
sc->bit_count += (sph_u64)len << 3;
|
||||
#else
|
||||
tmp = sc->bit_count_low;
|
||||
sc->bit_count_low = SPH_T32(tmp + ((sph_u32)len << 3));
|
||||
if (sc->bit_count_low < tmp)
|
||||
sc->bit_count_high ++;
|
||||
sc->bit_count_high += len >> 29;
|
||||
#endif
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
h1 = sc->H;
|
||||
h2 = htmp;
|
||||
while (len > 0) {
|
||||
size_t clen;
|
||||
|
||||
clen = (sizeof sc->buf) - ptr;
|
||||
if (clen > len)
|
||||
clen = len;
|
||||
memcpy(buf + ptr, data, clen);
|
||||
data = (const unsigned char *)data + clen;
|
||||
len -= clen;
|
||||
ptr += clen;
|
||||
if (ptr == sizeof sc->buf) {
|
||||
sph_u32 *ht;
|
||||
|
||||
compress_small(buf, h1, h2);
|
||||
ht = h1;
|
||||
h1 = h2;
|
||||
h2 = ht;
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
sc->ptr = ptr;
|
||||
if (h1 != sc->H)
|
||||
memcpy(sc->H, h1, sizeof sc->H);
|
||||
}
|
||||
|
||||
static void
|
||||
bmw32_close(sph_bmw_small_context *sc, unsigned ub, unsigned n,
|
||||
void *dst, size_t out_size_w32)
|
||||
{
|
||||
unsigned char *buf, *out;
|
||||
size_t ptr, u, v;
|
||||
unsigned z;
|
||||
sph_u32 h1[16], h2[16], *h;
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
z = 0x80 >> n;
|
||||
buf[ptr ++] = ((ub & -z) | z) & 0xFF;
|
||||
h = sc->H;
|
||||
if (ptr > (sizeof sc->buf) - 8) {
|
||||
memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
|
||||
compress_small(buf, h, h1);
|
||||
ptr = 0;
|
||||
h = h1;
|
||||
}
|
||||
memset(buf + ptr, 0, (sizeof sc->buf) - 8 - ptr);
|
||||
#if SPH_64
|
||||
sph_enc64le_aligned(buf + (sizeof sc->buf) - 8,
|
||||
SPH_T64(sc->bit_count + n));
|
||||
#else
|
||||
sph_enc32le_aligned(buf + (sizeof sc->buf) - 8,
|
||||
sc->bit_count_low + n);
|
||||
sph_enc32le_aligned(buf + (sizeof sc->buf) - 4,
|
||||
SPH_T32(sc->bit_count_high));
|
||||
#endif
|
||||
compress_small(buf, h, h2);
|
||||
for (u = 0; u < 16; u ++)
|
||||
sph_enc32le_aligned(buf + 4 * u, h2[u]);
|
||||
compress_small(buf, final_s, h1);
|
||||
out = dst;
|
||||
for (u = 0, v = 16 - out_size_w32; u < out_size_w32; u ++, v ++)
|
||||
sph_enc32le(out + 4 * u, h1[v]);
|
||||
}
|
||||
|
||||
#endif // !AVX2
|
||||
|
||||
#if SPH_64
|
||||
|
||||
static void
|
||||
compress_big(const unsigned char *data, const sph_u64 h[16], sph_u64 dh[16])
|
||||
{
|
||||
#if SPH_LITTLE_FAST
|
||||
#define M(x) sph_dec64le_aligned(data + 8 * (x))
|
||||
#else
|
||||
sph_u64 mv[16];
|
||||
|
||||
mv[ 0] = sph_dec64le_aligned(data + 0);
|
||||
mv[ 1] = sph_dec64le_aligned(data + 8);
|
||||
mv[ 2] = sph_dec64le_aligned(data + 16);
|
||||
mv[ 3] = sph_dec64le_aligned(data + 24);
|
||||
mv[ 4] = sph_dec64le_aligned(data + 32);
|
||||
mv[ 5] = sph_dec64le_aligned(data + 40);
|
||||
mv[ 6] = sph_dec64le_aligned(data + 48);
|
||||
mv[ 7] = sph_dec64le_aligned(data + 56);
|
||||
mv[ 8] = sph_dec64le_aligned(data + 64);
|
||||
mv[ 9] = sph_dec64le_aligned(data + 72);
|
||||
mv[10] = sph_dec64le_aligned(data + 80);
|
||||
mv[11] = sph_dec64le_aligned(data + 88);
|
||||
mv[12] = sph_dec64le_aligned(data + 96);
|
||||
mv[13] = sph_dec64le_aligned(data + 104);
|
||||
mv[14] = sph_dec64le_aligned(data + 112);
|
||||
mv[15] = sph_dec64le_aligned(data + 120);
|
||||
#define M(x) (mv[x])
|
||||
#endif
|
||||
#define H(x) (h[x])
|
||||
#define dH(x) (dh[x])
|
||||
|
||||
FOLDb;
|
||||
|
||||
#undef M
|
||||
#undef H
|
||||
#undef dH
|
||||
}
|
||||
|
||||
static const sph_u64 final_b[16] = {
|
||||
SPH_C64(0xaaaaaaaaaaaaaaa0), SPH_C64(0xaaaaaaaaaaaaaaa1),
|
||||
SPH_C64(0xaaaaaaaaaaaaaaa2), SPH_C64(0xaaaaaaaaaaaaaaa3),
|
||||
SPH_C64(0xaaaaaaaaaaaaaaa4), SPH_C64(0xaaaaaaaaaaaaaaa5),
|
||||
SPH_C64(0xaaaaaaaaaaaaaaa6), SPH_C64(0xaaaaaaaaaaaaaaa7),
|
||||
SPH_C64(0xaaaaaaaaaaaaaaa8), SPH_C64(0xaaaaaaaaaaaaaaa9),
|
||||
SPH_C64(0xaaaaaaaaaaaaaaaa), SPH_C64(0xaaaaaaaaaaaaaaab),
|
||||
SPH_C64(0xaaaaaaaaaaaaaaac), SPH_C64(0xaaaaaaaaaaaaaaad),
|
||||
SPH_C64(0xaaaaaaaaaaaaaaae), SPH_C64(0xaaaaaaaaaaaaaaaf)
|
||||
};
|
||||
|
||||
static void
|
||||
bmw64_init(sph_bmw_big_context *sc, const sph_u64 *iv)
|
||||
{
|
||||
memcpy(sc->H, iv, sizeof sc->H);
|
||||
sc->ptr = 0;
|
||||
sc->bit_count = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
bmw64(sph_bmw_big_context *sc, const void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
size_t ptr;
|
||||
sph_u64 htmp[16];
|
||||
sph_u64 *h1, *h2;
|
||||
|
||||
sc->bit_count += (sph_u64)len << 3;
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
h1 = sc->H;
|
||||
h2 = htmp;
|
||||
while (len > 0) {
|
||||
size_t clen;
|
||||
|
||||
clen = (sizeof sc->buf) - ptr;
|
||||
if (clen > len)
|
||||
clen = len;
|
||||
memcpy(buf + ptr, data, clen);
|
||||
data = (const unsigned char *)data + clen;
|
||||
len -= clen;
|
||||
ptr += clen;
|
||||
if (ptr == sizeof sc->buf) {
|
||||
sph_u64 *ht;
|
||||
|
||||
compress_big(buf, h1, h2);
|
||||
ht = h1;
|
||||
h1 = h2;
|
||||
h2 = ht;
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
sc->ptr = ptr;
|
||||
if (h1 != sc->H)
|
||||
memcpy(sc->H, h1, sizeof sc->H);
|
||||
}
|
||||
|
||||
static void
|
||||
bmw64_close(sph_bmw_big_context *sc, unsigned ub, unsigned n,
|
||||
void *dst, size_t out_size_w64)
|
||||
{
|
||||
unsigned char *buf, *out;
|
||||
size_t ptr, u, v;
|
||||
unsigned z;
|
||||
sph_u64 h1[16], h2[16], *h;
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
z = 0x80 >> n;
|
||||
buf[ptr ++] = ((ub & -z) | z) & 0xFF;
|
||||
h = sc->H;
|
||||
if (ptr > (sizeof sc->buf) - 8) {
|
||||
memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
|
||||
compress_big(buf, h, h1);
|
||||
ptr = 0;
|
||||
h = h1;
|
||||
}
|
||||
memset(buf + ptr, 0, (sizeof sc->buf) - 8 - ptr);
|
||||
sph_enc64le_aligned(buf + (sizeof sc->buf) - 8,
|
||||
SPH_T64(sc->bit_count + n));
|
||||
compress_big(buf, h, h2);
|
||||
for (u = 0; u < 16; u ++)
|
||||
sph_enc64le_aligned(buf + 8 * u, h2[u]);
|
||||
compress_big(buf, final_b, h1);
|
||||
out = dst;
|
||||
for (u = 0, v = 16 - out_size_w64; u < out_size_w64; u ++, v ++)
|
||||
sph_enc64le(out + 8 * u, h1[v]);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__AVX2__)
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw224_init(void *cc)
|
||||
{
|
||||
bmw32_init(cc, IV224);
|
||||
}
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw224(void *cc, const void *data, size_t len)
|
||||
{
|
||||
bmw32(cc, data, len);
|
||||
}
|
||||
|
||||
/*
 * Public API (declared in sph_bmw.h): finish a BMW-224 computation with
 * no extra bits and write the digest to dst.
 */
void
sph_bmw224_close(void *cc, void *dst)
{
    const unsigned no_bits = 0;

    sph_bmw224_addbits_and_close(cc, no_bits, no_bits, dst);
}
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
bmw32_close(cc, ub, n, dst, 7);
|
||||
// sph_bmw224_init(cc);
|
||||
}
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw256_init(void *cc)
|
||||
{
|
||||
bmw32_init(cc, IV256);
|
||||
}
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw256(void *cc, const void *data, size_t len)
|
||||
{
|
||||
bmw32(cc, data, len);
|
||||
}
|
||||
|
||||
/*
 * Public API (declared in sph_bmw.h): finish a BMW-256 computation with
 * no extra bits and write the digest to dst.
 */
void
sph_bmw256_close(void *cc, void *dst)
{
    const unsigned no_bits = 0;

    sph_bmw256_addbits_and_close(cc, no_bits, no_bits, dst);
}
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
bmw32_close(cc, ub, n, dst, 8);
|
||||
// sph_bmw256_init(cc);
|
||||
}
|
||||
|
||||
#endif // !AVX2
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw384_init(void *cc)
|
||||
{
|
||||
bmw64_init(cc, IV384);
|
||||
}
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw384(void *cc, const void *data, size_t len)
|
||||
{
|
||||
bmw64(cc, data, len);
|
||||
}
|
||||
|
||||
/*
 * Public API (declared in sph_bmw.h): finish a BMW-384 computation with
 * no extra bits and write the digest to dst.
 */
void
sph_bmw384_close(void *cc, void *dst)
{
    const unsigned no_bits = 0;

    sph_bmw384_addbits_and_close(cc, no_bits, no_bits, dst);
}
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
bmw64_close(cc, ub, n, dst, 6);
|
||||
// sph_bmw384_init(cc);
|
||||
}
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw512_init(void *cc)
|
||||
{
|
||||
bmw64_init(cc, IV512);
|
||||
}
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw512(void *cc, const void *data, size_t len)
|
||||
{
|
||||
bmw64(cc, data, len);
|
||||
}
|
||||
|
||||
/*
 * Public API (declared in sph_bmw.h): finish a BMW-512 computation with
 * no extra bits and write the digest to dst.
 */
void
sph_bmw512_close(void *cc, void *dst)
{
    const unsigned no_bits = 0;

    sph_bmw512_addbits_and_close(cc, no_bits, no_bits, dst);
}
|
||||
|
||||
/* see sph_bmw.h */
|
||||
void
|
||||
sph_bmw512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
bmw64_close(cc, ub, n, dst, 8);
|
||||
// sph_bmw512_init(cc);
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
337
src/crypto/ghostrider/sph_bmw.h
Normal file
337
src/crypto/ghostrider/sph_bmw.h
Normal file
@@ -0,0 +1,337 @@
|
||||
/* $Id: sph_bmw.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* BMW interface. BMW (aka "Blue Midnight Wish") is a family of
|
||||
* functions which differ by their output size; this implementation
|
||||
* defines BMW for output sizes 224, 256, 384 and 512 bits.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_bmw.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_BMW_H__
|
||||
#define SPH_BMW_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include "sph_types.h"
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BMW-224.
|
||||
*/
|
||||
#define SPH_SIZE_bmw224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BMW-256.
|
||||
*/
|
||||
#define SPH_SIZE_bmw256 256
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BMW-384.
|
||||
*/
|
||||
#define SPH_SIZE_bmw384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for BMW-512.
|
||||
*/
|
||||
#define SPH_SIZE_bmw512 512
|
||||
|
||||
#endif
|
||||
|
||||
/**
|
||||
* This structure is a context for BMW-224 and BMW-256 computations:
|
||||
* it contains the intermediate values and some data from the last
|
||||
* entered block. Once a BMW computation has been performed, the
|
||||
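* context can be reused for another computation. In this version the
* close functions do not reset the context, so call the matching init
* function again before reusing it.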
|
||||
*
|
||||
* The contents of this structure are private. A running BMW
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
|
||||
#if !defined(__AVX2__)
|
||||
|
||||
typedef struct { /* context layout shared by BMW-224 and BMW-256 (both are typedef'd to it) */
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[64]; /* first field, for alignment; NOTE(review): presumably holds the partially filled 64-byte input block -- confirm in sph_bmw.c */
|
||||
size_t ptr; /* NOTE(review): presumably the number of bytes currently pending in buf -- confirm in sph_bmw.c */
|
||||
sph_u32 H[16]; /* sixteen 32-bit words; NOTE(review): presumably the chaining value -- confirm in sph_bmw.c */
|
||||
#if SPH_64 /* a 64-bit integer type is available: keep the counter in one field */
|
||||
sph_u64 bit_count; /* NOTE(review): presumably the processed message length in bits -- confirm */
|
||||
#else /* no 64-bit type: the counter is split into two 32-bit halves */
|
||||
sph_u32 bit_count_high, bit_count_low;
|
||||
#endif /* SPH_64 */
|
||||
#endif /* !DOXYGEN_IGNORE */
|
||||
} sph_bmw_small_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for BMW-224 computations. It is
|
||||
* identical to the common <code>sph_bmw_small_context</code>.
|
||||
*/
|
||||
typedef sph_bmw_small_context sph_bmw224_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for BMW-256 computations. It is
|
||||
* identical to the common <code>sph_bmw_small_context</code>.
|
||||
*/
|
||||
typedef sph_bmw_small_context sph_bmw256_context;
|
||||
|
||||
#endif // !AVX2
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* This structure is a context for BMW-384 and BMW-512 computations:
|
||||
* it contains the intermediate values and some data from the last
|
||||
* entered block. Once a BMW computation has been performed, the
|
||||
* context can be reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running BMW
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct { /* context layout shared by BMW-384 and BMW-512 (both are typedef'd to it); only declared when SPH_64 is set */
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[128]; /* first field, for alignment; NOTE(review): presumably holds the partially filled 128-byte input block -- confirm in sph_bmw.c */
|
||||
size_t ptr; /* NOTE(review): presumably the number of bytes currently pending in buf -- confirm in sph_bmw.c */
|
||||
sph_u64 H[16]; /* sixteen 64-bit words; NOTE(review): presumably the chaining value -- confirm in sph_bmw.c */
|
||||
sph_u64 bit_count; /* NOTE(review): presumably the processed message length in bits -- confirm */
|
||||
#endif /* !DOXYGEN_IGNORE */
|
||||
} sph_bmw_big_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for BMW-384 computations. It is
|
||||
* identical to the common <code>sph_bmw_big_context</code>.
|
||||
*/
|
||||
typedef sph_bmw_big_context sph_bmw384_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for BMW-512 computations. It is
|
||||
* identical to the common <code>sph_bmw_big_context</code>.
|
||||
*/
|
||||
typedef sph_bmw_big_context sph_bmw512_context;
|
||||
|
||||
#endif
|
||||
|
||||
#if !defined(__AVX2__)
|
||||
|
||||
/**
|
||||
* Initialize a BMW-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the BMW-224 context (pointer to a
|
||||
* <code>sph_bmw224_context</code>)
|
||||
*/
|
||||
void sph_bmw224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the BMW-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_bmw224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current BMW-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the BMW-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_bmw224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the BMW-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_bmw224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a BMW-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the BMW-256 context (pointer to a
|
||||
* <code>sph_bmw256_context</code>)
|
||||
*/
|
||||
void sph_bmw256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the BMW-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_bmw256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current BMW-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the BMW-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_bmw256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the BMW-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_bmw256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#endif // !AVX2
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* Initialize a BMW-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the BMW-384 context (pointer to a
|
||||
* <code>sph_bmw384_context</code>)
|
||||
*/
|
||||
void sph_bmw384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the BMW-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_bmw384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current BMW-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the BMW-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_bmw384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the BMW-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_bmw384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a BMW-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the BMW-512 context (pointer to a
|
||||
* <code>sph_bmw512_context</code>)
|
||||
*/
|
||||
void sph_bmw512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the BMW-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_bmw512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current BMW-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the BMW-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_bmw512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the BMW-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_bmw512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
723
src/crypto/ghostrider/sph_cubehash.c
Normal file
723
src/crypto/ghostrider/sph_cubehash.c
Normal file
@@ -0,0 +1,723 @@
|
||||
/* $Id: cubehash.c 227 2010-06-16 17:28:38Z tp $ */
|
||||
/*
|
||||
* CubeHash implementation.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
#include <limits.h>
|
||||
|
||||
#include "sph_cubehash.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_CUBEHASH
|
||||
#define SPH_SMALL_FOOTPRINT_CUBEHASH 1
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Some tests were conducted on an Intel Core2 Q6600 (32-bit and 64-bit
|
||||
* mode), a PowerPC G3, and a MIPS-compatible CPU (Broadcom BCM3302).
|
||||
* It appears that the optimal settings are:
|
||||
* -- full unroll, no state copy on the "big" systems (x86, PowerPC)
|
||||
* -- unroll to 4 or 8, state copy on the "small" system (MIPS)
|
||||
*/
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT_CUBEHASH
|
||||
|
||||
#if !defined SPH_CUBEHASH_UNROLL
|
||||
#define SPH_CUBEHASH_UNROLL 4
|
||||
#endif
|
||||
#if !defined SPH_CUBEHASH_NOCOPY
|
||||
#define SPH_CUBEHASH_NOCOPY 1
|
||||
#endif
|
||||
|
||||
#else
|
||||
|
||||
#if !defined SPH_CUBEHASH_UNROLL
|
||||
#define SPH_CUBEHASH_UNROLL 0
|
||||
#endif
|
||||
#if !defined SPH_CUBEHASH_NOCOPY
|
||||
#define SPH_CUBEHASH_NOCOPY 0
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
static const sph_u32 IV224[] = { /* 32-word initial state for CubeHash-224; copied into the context state by cubehash_init() when sph_cubehash224_init() is called */
|
||||
SPH_C32(0xB0FC8217), SPH_C32(0x1BEE1A90), SPH_C32(0x829E1A22),
|
||||
SPH_C32(0x6362C342), SPH_C32(0x24D91C30), SPH_C32(0x03A7AA24),
|
||||
SPH_C32(0xA63721C8), SPH_C32(0x85B0E2EF), SPH_C32(0xF35D13F3),
|
||||
SPH_C32(0x41DA807D), SPH_C32(0x21A70CA6), SPH_C32(0x1F4E9774),
|
||||
SPH_C32(0xB3E1C932), SPH_C32(0xEB0A79A8), SPH_C32(0xCDDAAA66),
|
||||
SPH_C32(0xE2F6ECAA), SPH_C32(0x0A713362), SPH_C32(0xAA3080E0),
|
||||
SPH_C32(0xD8F23A32), SPH_C32(0xCEF15E28), SPH_C32(0xDB086314),
|
||||
SPH_C32(0x7F709DF7), SPH_C32(0xACD228A4), SPH_C32(0x704D6ECE),
|
||||
SPH_C32(0xAA3EC95F), SPH_C32(0xE387C214), SPH_C32(0x3A6445FF),
|
||||
SPH_C32(0x9CAB81C3), SPH_C32(0xC73D4B98), SPH_C32(0xD277AEBE),
|
||||
SPH_C32(0xFD20151C), SPH_C32(0x00CB573E)
|
||||
};
|
||||
|
||||
static const sph_u32 IV256[] = { /* 32-word initial state for CubeHash-256; copied into the context state by cubehash_init() when sph_cubehash256_init() is called */
|
||||
SPH_C32(0xEA2BD4B4), SPH_C32(0xCCD6F29F), SPH_C32(0x63117E71),
|
||||
SPH_C32(0x35481EAE), SPH_C32(0x22512D5B), SPH_C32(0xE5D94E63),
|
||||
SPH_C32(0x7E624131), SPH_C32(0xF4CC12BE), SPH_C32(0xC2D0B696),
|
||||
SPH_C32(0x42AF2070), SPH_C32(0xD0720C35), SPH_C32(0x3361DA8C),
|
||||
SPH_C32(0x28CCECA4), SPH_C32(0x8EF8AD83), SPH_C32(0x4680AC00),
|
||||
SPH_C32(0x40E5FBAB), SPH_C32(0xD89041C3), SPH_C32(0x6107FBD5),
|
||||
SPH_C32(0x6C859D41), SPH_C32(0xF0B26679), SPH_C32(0x09392549),
|
||||
SPH_C32(0x5FA25603), SPH_C32(0x65C892FD), SPH_C32(0x93CB6285),
|
||||
SPH_C32(0x2AF2B5AE), SPH_C32(0x9E4B4E60), SPH_C32(0x774ABFDD),
|
||||
SPH_C32(0x85254725), SPH_C32(0x15815AEB), SPH_C32(0x4AB6AAD6),
|
||||
SPH_C32(0x9CDAF8AF), SPH_C32(0xD6032C0A)
|
||||
};
|
||||
|
||||
static const sph_u32 IV384[] = { /* 32-word initial state for CubeHash-384; copied into the context state by cubehash_init() when sph_cubehash384_init() is called */
|
||||
SPH_C32(0xE623087E), SPH_C32(0x04C00C87), SPH_C32(0x5EF46453),
|
||||
SPH_C32(0x69524B13), SPH_C32(0x1A05C7A9), SPH_C32(0x3528DF88),
|
||||
SPH_C32(0x6BDD01B5), SPH_C32(0x5057B792), SPH_C32(0x6AA7A922),
|
||||
SPH_C32(0x649C7EEE), SPH_C32(0xF426309F), SPH_C32(0xCB629052),
|
||||
SPH_C32(0xFC8E20ED), SPH_C32(0xB3482BAB), SPH_C32(0xF89E5E7E),
|
||||
SPH_C32(0xD83D4DE4), SPH_C32(0x44BFC10D), SPH_C32(0x5FC1E63D),
|
||||
SPH_C32(0x2104E6CB), SPH_C32(0x17958F7F), SPH_C32(0xDBEAEF70),
|
||||
SPH_C32(0xB4B97E1E), SPH_C32(0x32C195F6), SPH_C32(0x6184A8E4),
|
||||
SPH_C32(0x796C2543), SPH_C32(0x23DE176D), SPH_C32(0xD33BBAEC),
|
||||
SPH_C32(0x0C12E5D2), SPH_C32(0x4EB95A7B), SPH_C32(0x2D18BA01),
|
||||
SPH_C32(0x04EE475F), SPH_C32(0x1FC5F22E)
|
||||
};
|
||||
|
||||
static const sph_u32 IV512[] = { /* 32-word initial state for CubeHash-512; copied into the context state by cubehash_init() when sph_cubehash512_init() is called */
|
||||
SPH_C32(0x2AEA2A61), SPH_C32(0x50F494D4), SPH_C32(0x2D538B8B),
|
||||
SPH_C32(0x4167D83E), SPH_C32(0x3FEE2313), SPH_C32(0xC701CF8C),
|
||||
SPH_C32(0xCC39968E), SPH_C32(0x50AC5695), SPH_C32(0x4D42C787),
|
||||
SPH_C32(0xA647A8B3), SPH_C32(0x97CF0BEF), SPH_C32(0x825B4537),
|
||||
SPH_C32(0xEEF864D2), SPH_C32(0xF22090C4), SPH_C32(0xD0E5CD33),
|
||||
SPH_C32(0xA23911AE), SPH_C32(0xFCD398D9), SPH_C32(0x148FE485),
|
||||
SPH_C32(0x1B017BEF), SPH_C32(0xB6444532), SPH_C32(0x6A536159),
|
||||
SPH_C32(0x2FF5781C), SPH_C32(0x91FA7934), SPH_C32(0x0DBADEA9),
|
||||
SPH_C32(0xD65C8A2B), SPH_C32(0xA5A70E75), SPH_C32(0xB1C62456),
|
||||
SPH_C32(0xBC796576), SPH_C32(0x1921C8F7), SPH_C32(0xE7989AF1),
|
||||
SPH_C32(0x7795D246), SPH_C32(0xD43E3B44)
|
||||
};
|
||||
|
||||
#define T32 SPH_T32
|
||||
#define ROTL32 SPH_ROTL32
|
||||
|
||||
#if SPH_CUBEHASH_NOCOPY
|
||||
|
||||
#define DECL_STATE
|
||||
#define READ_STATE(cc)
|
||||
#define WRITE_STATE(cc)
|
||||
|
||||
#define x0 ((sc)->state[ 0])
|
||||
#define x1 ((sc)->state[ 1])
|
||||
#define x2 ((sc)->state[ 2])
|
||||
#define x3 ((sc)->state[ 3])
|
||||
#define x4 ((sc)->state[ 4])
|
||||
#define x5 ((sc)->state[ 5])
|
||||
#define x6 ((sc)->state[ 6])
|
||||
#define x7 ((sc)->state[ 7])
|
||||
#define x8 ((sc)->state[ 8])
|
||||
#define x9 ((sc)->state[ 9])
|
||||
#define xa ((sc)->state[10])
|
||||
#define xb ((sc)->state[11])
|
||||
#define xc ((sc)->state[12])
|
||||
#define xd ((sc)->state[13])
|
||||
#define xe ((sc)->state[14])
|
||||
#define xf ((sc)->state[15])
|
||||
#define xg ((sc)->state[16])
|
||||
#define xh ((sc)->state[17])
|
||||
#define xi ((sc)->state[18])
|
||||
#define xj ((sc)->state[19])
|
||||
#define xk ((sc)->state[20])
|
||||
#define xl ((sc)->state[21])
|
||||
#define xm ((sc)->state[22])
|
||||
#define xn ((sc)->state[23])
|
||||
#define xo ((sc)->state[24])
|
||||
#define xp ((sc)->state[25])
|
||||
#define xq ((sc)->state[26])
|
||||
#define xr ((sc)->state[27])
|
||||
#define xs ((sc)->state[28])
|
||||
#define xt ((sc)->state[29])
|
||||
#define xu ((sc)->state[30])
|
||||
#define xv ((sc)->state[31])
|
||||
|
||||
#else
|
||||
|
||||
#define DECL_STATE \
|
||||
sph_u32 x0, x1, x2, x3, x4, x5, x6, x7; \
|
||||
sph_u32 x8, x9, xa, xb, xc, xd, xe, xf; \
|
||||
sph_u32 xg, xh, xi, xj, xk, xl, xm, xn; \
|
||||
sph_u32 xo, xp, xq, xr, xs, xt, xu, xv;
|
||||
|
||||
#define READ_STATE(cc) do { \
|
||||
x0 = (cc)->state[ 0]; \
|
||||
x1 = (cc)->state[ 1]; \
|
||||
x2 = (cc)->state[ 2]; \
|
||||
x3 = (cc)->state[ 3]; \
|
||||
x4 = (cc)->state[ 4]; \
|
||||
x5 = (cc)->state[ 5]; \
|
||||
x6 = (cc)->state[ 6]; \
|
||||
x7 = (cc)->state[ 7]; \
|
||||
x8 = (cc)->state[ 8]; \
|
||||
x9 = (cc)->state[ 9]; \
|
||||
xa = (cc)->state[10]; \
|
||||
xb = (cc)->state[11]; \
|
||||
xc = (cc)->state[12]; \
|
||||
xd = (cc)->state[13]; \
|
||||
xe = (cc)->state[14]; \
|
||||
xf = (cc)->state[15]; \
|
||||
xg = (cc)->state[16]; \
|
||||
xh = (cc)->state[17]; \
|
||||
xi = (cc)->state[18]; \
|
||||
xj = (cc)->state[19]; \
|
||||
xk = (cc)->state[20]; \
|
||||
xl = (cc)->state[21]; \
|
||||
xm = (cc)->state[22]; \
|
||||
xn = (cc)->state[23]; \
|
||||
xo = (cc)->state[24]; \
|
||||
xp = (cc)->state[25]; \
|
||||
xq = (cc)->state[26]; \
|
||||
xr = (cc)->state[27]; \
|
||||
xs = (cc)->state[28]; \
|
||||
xt = (cc)->state[29]; \
|
||||
xu = (cc)->state[30]; \
|
||||
xv = (cc)->state[31]; \
|
||||
} while (0)
|
||||
|
||||
#define WRITE_STATE(cc) do { \
|
||||
(cc)->state[ 0] = x0; \
|
||||
(cc)->state[ 1] = x1; \
|
||||
(cc)->state[ 2] = x2; \
|
||||
(cc)->state[ 3] = x3; \
|
||||
(cc)->state[ 4] = x4; \
|
||||
(cc)->state[ 5] = x5; \
|
||||
(cc)->state[ 6] = x6; \
|
||||
(cc)->state[ 7] = x7; \
|
||||
(cc)->state[ 8] = x8; \
|
||||
(cc)->state[ 9] = x9; \
|
||||
(cc)->state[10] = xa; \
|
||||
(cc)->state[11] = xb; \
|
||||
(cc)->state[12] = xc; \
|
||||
(cc)->state[13] = xd; \
|
||||
(cc)->state[14] = xe; \
|
||||
(cc)->state[15] = xf; \
|
||||
(cc)->state[16] = xg; \
|
||||
(cc)->state[17] = xh; \
|
||||
(cc)->state[18] = xi; \
|
||||
(cc)->state[19] = xj; \
|
||||
(cc)->state[20] = xk; \
|
||||
(cc)->state[21] = xl; \
|
||||
(cc)->state[22] = xm; \
|
||||
(cc)->state[23] = xn; \
|
||||
(cc)->state[24] = xo; \
|
||||
(cc)->state[25] = xp; \
|
||||
(cc)->state[26] = xq; \
|
||||
(cc)->state[27] = xr; \
|
||||
(cc)->state[28] = xs; \
|
||||
(cc)->state[29] = xt; \
|
||||
(cc)->state[30] = xu; \
|
||||
(cc)->state[31] = xv; \
|
||||
} while (0)
|
||||
|
||||
#endif
|
||||
|
||||
#define INPUT_BLOCK do { \
|
||||
x0 ^= sph_dec32le_aligned(buf + 0); \
|
||||
x1 ^= sph_dec32le_aligned(buf + 4); \
|
||||
x2 ^= sph_dec32le_aligned(buf + 8); \
|
||||
x3 ^= sph_dec32le_aligned(buf + 12); \
|
||||
x4 ^= sph_dec32le_aligned(buf + 16); \
|
||||
x5 ^= sph_dec32le_aligned(buf + 20); \
|
||||
x6 ^= sph_dec32le_aligned(buf + 24); \
|
||||
x7 ^= sph_dec32le_aligned(buf + 28); \
|
||||
} while (0)
|
||||
|
||||
#define ROUND_EVEN do { \
|
||||
xg = T32(x0 + xg); \
|
||||
x0 = ROTL32(x0, 7); \
|
||||
xh = T32(x1 + xh); \
|
||||
x1 = ROTL32(x1, 7); \
|
||||
xi = T32(x2 + xi); \
|
||||
x2 = ROTL32(x2, 7); \
|
||||
xj = T32(x3 + xj); \
|
||||
x3 = ROTL32(x3, 7); \
|
||||
xk = T32(x4 + xk); \
|
||||
x4 = ROTL32(x4, 7); \
|
||||
xl = T32(x5 + xl); \
|
||||
x5 = ROTL32(x5, 7); \
|
||||
xm = T32(x6 + xm); \
|
||||
x6 = ROTL32(x6, 7); \
|
||||
xn = T32(x7 + xn); \
|
||||
x7 = ROTL32(x7, 7); \
|
||||
xo = T32(x8 + xo); \
|
||||
x8 = ROTL32(x8, 7); \
|
||||
xp = T32(x9 + xp); \
|
||||
x9 = ROTL32(x9, 7); \
|
||||
xq = T32(xa + xq); \
|
||||
xa = ROTL32(xa, 7); \
|
||||
xr = T32(xb + xr); \
|
||||
xb = ROTL32(xb, 7); \
|
||||
xs = T32(xc + xs); \
|
||||
xc = ROTL32(xc, 7); \
|
||||
xt = T32(xd + xt); \
|
||||
xd = ROTL32(xd, 7); \
|
||||
xu = T32(xe + xu); \
|
||||
xe = ROTL32(xe, 7); \
|
||||
xv = T32(xf + xv); \
|
||||
xf = ROTL32(xf, 7); \
|
||||
x8 ^= xg; \
|
||||
x9 ^= xh; \
|
||||
xa ^= xi; \
|
||||
xb ^= xj; \
|
||||
xc ^= xk; \
|
||||
xd ^= xl; \
|
||||
xe ^= xm; \
|
||||
xf ^= xn; \
|
||||
x0 ^= xo; \
|
||||
x1 ^= xp; \
|
||||
x2 ^= xq; \
|
||||
x3 ^= xr; \
|
||||
x4 ^= xs; \
|
||||
x5 ^= xt; \
|
||||
x6 ^= xu; \
|
||||
x7 ^= xv; \
|
||||
xi = T32(x8 + xi); \
|
||||
x8 = ROTL32(x8, 11); \
|
||||
xj = T32(x9 + xj); \
|
||||
x9 = ROTL32(x9, 11); \
|
||||
xg = T32(xa + xg); \
|
||||
xa = ROTL32(xa, 11); \
|
||||
xh = T32(xb + xh); \
|
||||
xb = ROTL32(xb, 11); \
|
||||
xm = T32(xc + xm); \
|
||||
xc = ROTL32(xc, 11); \
|
||||
xn = T32(xd + xn); \
|
||||
xd = ROTL32(xd, 11); \
|
||||
xk = T32(xe + xk); \
|
||||
xe = ROTL32(xe, 11); \
|
||||
xl = T32(xf + xl); \
|
||||
xf = ROTL32(xf, 11); \
|
||||
xq = T32(x0 + xq); \
|
||||
x0 = ROTL32(x0, 11); \
|
||||
xr = T32(x1 + xr); \
|
||||
x1 = ROTL32(x1, 11); \
|
||||
xo = T32(x2 + xo); \
|
||||
x2 = ROTL32(x2, 11); \
|
||||
xp = T32(x3 + xp); \
|
||||
x3 = ROTL32(x3, 11); \
|
||||
xu = T32(x4 + xu); \
|
||||
x4 = ROTL32(x4, 11); \
|
||||
xv = T32(x5 + xv); \
|
||||
x5 = ROTL32(x5, 11); \
|
||||
xs = T32(x6 + xs); \
|
||||
x6 = ROTL32(x6, 11); \
|
||||
xt = T32(x7 + xt); \
|
||||
x7 = ROTL32(x7, 11); \
|
||||
xc ^= xi; \
|
||||
xd ^= xj; \
|
||||
xe ^= xg; \
|
||||
xf ^= xh; \
|
||||
x8 ^= xm; \
|
||||
x9 ^= xn; \
|
||||
xa ^= xk; \
|
||||
xb ^= xl; \
|
||||
x4 ^= xq; \
|
||||
x5 ^= xr; \
|
||||
x6 ^= xo; \
|
||||
x7 ^= xp; \
|
||||
x0 ^= xu; \
|
||||
x1 ^= xv; \
|
||||
x2 ^= xs; \
|
||||
x3 ^= xt; \
|
||||
} while (0)
|
||||
|
||||
#define ROUND_ODD do { \
|
||||
xj = T32(xc + xj); \
|
||||
xc = ROTL32(xc, 7); \
|
||||
xi = T32(xd + xi); \
|
||||
xd = ROTL32(xd, 7); \
|
||||
xh = T32(xe + xh); \
|
||||
xe = ROTL32(xe, 7); \
|
||||
xg = T32(xf + xg); \
|
||||
xf = ROTL32(xf, 7); \
|
||||
xn = T32(x8 + xn); \
|
||||
x8 = ROTL32(x8, 7); \
|
||||
xm = T32(x9 + xm); \
|
||||
x9 = ROTL32(x9, 7); \
|
||||
xl = T32(xa + xl); \
|
||||
xa = ROTL32(xa, 7); \
|
||||
xk = T32(xb + xk); \
|
||||
xb = ROTL32(xb, 7); \
|
||||
xr = T32(x4 + xr); \
|
||||
x4 = ROTL32(x4, 7); \
|
||||
xq = T32(x5 + xq); \
|
||||
x5 = ROTL32(x5, 7); \
|
||||
xp = T32(x6 + xp); \
|
||||
x6 = ROTL32(x6, 7); \
|
||||
xo = T32(x7 + xo); \
|
||||
x7 = ROTL32(x7, 7); \
|
||||
xv = T32(x0 + xv); \
|
||||
x0 = ROTL32(x0, 7); \
|
||||
xu = T32(x1 + xu); \
|
||||
x1 = ROTL32(x1, 7); \
|
||||
xt = T32(x2 + xt); \
|
||||
x2 = ROTL32(x2, 7); \
|
||||
xs = T32(x3 + xs); \
|
||||
x3 = ROTL32(x3, 7); \
|
||||
x4 ^= xj; \
|
||||
x5 ^= xi; \
|
||||
x6 ^= xh; \
|
||||
x7 ^= xg; \
|
||||
x0 ^= xn; \
|
||||
x1 ^= xm; \
|
||||
x2 ^= xl; \
|
||||
x3 ^= xk; \
|
||||
xc ^= xr; \
|
||||
xd ^= xq; \
|
||||
xe ^= xp; \
|
||||
xf ^= xo; \
|
||||
x8 ^= xv; \
|
||||
x9 ^= xu; \
|
||||
xa ^= xt; \
|
||||
xb ^= xs; \
|
||||
xh = T32(x4 + xh); \
|
||||
x4 = ROTL32(x4, 11); \
|
||||
xg = T32(x5 + xg); \
|
||||
x5 = ROTL32(x5, 11); \
|
||||
xj = T32(x6 + xj); \
|
||||
x6 = ROTL32(x6, 11); \
|
||||
xi = T32(x7 + xi); \
|
||||
x7 = ROTL32(x7, 11); \
|
||||
xl = T32(x0 + xl); \
|
||||
x0 = ROTL32(x0, 11); \
|
||||
xk = T32(x1 + xk); \
|
||||
x1 = ROTL32(x1, 11); \
|
||||
xn = T32(x2 + xn); \
|
||||
x2 = ROTL32(x2, 11); \
|
||||
xm = T32(x3 + xm); \
|
||||
x3 = ROTL32(x3, 11); \
|
||||
xp = T32(xc + xp); \
|
||||
xc = ROTL32(xc, 11); \
|
||||
xo = T32(xd + xo); \
|
||||
xd = ROTL32(xd, 11); \
|
||||
xr = T32(xe + xr); \
|
||||
xe = ROTL32(xe, 11); \
|
||||
xq = T32(xf + xq); \
|
||||
xf = ROTL32(xf, 11); \
|
||||
xt = T32(x8 + xt); \
|
||||
x8 = ROTL32(x8, 11); \
|
||||
xs = T32(x9 + xs); \
|
||||
x9 = ROTL32(x9, 11); \
|
||||
xv = T32(xa + xv); \
|
||||
xa = ROTL32(xa, 11); \
|
||||
xu = T32(xb + xu); \
|
||||
xb = ROTL32(xb, 11); \
|
||||
x0 ^= xh; \
|
||||
x1 ^= xg; \
|
||||
x2 ^= xj; \
|
||||
x3 ^= xi; \
|
||||
x4 ^= xl; \
|
||||
x5 ^= xk; \
|
||||
x6 ^= xn; \
|
||||
x7 ^= xm; \
|
||||
x8 ^= xp; \
|
||||
x9 ^= xo; \
|
||||
xa ^= xr; \
|
||||
xb ^= xq; \
|
||||
xc ^= xt; \
|
||||
xd ^= xs; \
|
||||
xe ^= xv; \
|
||||
xf ^= xu; \
|
||||
} while (0)
|
||||
|
||||
/*
|
||||
* There is no need to unroll all 16 rounds. The word-swapping permutation
|
||||
* is an involution, so we need to unroll an even number of rounds. On
|
||||
* "big" systems, unrolling 4 rounds yields about 97% of the speed
|
||||
* achieved with full unrolling; and it keeps the code more compact
|
||||
* for small architectures.
|
||||
*/
|
||||
|
||||
#if SPH_CUBEHASH_UNROLL == 2
|
||||
|
||||
#define SIXTEEN_ROUNDS do { \
|
||||
int j; \
|
||||
for (j = 0; j < 8; j ++) { \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#elif SPH_CUBEHASH_UNROLL == 4
|
||||
|
||||
#define SIXTEEN_ROUNDS do { \
|
||||
int j; \
|
||||
for (j = 0; j < 4; j ++) { \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#elif SPH_CUBEHASH_UNROLL == 8
|
||||
|
||||
#define SIXTEEN_ROUNDS do { \
|
||||
int j; \
|
||||
for (j = 0; j < 2; j ++) { \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
#else
|
||||
|
||||
#define SIXTEEN_ROUNDS do { \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
ROUND_EVEN; \
|
||||
ROUND_ODD; \
|
||||
} while (0)
|
||||
|
||||
#endif
|
||||
|
||||
static void
|
||||
cubehash_init(sph_cubehash_context *sc, const sph_u32 *iv)
|
||||
{
|
||||
memcpy(sc->state, iv, sizeof sc->state);
|
||||
sc->ptr = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
cubehash_core(sph_cubehash_context *sc, const void *data, size_t len)
|
||||
{
|
||||
unsigned char *buf;
|
||||
size_t ptr;
|
||||
DECL_STATE
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
if (len < (sizeof sc->buf) - ptr) {
|
||||
memcpy(buf + ptr, data, len);
|
||||
ptr += len;
|
||||
sc->ptr = ptr;
|
||||
return;
|
||||
}
|
||||
|
||||
READ_STATE(sc);
|
||||
while (len > 0) {
|
||||
size_t clen;
|
||||
|
||||
clen = (sizeof sc->buf) - ptr;
|
||||
if (clen > len)
|
||||
clen = len;
|
||||
memcpy(buf + ptr, data, clen);
|
||||
ptr += clen;
|
||||
data = (const unsigned char *)data + clen;
|
||||
len -= clen;
|
||||
if (ptr == sizeof sc->buf) {
|
||||
INPUT_BLOCK;
|
||||
SIXTEEN_ROUNDS;
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
WRITE_STATE(sc);
|
||||
sc->ptr = ptr;
|
||||
}
|
||||
|
||||
static void
|
||||
cubehash_close(sph_cubehash_context *sc, unsigned ub, unsigned n,
|
||||
void *dst, size_t out_size_w32)
|
||||
{
|
||||
unsigned char *buf, *out;
|
||||
size_t ptr;
|
||||
unsigned z;
|
||||
int i;
|
||||
DECL_STATE
|
||||
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
z = 0x80 >> n;
|
||||
buf[ptr ++] = ((ub & -z) | z) & 0xFF;
|
||||
memset(buf + ptr, 0, (sizeof sc->buf) - ptr);
|
||||
READ_STATE(sc);
|
||||
INPUT_BLOCK;
|
||||
for (i = 0; i < 11; i ++) {
|
||||
SIXTEEN_ROUNDS;
|
||||
if (i == 0)
|
||||
xv ^= SPH_C32(1);
|
||||
}
|
||||
WRITE_STATE(sc);
|
||||
out = dst;
|
||||
for (z = 0; z < out_size_w32; z ++)
|
||||
sph_enc32le(out + (z << 2), sc->state[z]);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h: start a CubeHash-224 computation by loading IV224 into the context state and zeroing the pending-byte count */
|
||||
void
|
||||
sph_cubehash224_init(void *cc)
|
||||
{
|
||||
cubehash_init(cc, IV224);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h: absorb len bytes of data; absorption is the same for every output size, so this only forwards to cubehash_core() */
|
||||
void
|
||||
sph_cubehash224(void *cc, const void *data, size_t len)
|
||||
{
|
||||
cubehash_core(cc, data, len);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h: finish with no extra bits (ub = 0, n = 0); output and context reset are done by sph_cubehash224_addbits_and_close() */
|
||||
void
|
||||
sph_cubehash224_close(void *cc, void *dst)
|
||||
{
|
||||
sph_cubehash224_addbits_and_close(cc, 0, 0, dst);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h: append the n extra bits of ub, finalize, and write the digest to dst; the context is then reset for reuse */
|
||||
void
|
||||
sph_cubehash224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
cubehash_close(cc, ub, n, dst, 7); /* 7 output words of 32 bits = 28 bytes */
|
||||
sph_cubehash224_init(cc); /* reload the CubeHash-224 initial state */
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h: start a CubeHash-256 computation by loading IV256 into the context state and zeroing the pending-byte count */
|
||||
void
|
||||
sph_cubehash256_init(void *cc)
|
||||
{
|
||||
cubehash_init(cc, IV256);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h: absorb len bytes of data; absorption is the same for every output size, so this only forwards to cubehash_core() */
|
||||
void
|
||||
sph_cubehash256(void *cc, const void *data, size_t len)
|
||||
{
|
||||
cubehash_core(cc, data, len);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h: finish with no extra bits (ub = 0, n = 0); output and context reset are done by sph_cubehash256_addbits_and_close() */
|
||||
void
|
||||
sph_cubehash256_close(void *cc, void *dst)
|
||||
{
|
||||
sph_cubehash256_addbits_and_close(cc, 0, 0, dst);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h: append the n extra bits of ub, finalize, and write the digest to dst; the context is then reset for reuse */
|
||||
void
|
||||
sph_cubehash256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
cubehash_close(cc, ub, n, dst, 8); /* 8 output words of 32 bits = 32 bytes */
|
||||
sph_cubehash256_init(cc); /* reload the CubeHash-256 initial state */
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h: start a CubeHash-384 computation by loading IV384 into the context state and zeroing the pending-byte count */
|
||||
void
|
||||
sph_cubehash384_init(void *cc)
|
||||
{
|
||||
cubehash_init(cc, IV384);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h: absorb len bytes of data; absorption is the same for every output size, so this only forwards to cubehash_core() */
|
||||
void
|
||||
sph_cubehash384(void *cc, const void *data, size_t len)
|
||||
{
|
||||
cubehash_core(cc, data, len);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h: finish with no extra bits (ub = 0, n = 0); output and context reset are done by sph_cubehash384_addbits_and_close() */
|
||||
void
|
||||
sph_cubehash384_close(void *cc, void *dst)
|
||||
{
|
||||
sph_cubehash384_addbits_and_close(cc, 0, 0, dst);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h: append the n extra bits of ub, finalize, and write the digest to dst; the context is then reset for reuse */
|
||||
void
|
||||
sph_cubehash384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
cubehash_close(cc, ub, n, dst, 12); /* 12 output words of 32 bits = 48 bytes */
|
||||
sph_cubehash384_init(cc); /* reload the CubeHash-384 initial state */
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h: start a CubeHash-512 computation by loading IV512 into the context state and zeroing the pending-byte count */
|
||||
void
|
||||
sph_cubehash512_init(void *cc)
|
||||
{
|
||||
cubehash_init(cc, IV512);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h: absorb len bytes of data; absorption is the same for every output size, so this only forwards to cubehash_core() */
|
||||
void
|
||||
sph_cubehash512(void *cc, const void *data, size_t len)
|
||||
{
|
||||
cubehash_core(cc, data, len);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h: finish with no extra bits (ub = 0, n = 0); output and context reset are done by sph_cubehash512_addbits_and_close() */
|
||||
void
|
||||
sph_cubehash512_close(void *cc, void *dst)
|
||||
{
|
||||
sph_cubehash512_addbits_and_close(cc, 0, 0, dst);
|
||||
}
|
||||
|
||||
/* see sph_cubehash.h: append the n extra bits of ub, finalize, and write the digest to dst; the context is then reset for reuse */
|
||||
void
|
||||
sph_cubehash512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
cubehash_close(cc, ub, n, dst, 16); /* 16 output words of 32 bits = 64 bytes */
|
||||
sph_cubehash512_init(cc); /* reload the CubeHash-512 initial state */
|
||||
}
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
292
src/crypto/ghostrider/sph_cubehash.h
Normal file
292
src/crypto/ghostrider/sph_cubehash.h
Normal file
@@ -0,0 +1,292 @@
|
||||
/* $Id: sph_cubehash.h 180 2010-05-08 02:29:25Z tp $ */
|
||||
/**
|
||||
* CubeHash interface. CubeHash is a family of functions which differ by
|
||||
* their output size; this implementation defines CubeHash for output
|
||||
* sizes 224, 256, 384 and 512 bits, with the "standard parameters"
|
||||
* (CubeHash16/32 with the CubeHash specification notations).
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_cubehash.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_CUBEHASH_H__
|
||||
#define SPH_CUBEHASH_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include "sph_types.h"
|
||||
|
||||
/**
|
||||
* Output size (in bits) for CubeHash-224.
|
||||
*/
|
||||
#define SPH_SIZE_cubehash224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for CubeHash-256.
|
||||
*/
|
||||
#define SPH_SIZE_cubehash256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for CubeHash-384.
|
||||
*/
|
||||
#define SPH_SIZE_cubehash384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for CubeHash-512.
|
||||
*/
|
||||
#define SPH_SIZE_cubehash512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for CubeHash computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* a CubeHash computation has been performed, the context can be reused for
|
||||
* another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running CubeHash computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct { /* CubeHash context; aliased by the 224/256/384/512 context typedefs in this header */
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[32]; /* first field, for alignment */
|
||||
size_t ptr; /* presumably the number of pending bytes held in buf -- confirm in sph_cubehash.c */
|
||||
sph_u32 state[32]; /* intermediate hash values, stored as 32 sph_u32 words */
|
||||
#endif
|
||||
} sph_cubehash_context;
|
||||
|
||||
/**
|
||||
* Type for a CubeHash-224 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_cubehash_context sph_cubehash224_context;
|
||||
|
||||
/**
|
||||
* Type for a CubeHash-256 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_cubehash_context sph_cubehash256_context;
|
||||
|
||||
/**
|
||||
* Type for a CubeHash-384 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_cubehash_context sph_cubehash384_context;
|
||||
|
||||
/**
|
||||
* Type for a CubeHash-512 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_cubehash_context sph_cubehash512_context;
|
||||
|
||||
/**
|
||||
* Initialize a CubeHash-224 context. This process performs no memory
|
||||
* allocation.
|
||||
*
|
||||
* @param cc the CubeHash-224 context (pointer to a
|
||||
* <code>sph_cubehash224_context</code>)
|
||||
*/
|
||||
void sph_cubehash224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the CubeHash-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_cubehash224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current CubeHash-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the CubeHash-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_cubehash224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the CubeHash-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_cubehash224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a CubeHash-256 context. This process performs no memory
|
||||
* allocation.
|
||||
*
|
||||
* @param cc the CubeHash-256 context (pointer to a
|
||||
* <code>sph_cubehash256_context</code>)
|
||||
*/
|
||||
void sph_cubehash256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the CubeHash-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_cubehash256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current CubeHash-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the CubeHash-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_cubehash256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the CubeHash-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_cubehash256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a CubeHash-384 context. This process performs no memory
|
||||
* allocation.
|
||||
*
|
||||
* @param cc the CubeHash-384 context (pointer to a
|
||||
* <code>sph_cubehash384_context</code>)
|
||||
*/
|
||||
void sph_cubehash384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the CubeHash-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_cubehash384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current CubeHash-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the CubeHash-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_cubehash384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the CubeHash-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_cubehash384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a CubeHash-512 context. This process performs no memory
|
||||
* allocation.
|
||||
*
|
||||
* @param cc the CubeHash-512 context (pointer to a
|
||||
* <code>sph_cubehash512_context</code>)
|
||||
*/
|
||||
void sph_cubehash512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the CubeHash-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_cubehash512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current CubeHash-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the CubeHash-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_cubehash512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the CubeHash-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_cubehash512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
1032
src/crypto/ghostrider/sph_echo.c
Normal file
1032
src/crypto/ghostrider/sph_echo.c
Normal file
File diff suppressed because it is too large
Load Diff
319
src/crypto/ghostrider/sph_echo.h
Normal file
319
src/crypto/ghostrider/sph_echo.h
Normal file
@@ -0,0 +1,319 @@
|
||||
/* $Id: sph_echo.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* ECHO interface. ECHO is a family of functions which differ by
|
||||
* their output size; this implementation defines ECHO for output
|
||||
* sizes 224, 256, 384 and 512 bits.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_echo.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_ECHO_H__
|
||||
#define SPH_ECHO_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include "sph_types.h"
|
||||
|
||||
/**
|
||||
* Output size (in bits) for ECHO-224.
|
||||
*/
|
||||
#define SPH_SIZE_echo224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for ECHO-256.
|
||||
*/
|
||||
#define SPH_SIZE_echo256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for ECHO-384.
|
||||
*/
|
||||
#define SPH_SIZE_echo384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for ECHO-512.
|
||||
*/
|
||||
#define SPH_SIZE_echo512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for ECHO computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* an ECHO computation has been performed, the context can be reused for
|
||||
* another computation. This specific structure is used for ECHO-224
|
||||
* and ECHO-256.
|
||||
*
|
||||
* The contents of this structure are private. A running ECHO computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct { /* ECHO context used for the 224- and 256-bit variants */
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[192]; /* first field, for alignment */
|
||||
size_t ptr; /* presumably the number of pending bytes held in buf -- confirm in sph_echo.c */
|
||||
union { /* two views of the same storage; the 64-bit view exists only when SPH_64 is enabled */
|
||||
sph_u32 Vs[4][4]; /* 4x4 array of sph_u32 words */
|
||||
#if SPH_64
|
||||
sph_u64 Vb[4][2]; /* 4x2 array of sph_u64 words */
|
||||
#endif
|
||||
} u;
|
||||
sph_u32 C0, C1, C2, C3; /* NOTE(review): looks like a counter split into four 32-bit words -- confirm in sph_echo.c */
|
||||
#endif
|
||||
} sph_echo_small_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for ECHO computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* an ECHO computation has been performed, the context can be reused for
|
||||
* another computation. This specific structure is used for ECHO-384
|
||||
* and ECHO-512.
|
||||
*
|
||||
* The contents of this structure are private. A running ECHO computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct { /* ECHO context used for the 384- and 512-bit variants */
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[128]; /* first field, for alignment */
|
||||
size_t ptr; /* presumably the number of pending bytes held in buf -- confirm in sph_echo.c */
|
||||
union { /* two views of the same storage; the 64-bit view exists only when SPH_64 is enabled */
|
||||
sph_u32 Vs[8][4]; /* 8x4 array of sph_u32 words */
|
||||
#if SPH_64
|
||||
sph_u64 Vb[8][2]; /* 8x2 array of sph_u64 words */
|
||||
#endif
|
||||
} u;
|
||||
sph_u32 C0, C1, C2, C3; /* NOTE(review): looks like a counter split into four 32-bit words -- confirm in sph_echo.c */
|
||||
#endif
|
||||
} sph_echo_big_context;
|
||||
|
||||
/**
|
||||
* Type for an ECHO-224 context (identical to the common "small" context).
|
||||
*/
|
||||
typedef sph_echo_small_context sph_echo224_context;
|
||||
|
||||
/**
|
||||
* Type for an ECHO-256 context (identical to the common "small" context).
|
||||
*/
|
||||
typedef sph_echo_small_context sph_echo256_context;
|
||||
|
||||
/**
|
||||
* Type for an ECHO-384 context (identical to the common "big" context).
|
||||
*/
|
||||
typedef sph_echo_big_context sph_echo384_context;
|
||||
|
||||
/**
|
||||
* Type for an ECHO-512 context (identical to the common "big" context).
|
||||
*/
|
||||
typedef sph_echo_big_context sph_echo512_context;
|
||||
|
||||
/**
|
||||
* Initialize an ECHO-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the ECHO-224 context (pointer to a
|
||||
* <code>sph_echo224_context</code>)
|
||||
*/
|
||||
void sph_echo224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the ECHO-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_echo224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current ECHO-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize an ECHO-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the ECHO-256 context (pointer to a
|
||||
* <code>sph_echo256_context</code>)
|
||||
*/
|
||||
void sph_echo256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the ECHO-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_echo256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current ECHO-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize an ECHO-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the ECHO-384 context (pointer to a
|
||||
* <code>sph_echo384_context</code>)
|
||||
*/
|
||||
void sph_echo384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the ECHO-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_echo384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current ECHO-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize an ECHO-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the ECHO-512 context (pointer to a
|
||||
* <code>sph_echo512_context</code>)
|
||||
*/
|
||||
void sph_echo512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the ECHO-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_echo512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current ECHO-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the ECHO-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_echo512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
1210
src/crypto/ghostrider/sph_fugue.c
Normal file
1210
src/crypto/ghostrider/sph_fugue.c
Normal file
File diff suppressed because it is too large
Load Diff
89
src/crypto/ghostrider/sph_fugue.h
Normal file
89
src/crypto/ghostrider/sph_fugue.h
Normal file
@@ -0,0 +1,89 @@
|
||||
#ifndef SPH_FUGUE_H__
|
||||
#define SPH_FUGUE_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include "sph_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#define SPH_SIZE_fugue224 224
|
||||
|
||||
#define SPH_SIZE_fugue256 256
|
||||
|
||||
#define SPH_SIZE_fugue384 384
|
||||
|
||||
#define SPH_SIZE_fugue512 512
|
||||
|
||||
typedef struct { /* Fugue context; aliased by the 224/256/384/512 context typedefs in this header */
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
sph_u32 partial; /* NOTE(review): looks like input not yet forming a full word -- confirm in sph_fugue.c */
|
||||
unsigned partial_len; /* presumably how much of partial is filled; the unit is not visible in this header */
|
||||
unsigned round_shift; /* meaning not visible in this header -- see sph_fugue.c */
|
||||
sph_u32 S[36]; /* 36 sph_u32 words; presumably the hash state -- confirm in sph_fugue.c */
|
||||
#if SPH_64
|
||||
sph_u64 bit_count; /* one 64-bit field when SPH_64 is enabled; presumably counts input bits */
|
||||
#else
|
||||
sph_u32 bit_count_high, bit_count_low; /* without SPH_64 the same quantity is kept as two 32-bit halves */
|
||||
#endif
|
||||
|
|
||||
#endif
|
||||
} sph_fugue_context;
|
||||
|
||||
typedef sph_fugue_context sph_fugue224_context;
|
||||
|
||||
typedef sph_fugue_context sph_fugue256_context;
|
||||
|
||||
typedef sph_fugue_context sph_fugue384_context;
|
||||
|
||||
typedef sph_fugue_context sph_fugue512_context;
|
||||
|
||||
void sph_fugue224_init(void *cc);
|
||||
|
||||
void sph_fugue224(void *cc, const void *data, size_t len);
|
||||
|
||||
void sph_fugue224_close(void *cc, void *dst);
|
||||
|
||||
void sph_fugue224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
void sph_fugue256_init(void *cc);
|
||||
|
||||
void sph_fugue256(void *cc, const void *data, size_t len);
|
||||
|
||||
void sph_fugue256_close(void *cc, void *dst);
|
||||
|
||||
void sph_fugue256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
void sph_fugue384_init(void *cc);
|
||||
|
||||
void sph_fugue384(void *cc, const void *data, size_t len);
|
||||
|
||||
void sph_fugue384_close(void *cc, void *dst);
|
||||
|
||||
void sph_fugue384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
void sph_fugue512_init(void *cc);
|
||||
|
||||
void sph_fugue512(void *cc, const void *data, size_t len);
|
||||
|
||||
void sph_fugue512_close(void *cc, void *dst);
|
||||
|
||||
void sph_fugue512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
 * One-shot Fugue-512 convenience macro: calls sph_fugue512_init(),
 * sph_fugue512() and sph_fugue512_close() in that order on one context.
 *
 * Every argument is captured in a block-local variable first, so each
 * argument expression is evaluated exactly once even though the context
 * is handed to three separate calls. All four arguments are evaluated
 * before the first call is made.
 *
 * @param cc     the Fugue-512 context (pointer to a
 *               <code>sph_fugue512_context</code>)
 * @param dst    the destination buffer, forwarded to sph_fugue512_close()
 * @param data   the input data, forwarded to sph_fugue512()
 * @param len    the input data length, forwarded to sph_fugue512()
 */
#define sph_fugue512_full( cc, dst, data, len ) \
do{ \
	void *sph_fugue512_full_cc_ = (cc); \
	const void *sph_fugue512_full_data_ = (data); \
	size_t sph_fugue512_full_len_ = (len); \
	void *sph_fugue512_full_dst_ = (dst); \
	sph_fugue512_init( sph_fugue512_full_cc_ ); \
	sph_fugue512( sph_fugue512_full_cc_, sph_fugue512_full_data_, \
		sph_fugue512_full_len_ ); \
	sph_fugue512_close( sph_fugue512_full_cc_, sph_fugue512_full_dst_ ); \
}while(0)
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
3121
src/crypto/ghostrider/sph_groestl.c
Normal file
3121
src/crypto/ghostrider/sph_groestl.c
Normal file
File diff suppressed because it is too large
Load Diff
329
src/crypto/ghostrider/sph_groestl.h
Normal file
329
src/crypto/ghostrider/sph_groestl.h
Normal file
@@ -0,0 +1,329 @@
|
||||
/* $Id: sph_groestl.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* Groestl interface. This code implements Groestl with the recommended
|
||||
* parameters for SHA-3, with outputs of 224, 256, 384 and 512 bits.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_groestl.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_GROESTL_H__
|
||||
#define SPH_GROESTL_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#include "sph_types.h"
|
||||
#include <stddef.h>
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Groestl-224.
|
||||
*/
|
||||
#define SPH_SIZE_groestl224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Groestl-256.
|
||||
*/
|
||||
#define SPH_SIZE_groestl256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Groestl-384.
|
||||
*/
|
||||
#define SPH_SIZE_groestl384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Groestl-512.
|
||||
*/
|
||||
#define SPH_SIZE_groestl512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for Groestl-224 and Groestl-256 computations:
|
||||
* it contains the intermediate values and some data from the last
|
||||
* entered block. Once a Groestl computation has been performed, the
|
||||
* context can be reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running Groestl
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct { /* Groestl context used for the 224- and 256-bit variants */
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
size_t ptr; /* presumably the number of pending bytes held in buf -- confirm in sph_groestl.c */
|
||||
union { /* two views of the same storage; the 64-bit view exists only when SPH_64 is enabled */
|
||||
#if SPH_64
|
||||
sph_u64 wide[8]; /* 8 sph_u64 words */
|
||||
#endif
|
||||
sph_u32 narrow[16]; /* 16 sph_u32 words */
|
||||
} state;
|
||||
#if SPH_64
|
||||
sph_u64 count; /* one 64-bit field when SPH_64 is enabled; what it counts is not visible in this header */
|
||||
#else
|
||||
sph_u32 count_high, count_low; /* without SPH_64 the same quantity is kept as two 32-bit halves */
|
||||
#endif
|
||||
|
|
||||
#endif
|
||||
} sph_groestl_small_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for Groestl-224 computations. It is
|
||||
* identical to the common <code>sph_groestl_small_context</code>.
|
||||
*/
|
||||
typedef sph_groestl_small_context sph_groestl224_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for Groestl-256 computations. It is
|
||||
* identical to the common <code>sph_groestl_small_context</code>.
|
||||
*/
|
||||
typedef sph_groestl_small_context sph_groestl256_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for Groestl-384 and Groestl-512 computations:
|
||||
* it contains the intermediate values and some data from the last
|
||||
* entered block. Once a Groestl computation has been performed, the
|
||||
* context can be reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running Groestl
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct { /* Groestl context used for the 384- and 512-bit variants */
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[128]; /* first field, for alignment */
|
||||
size_t ptr; /* presumably the number of pending bytes held in buf -- confirm in sph_groestl.c */
|
||||
union { /* two views of the same storage; the 64-bit view exists only when SPH_64 is enabled */
|
||||
#if SPH_64
|
||||
sph_u64 wide[16]; /* 16 sph_u64 words */
|
||||
#endif
|
||||
sph_u32 narrow[32]; /* 32 sph_u32 words */
|
||||
} state;
|
||||
#if SPH_64
|
||||
sph_u64 count; /* one 64-bit field when SPH_64 is enabled; what it counts is not visible in this header */
|
||||
#else
|
||||
sph_u32 count_high, count_low; /* without SPH_64 the same quantity is kept as two 32-bit halves */
|
||||
#endif
|
||||
|
|
||||
#endif
|
||||
} sph_groestl_big_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for Groestl-384 computations. It is
|
||||
* identical to the common <code>sph_groestl_big_context</code>.
|
||||
*/
|
||||
typedef sph_groestl_big_context sph_groestl384_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for Groestl-512 computations. It is
|
||||
* identical to the common <code>sph_groestl_big_context</code>.
|
||||
*/
|
||||
typedef sph_groestl_big_context sph_groestl512_context;
|
||||
|
||||
/**
|
||||
* Initialize a Groestl-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Groestl-224 context (pointer to a
|
||||
* <code>sph_groestl224_context</code>)
|
||||
*/
|
||||
void sph_groestl224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Groestl-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_groestl224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Groestl-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Groestl-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_groestl224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Groestl-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_groestl224_addbits_and_close(void *cc, unsigned ub, unsigned n,
|
||||
void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Groestl-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Groestl-256 context (pointer to a
|
||||
* <code>sph_groestl256_context</code>)
|
||||
*/
|
||||
void sph_groestl256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Groestl-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_groestl256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Groestl-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Groestl-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_groestl256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Groestl-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_groestl256_addbits_and_close(void *cc, unsigned ub, unsigned n,
|
||||
void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Groestl-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Groestl-384 context (pointer to a
|
||||
* <code>sph_groestl384_context</code>)
|
||||
*/
|
||||
void sph_groestl384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Groestl-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_groestl384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Groestl-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Groestl-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_groestl384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Groestl-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_groestl384_addbits_and_close(void *cc, unsigned ub, unsigned n,
|
||||
void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Groestl-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Groestl-512 context (pointer to a
|
||||
* <code>sph_groestl512_context</code>)
|
||||
*/
|
||||
void sph_groestl512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Groestl-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_groestl512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Groestl-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Groestl-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_groestl512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Groestl-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_groestl512_addbits_and_close(void *cc, unsigned ub, unsigned n,
|
||||
void *dst);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
867
src/crypto/ghostrider/sph_hamsi.c
Normal file
867
src/crypto/ghostrider/sph_hamsi.c
Normal file
@@ -0,0 +1,867 @@
|
||||
/* $Id: hamsi.c 251 2010-10-19 14:31:51Z tp $ */
|
||||
/*
|
||||
* Hamsi implementation.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sph_hamsi.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT && !defined SPH_SMALL_FOOTPRINT_HAMSI
|
||||
#define SPH_SMALL_FOOTPRINT_HAMSI 1
|
||||
#endif
|
||||
|
||||
/*
|
||||
* The SPH_HAMSI_EXPAND_* define how many input bits we handle in one
|
||||
* table lookup during message expansion (1 to 8, inclusive). If we note
|
||||
* w the number of bits per message word (w=32 for Hamsi-224/256, w=64
|
||||
* for Hamsi-384/512), r the size of a "row" in 32-bit words (r=8 for
|
||||
* Hamsi-224/256, r=16 for Hamsi-384/512), and n the expansion level,
|
||||
* then we will get t tables (where t=ceil(w/n)) of individual size
|
||||
* 2^n*r*4 (in bytes). The last table may be shorter (e.g. with w=32 and
|
||||
* n=5, there are 7 tables, but the last one uses only two bits on
|
||||
* input, not five).
|
||||
*
|
||||
* Also, we read t rows of r words from RAM. Words in a given row are
|
||||
* concatenated in RAM in that order, so most of the cost is about
|
||||
* reading the first row word; comparatively, cache misses are thus
|
||||
* less expensive with Hamsi-512 (r=16) than with Hamsi-256 (r=8).
|
||||
*
|
||||
* When n=1, tables are "special" in that we omit the first entry of
|
||||
* each table (which always contains 0), so that total table size is
|
||||
* halved.
|
||||
*
|
||||
* We thus have the following (size1 is the cumulative table size of
|
||||
* Hamsi-224/256; size2 is for Hamsi-384/512; similarly, t1 and t2
|
||||
* are for Hamsi-224/256 and Hamsi-384/512, respectively).
|
||||
*
|
||||
* n size1 size2 t1 t2
|
||||
* ---------------------------------------
|
||||
* 1 1024 4096 32 64
|
||||
* 2 2048 8192 16 32
|
||||
* 3 2688 10880 11 22
|
||||
* 4 4096 16384 8 16
|
||||
* 5 6272 25600 7 13
|
||||
* 6 10368 41984 6 11
|
||||
* 7 16896 73856 5 10
|
||||
* 8 32768 131072 4 8
|
||||
*
|
||||
* So there is a trade-off: a lower n makes the tables fit better in
|
||||
* L1 cache, but increases the number of memory accesses. The optimal
|
||||
* value depends on the amount of available L1 cache and the relative
|
||||
* impact of a cache miss.
|
||||
*
|
||||
* Experimentally, in ideal benchmark conditions (which are not necessarily
|
||||
* realistic with regard to L1 cache contention), it seems that n=8 is
|
||||
* the best value on "big" architectures (those with 32 kB or more of L1
|
||||
* cache), while n=4 is better on "small" architectures. This was tested
|
||||
* on an Intel Core2 Q6600 (both 32-bit and 64-bit mode), a PowerPC G3
|
||||
* (32 kB L1 cache, hence "big"), and a MIPS-compatible Broadcom BCM3302
|
||||
* (8 kB L1 cache).
|
||||
*
|
||||
* Note: with n=1, the 32 tables (actually implemented as one big table)
|
||||
* are read entirely and sequentially, regardless of the input data,
|
||||
* thus avoiding any data-dependent table access pattern.
|
||||
*/
|
||||
|
||||
#if !defined SPH_HAMSI_EXPAND_SMALL
|
||||
#if SPH_SMALL_FOOTPRINT_HAMSI
|
||||
#define SPH_HAMSI_EXPAND_SMALL 4
|
||||
#else
|
||||
#define SPH_HAMSI_EXPAND_SMALL 8
|
||||
#endif
|
||||
#endif
|
||||
|
||||
#if !defined SPH_HAMSI_EXPAND_BIG
|
||||
#define SPH_HAMSI_EXPAND_BIG 8
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
#include "sph_hamsi_helper.c"
|
||||
|
||||
/*
 * Initial chaining value for Hamsi-224 (eight 32-bit words), passed to
 * hamsi_small_init() by sph_hamsi224_init(). Read as big-endian bytes,
 * the words are the UTF-8 encoding of the name "Ozgul Kucuk" (written
 * with its Turkish diacritics) followed by ", Katholieke Uni".
 */
static const sph_u32 IV224[] = {
|
||||
SPH_C32(0xc3967a67), SPH_C32(0xc3bc6c20), SPH_C32(0x4bc3bcc3),
|
||||
SPH_C32(0xa7c3bc6b), SPH_C32(0x2c204b61), SPH_C32(0x74686f6c),
|
||||
SPH_C32(0x69656b65), SPH_C32(0x20556e69)
|
||||
};
|
||||
|
||||
/*
|
||||
* This version is the one used in the Hamsi submission package for
|
||||
* round 2 of the SHA-3 competition; the UTF-8 encoding is wrong and
|
||||
* shall soon be corrected in the official Hamsi specification.
|
||||
*
|
||||
static const sph_u32 IV224[] = {
|
||||
SPH_C32(0x3c967a67), SPH_C32(0x3cbc6c20), SPH_C32(0xb4c343c3),
|
||||
SPH_C32(0xa73cbc6b), SPH_C32(0x2c204b61), SPH_C32(0x74686f6c),
|
||||
SPH_C32(0x69656b65), SPH_C32(0x20556e69)
|
||||
};
|
||||
*/
|
||||
|
||||
/*
 * Initial chaining value for Hamsi-256 (eight 32-bit words), passed to
 * hamsi_small_init() by sph_hamsi256_init(). Read as big-endian bytes,
 * the words are the ASCII text "versiteit Leuven, Department El".
 */
static const sph_u32 IV256[] = {
|
||||
SPH_C32(0x76657273), SPH_C32(0x69746569), SPH_C32(0x74204c65),
|
||||
SPH_C32(0x7576656e), SPH_C32(0x2c204465), SPH_C32(0x70617274),
|
||||
SPH_C32(0x656d656e), SPH_C32(0x7420456c)
|
||||
};
|
||||
|
||||
/*
 * Initial chaining value for Hamsi-384 (sixteen 32-bit words), passed
 * to hamsi_big_init() by sph_hamsi384_init(). Read as big-endian bytes,
 * the words are the ASCII text
 * "ektrotechniek, Computer Security and Industrial Cryptography, Ka".
 */
static const sph_u32 IV384[] = {
|
||||
SPH_C32(0x656b7472), SPH_C32(0x6f746563), SPH_C32(0x686e6965),
|
||||
SPH_C32(0x6b2c2043), SPH_C32(0x6f6d7075), SPH_C32(0x74657220),
|
||||
SPH_C32(0x53656375), SPH_C32(0x72697479), SPH_C32(0x20616e64),
|
||||
SPH_C32(0x20496e64), SPH_C32(0x75737472), SPH_C32(0x69616c20),
|
||||
SPH_C32(0x43727970), SPH_C32(0x746f6772), SPH_C32(0x61706879),
|
||||
SPH_C32(0x2c204b61)
|
||||
};
|
||||
|
||||
/*
 * Initial chaining value for Hamsi-512 (sixteen 32-bit words), passed
 * to hamsi_big_init() by sph_hamsi512_init(). Read as big-endian bytes,
 * the words are the ASCII text
 * "steelpark Arenberg 10, bus 2446, B-3001 Leuven-Heverlee, Belgium".
 */
static const sph_u32 IV512[] = {
|
||||
SPH_C32(0x73746565), SPH_C32(0x6c706172), SPH_C32(0x6b204172),
|
||||
SPH_C32(0x656e6265), SPH_C32(0x72672031), SPH_C32(0x302c2062),
|
||||
SPH_C32(0x75732032), SPH_C32(0x3434362c), SPH_C32(0x20422d33),
|
||||
SPH_C32(0x30303120), SPH_C32(0x4c657576), SPH_C32(0x656e2d48),
|
||||
SPH_C32(0x65766572), SPH_C32(0x6c65652c), SPH_C32(0x2042656c),
|
||||
SPH_C32(0x6769756d)
|
||||
};
|
||||
|
||||
/*
 * Round-constant table (32 words) used by the regular permutations
 * P_SMALL and P_BIG. ROUND_BIG XORs all 32 entries into the state;
 * ROUND_SMALL reads only entries 0x00-0x03, 0x08-0x0B, 0x10-0x13 and
 * 0x18-0x1B.
 */
static const sph_u32 alpha_n[] = {
|
||||
SPH_C32(0xff00f0f0), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0cccc),
|
||||
SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00),
|
||||
SPH_C32(0xaaaacccc), SPH_C32(0xf0f0ff00), SPH_C32(0xf0f0cccc),
|
||||
SPH_C32(0xaaaaff00), SPH_C32(0xccccff00), SPH_C32(0xaaaaf0f0),
|
||||
SPH_C32(0xaaaaf0f0), SPH_C32(0xff00cccc), SPH_C32(0xccccf0f0),
|
||||
SPH_C32(0xff00aaaa), SPH_C32(0xccccaaaa), SPH_C32(0xff00f0f0),
|
||||
SPH_C32(0xff00aaaa), SPH_C32(0xf0f0cccc), SPH_C32(0xf0f0ff00),
|
||||
SPH_C32(0xccccaaaa), SPH_C32(0xf0f0ff00), SPH_C32(0xaaaacccc),
|
||||
SPH_C32(0xaaaaff00), SPH_C32(0xf0f0cccc), SPH_C32(0xaaaaf0f0),
|
||||
SPH_C32(0xccccff00), SPH_C32(0xff00cccc), SPH_C32(0xaaaaf0f0),
|
||||
SPH_C32(0xff00aaaa), SPH_C32(0xccccf0f0)
|
||||
};
|
||||
|
||||
/*
 * Round-constant table (32 words) used by the final permutations
 * PF_SMALL and PF_BIG. ROUND_BIG XORs all 32 entries into the state;
 * ROUND_SMALL reads only entries 0x00-0x03, 0x08-0x0B, 0x10-0x13 and
 * 0x18-0x1B.
 */
static const sph_u32 alpha_f[] = {
|
||||
SPH_C32(0xcaf9639c), SPH_C32(0x0ff0f9c0), SPH_C32(0x639c0ff0),
|
||||
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9),
|
||||
SPH_C32(0xf9c00ff0), SPH_C32(0x639ccaf9), SPH_C32(0x639c0ff0),
|
||||
SPH_C32(0xf9c0caf9), SPH_C32(0x0ff0caf9), SPH_C32(0xf9c0639c),
|
||||
SPH_C32(0xf9c0639c), SPH_C32(0xcaf90ff0), SPH_C32(0x0ff0639c),
|
||||
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0f9c0), SPH_C32(0xcaf9639c),
|
||||
SPH_C32(0xcaf9f9c0), SPH_C32(0x639c0ff0), SPH_C32(0x639ccaf9),
|
||||
SPH_C32(0x0ff0f9c0), SPH_C32(0x639ccaf9), SPH_C32(0xf9c00ff0),
|
||||
SPH_C32(0xf9c0caf9), SPH_C32(0x639c0ff0), SPH_C32(0xf9c0639c),
|
||||
SPH_C32(0x0ff0caf9), SPH_C32(0xcaf90ff0), SPH_C32(0xf9c0639c),
|
||||
SPH_C32(0xcaf9f9c0), SPH_C32(0x0ff0639c)
|
||||
};
|
||||
|
||||
/*
 * Declares the eight local working copies c0..c7 of the chaining value
 * used by the Hamsi-224/256 code. The expansion already ends with a
 * semicolon, so the macro is written without one at the point of use.
 */
#define DECL_STATE_SMALL \
	sph_u32 c0, c1, c2, c3; \
	sph_u32 c4, c5, c6, c7;
|
||||
|
||||
/*
 * Load the eight chaining words sc->h[0..7] into the locals c0..c7
 * (which must already be declared, e.g. with DECL_STATE_SMALL).
 */
#define READ_STATE_SMALL(sc)   do { \
		c0 = (sc)->h[0]; \
		c1 = (sc)->h[1]; \
		c2 = (sc)->h[2]; \
		c3 = (sc)->h[3]; \
		c4 = (sc)->h[4]; \
		c5 = (sc)->h[5]; \
		c6 = (sc)->h[6]; \
		c7 = (sc)->h[7]; \
	} while (0)
|
||||
|
||||
/*
 * Store the locals c0..c7 back into the chaining words sc->h[0..7].
 */
#define WRITE_STATE_SMALL(sc)   do { \
		(sc)->h[0] = c0; \
		(sc)->h[1] = c1; \
		(sc)->h[2] = c2; \
		(sc)->h[3] = c3; \
		(sc)->h[4] = c4; \
		(sc)->h[5] = c5; \
		(sc)->h[6] = c6; \
		(sc)->h[7] = c7; \
	} while (0)
|
||||
|
||||
/*
 * Names for the 16 words s0..sF of the Hamsi-224/256 working state.
 * Each one is an alias for either an expanded message word (m0..m7) or
 * a chaining word (c0..c7); no separate storage is used.
 */
#define s0   m0
#define s1   m1
#define s2   c0
#define s3   c1

#define s4   c2
#define s5   c3
#define s6   m2
#define s7   m3

#define s8   m4
#define s9   m5
#define sA   c4
#define sB   c5

#define sC   c6
#define sD   c7
#define sE   m6
#define sF   m7
|
||||
|
||||
/*
 * Bitsliced 4-bit substitution applied in place to four 32-bit words.
 * Each bit position is an independent 4-bit value with `a` holding its
 * least significant bit and `d` its most significant bit; for example
 * the value 0 is mapped to 8 and the value 1 is mapped to 6.
 *
 * All four arguments must be distinct modifiable lvalues without side
 * effects: each one is evaluated several times.
 */
#define SBOX(a, b, c, d)   do { \
		sph_u32 sbox_t = (a); \
		(a) = ((a) & (c)) ^ (d); \
		(c) ^= (b) ^ (a); \
		(d) = ((d) | sbox_t) ^ (b); \
		sbox_t ^= (c); \
		(b) = (d); \
		(d) = ((d) | sbox_t) ^ (a); \
		(a) &= (b); \
		sbox_t ^= (a); \
		(b) ^= (d) ^ sbox_t; \
		(a) = (c); \
		(c) = (b); \
		(b) = (d); \
		(d) = SPH_T32(~sbox_t); \
	} while (0)
|
||||
|
||||
/*
 * Linear mixing step applied in place to four 32-bit words, built from
 * rotations, left shifts and XORs. The statements are grouped per
 * output word; every word only depends on values already finalized
 * above it.
 *
 * All four arguments must be distinct modifiable lvalues without side
 * effects: each one is evaluated several times.
 */
#define L(a, b, c, d)   do { \
		(c) = SPH_ROTL32(c, 3); \
		(a) = SPH_ROTL32(a, 13); \
		(b) ^= (a) ^ (c); \
		(b) = SPH_ROTL32(b, 1); \
		(d) ^= (c) ^ SPH_T32((a) << 3); \
		(d) = SPH_ROTL32(d, 7); \
		(a) ^= (b) ^ (d); \
		(a) = SPH_ROTL32(a, 5); \
		(c) ^= (d) ^ SPH_T32((b) << 7); \
		(c) = SPH_ROTL32(c, 22); \
	} while (0)
|
||||
|
||||
/*
 * One round on the 16-word Hamsi-224/256 state s0..sF:
 *   1. XOR the round constants from `alpha` into the state (the round
 *      counter `rc` is additionally XORed into s1); only 16 of the 32
 *      table entries are read;
 *   2. apply SBOX to (s0,s4,s8,sC), (s1,s5,s9,sD), (s2,s6,sA,sE) and
 *      (s3,s7,sB,sF);
 *   3. apply L to (s0,s5,sA,sF), (s1,s6,sB,sC), (s2,s7,s8,sD) and
 *      (s3,s4,s9,sE).
 */
#define ROUND_SMALL(rc, alpha)   do { \
		s0 ^= alpha[0x00]; \
		s1 ^= alpha[0x01] ^ (sph_u32)(rc); \
		s2 ^= alpha[0x02]; \
		s3 ^= alpha[0x03]; \
		\
		s4 ^= alpha[0x08]; \
		s5 ^= alpha[0x09]; \
		s6 ^= alpha[0x0A]; \
		s7 ^= alpha[0x0B]; \
		\
		s8 ^= alpha[0x10]; \
		s9 ^= alpha[0x11]; \
		sA ^= alpha[0x12]; \
		sB ^= alpha[0x13]; \
		\
		sC ^= alpha[0x18]; \
		sD ^= alpha[0x19]; \
		sE ^= alpha[0x1A]; \
		sF ^= alpha[0x1B]; \
		\
		SBOX(s0, s4, s8, sC); \
		SBOX(s1, s5, s9, sD); \
		SBOX(s2, s6, sA, sE); \
		SBOX(s3, s7, sB, sF); \
		\
		L(s0, s5, sA, sF); \
		L(s1, s6, sB, sC); \
		L(s2, s7, s8, sD); \
		L(s3, s4, s9, sE); \
	} while (0)
|
||||
|
||||
/*
 * Regular Hamsi-224/256 permutation: three rounds (round counters 0, 1
 * and 2) using the alpha_n constants.
 */
#define P_SMALL   do { \
		ROUND_SMALL(0, alpha_n); \
		ROUND_SMALL(1, alpha_n); \
		ROUND_SMALL(2, alpha_n); \
	} while (0)
|
||||
|
||||
/*
 * Final Hamsi-224/256 permutation: six rounds (round counters 0 to 5)
 * using the alpha_f constants.
 */
#define PF_SMALL   do { \
		ROUND_SMALL(0, alpha_f); \
		ROUND_SMALL(1, alpha_f); \
		ROUND_SMALL(2, alpha_f); \
		ROUND_SMALL(3, alpha_f); \
		ROUND_SMALL(4, alpha_f); \
		ROUND_SMALL(5, alpha_f); \
	} while (0)
|
||||
|
||||
/*
 * Truncation / feed-forward: XOR eight words of the permuted state into
 * sc->h[] and reload c0..c7 with the new chaining value.
 *
 * The statements must run from c7 down to c0. sB, sA, s3 and s2 are
 * aliases for c5, c4, c1 and c0, so those locals have to be read (by
 * the c7, c6, c3 and c2 lines) before they are overwritten further
 * down.
 */
#define T_SMALL   do { \
		c7 = (sc->h[7] ^= sB); \
		c6 = (sc->h[6] ^= sA); \
		c5 = (sc->h[5] ^= s9); \
		c4 = (sc->h[4] ^= s8); \
		c3 = (sc->h[3] ^= s3); \
		c2 = (sc->h[2] ^= s2); \
		c1 = (sc->h[1] ^= s1); \
		c0 = (sc->h[0] ^= s0); \
	} while (0)
|
||||
|
||||
static void
|
||||
hamsi_small(sph_hamsi_small_context *sc, const unsigned char *buf, size_t num)
|
||||
{
|
||||
DECL_STATE_SMALL
|
||||
#if !SPH_64
|
||||
sph_u32 tmp;
|
||||
#endif
|
||||
|
||||
#if SPH_64
|
||||
sc->count += (sph_u64)num << 5;
|
||||
#else
|
||||
tmp = SPH_T32((sph_u32)num << 5);
|
||||
sc->count_low = SPH_T32(sc->count_low + tmp);
|
||||
sc->count_high += (sph_u32)((num >> 13) >> 14);
|
||||
if (sc->count_low < tmp)
|
||||
sc->count_high ++;
|
||||
#endif
|
||||
READ_STATE_SMALL(sc);
|
||||
while (num -- > 0) {
|
||||
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
|
||||
|
||||
INPUT_SMALL;
|
||||
P_SMALL;
|
||||
T_SMALL;
|
||||
buf += 4;
|
||||
}
|
||||
WRITE_STATE_SMALL(sc);
|
||||
}
|
||||
|
||||
static void
|
||||
hamsi_small_final(sph_hamsi_small_context *sc, const unsigned char *buf)
|
||||
{
|
||||
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
|
||||
DECL_STATE_SMALL
|
||||
|
||||
READ_STATE_SMALL(sc);
|
||||
INPUT_SMALL;
|
||||
PF_SMALL;
|
||||
T_SMALL;
|
||||
WRITE_STATE_SMALL(sc);
|
||||
}
|
||||
|
||||
static void
|
||||
hamsi_small_init(sph_hamsi_small_context *sc, const sph_u32 *iv)
|
||||
{
|
||||
sc->partial_len = 0;
|
||||
memcpy(sc->h, iv, sizeof sc->h);
|
||||
#if SPH_64
|
||||
sc->count = 0;
|
||||
#else
|
||||
sc->count_high = sc->count_low = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
hamsi_small_core(sph_hamsi_small_context *sc, const void *data, size_t len)
|
||||
{
|
||||
if (sc->partial_len != 0) {
|
||||
size_t mlen;
|
||||
|
||||
mlen = 4 - sc->partial_len;
|
||||
if (len < mlen) {
|
||||
memcpy(sc->partial + sc->partial_len, data, len);
|
||||
sc->partial_len += len;
|
||||
return;
|
||||
} else {
|
||||
memcpy(sc->partial + sc->partial_len, data, mlen);
|
||||
len -= mlen;
|
||||
data = (const unsigned char *)data + mlen;
|
||||
hamsi_small(sc, sc->partial, 1);
|
||||
sc->partial_len = 0;
|
||||
}
|
||||
}
|
||||
|
||||
hamsi_small(sc, data, (len >> 2));
|
||||
data = (const unsigned char *)data + (len & ~(size_t)3);
|
||||
len &= (size_t)3;
|
||||
memcpy(sc->partial, data, len);
|
||||
sc->partial_len = len;
|
||||
}
|
||||
|
||||
static void
|
||||
hamsi_small_close(sph_hamsi_small_context *sc,
|
||||
unsigned ub, unsigned n, void *dst, size_t out_size_w32)
|
||||
{
|
||||
unsigned char pad[12];
|
||||
size_t ptr, u;
|
||||
unsigned z;
|
||||
unsigned char *out;
|
||||
|
||||
ptr = sc->partial_len;
|
||||
memcpy(pad, sc->partial, ptr);
|
||||
#if SPH_64
|
||||
sph_enc64be(pad + 4, sc->count + (ptr << 3) + n);
|
||||
#else
|
||||
sph_enc32be(pad + 4, sc->count_high);
|
||||
sph_enc32be(pad + 8, sc->count_low + (ptr << 3) + n);
|
||||
#endif
|
||||
z = 0x80 >> n;
|
||||
pad[ptr ++] = ((ub & -z) | z) & 0xFF;
|
||||
while (ptr < 4)
|
||||
pad[ptr ++] = 0;
|
||||
hamsi_small(sc, pad, 2);
|
||||
hamsi_small_final(sc, pad + 8);
|
||||
out = dst;
|
||||
for (u = 0; u < out_size_w32; u ++)
|
||||
sph_enc32be(out + (u << 2), sc->h[u]);
|
||||
}
|
||||
|
||||
/*
 * Declares the sixteen local working copies c0..cF of the chaining
 * value used by the Hamsi-384/512 code. The expansion already ends
 * with a semicolon, so the macro is written without one at the point
 * of use.
 */
#define DECL_STATE_BIG \
	sph_u32 c0, c1, c2, c3; \
	sph_u32 c4, c5, c6, c7; \
	sph_u32 c8, c9, cA, cB; \
	sph_u32 cC, cD, cE, cF;
|
||||
|
||||
/*
 * Load the sixteen chaining words sc->h[0..15] into the locals c0..cF
 * (which must already be declared, e.g. with DECL_STATE_BIG).
 */
#define READ_STATE_BIG(sc)   do { \
		c0 = (sc)->h[0]; \
		c1 = (sc)->h[1]; \
		c2 = (sc)->h[2]; \
		c3 = (sc)->h[3]; \
		c4 = (sc)->h[4]; \
		c5 = (sc)->h[5]; \
		c6 = (sc)->h[6]; \
		c7 = (sc)->h[7]; \
		c8 = (sc)->h[8]; \
		c9 = (sc)->h[9]; \
		cA = (sc)->h[10]; \
		cB = (sc)->h[11]; \
		cC = (sc)->h[12]; \
		cD = (sc)->h[13]; \
		cE = (sc)->h[14]; \
		cF = (sc)->h[15]; \
	} while (0)
|
||||
|
||||
/*
 * Store the locals c0..cF back into the chaining words sc->h[0..15].
 */
#define WRITE_STATE_BIG(sc)   do { \
		(sc)->h[0] = c0; \
		(sc)->h[1] = c1; \
		(sc)->h[2] = c2; \
		(sc)->h[3] = c3; \
		(sc)->h[4] = c4; \
		(sc)->h[5] = c5; \
		(sc)->h[6] = c6; \
		(sc)->h[7] = c7; \
		(sc)->h[8] = c8; \
		(sc)->h[9] = c9; \
		(sc)->h[10] = cA; \
		(sc)->h[11] = cB; \
		(sc)->h[12] = cC; \
		(sc)->h[13] = cD; \
		(sc)->h[14] = cE; \
		(sc)->h[15] = cF; \
	} while (0)
|
||||
|
||||
/*
 * Names for the 32 words s00..s1F of the Hamsi-384/512 working state.
 * Each one is an alias for either an expanded message word (m0..mF) or
 * a chaining word (c0..cF); no separate storage is used.
 */
#define s00   m0
#define s01   m1
#define s02   c0
#define s03   c1
#define s04   m2
#define s05   m3
#define s06   c2
#define s07   c3

#define s08   c4
#define s09   c5
#define s0A   m4
#define s0B   m5
#define s0C   c6
#define s0D   c7
#define s0E   m6
#define s0F   m7

#define s10   m8
#define s11   m9
#define s12   c8
#define s13   c9
#define s14   mA
#define s15   mB
#define s16   cA
#define s17   cB

#define s18   cC
#define s19   cD
#define s1A   mC
#define s1B   mD
#define s1C   cE
#define s1D   cF
#define s1E   mE
#define s1F   mF
|
||||
|
||||
/*
 * One round on the 32-word Hamsi-384/512 state s00..s1F:
 *   1. XOR all 32 round constants from `alpha` into the state (the
 *      round counter `rc` is additionally XORed into s01);
 *   2. apply SBOX to the eight groups (s0i, s0(i+8), s1i, s1(i+8));
 *   3. apply L to eight groups of four words, then to four more
 *      groups.
 */
#define ROUND_BIG(rc, alpha)   do { \
		s00 ^= alpha[0x00]; \
		s01 ^= alpha[0x01] ^ (sph_u32)(rc); \
		s02 ^= alpha[0x02]; \
		s03 ^= alpha[0x03]; \
		s04 ^= alpha[0x04]; \
		s05 ^= alpha[0x05]; \
		s06 ^= alpha[0x06]; \
		s07 ^= alpha[0x07]; \
		\
		s08 ^= alpha[0x08]; \
		s09 ^= alpha[0x09]; \
		s0A ^= alpha[0x0A]; \
		s0B ^= alpha[0x0B]; \
		s0C ^= alpha[0x0C]; \
		s0D ^= alpha[0x0D]; \
		s0E ^= alpha[0x0E]; \
		s0F ^= alpha[0x0F]; \
		\
		s10 ^= alpha[0x10]; \
		s11 ^= alpha[0x11]; \
		s12 ^= alpha[0x12]; \
		s13 ^= alpha[0x13]; \
		s14 ^= alpha[0x14]; \
		s15 ^= alpha[0x15]; \
		s16 ^= alpha[0x16]; \
		s17 ^= alpha[0x17]; \
		\
		s18 ^= alpha[0x18]; \
		s19 ^= alpha[0x19]; \
		s1A ^= alpha[0x1A]; \
		s1B ^= alpha[0x1B]; \
		s1C ^= alpha[0x1C]; \
		s1D ^= alpha[0x1D]; \
		s1E ^= alpha[0x1E]; \
		s1F ^= alpha[0x1F]; \
		\
		SBOX(s00, s08, s10, s18); \
		SBOX(s01, s09, s11, s19); \
		SBOX(s02, s0A, s12, s1A); \
		SBOX(s03, s0B, s13, s1B); \
		SBOX(s04, s0C, s14, s1C); \
		SBOX(s05, s0D, s15, s1D); \
		SBOX(s06, s0E, s16, s1E); \
		SBOX(s07, s0F, s17, s1F); \
		\
		L(s00, s09, s12, s1B); \
		L(s01, s0A, s13, s1C); \
		L(s02, s0B, s14, s1D); \
		L(s03, s0C, s15, s1E); \
		L(s04, s0D, s16, s1F); \
		L(s05, s0E, s17, s18); \
		L(s06, s0F, s10, s19); \
		L(s07, s08, s11, s1A); \
		\
		L(s00, s02, s05, s07); \
		L(s10, s13, s15, s16); \
		L(s09, s0B, s0C, s0E); \
		L(s19, s1A, s1C, s1F); \
	} while (0)
|
||||
|
||||
/*
 * Hamsi-384/512 permutations.
 *   P_BIG   regular permutation: 6 rounds (counters 0..5), alpha_n
 *   PF_BIG  final permutation:  12 rounds (counters 0..11), alpha_f
 * With SPH_SMALL_FOOTPRINT_HAMSI the rounds are driven by a loop;
 * otherwise they are fully unrolled.
 */
#if SPH_SMALL_FOOTPRINT_HAMSI

#define P_BIG   do { \
		unsigned r; \
		for (r = 0; r < 6; r ++) \
			ROUND_BIG(r, alpha_n); \
	} while (0)

#define PF_BIG   do { \
		unsigned r; \
		for (r = 0; r < 12; r ++) \
			ROUND_BIG(r, alpha_f); \
	} while (0)

#else

#define P_BIG   do { \
		ROUND_BIG(0, alpha_n); \
		ROUND_BIG(1, alpha_n); \
		ROUND_BIG(2, alpha_n); \
		ROUND_BIG(3, alpha_n); \
		ROUND_BIG(4, alpha_n); \
		ROUND_BIG(5, alpha_n); \
	} while (0)

#define PF_BIG   do { \
		ROUND_BIG(0, alpha_f); \
		ROUND_BIG(1, alpha_f); \
		ROUND_BIG(2, alpha_f); \
		ROUND_BIG(3, alpha_f); \
		ROUND_BIG(4, alpha_f); \
		ROUND_BIG(5, alpha_f); \
		ROUND_BIG(6, alpha_f); \
		ROUND_BIG(7, alpha_f); \
		ROUND_BIG(8, alpha_f); \
		ROUND_BIG(9, alpha_f); \
		ROUND_BIG(10, alpha_f); \
		ROUND_BIG(11, alpha_f); \
	} while (0)

#endif
|
||||
|
||||
/*
 * Truncation / feed-forward: XOR sixteen words of the permuted state
 * into sc->h[] and reload c0..cF with the new chaining value.
 *
 * The statements must run from cF down to c0. s17, s16, s13, s12, s07,
 * s06, s03 and s02 are aliases for cB, cA, c9, c8, c3, c2, c1 and c0,
 * so those locals have to be read before the lines further down
 * overwrite them.
 */
#define T_BIG   do { \
		cF = (sc->h[0xF] ^= s17); \
		cE = (sc->h[0xE] ^= s16); \
		cD = (sc->h[0xD] ^= s15); \
		cC = (sc->h[0xC] ^= s14); \
		cB = (sc->h[0xB] ^= s13); \
		cA = (sc->h[0xA] ^= s12); \
		c9 = (sc->h[0x9] ^= s11); \
		c8 = (sc->h[0x8] ^= s10); \
		c7 = (sc->h[0x7] ^= s07); \
		c6 = (sc->h[0x6] ^= s06); \
		c5 = (sc->h[0x5] ^= s05); \
		c4 = (sc->h[0x4] ^= s04); \
		c3 = (sc->h[0x3] ^= s03); \
		c2 = (sc->h[0x2] ^= s02); \
		c1 = (sc->h[0x1] ^= s01); \
		c0 = (sc->h[0x0] ^= s00); \
	} while (0)
|
||||
|
||||
static void
|
||||
hamsi_big(sph_hamsi_big_context *sc, const unsigned char *buf, size_t num)
|
||||
{
|
||||
DECL_STATE_BIG
|
||||
#if !SPH_64
|
||||
sph_u32 tmp;
|
||||
#endif
|
||||
|
||||
#if SPH_64
|
||||
sc->count += (sph_u64)num << 6;
|
||||
#else
|
||||
tmp = SPH_T32((sph_u32)num << 6);
|
||||
sc->count_low = SPH_T32(sc->count_low + tmp);
|
||||
sc->count_high += (sph_u32)((num >> 13) >> 13);
|
||||
if (sc->count_low < tmp)
|
||||
sc->count_high ++;
|
||||
#endif
|
||||
READ_STATE_BIG(sc);
|
||||
while (num -- > 0) {
|
||||
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
|
||||
sph_u32 m8, m9, mA, mB, mC, mD, mE, mF;
|
||||
|
||||
INPUT_BIG;
|
||||
P_BIG;
|
||||
T_BIG;
|
||||
buf += 8;
|
||||
}
|
||||
WRITE_STATE_BIG(sc);
|
||||
}
|
||||
|
||||
static void
|
||||
hamsi_big_final(sph_hamsi_big_context *sc, const unsigned char *buf)
|
||||
{
|
||||
sph_u32 m0, m1, m2, m3, m4, m5, m6, m7;
|
||||
sph_u32 m8, m9, mA, mB, mC, mD, mE, mF;
|
||||
DECL_STATE_BIG
|
||||
|
||||
READ_STATE_BIG(sc);
|
||||
INPUT_BIG;
|
||||
PF_BIG;
|
||||
T_BIG;
|
||||
WRITE_STATE_BIG(sc);
|
||||
}
|
||||
|
||||
static void
|
||||
hamsi_big_init(sph_hamsi_big_context *sc, const sph_u32 *iv)
|
||||
{
|
||||
sc->partial_len = 0;
|
||||
memcpy(sc->h, iv, sizeof sc->h);
|
||||
#if SPH_64
|
||||
sc->count = 0;
|
||||
#else
|
||||
sc->count_high = sc->count_low = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
static void
|
||||
hamsi_big_core(sph_hamsi_big_context *sc, const void *data, size_t len)
|
||||
{
|
||||
if (sc->partial_len != 0) {
|
||||
size_t mlen;
|
||||
|
||||
mlen = 8 - sc->partial_len;
|
||||
if (len < mlen) {
|
||||
memcpy(sc->partial + sc->partial_len, data, len);
|
||||
sc->partial_len += len;
|
||||
return;
|
||||
} else {
|
||||
memcpy(sc->partial + sc->partial_len, data, mlen);
|
||||
len -= mlen;
|
||||
data = (const unsigned char *)data + mlen;
|
||||
hamsi_big(sc, sc->partial, 1);
|
||||
sc->partial_len = 0;
|
||||
}
|
||||
}
|
||||
|
||||
hamsi_big(sc, data, (len >> 3));
|
||||
data = (const unsigned char *)data + (len & ~(size_t)7);
|
||||
len &= (size_t)7;
|
||||
memcpy(sc->partial, data, len);
|
||||
sc->partial_len = len;
|
||||
}
|
||||
|
||||
static void
|
||||
hamsi_big_close(sph_hamsi_big_context *sc,
|
||||
unsigned ub, unsigned n, void *dst, size_t out_size_w32)
|
||||
{
|
||||
unsigned char pad[8];
|
||||
size_t ptr, u;
|
||||
unsigned z;
|
||||
unsigned char *out;
|
||||
|
||||
ptr = sc->partial_len;
|
||||
#if SPH_64
|
||||
sph_enc64be(pad, sc->count + (ptr << 3) + n);
|
||||
#else
|
||||
sph_enc32be(pad, sc->count_high);
|
||||
sph_enc32be(pad + 4, sc->count_low + (ptr << 3) + n);
|
||||
#endif
|
||||
z = 0x80 >> n;
|
||||
sc->partial[ptr ++] = ((ub & -z) | z) & 0xFF;
|
||||
while (ptr < 8)
|
||||
sc->partial[ptr ++] = 0;
|
||||
hamsi_big(sc, sc->partial, 1);
|
||||
hamsi_big_final(sc, pad);
|
||||
out = dst;
|
||||
if (out_size_w32 == 12) {
|
||||
sph_enc32be(out + 0, sc->h[ 0]);
|
||||
sph_enc32be(out + 4, sc->h[ 1]);
|
||||
sph_enc32be(out + 8, sc->h[ 3]);
|
||||
sph_enc32be(out + 12, sc->h[ 4]);
|
||||
sph_enc32be(out + 16, sc->h[ 5]);
|
||||
sph_enc32be(out + 20, sc->h[ 6]);
|
||||
sph_enc32be(out + 24, sc->h[ 8]);
|
||||
sph_enc32be(out + 28, sc->h[ 9]);
|
||||
sph_enc32be(out + 32, sc->h[10]);
|
||||
sph_enc32be(out + 36, sc->h[12]);
|
||||
sph_enc32be(out + 40, sc->h[13]);
|
||||
sph_enc32be(out + 44, sc->h[15]);
|
||||
} else {
|
||||
for (u = 0; u < 16; u ++)
|
||||
sph_enc32be(out + (u << 2), sc->h[u]);
|
||||
}
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi224_init(void *cc)
|
||||
{
|
||||
hamsi_small_init(cc, IV224);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi224(void *cc, const void *data, size_t len)
|
||||
{
|
||||
hamsi_small_core(cc, data, len);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi224_close(void *cc, void *dst)
|
||||
{
|
||||
hamsi_small_close(cc, 0, 0, dst, 7);
|
||||
// hamsi_small_init(cc, IV224);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
hamsi_small_close(cc, ub, n, dst, 7);
|
||||
// hamsi_small_init(cc, IV224);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi256_init(void *cc)
|
||||
{
|
||||
hamsi_small_init(cc, IV256);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi256(void *cc, const void *data, size_t len)
|
||||
{
|
||||
hamsi_small_core(cc, data, len);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi256_close(void *cc, void *dst)
|
||||
{
|
||||
hamsi_small_close(cc, 0, 0, dst, 8);
|
||||
// hamsi_small_init(cc, IV256);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
hamsi_small_close(cc, ub, n, dst, 8);
|
||||
// hamsi_small_init(cc, IV256);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi384_init(void *cc)
|
||||
{
|
||||
hamsi_big_init(cc, IV384);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi384(void *cc, const void *data, size_t len)
|
||||
{
|
||||
hamsi_big_core(cc, data, len);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi384_close(void *cc, void *dst)
|
||||
{
|
||||
hamsi_big_close(cc, 0, 0, dst, 12);
|
||||
// hamsi_big_init(cc, IV384);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
hamsi_big_close(cc, ub, n, dst, 12);
|
||||
// hamsi_big_init(cc, IV384);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi512_init(void *cc)
|
||||
{
|
||||
hamsi_big_init(cc, IV512);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi512(void *cc, const void *data, size_t len)
|
||||
{
|
||||
hamsi_big_core(cc, data, len);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi512_close(void *cc, void *dst)
|
||||
{
|
||||
hamsi_big_close(cc, 0, 0, dst, 16);
|
||||
// hamsi_big_init(cc, IV512);
|
||||
}
|
||||
|
||||
/* see sph_hamsi.h */
|
||||
void
|
||||
sph_hamsi512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
|
||||
{
|
||||
hamsi_big_close(cc, ub, n, dst, 16);
|
||||
// hamsi_big_init(cc, IV512);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
321
src/crypto/ghostrider/sph_hamsi.h
Normal file
321
src/crypto/ghostrider/sph_hamsi.h
Normal file
@@ -0,0 +1,321 @@
|
||||
/* $Id: sph_hamsi.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* Hamsi interface. This code implements Hamsi with the recommended
|
||||
* parameters for SHA-3, with outputs of 224, 256, 384 and 512 bits.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_hamsi.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_HAMSI_H__
|
||||
#define SPH_HAMSI_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include "sph_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Hamsi-224.
|
||||
*/
|
||||
#define SPH_SIZE_hamsi224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Hamsi-256.
|
||||
*/
|
||||
#define SPH_SIZE_hamsi256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Hamsi-384.
|
||||
*/
|
||||
#define SPH_SIZE_hamsi384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Hamsi-512.
|
||||
*/
|
||||
#define SPH_SIZE_hamsi512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for Hamsi-224 and Hamsi-256 computations:
|
||||
* it contains the intermediate values and some data from the last
|
||||
* entered block. Once a Hamsi computation has been performed, the
|
||||
* context can be reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running Hamsi
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char partial[4]; /* presumably input bytes buffered until a full block is available -- TODO confirm */
|
||||
size_t partial_len; /* presumably the number of valid bytes in partial[] -- TODO confirm */
|
||||
sph_u32 h[8]; /* eight 32-bit words; presumably the chaining state -- TODO confirm */
|
||||
#if SPH_64
|
||||
sph_u64 count; /* single 64-bit counter, used when SPH_64 is set */
|
||||
#else
|
||||
sph_u32 count_high, count_low; /* without SPH_64: presumably the same counter as two 32-bit halves */
|
||||
#endif
|
||||
#endif
|
||||
} sph_hamsi_small_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for Hamsi-224 computations. It is
|
||||
* identical to the common <code>sph_hamsi_small_context</code>.
|
||||
*/
|
||||
typedef sph_hamsi_small_context sph_hamsi224_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for Hamsi-256 computations. It is
|
||||
* identical to the common <code>sph_hamsi_small_context</code>.
|
||||
*/
|
||||
typedef sph_hamsi_small_context sph_hamsi256_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for Hamsi-384 and Hamsi-512 computations:
|
||||
* it contains the intermediate values and some data from the last
|
||||
* entered block. Once a Hamsi computation has been performed, the
|
||||
* context can be reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running Hamsi
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char partial[8]; /* presumably input bytes buffered until a full block is available -- TODO confirm */
|
||||
size_t partial_len; /* presumably the number of valid bytes in partial[] -- TODO confirm */
|
||||
sph_u32 h[16]; /* sixteen 32-bit words; presumably the chaining state -- TODO confirm */
|
||||
#if SPH_64
|
||||
sph_u64 count; /* single 64-bit counter, used when SPH_64 is set */
|
||||
#else
|
||||
sph_u32 count_high, count_low; /* without SPH_64: presumably the same counter as two 32-bit halves */
|
||||
#endif
|
||||
#endif
|
||||
} sph_hamsi_big_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for Hamsi-384 computations. It is
|
||||
* identical to the common <code>sph_hamsi_big_context</code>.
|
||||
*/
|
||||
typedef sph_hamsi_big_context sph_hamsi384_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for Hamsi-512 computations. It is
|
||||
* identical to the common <code>sph_hamsi_big_context</code>.
|
||||
*/
|
||||
typedef sph_hamsi_big_context sph_hamsi512_context;
|
||||
|
||||
/**
|
||||
* Initialize a Hamsi-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Hamsi-224 context (pointer to a
|
||||
* <code>sph_hamsi224_context</code>)
|
||||
*/
|
||||
void sph_hamsi224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Hamsi-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_hamsi224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Hamsi-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Hamsi-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_hamsi224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Hamsi-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_hamsi224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Hamsi-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Hamsi-256 context (pointer to a
|
||||
* <code>sph_hamsi256_context</code>)
|
||||
*/
|
||||
void sph_hamsi256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Hamsi-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_hamsi256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Hamsi-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
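* accommodate the result (32 bytes). The context is NOT reinitialized by
|
||||
* this call (the re-initialization in the implementation is commented out);
* call sph_hamsi256_init() before reusing it.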
|
||||
*
|
||||
* @param cc the Hamsi-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_hamsi256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
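* level). The context is NOT reinitialized by this call (the
* re-initialization in the implementation is commented out); call
* sph_hamsi256_init() before reusing it.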
|
||||
*
|
||||
* @param cc the Hamsi-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_hamsi256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Hamsi-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Hamsi-384 context (pointer to a
|
||||
* <code>sph_hamsi384_context</code>)
|
||||
*/
|
||||
void sph_hamsi384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Hamsi-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_hamsi384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Hamsi-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
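* accommodate the result (48 bytes). The context is NOT reinitialized by
|
||||
* this call (the re-initialization in the implementation is commented out);
* call sph_hamsi384_init() before reusing it.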
|
||||
*
|
||||
* @param cc the Hamsi-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_hamsi384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
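* level). The context is NOT reinitialized by this call (the
* re-initialization in the implementation is commented out); call
* sph_hamsi384_init() before reusing it.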
|
||||
*
|
||||
* @param cc the Hamsi-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_hamsi384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Hamsi-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Hamsi-512 context (pointer to a
|
||||
* <code>sph_hamsi512_context</code>)
|
||||
*/
|
||||
void sph_hamsi512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Hamsi-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_hamsi512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Hamsi-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
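* accommodate the result (64 bytes). The context is NOT reinitialized by
|
||||
* this call (the re-initialization in the implementation is commented out);
* call sph_hamsi512_init() before reusing it.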
|
||||
*
|
||||
* @param cc the Hamsi-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_hamsi512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
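* level). The context is NOT reinitialized by this call (the
* re-initialization in the implementation is commented out); call
* sph_hamsi512_init() before reusing it.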
|
||||
*
|
||||
* @param cc the Hamsi-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_hamsi512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
39648
src/crypto/ghostrider/sph_hamsi_helper.c
Normal file
39648
src/crypto/ghostrider/sph_hamsi_helper.c
Normal file
File diff suppressed because it is too large
Load Diff
1040
src/crypto/ghostrider/sph_jh.c
Normal file
1040
src/crypto/ghostrider/sph_jh.c
Normal file
File diff suppressed because it is too large
Load Diff
298
src/crypto/ghostrider/sph_jh.h
Normal file
298
src/crypto/ghostrider/sph_jh.h
Normal file
@@ -0,0 +1,298 @@
|
||||
/* $Id: sph_jh.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* JH interface. JH is a family of functions which differ by
|
||||
* their output size; this implementation defines JH for output
|
||||
* sizes 224, 256, 384 and 512 bits.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_jh.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_JH_H__
|
||||
#define SPH_JH_H__
|
||||
|
||||
/*
 * Headers are included before the linkage block so that nothing they
 * declare is forced into extern "C" when this file is compiled as C++
 * (same ordering as sph_hamsi.h).
 */
#include <stddef.h>
|
||||
#include "sph_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Output size (in bits) for JH-224.
|
||||
*/
|
||||
#define SPH_SIZE_jh224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for JH-256.
|
||||
*/
|
||||
#define SPH_SIZE_jh256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for JH-384.
|
||||
*/
|
||||
#define SPH_SIZE_jh384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for JH-512.
|
||||
*/
|
||||
#define SPH_SIZE_jh512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for JH computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* a JH computation has been performed, the context can be reused for
|
||||
* another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running JH computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
size_t ptr; /* presumably the current fill position within buf -- TODO confirm */
|
||||
union {
|
||||
#if SPH_64
|
||||
sph_u64 wide[16]; /* 64-bit view of the union, only present when SPH_64 is set */
|
||||
#endif
|
||||
sph_u32 narrow[32]; /* 32-bit view of the same storage, always present */
|
||||
} H;
|
||||
#if SPH_64
|
||||
sph_u64 block_count; /* single 64-bit counter, used when SPH_64 is set */
|
||||
#else
|
||||
sph_u32 block_count_high, block_count_low; /* without SPH_64: presumably the same counter as two 32-bit halves */
|
||||
#endif
|
||||
#endif
|
||||
} sph_jh_context;
|
||||
|
||||
/**
|
||||
* Type for a JH-224 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_jh_context sph_jh224_context;
|
||||
|
||||
/**
|
||||
* Type for a JH-256 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_jh_context sph_jh256_context;
|
||||
|
||||
/**
|
||||
* Type for a JH-384 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_jh_context sph_jh384_context;
|
||||
|
||||
/**
|
||||
* Type for a JH-512 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_jh_context sph_jh512_context;
|
||||
|
||||
/**
|
||||
* Initialize a JH-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the JH-224 context (pointer to a
|
||||
* <code>sph_jh224_context</code>)
|
||||
*/
|
||||
void sph_jh224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the JH-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_jh224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current JH-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the JH-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_jh224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the JH-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_jh224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a JH-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the JH-256 context (pointer to a
|
||||
* <code>sph_jh256_context</code>)
|
||||
*/
|
||||
void sph_jh256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the JH-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_jh256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current JH-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the JH-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_jh256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the JH-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_jh256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a JH-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the JH-384 context (pointer to a
|
||||
* <code>sph_jh384_context</code>)
|
||||
*/
|
||||
void sph_jh384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the JH-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_jh384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current JH-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the JH-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_jh384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the JH-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_jh384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a JH-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the JH-512 context (pointer to a
|
||||
* <code>sph_jh512_context</code>)
|
||||
*/
|
||||
void sph_jh512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the JH-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_jh512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current JH-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the JH-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_jh512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the JH-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_jh512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
1868
src/crypto/ghostrider/sph_keccak.c
Normal file
1868
src/crypto/ghostrider/sph_keccak.c
Normal file
File diff suppressed because it is too large
Load Diff
296
src/crypto/ghostrider/sph_keccak.h
Normal file
296
src/crypto/ghostrider/sph_keccak.h
Normal file
@@ -0,0 +1,296 @@
|
||||
/* $Id: sph_keccak.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* Keccak interface. This is the interface for Keccak with the
|
||||
* recommended parameters for SHA-3, with output lengths 224, 256,
|
||||
* 384 and 512 bits.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_keccak.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_KECCAK_H__
|
||||
#define SPH_KECCAK_H__
|
||||
|
||||
/*
 * Headers are included before the linkage block so that nothing they
 * declare is forced into extern "C" when this file is compiled as C++
 * (same ordering as sph_hamsi.h).
 */
#include "sph_types.h"
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Taken from keccak-gate.h
|
||||
extern int hard_coded_eb;
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Keccak-224.
|
||||
*/
|
||||
#define SPH_SIZE_keccak224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Keccak-256.
|
||||
*/
|
||||
#define SPH_SIZE_keccak256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Keccak-384.
|
||||
*/
|
||||
#define SPH_SIZE_keccak384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Keccak-512.
|
||||
*/
|
||||
#define SPH_SIZE_keccak512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for Keccak computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once a
|
||||
* Keccak computation has been performed, the context can be reused for
|
||||
* another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running Keccak computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[144]; /* first field, for alignment */
|
||||
size_t ptr, lim; /* presumably the fill position in buf and the per-variant block limit -- TODO confirm */
|
||||
union {
|
||||
#if SPH_64
|
||||
sph_u64 wide[25]; /* 64-bit view of the union, only present when SPH_64 is set */
|
||||
#endif
|
||||
sph_u32 narrow[50]; /* 32-bit view of the same storage, always present */
|
||||
} u;
|
||||
#endif
|
||||
} sph_keccak_context;
|
||||
|
||||
/**
|
||||
* Type for a Keccak-224 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_keccak_context sph_keccak224_context;
|
||||
|
||||
/**
|
||||
* Type for a Keccak-256 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_keccak_context sph_keccak256_context;
|
||||
|
||||
/**
|
||||
* Type for a Keccak-384 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_keccak_context sph_keccak384_context;
|
||||
|
||||
/**
|
||||
* Type for a Keccak-512 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_keccak_context sph_keccak512_context;
|
||||
|
||||
/**
|
||||
* Initialize a Keccak-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Keccak-224 context (pointer to a
|
||||
* <code>sph_keccak224_context</code>)
|
||||
*/
|
||||
void sph_keccak224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Keccak-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_keccak224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Keccak-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Keccak-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_keccak224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Keccak-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_keccak224_addbits_and_close(void *cc, unsigned ub, unsigned n,
|
||||
void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Keccak-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Keccak-256 context (pointer to a
|
||||
* <code>sph_keccak256_context</code>)
|
||||
*/
|
||||
void sph_keccak256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Keccak-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_keccak256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Keccak-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Keccak-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_keccak256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Keccak-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_keccak256_addbits_and_close(void *cc, unsigned ub, unsigned n,
|
||||
void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Keccak-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Keccak-384 context (pointer to a
|
||||
* <code>sph_keccak384_context</code>)
|
||||
*/
|
||||
void sph_keccak384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Keccak-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_keccak384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Keccak-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Keccak-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_keccak384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Keccak-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_keccak384_addbits_and_close(void *cc, unsigned ub, unsigned n,
|
||||
void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Keccak-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Keccak-512 context (pointer to a
|
||||
* <code>sph_keccak512_context</code>)
|
||||
*/
|
||||
void sph_keccak512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Keccak-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_keccak512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Keccak-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Keccak-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_keccak512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Keccak-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_keccak512_addbits_and_close(void *cc, unsigned ub, unsigned n,
|
||||
void *dst);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
1426
src/crypto/ghostrider/sph_luffa.c
Normal file
1426
src/crypto/ghostrider/sph_luffa.c
Normal file
File diff suppressed because it is too large
Load Diff
296
src/crypto/ghostrider/sph_luffa.h
Normal file
296
src/crypto/ghostrider/sph_luffa.h
Normal file
@@ -0,0 +1,296 @@
|
||||
/* $Id: sph_luffa.h 154 2010-04-26 17:00:24Z tp $ */
|
||||
/**
|
||||
* Luffa interface. Luffa is a family of functions which differ by
|
||||
* their output size; this implementation defines Luffa for output
|
||||
* sizes 224, 256, 384 and 512 bits.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_luffa.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_LUFFA_H__
|
||||
#define SPH_LUFFA_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include "sph_types.h"
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Luffa-224.
|
||||
*/
|
||||
#define SPH_SIZE_luffa224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Luffa-256.
|
||||
*/
|
||||
#define SPH_SIZE_luffa256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Luffa-384.
|
||||
*/
|
||||
#define SPH_SIZE_luffa384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Luffa-512.
|
||||
*/
|
||||
#define SPH_SIZE_luffa512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for Luffa-224 computations: it contains
|
||||
* the intermediate values and some data from the last entered block.
|
||||
* Once a Luffa computation has been performed, the context can be
|
||||
* reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running Luffa
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
/* Members are excluded when DOXYGEN_IGNORE is defined; callers must treat them as private. */
unsigned char buf[32]; /* first field, for alignment */
|
||||
/* NOTE(review): presumably the number of pending bytes held in buf -- confirm in sph_luffa.c */
size_t ptr;
|
||||
/* Intermediate values: 3 rows of 8 sph_u32 words (this layout also serves Luffa-256). */
sph_u32 V[3][8];
|
||||
#endif
|
||||
} sph_luffa224_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for Luffa-256 computations. It is
|
||||
* identical to <code>sph_luffa224_context</code>.
|
||||
*/
|
||||
typedef sph_luffa224_context sph_luffa256_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for Luffa-384 computations.
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
/* Members are excluded when DOXYGEN_IGNORE is defined; callers must treat them as private. */
unsigned char buf[32]; /* first field, for alignment */
|
||||
/* NOTE(review): presumably the number of pending bytes held in buf -- confirm in sph_luffa.c */
size_t ptr;
|
||||
/* Intermediate values: 4 rows of 8 sph_u32 words. */
sph_u32 V[4][8];
|
||||
#endif
|
||||
} sph_luffa384_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for Luffa-512 computations.
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
/* Members are excluded when DOXYGEN_IGNORE is defined; callers must treat them as private. */
unsigned char buf[32]; /* first field, for alignment */
|
||||
/* NOTE(review): presumably the number of pending bytes held in buf -- confirm in sph_luffa.c */
size_t ptr;
|
||||
/* Intermediate values: 5 rows of 8 sph_u32 words. */
sph_u32 V[5][8];
|
||||
#endif
|
||||
} sph_luffa512_context;
|
||||
|
||||
/**
|
||||
* Initialize a Luffa-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Luffa-224 context (pointer to a
|
||||
* <code>sph_luffa224_context</code>)
|
||||
*/
|
||||
void sph_luffa224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Luffa-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_luffa224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Luffa-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Luffa-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_luffa224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Luffa-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_luffa224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Luffa-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Luffa-256 context (pointer to a
|
||||
* <code>sph_luffa256_context</code>)
|
||||
*/
|
||||
void sph_luffa256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Luffa-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_luffa256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Luffa-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Luffa-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_luffa256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Luffa-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_luffa256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Luffa-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Luffa-384 context (pointer to a
|
||||
* <code>sph_luffa384_context</code>)
|
||||
*/
|
||||
void sph_luffa384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Luffa-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_luffa384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Luffa-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Luffa-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_luffa384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Luffa-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_luffa384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Luffa-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Luffa-512 context (pointer to a
|
||||
* <code>sph_luffa512_context</code>)
|
||||
*/
|
||||
void sph_luffa512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Luffa-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_luffa512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Luffa-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Luffa-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_luffa512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Luffa-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_luffa512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
793
src/crypto/ghostrider/sph_sha2.c
Normal file
793
src/crypto/ghostrider/sph_sha2.c
Normal file
@@ -0,0 +1,793 @@
|
||||
/* $Id: sha2.c 227 2010-06-16 17:28:38Z tp $ */
|
||||
/*
|
||||
* SHA-224 / SHA-256 implementation.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sph_sha2.h"
|
||||
|
||||
/*
 * Select the compact SHA-2 round body when the library-wide
 * SPH_SMALL_FOOTPRINT switch is on, unless the build has already
 * chosen a value for SPH_SMALL_FOOTPRINT_SHA2 itself.
 */
#ifndef SPH_SMALL_FOOTPRINT_SHA2
#if SPH_SMALL_FOOTPRINT
#define SPH_SMALL_FOOTPRINT_SHA2   1
#endif
#endif
|
||||
|
||||
/*
 * SHA-2 "choose": every result bit is taken from Y where the matching
 * bit of X is set, and from Z where it is clear.
 */
#define CH(X, Y, Z)    ((((Y) ^ (Z)) & (X)) ^ (Z))

/*
 * SHA-2 "majority", written so that consecutive rounds can share work.
 * It yields the same value as
 *     (((Y) & (Z)) | (((Y) | (Z)) & (X)))
 * under two conditions at the expansion site:
 *   - variables named X_xor_Y and Y_xor_Z are in scope;
 *   - Y_xor_Z already holds Y ^ Z.
 * Z itself is never read. As a side effect X_xor_Y is overwritten with
 * X ^ Y, which the round code then copies into Y_xor_Z for the next
 * round. X is evaluated once and Y twice, so pass expressions without
 * side effects. Every argument is parenthesized so that compound
 * expressions (e.g. "lo | hi") expand with the intended precedence.
 */
#define MAJ(X, Y, Z)   ((Y) ^ ((X_xor_Y = (X) ^ (Y)) & (Y_xor_Z)))

/* Local shorthand for the library's 32-bit rotate-right macro. */
#define ROTR    SPH_ROTR32
|
||||
|
||||
/*
 * SHA-224 / SHA-256 mixing functions. BSG2_0 / BSG2_1 are the two
 * "big sigma" functions applied to the working variables; SSG2_0 /
 * SSG2_1 are the two "small sigma" functions used by the message
 * expansion. Each is the XOR of three rotated or shifted copies of x,
 * so x is evaluated three times.
 */
#define BSG2_0(x)      (ROTR(x, 22) ^ ROTR(x, 13) ^ ROTR(x, 2))
#define BSG2_1(x)      (ROTR(x, 25) ^ ROTR(x, 11) ^ ROTR(x, 6))
#define SSG2_0(x)      (SPH_T32((x) >> 3) ^ ROTR(x, 18) ^ ROTR(x, 7))
#define SSG2_1(x)      (SPH_T32((x) >> 10) ^ ROTR(x, 19) ^ ROTR(x, 17))
|
||||
|
||||
static const sph_u32 H224[8] = {
|
||||
SPH_C32(0xC1059ED8), SPH_C32(0x367CD507), SPH_C32(0x3070DD17),
|
||||
SPH_C32(0xF70E5939), SPH_C32(0xFFC00B31), SPH_C32(0x68581511),
|
||||
SPH_C32(0x64F98FA7), SPH_C32(0xBEFA4FA4)
|
||||
};
|
||||
|
||||
static const sph_u32 H256[8] = {
|
||||
SPH_C32(0x6A09E667), SPH_C32(0xBB67AE85), SPH_C32(0x3C6EF372),
|
||||
SPH_C32(0xA54FF53A), SPH_C32(0x510E527F), SPH_C32(0x9B05688C),
|
||||
SPH_C32(0x1F83D9AB), SPH_C32(0x5BE0CD19)
|
||||
};
|
||||
|
||||
/*
|
||||
* The SHA2_ROUND_BODY defines the body for a SHA-224 / SHA-256
|
||||
* compression function implementation. The "in" parameter should
|
||||
* evaluate, when applied to a numerical input parameter from 0 to 15,
|
||||
* to an expression which yields the corresponding input block. The "r"
|
||||
* parameter should evaluate to an array or pointer expression
|
||||
* designating the array of 8 words which contains the input and output
|
||||
* of the compression function.
|
||||
*/
|
||||
|
||||
|
||||
/*
 * SHA-224 / SHA-256 round constants, one per round (64 rounds).
 *
 * Only the small-footprint round body reads this table, through
 * K[pcount + (pc)]; the default (large-footprint) body spells each
 * constant out inline. The table is therefore compiled only for
 * small-footprint builds. Leaving it commented out, as before, made
 * those builds fail on the undefined identifier K.
 */
#if SPH_SMALL_FOOTPRINT_SHA2
static const sph_u32 K[64] = {
	SPH_C32(0x428A2F98), SPH_C32(0x71374491),
	SPH_C32(0xB5C0FBCF), SPH_C32(0xE9B5DBA5),
	SPH_C32(0x3956C25B), SPH_C32(0x59F111F1),
	SPH_C32(0x923F82A4), SPH_C32(0xAB1C5ED5),
	SPH_C32(0xD807AA98), SPH_C32(0x12835B01),
	SPH_C32(0x243185BE), SPH_C32(0x550C7DC3),
	SPH_C32(0x72BE5D74), SPH_C32(0x80DEB1FE),
	SPH_C32(0x9BDC06A7), SPH_C32(0xC19BF174),
	SPH_C32(0xE49B69C1), SPH_C32(0xEFBE4786),
	SPH_C32(0x0FC19DC6), SPH_C32(0x240CA1CC),
	SPH_C32(0x2DE92C6F), SPH_C32(0x4A7484AA),
	SPH_C32(0x5CB0A9DC), SPH_C32(0x76F988DA),
	SPH_C32(0x983E5152), SPH_C32(0xA831C66D),
	SPH_C32(0xB00327C8), SPH_C32(0xBF597FC7),
	SPH_C32(0xC6E00BF3), SPH_C32(0xD5A79147),
	SPH_C32(0x06CA6351), SPH_C32(0x14292967),
	SPH_C32(0x27B70A85), SPH_C32(0x2E1B2138),
	SPH_C32(0x4D2C6DFC), SPH_C32(0x53380D13),
	SPH_C32(0x650A7354), SPH_C32(0x766A0ABB),
	SPH_C32(0x81C2C92E), SPH_C32(0x92722C85),
	SPH_C32(0xA2BFE8A1), SPH_C32(0xA81A664B),
	SPH_C32(0xC24B8B70), SPH_C32(0xC76C51A3),
	SPH_C32(0xD192E819), SPH_C32(0xD6990624),
	SPH_C32(0xF40E3585), SPH_C32(0x106AA070),
	SPH_C32(0x19A4C116), SPH_C32(0x1E376C08),
	SPH_C32(0x2748774C), SPH_C32(0x34B0BCB5),
	SPH_C32(0x391C0CB3), SPH_C32(0x4ED8AA4A),
	SPH_C32(0x5B9CCA4F), SPH_C32(0x682E6FF3),
	SPH_C32(0x748F82EE), SPH_C32(0x78A5636F),
	SPH_C32(0x84C87814), SPH_C32(0x8CC70208),
	SPH_C32(0x90BEFFFA), SPH_C32(0xA4506CEB),
	SPH_C32(0xBEF9A3F7), SPH_C32(0xC67178F2)
};
#endif
|
||||
|
||||
#if SPH_SMALL_FOOTPRINT_SHA2
|
||||
|
||||
/*
 * Message schedule, first 16 steps: load input word number pc through
 * the caller-supplied accessor macro "in" into the local array W.
 */
#define SHA2_MEXP1(in, pc) do { \
		W[(pc)] = in(pc); \
	} while (0)

/*
 * Message schedule, later steps: replace the slot for step pc in the
 * 16-entry rolling window W with the sum of itself, the small-sigma-0
 * of the word 15 steps back, the word 7 steps back and the
 * small-sigma-1 of the word 2 steps back (indices taken modulo 16).
 * "in" is accepted only for signature symmetry with SHA2_MEXP1.
 */
#define SHA2_MEXP2(in, pc) do { \
		W[(pc) & 0x0F] = SPH_T32(W[(pc) & 0x0F] \
			+ SSG2_0(W[((pc) - 15) & 0x0F]) \
			+ W[((pc) - 7) & 0x0F] \
			+ SSG2_1(W[((pc) - 2) & 0x0F])); \
	} while (0)
|
||||
|
||||
/*
 * One SHA-224 / SHA-256 step for the small-footprint round body.
 *
 *   n        1 or 2: which message-expansion macro (SHA2_MEXP1 or
 *            SHA2_MEXP2) prepares the schedule word for this step
 *   a .. h   the eight working variables, in the order this step sees
 *            them; only d and h are assigned
 *   in       input-word accessor macro, forwarded to SHA2_MEXPn
 *   pc       step index within the current group of 16
 *
 * Uses W, K, pcount, X_xor_Y and Y_xor_Z from the expansion site. MAJ
 * stores a ^ b in X_xor_Y; that value is then copied to Y_xor_Z, where
 * the next step's MAJ expects to find its own b ^ c.
 */
#define SHA2_STEPn(n, a, b, c, d, e, f, g, h, in, pc) do { \
		sph_u32 tmp1, tmp2; \
		SHA2_MEXP ## n(in, pc); \
		tmp1 = SPH_T32(h + BSG2_1(e) + CH(e, f, g) \
			+ K[pcount + (pc)] + W[(pc) & 0x0F]); \
		tmp2 = SPH_T32(BSG2_0(a) + MAJ(a, b, c)); \
		Y_xor_Z = X_xor_Y; \
		d = SPH_T32(d + tmp1); \
		h = SPH_T32(tmp1 + tmp2); \
	} while (0)

/* Step that loads its schedule word from the input (steps 0 to 15). */
#define SHA2_STEP1(a, b, c, d, e, f, g, h, in, pc) \
	SHA2_STEPn(1, a, b, c, d, e, f, g, h, in, pc)

/* Step that derives its schedule word from earlier ones (steps 16 to 63). */
#define SHA2_STEP2(a, b, c, d, e, f, g, h, in, pc) \
	SHA2_STEPn(2, a, b, c, d, e, f, g, h, in, pc)
|
||||
|
||||
/*
 * Sixteen consecutive steps using the step macro STEP (SHA2_STEP1 or
 * SHA2_STEP2). The working variables A..H of the expansion site are
 * passed in a rotating order, so each step sees them shifted by one
 * position; after 16 steps the order is back where it started.
 */
#define SHA2_16STEPS(STEP, in) do { \
		STEP(A, B, C, D, E, F, G, H, in,  0); \
		STEP(H, A, B, C, D, E, F, G, in,  1); \
		STEP(G, H, A, B, C, D, E, F, in,  2); \
		STEP(F, G, H, A, B, C, D, E, in,  3); \
		STEP(E, F, G, H, A, B, C, D, in,  4); \
		STEP(D, E, F, G, H, A, B, C, in,  5); \
		STEP(C, D, E, F, G, H, A, B, in,  6); \
		STEP(B, C, D, E, F, G, H, A, in,  7); \
		STEP(A, B, C, D, E, F, G, H, in,  8); \
		STEP(H, A, B, C, D, E, F, G, in,  9); \
		STEP(G, H, A, B, C, D, E, F, in, 10); \
		STEP(F, G, H, A, B, C, D, E, in, 11); \
		STEP(E, F, G, H, A, B, C, D, in, 12); \
		STEP(D, E, F, G, H, A, B, C, in, 13); \
		STEP(C, D, E, F, G, H, A, B, in, 14); \
		STEP(B, C, D, E, F, G, H, A, in, 15); \
	} while (0)

/*
 * Small-footprint SHA-224 / SHA-256 compression function body.
 *
 *   in   macro which, applied to an index from 0 to 15, yields the
 *        corresponding input block word
 *   r    array or pointer expression designating the 8 state words;
 *        read at the start and updated in place at the end
 *
 * The first 16 steps (pcount == 0) take their schedule words from the
 * input; the remaining 48 run as three passes of 16 with pcount set to
 * 16, 32 and 48 so that K[pcount + pc] selects the round constant.
 */
#define SHA2_ROUND_BODY(in, r) do { \
		sph_u32 A = (r)[0], B = (r)[1], C = (r)[2], D = (r)[3]; \
		sph_u32 E = (r)[4], F = (r)[5], G = (r)[6], H = (r)[7]; \
		sph_u32 X_xor_Y, Y_xor_Z = B ^ C; \
		sph_u32 W[16]; \
		unsigned pcount = 0; \
		\
		SHA2_16STEPS(SHA2_STEP1, in); \
		for (pcount = 16; pcount < 64; pcount += 16) { \
			SHA2_16STEPS(SHA2_STEP2, in); \
		} \
		\
		(r)[0] = SPH_T32((r)[0] + A); \
		(r)[1] = SPH_T32((r)[1] + B); \
		(r)[2] = SPH_T32((r)[2] + C); \
		(r)[3] = SPH_T32((r)[3] + D); \
		(r)[4] = SPH_T32((r)[4] + E); \
		(r)[5] = SPH_T32((r)[5] + F); \
		(r)[6] = SPH_T32((r)[6] + G); \
		(r)[7] = SPH_T32((r)[7] + H); \
	} while (0)
|
||||
|
||||
#else // large footprint (default)
|
||||
|
||||
#define SHA2_ROUND_BODY(in, r) do { \
|
||||
sph_u32 A, B, C, D, E, F, G, H, T1, T2, X_xor_Y, Y_xor_Z;; \
|
||||
sph_u32 W00, W01, W02, W03, W04, W05, W06, W07; \
|
||||
sph_u32 W08, W09, W10, W11, W12, W13, W14, W15; \
|
||||
\
|
||||
A = (r)[0]; \
|
||||
B = (r)[1]; \
|
||||
C = (r)[2]; \
|
||||
D = (r)[3]; \
|
||||
E = (r)[4]; \
|
||||
F = (r)[5]; \
|
||||
G = (r)[6]; \
|
||||
H = (r)[7]; \
|
||||
Y_xor_Z = B ^ C; \
|
||||
W00 = in(0); \
|
||||
T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
|
||||
+ SPH_C32(0x428A2F98) + W00); \
|
||||
T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
D = SPH_T32(D + T1); \
|
||||
H = SPH_T32(T1 + T2); \
|
||||
W01 = in(1); \
|
||||
T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
|
||||
+ SPH_C32(0x71374491) + W01); \
|
||||
T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
C = SPH_T32(C + T1); \
|
||||
G = SPH_T32(T1 + T2); \
|
||||
W02 = in(2); \
|
||||
T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
|
||||
+ SPH_C32(0xB5C0FBCF) + W02); \
|
||||
T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
B = SPH_T32(B + T1); \
|
||||
F = SPH_T32(T1 + T2); \
|
||||
W03 = in(3); \
|
||||
T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
|
||||
+ SPH_C32(0xE9B5DBA5) + W03); \
|
||||
T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
A = SPH_T32(A + T1); \
|
||||
E = SPH_T32(T1 + T2); \
|
||||
W04 = in(4); \
|
||||
T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
|
||||
+ SPH_C32(0x3956C25B) + W04); \
|
||||
T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
H = SPH_T32(H + T1); \
|
||||
D = SPH_T32(T1 + T2); \
|
||||
W05 = in(5); \
|
||||
T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
|
||||
+ SPH_C32(0x59F111F1) + W05); \
|
||||
T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
G = SPH_T32(G + T1); \
|
||||
C = SPH_T32(T1 + T2); \
|
||||
W06 = in(6); \
|
||||
T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
|
||||
+ SPH_C32(0x923F82A4) + W06); \
|
||||
T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
F = SPH_T32(F + T1); \
|
||||
B = SPH_T32(T1 + T2); \
|
||||
W07 = in(7); \
|
||||
T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
|
||||
+ SPH_C32(0xAB1C5ED5) + W07); \
|
||||
T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
E = SPH_T32(E + T1); \
|
||||
A = SPH_T32(T1 + T2); \
|
||||
W08 = in(8); \
|
||||
T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
|
||||
+ SPH_C32(0xD807AA98) + W08); \
|
||||
T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
D = SPH_T32(D + T1); \
|
||||
H = SPH_T32(T1 + T2); \
|
||||
W09 = in(9); \
|
||||
T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
|
||||
+ SPH_C32(0x12835B01) + W09); \
|
||||
T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
C = SPH_T32(C + T1); \
|
||||
G = SPH_T32(T1 + T2); \
|
||||
W10 = in(10); \
|
||||
T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
|
||||
+ SPH_C32(0x243185BE) + W10); \
|
||||
T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
B = SPH_T32(B + T1); \
|
||||
F = SPH_T32(T1 + T2); \
|
||||
W11 = in(11); \
|
||||
T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
|
||||
+ SPH_C32(0x550C7DC3) + W11); \
|
||||
T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
A = SPH_T32(A + T1); \
|
||||
E = SPH_T32(T1 + T2); \
|
||||
W12 = in(12); \
|
||||
T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
|
||||
+ SPH_C32(0x72BE5D74) + W12); \
|
||||
T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
H = SPH_T32(H + T1); \
|
||||
D = SPH_T32(T1 + T2); \
|
||||
W13 = in(13); \
|
||||
T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
|
||||
+ SPH_C32(0x80DEB1FE) + W13); \
|
||||
T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
G = SPH_T32(G + T1); \
|
||||
C = SPH_T32(T1 + T2); \
|
||||
W14 = in(14); \
|
||||
T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
|
||||
+ SPH_C32(0x9BDC06A7) + W14); \
|
||||
T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
F = SPH_T32(F + T1); \
|
||||
B = SPH_T32(T1 + T2); \
|
||||
W15 = in(15); \
|
||||
T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
|
||||
+ SPH_C32(0xC19BF174) + W15); \
|
||||
T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
E = SPH_T32(E + T1); \
|
||||
A = SPH_T32(T1 + T2); \
|
||||
W00 = SPH_T32(SSG2_1(W14) + W09 + SSG2_0(W01) + W00); \
|
||||
T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
|
||||
+ SPH_C32(0xE49B69C1) + W00); \
|
||||
T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
D = SPH_T32(D + T1); \
|
||||
H = SPH_T32(T1 + T2); \
|
||||
W01 = SPH_T32(SSG2_1(W15) + W10 + SSG2_0(W02) + W01); \
|
||||
T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
|
||||
+ SPH_C32(0xEFBE4786) + W01); \
|
||||
T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
C = SPH_T32(C + T1); \
|
||||
G = SPH_T32(T1 + T2); \
|
||||
W02 = SPH_T32(SSG2_1(W00) + W11 + SSG2_0(W03) + W02); \
|
||||
T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
|
||||
+ SPH_C32(0x0FC19DC6) + W02); \
|
||||
T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
B = SPH_T32(B + T1); \
|
||||
F = SPH_T32(T1 + T2); \
|
||||
W03 = SPH_T32(SSG2_1(W01) + W12 + SSG2_0(W04) + W03); \
|
||||
T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
|
||||
+ SPH_C32(0x240CA1CC) + W03); \
|
||||
T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
A = SPH_T32(A + T1); \
|
||||
E = SPH_T32(T1 + T2); \
|
||||
W04 = SPH_T32(SSG2_1(W02) + W13 + SSG2_0(W05) + W04); \
|
||||
T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
|
||||
+ SPH_C32(0x2DE92C6F) + W04); \
|
||||
T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
H = SPH_T32(H + T1); \
|
||||
D = SPH_T32(T1 + T2); \
|
||||
W05 = SPH_T32(SSG2_1(W03) + W14 + SSG2_0(W06) + W05); \
|
||||
T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
|
||||
+ SPH_C32(0x4A7484AA) + W05); \
|
||||
T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
G = SPH_T32(G + T1); \
|
||||
C = SPH_T32(T1 + T2); \
|
||||
W06 = SPH_T32(SSG2_1(W04) + W15 + SSG2_0(W07) + W06); \
|
||||
T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
|
||||
+ SPH_C32(0x5CB0A9DC) + W06); \
|
||||
T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
F = SPH_T32(F + T1); \
|
||||
B = SPH_T32(T1 + T2); \
|
||||
W07 = SPH_T32(SSG2_1(W05) + W00 + SSG2_0(W08) + W07); \
|
||||
T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
|
||||
+ SPH_C32(0x76F988DA) + W07); \
|
||||
T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
E = SPH_T32(E + T1); \
|
||||
A = SPH_T32(T1 + T2); \
|
||||
W08 = SPH_T32(SSG2_1(W06) + W01 + SSG2_0(W09) + W08); \
|
||||
T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
|
||||
+ SPH_C32(0x983E5152) + W08); \
|
||||
T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
D = SPH_T32(D + T1); \
|
||||
H = SPH_T32(T1 + T2); \
|
||||
W09 = SPH_T32(SSG2_1(W07) + W02 + SSG2_0(W10) + W09); \
|
||||
T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
|
||||
+ SPH_C32(0xA831C66D) + W09); \
|
||||
T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
C = SPH_T32(C + T1); \
|
||||
G = SPH_T32(T1 + T2); \
|
||||
W10 = SPH_T32(SSG2_1(W08) + W03 + SSG2_0(W11) + W10); \
|
||||
T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
|
||||
+ SPH_C32(0xB00327C8) + W10); \
|
||||
T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
B = SPH_T32(B + T1); \
|
||||
F = SPH_T32(T1 + T2); \
|
||||
W11 = SPH_T32(SSG2_1(W09) + W04 + SSG2_0(W12) + W11); \
|
||||
T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
|
||||
+ SPH_C32(0xBF597FC7) + W11); \
|
||||
T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
A = SPH_T32(A + T1); \
|
||||
E = SPH_T32(T1 + T2); \
|
||||
W12 = SPH_T32(SSG2_1(W10) + W05 + SSG2_0(W13) + W12); \
|
||||
T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
|
||||
+ SPH_C32(0xC6E00BF3) + W12); \
|
||||
T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
H = SPH_T32(H + T1); \
|
||||
D = SPH_T32(T1 + T2); \
|
||||
W13 = SPH_T32(SSG2_1(W11) + W06 + SSG2_0(W14) + W13); \
|
||||
T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
|
||||
+ SPH_C32(0xD5A79147) + W13); \
|
||||
T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
G = SPH_T32(G + T1); \
|
||||
C = SPH_T32(T1 + T2); \
|
||||
W14 = SPH_T32(SSG2_1(W12) + W07 + SSG2_0(W15) + W14); \
|
||||
T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
|
||||
+ SPH_C32(0x06CA6351) + W14); \
|
||||
T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
F = SPH_T32(F + T1); \
|
||||
B = SPH_T32(T1 + T2); \
|
||||
W15 = SPH_T32(SSG2_1(W13) + W08 + SSG2_0(W00) + W15); \
|
||||
T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
|
||||
+ SPH_C32(0x14292967) + W15); \
|
||||
T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
E = SPH_T32(E + T1); \
|
||||
A = SPH_T32(T1 + T2); \
|
||||
W00 = SPH_T32(SSG2_1(W14) + W09 + SSG2_0(W01) + W00); \
|
||||
T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
|
||||
+ SPH_C32(0x27B70A85) + W00); \
|
||||
T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
D = SPH_T32(D + T1); \
|
||||
H = SPH_T32(T1 + T2); \
|
||||
W01 = SPH_T32(SSG2_1(W15) + W10 + SSG2_0(W02) + W01); \
|
||||
T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
|
||||
+ SPH_C32(0x2E1B2138) + W01); \
|
||||
T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
C = SPH_T32(C + T1); \
|
||||
G = SPH_T32(T1 + T2); \
|
||||
W02 = SPH_T32(SSG2_1(W00) + W11 + SSG2_0(W03) + W02); \
|
||||
T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
|
||||
+ SPH_C32(0x4D2C6DFC) + W02); \
|
||||
T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
B = SPH_T32(B + T1); \
|
||||
F = SPH_T32(T1 + T2); \
|
||||
W03 = SPH_T32(SSG2_1(W01) + W12 + SSG2_0(W04) + W03); \
|
||||
T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
|
||||
+ SPH_C32(0x53380D13) + W03); \
|
||||
T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
A = SPH_T32(A + T1); \
|
||||
E = SPH_T32(T1 + T2); \
|
||||
W04 = SPH_T32(SSG2_1(W02) + W13 + SSG2_0(W05) + W04); \
|
||||
T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
|
||||
+ SPH_C32(0x650A7354) + W04); \
|
||||
T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
H = SPH_T32(H + T1); \
|
||||
D = SPH_T32(T1 + T2); \
|
||||
W05 = SPH_T32(SSG2_1(W03) + W14 + SSG2_0(W06) + W05); \
|
||||
T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
|
||||
+ SPH_C32(0x766A0ABB) + W05); \
|
||||
T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
G = SPH_T32(G + T1); \
|
||||
C = SPH_T32(T1 + T2); \
|
||||
W06 = SPH_T32(SSG2_1(W04) + W15 + SSG2_0(W07) + W06); \
|
||||
T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
|
||||
+ SPH_C32(0x81C2C92E) + W06); \
|
||||
T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
F = SPH_T32(F + T1); \
|
||||
B = SPH_T32(T1 + T2); \
|
||||
W07 = SPH_T32(SSG2_1(W05) + W00 + SSG2_0(W08) + W07); \
|
||||
T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
|
||||
+ SPH_C32(0x92722C85) + W07); \
|
||||
T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
E = SPH_T32(E + T1); \
|
||||
A = SPH_T32(T1 + T2); \
|
||||
W08 = SPH_T32(SSG2_1(W06) + W01 + SSG2_0(W09) + W08); \
|
||||
T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
|
||||
+ SPH_C32(0xA2BFE8A1) + W08); \
|
||||
T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
D = SPH_T32(D + T1); \
|
||||
H = SPH_T32(T1 + T2); \
|
||||
W09 = SPH_T32(SSG2_1(W07) + W02 + SSG2_0(W10) + W09); \
|
||||
T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
|
||||
+ SPH_C32(0xA81A664B) + W09); \
|
||||
T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
C = SPH_T32(C + T1); \
|
||||
G = SPH_T32(T1 + T2); \
|
||||
W10 = SPH_T32(SSG2_1(W08) + W03 + SSG2_0(W11) + W10); \
|
||||
T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
|
||||
+ SPH_C32(0xC24B8B70) + W10); \
|
||||
T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
B = SPH_T32(B + T1); \
|
||||
F = SPH_T32(T1 + T2); \
|
||||
W11 = SPH_T32(SSG2_1(W09) + W04 + SSG2_0(W12) + W11); \
|
||||
T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
|
||||
+ SPH_C32(0xC76C51A3) + W11); \
|
||||
T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
A = SPH_T32(A + T1); \
|
||||
E = SPH_T32(T1 + T2); \
|
||||
W12 = SPH_T32(SSG2_1(W10) + W05 + SSG2_0(W13) + W12); \
|
||||
T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
|
||||
+ SPH_C32(0xD192E819) + W12); \
|
||||
T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
H = SPH_T32(H + T1); \
|
||||
D = SPH_T32(T1 + T2); \
|
||||
W13 = SPH_T32(SSG2_1(W11) + W06 + SSG2_0(W14) + W13); \
|
||||
T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
|
||||
+ SPH_C32(0xD6990624) + W13); \
|
||||
T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
G = SPH_T32(G + T1); \
|
||||
C = SPH_T32(T1 + T2); \
|
||||
W14 = SPH_T32(SSG2_1(W12) + W07 + SSG2_0(W15) + W14); \
|
||||
T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
|
||||
+ SPH_C32(0xF40E3585) + W14); \
|
||||
T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
F = SPH_T32(F + T1); \
|
||||
B = SPH_T32(T1 + T2); \
|
||||
W15 = SPH_T32(SSG2_1(W13) + W08 + SSG2_0(W00) + W15); \
|
||||
T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
|
||||
+ SPH_C32(0x106AA070) + W15); \
|
||||
T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
E = SPH_T32(E + T1); \
|
||||
A = SPH_T32(T1 + T2); \
|
||||
W00 = SPH_T32(SSG2_1(W14) + W09 + SSG2_0(W01) + W00); \
|
||||
T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
|
||||
+ SPH_C32(0x19A4C116) + W00); \
|
||||
T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
D = SPH_T32(D + T1); \
|
||||
H = SPH_T32(T1 + T2); \
|
||||
W01 = SPH_T32(SSG2_1(W15) + W10 + SSG2_0(W02) + W01); \
|
||||
T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
|
||||
+ SPH_C32(0x1E376C08) + W01); \
|
||||
T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
C = SPH_T32(C + T1); \
|
||||
G = SPH_T32(T1 + T2); \
|
||||
W02 = SPH_T32(SSG2_1(W00) + W11 + SSG2_0(W03) + W02); \
|
||||
T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
|
||||
+ SPH_C32(0x2748774C) + W02); \
|
||||
T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
B = SPH_T32(B + T1); \
|
||||
F = SPH_T32(T1 + T2); \
|
||||
W03 = SPH_T32(SSG2_1(W01) + W12 + SSG2_0(W04) + W03); \
|
||||
T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
|
||||
+ SPH_C32(0x34B0BCB5) + W03); \
|
||||
T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
A = SPH_T32(A + T1); \
|
||||
E = SPH_T32(T1 + T2); \
|
||||
W04 = SPH_T32(SSG2_1(W02) + W13 + SSG2_0(W05) + W04); \
|
||||
T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
|
||||
+ SPH_C32(0x391C0CB3) + W04); \
|
||||
T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
H = SPH_T32(H + T1); \
|
||||
D = SPH_T32(T1 + T2); \
|
||||
W05 = SPH_T32(SSG2_1(W03) + W14 + SSG2_0(W06) + W05); \
|
||||
T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
|
||||
+ SPH_C32(0x4ED8AA4A) + W05); \
|
||||
T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
G = SPH_T32(G + T1); \
|
||||
C = SPH_T32(T1 + T2); \
|
||||
W06 = SPH_T32(SSG2_1(W04) + W15 + SSG2_0(W07) + W06); \
|
||||
T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
|
||||
+ SPH_C32(0x5B9CCA4F) + W06); \
|
||||
T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
F = SPH_T32(F + T1); \
|
||||
B = SPH_T32(T1 + T2); \
|
||||
W07 = SPH_T32(SSG2_1(W05) + W00 + SSG2_0(W08) + W07); \
|
||||
T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
|
||||
+ SPH_C32(0x682E6FF3) + W07); \
|
||||
T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
E = SPH_T32(E + T1); \
|
||||
A = SPH_T32(T1 + T2); \
|
||||
W08 = SPH_T32(SSG2_1(W06) + W01 + SSG2_0(W09) + W08); \
|
||||
T1 = SPH_T32(H + BSG2_1(E) + CH(E, F, G) \
|
||||
+ SPH_C32(0x748F82EE) + W08); \
|
||||
T2 = SPH_T32(BSG2_0(A) + MAJ(A, B, C)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
D = SPH_T32(D + T1); \
|
||||
H = SPH_T32(T1 + T2); \
|
||||
W09 = SPH_T32(SSG2_1(W07) + W02 + SSG2_0(W10) + W09); \
|
||||
T1 = SPH_T32(G + BSG2_1(D) + CH(D, E, F) \
|
||||
+ SPH_C32(0x78A5636F) + W09); \
|
||||
T2 = SPH_T32(BSG2_0(H) + MAJ(H, A, B)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
C = SPH_T32(C + T1); \
|
||||
G = SPH_T32(T1 + T2); \
|
||||
W10 = SPH_T32(SSG2_1(W08) + W03 + SSG2_0(W11) + W10); \
|
||||
T1 = SPH_T32(F + BSG2_1(C) + CH(C, D, E) \
|
||||
+ SPH_C32(0x84C87814) + W10); \
|
||||
T2 = SPH_T32(BSG2_0(G) + MAJ(G, H, A)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
B = SPH_T32(B + T1); \
|
||||
F = SPH_T32(T1 + T2); \
|
||||
W11 = SPH_T32(SSG2_1(W09) + W04 + SSG2_0(W12) + W11); \
|
||||
T1 = SPH_T32(E + BSG2_1(B) + CH(B, C, D) \
|
||||
+ SPH_C32(0x8CC70208) + W11); \
|
||||
T2 = SPH_T32(BSG2_0(F) + MAJ(F, G, H)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
A = SPH_T32(A + T1); \
|
||||
E = SPH_T32(T1 + T2); \
|
||||
W12 = SPH_T32(SSG2_1(W10) + W05 + SSG2_0(W13) + W12); \
|
||||
T1 = SPH_T32(D + BSG2_1(A) + CH(A, B, C) \
|
||||
+ SPH_C32(0x90BEFFFA) + W12); \
|
||||
T2 = SPH_T32(BSG2_0(E) + MAJ(E, F, G)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
H = SPH_T32(H + T1); \
|
||||
D = SPH_T32(T1 + T2); \
|
||||
W13 = SPH_T32(SSG2_1(W11) + W06 + SSG2_0(W14) + W13); \
|
||||
T1 = SPH_T32(C + BSG2_1(H) + CH(H, A, B) \
|
||||
+ SPH_C32(0xA4506CEB) + W13); \
|
||||
T2 = SPH_T32(BSG2_0(D) + MAJ(D, E, F)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
G = SPH_T32(G + T1); \
|
||||
C = SPH_T32(T1 + T2); \
|
||||
W14 = SPH_T32(SSG2_1(W12) + W07 + SSG2_0(W15) + W14); \
|
||||
T1 = SPH_T32(B + BSG2_1(G) + CH(G, H, A) \
|
||||
+ SPH_C32(0xBEF9A3F7) + W14); \
|
||||
T2 = SPH_T32(BSG2_0(C) + MAJ(C, D, E)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
F = SPH_T32(F + T1); \
|
||||
B = SPH_T32(T1 + T2); \
|
||||
W15 = SPH_T32(SSG2_1(W13) + W08 + SSG2_0(W00) + W15); \
|
||||
T1 = SPH_T32(A + BSG2_1(F) + CH(F, G, H) \
|
||||
+ SPH_C32(0xC67178F2) + W15); \
|
||||
T2 = SPH_T32(BSG2_0(B) + MAJ(B, C, D)); \
|
||||
Y_xor_Z = X_xor_Y; \
|
||||
E = SPH_T32(E + T1); \
|
||||
A = SPH_T32(T1 + T2); \
|
||||
(r)[0] = SPH_T32((r)[0] + A); \
|
||||
(r)[1] = SPH_T32((r)[1] + B); \
|
||||
(r)[2] = SPH_T32((r)[2] + C); \
|
||||
(r)[3] = SPH_T32((r)[3] + D); \
|
||||
(r)[4] = SPH_T32((r)[4] + E); \
|
||||
(r)[5] = SPH_T32((r)[5] + F); \
|
||||
(r)[6] = SPH_T32((r)[6] + G); \
|
||||
(r)[7] = SPH_T32((r)[7] + H); \
|
||||
} while (0)
|
||||
|
||||
#endif // small footprint else
|
||||
|
||||
/*
|
||||
* One round of SHA-224 / SHA-256. The data must be aligned for 32-bit access.
|
||||
*/
|
||||
static void
|
||||
sha2_round(const unsigned char *data, sph_u32 r[8])
|
||||
{
|
||||
#define SHA2_IN(x) sph_dec32be_aligned(data + (4 * (x)))
|
||||
SHA2_ROUND_BODY(SHA2_IN, r);
|
||||
#undef SHA2_IN
|
||||
}
|
||||
|
||||
void sph_sha256_transform_le( uint32_t *state_out, const uint32_t *data,
|
||||
const uint32_t *state_in )
|
||||
{
|
||||
memcpy( state_out, state_in, 32 );
|
||||
#define SHA2_IN(x) (data[x])
|
||||
SHA2_ROUND_BODY( SHA2_IN, state_out );
|
||||
#undef SHA2_IN
|
||||
}
|
||||
|
||||
void sph_sha256_transform_be( uint32_t *state_out, const uint32_t *data,
|
||||
const uint32_t *state_in )
|
||||
{
|
||||
memcpy( state_out, state_in, 32 );
|
||||
#define SHA2_IN(x) sph_dec32be_aligned( data+(x) )
|
||||
SHA2_ROUND_BODY( SHA2_IN, state_out );
|
||||
#undef SHA2_IN
|
||||
|
||||
}
|
||||
|
||||
/* see sph_sha2.h */
|
||||
void
|
||||
sph_sha224_init(void *cc)
|
||||
{
|
||||
sph_sha224_context *sc;
|
||||
|
||||
sc = cc;
|
||||
memcpy(sc->val, H224, sizeof H224);
|
||||
#if SPH_64
|
||||
sc->count = 0;
|
||||
#else
|
||||
sc->count_high = sc->count_low = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* see sph_sha2.h */
|
||||
void
|
||||
sph_sha256_init(void *cc)
|
||||
{
|
||||
sph_sha256_context *sc;
|
||||
|
||||
sc = cc;
|
||||
memcpy(sc->val, H256, sizeof H256);
|
||||
#if SPH_64
|
||||
sc->count = 0;
|
||||
#else
|
||||
sc->count_high = sc->count_low = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/*
 * Set RFUN, HASH and BE32, then textually include md_helper.c.
 * The sha224_close() and sha224_addbits_and_close() helpers called by the
 * wrappers below are not defined anywhere in this file, so they presumably
 * come from this inclusion (with sha2_round as the block function and
 * "sha224" as the name stem) -- confirm against md_helper.c.
 */
#define RFUN sha2_round
|
||||
#define HASH sha224
|
||||
#define BE32 1
|
||||
#include "md_helper.c"
|
||||
|
||||
/* see sph_sha2.h */
void
sph_sha224_close(void *cc, void *dst)
{
	/* Presumably the number of 32-bit words written to dst
	 * (7 * 32 = 224 bits) -- confirm against md_helper.c. */
	enum { OUT_WORDS = 7 };

	sha224_close(cc, dst, OUT_WORDS);
	/* The sph_sha224_init(cc) call that used to follow is disabled. */
}
|
||||
|
||||
/* see sph_sha2.h */
void
sph_sha224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	/* Presumably the number of 32-bit words written to dst
	 * (7 * 32 = 224 bits) -- confirm against md_helper.c. */
	enum { OUT_WORDS = 7 };

	sha224_addbits_and_close(cc, ub, n, dst, OUT_WORDS);
	/* The sph_sha224_init(cc) call that used to follow is disabled. */
}
|
||||
|
||||
/* see sph_sha2.h */
void
sph_sha256_close(void *cc, void *dst)
{
	/* Presumably the number of 32-bit words written to dst
	 * (8 * 32 = 256 bits) -- confirm against md_helper.c. */
	enum { OUT_WORDS = 8 };

	/* SHA-256 reuses the sha224_* helper; only the word count differs. */
	sha224_close(cc, dst, OUT_WORDS);
	/* The sph_sha256_init(cc) call that used to follow is disabled. */
}
|
||||
|
||||
/* see sph_sha2.h */
void
sph_sha256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	/* Presumably the number of 32-bit words written to dst
	 * (8 * 32 = 256 bits) -- confirm against md_helper.c. */
	enum { OUT_WORDS = 8 };

	/* SHA-256 reuses the sha224_* helper; only the word count differs. */
	sha224_addbits_and_close(cc, ub, n, dst, OUT_WORDS);
	/* The sph_sha256_init(cc) call that used to follow is disabled. */
}
|
||||
|
||||
void sph_sha256_full( void *dst, const void *data, size_t len )
|
||||
{
|
||||
sph_sha256_context cc;
|
||||
sph_sha256_init( &cc );
|
||||
sph_sha256( &cc, data, len );
|
||||
sph_sha256_close( &cc, dst );
|
||||
}
|
||||
|
||||
/*
 * Double SHA-256: hash the `len` bytes at `data` into `hash`, then hash
 * those 32 bytes again in place, leaving the final result in `hash`.
 * `len` is converted to size_t without a check, so it must not be negative.
 */
void sha256d(void* hash, const void* data, int len)
{
	enum { DIGEST_BYTES = 32 };

	sph_sha256_full(hash, data, (size_t) len);
	sph_sha256_full(hash, hash, DIGEST_BYTES);
}
|
||||
|
||||
/* see sph_sha2.h */
|
||||
//void
|
||||
//sph_sha224_comp(const sph_u32 msg[16], sph_u32 val[8])
|
||||
//{
|
||||
//#define SHA2_IN(x) msg[x]
|
||||
// SHA2_ROUND_BODY(SHA2_IN, val);
|
||||
//#undef SHA2_IN
|
||||
//}
|
||||
383
src/crypto/ghostrider/sph_sha2.h
Normal file
383
src/crypto/ghostrider/sph_sha2.h
Normal file
@@ -0,0 +1,383 @@
|
||||
/* $Id: sph_sha2.h 216 2010-06-08 09:46:57Z tp $ */
|
||||
/**
|
||||
* SHA-224, SHA-256, SHA-384 and SHA-512 interface.
|
||||
*
|
||||
* SHA-256 has been published in FIPS 180-2, now amended with a change
|
||||
* notice to include SHA-224 as well (which is a simple variation on
|
||||
* SHA-256). SHA-384 and SHA-512 are also defined in FIPS 180-2. FIPS
|
||||
* standards can be found at:
|
||||
* http://csrc.nist.gov/publications/fips/
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_sha2.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_SHA2_H__
|
||||
#define SPH_SHA2_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include <stdint.h>
|
||||
#include "sph_types.h"
|
||||
|
||||
/**
|
||||
* Output size (in bits) for SHA-224.
|
||||
*/
|
||||
#define SPH_SIZE_sha224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for SHA-256.
|
||||
*/
|
||||
#define SPH_SIZE_sha256 256
|
||||
|
||||
/**
|
||||
* This structure is a context for SHA-224 computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* a SHA-224 computation has been performed, the context can be reused for
|
||||
* another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running SHA-224 computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
sph_u32 val[8];
|
||||
#if SPH_64
|
||||
sph_u64 count;
|
||||
#else
|
||||
sph_u32 count_high, count_low;
|
||||
#endif
|
||||
#endif
|
||||
} sph_sha224_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for SHA-256 computations. It is identical
|
||||
* to the SHA-224 context. However, a context is initialized for SHA-224
|
||||
* <strong>or</strong> SHA-256, but not both (the internal IV is not the
|
||||
* same).
|
||||
*/
|
||||
typedef sph_sha224_context sph_sha256_context;
|
||||
|
||||
/**
|
||||
* Initialize a SHA-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the SHA-224 context (pointer to
|
||||
* a <code>sph_sha224_context</code>)
|
||||
*/
|
||||
void sph_sha224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the SHA-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_sha224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current SHA-224 computation and output the result into the
|
||||
* provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is not reinitialized:
|
||||
* the sph_sha224_init() call in sph_sha2.c is currently commented out.
|
||||
*
|
||||
* @param cc the SHA-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_sha224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is not reinitialized (that call is commented out in sph_sha2.c).
|
||||
*
|
||||
* @param cc the SHA-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_sha224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Apply the SHA-224 compression function on the provided data. The
|
||||
* <code>msg</code> parameter contains the 16 32-bit input blocks,
|
||||
* as numerical values (hence after the big-endian decoding). The
|
||||
* <code>val</code> parameter contains the 8 32-bit input blocks for
|
||||
* the compression function; the output is written in place in this
|
||||
* array.
|
||||
*
|
||||
* @param msg the message block (16 values)
|
||||
* @param val the function 256-bit input and output
|
||||
*/
|
||||
void sph_sha224_comp(const sph_u32 msg[16], sph_u32 val[8]);
|
||||
|
||||
/**
|
||||
* Initialize a SHA-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the SHA-256 context (pointer to
|
||||
* a <code>sph_sha256_context</code>)
|
||||
*/
|
||||
void sph_sha256_init(void *cc);
|
||||
|
||||
#ifdef DOXYGEN_IGNORE
|
||||
/**
|
||||
* Process some data bytes, for SHA-256. This function is identical to
|
||||
* <code>sph_sha224()</code>.
|
||||
*
|
||||
* @param cc the SHA-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_sha256(void *cc, const void *data, size_t len);
|
||||
#endif
|
||||
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
#define sph_sha256 sph_sha224
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Terminate the current SHA-256 computation and output the result into the
|
||||
* provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is not reinitialized:
|
||||
* the sph_sha256_init() call in sph_sha2.c is currently commented out.
|
||||
*
|
||||
* @param cc the SHA-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_sha256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is not reinitialized (that call is commented out in sph_sha2.c).
|
||||
*
|
||||
* @param cc the SHA-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_sha256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#ifdef DOXYGEN_IGNORE
|
||||
/**
|
||||
* Apply the SHA-256 compression function on the provided data. This
|
||||
* function is identical to <code>sph_sha224_comp()</code>.
|
||||
*
|
||||
* @param msg the message block (16 values)
|
||||
* @param val the function 256-bit input and output
|
||||
*/
|
||||
void sph_sha256_comp(const sph_u32 msg[16], sph_u32 val[8]);
|
||||
#endif
|
||||
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
#define sph_sha256_comp sph_sha224_comp
|
||||
#endif
|
||||
|
||||
void sph_sha256_full( void *dst, const void *data, size_t len );
|
||||
void sha256d(void* hash, const void* data, int len);
|
||||
|
||||
// These shouldn't be called directly, use sha256-hash.h generic functions
|
||||
// sha256_transform_le & sha256_transform_be instead.
|
||||
void sph_sha256_transform_le( uint32_t *state_out, const uint32_t *data,
|
||||
const uint32_t *state_in );
|
||||
|
||||
void sph_sha256_transform_be( uint32_t *state_out, const uint32_t *data,
|
||||
const uint32_t *state_in );
|
||||
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* Output size (in bits) for SHA-384.
|
||||
*/
|
||||
#define SPH_SIZE_sha384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for SHA-512.
|
||||
*/
|
||||
#define SPH_SIZE_sha512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for SHA-384 computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* a SHA-384 computation has been performed, the context can be reused for
|
||||
* another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running SHA-384 computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[128]; /* first field, for alignment */
|
||||
sph_u64 val[8];
|
||||
sph_u64 count;
|
||||
#endif
|
||||
} sph_sha384_context;
|
||||
|
||||
/**
|
||||
* Initialize a SHA-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the SHA-384 context (pointer to
|
||||
* a <code>sph_sha384_context</code>)
|
||||
*/
|
||||
void sph_sha384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the SHA-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_sha384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current SHA-384 computation and output the result into the
|
||||
* provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the SHA-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_sha384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the SHA-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_sha384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Apply the SHA-384 compression function on the provided data. The
|
||||
* <code>msg</code> parameter contains the 16 64-bit input blocks,
|
||||
* as numerical values (hence after the big-endian decoding). The
|
||||
* <code>val</code> parameter contains the 8 64-bit input blocks for
|
||||
* the compression function; the output is written in place in this
|
||||
* array.
|
||||
*
|
||||
* @param msg the message block (16 values)
|
||||
* @param val the function 512-bit input and output
|
||||
*/
|
||||
void sph_sha384_comp(const sph_u64 msg[16], sph_u64 val[8]);
|
||||
|
||||
/**
|
||||
* This structure is a context for SHA-512 computations. It is identical
|
||||
* to the SHA-384 context. However, a context is initialized for SHA-384
|
||||
* <strong>or</strong> SHA-512, but not both (the internal IV is not the
|
||||
* same).
|
||||
*/
|
||||
typedef sph_sha384_context sph_sha512_context;
|
||||
|
||||
/**
|
||||
* Initialize a SHA-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the SHA-512 context (pointer to
|
||||
* a <code>sph_sha512_context</code>)
|
||||
*/
|
||||
void sph_sha512_init(void *cc);
|
||||
|
||||
#ifdef DOXYGEN_IGNORE
|
||||
/**
|
||||
* Process some data bytes, for SHA-512. This function is identical to
|
||||
* <code>sph_sha384()</code>.
|
||||
*
|
||||
* @param cc the SHA-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_sha512(void *cc, const void *data, size_t len);
|
||||
#endif
|
||||
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
#define sph_sha512 sph_sha384
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Terminate the current SHA-512 computation and output the result into the
|
||||
* provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the SHA-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_sha512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the SHA-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_sha512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#ifdef DOXYGEN_IGNORE
|
||||
/**
|
||||
* Apply the SHA-512 compression function. This function is identical to
|
||||
* <code>sph_sha384_comp()</code>.
|
||||
*
|
||||
* @param msg the message block (16 values)
|
||||
* @param val the function 512-bit input and output
|
||||
*/
|
||||
void sph_sha512_comp(const sph_u64 msg[16], sph_u64 val[8]);
|
||||
#endif
|
||||
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
#define sph_sha512_comp sph_sha384_comp
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#endif
|
||||
808
src/crypto/ghostrider/sph_shabal.c
Normal file
808
src/crypto/ghostrider/sph_shabal.c
Normal file
@@ -0,0 +1,808 @@
|
||||
/* $Id: shabal.c 175 2010-05-07 16:03:20Z tp $ */
|
||||
/*
|
||||
* Shabal implementation.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#include <stddef.h>
|
||||
#include <string.h>
|
||||
|
||||
#include "sph_shabal.h"
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#pragma warning (disable: 4146)
|
||||
#endif
|
||||
|
||||
/*
|
||||
* Part of this code was automatically generated (the part between
|
||||
* the "BEGIN" and "END" markers).
|
||||
*/
|
||||
|
||||
#define sM 16
|
||||
|
||||
#define C32 SPH_C32
|
||||
#define T32 SPH_T32
|
||||
|
||||
#define O1 13
|
||||
#define O2 9
|
||||
#define O3 6
|
||||
|
||||
/*
|
||||
* We copy the state into local variables, so that the compiler knows
|
||||
* that it can optimize them at will.
|
||||
*/
|
||||
|
||||
/* BEGIN -- automatically generated code. */
|
||||
|
||||
/*
 * Declare the local working variables: twelve A words (A00..A0B), sixteen
 * each of B, C and M words (suffix 0..F), plus Wlow and Whigh. All are
 * sph_u32. The expansion supplies its own trailing semicolon.
 */
#define DECL_STATE \
	sph_u32 A00, A01, A02, A03, \
	        A04, A05, A06, A07, \
	        A08, A09, A0A, A0B; \
	sph_u32 B0, B1, B2, B3, \
	        B4, B5, B6, B7, \
	        B8, B9, BA, BB, \
	        BC, BD, BE, BF; \
	sph_u32 C0, C1, C2, C3, \
	        C4, C5, C6, C7, \
	        C8, C9, CA, CB, \
	        CC, CD, CE, CF; \
	sph_u32 M0, M1, M2, M3, \
	        M4, M5, M6, M7, \
	        M8, M9, MA, MB, \
	        MC, MD, ME, MF; \
	sph_u32 Wlow, Whigh;
|
||||
|
||||
/*
 * Copy every word of *state -- A[0..11], B[0..15], C[0..15], Wlow and
 * Whigh -- into the local variables of the matching names. `state` is
 * expanded once per assignment, so pass an expression without side effects.
 */
#define READ_STATE(state)   do { \
		A00 = (state)->A[0];   A01 = (state)->A[1]; \
		A02 = (state)->A[2];   A03 = (state)->A[3]; \
		A04 = (state)->A[4];   A05 = (state)->A[5]; \
		A06 = (state)->A[6];   A07 = (state)->A[7]; \
		A08 = (state)->A[8];   A09 = (state)->A[9]; \
		A0A = (state)->A[10];  A0B = (state)->A[11]; \
		B0 = (state)->B[0];    B1 = (state)->B[1]; \
		B2 = (state)->B[2];    B3 = (state)->B[3]; \
		B4 = (state)->B[4];    B5 = (state)->B[5]; \
		B6 = (state)->B[6];    B7 = (state)->B[7]; \
		B8 = (state)->B[8];    B9 = (state)->B[9]; \
		BA = (state)->B[10];   BB = (state)->B[11]; \
		BC = (state)->B[12];   BD = (state)->B[13]; \
		BE = (state)->B[14];   BF = (state)->B[15]; \
		C0 = (state)->C[0];    C1 = (state)->C[1]; \
		C2 = (state)->C[2];    C3 = (state)->C[3]; \
		C4 = (state)->C[4];    C5 = (state)->C[5]; \
		C6 = (state)->C[6];    C7 = (state)->C[7]; \
		C8 = (state)->C[8];    C9 = (state)->C[9]; \
		CA = (state)->C[10];   CB = (state)->C[11]; \
		CC = (state)->C[12];   CD = (state)->C[13]; \
		CE = (state)->C[14];   CF = (state)->C[15]; \
		Wlow = (state)->Wlow; \
		Whigh = (state)->Whigh; \
	} while (0)
|
||||
|
||||
/*
 * Store the local working copy (A, B, C words and both halves of W)
 * back into the structure pointed to by `state`; the exact inverse of
 * READ_STATE.
 */
#define WRITE_STATE(state)   do { \
		(state)->A[0] = A00;  (state)->A[1] = A01;  (state)->A[2] = A02; \
		(state)->A[3] = A03;  (state)->A[4] = A04;  (state)->A[5] = A05; \
		(state)->A[6] = A06;  (state)->A[7] = A07;  (state)->A[8] = A08; \
		(state)->A[9] = A09;  (state)->A[10] = A0A; (state)->A[11] = A0B; \
		(state)->B[0] = B0;   (state)->B[1] = B1; \
		(state)->B[2] = B2;   (state)->B[3] = B3; \
		(state)->B[4] = B4;   (state)->B[5] = B5; \
		(state)->B[6] = B6;   (state)->B[7] = B7; \
		(state)->B[8] = B8;   (state)->B[9] = B9; \
		(state)->B[10] = BA;  (state)->B[11] = BB; \
		(state)->B[12] = BC;  (state)->B[13] = BD; \
		(state)->B[14] = BE;  (state)->B[15] = BF; \
		(state)->C[0] = C0;   (state)->C[1] = C1; \
		(state)->C[2] = C2;   (state)->C[3] = C3; \
		(state)->C[4] = C4;   (state)->C[5] = C5; \
		(state)->C[6] = C6;   (state)->C[7] = C7; \
		(state)->C[8] = C8;   (state)->C[9] = C9; \
		(state)->C[10] = CA;  (state)->C[11] = CB; \
		(state)->C[12] = CC;  (state)->C[13] = CD; \
		(state)->C[14] = CE;  (state)->C[15] = CF; \
		(state)->Wlow = Wlow; \
		(state)->Whigh = Whigh; \
	} while (0)
|
||||
|
||||
/*
 * Decode the 64-byte block held in the local pointer `buf` into the
 * sixteen message words M0..MF, four bytes per word, using
 * sph_dec32le_aligned().
 */
#define DECODE_BLOCK   do { \
		M0 = sph_dec32le_aligned(buf +  0);  M1 = sph_dec32le_aligned(buf +  4); \
		M2 = sph_dec32le_aligned(buf +  8);  M3 = sph_dec32le_aligned(buf + 12); \
		M4 = sph_dec32le_aligned(buf + 16);  M5 = sph_dec32le_aligned(buf + 20); \
		M6 = sph_dec32le_aligned(buf + 24);  M7 = sph_dec32le_aligned(buf + 28); \
		M8 = sph_dec32le_aligned(buf + 32);  M9 = sph_dec32le_aligned(buf + 36); \
		MA = sph_dec32le_aligned(buf + 40);  MB = sph_dec32le_aligned(buf + 44); \
		MC = sph_dec32le_aligned(buf + 48);  MD = sph_dec32le_aligned(buf + 52); \
		ME = sph_dec32le_aligned(buf + 56);  MF = sph_dec32le_aligned(buf + 60); \
	} while (0)
|
||||
|
||||
/*
 * Add the message block into B: each B word receives the message word
 * of the same index, reduced with T32().
 */
#define INPUT_BLOCK_ADD   do { \
		B0 = T32(B0 + M0);  B1 = T32(B1 + M1);  B2 = T32(B2 + M2);  B3 = T32(B3 + M3); \
		B4 = T32(B4 + M4);  B5 = T32(B5 + M5);  B6 = T32(B6 + M6);  B7 = T32(B7 + M7); \
		B8 = T32(B8 + M8);  B9 = T32(B9 + M9);  BA = T32(BA + MA);  BB = T32(BB + MB); \
		BC = T32(BC + MC);  BD = T32(BD + MD);  BE = T32(BE + ME);  BF = T32(BF + MF); \
	} while (0)
|
||||
|
||||
/*
 * Subtract the message block from C: each C word loses the message
 * word of the same index, reduced with T32().
 */
#define INPUT_BLOCK_SUB   do { \
		C0 = T32(C0 - M0);  C1 = T32(C1 - M1);  C2 = T32(C2 - M2);  C3 = T32(C3 - M3); \
		C4 = T32(C4 - M4);  C5 = T32(C5 - M5);  C6 = T32(C6 - M6);  C7 = T32(C7 - M7); \
		C8 = T32(C8 - M8);  C9 = T32(C9 - M9);  CA = T32(CA - MA);  CB = T32(CB - MB); \
		CC = T32(CC - MC);  CD = T32(CD - MD);  CE = T32(CE - ME);  CF = T32(CF - MF); \
	} while (0)
|
||||
|
||||
/*
 * Fold the block counter into the state: the low half of W is XORed
 * into A00 and the high half into A01.
 */
#define XOR_W   do { \
		A00 = A00 ^ Wlow; \
		A01 = A01 ^ Whigh; \
	} while (0)
|
||||
|
||||
/*
 * Exchange the values of two sph_u32 lvalues through a scratch
 * variable that is local to the expansion.
 */
#define SWAP(v1, v2)   do { \
		sph_u32 swap_held = (v1); \
		(v1) = (v2); \
		(v2) = swap_held; \
	} while (0)
|
||||
|
||||
/*
 * Exchange the B and C halves of the state, word by word
 * (B0 with C0 through BF with CF).
 */
#define SWAP_BC   do { \
		SWAP(B0, C0);  SWAP(B1, C1);  SWAP(B2, C2);  SWAP(B3, C3); \
		SWAP(B4, C4);  SWAP(B5, C5);  SWAP(B6, C6);  SWAP(B7, C7); \
		SWAP(B8, C8);  SWAP(B9, C9);  SWAP(BA, CA);  SWAP(BB, CB); \
		SWAP(BC, CC);  SWAP(BD, CD);  SWAP(BE, CE);  SWAP(BF, CF); \
	} while (0)
|
||||
|
||||
/*
 * One elementary step of the permutation.
 *
 *   xa0 is replaced by
 *       T32((xa0 ^ (rotl(xa1, 15) * 5) ^ xc) * 3) ^ xb1 ^ (xb2 & ~xb3) ^ xm
 *   xb0 is then replaced by
 *       T32(~(rotl(xb0, 1) ^ xa0))      -- using the updated xa0
 *
 * xa0 and xb0 must be lvalues; all arguments are plain variable names
 * at every call site in this file.
 */
#define PERM_ELT(xa0, xa1, xb0, xb1, xb2, xb3, xc, xm)   do { \
		xa0 ^= (((xa1 << 15) | (xa1 >> 17)) * 5U) ^ xc; \
		xa0 = T32(xa0 * 3U) ^ xb1 ^ (xb2 & ~xb3) ^ xm; \
		xb0 = T32(~(((xb0 << 1) | (xb0 >> 31)) ^ xa0)); \
	} while (0)
|
||||
|
||||
/*
 * First of the three permutation passes: sixteen PERM_ELT steps, one
 * per B word (B0..BF) and message word (M0..MF). The A word being
 * updated starts at A00 and advances cyclically through the twelve A
 * words, so this pass ends on A03.
 */
#define PERM_STEP_0   do { \
		PERM_ELT(A00, A0B, B0, BD, B9, B6, C8, M0); \
		PERM_ELT(A01, A00, B1, BE, BA, B7, C7, M1); \
		PERM_ELT(A02, A01, B2, BF, BB, B8, C6, M2); \
		PERM_ELT(A03, A02, B3, B0, BC, B9, C5, M3); \
		PERM_ELT(A04, A03, B4, B1, BD, BA, C4, M4); \
		PERM_ELT(A05, A04, B5, B2, BE, BB, C3, M5); \
		PERM_ELT(A06, A05, B6, B3, BF, BC, C2, M6); \
		PERM_ELT(A07, A06, B7, B4, B0, BD, C1, M7); \
		PERM_ELT(A08, A07, B8, B5, B1, BE, C0, M8); \
		PERM_ELT(A09, A08, B9, B6, B2, BF, CF, M9); \
		PERM_ELT(A0A, A09, BA, B7, B3, B0, CE, MA); \
		PERM_ELT(A0B, A0A, BB, B8, B4, B1, CD, MB); \
		PERM_ELT(A00, A0B, BC, B9, B5, B2, CC, MC); \
		PERM_ELT(A01, A00, BD, BA, B6, B3, CB, MD); \
		PERM_ELT(A02, A01, BE, BB, B7, B4, CA, ME); \
		PERM_ELT(A03, A02, BF, BC, B8, B5, C9, MF); \
	} while (0)
|
||||
|
||||
/*
 * Second permutation pass: the same B, C and M schedule as the first
 * pass, with the updated A word starting at A04 and ending on A07.
 */
#define PERM_STEP_1   do { \
		PERM_ELT(A04, A03, B0, BD, B9, B6, C8, M0); \
		PERM_ELT(A05, A04, B1, BE, BA, B7, C7, M1); \
		PERM_ELT(A06, A05, B2, BF, BB, B8, C6, M2); \
		PERM_ELT(A07, A06, B3, B0, BC, B9, C5, M3); \
		PERM_ELT(A08, A07, B4, B1, BD, BA, C4, M4); \
		PERM_ELT(A09, A08, B5, B2, BE, BB, C3, M5); \
		PERM_ELT(A0A, A09, B6, B3, BF, BC, C2, M6); \
		PERM_ELT(A0B, A0A, B7, B4, B0, BD, C1, M7); \
		PERM_ELT(A00, A0B, B8, B5, B1, BE, C0, M8); \
		PERM_ELT(A01, A00, B9, B6, B2, BF, CF, M9); \
		PERM_ELT(A02, A01, BA, B7, B3, B0, CE, MA); \
		PERM_ELT(A03, A02, BB, B8, B4, B1, CD, MB); \
		PERM_ELT(A04, A03, BC, B9, B5, B2, CC, MC); \
		PERM_ELT(A05, A04, BD, BA, B6, B3, CB, MD); \
		PERM_ELT(A06, A05, BE, BB, B7, B4, CA, ME); \
		PERM_ELT(A07, A06, BF, BC, B8, B5, C9, MF); \
	} while (0)
|
||||
|
||||
/*
 * Third permutation pass: the same B, C and M schedule again, with the
 * updated A word starting at A08 and ending on A0B.
 */
#define PERM_STEP_2   do { \
		PERM_ELT(A08, A07, B0, BD, B9, B6, C8, M0); \
		PERM_ELT(A09, A08, B1, BE, BA, B7, C7, M1); \
		PERM_ELT(A0A, A09, B2, BF, BB, B8, C6, M2); \
		PERM_ELT(A0B, A0A, B3, B0, BC, B9, C5, M3); \
		PERM_ELT(A00, A0B, B4, B1, BD, BA, C4, M4); \
		PERM_ELT(A01, A00, B5, B2, BE, BB, C3, M5); \
		PERM_ELT(A02, A01, B6, B3, BF, BC, C2, M6); \
		PERM_ELT(A03, A02, B7, B4, B0, BD, C1, M7); \
		PERM_ELT(A04, A03, B8, B5, B1, BE, C0, M8); \
		PERM_ELT(A05, A04, B9, B6, B2, BF, CF, M9); \
		PERM_ELT(A06, A05, BA, B7, B3, B0, CE, MA); \
		PERM_ELT(A07, A06, BB, B8, B4, B1, CD, MB); \
		PERM_ELT(A08, A07, BC, B9, B5, B2, CC, MC); \
		PERM_ELT(A09, A08, BD, BA, B6, B3, CB, MD); \
		PERM_ELT(A0A, A09, BE, BB, B7, B4, CA, ME); \
		PERM_ELT(A0B, A0A, BF, BC, B8, B5, C9, MF); \
	} while (0)
|
||||
|
||||
/*
 * The keyed permutation applied to the local state:
 *   1. rotate every B word left by 17 bits;
 *   2. run the three permutation passes PERM_STEP_0..2;
 *   3. add C words into the A words (36 additions, three per A word).
 */
#define APPLY_P   do { \
		B0 = T32(B0 << 17) | (B0 >> 15);  B1 = T32(B1 << 17) | (B1 >> 15); \
		B2 = T32(B2 << 17) | (B2 >> 15);  B3 = T32(B3 << 17) | (B3 >> 15); \
		B4 = T32(B4 << 17) | (B4 >> 15);  B5 = T32(B5 << 17) | (B5 >> 15); \
		B6 = T32(B6 << 17) | (B6 >> 15);  B7 = T32(B7 << 17) | (B7 >> 15); \
		B8 = T32(B8 << 17) | (B8 >> 15);  B9 = T32(B9 << 17) | (B9 >> 15); \
		BA = T32(BA << 17) | (BA >> 15);  BB = T32(BB << 17) | (BB >> 15); \
		BC = T32(BC << 17) | (BC >> 15);  BD = T32(BD << 17) | (BD >> 15); \
		BE = T32(BE << 17) | (BE >> 15);  BF = T32(BF << 17) | (BF >> 15); \
		PERM_STEP_0; \
		PERM_STEP_1; \
		PERM_STEP_2; \
		A0B = T32(A0B + C6);  A0A = T32(A0A + C5);  A09 = T32(A09 + C4); \
		A08 = T32(A08 + C3);  A07 = T32(A07 + C2);  A06 = T32(A06 + C1); \
		A05 = T32(A05 + C0);  A04 = T32(A04 + CF);  A03 = T32(A03 + CE); \
		A02 = T32(A02 + CD);  A01 = T32(A01 + CC);  A00 = T32(A00 + CB); \
		A0B = T32(A0B + CA);  A0A = T32(A0A + C9);  A09 = T32(A09 + C8); \
		A08 = T32(A08 + C7);  A07 = T32(A07 + C6);  A06 = T32(A06 + C5); \
		A05 = T32(A05 + C4);  A04 = T32(A04 + C3);  A03 = T32(A03 + C2); \
		A02 = T32(A02 + C1);  A01 = T32(A01 + C0);  A00 = T32(A00 + CF); \
		A0B = T32(A0B + CE);  A0A = T32(A0A + CD);  A09 = T32(A09 + CC); \
		A08 = T32(A08 + CB);  A07 = T32(A07 + CA);  A06 = T32(A06 + C9); \
		A05 = T32(A05 + C8);  A04 = T32(A04 + C7);  A03 = T32(A03 + C6); \
		A02 = T32(A02 + C5);  A01 = T32(A01 + C4);  A00 = T32(A00 + C3); \
	} while (0)
|
||||
|
||||
/*
 * Advance the block counter W by one: increment the low half and, when
 * it wraps around to zero, carry into the high half.
 */
#define INCR_W   do { \
		Wlow = T32(Wlow + 1); \
		if (Wlow == 0) { \
			Whigh = T32(Whigh + 1); \
		} \
	} while (0)
|
||||
|
||||
static const sph_u32 A_init_192[] = {
|
||||
C32(0xFD749ED4), C32(0xB798E530), C32(0x33904B6F), C32(0x46BDA85E),
|
||||
C32(0x076934B4), C32(0x454B4058), C32(0x77F74527), C32(0xFB4CF465),
|
||||
C32(0x62931DA9), C32(0xE778C8DB), C32(0x22B3998E), C32(0xAC15CFB9)
|
||||
};
|
||||
|
||||
static const sph_u32 B_init_192[] = {
|
||||
C32(0x58BCBAC4), C32(0xEC47A08E), C32(0xAEE933B2), C32(0xDFCBC824),
|
||||
C32(0xA7944804), C32(0xBF65BDB0), C32(0x5A9D4502), C32(0x59979AF7),
|
||||
C32(0xC5CEA54E), C32(0x4B6B8150), C32(0x16E71909), C32(0x7D632319),
|
||||
C32(0x930573A0), C32(0xF34C63D1), C32(0xCAF914B4), C32(0xFDD6612C)
|
||||
};
|
||||
|
||||
static const sph_u32 C_init_192[] = {
|
||||
C32(0x61550878), C32(0x89EF2B75), C32(0xA1660C46), C32(0x7EF3855B),
|
||||
C32(0x7297B58C), C32(0x1BC67793), C32(0x7FB1C723), C32(0xB66FC640),
|
||||
C32(0x1A48B71C), C32(0xF0976D17), C32(0x088CE80A), C32(0xA454EDF3),
|
||||
C32(0x1C096BF4), C32(0xAC76224B), C32(0x5215781C), C32(0xCD5D2669)
|
||||
};
|
||||
|
||||
static const sph_u32 A_init_224[] = {
|
||||
C32(0xA5201467), C32(0xA9B8D94A), C32(0xD4CED997), C32(0x68379D7B),
|
||||
C32(0xA7FC73BA), C32(0xF1A2546B), C32(0x606782BF), C32(0xE0BCFD0F),
|
||||
C32(0x2F25374E), C32(0x069A149F), C32(0x5E2DFF25), C32(0xFAECF061)
|
||||
};
|
||||
|
||||
static const sph_u32 B_init_224[] = {
|
||||
C32(0xEC9905D8), C32(0xF21850CF), C32(0xC0A746C8), C32(0x21DAD498),
|
||||
C32(0x35156EEB), C32(0x088C97F2), C32(0x26303E40), C32(0x8A2D4FB5),
|
||||
C32(0xFEEE44B6), C32(0x8A1E9573), C32(0x7B81111A), C32(0xCBC139F0),
|
||||
C32(0xA3513861), C32(0x1D2C362E), C32(0x918C580E), C32(0xB58E1B9C)
|
||||
};
|
||||
|
||||
static const sph_u32 C_init_224[] = {
|
||||
C32(0xE4B573A1), C32(0x4C1A0880), C32(0x1E907C51), C32(0x04807EFD),
|
||||
C32(0x3AD8CDE5), C32(0x16B21302), C32(0x02512C53), C32(0x2204CB18),
|
||||
C32(0x99405F2D), C32(0xE5B648A1), C32(0x70AB1D43), C32(0xA10C25C2),
|
||||
C32(0x16F1AC05), C32(0x38BBEB56), C32(0x9B01DC60), C32(0xB1096D83)
|
||||
};
|
||||
|
||||
static const sph_u32 A_init_256[] = {
|
||||
C32(0x52F84552), C32(0xE54B7999), C32(0x2D8EE3EC), C32(0xB9645191),
|
||||
C32(0xE0078B86), C32(0xBB7C44C9), C32(0xD2B5C1CA), C32(0xB0D2EB8C),
|
||||
C32(0x14CE5A45), C32(0x22AF50DC), C32(0xEFFDBC6B), C32(0xEB21B74A)
|
||||
};
|
||||
|
||||
static const sph_u32 B_init_256[] = {
|
||||
C32(0xB555C6EE), C32(0x3E710596), C32(0xA72A652F), C32(0x9301515F),
|
||||
C32(0xDA28C1FA), C32(0x696FD868), C32(0x9CB6BF72), C32(0x0AFE4002),
|
||||
C32(0xA6E03615), C32(0x5138C1D4), C32(0xBE216306), C32(0xB38B8890),
|
||||
C32(0x3EA8B96B), C32(0x3299ACE4), C32(0x30924DD4), C32(0x55CB34A5)
|
||||
};
|
||||
|
||||
static const sph_u32 C_init_256[] = {
|
||||
C32(0xB405F031), C32(0xC4233EBA), C32(0xB3733979), C32(0xC0DD9D55),
|
||||
C32(0xC51C28AE), C32(0xA327B8E1), C32(0x56C56167), C32(0xED614433),
|
||||
C32(0x88B59D60), C32(0x60E2CEBA), C32(0x758B4B8B), C32(0x83E82A7F),
|
||||
C32(0xBC968828), C32(0xE6E00BF7), C32(0xBA839E55), C32(0x9B491C60)
|
||||
};
|
||||
|
||||
static const sph_u32 A_init_384[] = {
|
||||
C32(0xC8FCA331), C32(0xE55C504E), C32(0x003EBF26), C32(0xBB6B8D83),
|
||||
C32(0x7B0448C1), C32(0x41B82789), C32(0x0A7C9601), C32(0x8D659CFF),
|
||||
C32(0xB6E2673E), C32(0xCA54C77B), C32(0x1460FD7E), C32(0x3FCB8F2D)
|
||||
};
|
||||
|
||||
static const sph_u32 B_init_384[] = {
|
||||
C32(0x527291FC), C32(0x2A16455F), C32(0x78E627E5), C32(0x944F169F),
|
||||
C32(0x1CA6F016), C32(0xA854EA25), C32(0x8DB98ABE), C32(0xF2C62641),
|
||||
C32(0x30117DCB), C32(0xCF5C4309), C32(0x93711A25), C32(0xF9F671B8),
|
||||
C32(0xB01D2116), C32(0x333F4B89), C32(0xB285D165), C32(0x86829B36)
|
||||
};
|
||||
|
||||
static const sph_u32 C_init_384[] = {
|
||||
C32(0xF764B11A), C32(0x76172146), C32(0xCEF6934D), C32(0xC6D28399),
|
||||
C32(0xFE095F61), C32(0x5E6018B4), C32(0x5048ECF5), C32(0x51353261),
|
||||
C32(0x6E6E36DC), C32(0x63130DAD), C32(0xA9C69BD6), C32(0x1E90EA0C),
|
||||
C32(0x7C35073B), C32(0x28D95E6D), C32(0xAA340E0D), C32(0xCB3DEE70)
|
||||
};
|
||||
|
||||
static const sph_u32 A_init_512[] = {
|
||||
C32(0x20728DFD), C32(0x46C0BD53), C32(0xE782B699), C32(0x55304632),
|
||||
C32(0x71B4EF90), C32(0x0EA9E82C), C32(0xDBB930F1), C32(0xFAD06B8B),
|
||||
C32(0xBE0CAE40), C32(0x8BD14410), C32(0x76D2ADAC), C32(0x28ACAB7F)
|
||||
};
|
||||
|
||||
static const sph_u32 B_init_512[] = {
|
||||
C32(0xC1099CB7), C32(0x07B385F3), C32(0xE7442C26), C32(0xCC8AD640),
|
||||
C32(0xEB6F56C7), C32(0x1EA81AA9), C32(0x73B9D314), C32(0x1DE85D08),
|
||||
C32(0x48910A5A), C32(0x893B22DB), C32(0xC5A0DF44), C32(0xBBC4324E),
|
||||
C32(0x72D2F240), C32(0x75941D99), C32(0x6D8BDE82), C32(0xA1A7502B)
|
||||
};
|
||||
|
||||
static const sph_u32 C_init_512[] = {
|
||||
C32(0xD9BF68D1), C32(0x58BAD750), C32(0x56028CB2), C32(0x8134F359),
|
||||
C32(0xB5D469D8), C32(0x941A8CC2), C32(0x418B2A6E), C32(0x04052780),
|
||||
C32(0x7F07D787), C32(0x5194358F), C32(0x3C60D665), C32(0xBE97D79A),
|
||||
C32(0x950C3434), C32(0xAED9A06D), C32(0x2537DC8D), C32(0x7CDB5969)
|
||||
};
|
||||
|
||||
/* END -- automatically generated code. */
|
||||
|
||||
static void
|
||||
shabal_init(void *cc, unsigned size)
|
||||
{
|
||||
/*
|
||||
* We have precomputed initial states for all the supported
|
||||
* output bit lengths.
|
||||
*/
|
||||
const sph_u32 *A_init, *B_init, *C_init;
|
||||
sph_shabal_context *sc;
|
||||
|
||||
switch (size) {
|
||||
case 192:
|
||||
A_init = A_init_192;
|
||||
B_init = B_init_192;
|
||||
C_init = C_init_192;
|
||||
break;
|
||||
case 224:
|
||||
A_init = A_init_224;
|
||||
B_init = B_init_224;
|
||||
C_init = C_init_224;
|
||||
break;
|
||||
case 256:
|
||||
A_init = A_init_256;
|
||||
B_init = B_init_256;
|
||||
C_init = C_init_256;
|
||||
break;
|
||||
case 384:
|
||||
A_init = A_init_384;
|
||||
B_init = B_init_384;
|
||||
C_init = C_init_384;
|
||||
break;
|
||||
case 512:
|
||||
A_init = A_init_512;
|
||||
B_init = B_init_512;
|
||||
C_init = C_init_512;
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
sc = cc;
|
||||
memcpy(sc->A, A_init, sizeof sc->A);
|
||||
memcpy(sc->B, B_init, sizeof sc->B);
|
||||
memcpy(sc->C, C_init, sizeof sc->C);
|
||||
sc->Wlow = 1;
|
||||
sc->Whigh = 0;
|
||||
sc->ptr = 0;
|
||||
}
|
||||
|
||||
static void
|
||||
shabal_core(void *cc, const unsigned char *data, size_t len)
|
||||
{
|
||||
sph_shabal_context *sc;
|
||||
unsigned char *buf;
|
||||
size_t ptr;
|
||||
DECL_STATE
|
||||
|
||||
sc = cc;
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
|
||||
/*
|
||||
* We do not want to copy the state to local variables if the
|
||||
* amount of data is less than what is needed to complete the
|
||||
* current block. Note that it is anyway suboptimal to call
|
||||
* this method many times for small chunks of data.
|
||||
*/
|
||||
if (len < (sizeof sc->buf) - ptr) {
|
||||
memcpy(buf + ptr, data, len);
|
||||
ptr += len;
|
||||
sc->ptr = ptr;
|
||||
return;
|
||||
}
|
||||
|
||||
READ_STATE(sc);
|
||||
while (len > 0) {
|
||||
size_t clen;
|
||||
|
||||
clen = (sizeof sc->buf) - ptr;
|
||||
if (clen > len)
|
||||
clen = len;
|
||||
memcpy(buf + ptr, data, clen);
|
||||
ptr += clen;
|
||||
data += clen;
|
||||
len -= clen;
|
||||
if (ptr == sizeof sc->buf) {
|
||||
DECODE_BLOCK;
|
||||
INPUT_BLOCK_ADD;
|
||||
XOR_W;
|
||||
APPLY_P;
|
||||
INPUT_BLOCK_SUB;
|
||||
SWAP_BC;
|
||||
INCR_W;
|
||||
ptr = 0;
|
||||
}
|
||||
}
|
||||
WRITE_STATE(sc);
|
||||
sc->ptr = ptr;
|
||||
}
|
||||
|
||||
static void
|
||||
shabal_close(void *cc, unsigned ub, unsigned n, void *dst, unsigned size_words)
|
||||
{
|
||||
sph_shabal_context *sc;
|
||||
unsigned char *buf;
|
||||
size_t ptr;
|
||||
int i;
|
||||
unsigned z;
|
||||
union {
|
||||
unsigned char tmp_out[64];
|
||||
sph_u32 dummy;
|
||||
} u;
|
||||
size_t out_len;
|
||||
DECL_STATE
|
||||
|
||||
sc = cc;
|
||||
buf = sc->buf;
|
||||
ptr = sc->ptr;
|
||||
z = 0x80 >> n;
|
||||
buf[ptr] = ((ub & -z) | z) & 0xFF;
|
||||
memset(buf + ptr + 1, 0, (sizeof sc->buf) - (ptr + 1));
|
||||
READ_STATE(sc);
|
||||
DECODE_BLOCK;
|
||||
INPUT_BLOCK_ADD;
|
||||
XOR_W;
|
||||
APPLY_P;
|
||||
for (i = 0; i < 3; i ++) {
|
||||
SWAP_BC;
|
||||
XOR_W;
|
||||
APPLY_P;
|
||||
}
|
||||
|
||||
/*
|
||||
* We just use our local variables; no need to go through
|
||||
* the state structure. In order to share some code, we
|
||||
* emit the relevant words into a temporary buffer, which
|
||||
* we finally copy into the destination array.
|
||||
*/
|
||||
switch (size_words) {
|
||||
case 16:
|
||||
sph_enc32le_aligned(u.tmp_out + 0, B0);
|
||||
sph_enc32le_aligned(u.tmp_out + 4, B1);
|
||||
sph_enc32le_aligned(u.tmp_out + 8, B2);
|
||||
sph_enc32le_aligned(u.tmp_out + 12, B3);
|
||||
/* fall through */
|
||||
case 12:
|
||||
sph_enc32le_aligned(u.tmp_out + 16, B4);
|
||||
sph_enc32le_aligned(u.tmp_out + 20, B5);
|
||||
sph_enc32le_aligned(u.tmp_out + 24, B6);
|
||||
sph_enc32le_aligned(u.tmp_out + 28, B7);
|
||||
/* fall through */
|
||||
case 8:
|
||||
sph_enc32le_aligned(u.tmp_out + 32, B8);
|
||||
/* fall through */
|
||||
case 7:
|
||||
sph_enc32le_aligned(u.tmp_out + 36, B9);
|
||||
/* fall through */
|
||||
case 6:
|
||||
sph_enc32le_aligned(u.tmp_out + 40, BA);
|
||||
sph_enc32le_aligned(u.tmp_out + 44, BB);
|
||||
sph_enc32le_aligned(u.tmp_out + 48, BC);
|
||||
sph_enc32le_aligned(u.tmp_out + 52, BD);
|
||||
sph_enc32le_aligned(u.tmp_out + 56, BE);
|
||||
sph_enc32le_aligned(u.tmp_out + 60, BF);
|
||||
break;
|
||||
default:
|
||||
return;
|
||||
}
|
||||
out_len = size_words << 2;
|
||||
memcpy(dst, u.tmp_out + (sizeof u.tmp_out) - out_len, out_len);
|
||||
// shabal_init(sc, size_words << 5);
|
||||
}
|
||||
#if 0
/*
 * Shabal-192 and Shabal-224 entry points. This whole region is
 * compiled out.
 */

/* see sph_shabal.h */
void sph_shabal192_init(void *cc)
{
	shabal_init(cc, 192);
}

/* see sph_shabal.h */
void sph_shabal192(void *cc, const void *data, size_t len)
{
	shabal_core(cc, data, len);
}

/* see sph_shabal.h */
void sph_shabal192_close(void *cc, void *dst)
{
	/* No extra bits; 6 output words = 24 bytes. */
	shabal_close(cc, 0, 0, dst, 6);
}

/* see sph_shabal.h */
void sph_shabal192_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	shabal_close(cc, ub, n, dst, 6);
}

/* see sph_shabal.h */
void sph_shabal224_init(void *cc)
{
	shabal_init(cc, 224);
}

/* see sph_shabal.h */
void sph_shabal224(void *cc, const void *data, size_t len)
{
	shabal_core(cc, data, len);
}

/* see sph_shabal.h */
void sph_shabal224_close(void *cc, void *dst)
{
	/* No extra bits; 7 output words = 28 bytes. */
	shabal_close(cc, 0, 0, dst, 7);
}

/* see sph_shabal.h */
void sph_shabal224_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	shabal_close(cc, ub, n, dst, 7);
}

#endif
|
||||
/* see sph_shabal.h */
void sph_shabal256_init(void *cc)
{
	/* Load the precomputed Shabal-256 initial state. */
	shabal_init(cc, 256);
}

/* see sph_shabal.h */
void sph_shabal256(void *cc, const void *data, size_t len)
{
	shabal_core(cc, data, len);
}

/* see sph_shabal.h */
void sph_shabal256_close(void *cc, void *dst)
{
	/* No extra bits; 8 output words = 32 bytes. */
	shabal_close(cc, 0, 0, dst, 8);
}

/* see sph_shabal.h */
void sph_shabal256_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	shabal_close(cc, ub, n, dst, 8);
}
|
||||
|
||||
#if 0
/* Shabal-384 entry points. This whole region is compiled out. */

/* see sph_shabal.h */
void sph_shabal384_init(void *cc)
{
	shabal_init(cc, 384);
}

/* see sph_shabal.h */
void sph_shabal384(void *cc, const void *data, size_t len)
{
	shabal_core(cc, data, len);
}

/* see sph_shabal.h */
void sph_shabal384_close(void *cc, void *dst)
{
	/* No extra bits; 12 output words = 48 bytes. */
	shabal_close(cc, 0, 0, dst, 12);
}

/* see sph_shabal.h */
void sph_shabal384_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	shabal_close(cc, ub, n, dst, 12);
}
#endif
|
||||
|
||||
/* see sph_shabal.h */
void sph_shabal512_init(void *cc)
{
	/* Load the precomputed Shabal-512 initial state. */
	shabal_init(cc, 512);
}

/* see sph_shabal.h */
void sph_shabal512(void *cc, const void *data, size_t len)
{
	shabal_core(cc, data, len);
}

/* see sph_shabal.h */
void sph_shabal512_close(void *cc, void *dst)
{
	/* No extra bits; 16 output words = 64 bytes. */
	shabal_close(cc, 0, 0, dst, 16);
}

/* see sph_shabal.h */
void sph_shabal512_addbits_and_close(void *cc, unsigned ub, unsigned n, void *dst)
{
	shabal_close(cc, ub, n, dst, 16);
}
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
344
src/crypto/ghostrider/sph_shabal.h
Normal file
344
src/crypto/ghostrider/sph_shabal.h
Normal file
@@ -0,0 +1,344 @@
|
||||
/* $Id: sph_shabal.h 175 2010-05-07 16:03:20Z tp $ */
|
||||
/**
|
||||
* Shabal interface. Shabal is a family of functions which differ by
|
||||
* their output size; this implementation defines Shabal for output
|
||||
* sizes 192, 224, 256, 384 and 512 bits.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_shabal.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_SHABAL_H__
|
||||
#define SPH_SHABAL_H__
|
||||
|
||||
#include "sph_types.h"
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Shabal-192.
|
||||
*/
|
||||
#define SPH_SIZE_shabal192 192
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Shabal-224.
|
||||
*/
|
||||
#define SPH_SIZE_shabal224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Shabal-256.
|
||||
*/
|
||||
#define SPH_SIZE_shabal256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Shabal-384.
|
||||
*/
|
||||
#define SPH_SIZE_shabal384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Shabal-512.
|
||||
*/
|
||||
#define SPH_SIZE_shabal512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for Shabal computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* a Shabal computation has been performed, the context can be reused for
|
||||
* another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running Shabal computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
size_t ptr;
|
||||
sph_u32 A[12], B[16], C[16];
|
||||
sph_u32 Whigh, Wlow;
|
||||
#endif
|
||||
} sph_shabal_context;
|
||||
|
||||
/**
|
||||
* Type for a Shabal-192 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_shabal_context sph_shabal192_context;
|
||||
|
||||
/**
|
||||
* Type for a Shabal-224 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_shabal_context sph_shabal224_context;
|
||||
|
||||
/**
|
||||
* Type for a Shabal-256 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_shabal_context sph_shabal256_context;
|
||||
|
||||
/**
|
||||
* Type for a Shabal-384 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_shabal_context sph_shabal384_context;
|
||||
|
||||
/**
|
||||
* Type for a Shabal-512 context (identical to the common context).
|
||||
*/
|
||||
typedef sph_shabal_context sph_shabal512_context;
|
||||
|
||||
/**
|
||||
* Initialize a Shabal-192 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Shabal-192 context (pointer to a
|
||||
* <code>sph_shabal192_context</code>)
|
||||
*/
|
||||
void sph_shabal192_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Shabal-192 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_shabal192(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Shabal-192 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (24 bytes). The context is not reinitialized
|
||||
* by this call; call the init function again before reusing it.
|
||||
*
|
||||
* @param cc the Shabal-192 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shabal192_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (24 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is not reinitialized by this call.
|
||||
*
|
||||
* @param cc the Shabal-192 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shabal192_addbits_and_close(void *cc, unsigned ub, unsigned n,
|
||||
void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Shabal-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Shabal-224 context (pointer to a
|
||||
* <code>sph_shabal224_context</code>)
|
||||
*/
|
||||
void sph_shabal224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Shabal-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_shabal224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Shabal-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is not reinitialized
|
||||
* by this call; call the init function again before reusing it.
|
||||
*
|
||||
* @param cc the Shabal-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shabal224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is not reinitialized by this call.
|
||||
*
|
||||
* @param cc the Shabal-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shabal224_addbits_and_close(void *cc, unsigned ub, unsigned n,
|
||||
void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Shabal-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Shabal-256 context (pointer to a
|
||||
* <code>sph_shabal256_context</code>)
|
||||
*/
|
||||
void sph_shabal256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Shabal-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_shabal256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Shabal-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is not reinitialized
|
||||
* by this call; call the init function again before reusing it.
|
||||
*
|
||||
* @param cc the Shabal-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shabal256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is not reinitialized by this call.
|
||||
*
|
||||
* @param cc the Shabal-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shabal256_addbits_and_close(void *cc, unsigned ub, unsigned n,
|
||||
void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Shabal-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Shabal-384 context (pointer to a
|
||||
* <code>sph_shabal384_context</code>)
|
||||
*/
|
||||
void sph_shabal384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Shabal-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_shabal384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Shabal-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Shabal-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shabal384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Shabal-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shabal384_addbits_and_close(void *cc, unsigned ub, unsigned n,
|
||||
void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Shabal-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Shabal-512 context (pointer to a
|
||||
* <code>sph_shabal512_context</code>)
|
||||
*/
|
||||
void sph_shabal512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Shabal-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_shabal512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Shabal-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Shabal-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shabal512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Shabal-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shabal512_addbits_and_close(void *cc, unsigned ub, unsigned n,
|
||||
void *dst);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
1764
src/crypto/ghostrider/sph_shavite.c
Normal file
1764
src/crypto/ghostrider/sph_shavite.c
Normal file
File diff suppressed because it is too large
Load Diff
314
src/crypto/ghostrider/sph_shavite.h
Normal file
314
src/crypto/ghostrider/sph_shavite.h
Normal file
@@ -0,0 +1,314 @@
|
||||
/* $Id: sph_shavite.h 208 2010-06-02 20:33:00Z tp $ */
|
||||
/**
|
||||
* SHAvite-3 interface. This code implements SHAvite-3 with the
|
||||
* recommended parameters for SHA-3, with outputs of 224, 256, 384 and
|
||||
* 512 bits. In the following, we call the function "SHAvite" (without
|
||||
* the "-3" suffix), thus "SHAvite-224" is "SHAvite-3 with a 224-bit
|
||||
* output".
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_shavite.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_SHAVITE_H__
|
||||
#define SPH_SHAVITE_H__
|
||||
|
||||
#include <stddef.h>
|
||||
#include "sph_types.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
/**
|
||||
* Output size (in bits) for SHAvite-224.
|
||||
*/
|
||||
#define SPH_SIZE_shavite224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for SHAvite-256.
|
||||
*/
|
||||
#define SPH_SIZE_shavite256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for SHAvite-384.
|
||||
*/
|
||||
#define SPH_SIZE_shavite384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for SHAvite-512.
|
||||
*/
|
||||
#define SPH_SIZE_shavite512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for SHAvite-224 and SHAvite-256 computations:
|
||||
* it contains the intermediate values and some data from the last
|
||||
* entered block. Once a SHAvite computation has been performed, the
|
||||
* context can be reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running SHAvite
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE /* fields below are compiled out when DOXYGEN_IGNORE is defined */
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
size_t ptr; /* NOTE(review): presumably the current fill position within buf -- confirm in sph_shavite.c */
|
||||
sph_u32 h[8]; /* eight sph_u32 words; presumably the intermediate hash values -- confirm */
|
||||
sph_u32 count0, count1; /* NOTE(review): presumably the processed-input counter split over two words -- confirm */
|
||||
#endif /* DOXYGEN_IGNORE */
|
||||
} sph_shavite_small_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for SHAvite-224 computations. It is
|
||||
* identical to the common <code>sph_shavite_small_context</code>.
|
||||
*/
|
||||
typedef sph_shavite_small_context sph_shavite224_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for SHAvite-256 computations. It is
|
||||
* identical to the common <code>sph_shavite_small_context</code>.
|
||||
*/
|
||||
typedef sph_shavite_small_context sph_shavite256_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for SHAvite-384 and SHAvite-512 computations:
|
||||
* it contains the intermediate values and some data from the last
|
||||
* entered block. Once a SHAvite computation has been performed, the
|
||||
* context can be reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running SHAvite
|
||||
* computation can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE /* fields below are compiled out when DOXYGEN_IGNORE is defined */
|
||||
unsigned char buf[128]; /* first field, for alignment */
|
||||
size_t ptr; /* NOTE(review): presumably the current fill position within buf -- confirm in sph_shavite.c */
|
||||
sph_u32 h[16]; /* sixteen sph_u32 words; presumably the intermediate hash values -- confirm */
|
||||
sph_u32 count0, count1, count2, count3; /* NOTE(review): presumably the processed-input counter split over four words -- confirm */
|
||||
#endif /* DOXYGEN_IGNORE */
|
||||
} sph_shavite_big_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for SHAvite-384 computations. It is
|
||||
* identical to the common <code>sph_shavite_big_context</code>.
|
||||
*/
|
||||
typedef sph_shavite_big_context sph_shavite384_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for SHAvite-512 computations. It is
|
||||
* identical to the common <code>sph_shavite_big_context</code>.
|
||||
*/
|
||||
typedef sph_shavite_big_context sph_shavite512_context;
|
||||
|
||||
/**
|
||||
* Initialize a SHAvite-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the SHAvite-224 context (pointer to a
|
||||
* <code>sph_shavite224_context</code>)
|
||||
*/
|
||||
void sph_shavite224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the SHAvite-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_shavite224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current SHAvite-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the SHAvite-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shavite224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the SHAvite-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shavite224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a SHAvite-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the SHAvite-256 context (pointer to a
|
||||
* <code>sph_shavite256_context</code>)
|
||||
*/
|
||||
void sph_shavite256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the SHAvite-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_shavite256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current SHAvite-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the SHAvite-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shavite256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the SHAvite-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shavite256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a SHAvite-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the SHAvite-384 context (pointer to a
|
||||
* <code>sph_shavite384_context</code>)
|
||||
*/
|
||||
void sph_shavite384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the SHAvite-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_shavite384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current SHAvite-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the SHAvite-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shavite384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the SHAvite-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shavite384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a SHAvite-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the SHAvite-512 context (pointer to a
|
||||
* <code>sph_shavite512_context</code>)
|
||||
*/
|
||||
void sph_shavite512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the SHAvite-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_shavite512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current SHAvite-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the SHAvite-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shavite512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the SHAvite-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_shavite512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
1799
src/crypto/ghostrider/sph_simd.c
Normal file
1799
src/crypto/ghostrider/sph_simd.c
Normal file
File diff suppressed because it is too large
Load Diff
309
src/crypto/ghostrider/sph_simd.h
Normal file
309
src/crypto/ghostrider/sph_simd.h
Normal file
@@ -0,0 +1,309 @@
|
||||
/* $Id: sph_simd.h 154 2010-04-26 17:00:24Z tp $ */
|
||||
/**
|
||||
* SIMD interface. SIMD is a family of functions which differ by
|
||||
* their output size; this implementation defines SIMD for output
|
||||
* sizes 224, 256, 384 and 512 bits.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_simd.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_SIMD_H__
|
||||
#define SPH_SIMD_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include "sph_types.h"
|
||||
|
||||
/**
|
||||
* Output size (in bits) for SIMD-224.
|
||||
*/
|
||||
#define SPH_SIZE_simd224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for SIMD-256.
|
||||
*/
|
||||
#define SPH_SIZE_simd256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for SIMD-384.
|
||||
*/
|
||||
#define SPH_SIZE_simd384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for SIMD-512.
|
||||
*/
|
||||
#define SPH_SIZE_simd512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for SIMD computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* an SIMD computation has been performed, the context can be reused for
|
||||
* another computation. This specific structure is used for SIMD-224
|
||||
* and SIMD-256.
|
||||
*
|
||||
* The contents of this structure are private. A running SIMD computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE /* fields below are compiled out when DOXYGEN_IGNORE is defined */
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
size_t ptr; /* NOTE(review): presumably the current fill position within buf -- confirm in sph_simd.c */
|
||||
sph_u32 state[16]; /* sixteen sph_u32 words; presumably the intermediate hash values -- confirm */
|
||||
sph_u32 count_low, count_high; /* NOTE(review): presumably the low/high words of the processed-input counter -- confirm */
|
||||
#endif /* DOXYGEN_IGNORE */
|
||||
} sph_simd_small_context;
|
||||
|
||||
/**
|
||||
* This structure is a context for SIMD computations: it contains the
|
||||
* intermediate values and some data from the last entered block. Once
|
||||
* an SIMD computation has been performed, the context can be reused for
|
||||
* another computation. This specific structure is used for SIMD-384
|
||||
* and SIMD-512.
|
||||
*
|
||||
* The contents of this structure are private. A running SIMD computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE /* fields below are compiled out when DOXYGEN_IGNORE is defined */
|
||||
unsigned char buf[128]; /* first field, for alignment */
|
||||
size_t ptr; /* NOTE(review): presumably the current fill position within buf -- confirm in sph_simd.c */
|
||||
sph_u32 state[32]; /* thirty-two sph_u32 words; presumably the intermediate hash values -- confirm */
|
||||
sph_u32 count_low, count_high; /* NOTE(review): presumably the low/high words of the processed-input counter -- confirm */
|
||||
#endif /* DOXYGEN_IGNORE */
|
||||
} sph_simd_big_context;
|
||||
|
||||
/**
|
||||
* Type for a SIMD-224 context (identical to the common "small" context).
|
||||
*/
|
||||
typedef sph_simd_small_context sph_simd224_context;
|
||||
|
||||
/**
|
||||
* Type for a SIMD-256 context (identical to the common "small" context).
|
||||
*/
|
||||
typedef sph_simd_small_context sph_simd256_context;
|
||||
|
||||
/**
|
||||
* Type for a SIMD-384 context (identical to the common "big" context).
|
||||
*/
|
||||
typedef sph_simd_big_context sph_simd384_context;
|
||||
|
||||
/**
|
||||
* Type for a SIMD-512 context (identical to the common "big" context).
|
||||
*/
|
||||
typedef sph_simd_big_context sph_simd512_context;
|
||||
|
||||
/**
|
||||
* Initialize an SIMD-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the SIMD-224 context (pointer to a
|
||||
* <code>sph_simd224_context</code>)
|
||||
*/
|
||||
void sph_simd224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the SIMD-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_simd224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current SIMD-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the SIMD-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_simd224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the SIMD-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_simd224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize an SIMD-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the SIMD-256 context (pointer to a
|
||||
* <code>sph_simd256_context</code>)
|
||||
*/
|
||||
void sph_simd256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the SIMD-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_simd256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current SIMD-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the SIMD-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_simd256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the SIMD-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_simd256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize an SIMD-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the SIMD-384 context (pointer to a
|
||||
* <code>sph_simd384_context</code>)
|
||||
*/
|
||||
void sph_simd384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the SIMD-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_simd384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current SIMD-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the SIMD-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_simd384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the SIMD-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_simd384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize an SIMD-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the SIMD-512 context (pointer to a
|
||||
* <code>sph_simd512_context</code>)
|
||||
*/
|
||||
void sph_simd512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the SIMD-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_simd512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current SIMD-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the SIMD-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_simd512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the SIMD-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_simd512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
1254
src/crypto/ghostrider/sph_skein.c
Normal file
1254
src/crypto/ghostrider/sph_skein.c
Normal file
File diff suppressed because it is too large
Load Diff
298
src/crypto/ghostrider/sph_skein.h
Normal file
298
src/crypto/ghostrider/sph_skein.h
Normal file
@@ -0,0 +1,298 @@
|
||||
/* $Id: sph_skein.h 253 2011-06-07 18:33:10Z tp $ */
|
||||
/**
|
||||
* Skein interface. The Skein specification defines three main
|
||||
* functions, called Skein-256, Skein-512 and Skein-1024, which can be
|
||||
* further parameterized with an output length. For the SHA-3
|
||||
* competition, Skein-512 is used for output sizes of 224, 256, 384 and
|
||||
* 512 bits; this is what this code implements. Thus, we hereafter call
|
||||
* Skein-224, Skein-256, Skein-384 and Skein-512 what the Skein
|
||||
* specification defines as Skein-512-224, Skein-512-256, Skein-512-384
|
||||
* and Skein-512-512, respectively.
|
||||
*
|
||||
* ==========================(LICENSE BEGIN)============================
|
||||
*
|
||||
* Copyright (c) 2007-2010 Projet RNRT SAPHIR
|
||||
*
|
||||
* Permission is hereby granted, free of charge, to any person obtaining
|
||||
* a copy of this software and associated documentation files (the
|
||||
* "Software"), to deal in the Software without restriction, including
|
||||
* without limitation the rights to use, copy, modify, merge, publish,
|
||||
* distribute, sublicense, and/or sell copies of the Software, and to
|
||||
* permit persons to whom the Software is furnished to do so, subject to
|
||||
* the following conditions:
|
||||
*
|
||||
* The above copyright notice and this permission notice shall be
|
||||
* included in all copies or substantial portions of the Software.
|
||||
*
|
||||
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
||||
* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
||||
* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
||||
* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
||||
* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
||||
* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
||||
* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
||||
*
|
||||
* ===========================(LICENSE END)=============================
|
||||
*
|
||||
* @file sph_skein.h
|
||||
* @author Thomas Pornin <thomas.pornin@cryptolog.com>
|
||||
*/
|
||||
|
||||
#ifndef SPH_SKEIN_H__
|
||||
#define SPH_SKEIN_H__
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C"{
|
||||
#endif
|
||||
|
||||
#include <stddef.h>
|
||||
#include "sph_types.h"
|
||||
|
||||
#if SPH_64
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Skein-224.
|
||||
*/
|
||||
#define SPH_SIZE_skein224 224
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Skein-256.
|
||||
*/
|
||||
#define SPH_SIZE_skein256 256
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Skein-384.
|
||||
*/
|
||||
#define SPH_SIZE_skein384 384
|
||||
|
||||
/**
|
||||
* Output size (in bits) for Skein-512.
|
||||
*/
|
||||
#define SPH_SIZE_skein512 512
|
||||
|
||||
/**
|
||||
* This structure is a context for Skein computations (with a 224-, 256-,
|
||||
* 384- or 512-bit output): it contains the intermediate values and some data
|
||||
* from the last entered block. Once a Skein computation has been
|
||||
* performed, the context can be reused for another computation.
|
||||
*
|
||||
* The contents of this structure are private. A running Skein computation
|
||||
* can be cloned by copying the context (e.g. with a simple
|
||||
* <code>memcpy()</code>).
|
||||
*/
|
||||
typedef struct {
|
||||
#ifndef DOXYGEN_IGNORE /* fields below are compiled out when DOXYGEN_IGNORE is defined */
|
||||
unsigned char buf[64]; /* first field, for alignment */
|
||||
size_t ptr; /* NOTE(review): presumably the current fill position within buf -- confirm in sph_skein.c */
|
||||
sph_u64 h0, h1, h2, h3, h4, h5, h6, h7; /* eight sph_u64 words; presumably the intermediate hash values -- confirm */
|
||||
sph_u64 bcount; /* NOTE(review): presumably a count of processed blocks -- confirm in sph_skein.c */
|
||||
#endif /* DOXYGEN_IGNORE */
|
||||
} sph_skein_big_context;
|
||||
|
||||
/**
|
||||
* Type for a Skein-224 context (identical to the common "big" context).
|
||||
*/
|
||||
typedef sph_skein_big_context sph_skein224_context;
|
||||
|
||||
/**
|
||||
* Type for a Skein-256 context (identical to the common "big" context).
|
||||
*/
|
||||
typedef sph_skein_big_context sph_skein256_context;
|
||||
|
||||
/**
|
||||
* Type for a Skein-384 context (identical to the common "big" context).
|
||||
*/
|
||||
typedef sph_skein_big_context sph_skein384_context;
|
||||
|
||||
/**
|
||||
* Type for a Skein-512 context (identical to the common "big" context).
|
||||
*/
|
||||
typedef sph_skein_big_context sph_skein512_context;
|
||||
|
||||
/**
|
||||
* Initialize a Skein-224 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Skein-224 context (pointer to a
|
||||
* <code>sph_skein224_context</code>)
|
||||
*/
|
||||
void sph_skein224_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Skein-224 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_skein224(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Skein-224 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (28 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Skein-224 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_skein224_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (28 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Skein-224 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_skein224_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Skein-256 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Skein-256 context (pointer to a
|
||||
* <code>sph_skein256_context</code>)
|
||||
*/
|
||||
void sph_skein256_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Skein-256 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_skein256(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Skein-256 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (32 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Skein-256 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_skein256_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (32 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Skein-256 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_skein256_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Skein-384 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Skein-384 context (pointer to a
|
||||
* <code>sph_skein384_context</code>)
|
||||
*/
|
||||
void sph_skein384_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Skein-384 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_skein384(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Skein-384 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (48 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Skein-384 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_skein384_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (48 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Skein-384 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_skein384_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
/**
|
||||
* Initialize a Skein-512 context. This process performs no memory allocation.
|
||||
*
|
||||
* @param cc the Skein-512 context (pointer to a
|
||||
* <code>sph_skein512_context</code>)
|
||||
*/
|
||||
void sph_skein512_init(void *cc);
|
||||
|
||||
/**
|
||||
* Process some data bytes. It is acceptable that <code>len</code> is zero
|
||||
* (in which case this function does nothing).
|
||||
*
|
||||
* @param cc the Skein-512 context
|
||||
* @param data the input data
|
||||
* @param len the input data length (in bytes)
|
||||
*/
|
||||
void sph_skein512(void *cc, const void *data, size_t len);
|
||||
|
||||
/**
|
||||
* Terminate the current Skein-512 computation and output the result into
|
||||
* the provided buffer. The destination buffer must be wide enough to
|
||||
* accommodate the result (64 bytes). The context is automatically
|
||||
* reinitialized.
|
||||
*
|
||||
* @param cc the Skein-512 context
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_skein512_close(void *cc, void *dst);
|
||||
|
||||
/**
|
||||
* Add a few additional bits (0 to 7) to the current computation, then
|
||||
* terminate it and output the result in the provided buffer, which must
|
||||
* be wide enough to accommodate the result (64 bytes). If bit number i
|
||||
* in <code>ub</code> has value 2^i, then the extra bits are those
|
||||
* numbered 7 downto 8-n (this is the big-endian convention at the byte
|
||||
* level). The context is automatically reinitialized.
|
||||
*
|
||||
* @param cc the Skein-512 context
|
||||
* @param ub the extra bits
|
||||
* @param n the number of extra bits (0 to 7)
|
||||
* @param dst the destination buffer
|
||||
*/
|
||||
void sph_skein512_addbits_and_close(
|
||||
void *cc, unsigned ub, unsigned n, void *dst);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
||||
1976
src/crypto/ghostrider/sph_types.h
Normal file
1976
src/crypto/ghostrider/sph_types.h
Normal file
File diff suppressed because it is too large
Load Diff
3481
src/crypto/ghostrider/sph_whirlpool.c
Normal file
3481
src/crypto/ghostrider/sph_whirlpool.c
Normal file
File diff suppressed because it is too large
Load Diff
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user