mirror of
https://github.com/xmrig/xmrig.git
synced 2025-12-10 17:12:46 -05:00
Compare commits
35 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
718c7e0fc1 | ||
|
|
ef7951b91d | ||
|
|
214b1f021b | ||
|
|
81b18c0741 | ||
|
|
8e83f72456 | ||
|
|
c2ae625032 | ||
|
|
60566dc84c | ||
|
|
4ea8fe694d | ||
|
|
669d1ab008 | ||
|
|
e87d5111a2 | ||
|
|
56158779de | ||
|
|
efb322df66 | ||
|
|
e673d541c1 | ||
|
|
a98db529fb | ||
|
|
1a9eaaad8f | ||
|
|
be5fbca9b6 | ||
|
|
2feb264375 | ||
|
|
00990f2649 | ||
|
|
d78713be48 | ||
|
|
77367abe13 | ||
|
|
cd046f6fd0 | ||
|
|
63b7ec2887 | ||
|
|
a1e8f1c3e5 | ||
|
|
6db480a1ab | ||
|
|
a7acd9de6d | ||
|
|
a64f4d1870 | ||
|
|
9bfe59b630 | ||
|
|
1a4bf16521 | ||
|
|
a4d5d0a75a | ||
|
|
c40f1f9f66 | ||
|
|
15e5052dd0 | ||
|
|
f9f7963453 | ||
|
|
02240eff8c | ||
|
|
d64c963e5e | ||
|
|
c6292ce9ee |
22
CHANGELOG.md
22
CHANGELOG.md
@@ -1,3 +1,25 @@
|
||||
# v6.16.2
|
||||
- [#2751](https://github.com/xmrig/xmrig/pull/2751) Fixed crash on CPUs supporting VAES and running GCC-compiled xmrig.
|
||||
- [#2761](https://github.com/xmrig/xmrig/pull/2761) Fixed broken auto-tuning in GCC Windows build.
|
||||
- [#2771](https://github.com/xmrig/xmrig/issues/2771) Fixed environment variables support for GhostRider and KawPow.
|
||||
- [#2769](https://github.com/xmrig/xmrig/pull/2769) Performance fixes:
|
||||
- Fixed several performance bottlenecks introduced in v6.16.1.
|
||||
- Fixed overall GCC-compiled build performance, it's the same speed as MSVC build now.
|
||||
- **Linux builds are up to 10% faster now compared to v6.16.0 GCC build.**
|
||||
- **Windows builds are up to 5% faster now compared to v6.16.0 MSVC build.**
|
||||
|
||||
# v6.16.1
|
||||
- [#2729](https://github.com/xmrig/xmrig/pull/2729) GhostRider fixes:
|
||||
- Added average hashrate display.
|
||||
- Fixed the number of threads shown at startup.
|
||||
- Fixed `--threads` or `-t` command line option (but `--cpu-max-threads-hint` is recommended to use).
|
||||
- [#2738](https://github.com/xmrig/xmrig/pull/2738) GhostRider fixes:
|
||||
- Fixed "difficulty is not a number" error when diff is high on some pools.
|
||||
- Fixed GhostRider compilation when `WITH_KAWPOW=OFF`.
|
||||
- [#2740](https://github.com/xmrig/xmrig/pull/2740) Added VAES support for Cryptonight variants **+4% speedup on Zen3**.
|
||||
- VAES instructions are available on Intel Ice Lake/AMD Zen3 and newer CPUs.
|
||||
- +4% speedup on Ryzen 5 5600X.
|
||||
|
||||
# v6.16.0
|
||||
- [#2712](https://github.com/xmrig/xmrig/pull/2712) **GhostRider algorithm (Raptoreum) support**: read the [RELEASE NOTES](src/crypto/ghostrider/README.md) for quick start guide and performance comparisons.
|
||||
- [#2682](https://github.com/xmrig/xmrig/pull/2682) Fixed: use cn-heavy optimization only for Vermeer CPUs.
|
||||
|
||||
@@ -28,6 +28,7 @@ option(WITH_STRICT_CACHE "Enable strict checks for OpenCL cache" ON)
|
||||
option(WITH_INTERLEAVE_DEBUG_LOG "Enable debug log for threads interleave" OFF)
|
||||
option(WITH_PROFILING "Enable profiling for developers" OFF)
|
||||
option(WITH_SSE4_1 "Enable SSE 4.1 for Blake2" ON)
|
||||
option(WITH_VAES "Enable VAES instructions for Cryptonight" ON)
|
||||
option(WITH_BENCHMARK "Enable builtin RandomX benchmark and stress test" ON)
|
||||
option(WITH_SECURE_JIT "Enable secure access to JIT memory" OFF)
|
||||
option(WITH_DMI "Enable DMI/SMBIOS reader" ON)
|
||||
@@ -133,6 +134,15 @@ if (CMAKE_C_COMPILER_ID MATCHES GNU)
|
||||
set_source_files_properties(src/crypto/cn/CnHash.cpp PROPERTIES COMPILE_FLAGS "-Ofast -fno-tree-vectorize")
|
||||
endif()
|
||||
|
||||
if (WITH_VAES)
|
||||
add_definitions(-DXMRIG_VAES)
|
||||
set(HEADERS_CRYPTO "${HEADERS_CRYPTO}" src/crypto/cn/CryptoNight_x86_vaes.h)
|
||||
set(SOURCES_CRYPTO "${SOURCES_CRYPTO}" src/crypto/cn/CryptoNight_x86_vaes.cpp)
|
||||
if (CMAKE_C_COMPILER_ID MATCHES GNU OR CMAKE_C_COMPILER_ID MATCHES Clang)
|
||||
set_source_files_properties(src/crypto/cn/CryptoNight_x86_vaes.cpp PROPERTIES COMPILE_FLAGS "-Ofast -fno-tree-vectorize -mavx2 -mvaes")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (WITH_HWLOC)
|
||||
list(APPEND HEADERS_CRYPTO
|
||||
src/crypto/common/NUMAMemoryPool.h
|
||||
|
||||
@@ -9,10 +9,23 @@ if (NOT CMAKE_SYSTEM_PROCESSOR)
|
||||
message(WARNING "CMAKE_SYSTEM_PROCESSOR not defined")
|
||||
endif()
|
||||
|
||||
include(CheckCXXCompilerFlag)
|
||||
|
||||
if (CMAKE_CXX_COMPILER_ID MATCHES MSVC)
|
||||
set(VAES_SUPPORTED ON)
|
||||
else()
|
||||
CHECK_CXX_COMPILER_FLAG("-mavx2 -mvaes" VAES_SUPPORTED)
|
||||
endif()
|
||||
|
||||
if (NOT VAES_SUPPORTED)
|
||||
set(WITH_VAES OFF)
|
||||
endif()
|
||||
|
||||
if (XMRIG_64_BIT AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64)$")
|
||||
add_definitions(-DRAPIDJSON_SSE2)
|
||||
else()
|
||||
set(WITH_SSE4_1 OFF)
|
||||
set(WITH_VAES OFF)
|
||||
endif()
|
||||
|
||||
if (NOT ARM_TARGET)
|
||||
@@ -29,8 +42,6 @@ if (ARM_TARGET AND ARM_TARGET GREATER 6)
|
||||
|
||||
message(STATUS "Use ARM_TARGET=${ARM_TARGET} (${CMAKE_SYSTEM_PROCESSOR})")
|
||||
|
||||
include(CheckCXXCompilerFlag)
|
||||
|
||||
if (ARM_TARGET EQUAL 8)
|
||||
CHECK_CXX_COMPILER_FLAG(-march=armv8-a+crypto XMRIG_ARM_CRYPTO)
|
||||
|
||||
|
||||
@@ -53,6 +53,9 @@ xmrig::Hashrate::Hashrate(size_t threads) :
|
||||
m_timestamps[i] = new uint64_t[kBucketSize]();
|
||||
m_top[i] = 0;
|
||||
}
|
||||
|
||||
m_earliestTimestamp = std::numeric_limits<uint64_t>::max();
|
||||
m_totalCount = 0;
|
||||
}
|
||||
|
||||
|
||||
@@ -66,6 +69,14 @@ xmrig::Hashrate::~Hashrate()
|
||||
delete [] m_counts;
|
||||
delete [] m_timestamps;
|
||||
delete [] m_top;
|
||||
|
||||
}
|
||||
|
||||
|
||||
double xmrig::Hashrate::average() const
|
||||
{
|
||||
const uint64_t ts = Chrono::steadyMSecs();
|
||||
return (ts > m_earliestTimestamp) ? (m_totalCount * 1e3 / (ts - m_earliestTimestamp)) : 0.0;
|
||||
}
|
||||
|
||||
|
||||
@@ -167,4 +178,11 @@ void xmrig::Hashrate::addData(size_t index, uint64_t count, uint64_t timestamp)
|
||||
m_timestamps[index][top] = timestamp;
|
||||
|
||||
m_top[index] = (top + 1) & kBucketMask;
|
||||
|
||||
if (index == 0) {
|
||||
if (m_earliestTimestamp == std::numeric_limits<uint64_t>::max()) {
|
||||
m_earliestTimestamp = timestamp;
|
||||
}
|
||||
m_totalCount = count;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -53,6 +53,8 @@ public:
|
||||
inline void add(size_t threadId, uint64_t count, uint64_t timestamp) { addData(threadId + 1U, count, timestamp); }
|
||||
inline void add(uint64_t count, uint64_t timestamp) { addData(0U, count, timestamp); }
|
||||
|
||||
double average() const;
|
||||
|
||||
static const char *format(double h, char *buf, size_t size);
|
||||
static rapidjson::Value normalize(double d);
|
||||
|
||||
@@ -72,6 +74,9 @@ private:
|
||||
uint32_t* m_top;
|
||||
uint64_t** m_counts;
|
||||
uint64_t** m_timestamps;
|
||||
|
||||
uint64_t m_earliestTimestamp;
|
||||
uint64_t m_totalCount;
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -31,6 +31,8 @@ class Worker : public IWorker
|
||||
public:
|
||||
Worker(size_t id, int64_t affinity, int priority);
|
||||
|
||||
size_t threads() const override { return 1; }
|
||||
|
||||
protected:
|
||||
inline int64_t affinity() const { return m_affinity; }
|
||||
inline size_t id() const override { return m_id; }
|
||||
|
||||
@@ -46,6 +46,7 @@ public:
|
||||
virtual const VirtualMemory *memory() const = 0;
|
||||
virtual size_t id() const = 0;
|
||||
virtual size_t intensity() const = 0;
|
||||
virtual size_t threads() const = 0;
|
||||
virtual void hashrateData(uint64_t &hashCount, uint64_t &timeStamp, uint64_t &rawHashes) const = 0;
|
||||
virtual void jobEarlyNotification(const Job &job) = 0;
|
||||
virtual void start() = 0;
|
||||
|
||||
@@ -88,6 +88,7 @@ public:
|
||||
{
|
||||
if (ready) {
|
||||
m_started++;
|
||||
m_totalStarted += worker->threads();
|
||||
|
||||
if (m_workersMemory.insert(worker->memory()).second) {
|
||||
m_hugePages += worker->memory()->hugePages();
|
||||
@@ -112,7 +113,7 @@ public:
|
||||
LOG_INFO("%s" GREEN_BOLD(" READY") " threads %s%zu/%zu (%zu)" CLEAR " huge pages %s%1.0f%% %zu/%zu" CLEAR " memory " CYAN_BOLD("%zu KB") BLACK_BOLD(" (%" PRIu64 " ms)"),
|
||||
Tags::cpu(),
|
||||
m_errors == 0 ? CYAN_BOLD_S : YELLOW_BOLD_S,
|
||||
m_started, m_threads, m_ways,
|
||||
m_totalStarted, std::max(m_totalStarted, m_threads), m_ways,
|
||||
(m_hugePages.isFullyAllocated() ? GREEN_BOLD_S : (m_hugePages.allocated == 0 ? RED_BOLD_S : YELLOW_BOLD_S)),
|
||||
m_hugePages.percent(),
|
||||
m_hugePages.allocated, m_hugePages.total,
|
||||
@@ -127,6 +128,7 @@ private:
|
||||
size_t m_errors = 0;
|
||||
size_t m_memory = 0;
|
||||
size_t m_started = 0;
|
||||
size_t m_totalStarted = 0;
|
||||
size_t m_threads = 0;
|
||||
size_t m_ways = 0;
|
||||
uint64_t m_ts = 0;
|
||||
|
||||
@@ -44,7 +44,7 @@ xmrig::CpuLaunchData::CpuLaunchData(const Miner *miner, const Algorithm &algorit
|
||||
affinity(thread.affinity()),
|
||||
miner(miner),
|
||||
threads(threads),
|
||||
intensity(std::min<uint32_t>(thread.intensity(), algorithm.maxIntensity())),
|
||||
intensity(std::max<uint32_t>(std::min<uint32_t>(thread.intensity(), algorithm.maxIntensity()), algorithm.minIntensity())),
|
||||
affinities(affinities)
|
||||
{
|
||||
}
|
||||
|
||||
@@ -161,14 +161,14 @@ bool xmrig::CpuWorker<N>::selfTest()
|
||||
}
|
||||
# endif
|
||||
|
||||
allocateCnCtx();
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if (m_algorithm.family() == Algorithm::GHOSTRIDER) {
|
||||
return N == 8;
|
||||
return (N == 8) && verify(Algorithm::GHOSTRIDER_RTM, test_output_gr);
|
||||
}
|
||||
# endif
|
||||
|
||||
allocateCnCtx();
|
||||
|
||||
if (m_algorithm.family() == Algorithm::CN) {
|
||||
const bool rc = verify(Algorithm::CN_0, test_output_v0) &&
|
||||
verify(Algorithm::CN_1, test_output_v1) &&
|
||||
@@ -397,6 +397,37 @@ bool xmrig::CpuWorker<N>::nextRound()
|
||||
template<size_t N>
|
||||
bool xmrig::CpuWorker<N>::verify(const Algorithm &algorithm, const uint8_t *referenceValue)
|
||||
{
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if (algorithm == Algorithm::GHOSTRIDER_RTM) {
|
||||
uint8_t blob[N * 80] = {};
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
blob[i * 80 + 0] = static_cast<uint8_t>(i);
|
||||
blob[i * 80 + 4] = 0x10;
|
||||
blob[i * 80 + 5] = 0x02;
|
||||
}
|
||||
|
||||
uint8_t hash1[N * 32] = {};
|
||||
ghostrider::hash_octa(blob, 80, hash1, m_ctx, 0, false);
|
||||
|
||||
for (size_t i = 0; i < N; ++i) {
|
||||
blob[i * 80 + 0] = static_cast<uint8_t>(i);
|
||||
blob[i * 80 + 4] = 0x43;
|
||||
blob[i * 80 + 5] = 0x05;
|
||||
}
|
||||
|
||||
uint8_t hash2[N * 32] = {};
|
||||
ghostrider::hash_octa(blob, 80, hash2, m_ctx, 0, false);
|
||||
|
||||
for (size_t i = 0; i < N * 32; ++i) {
|
||||
if ((hash1[i] ^ hash2[i]) != referenceValue[i]) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
# endif
|
||||
|
||||
cn_hash_fun func = fn(algorithm);
|
||||
if (!func) {
|
||||
return false;
|
||||
|
||||
@@ -52,6 +52,15 @@ public:
|
||||
CpuWorker(size_t id, const CpuLaunchData &data);
|
||||
~CpuWorker() override;
|
||||
|
||||
size_t threads() const override
|
||||
{
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
return ((m_algorithm.family() == Algorithm::GHOSTRIDER) && m_ghHelper) ? 2 : 1;
|
||||
# else
|
||||
return 1;
|
||||
# endif
|
||||
}
|
||||
|
||||
protected:
|
||||
bool selfTest() override;
|
||||
void hashrateData(uint64_t &hashCount, uint64_t &timeStamp, uint64_t &rawHashes) const override;
|
||||
|
||||
@@ -61,6 +61,7 @@ public:
|
||||
|
||||
enum Flag : uint32_t {
|
||||
FLAG_AES,
|
||||
FLAG_VAES,
|
||||
FLAG_AVX,
|
||||
FLAG_AVX2,
|
||||
FLAG_AVX512F,
|
||||
@@ -90,6 +91,7 @@ public:
|
||||
virtual Assembly::Id assembly() const = 0;
|
||||
virtual bool has(Flag feature) const = 0;
|
||||
virtual bool hasAES() const = 0;
|
||||
virtual bool hasVAES() const = 0;
|
||||
virtual bool hasAVX() const = 0;
|
||||
virtual bool hasAVX2() const = 0;
|
||||
virtual bool hasBMI2() const = 0;
|
||||
|
||||
@@ -30,6 +30,12 @@
|
||||
#endif
|
||||
|
||||
|
||||
#include "crypto/cn/CryptoNight_monero.h"
|
||||
#ifdef XMRIG_VAES
|
||||
# include "crypto/cn/CryptoNight_x86_vaes.h"
|
||||
#endif
|
||||
|
||||
|
||||
#include "backend/cpu/platform/BasicCpuInfo.h"
|
||||
#include "3rdparty/rapidjson/document.h"
|
||||
#include "crypto/common/Assembly.h"
|
||||
@@ -52,8 +58,8 @@
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
constexpr size_t kCpuFlagsSize = 14;
|
||||
static const std::array<const char *, kCpuFlagsSize> flagNames = { "aes", "avx", "avx2", "avx512f", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "sse4.1", "xop", "popcnt", "cat_l3", "vm" };
|
||||
constexpr size_t kCpuFlagsSize = 15;
|
||||
static const std::array<const char *, kCpuFlagsSize> flagNames = { "aes", "vaes", "avx", "avx2", "avx512f", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "sse4.1", "xop", "popcnt", "cat_l3", "vm" };
|
||||
static_assert(kCpuFlagsSize == ICpuInfo::FLAG_MAX, "kCpuFlagsSize and FLAG_MAX mismatch");
|
||||
|
||||
|
||||
@@ -140,6 +146,7 @@ static inline bool has_osxsave() { return has_feature(PROCESSOR_INFO,
|
||||
static inline bool has_aes_ni() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 25); }
|
||||
static inline bool has_avx() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 28) && has_osxsave() && has_xcr_avx(); }
|
||||
static inline bool has_avx2() { return has_feature(EXTENDED_FEATURES, EBX_Reg, 1 << 5) && has_osxsave() && has_xcr_avx(); }
|
||||
static inline bool has_vaes() { return has_feature(EXTENDED_FEATURES, ECX_Reg, 1 << 9) && has_osxsave() && has_xcr_avx(); }
|
||||
static inline bool has_avx512f() { return has_feature(EXTENDED_FEATURES, EBX_Reg, 1 << 16) && has_osxsave() && has_xcr_avx512(); }
|
||||
static inline bool has_bmi2() { return has_feature(EXTENDED_FEATURES, EBX_Reg, 1 << 8); }
|
||||
static inline bool has_pdpe1gb() { return has_feature(PROCESSOR_EXT_INFO, EDX_Reg, 1 << 26); }
|
||||
@@ -178,6 +185,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
|
||||
m_flags.set(FLAG_AES, has_aes_ni());
|
||||
m_flags.set(FLAG_AVX, has_avx());
|
||||
m_flags.set(FLAG_AVX2, has_avx2());
|
||||
m_flags.set(FLAG_VAES, has_vaes());
|
||||
m_flags.set(FLAG_AVX512F, has_avx512f());
|
||||
m_flags.set(FLAG_BMI2, has_bmi2());
|
||||
m_flags.set(FLAG_OSXSAVE, has_osxsave());
|
||||
@@ -292,6 +300,9 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
cn_sse41_enabled = has(FLAG_SSE41);
|
||||
cn_vaes_enabled = has(FLAG_VAES);
|
||||
}
|
||||
|
||||
|
||||
|
||||
@@ -44,6 +44,7 @@ protected:
|
||||
inline Assembly::Id assembly() const override { return m_assembly; }
|
||||
inline bool has(Flag flag) const override { return m_flags.test(flag); }
|
||||
inline bool hasAES() const override { return has(FLAG_AES); }
|
||||
inline bool hasVAES() const override { return has(FLAG_VAES); }
|
||||
inline bool hasAVX() const override { return has(FLAG_AVX); }
|
||||
inline bool hasAVX2() const override { return has(FLAG_AVX2); }
|
||||
inline bool hasBMI2() const override { return has(FLAG_BMI2); }
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
static const char astrobwt_cl[12493] = {
|
||||
static const char astrobwt_cl[12489] = {
|
||||
0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x75,0x63,0x68,0x61,0x72,0x20,0x75,0x69,0x6e,0x74,0x38,0x5f,0x74,0x3b,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x75,
|
||||
0x73,0x68,0x6f,0x72,0x74,0x20,0x75,0x69,0x6e,0x74,0x31,0x36,0x5f,0x74,0x3b,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x75,0x69,0x6e,0x74,0x20,0x75,0x69,0x6e,
|
||||
0x74,0x33,0x32,0x5f,0x74,0x3b,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x3b,0x0a,0x74,
|
||||
@@ -260,140 +260,140 @@ static const char astrobwt_cl[12493] = {
|
||||
0x6f,0x66,0x66,0x73,0x65,0x74,0x2b,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x29,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x33,
|
||||
0x32,0x5f,0x74,0x29,0x5d,0x20,0x26,0x3d,0x20,0x30,0x78,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x55,0x3e,0x3e,0x28,0x28,0x34,0x2d,0x28,0x6f,0x75,0x74,0x70,0x75,
|
||||
0x74,0x5f,0x73,0x69,0x7a,0x65,0x26,0x33,0x29,0x29,0x3c,0x3c,0x33,0x29,0x3b,0x0a,0x7d,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x52,0x4f,0x55,0x4e,0x44,0x53,
|
||||
0x20,0x32,0x34,0x20,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x52,0x36,0x34,0x28,0x61,0x2c,0x62,0x2c,0x63,0x29,0x20,0x28,0x28,0x28,0x61,0x29,0x20,0x3c,0x3c,
|
||||
0x20,0x62,0x29,0x20,0x7c,0x20,0x28,0x28,0x61,0x29,0x20,0x3e,0x3e,0x20,0x63,0x29,0x29,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,
|
||||
0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x72,0x63,0x5b,0x32,0x5d,0x5b,0x52,0x4f,0x55,0x4e,0x44,0x53,0x5d,0x3d,0x7b,0x0a,0x7b,0x30,0x78,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x31,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
|
||||
0x38,0x30,0x38,0x32,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x38,0x41,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x38,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
|
||||
0x38,0x30,0x38,0x42,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x31,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x38,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x38,0x31,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
|
||||
0x38,0x30,0x30,0x39,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x41,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x38,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,
|
||||
0x38,0x30,0x30,0x39,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x41,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x38,0x42,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
|
||||
0x30,0x30,0x38,0x42,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x38,0x39,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x38,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x33,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
|
||||
0x38,0x30,0x30,0x32,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x41,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,
|
||||
0x30,0x30,0x30,0x41,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x38,0x31,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x38,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x38,0x30,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,
|
||||
0x30,0x30,0x30,0x31,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x30,0x38,0x55,0x4c,0x7d,0x2c,0x0a,0x7b,0x30,
|
||||
0x20,0x32,0x34,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x52,0x36,0x34,0x28,0x61,0x2c,0x62,0x2c,0x63,0x29,0x20,0x28,0x28,0x28,0x61,0x29,0x20,0x3c,0x3c,0x20,
|
||||
0x62,0x29,0x20,0x7c,0x20,0x28,0x28,0x61,0x29,0x20,0x3e,0x3e,0x20,0x63,0x29,0x29,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,0x73,
|
||||
0x74,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x72,0x63,0x5b,0x32,0x5d,0x5b,0x52,0x4f,0x55,0x4e,0x44,0x53,0x5d,0x3d,0x7b,0x0a,0x7b,0x30,0x78,0x30,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x31,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,
|
||||
0x30,0x38,0x32,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x38,0x41,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x38,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,
|
||||
0x30,0x38,0x42,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x31,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x38,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x38,0x31,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,
|
||||
0x30,0x30,0x39,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x41,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x30,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x38,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,
|
||||
0x30,0x30,0x39,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x41,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x30,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x38,0x42,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
|
||||
0x30,0x38,0x42,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x38,0x39,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x38,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x33,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,
|
||||
0x30,0x30,0x32,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x30,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x41,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x30,
|
||||
0x30,0x30,0x41,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x38,0x31,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x38,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x38,0x30,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x30,
|
||||
0x30,0x30,0x31,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x30,0x38,0x55,0x4c,0x7d,0x2c,0x0a,0x7b,0x30,0x55,
|
||||
0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x0a,0x30,
|
||||
0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x0a,
|
||||
0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,
|
||||
0x0a,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,
|
||||
0x7d,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x72,0x6f,0x5b,0x32,0x35,0x5d,
|
||||
0x5b,0x32,0x5d,0x3d,0x7b,0x0a,0x7b,0x20,0x30,0x2c,0x36,0x34,0x7d,0x2c,0x7b,0x34,0x34,0x2c,0x32,0x30,0x7d,0x2c,0x7b,0x34,0x33,0x2c,0x32,0x31,0x7d,0x2c,0x7b,0x32,
|
||||
0x31,0x2c,0x34,0x33,0x7d,0x2c,0x7b,0x31,0x34,0x2c,0x35,0x30,0x7d,0x2c,0x0a,0x7b,0x20,0x31,0x2c,0x36,0x33,0x7d,0x2c,0x7b,0x20,0x36,0x2c,0x35,0x38,0x7d,0x2c,0x7b,
|
||||
0x32,0x35,0x2c,0x33,0x39,0x7d,0x2c,0x7b,0x20,0x38,0x2c,0x35,0x36,0x7d,0x2c,0x7b,0x31,0x38,0x2c,0x34,0x36,0x7d,0x2c,0x0a,0x7b,0x36,0x32,0x2c,0x32,0x7d,0x2c,0x7b,
|
||||
0x35,0x35,0x2c,0x39,0x7d,0x2c,0x7b,0x33,0x39,0x2c,0x32,0x35,0x7d,0x2c,0x7b,0x34,0x31,0x2c,0x32,0x33,0x7d,0x2c,0x7b,0x20,0x32,0x2c,0x36,0x32,0x7d,0x2c,0x0a,0x7b,
|
||||
0x32,0x38,0x2c,0x33,0x36,0x7d,0x2c,0x7b,0x32,0x30,0x2c,0x34,0x34,0x7d,0x2c,0x7b,0x20,0x33,0x2c,0x36,0x31,0x7d,0x2c,0x7b,0x34,0x35,0x2c,0x31,0x39,0x7d,0x2c,0x7b,
|
||||
0x36,0x31,0x2c,0x33,0x7d,0x2c,0x0a,0x7b,0x32,0x37,0x2c,0x33,0x37,0x7d,0x2c,0x7b,0x33,0x36,0x2c,0x32,0x38,0x7d,0x2c,0x7b,0x31,0x30,0x2c,0x35,0x34,0x7d,0x2c,0x7b,
|
||||
0x31,0x35,0x2c,0x34,0x39,0x7d,0x2c,0x7b,0x35,0x36,0x2c,0x38,0x7d,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,0x73,
|
||||
0x74,0x20,0x69,0x6e,0x74,0x20,0x61,0x5b,0x32,0x35,0x5d,0x3d,0x7b,0x0a,0x30,0x2c,0x36,0x2c,0x31,0x32,0x2c,0x31,0x38,0x2c,0x32,0x34,0x2c,0x0a,0x31,0x2c,0x37,0x2c,
|
||||
0x31,0x33,0x2c,0x31,0x39,0x2c,0x32,0x30,0x2c,0x0a,0x32,0x2c,0x38,0x2c,0x31,0x34,0x2c,0x31,0x35,0x2c,0x32,0x31,0x2c,0x0a,0x33,0x2c,0x39,0x2c,0x31,0x30,0x2c,0x31,
|
||||
0x36,0x2c,0x32,0x32,0x2c,0x0a,0x34,0x2c,0x35,0x2c,0x31,0x31,0x2c,0x31,0x37,0x2c,0x32,0x33,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,
|
||||
0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x5b,0x32,0x35,0x5d,0x3d,0x7b,0x0a,0x30,0x2c,0x31,0x2c,0x32,0x2c,0x33,0x2c,0x34,0x2c,0x0a,0x31,0x2c,
|
||||
0x32,0x2c,0x33,0x2c,0x34,0x2c,0x30,0x2c,0x0a,0x32,0x2c,0x33,0x2c,0x34,0x2c,0x30,0x2c,0x31,0x2c,0x0a,0x33,0x2c,0x34,0x2c,0x30,0x2c,0x31,0x2c,0x32,0x2c,0x0a,0x34,
|
||||
0x2c,0x30,0x2c,0x31,0x2c,0x32,0x2c,0x33,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,
|
||||
0x20,0x63,0x5b,0x32,0x35,0x5d,0x5b,0x33,0x5d,0x3d,0x7b,0x0a,0x7b,0x20,0x30,0x2c,0x31,0x2c,0x32,0x7d,0x2c,0x7b,0x20,0x31,0x2c,0x32,0x2c,0x33,0x7d,0x2c,0x7b,0x20,
|
||||
0x32,0x2c,0x33,0x2c,0x34,0x7d,0x2c,0x7b,0x20,0x33,0x2c,0x34,0x2c,0x30,0x7d,0x2c,0x7b,0x20,0x34,0x2c,0x30,0x2c,0x31,0x7d,0x2c,0x0a,0x7b,0x20,0x35,0x2c,0x36,0x2c,
|
||||
0x37,0x7d,0x2c,0x7b,0x20,0x36,0x2c,0x37,0x2c,0x38,0x7d,0x2c,0x7b,0x20,0x37,0x2c,0x38,0x2c,0x39,0x7d,0x2c,0x7b,0x20,0x38,0x2c,0x39,0x2c,0x35,0x7d,0x2c,0x7b,0x20,
|
||||
0x39,0x2c,0x35,0x2c,0x36,0x7d,0x2c,0x0a,0x7b,0x31,0x30,0x2c,0x31,0x31,0x2c,0x31,0x32,0x7d,0x2c,0x7b,0x31,0x31,0x2c,0x31,0x32,0x2c,0x31,0x33,0x7d,0x2c,0x7b,0x31,
|
||||
0x32,0x2c,0x31,0x33,0x2c,0x31,0x34,0x7d,0x2c,0x7b,0x31,0x33,0x2c,0x31,0x34,0x2c,0x31,0x30,0x7d,0x2c,0x7b,0x31,0x34,0x2c,0x31,0x30,0x2c,0x31,0x31,0x7d,0x2c,0x0a,
|
||||
0x7b,0x31,0x35,0x2c,0x31,0x36,0x2c,0x31,0x37,0x7d,0x2c,0x7b,0x31,0x36,0x2c,0x31,0x37,0x2c,0x31,0x38,0x7d,0x2c,0x7b,0x31,0x37,0x2c,0x31,0x38,0x2c,0x31,0x39,0x7d,
|
||||
0x2c,0x7b,0x31,0x38,0x2c,0x31,0x39,0x2c,0x31,0x35,0x7d,0x2c,0x7b,0x31,0x39,0x2c,0x31,0x35,0x2c,0x31,0x36,0x7d,0x2c,0x0a,0x7b,0x32,0x30,0x2c,0x32,0x31,0x2c,0x32,
|
||||
0x32,0x7d,0x2c,0x7b,0x32,0x31,0x2c,0x32,0x32,0x2c,0x32,0x33,0x7d,0x2c,0x7b,0x32,0x32,0x2c,0x32,0x33,0x2c,0x32,0x34,0x7d,0x2c,0x7b,0x32,0x33,0x2c,0x32,0x34,0x2c,
|
||||
0x32,0x30,0x7d,0x2c,0x7b,0x32,0x34,0x2c,0x32,0x30,0x2c,0x32,0x31,0x7d,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,
|
||||
0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x64,0x5b,0x32,0x35,0x5d,0x3d,0x7b,0x0a,0x30,0x2c,0x31,0x2c,0x32,0x2c,0x33,0x2c,0x34,0x2c,0x0a,0x31,0x30,0x2c,0x31,0x31,0x2c,
|
||||
0x31,0x32,0x2c,0x31,0x33,0x2c,0x31,0x34,0x2c,0x0a,0x32,0x30,0x2c,0x32,0x31,0x2c,0x32,0x32,0x2c,0x32,0x33,0x2c,0x32,0x34,0x2c,0x0a,0x35,0x2c,0x36,0x2c,0x37,0x2c,
|
||||
0x38,0x2c,0x39,0x2c,0x0a,0x31,0x35,0x2c,0x31,0x36,0x2c,0x31,0x37,0x2c,0x31,0x38,0x2c,0x31,0x39,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x61,0x74,0x74,0x72,0x69,0x62,0x75,
|
||||
0x74,0x65,0x5f,0x5f,0x28,0x28,0x72,0x65,0x71,0x64,0x5f,0x77,0x6f,0x72,0x6b,0x5f,0x67,0x72,0x6f,0x75,0x70,0x5f,0x73,0x69,0x7a,0x65,0x28,0x33,0x32,0x2c,0x31,0x2c,
|
||||
0x31,0x29,0x29,0x29,0x0a,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x73,0x68,0x61,0x33,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,
|
||||
0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x38,0x5f,0x74,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x73,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,
|
||||
0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x73,0x2c,0x75,0x69,0x6e,0x74,
|
||||
0x33,0x32,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x74,0x72,0x69,0x64,0x65,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,
|
||||
0x34,0x5f,0x74,0x2a,0x20,0x68,0x61,0x73,0x68,0x65,0x73,0x29,0x0a,0x7b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x74,0x3d,
|
||||
0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,
|
||||
0x67,0x3d,0x67,0x65,0x74,0x5f,0x67,0x72,0x6f,0x75,0x70,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x69,0x66,0x28,0x74,0x3e,0x3d,0x32,0x35,0x29,0x0a,0x72,0x65,0x74,
|
||||
0x75,0x72,0x6e,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x73,0x3d,0x74,0x20,0x25,0x20,0x35,0x3b,0x0a,0x63,0x6f,0x6e,
|
||||
0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x3d,0x28,0x28,0x75,0x69,0x6e,0x74,0x36,
|
||||
0x34,0x5f,0x74,0x29,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x74,0x72,0x69,0x64,0x65,0x29,0x2a,0x67,0x3b,0x0a,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,
|
||||
0x6e,0x74,0x36,0x34,0x5f,0x74,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x3d,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,
|
||||
0x2a,0x29,0x28,0x69,0x6e,0x70,0x75,0x74,0x73,0x2b,0x69,0x6e,0x70,0x75,0x74,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,
|
||||
0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x3d,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x73,0x5b,0x67,
|
||||
0x5d,0x2b,0x31,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x41,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x5f,0x5f,0x6c,
|
||||
0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x43,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,
|
||||
0x74,0x36,0x34,0x5f,0x74,0x20,0x44,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x41,0x5b,0x74,0x5d,0x3d,0x30,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,
|
||||
0x32,0x5f,0x74,0x20,0x77,0x6f,0x72,0x64,0x73,0x3d,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,
|
||||
0x36,0x34,0x5f,0x74,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x74,0x61,0x69,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x3d,
|
||||
0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x20,0x25,0x20,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x3b,0x0a,0x75,
|
||||
0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x3d,0x30,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x33,0x32,
|
||||
0x5f,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x77,0x6f,0x72,0x64,0x73,0x3b,0x20,0x2b,0x2b,0x69,0x2c,0x2b,0x2b,0x69,0x6e,0x70,0x75,0x74,0x29,0x0a,0x7b,0x0a,
|
||||
0x41,0x5b,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x2a,0x69,0x6e,0x70,0x75,0x74,0x3b,0x0a,0x2b,0x2b,0x77,0x6f,0x72,0x64,0x49,0x6e,
|
||||
0x64,0x65,0x78,0x3b,0x0a,0x69,0x66,0x28,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x3d,0x3d,0x31,0x37,0x29,0x0a,0x7b,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,
|
||||
0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x20,0x52,0x4f,0x55,0x4e,0x44,0x53,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x52,
|
||||
0x4f,0x55,0x4e,0x44,0x53,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x0a,0x7b,0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x41,0x5b,0x73,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x35,0x5d,0x5e,0x41,
|
||||
0x5b,0x73,0x2b,0x31,0x30,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x31,0x35,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x32,0x30,0x5d,0x3b,0x0a,0x44,0x5b,0x74,0x5d,0x3d,0x43,0x5b,0x62,
|
||||
0x5b,0x32,0x30,0x2b,0x73,0x5d,0x5d,0x5e,0x52,0x36,0x34,0x28,0x43,0x5b,0x62,0x5b,0x35,0x2b,0x73,0x5d,0x5d,0x2c,0x31,0x2c,0x36,0x33,0x29,0x3b,0x0a,0x43,0x5b,0x74,
|
||||
0x5d,0x3d,0x52,0x36,0x34,0x28,0x41,0x5b,0x61,0x5b,0x74,0x5d,0x5d,0x5e,0x44,0x5b,0x62,0x5b,0x74,0x5d,0x5d,0x2c,0x72,0x6f,0x5b,0x74,0x5d,0x5b,0x30,0x5d,0x2c,0x72,
|
||||
0x6f,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x29,0x3b,0x0a,0x41,0x5b,0x64,0x5b,0x74,0x5d,0x5d,0x3d,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x30,0x5d,0x5d,0x5e,0x28,0x28,0x7e,
|
||||
0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x5d,0x29,0x26,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x32,0x5d,0x5d,0x29,0x3b,0x0a,0x41,0x5b,0x74,0x5d,0x20,0x5e,0x3d,
|
||||
0x20,0x72,0x63,0x5b,0x28,0x74,0x3d,0x3d,0x30,0x29,0x3f,0x30,0x3a,0x31,0x5d,0x5b,0x69,0x5d,0x3b,0x20,0x0a,0x7d,0x0a,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,
|
||||
0x3d,0x30,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x74,0x61,0x69,0x6c,0x3d,0x30,0x3b,0x0a,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,
|
||||
0x6c,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x38,0x5f,0x74,0x2a,0x20,0x70,0x3d,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x63,0x6f,0x6e,
|
||||
0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x38,0x5f,0x74,0x2a,0x29,0x69,0x6e,0x70,0x75,0x74,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,
|
||||
0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x74,0x61,0x69,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x0a,0x7b,0x0a,0x74,0x61,0x69,0x6c,0x7c,0x3d,
|
||||
0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x28,0x70,0x5b,0x69,0x5d,0x29,0x3c,0x3c,0x28,0x69,0x2a,0x38,0x29,0x3b,0x0a,0x7d,0x0a,0x41,0x5b,0x77,0x6f,0x72,
|
||||
0x64,0x49,0x6e,0x64,0x65,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x74,0x61,0x69,0x6c,0x5e,0x28,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x28,0x28,0x28,0x75,0x69,
|
||||
0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x28,0x30,0x78,0x30,0x32,0x7c,0x28,0x31,0x3c,0x3c,0x32,0x29,0x29,0x29,0x3c,0x3c,0x28,0x74,0x61,0x69,0x6c,0x5f,0x73,0x69,0x7a,
|
||||
0x65,0x2a,0x38,0x29,0x29,0x29,0x3b,0x0a,0x41,0x5b,0x31,0x36,0x5d,0x20,0x5e,0x3d,0x20,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
|
||||
0x30,0x30,0x30,0x55,0x4c,0x3b,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x20,0x31,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,
|
||||
0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x52,0x4f,0x55,0x4e,0x44,0x53,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x0a,0x7b,0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x41,0x5b,0x73,0x5d,
|
||||
0x5e,0x41,0x5b,0x73,0x2b,0x35,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x31,0x30,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x31,0x35,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x32,0x30,0x5d,0x3b,
|
||||
0x0a,0x44,0x5b,0x74,0x5d,0x3d,0x43,0x5b,0x62,0x5b,0x32,0x30,0x2b,0x73,0x5d,0x5d,0x5e,0x52,0x36,0x34,0x28,0x43,0x5b,0x62,0x5b,0x35,0x2b,0x73,0x5d,0x5d,0x2c,0x31,
|
||||
0x2c,0x36,0x33,0x29,0x3b,0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x52,0x36,0x34,0x28,0x41,0x5b,0x61,0x5b,0x74,0x5d,0x5d,0x5e,0x44,0x5b,0x62,0x5b,0x74,0x5d,0x5d,0x2c,0x72,
|
||||
0x6f,0x5b,0x74,0x5d,0x5b,0x30,0x5d,0x2c,0x72,0x6f,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x29,0x3b,0x0a,0x41,0x5b,0x64,0x5b,0x74,0x5d,0x5d,0x3d,0x43,0x5b,0x63,0x5b,0x74,
|
||||
0x5d,0x5b,0x30,0x5d,0x5d,0x5e,0x28,0x28,0x7e,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x5d,0x29,0x26,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x32,0x5d,0x5d,0x29,
|
||||
0x3b,0x0a,0x41,0x5b,0x74,0x5d,0x20,0x5e,0x3d,0x20,0x72,0x63,0x5b,0x28,0x74,0x3d,0x3d,0x30,0x29,0x3f,0x30,0x3a,0x31,0x5d,0x5b,0x69,0x5d,0x3b,0x20,0x0a,0x7d,0x0a,
|
||||
0x69,0x66,0x28,0x74,0x3c,0x34,0x29,0x0a,0x7b,0x0a,0x68,0x61,0x73,0x68,0x65,0x73,0x2b,0x3d,0x67,0x2a,0x28,0x33,0x32,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,
|
||||
0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x29,0x3b,0x0a,0x68,0x61,0x73,0x68,0x65,0x73,0x5b,0x74,0x5d,0x3d,0x41,0x5b,0x74,0x5d,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x5f,
|
||||
0x5f,0x61,0x74,0x74,0x72,0x69,0x62,0x75,0x74,0x65,0x5f,0x5f,0x28,0x28,0x72,0x65,0x71,0x64,0x5f,0x77,0x6f,0x72,0x6b,0x5f,0x67,0x72,0x6f,0x75,0x70,0x5f,0x73,0x69,
|
||||
0x7a,0x65,0x28,0x33,0x32,0x2c,0x31,0x2c,0x31,0x29,0x29,0x29,0x0a,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x73,0x68,0x61,0x33,0x5f,
|
||||
0x69,0x6e,0x69,0x74,0x69,0x61,0x6c,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x38,0x5f,0x74,0x2a,0x20,
|
||||
0x69,0x6e,0x70,0x75,0x74,0x5f,0x64,0x61,0x74,0x61,0x2c,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x2c,0x75,
|
||||
0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x6e,0x6f,0x6e,0x63,0x65,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x2a,
|
||||
0x20,0x68,0x61,0x73,0x68,0x65,0x73,0x29,0x0a,0x7b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x74,0x3d,0x67,0x65,0x74,0x5f,
|
||||
0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x67,0x3d,0x67,0x65,
|
||||
0x74,0x5f,0x67,0x72,0x6f,0x75,0x70,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x69,0x66,0x28,0x74,0x3e,0x3d,0x32,0x35,0x29,0x0a,0x72,0x65,0x74,0x75,0x72,0x6e,0x3b,
|
||||
0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x73,0x3d,0x74,0x20,0x25,0x20,0x35,0x3b,0x0a,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,
|
||||
0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x3d,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,
|
||||
0x36,0x34,0x5f,0x74,0x2a,0x29,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x64,0x61,0x74,0x61,0x29,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,
|
||||
0x36,0x34,0x5f,0x74,0x20,0x41,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x43,0x5b,0x32,
|
||||
0x35,0x5d,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x44,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x41,0x5b,0x74,0x5d,
|
||||
0x3d,0x28,0x74,0x3c,0x31,0x36,0x29,0x3f,0x69,0x6e,0x70,0x75,0x74,0x5b,0x74,0x5d,0x3a,0x30,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,
|
||||
0x33,0x32,0x5f,0x74,0x2a,0x20,0x6e,0x6f,0x6e,0x63,0x65,0x5f,0x70,0x6f,0x73,0x3d,0x28,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,
|
||||
0x74,0x2a,0x29,0x28,0x41,0x29,0x2b,0x39,0x3b,0x0a,0x6e,0x6f,0x6e,0x63,0x65,0x2b,0x3d,0x67,0x3b,0x0a,0x6e,0x6f,0x6e,0x63,0x65,0x5f,0x70,0x6f,0x73,0x5b,0x30,0x5d,
|
||||
0x3d,0x28,0x6e,0x6f,0x6e,0x63,0x65,0x5f,0x70,0x6f,0x73,0x5b,0x30,0x5d,0x26,0x30,0x78,0x46,0x46,0x46,0x46,0x46,0x46,0x55,0x29,0x7c,0x28,0x28,0x6e,0x6f,0x6e,0x63,
|
||||
0x65,0x26,0x30,0x78,0x46,0x46,0x29,0x3c,0x3c,0x32,0x34,0x29,0x3b,0x0a,0x6e,0x6f,0x6e,0x63,0x65,0x5f,0x70,0x6f,0x73,0x5b,0x31,0x5d,0x3d,0x28,0x6e,0x6f,0x6e,0x63,
|
||||
0x65,0x5f,0x70,0x6f,0x73,0x5b,0x31,0x5d,0x26,0x30,0x78,0x46,0x46,0x30,0x30,0x30,0x30,0x30,0x30,0x55,0x29,0x7c,0x28,0x6e,0x6f,0x6e,0x63,0x65,0x3e,0x3e,0x38,0x29,
|
||||
0x3b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x3d,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x2f,
|
||||
0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,
|
||||
0x20,0x74,0x61,0x69,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x3d,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x20,0x25,0x20,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,
|
||||
0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x3b,0x0a,0x41,0x5b,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x28,0x75,0x69,0x6e,0x74,0x36,
|
||||
0x34,0x5f,0x74,0x29,0x28,0x28,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x28,0x30,0x78,0x30,0x32,0x7c,0x28,0x31,0x3c,0x3c,0x32,0x29,0x29,0x29,0x3c,0x3c,
|
||||
0x28,0x74,0x61,0x69,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x2a,0x38,0x29,0x29,0x3b,0x0a,0x41,0x5b,0x31,0x36,0x5d,0x20,0x5e,0x3d,0x20,0x30,0x78,0x38,0x30,0x30,0x30,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x55,0x4c,0x3b,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x20,0x52,0x4f,
|
||||
0x55,0x4e,0x44,0x53,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x52,0x4f,0x55,0x4e,0x44,0x53,0x3b,0x20,0x2b,0x2b,0x69,
|
||||
0x29,0x0a,0x7b,0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x41,0x5b,0x73,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x35,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x31,0x30,0x5d,0x5e,0x41,0x5b,0x73,
|
||||
0x2b,0x31,0x35,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x32,0x30,0x5d,0x3b,0x0a,0x44,0x5b,0x74,0x5d,0x3d,0x43,0x5b,0x62,0x5b,0x32,0x30,0x2b,0x73,0x5d,0x5d,0x5e,0x52,0x36,
|
||||
0x34,0x28,0x43,0x5b,0x62,0x5b,0x35,0x2b,0x73,0x5d,0x5d,0x2c,0x31,0x2c,0x36,0x33,0x29,0x3b,0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x52,0x36,0x34,0x28,0x41,0x5b,0x61,0x5b,
|
||||
0x74,0x5d,0x5d,0x5e,0x44,0x5b,0x62,0x5b,0x74,0x5d,0x5d,0x2c,0x72,0x6f,0x5b,0x74,0x5d,0x5b,0x30,0x5d,0x2c,0x72,0x6f,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x29,0x3b,0x0a,
|
||||
0x41,0x5b,0x64,0x5b,0x74,0x5d,0x5d,0x3d,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x30,0x5d,0x5d,0x5e,0x28,0x28,0x7e,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x5d,
|
||||
0x29,0x26,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x32,0x5d,0x5d,0x29,0x3b,0x0a,0x41,0x5b,0x74,0x5d,0x20,0x5e,0x3d,0x20,0x72,0x63,0x5b,0x28,0x74,0x3d,0x3d,0x30,0x29,
|
||||
0x3f,0x30,0x3a,0x31,0x5d,0x5b,0x69,0x5d,0x3b,0x20,0x0a,0x7d,0x0a,0x69,0x66,0x28,0x74,0x3c,0x34,0x29,0x0a,0x7b,0x0a,0x68,0x61,0x73,0x68,0x65,0x73,0x2b,0x3d,0x67,
|
||||
0x2a,0x28,0x33,0x32,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x29,0x3b,0x0a,0x68,0x61,0x73,0x68,0x65,0x73,0x5b,0x74,
|
||||
0x5d,0x3d,0x41,0x5b,0x74,0x5d,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x00
|
||||
0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x7d,
|
||||
0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x72,0x6f,0x5b,0x32,0x35,0x5d,0x5b,
|
||||
0x32,0x5d,0x3d,0x7b,0x0a,0x7b,0x20,0x30,0x2c,0x36,0x34,0x7d,0x2c,0x7b,0x34,0x34,0x2c,0x32,0x30,0x7d,0x2c,0x7b,0x34,0x33,0x2c,0x32,0x31,0x7d,0x2c,0x7b,0x32,0x31,
|
||||
0x2c,0x34,0x33,0x7d,0x2c,0x7b,0x31,0x34,0x2c,0x35,0x30,0x7d,0x2c,0x0a,0x7b,0x20,0x31,0x2c,0x36,0x33,0x7d,0x2c,0x7b,0x20,0x36,0x2c,0x35,0x38,0x7d,0x2c,0x7b,0x32,
|
||||
0x35,0x2c,0x33,0x39,0x7d,0x2c,0x7b,0x20,0x38,0x2c,0x35,0x36,0x7d,0x2c,0x7b,0x31,0x38,0x2c,0x34,0x36,0x7d,0x2c,0x0a,0x7b,0x36,0x32,0x2c,0x32,0x7d,0x2c,0x7b,0x35,
|
||||
0x35,0x2c,0x39,0x7d,0x2c,0x7b,0x33,0x39,0x2c,0x32,0x35,0x7d,0x2c,0x7b,0x34,0x31,0x2c,0x32,0x33,0x7d,0x2c,0x7b,0x20,0x32,0x2c,0x36,0x32,0x7d,0x2c,0x0a,0x7b,0x32,
|
||||
0x38,0x2c,0x33,0x36,0x7d,0x2c,0x7b,0x32,0x30,0x2c,0x34,0x34,0x7d,0x2c,0x7b,0x20,0x33,0x2c,0x36,0x31,0x7d,0x2c,0x7b,0x34,0x35,0x2c,0x31,0x39,0x7d,0x2c,0x7b,0x36,
|
||||
0x31,0x2c,0x33,0x7d,0x2c,0x0a,0x7b,0x32,0x37,0x2c,0x33,0x37,0x7d,0x2c,0x7b,0x33,0x36,0x2c,0x32,0x38,0x7d,0x2c,0x7b,0x31,0x30,0x2c,0x35,0x34,0x7d,0x2c,0x7b,0x31,
|
||||
0x35,0x2c,0x34,0x39,0x7d,0x2c,0x7b,0x35,0x36,0x2c,0x38,0x7d,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,0x73,0x74,
|
||||
0x20,0x69,0x6e,0x74,0x20,0x61,0x5b,0x32,0x35,0x5d,0x3d,0x7b,0x0a,0x30,0x2c,0x36,0x2c,0x31,0x32,0x2c,0x31,0x38,0x2c,0x32,0x34,0x2c,0x0a,0x31,0x2c,0x37,0x2c,0x31,
|
||||
0x33,0x2c,0x31,0x39,0x2c,0x32,0x30,0x2c,0x0a,0x32,0x2c,0x38,0x2c,0x31,0x34,0x2c,0x31,0x35,0x2c,0x32,0x31,0x2c,0x0a,0x33,0x2c,0x39,0x2c,0x31,0x30,0x2c,0x31,0x36,
|
||||
0x2c,0x32,0x32,0x2c,0x0a,0x34,0x2c,0x35,0x2c,0x31,0x31,0x2c,0x31,0x37,0x2c,0x32,0x33,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,
|
||||
0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x5b,0x32,0x35,0x5d,0x3d,0x7b,0x0a,0x30,0x2c,0x31,0x2c,0x32,0x2c,0x33,0x2c,0x34,0x2c,0x0a,0x31,0x2c,0x32,
|
||||
0x2c,0x33,0x2c,0x34,0x2c,0x30,0x2c,0x0a,0x32,0x2c,0x33,0x2c,0x34,0x2c,0x30,0x2c,0x31,0x2c,0x0a,0x33,0x2c,0x34,0x2c,0x30,0x2c,0x31,0x2c,0x32,0x2c,0x0a,0x34,0x2c,
|
||||
0x30,0x2c,0x31,0x2c,0x32,0x2c,0x33,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,
|
||||
0x63,0x5b,0x32,0x35,0x5d,0x5b,0x33,0x5d,0x3d,0x7b,0x0a,0x7b,0x20,0x30,0x2c,0x31,0x2c,0x32,0x7d,0x2c,0x7b,0x20,0x31,0x2c,0x32,0x2c,0x33,0x7d,0x2c,0x7b,0x20,0x32,
|
||||
0x2c,0x33,0x2c,0x34,0x7d,0x2c,0x7b,0x20,0x33,0x2c,0x34,0x2c,0x30,0x7d,0x2c,0x7b,0x20,0x34,0x2c,0x30,0x2c,0x31,0x7d,0x2c,0x0a,0x7b,0x20,0x35,0x2c,0x36,0x2c,0x37,
|
||||
0x7d,0x2c,0x7b,0x20,0x36,0x2c,0x37,0x2c,0x38,0x7d,0x2c,0x7b,0x20,0x37,0x2c,0x38,0x2c,0x39,0x7d,0x2c,0x7b,0x20,0x38,0x2c,0x39,0x2c,0x35,0x7d,0x2c,0x7b,0x20,0x39,
|
||||
0x2c,0x35,0x2c,0x36,0x7d,0x2c,0x0a,0x7b,0x31,0x30,0x2c,0x31,0x31,0x2c,0x31,0x32,0x7d,0x2c,0x7b,0x31,0x31,0x2c,0x31,0x32,0x2c,0x31,0x33,0x7d,0x2c,0x7b,0x31,0x32,
|
||||
0x2c,0x31,0x33,0x2c,0x31,0x34,0x7d,0x2c,0x7b,0x31,0x33,0x2c,0x31,0x34,0x2c,0x31,0x30,0x7d,0x2c,0x7b,0x31,0x34,0x2c,0x31,0x30,0x2c,0x31,0x31,0x7d,0x2c,0x0a,0x7b,
|
||||
0x31,0x35,0x2c,0x31,0x36,0x2c,0x31,0x37,0x7d,0x2c,0x7b,0x31,0x36,0x2c,0x31,0x37,0x2c,0x31,0x38,0x7d,0x2c,0x7b,0x31,0x37,0x2c,0x31,0x38,0x2c,0x31,0x39,0x7d,0x2c,
|
||||
0x7b,0x31,0x38,0x2c,0x31,0x39,0x2c,0x31,0x35,0x7d,0x2c,0x7b,0x31,0x39,0x2c,0x31,0x35,0x2c,0x31,0x36,0x7d,0x2c,0x0a,0x7b,0x32,0x30,0x2c,0x32,0x31,0x2c,0x32,0x32,
|
||||
0x7d,0x2c,0x7b,0x32,0x31,0x2c,0x32,0x32,0x2c,0x32,0x33,0x7d,0x2c,0x7b,0x32,0x32,0x2c,0x32,0x33,0x2c,0x32,0x34,0x7d,0x2c,0x7b,0x32,0x33,0x2c,0x32,0x34,0x2c,0x32,
|
||||
0x30,0x7d,0x2c,0x7b,0x32,0x34,0x2c,0x32,0x30,0x2c,0x32,0x31,0x7d,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,0x73,
|
||||
0x74,0x20,0x69,0x6e,0x74,0x20,0x64,0x5b,0x32,0x35,0x5d,0x3d,0x7b,0x0a,0x30,0x2c,0x31,0x2c,0x32,0x2c,0x33,0x2c,0x34,0x2c,0x0a,0x31,0x30,0x2c,0x31,0x31,0x2c,0x31,
|
||||
0x32,0x2c,0x31,0x33,0x2c,0x31,0x34,0x2c,0x0a,0x32,0x30,0x2c,0x32,0x31,0x2c,0x32,0x32,0x2c,0x32,0x33,0x2c,0x32,0x34,0x2c,0x0a,0x35,0x2c,0x36,0x2c,0x37,0x2c,0x38,
|
||||
0x2c,0x39,0x2c,0x0a,0x31,0x35,0x2c,0x31,0x36,0x2c,0x31,0x37,0x2c,0x31,0x38,0x2c,0x31,0x39,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x61,0x74,0x74,0x72,0x69,0x62,0x75,0x74,
|
||||
0x65,0x5f,0x5f,0x28,0x28,0x72,0x65,0x71,0x64,0x5f,0x77,0x6f,0x72,0x6b,0x5f,0x67,0x72,0x6f,0x75,0x70,0x5f,0x73,0x69,0x7a,0x65,0x28,0x33,0x32,0x2c,0x31,0x2c,0x31,
|
||||
0x29,0x29,0x29,0x0a,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x73,0x68,0x61,0x33,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,
|
||||
0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x38,0x5f,0x74,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x73,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x63,
|
||||
0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x73,0x2c,0x75,0x69,0x6e,0x74,0x33,
|
||||
0x32,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x74,0x72,0x69,0x64,0x65,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,
|
||||
0x5f,0x74,0x2a,0x20,0x68,0x61,0x73,0x68,0x65,0x73,0x29,0x0a,0x7b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x74,0x3d,0x67,
|
||||
0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x67,
|
||||
0x3d,0x67,0x65,0x74,0x5f,0x67,0x72,0x6f,0x75,0x70,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x69,0x66,0x28,0x74,0x3e,0x3d,0x32,0x35,0x29,0x0a,0x72,0x65,0x74,0x75,
|
||||
0x72,0x6e,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x73,0x3d,0x74,0x20,0x25,0x20,0x35,0x3b,0x0a,0x63,0x6f,0x6e,0x73,
|
||||
0x74,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x3d,0x28,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,
|
||||
0x5f,0x74,0x29,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x74,0x72,0x69,0x64,0x65,0x29,0x2a,0x67,0x3b,0x0a,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,
|
||||
0x74,0x36,0x34,0x5f,0x74,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x3d,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x2a,
|
||||
0x29,0x28,0x69,0x6e,0x70,0x75,0x74,0x73,0x2b,0x69,0x6e,0x70,0x75,0x74,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,
|
||||
0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x3d,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x73,0x5b,0x67,0x5d,
|
||||
0x2b,0x31,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x41,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,
|
||||
0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x43,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,
|
||||
0x36,0x34,0x5f,0x74,0x20,0x44,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x41,0x5b,0x74,0x5d,0x3d,0x30,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,
|
||||
0x5f,0x74,0x20,0x77,0x6f,0x72,0x64,0x73,0x3d,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x36,
|
||||
0x34,0x5f,0x74,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x74,0x61,0x69,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x3d,0x69,
|
||||
0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x20,0x25,0x20,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x3b,0x0a,0x75,0x69,
|
||||
0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x3d,0x30,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,
|
||||
0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x77,0x6f,0x72,0x64,0x73,0x3b,0x20,0x2b,0x2b,0x69,0x2c,0x2b,0x2b,0x69,0x6e,0x70,0x75,0x74,0x29,0x0a,0x7b,0x0a,0x41,
|
||||
0x5b,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x2a,0x69,0x6e,0x70,0x75,0x74,0x3b,0x0a,0x2b,0x2b,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,
|
||||
0x65,0x78,0x3b,0x0a,0x69,0x66,0x28,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x3d,0x3d,0x31,0x37,0x29,0x0a,0x7b,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,
|
||||
0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x20,0x52,0x4f,0x55,0x4e,0x44,0x53,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x52,0x4f,
|
||||
0x55,0x4e,0x44,0x53,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x0a,0x7b,0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x41,0x5b,0x73,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x35,0x5d,0x5e,0x41,0x5b,
|
||||
0x73,0x2b,0x31,0x30,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x31,0x35,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x32,0x30,0x5d,0x3b,0x0a,0x44,0x5b,0x74,0x5d,0x3d,0x43,0x5b,0x62,0x5b,
|
||||
0x32,0x30,0x2b,0x73,0x5d,0x5d,0x5e,0x52,0x36,0x34,0x28,0x43,0x5b,0x62,0x5b,0x35,0x2b,0x73,0x5d,0x5d,0x2c,0x31,0x2c,0x36,0x33,0x29,0x3b,0x0a,0x43,0x5b,0x74,0x5d,
|
||||
0x3d,0x52,0x36,0x34,0x28,0x41,0x5b,0x61,0x5b,0x74,0x5d,0x5d,0x5e,0x44,0x5b,0x62,0x5b,0x74,0x5d,0x5d,0x2c,0x72,0x6f,0x5b,0x74,0x5d,0x5b,0x30,0x5d,0x2c,0x72,0x6f,
|
||||
0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x29,0x3b,0x0a,0x41,0x5b,0x64,0x5b,0x74,0x5d,0x5d,0x3d,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x30,0x5d,0x5d,0x5e,0x28,0x28,0x7e,0x43,
|
||||
0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x5d,0x29,0x26,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x32,0x5d,0x5d,0x29,0x3b,0x0a,0x41,0x5b,0x74,0x5d,0x20,0x5e,0x3d,0x20,
|
||||
0x72,0x63,0x5b,0x28,0x74,0x3d,0x3d,0x30,0x29,0x3f,0x30,0x3a,0x31,0x5d,0x5b,0x69,0x5d,0x3b,0x0a,0x7d,0x0a,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x3d,0x30,
|
||||
0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x74,0x61,0x69,0x6c,0x3d,0x30,0x3b,0x0a,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,
|
||||
0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x38,0x5f,0x74,0x2a,0x20,0x70,0x3d,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x63,0x6f,0x6e,0x73,0x74,
|
||||
0x20,0x75,0x69,0x6e,0x74,0x38,0x5f,0x74,0x2a,0x29,0x69,0x6e,0x70,0x75,0x74,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x69,
|
||||
0x3d,0x30,0x3b,0x20,0x69,0x3c,0x74,0x61,0x69,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x0a,0x7b,0x0a,0x74,0x61,0x69,0x6c,0x7c,0x3d,0x28,0x75,
|
||||
0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x28,0x70,0x5b,0x69,0x5d,0x29,0x3c,0x3c,0x28,0x69,0x2a,0x38,0x29,0x3b,0x0a,0x7d,0x0a,0x41,0x5b,0x77,0x6f,0x72,0x64,0x49,
|
||||
0x6e,0x64,0x65,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x74,0x61,0x69,0x6c,0x5e,0x28,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x28,0x28,0x28,0x75,0x69,0x6e,0x74,
|
||||
0x36,0x34,0x5f,0x74,0x29,0x28,0x30,0x78,0x30,0x32,0x7c,0x28,0x31,0x3c,0x3c,0x32,0x29,0x29,0x29,0x3c,0x3c,0x28,0x74,0x61,0x69,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x2a,
|
||||
0x38,0x29,0x29,0x29,0x3b,0x0a,0x41,0x5b,0x31,0x36,0x5d,0x20,0x5e,0x3d,0x20,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
|
||||
0x30,0x55,0x4c,0x3b,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x20,0x31,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,
|
||||
0x3d,0x30,0x3b,0x20,0x69,0x3c,0x52,0x4f,0x55,0x4e,0x44,0x53,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x0a,0x7b,0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x41,0x5b,0x73,0x5d,0x5e,0x41,
|
||||
0x5b,0x73,0x2b,0x35,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x31,0x30,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x31,0x35,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x32,0x30,0x5d,0x3b,0x0a,0x44,
|
||||
0x5b,0x74,0x5d,0x3d,0x43,0x5b,0x62,0x5b,0x32,0x30,0x2b,0x73,0x5d,0x5d,0x5e,0x52,0x36,0x34,0x28,0x43,0x5b,0x62,0x5b,0x35,0x2b,0x73,0x5d,0x5d,0x2c,0x31,0x2c,0x36,
|
||||
0x33,0x29,0x3b,0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x52,0x36,0x34,0x28,0x41,0x5b,0x61,0x5b,0x74,0x5d,0x5d,0x5e,0x44,0x5b,0x62,0x5b,0x74,0x5d,0x5d,0x2c,0x72,0x6f,0x5b,
|
||||
0x74,0x5d,0x5b,0x30,0x5d,0x2c,0x72,0x6f,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x29,0x3b,0x0a,0x41,0x5b,0x64,0x5b,0x74,0x5d,0x5d,0x3d,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,
|
||||
0x30,0x5d,0x5d,0x5e,0x28,0x28,0x7e,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x5d,0x29,0x26,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x32,0x5d,0x5d,0x29,0x3b,0x0a,
|
||||
0x41,0x5b,0x74,0x5d,0x20,0x5e,0x3d,0x20,0x72,0x63,0x5b,0x28,0x74,0x3d,0x3d,0x30,0x29,0x3f,0x30,0x3a,0x31,0x5d,0x5b,0x69,0x5d,0x3b,0x0a,0x7d,0x0a,0x69,0x66,0x28,
|
||||
0x74,0x3c,0x34,0x29,0x0a,0x7b,0x0a,0x68,0x61,0x73,0x68,0x65,0x73,0x2b,0x3d,0x67,0x2a,0x28,0x33,0x32,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,
|
||||
0x36,0x34,0x5f,0x74,0x29,0x29,0x3b,0x0a,0x68,0x61,0x73,0x68,0x65,0x73,0x5b,0x74,0x5d,0x3d,0x41,0x5b,0x74,0x5d,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x5f,0x5f,0x61,0x74,
|
||||
0x74,0x72,0x69,0x62,0x75,0x74,0x65,0x5f,0x5f,0x28,0x28,0x72,0x65,0x71,0x64,0x5f,0x77,0x6f,0x72,0x6b,0x5f,0x67,0x72,0x6f,0x75,0x70,0x5f,0x73,0x69,0x7a,0x65,0x28,
|
||||
0x33,0x32,0x2c,0x31,0x2c,0x31,0x29,0x29,0x29,0x0a,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x73,0x68,0x61,0x33,0x5f,0x69,0x6e,0x69,
|
||||
0x74,0x69,0x61,0x6c,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x38,0x5f,0x74,0x2a,0x20,0x69,0x6e,0x70,
|
||||
0x75,0x74,0x5f,0x64,0x61,0x74,0x61,0x2c,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x2c,0x75,0x69,0x6e,0x74,
|
||||
0x33,0x32,0x5f,0x74,0x20,0x6e,0x6f,0x6e,0x63,0x65,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x2a,0x20,0x68,0x61,
|
||||
0x73,0x68,0x65,0x73,0x29,0x0a,0x7b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x74,0x3d,0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,
|
||||
0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x67,0x3d,0x67,0x65,0x74,0x5f,0x67,
|
||||
0x72,0x6f,0x75,0x70,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x69,0x66,0x28,0x74,0x3e,0x3d,0x32,0x35,0x29,0x0a,0x72,0x65,0x74,0x75,0x72,0x6e,0x3b,0x0a,0x63,0x6f,
|
||||
0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x73,0x3d,0x74,0x20,0x25,0x20,0x35,0x3b,0x0a,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,
|
||||
0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x3d,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,
|
||||
0x74,0x2a,0x29,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x64,0x61,0x74,0x61,0x29,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,
|
||||
0x74,0x20,0x41,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x43,0x5b,0x32,0x35,0x5d,0x3b,
|
||||
0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x44,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x41,0x5b,0x74,0x5d,0x3d,0x28,0x74,
|
||||
0x3c,0x31,0x36,0x29,0x3f,0x69,0x6e,0x70,0x75,0x74,0x5b,0x74,0x5d,0x3a,0x30,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,
|
||||
0x74,0x2a,0x20,0x6e,0x6f,0x6e,0x63,0x65,0x5f,0x70,0x6f,0x73,0x3d,0x28,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x2a,0x29,
|
||||
0x28,0x41,0x29,0x2b,0x39,0x3b,0x0a,0x6e,0x6f,0x6e,0x63,0x65,0x2b,0x3d,0x67,0x3b,0x0a,0x6e,0x6f,0x6e,0x63,0x65,0x5f,0x70,0x6f,0x73,0x5b,0x30,0x5d,0x3d,0x28,0x6e,
|
||||
0x6f,0x6e,0x63,0x65,0x5f,0x70,0x6f,0x73,0x5b,0x30,0x5d,0x26,0x30,0x78,0x46,0x46,0x46,0x46,0x46,0x46,0x55,0x29,0x7c,0x28,0x28,0x6e,0x6f,0x6e,0x63,0x65,0x26,0x30,
|
||||
0x78,0x46,0x46,0x29,0x3c,0x3c,0x32,0x34,0x29,0x3b,0x0a,0x6e,0x6f,0x6e,0x63,0x65,0x5f,0x70,0x6f,0x73,0x5b,0x31,0x5d,0x3d,0x28,0x6e,0x6f,0x6e,0x63,0x65,0x5f,0x70,
|
||||
0x6f,0x73,0x5b,0x31,0x5d,0x26,0x30,0x78,0x46,0x46,0x30,0x30,0x30,0x30,0x30,0x30,0x55,0x29,0x7c,0x28,0x6e,0x6f,0x6e,0x63,0x65,0x3e,0x3e,0x38,0x29,0x3b,0x0a,0x75,
|
||||
0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x3d,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x2f,0x73,0x69,0x7a,
|
||||
0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x74,0x61,
|
||||
0x69,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x3d,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x20,0x25,0x20,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,
|
||||
0x36,0x34,0x5f,0x74,0x29,0x3b,0x0a,0x41,0x5b,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,
|
||||
0x29,0x28,0x28,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x28,0x30,0x78,0x30,0x32,0x7c,0x28,0x31,0x3c,0x3c,0x32,0x29,0x29,0x29,0x3c,0x3c,0x28,0x74,0x61,
|
||||
0x69,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x2a,0x38,0x29,0x29,0x3b,0x0a,0x41,0x5b,0x31,0x36,0x5d,0x20,0x5e,0x3d,0x20,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
|
||||
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x55,0x4c,0x3b,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x20,0x52,0x4f,0x55,0x4e,0x44,
|
||||
0x53,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x52,0x4f,0x55,0x4e,0x44,0x53,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x0a,0x7b,
|
||||
0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x41,0x5b,0x73,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x35,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x31,0x30,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x31,0x35,
|
||||
0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x32,0x30,0x5d,0x3b,0x0a,0x44,0x5b,0x74,0x5d,0x3d,0x43,0x5b,0x62,0x5b,0x32,0x30,0x2b,0x73,0x5d,0x5d,0x5e,0x52,0x36,0x34,0x28,0x43,
|
||||
0x5b,0x62,0x5b,0x35,0x2b,0x73,0x5d,0x5d,0x2c,0x31,0x2c,0x36,0x33,0x29,0x3b,0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x52,0x36,0x34,0x28,0x41,0x5b,0x61,0x5b,0x74,0x5d,0x5d,
|
||||
0x5e,0x44,0x5b,0x62,0x5b,0x74,0x5d,0x5d,0x2c,0x72,0x6f,0x5b,0x74,0x5d,0x5b,0x30,0x5d,0x2c,0x72,0x6f,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x29,0x3b,0x0a,0x41,0x5b,0x64,
|
||||
0x5b,0x74,0x5d,0x5d,0x3d,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x30,0x5d,0x5d,0x5e,0x28,0x28,0x7e,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x5d,0x29,0x26,0x43,
|
||||
0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x32,0x5d,0x5d,0x29,0x3b,0x0a,0x41,0x5b,0x74,0x5d,0x20,0x5e,0x3d,0x20,0x72,0x63,0x5b,0x28,0x74,0x3d,0x3d,0x30,0x29,0x3f,0x30,0x3a,
|
||||
0x31,0x5d,0x5b,0x69,0x5d,0x3b,0x0a,0x7d,0x0a,0x69,0x66,0x28,0x74,0x3c,0x34,0x29,0x0a,0x7b,0x0a,0x68,0x61,0x73,0x68,0x65,0x73,0x2b,0x3d,0x67,0x2a,0x28,0x33,0x32,
|
||||
0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x29,0x3b,0x0a,0x68,0x61,0x73,0x68,0x65,0x73,0x5b,0x74,0x5d,0x3d,0x41,0x5b,
|
||||
0x74,0x5d,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x00
|
||||
};
|
||||
|
||||
} // namespace xmrig
|
||||
|
||||
@@ -132,6 +132,7 @@ set(SOURCES_BASE
|
||||
src/base/net/tools/LineReader.cpp
|
||||
src/base/net/tools/NetBuffer.cpp
|
||||
src/base/tools/Arguments.cpp
|
||||
src/base/tools/Chrono.cpp
|
||||
src/base/tools/cryptonote/BlockTemplate.cpp
|
||||
src/base/tools/cryptonote/crypto-ops-data.c
|
||||
src/base/tools/cryptonote/crypto-ops.c
|
||||
@@ -244,7 +245,7 @@ else()
|
||||
endif()
|
||||
|
||||
|
||||
if (WITH_KAWPOW)
|
||||
if (WITH_KAWPOW OR WITH_GHOSTRIDER)
|
||||
list(APPEND HEADERS_BASE
|
||||
src/base/net/stratum/AutoClient.h
|
||||
src/base/net/stratum/EthStratumClient.h
|
||||
|
||||
@@ -189,6 +189,7 @@ public:
|
||||
inline Id id() const { return m_id; }
|
||||
inline size_t l2() const { return l2(m_id); }
|
||||
inline uint32_t family() const { return family(m_id); }
|
||||
// Lower intensity bound for this algorithm: 8 when m_id is GHOSTRIDER_RTM, otherwise 1.
inline uint32_t minIntensity() const { return ((m_id == GHOSTRIDER_RTM) ? 8 : 1); };
|
||||
// Upper intensity bound for this algorithm: 5 when isCN() is true; otherwise 8 when m_id is
// GHOSTRIDER_RTM, and 1 for everything else.
inline uint32_t maxIntensity() const { return isCN() ? 5 : ((m_id == GHOSTRIDER_RTM) ? 8 : 1); };
|
||||
|
||||
inline size_t l3() const
|
||||
|
||||
@@ -73,7 +73,7 @@ int64_t xmrig::EthStratumClient::submit(const JobResult& result)
|
||||
auto& allocator = doc.GetAllocator();
|
||||
|
||||
Value params(kArrayType);
|
||||
params.PushBack(m_pool.user().toJSON(), allocator);
|
||||
params.PushBack(m_user.toJSON(), allocator);
|
||||
params.PushBack(result.jobId.toJSON(), allocator);
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
@@ -213,12 +213,13 @@ void xmrig::EthStratumClient::parseNotification(const char *method, const rapidj
|
||||
return;
|
||||
}
|
||||
|
||||
if (!arr[0].IsDouble()) {
|
||||
if (!arr[0].IsDouble() && !arr[0].IsUint64()) {
|
||||
LOG_ERR("%s " RED("invalid mining.set_difficulty notification: difficulty is not a number"), tag());
|
||||
return;
|
||||
}
|
||||
|
||||
m_nextDifficulty = static_cast<uint64_t>(ceil(arr[0].GetDouble() * 65536.0));
|
||||
const double diff = arr[0].IsDouble() ? arr[0].GetDouble() : arr[0].GetUint64();
|
||||
m_nextDifficulty = static_cast<uint64_t>(ceil(diff * 65536.0));
|
||||
}
|
||||
# endif
|
||||
|
||||
@@ -470,8 +471,8 @@ void xmrig::EthStratumClient::authorize()
|
||||
auto &allocator = doc.GetAllocator();
|
||||
|
||||
Value params(kArrayType);
|
||||
params.PushBack(m_pool.user().toJSON(), allocator);
|
||||
params.PushBack(m_pool.password().toJSON(), allocator);
|
||||
params.PushBack(m_user.toJSON(), allocator);
|
||||
params.PushBack(m_password.toJSON(), allocator);
|
||||
|
||||
JsonRequest::create(doc, m_sequence, "mining.authorize", params);
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@
|
||||
#include "base/kernel/Platform.h"
|
||||
#include "base/net/stratum/Client.h"
|
||||
|
||||
#ifdef XMRIG_ALGO_KAWPOW
|
||||
#if defined XMRIG_ALGO_KAWPOW || defined XMRIG_ALGO_GHOSTRIDER
|
||||
# include "base/net/stratum/AutoClient.h"
|
||||
# include "base/net/stratum/EthStratumClient.h"
|
||||
#endif
|
||||
@@ -218,7 +218,7 @@ xmrig::IClient *xmrig::Pool::createClient(int id, IClientListener *listener) con
|
||||
IClient *client = nullptr;
|
||||
|
||||
if (m_mode == MODE_POOL) {
|
||||
# ifdef XMRIG_ALGO_KAWPOW
|
||||
# if defined XMRIG_ALGO_KAWPOW || defined XMRIG_ALGO_GHOSTRIDER
|
||||
const uint32_t f = m_algorithm.family();
|
||||
if ((f == Algorithm::KAWPOW) || (f == Algorithm::GHOSTRIDER) || (m_coin == Coin::RAVEN)) {
|
||||
client = new EthStratumClient(id, Platform::userAgent(), listener);
|
||||
@@ -237,7 +237,7 @@ xmrig::IClient *xmrig::Pool::createClient(int id, IClientListener *listener) con
|
||||
client = new SelfSelectClient(id, Platform::userAgent(), listener, m_submitToOrigin);
|
||||
}
|
||||
# endif
|
||||
# ifdef XMRIG_ALGO_KAWPOW
|
||||
# if defined XMRIG_ALGO_KAWPOW || defined XMRIG_ALGO_GHOSTRIDER
|
||||
else if (m_mode == MODE_AUTO_ETH) {
|
||||
client = new AutoClient(id, Platform::userAgent(), listener);
|
||||
}
|
||||
|
||||
@@ -50,6 +50,7 @@ xmrig::BenchClient::BenchClient(const std::shared_ptr<BenchConfig> &benchmark, I
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if (m_benchmark->algorithm() == Algorithm::GHOSTRIDER_RTM) {
|
||||
const uint32_t q = (benchmark->rotation() / 20) & 1;
|
||||
const uint32_t r = benchmark->rotation() % 20;
|
||||
|
||||
static constexpr uint32_t indices[20][3] = {
|
||||
@@ -75,9 +76,9 @@ xmrig::BenchClient::BenchClient(const std::shared_ptr<BenchConfig> &benchmark, I
|
||||
{ 3, 4, 5 },
|
||||
};
|
||||
|
||||
blob[ 8] = '0' + indices[r][1];
|
||||
blob[ 8] = '0' + indices[r][q ? 2 : 1];
|
||||
blob[ 9] = '0' + indices[r][0];
|
||||
blob[11] = '0' + indices[r][2];
|
||||
blob[11] = '0' + indices[r][q ? 1 : 2];
|
||||
}
|
||||
# endif
|
||||
|
||||
|
||||
44
src/base/tools/Chrono.cpp
Normal file
44
src/base/tools/Chrono.cpp
Normal file
@@ -0,0 +1,44 @@
|
||||
/* XMRig
|
||||
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "Chrono.h"
|
||||
|
||||
|
||||
#ifdef XMRIG_OS_WIN
|
||||
# include <Windows.h>
|
||||
#endif
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
/*
 * Returns a high-resolution timestamp in milliseconds as a double, so the
 * sub-millisecond fraction is preserved.
 *
 * When XMRIG_OS_WIN is defined the value is derived from the Windows performance
 * counter; otherwise it is derived from std::chrono::high_resolution_clock.
 *
 * Returns: performance-counter ticks scaled to milliseconds on Windows; elsewhere the
 *          clock's time-since-epoch in nanoseconds divided by 1e6.
 */
double Chrono::highResolutionMSecs()
|
||||
{
|
||||
# ifdef XMRIG_OS_WIN
|
||||
// f receives the counter frequency (ticks per second), t the current tick count.
LARGE_INTEGER f, t;
|
||||
QueryPerformanceFrequency(&f);
|
||||
QueryPerformanceCounter(&t);
|
||||
// ticks * 1000 / (ticks per second) = milliseconds; the cast to double keeps the
// division in floating point so the fractional part is not truncated.
return static_cast<double>(t.QuadPart) * 1e3 / f.QuadPart;
|
||||
# else
|
||||
using namespace std::chrono;
|
||||
// Nanoseconds since the clock's epoch, divided by 1e6 to yield fractional milliseconds.
return static_cast<uint64_t>(duration_cast<nanoseconds>(high_resolution_clock::now().time_since_epoch()).count()) / 1e6;
|
||||
# endif
|
||||
}
|
||||
|
||||
|
||||
} /* namespace xmrig */
|
||||
@@ -29,12 +29,7 @@ namespace xmrig {
|
||||
class Chrono
|
||||
{
|
||||
public:
|
||||
static inline uint64_t highResolutionMSecs()
|
||||
{
|
||||
using namespace std::chrono;
|
||||
|
||||
return static_cast<uint64_t>(time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count());
|
||||
}
|
||||
static double highResolutionMSecs();
|
||||
|
||||
|
||||
static inline uint64_t steadyMSecs()
|
||||
|
||||
@@ -287,10 +287,12 @@ public:
|
||||
|
||||
void printHashrate(bool details)
|
||||
{
|
||||
char num[16 * 4] = { 0 };
|
||||
char num[16 * 5] = { 0 };
|
||||
double speed[3] = { 0.0 };
|
||||
uint32_t count = 0;
|
||||
|
||||
double avg_hashrate = 0.0;
|
||||
|
||||
for (auto backend : backends) {
|
||||
const auto hashrate = backend->hashrate();
|
||||
if (hashrate) {
|
||||
@@ -299,6 +301,8 @@ public:
|
||||
speed[0] += hashrate->calc(Hashrate::ShortInterval);
|
||||
speed[1] += hashrate->calc(Hashrate::MediumInterval);
|
||||
speed[2] += hashrate->calc(Hashrate::LargeInterval);
|
||||
|
||||
avg_hashrate += hashrate->average();
|
||||
}
|
||||
|
||||
backend->printHashrate(details);
|
||||
@@ -318,12 +322,22 @@ public:
|
||||
h = "MH/s";
|
||||
}
|
||||
|
||||
LOG_INFO("%s " WHITE_BOLD("speed") " 10s/60s/15m " CYAN_BOLD("%s") CYAN(" %s %s ") CYAN_BOLD("%s") " max " CYAN_BOLD("%s %s"),
|
||||
char avg_hashrate_buf[64];
|
||||
avg_hashrate_buf[0] = '\0';
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
if (algorithm.family() == Algorithm::GHOSTRIDER) {
|
||||
snprintf(avg_hashrate_buf, sizeof(avg_hashrate_buf), " avg " CYAN_BOLD("%s %s"), Hashrate::format(avg_hashrate * scale, num + 16 * 4, 16), h);
|
||||
}
|
||||
# endif
|
||||
|
||||
LOG_INFO("%s " WHITE_BOLD("speed") " 10s/60s/15m " CYAN_BOLD("%s") CYAN(" %s %s ") CYAN_BOLD("%s") " max " CYAN_BOLD("%s %s") "%s",
|
||||
Tags::miner(),
|
||||
Hashrate::format(speed[0] * scale, num, sizeof(num) / 4),
|
||||
Hashrate::format(speed[1] * scale, num + 16, sizeof(num) / 4),
|
||||
Hashrate::format(speed[2] * scale, num + 16 * 2, sizeof(num) / 4), h,
|
||||
Hashrate::format(maxHashrate[algorithm] * scale, num + 16 * 3, sizeof(num) / 4), h
|
||||
Hashrate::format(speed[0] * scale, num, 16),
|
||||
Hashrate::format(speed[1] * scale, num + 16, 16),
|
||||
Hashrate::format(speed[2] * scale, num + 16 * 2, 16), h,
|
||||
Hashrate::format(maxHashrate[algorithm] * scale, num + 16 * 3, 16), h,
|
||||
avg_hashrate_buf
|
||||
);
|
||||
|
||||
# ifdef XMRIG_FEATURE_BENCHMARK
|
||||
|
||||
@@ -55,6 +55,10 @@
|
||||
} while (0)
|
||||
|
||||
|
||||
// Global runtime switches, both off by default.
// NOTE(review): no reader of cn_sse41_enabled is visible in this chunk; presumably it gates an
// SSE4.1 code path — confirm against its users.
bool cn_sse41_enabled = false;
|
||||
// When true (and XMRIG_VAES is defined), the non-soft-AES, non-heavy scratchpad explode/implode
// routines dispatch to their *_vaes implementations instead of the generic code.
bool cn_vaes_enabled = false;
|
||||
|
||||
|
||||
#ifdef XMRIG_FEATURE_ASM
|
||||
# define ADD_FN_ASM(algo) do { \
|
||||
m_map[algo]->data[AV_SINGLE][Assembly::INTEL] = cryptonight_single_hash_asm<algo, Assembly::INTEL>; \
|
||||
@@ -97,6 +101,27 @@ cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm = nullptr;
|
||||
cn_mainloop_fun cn_upx2_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_upx2_double_mainloop_asm = nullptr;
|
||||
|
||||
cn_mainloop_fun cn_gr0_single_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr1_single_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr2_single_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr3_single_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr4_single_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr5_single_mainloop_asm = nullptr;
|
||||
|
||||
cn_mainloop_fun cn_gr0_double_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr1_double_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr2_double_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr3_double_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr4_double_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr5_double_mainloop_asm = nullptr;
|
||||
|
||||
cn_mainloop_fun cn_gr0_quad_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr1_quad_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr2_quad_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr3_quad_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr4_quad_mainloop_asm = nullptr;
|
||||
cn_mainloop_fun cn_gr5_quad_mainloop_asm = nullptr;
|
||||
|
||||
|
||||
template<Algorithm::Id SOURCE_ALGO = Algorithm::CN_2, typename T, typename U>
|
||||
static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t mask = CnAlgo<Algorithm::CN_HALF>().mask())
|
||||
@@ -136,7 +161,7 @@ static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t ma
|
||||
|
||||
static void patchAsmVariants()
|
||||
{
|
||||
const int allocation_size = 131072;
|
||||
constexpr size_t allocation_size = 0x20000;
|
||||
auto base = static_cast<uint8_t *>(VirtualMemory::allocateExecutableMemory(allocation_size, false));
|
||||
|
||||
cn_half_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x0000);
|
||||
@@ -173,6 +198,29 @@ static void patchAsmVariants()
|
||||
cn_upx2_double_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x15000);
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
cn_gr0_single_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x16000);
|
||||
cn_gr1_single_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x16800);
|
||||
cn_gr2_single_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x17000);
|
||||
cn_gr3_single_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x17800);
|
||||
cn_gr4_single_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x18000);
|
||||
cn_gr5_single_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x18800);
|
||||
|
||||
cn_gr0_double_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x19000);
|
||||
cn_gr1_double_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x19800);
|
||||
cn_gr2_double_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1A000);
|
||||
cn_gr3_double_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1A800);
|
||||
cn_gr4_double_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1B000);
|
||||
cn_gr5_double_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1B800);
|
||||
|
||||
cn_gr0_quad_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1C000);
|
||||
cn_gr1_quad_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1C800);
|
||||
cn_gr2_quad_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1D000);
|
||||
cn_gr3_quad_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1D800);
|
||||
cn_gr4_quad_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1E000);
|
||||
cn_gr5_quad_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1E800);
|
||||
# endif
|
||||
|
||||
{
|
||||
constexpr uint32_t ITER = CnAlgo<Algorithm::CN_HALF>().iterations();
|
||||
|
||||
@@ -232,6 +280,29 @@ static void patchAsmVariants()
|
||||
}
|
||||
# endif
|
||||
|
||||
# ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
patchCode<Algorithm::CN_1>(cn_gr0_single_mainloop_asm, cnv1_single_mainloop_asm, CnAlgo<Algorithm::CN_GR_0>().iterations(), CnAlgo<Algorithm::CN_GR_0>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr1_single_mainloop_asm, cnv1_single_mainloop_asm, CnAlgo<Algorithm::CN_GR_1>().iterations(), CnAlgo<Algorithm::CN_GR_1>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr2_single_mainloop_asm, cnv1_single_mainloop_asm, CnAlgo<Algorithm::CN_GR_2>().iterations(), CnAlgo<Algorithm::CN_GR_2>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr3_single_mainloop_asm, cnv1_single_mainloop_asm, CnAlgo<Algorithm::CN_GR_3>().iterations(), CnAlgo<Algorithm::CN_GR_3>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr4_single_mainloop_asm, cnv1_single_mainloop_asm, CnAlgo<Algorithm::CN_GR_4>().iterations(), CnAlgo<Algorithm::CN_GR_4>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr5_single_mainloop_asm, cnv1_single_mainloop_asm, CnAlgo<Algorithm::CN_GR_5>().iterations(), CnAlgo<Algorithm::CN_GR_5>().mask());
|
||||
|
||||
patchCode<Algorithm::CN_1>(cn_gr0_double_mainloop_asm, cnv1_double_mainloop_asm, CnAlgo<Algorithm::CN_GR_0>().iterations(), CnAlgo<Algorithm::CN_GR_0>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr1_double_mainloop_asm, cnv1_double_mainloop_asm, CnAlgo<Algorithm::CN_GR_1>().iterations(), CnAlgo<Algorithm::CN_GR_1>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr2_double_mainloop_asm, cnv1_double_mainloop_asm, CnAlgo<Algorithm::CN_GR_2>().iterations(), CnAlgo<Algorithm::CN_GR_2>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr3_double_mainloop_asm, cnv1_double_mainloop_asm, CnAlgo<Algorithm::CN_GR_3>().iterations(), CnAlgo<Algorithm::CN_GR_3>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr4_double_mainloop_asm, cnv1_double_mainloop_asm, CnAlgo<Algorithm::CN_GR_4>().iterations(), CnAlgo<Algorithm::CN_GR_4>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr5_double_mainloop_asm, cnv1_double_mainloop_asm, CnAlgo<Algorithm::CN_GR_5>().iterations(), CnAlgo<Algorithm::CN_GR_5>().mask());
|
||||
|
||||
patchCode<Algorithm::CN_1>(cn_gr0_quad_mainloop_asm, cnv1_quad_mainloop_asm, CnAlgo<Algorithm::CN_GR_0>().iterations(), CnAlgo<Algorithm::CN_GR_0>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr1_quad_mainloop_asm, cnv1_quad_mainloop_asm, CnAlgo<Algorithm::CN_GR_1>().iterations(), CnAlgo<Algorithm::CN_GR_1>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr2_quad_mainloop_asm, cnv1_quad_mainloop_asm, CnAlgo<Algorithm::CN_GR_2>().iterations(), CnAlgo<Algorithm::CN_GR_2>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr3_quad_mainloop_asm, cnv1_quad_mainloop_asm, CnAlgo<Algorithm::CN_GR_3>().iterations(), CnAlgo<Algorithm::CN_GR_3>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr4_quad_mainloop_asm, cnv1_quad_mainloop_asm, CnAlgo<Algorithm::CN_GR_4>().iterations(), CnAlgo<Algorithm::CN_GR_4>().mask());
|
||||
patchCode<Algorithm::CN_1>(cn_gr5_quad_mainloop_asm, cnv1_quad_mainloop_asm, CnAlgo<Algorithm::CN_GR_5>().iterations(), CnAlgo<Algorithm::CN_GR_5>().mask());
|
||||
# endif
|
||||
|
||||
VirtualMemory::protectRX(base, allocation_size);
|
||||
VirtualMemory::flushInstructionCache(base, allocation_size);
|
||||
}
|
||||
@@ -348,7 +419,7 @@ xmrig::cn_hash_fun xmrig::CnHash::fn(const Algorithm &algorithm, AlgoVariant av,
|
||||
|
||||
# ifdef XMRIG_ALGO_CN_HEAVY
|
||||
// cn-heavy optimization for Zen3 CPUs
|
||||
if ((av == AV_SINGLE) && (assembly != Assembly::NONE) && (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3)) {
|
||||
if ((av == AV_SINGLE) && (assembly != Assembly::NONE) && (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3) && (Cpu::info()->model() == 0x21)) {
|
||||
switch (algorithm.id()) {
|
||||
case Algorithm::CN_HEAVY_0:
|
||||
return cryptonight_single_hash<Algorithm::CN_HEAVY_0, false, 3>;
|
||||
|
||||
@@ -52,8 +52,10 @@ struct cryptonight_r_data {
|
||||
struct cryptonight_ctx {
|
||||
alignas(16) uint8_t state[224];
|
||||
alignas(16) uint8_t *memory;
|
||||
const uint32_t* tweak1_table;
|
||||
uint64_t tweak1_2;
|
||||
|
||||
uint8_t unused[40];
|
||||
uint8_t unused[24];
|
||||
const uint32_t *saes_table;
|
||||
|
||||
cn_mainloop_fun_ms_abi generated_code;
|
||||
|
||||
@@ -204,4 +204,7 @@
|
||||
v4_random_math(code##part, r##part); \
|
||||
}
|
||||
|
||||
extern bool cn_sse41_enabled;
|
||||
extern bool cn_vaes_enabled;
|
||||
|
||||
#endif /* XMRIG_CRYPTONIGHT_MONERO_H */
|
||||
|
||||
@@ -450,6 +450,29 @@ const static uint8_t astrobwt_dero_test_out[256] = {
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef XMRIG_ALGO_GHOSTRIDER
|
||||
// "GhostRider"
|
||||
const static uint8_t test_output_gr[256] = {
|
||||
0x42, 0x17, 0x0C, 0xC1, 0x85, 0xE6, 0x76, 0x3C, 0xC7, 0xCB, 0x27, 0xC4, 0x17, 0x39, 0x2D, 0xE2,
|
||||
0x29, 0x6B, 0x40, 0x66, 0x85, 0xA4, 0xE3, 0xD3, 0x8C, 0xE9, 0xA5, 0x8F, 0x10, 0xFC, 0x81, 0xE4,
|
||||
0x90, 0x56, 0xF2, 0x9E, 0x00, 0xD0, 0xF8, 0xA1, 0x88, 0x82, 0x86, 0xC0, 0x86, 0x04, 0x6B, 0x0E,
|
||||
0x9A, 0xDB, 0xDB, 0xFD, 0x23, 0x16, 0x77, 0x94, 0xFE, 0x58, 0x93, 0x05, 0x10, 0x3F, 0x27, 0x75,
|
||||
0x51, 0x44, 0xF3, 0x5F, 0xE2, 0xF9, 0x61, 0xBE, 0xC0, 0x30, 0xB5, 0x8E, 0xB1, 0x1B, 0xA1, 0xF7,
|
||||
0x06, 0x4E, 0xF1, 0x6A, 0xFD, 0xA5, 0x44, 0x8E, 0x64, 0x47, 0x8C, 0x67, 0x51, 0xE2, 0x5C, 0x55,
|
||||
0x3E, 0x39, 0xA6, 0xA5, 0xF7, 0xB8, 0xD0, 0x5E, 0xE2, 0xBF, 0x92, 0x44, 0xD9, 0xAA, 0x76, 0x22,
|
||||
0xE3, 0x3E, 0x15, 0x96, 0xD8, 0x6A, 0x78, 0x2D, 0xA9, 0x77, 0x24, 0x1A, 0x4B, 0xE7, 0x5A, 0x2E,
|
||||
0x89, 0x77, 0xAE, 0x92, 0xE4, 0xA4, 0x2D, 0xAF, 0x0B, 0x27, 0x09, 0xB2, 0x5F, 0x95, 0x61, 0xA9,
|
||||
0xA8, 0xBE, 0x5D, 0x39, 0xBE, 0x41, 0x5F, 0x9C, 0x67, 0x28, 0x48, 0x4F, 0xAE, 0x2A, 0x50, 0x2B,
|
||||
0xB8, 0xC7, 0x42, 0x73, 0x51, 0x60, 0x59, 0xD8, 0x9C, 0xBA, 0x22, 0x2F, 0x8E, 0x34, 0xDE, 0xC8,
|
||||
0x1B, 0xAE, 0x9E, 0xBD, 0xF7, 0xE8, 0xFD, 0x8A, 0x97, 0xBE, 0xF0, 0x47, 0xAC, 0x27, 0xDD, 0x28,
|
||||
0xC9, 0x28, 0xA8, 0x7B, 0x2A, 0xB8, 0x90, 0x3E, 0xCA, 0xB4, 0x78, 0x44, 0xCE, 0xCD, 0x91, 0xEC,
|
||||
0xC2, 0x5A, 0x17, 0x59, 0x7C, 0x14, 0xF8, 0x95, 0x28, 0x14, 0xC3, 0xAD, 0xC4, 0xE1, 0x13, 0x5A,
|
||||
0xC4, 0xA7, 0xC7, 0x77, 0xAD, 0xF8, 0x09, 0x61, 0x16, 0xBB, 0xAA, 0x7E, 0xAB, 0xC3, 0x00, 0x25,
|
||||
0xBA, 0xA8, 0x97, 0xC7, 0x7D, 0x38, 0x46, 0x0E, 0x59, 0xAC, 0xCB, 0xAE, 0xFE, 0x3C, 0x6F, 0x01
|
||||
};
|
||||
#endif
|
||||
|
||||
|
||||
} // namespace xmrig
|
||||
|
||||
|
||||
|
||||
@@ -43,6 +43,11 @@
|
||||
#include "crypto/cn/soft_aes.h"
|
||||
|
||||
|
||||
#ifdef XMRIG_VAES
|
||||
# include "crypto/cn/CryptoNight_x86_vaes.h"
|
||||
#endif
|
||||
|
||||
|
||||
extern "C"
|
||||
{
|
||||
#include "crypto/cn/c_groestl.h"
|
||||
@@ -289,6 +294,13 @@ static NOINLINE void cn_explode_scratchpad(cryptonight_ctx *ctx)
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!SOFT_AES && !props.isHeavy() && cn_vaes_enabled) {
|
||||
cn_explode_scratchpad_vaes(ctx, props.memory(), props.half_mem());
|
||||
return;
|
||||
}
|
||||
# endif
|
||||
|
||||
constexpr size_t N = (props.memory() / sizeof(__m128i)) / (props.half_mem() ? 2 : 1);
|
||||
|
||||
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
|
||||
@@ -341,7 +353,7 @@ static NOINLINE void cn_explode_scratchpad(cryptonight_ctx *ctx)
|
||||
constexpr int output_increment = (64 << interleave) / sizeof(__m128i);
|
||||
constexpr int prefetch_dist = 2048 / sizeof(__m128i);
|
||||
|
||||
__m128i* e = output + N - prefetch_dist;
|
||||
__m128i* e = output + (N << interleave) - prefetch_dist;
|
||||
__m128i* prefetch_ptr = output + prefetch_dist;
|
||||
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
@@ -396,6 +408,13 @@ static NOINLINE void cn_implode_scratchpad(cryptonight_ctx *ctx)
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!SOFT_AES && !props.isHeavy() && cn_vaes_enabled) {
|
||||
cn_implode_scratchpad_vaes(ctx, props.memory(), props.half_mem());
|
||||
return;
|
||||
}
|
||||
# endif
|
||||
|
||||
constexpr bool IS_HEAVY = props.isHeavy();
|
||||
constexpr size_t N = (props.memory() / sizeof(__m128i)) / (props.half_mem() ? 2 : 1);
|
||||
|
||||
@@ -615,9 +634,35 @@ static inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var)
|
||||
cx = _mm_xor_si128(cx, _mm_cvttps_epi32(nc));
|
||||
}
|
||||
|
||||
#ifdef XMRIG_FEATURE_ASM
|
||||
// Forward declaration of the GhostRider single-hash routine that drives the
// per-variant assembly main loops. It is declared here because
// cryptonight_single_hash() calls it (when SOFT_AES is false and
// cn_sse41_enabled is set) before the definition appears.
template<Algorithm::Id ALGO>
|
||||
static void cryptonight_single_hash_gr_sse41(const uint8_t* __restrict__ input, size_t size, uint8_t* __restrict__ output, cryptonight_ctx** __restrict__ ctx, uint64_t height);
|
||||
#endif
|
||||
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
|
||||
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
||||
{
|
||||
# ifdef XMRIG_FEATURE_ASM
|
||||
if (!SOFT_AES) {
|
||||
switch (ALGO) {
|
||||
case Algorithm::CN_GR_0:
|
||||
case Algorithm::CN_GR_1:
|
||||
case Algorithm::CN_GR_2:
|
||||
case Algorithm::CN_GR_3:
|
||||
case Algorithm::CN_GR_4:
|
||||
case Algorithm::CN_GR_5:
|
||||
if (cn_sse41_enabled) {
|
||||
cryptonight_single_hash_gr_sse41<ALGO>(input, size, output, ctx, height);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
constexpr size_t MASK = props.mask();
|
||||
constexpr Algorithm::Id BASE = props.base();
|
||||
@@ -803,6 +848,9 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
|
||||
|
||||
|
||||
#ifdef XMRIG_FEATURE_ASM
|
||||
extern "C" void cnv1_single_mainloop_asm(cryptonight_ctx * *ctx);
|
||||
extern "C" void cnv1_double_mainloop_asm(cryptonight_ctx **ctx);
|
||||
extern "C" void cnv1_quad_mainloop_asm(cryptonight_ctx **ctx);
|
||||
extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx **ctx);
|
||||
extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx **ctx);
|
||||
extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx **ctx);
|
||||
@@ -846,6 +894,28 @@ extern cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm;
|
||||
extern cn_mainloop_fun cn_upx2_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_upx2_double_mainloop_asm;
|
||||
|
||||
extern cn_mainloop_fun cn_gr0_single_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr1_single_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr2_single_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr3_single_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr4_single_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr5_single_mainloop_asm;
|
||||
|
||||
extern cn_mainloop_fun cn_gr0_double_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr1_double_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr2_double_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr3_double_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr4_double_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr5_double_mainloop_asm;
|
||||
|
||||
extern cn_mainloop_fun cn_gr0_quad_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr1_quad_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr2_quad_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr3_quad_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr4_quad_mainloop_asm;
|
||||
extern cn_mainloop_fun cn_gr5_quad_mainloop_asm;
|
||||
|
||||
|
||||
} // namespace xmrig
|
||||
|
||||
|
||||
@@ -996,8 +1066,17 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
|
||||
ctx[0]->first_half = true;
|
||||
ctx[1]->first_half = true;
|
||||
}
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!props.isHeavy() && cn_vaes_enabled) {
|
||||
cn_explode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
cn_explode_scratchpad<ALGO, false, 0>(ctx[0]);
|
||||
cn_explode_scratchpad<ALGO, false, 0>(ctx[1]);
|
||||
}
|
||||
|
||||
if (ALGO == Algorithm::CN_2) {
|
||||
cnv2_double_mainloop_sandybridge_asm(ctx);
|
||||
@@ -1036,8 +1115,16 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
|
||||
ctx[0]->generated_code(ctx);
|
||||
}
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!props.isHeavy() && cn_vaes_enabled) {
|
||||
cn_implode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
cn_implode_scratchpad<ALGO, false, 0>(ctx[0]);
|
||||
cn_implode_scratchpad<ALGO, false, 0>(ctx[1]);
|
||||
}
|
||||
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[1]->state), 24);
|
||||
@@ -1054,9 +1141,130 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
#ifdef XMRIG_FEATURE_ASM
|
||||
// Computes one GhostRider CryptoNight hash using the assembly main loop that
// matches ALGO (CN_GR_0 .. CN_GR_5).
//
//   input  - message bytes to hash
//   size   - length of the message in bytes
//   output - receives the 32-byte result
//   ctx    - only ctx[0] is used: its state, scratchpad and tweak fields
//   height - not referenced directly in this body
//
// The hardware-AES scratchpad routines are always used (SOFT_AES = false).
template<Algorithm::Id ALGO>
|
||||
static NOINLINE void cryptonight_single_hash_gr_sse41(const uint8_t* __restrict__ input, size_t size, uint8_t* __restrict__ output, cryptonight_ctx** __restrict__ ctx, uint64_t height)
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
constexpr Algorithm::Id BASE = props.base();
|
||||
|
||||
// CN_1-based variants reject inputs shorter than 43 bytes: the result is
// 32 zero bytes and no hashing is performed.
if (BASE == Algorithm::CN_1 && size < 43) {
|
||||
memset(output, 0, 32);
|
||||
return;
|
||||
}
|
||||
|
||||
// Absorb the message into the context state.
keccak(input, size, ctx[0]->state);
|
||||
|
||||
// Half-memory algorithms flag the context before the scratchpad is filled.
if (props.half_mem()) {
|
||||
ctx[0]->first_half = true;
|
||||
}
|
||||
cn_explode_scratchpad<ALGO, false, 0>(ctx[0]);
|
||||
|
||||
// VARIANT1_INIT is a macro defined outside this view; it evidently introduces
// tweak1_2_0, which is not declared anywhere else in this body. The tweak
// value and lookup table are handed to the assembly loop through the context.
VARIANT1_INIT(0);
|
||||
ctx[0]->tweak1_2 = tweak1_2_0;
|
||||
ctx[0]->tweak1_table = tweak1_table;
|
||||
// ALGO is a template parameter, so at most one of these comparisons is true
// for a given instantiation and exactly one main loop runs.
if (ALGO == Algorithm::CN_GR_0) cn_gr0_single_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_1) cn_gr1_single_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_2) cn_gr2_single_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_3) cn_gr3_single_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_4) cn_gr4_single_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_5) cn_gr5_single_mainloop_asm(ctx);
|
||||
|
||||
// Fold the scratchpad back into the state, permute it, then let the low two
// bits of the first state byte pick one of the four extra_hashes finalizers.
cn_implode_scratchpad<ALGO, false, 0>(ctx[0]);
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
}
|
||||
|
||||
|
||||
// Computes two GhostRider CryptoNight hashes at once using the double-hash
// assembly main loop that matches ALGO (CN_GR_0 .. CN_GR_5).
//
//   input  - two messages of `size` bytes each, stored back to back
//   size   - length of ONE message in bytes
//   output - receives two 32-byte results (at output and output + 32)
//   ctx    - ctx[0] and ctx[1] hold the state/scratchpad of each hash
//   height - not referenced directly in this body
template<Algorithm::Id ALGO>
|
||||
static NOINLINE void cryptonight_double_hash_gr_sse41(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
constexpr Algorithm::Id BASE = props.base();
|
||||
|
||||
// CN_1-based variants reject inputs shorter than 43 bytes: both 32-byte
// results are zeroed and no hashing is performed.
if (BASE == Algorithm::CN_1 && size < 43) {
|
||||
memset(output, 0, 64);
|
||||
return;
|
||||
}
|
||||
|
||||
// Absorb each message into its own context state.
keccak(input, size, ctx[0]->state);
|
||||
keccak(input + size, size, ctx[1]->state);
|
||||
|
||||
// Half-memory algorithms flag both contexts before the scratchpads are filled.
if (props.half_mem()) {
|
||||
ctx[0]->first_half = true;
|
||||
ctx[1]->first_half = true;
|
||||
}
|
||||
|
||||
// Scratchpad fill: when built with XMRIG_VAES and cn_vaes_enabled is set at
// run time, non-heavy algorithms fill both scratchpads in one combined VAES
// call; otherwise each context goes through the generic routine on its own.
# ifdef XMRIG_VAES
|
||||
if (!props.isHeavy() && cn_vaes_enabled) {
|
||||
cn_explode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
cn_explode_scratchpad<ALGO, false, 0>(ctx[0]);
|
||||
cn_explode_scratchpad<ALGO, false, 0>(ctx[1]);
|
||||
}
|
||||
|
||||
// VARIANT1_INIT is a macro defined outside this view; it evidently introduces
// tweak1_2_0 / tweak1_2_1, which are not declared anywhere else in this body.
VARIANT1_INIT(0);
|
||||
VARIANT1_INIT(1);
|
||||
|
||||
ctx[0]->tweak1_2 = tweak1_2_0;
|
||||
ctx[1]->tweak1_2 = tweak1_2_1;
|
||||
|
||||
// NOTE(review): only ctx[0] receives the table pointer; presumably the double
// main loop reads it from the first context only — confirm against the assembly.
ctx[0]->tweak1_table = tweak1_table;
|
||||
|
||||
// ALGO is a template parameter, so at most one of these comparisons is true
// for a given instantiation and exactly one main loop runs.
if (ALGO == Algorithm::CN_GR_0) cn_gr0_double_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_1) cn_gr1_double_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_2) cn_gr2_double_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_3) cn_gr3_double_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_4) cn_gr4_double_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_5) cn_gr5_double_mainloop_asm(ctx);
|
||||
|
||||
// Scratchpad fold-back uses the same VAES / generic selection as the fill above.
# ifdef XMRIG_VAES
|
||||
if (!props.isHeavy() && cn_vaes_enabled) {
|
||||
cn_implode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
cn_implode_scratchpad<ALGO, false, 0>(ctx[0]);
|
||||
cn_implode_scratchpad<ALGO, false, 0>(ctx[1]);
|
||||
}
|
||||
|
||||
// Permute each state, then let the low two bits of its first byte pick one of
// the four extra_hashes finalizers; results are written 32 bytes apart.
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[1]->state), 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES>
|
||||
inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
||||
{
|
||||
# ifdef XMRIG_FEATURE_ASM
|
||||
if (!SOFT_AES) {
|
||||
switch (ALGO) {
|
||||
case Algorithm::CN_GR_0:
|
||||
case Algorithm::CN_GR_1:
|
||||
case Algorithm::CN_GR_2:
|
||||
case Algorithm::CN_GR_3:
|
||||
case Algorithm::CN_GR_4:
|
||||
case Algorithm::CN_GR_5:
|
||||
if (cn_sse41_enabled) {
|
||||
cryptonight_double_hash_gr_sse41<ALGO>(input, size, output, ctx, height);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
constexpr size_t MASK = props.mask();
|
||||
constexpr Algorithm::Id BASE = props.base();
|
||||
@@ -1092,8 +1300,17 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||
ctx[0]->first_half = true;
|
||||
ctx[1]->first_half = true;
|
||||
}
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!SOFT_AES && !props.isHeavy() && cn_vaes_enabled) {
|
||||
cn_explode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
|
||||
}
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
@@ -1288,8 +1505,16 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||
bx10 = cx1;
|
||||
}
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!SOFT_AES && !props.isHeavy() && cn_vaes_enabled) {
|
||||
cn_implode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
|
||||
}
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
@@ -1299,27 +1524,15 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
|
||||
}
|
||||
|
||||
|
||||
static inline void cryptonight_monero_tweak_gr(uint64_t* mem_out, const uint8_t* l, uint64_t idx, __m128i ax0, __m128i bx0, __m128i cx)
|
||||
{
|
||||
__m128i tmp = _mm_xor_si128(bx0, cx);
|
||||
mem_out[0] = _mm_cvtsi128_si64(tmp);
|
||||
|
||||
tmp = _mm_castps_si128(_mm_movehl_ps(_mm_castsi128_ps(tmp), _mm_castsi128_ps(tmp)));
|
||||
uint64_t vh = _mm_cvtsi128_si64(tmp);
|
||||
|
||||
mem_out[1] = vh ^ tweak1_table[static_cast<uint32_t>(vh) >> 24];
|
||||
}
|
||||
|
||||
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES>
|
||||
void cryptonight_quad_hash_zen(const uint8_t* __restrict__ input, size_t size, uint8_t* __restrict__ output, cryptonight_ctx** __restrict__ ctx, uint64_t height)
|
||||
#ifdef XMRIG_FEATURE_ASM
|
||||
template<Algorithm::Id ALGO>
|
||||
static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__ input, size_t size, uint8_t* __restrict__ output, cryptonight_ctx** __restrict__ ctx, uint64_t height)
|
||||
{
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
constexpr size_t MASK = props.mask();
|
||||
constexpr Algorithm::Id BASE = props.base();
|
||||
|
||||
if (BASE == Algorithm::CN_1 && size < 43) {
|
||||
memset(output, 0, 64);
|
||||
memset(output, 0, 32 * 4);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1328,21 +1541,6 @@ void cryptonight_quad_hash_zen(const uint8_t* __restrict__ input, size_t size, u
|
||||
keccak(input + size * 2, size, ctx[2]->state);
|
||||
keccak(input + size * 3, size, ctx[3]->state);
|
||||
|
||||
uint8_t* l0 = ctx[0]->memory;
|
||||
uint8_t* l1 = ctx[1]->memory;
|
||||
uint8_t* l2 = ctx[2]->memory;
|
||||
uint8_t* l3 = ctx[3]->memory;
|
||||
|
||||
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
|
||||
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
|
||||
uint64_t* h2 = reinterpret_cast<uint64_t*>(ctx[2]->state);
|
||||
uint64_t* h3 = reinterpret_cast<uint64_t*>(ctx[3]->state);
|
||||
|
||||
VARIANT1_INIT(0);
|
||||
VARIANT1_INIT(1);
|
||||
VARIANT1_INIT(2);
|
||||
VARIANT1_INIT(3);
|
||||
|
||||
if (props.half_mem()) {
|
||||
ctx[0]->first_half = true;
|
||||
ctx[1]->first_half = true;
|
||||
@@ -1350,145 +1548,59 @@ void cryptonight_quad_hash_zen(const uint8_t* __restrict__ input, size_t size, u
|
||||
ctx[3]->first_half = true;
|
||||
}
|
||||
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
|
||||
|
||||
uint64_t al0 = h0[0] ^ h0[4];
|
||||
uint64_t al1 = h1[0] ^ h1[4];
|
||||
uint64_t al2 = h2[0] ^ h2[4];
|
||||
uint64_t al3 = h3[0] ^ h3[4];
|
||||
|
||||
uint64_t ah0 = h0[1] ^ h0[5];
|
||||
uint64_t ah1 = h1[1] ^ h1[5];
|
||||
uint64_t ah2 = h2[1] ^ h2[5];
|
||||
uint64_t ah3 = h3[1] ^ h3[5];
|
||||
|
||||
__m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
|
||||
__m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
|
||||
__m128i bx20 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
|
||||
__m128i bx30 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]);
|
||||
|
||||
uint64_t idx0 = al0;
|
||||
uint64_t idx1 = al1;
|
||||
uint64_t idx2 = al2;
|
||||
uint64_t idx3 = al3;
|
||||
|
||||
__m128i cx0, cx1, cx2, cx3;
|
||||
|
||||
if (!SOFT_AES) {
|
||||
cx0 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[idx0 & MASK]));
|
||||
cx1 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l1[idx1 & MASK]));
|
||||
cx2 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l2[idx2 & MASK]));
|
||||
cx3 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l3[idx3 & MASK]));
|
||||
# ifdef XMRIG_VAES
|
||||
if (!props.isHeavy() && cn_vaes_enabled) {
|
||||
cn_explode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
|
||||
cn_explode_scratchpad_vaes_double(ctx[2], ctx[3], props.memory(), props.half_mem());
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
cn_explode_scratchpad<ALGO, false, 0>(ctx[0]);
|
||||
cn_explode_scratchpad<ALGO, false, 0>(ctx[1]);
|
||||
cn_explode_scratchpad<ALGO, false, 0>(ctx[2]);
|
||||
cn_explode_scratchpad<ALGO, false, 0>(ctx[3]);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < props.iterations(); i++) {
|
||||
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
|
||||
const __m128i ax1 = _mm_set_epi64x(ah1, al1);
|
||||
const __m128i ax2 = _mm_set_epi64x(ah2, al2);
|
||||
const __m128i ax3 = _mm_set_epi64x(ah3, al3);
|
||||
VARIANT1_INIT(0); ctx[0]->tweak1_2 = tweak1_2_0;
|
||||
VARIANT1_INIT(1); ctx[1]->tweak1_2 = tweak1_2_1;
|
||||
VARIANT1_INIT(2); ctx[2]->tweak1_2 = tweak1_2_2;
|
||||
VARIANT1_INIT(3); ctx[3]->tweak1_2 = tweak1_2_3;
|
||||
|
||||
if (SOFT_AES) {
|
||||
cx0 = soft_aesenc(&l0[idx0 & MASK], ax0, reinterpret_cast<const uint32_t*>(saes_table));
|
||||
cx1 = soft_aesenc(&l1[idx1 & MASK], ax1, reinterpret_cast<const uint32_t*>(saes_table));
|
||||
cx2 = soft_aesenc(&l2[idx2 & MASK], ax2, reinterpret_cast<const uint32_t*>(saes_table));
|
||||
cx3 = soft_aesenc(&l3[idx3 & MASK], ax3, reinterpret_cast<const uint32_t*>(saes_table));
|
||||
}
|
||||
else {
|
||||
cx0 = _mm_aesenc_si128(cx0, ax0);
|
||||
cx1 = _mm_aesenc_si128(cx1, ax1);
|
||||
cx2 = _mm_aesenc_si128(cx2, ax2);
|
||||
cx3 = _mm_aesenc_si128(cx3, ax3);
|
||||
if (MASK > 131072) {
|
||||
_mm_prefetch((const char*)(&l0[_mm_cvtsi128_si32(cx0) & MASK]), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(&l1[_mm_cvtsi128_si32(cx1) & MASK]), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(&l2[_mm_cvtsi128_si32(cx2) & MASK]), _MM_HINT_T0);
|
||||
_mm_prefetch((const char*)(&l3[_mm_cvtsi128_si32(cx3) & MASK]), _MM_HINT_T0);
|
||||
ctx[0]->tweak1_table = tweak1_table;
|
||||
|
||||
if (ALGO == Algorithm::CN_GR_0) cn_gr0_quad_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_1) cn_gr1_quad_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_2) cn_gr2_quad_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_3) cn_gr3_quad_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_4) cn_gr4_quad_mainloop_asm(ctx);
|
||||
if (ALGO == Algorithm::CN_GR_5) cn_gr5_quad_mainloop_asm(ctx);
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!props.isHeavy() && cn_vaes_enabled) {
|
||||
cn_implode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
|
||||
cn_implode_scratchpad_vaes_double(ctx[2], ctx[3], props.memory(), props.half_mem());
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
cn_implode_scratchpad<ALGO, false, 0>(ctx[0]);
|
||||
cn_implode_scratchpad<ALGO, false, 0>(ctx[1]);
|
||||
cn_implode_scratchpad<ALGO, false, 0>(ctx[2]);
|
||||
cn_implode_scratchpad<ALGO, false, 0>(ctx[3]);
|
||||
}
|
||||
|
||||
cryptonight_monero_tweak_gr((uint64_t*)&l0[idx0 & MASK], l0, idx0 & MASK, ax0, bx00, cx0);
|
||||
cryptonight_monero_tweak_gr((uint64_t*)&l1[idx1 & MASK], l1, idx1 & MASK, ax1, bx10, cx1);
|
||||
cryptonight_monero_tweak_gr((uint64_t*)&l2[idx2 & MASK], l2, idx2 & MASK, ax2, bx20, cx2);
|
||||
cryptonight_monero_tweak_gr((uint64_t*)&l3[idx3 & MASK], l3, idx3 & MASK, ax3, bx30, cx3);
|
||||
|
||||
idx0 = _mm_cvtsi128_si64(cx0);
|
||||
idx1 = _mm_cvtsi128_si64(cx1);
|
||||
idx2 = _mm_cvtsi128_si64(cx2);
|
||||
idx3 = _mm_cvtsi128_si64(cx3);
|
||||
|
||||
uint64_t hi, lo, cl, ch;
|
||||
|
||||
cl = ((uint64_t*)&l0[idx0 & MASK])[0];
|
||||
ch = ((uint64_t*)&l0[idx0 & MASK])[1];
|
||||
lo = __umul128(idx0, cl, &hi);
|
||||
al0 += hi;
|
||||
ah0 += lo;
|
||||
((uint64_t*)&l0[idx0 & MASK])[0] = al0;
|
||||
((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
|
||||
al0 ^= cl;
|
||||
ah0 ^= ch;
|
||||
idx0 = al0;
|
||||
bx00 = cx0;
|
||||
if (!SOFT_AES) cx0 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[idx0 & MASK]));
|
||||
|
||||
cl = ((uint64_t*)&l1[idx1 & MASK])[0];
|
||||
ch = ((uint64_t*)&l1[idx1 & MASK])[1];
|
||||
lo = __umul128(idx1, cl, &hi);
|
||||
al1 += hi;
|
||||
ah1 += lo;
|
||||
((uint64_t*)&l1[idx1 & MASK])[0] = al1;
|
||||
((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1;
|
||||
al1 ^= cl;
|
||||
ah1 ^= ch;
|
||||
idx1 = al1;
|
||||
bx10 = cx1;
|
||||
if (!SOFT_AES) cx1 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l1[idx1 & MASK]));
|
||||
|
||||
cl = ((uint64_t*)&l2[idx2 & MASK])[0];
|
||||
ch = ((uint64_t*)&l2[idx2 & MASK])[1];
|
||||
lo = __umul128(idx2, cl, &hi);
|
||||
al2 += hi;
|
||||
ah2 += lo;
|
||||
((uint64_t*)&l2[idx2 & MASK])[0] = al2;
|
||||
((uint64_t*)&l2[idx2 & MASK])[1] = ah2 ^ tweak1_2_2;
|
||||
al2 ^= cl;
|
||||
ah2 ^= ch;
|
||||
idx2 = al2;
|
||||
bx20 = cx2;
|
||||
if (!SOFT_AES) cx2 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l2[idx2 & MASK]));
|
||||
|
||||
cl = ((uint64_t*)&l3[idx3 & MASK])[0];
|
||||
ch = ((uint64_t*)&l3[idx3 & MASK])[1];
|
||||
lo = __umul128(idx3, cl, &hi);
|
||||
al3 += hi;
|
||||
ah3 += lo;
|
||||
((uint64_t*)&l3[idx3 & MASK])[0] = al3;
|
||||
((uint64_t*)&l3[idx3 & MASK])[1] = ah3 ^ tweak1_2_3;
|
||||
al3 ^= cl;
|
||||
ah3 ^= ch;
|
||||
idx3 = al3;
|
||||
bx30 = cx3;
|
||||
if (!SOFT_AES) cx3 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l3[idx3 & MASK]));
|
||||
}
|
||||
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
|
||||
|
||||
keccakf(h0, 24);
|
||||
keccakf(h1, 24);
|
||||
keccakf(h2, 24);
|
||||
keccakf(h3, 24);
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[1]->state), 24);
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[2]->state), 24);
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[3]->state), 24);
|
||||
|
||||
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
|
||||
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
|
||||
extra_hashes[ctx[2]->state[0] & 3](ctx[2]->state, 200, output + 64);
|
||||
extra_hashes[ctx[3]->state[0] & 3](ctx[3]->state, 200, output + 96);
|
||||
}
|
||||
#endif
|
||||
|
||||
|
||||
#define CN_STEP1(a, b0, b1, c, l, ptr, idx, conc_var) \
|
||||
@@ -1684,13 +1796,26 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
|
||||
template<Algorithm::Id ALGO, bool SOFT_AES>
|
||||
inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
|
||||
{
|
||||
const auto arch = Cpu::info()->arch();
|
||||
if ((arch >= ICpuInfo::ARCH_ZEN) && (arch <= ICpuInfo::ARCH_ZEN3)) {
|
||||
if ((ALGO == Algorithm::CN_GR_0) || (ALGO == Algorithm::CN_GR_1) || (ALGO == Algorithm::CN_GR_2) || (ALGO == Algorithm::CN_GR_3) || (ALGO == Algorithm::CN_GR_4) || (ALGO == Algorithm::CN_GR_5)) {
|
||||
cryptonight_quad_hash_zen<ALGO, SOFT_AES>(input, size, output, ctx, height);
|
||||
# ifdef XMRIG_FEATURE_ASM
|
||||
if (!SOFT_AES) {
|
||||
switch (ALGO) {
|
||||
case Algorithm::CN_GR_0:
|
||||
case Algorithm::CN_GR_1:
|
||||
case Algorithm::CN_GR_2:
|
||||
case Algorithm::CN_GR_3:
|
||||
case Algorithm::CN_GR_4:
|
||||
case Algorithm::CN_GR_5:
|
||||
if (cn_sse41_enabled) {
|
||||
cryptonight_quad_hash_gr_sse41<ALGO>(input, size, output, ctx, height);
|
||||
return;
|
||||
}
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
# endif
|
||||
|
||||
constexpr CnAlgo<ALGO> props;
|
||||
constexpr size_t MASK = props.mask();
|
||||
@@ -1714,7 +1839,20 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
|
||||
if (props.half_mem()) {
|
||||
ctx[i]->first_half = true;
|
||||
}
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[i]);
|
||||
}
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!SOFT_AES && !props.isHeavy() && cn_vaes_enabled) {
|
||||
cn_explode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
|
||||
cn_explode_scratchpad_vaes_double(ctx[2], ctx[3], props.memory(), props.half_mem());
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
|
||||
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
|
||||
}
|
||||
|
||||
uint8_t* l0 = ctx[0]->memory;
|
||||
@@ -1766,8 +1904,21 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
|
||||
CN_STEP4(3, ax3, bx30, bx31, cx3, l3, mc3, ptr3, idx3);
|
||||
}
|
||||
|
||||
# ifdef XMRIG_VAES
|
||||
if (!SOFT_AES && !props.isHeavy() && cn_vaes_enabled) {
|
||||
cn_implode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
|
||||
cn_implode_scratchpad_vaes_double(ctx[2], ctx[3], props.memory(), props.half_mem());
|
||||
}
|
||||
else
|
||||
# endif
|
||||
{
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
|
||||
}
|
||||
|
||||
for (size_t i = 0; i < 4; i++) {
|
||||
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[i]);
|
||||
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
|
||||
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
|
||||
}
|
||||
|
||||
478
src/crypto/cn/CryptoNight_x86_vaes.cpp
Normal file
478
src/crypto/cn/CryptoNight_x86_vaes.cpp
Normal file
@@ -0,0 +1,478 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#include "CryptoNight_x86_vaes.h"
|
||||
#include "CryptoNight_monero.h"
|
||||
#include "CryptoNight.h"
|
||||
|
||||
|
||||
#ifdef __GNUC__
|
||||
# include <x86intrin.h>
|
||||
#if !defined(__clang__) && !defined(__ICC) && __GNUC__ < 10
|
||||
static inline __m256i
|
||||
__attribute__((__always_inline__))
|
||||
_mm256_loadu2_m128i(const __m128i* const hiaddr, const __m128i* const loaddr)
|
||||
{
|
||||
return _mm256_inserti128_si256(
|
||||
_mm256_castsi128_si256(_mm_loadu_si128(loaddr)), _mm_loadu_si128(hiaddr), 1);
|
||||
}
|
||||
|
||||
static inline void
|
||||
__attribute__((__always_inline__))
|
||||
_mm256_storeu2_m128i(__m128i* const hiaddr, __m128i* const loaddr, const __m256i a)
|
||||
{
|
||||
_mm_storeu_si128(loaddr, _mm256_castsi256_si128(a));
|
||||
_mm_storeu_si128(hiaddr, _mm256_extracti128_si256(a, 1));
|
||||
}
|
||||
#endif
|
||||
#else
|
||||
# include <intrin.h>
|
||||
#endif
|
||||
|
||||
|
||||
// This will shift and xor tmp1 into itself as 4 32-bit vals such as
|
||||
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
|
||||
static FORCEINLINE __m128i sl_xor(__m128i tmp1)
{
    // Copies of the input moved up by one, two and three 32-bit lanes
    // (each step is a 4-byte whole-register shift; vacated lanes are zero).
    const __m128i up_one   = _mm_slli_si128(tmp1, 0x04);
    const __m128i up_two   = _mm_slli_si128(up_one, 0x04);
    const __m128i up_three = _mm_slli_si128(up_two, 0x04);

    // XOR the input with all three shifted copies, giving the running
    // prefix-XOR of its four lanes.
    return _mm_xor_si128(_mm_xor_si128(_mm_xor_si128(tmp1, up_one), up_two), up_three);
}
|
||||
|
||||
|
||||
template<uint8_t rcon>
|
||||
static FORCEINLINE void aes_genkey_sub(__m128i* xout0, __m128i* xout2)
|
||||
{
|
||||
__m128i xout1 = _mm_aeskeygenassist_si128(*xout2, rcon);
|
||||
xout1 = _mm_shuffle_epi32(xout1, 0xFF); // see PSHUFD, set all elems to 4th elem
|
||||
*xout0 = sl_xor(*xout0);
|
||||
*xout0 = _mm_xor_si128(*xout0, xout1);
|
||||
xout1 = _mm_aeskeygenassist_si128(*xout0, 0x00);
|
||||
xout1 = _mm_shuffle_epi32(xout1, 0xAA); // see PSHUFD, set all elems to 3rd elem
|
||||
*xout2 = sl_xor(*xout2);
|
||||
*xout2 = _mm_xor_si128(*xout2, xout1);
|
||||
}
|
||||
|
||||
|
||||
// Produces ten 256-bit round keys (k0..k9) from the two 128-bit values at
// memory[0] and memory[1] (aligned loads). Each 128-bit round key is
// duplicated into both halves of its 256-bit output, so the same key applies
// to both 128-bit lanes of a VAES operand.
static NOINLINE void vaes_genkey(const __m128i* memory, __m256i* k0, __m256i* k1, __m256i* k2, __m256i* k3, __m256i* k4, __m256i* k5, __m256i* k6, __m256i* k7, __m256i* k8, __m256i* k9)
{
    __m128i key_even = _mm_load_si128(memory);
    __m128i key_odd  = _mm_load_si128(memory + 1);

    // Keys 0/1 are the raw input; every later pair comes from one
    // aes_genkey_sub step with round constants 0x01, 0x02, 0x04, 0x08.
    *k0 = _mm256_set_m128i(key_even, key_even);
    *k1 = _mm256_set_m128i(key_odd, key_odd);

    aes_genkey_sub<0x01>(&key_even, &key_odd);
    *k2 = _mm256_set_m128i(key_even, key_even);
    *k3 = _mm256_set_m128i(key_odd, key_odd);

    aes_genkey_sub<0x02>(&key_even, &key_odd);
    *k4 = _mm256_set_m128i(key_even, key_even);
    *k5 = _mm256_set_m128i(key_odd, key_odd);

    aes_genkey_sub<0x04>(&key_even, &key_odd);
    *k6 = _mm256_set_m128i(key_even, key_even);
    *k7 = _mm256_set_m128i(key_odd, key_odd);

    aes_genkey_sub<0x08>(&key_even, &key_odd);
    *k8 = _mm256_set_m128i(key_even, key_even);
    *k9 = _mm256_set_m128i(key_odd, key_odd);
}
|
||||
|
||||
|
||||
// Produces ten 256-bit round keys (k0..k9) for two independent key sources.
// The schedule derived from memory1[0..1] occupies the low 128 bits of every
// output and the schedule derived from memory2[0..1] occupies the high
// 128 bits (both sources are read with aligned loads).
static NOINLINE void vaes_genkey_double(const __m128i* memory1, const __m128i* memory2, __m256i* k0, __m256i* k1, __m256i* k2, __m256i* k3, __m256i* k4, __m256i* k5, __m256i* k6, __m256i* k7, __m256i* k8, __m256i* k9)
{
    // "a" = first source (low lane), "b" = second source (high lane).
    __m128i a_even = _mm_load_si128(memory1);
    __m128i a_odd  = _mm_load_si128(memory1 + 1);
    __m128i b_even = _mm_load_si128(memory2);
    __m128i b_odd  = _mm_load_si128(memory2 + 1);

    // _mm256_set_m128i takes (high, low).
    *k0 = _mm256_set_m128i(b_even, a_even);
    *k1 = _mm256_set_m128i(b_odd, a_odd);

    // Each later pair comes from one aes_genkey_sub step per source, with
    // round constants 0x01, 0x02, 0x04, 0x08.
    aes_genkey_sub<0x01>(&a_even, &a_odd);
    aes_genkey_sub<0x01>(&b_even, &b_odd);
    *k2 = _mm256_set_m128i(b_even, a_even);
    *k3 = _mm256_set_m128i(b_odd, a_odd);

    aes_genkey_sub<0x02>(&a_even, &a_odd);
    aes_genkey_sub<0x02>(&b_even, &b_odd);
    *k4 = _mm256_set_m128i(b_even, a_even);
    *k5 = _mm256_set_m128i(b_odd, a_odd);

    aes_genkey_sub<0x04>(&a_even, &a_odd);
    aes_genkey_sub<0x04>(&b_even, &b_odd);
    *k6 = _mm256_set_m128i(b_even, a_even);
    *k7 = _mm256_set_m128i(b_odd, a_odd);

    aes_genkey_sub<0x08>(&a_even, &a_odd);
    aes_genkey_sub<0x08>(&b_even, &b_odd);
    *k8 = _mm256_set_m128i(b_even, a_even);
    *k9 = _mm256_set_m128i(b_odd, a_odd);
}
|
||||
|
||||
|
||||
// Applies one AES encryption round with the same 256-bit round-key register
// to four 256-bit registers, updating each in place. _mm256_aesenc_epi128
// processes the two 128-bit lanes of a register independently, so this
// advances eight 128-bit blocks (named 0..7 by the parameter suffixes).
static FORCEINLINE void vaes_round(__m256i key, __m256i& x01, __m256i& x23, __m256i& x45, __m256i& x67)
|
||||
{
|
||||
x01 = _mm256_aesenc_epi128(x01, key);
|
||||
x23 = _mm256_aesenc_epi128(x23, key);
|
||||
x45 = _mm256_aesenc_epi128(x45, key);
|
||||
x67 = _mm256_aesenc_epi128(x67, key);
|
||||
}
|
||||
|
||||
|
||||
// One AES encryption round (_mm256_aesenc_epi128) with the same round key, applied in
// place to eight 256-bit registers, in argument order.
static FORCEINLINE void vaes_round(__m256i key, __m256i& x0, __m256i& x1, __m256i& x2, __m256i& x3, __m256i& x4, __m256i& x5, __m256i& x6, __m256i& x7)
{
    const auto encrypt = [&key](__m256i& block) {
        block = _mm256_aesenc_epi128(block, key);
    };

    encrypt(x0);
    encrypt(x1);
    encrypt(x2);
    encrypt(x3);
    encrypt(x4);
    encrypt(x5);
    encrypt(x6);
    encrypt(x7);
}
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
// Fills ctx->memory by repeatedly running ten AES rounds over four 256-bit blocks and
// writing the blocks out after each pass.
//
// ctx:      ctx->state supplies the key material and (normally) the initial blocks at
//           byte offset 64; ctx->memory receives the output (written with aligned
//           256-bit stores); ctx->save_state / ctx->first_half are used in half_mem mode.
// memory:   scratchpad size in bytes.
// half_mem: when true only memory / 2 bytes are produced per call. A call with
//           ctx->first_half set stores the final blocks in ctx->save_state; a call with
//           it cleared starts from those stored blocks instead of ctx->state.
NOINLINE void cn_explode_scratchpad_vaes(cryptonight_ctx* ctx, size_t memory, bool half_mem)
{
    // Number of 256-bit words this call writes.
    size_t word_count = memory / sizeof(__m256i);
    if (half_mem) {
        word_count /= 2;
    }

    const __m128i* state = reinterpret_cast<const __m128i*>(ctx->state);
    __m256i* dst = reinterpret_cast<__m256i*>(ctx->memory);

    __m256i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
    vaes_genkey(state, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);

    // Initial blocks: 128 bytes from save_state when resuming, else state bytes 64..191.
    const bool resume = half_mem && !ctx->first_half;
    const __m256i* seed = resume ? reinterpret_cast<const __m256i*>(ctx->save_state)
                                 : reinterpret_cast<const __m256i*>(state + 4);

    __m256i blk01 = _mm256_loadu_si256(seed + 0);
    __m256i blk23 = _mm256_loadu_si256(seed + 1);
    __m256i blk45 = _mm256_loadu_si256(seed + 2);
    __m256i blk67 = _mm256_loadu_si256(seed + 3);

    constexpr int line_words = 64 / sizeof(__m256i);      // 256-bit words per 64 bytes
    constexpr int lookahead  = 2048 / sizeof(__m256i);    // prefetch distance in words

    __m256i* stop = dst + word_count - lookahead;
    __m256i* ahead = dst + lookahead;

    // Pass 0 writes everything except the last `lookahead` words while prefetching
    // 2048 bytes ahead. Pass 1 writes that tail with the prefetch pointer reset to the
    // write position, so nothing past the end of the range is prefetched.
    for (int pass = 0; pass < 2; ++pass) {
        do {
            _mm_prefetch(reinterpret_cast<const char*>(ahead), _MM_HINT_T0);
            _mm_prefetch(reinterpret_cast<const char*>(ahead + line_words), _MM_HINT_T0);

            vaes_round(k0, blk01, blk23, blk45, blk67);
            vaes_round(k1, blk01, blk23, blk45, blk67);
            vaes_round(k2, blk01, blk23, blk45, blk67);
            vaes_round(k3, blk01, blk23, blk45, blk67);
            vaes_round(k4, blk01, blk23, blk45, blk67);
            vaes_round(k5, blk01, blk23, blk45, blk67);
            vaes_round(k6, blk01, blk23, blk45, blk67);
            vaes_round(k7, blk01, blk23, blk45, blk67);
            vaes_round(k8, blk01, blk23, blk45, blk67);
            vaes_round(k9, blk01, blk23, blk45, blk67);

            _mm256_store_si256(dst, blk01);
            _mm256_store_si256(dst + 1, blk23);

            _mm256_store_si256(dst + line_words, blk45);
            _mm256_store_si256(dst + line_words + 1, blk67);

            dst += line_words * 2;
            ahead += line_words * 2;
        } while (dst < stop);

        stop += lookahead;
        ahead = dst;
    }

    // Keep the running blocks so a later call can continue with the second half.
    if (half_mem && ctx->first_half) {
        __m256i* saved = reinterpret_cast<__m256i*>(ctx->save_state);
        _mm256_storeu_si256(saved + 0, blk01);
        _mm256_storeu_si256(saved + 1, blk23);
        _mm256_storeu_si256(saved + 2, blk45);
        _mm256_storeu_si256(saved + 3, blk67);
    }

    _mm256_zeroupper();
}
|
||||
|
||||
|
||||
// Two-hash variant of the scratchpad fill: every 256-bit register carries one 128-bit
// block of ctx1 in its low lane and the matching block of ctx2 in its high lane.
//
// ctx1/ctx2: ctx->state supplies key material (first two 128-bit words) and, normally,
//            the eight initial blocks at byte offset 64; ctx->memory receives the output;
//            ctx->save_state / ctx->first_half are used in half_mem mode.
// memory:    scratchpad size in bytes (per hash).
// half_mem:  when true only memory / 2 bytes are produced per call. The saved blocks
//            are used as the starting point only when BOTH contexts have first_half
//            cleared, and are written only when BOTH have it set.
NOINLINE void cn_explode_scratchpad_vaes_double(cryptonight_ctx* ctx1, cryptonight_ctx* ctx2, size_t memory, bool half_mem)
{
    // Number of 128-bit words written per hash by this call.
    size_t word_count = memory / sizeof(__m128i);
    if (half_mem) {
        word_count /= 2;
    }

    const __m128i* state1 = reinterpret_cast<const __m128i*>(ctx1->state);
    const __m128i* state2 = reinterpret_cast<const __m128i*>(ctx2->state);

    __m128i* dst1 = reinterpret_cast<__m128i*>(ctx1->memory);
    __m128i* dst2 = reinterpret_cast<__m128i*>(ctx2->memory);

    __m256i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
    vaes_genkey_double(state1, state2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);

    // Initial blocks: save_state when resuming, otherwise state bytes 64..191.
    const bool resume = half_mem && !ctx1->first_half && !ctx2->first_half;
    const __m128i* seed1 = resume ? reinterpret_cast<const __m128i*>(ctx1->save_state) : (state1 + 4);
    const __m128i* seed2 = resume ? reinterpret_cast<const __m128i*>(ctx2->save_state) : (state2 + 4);

    __m256i blk0 = _mm256_loadu2_m128i(seed2 + 0, seed1 + 0);
    __m256i blk1 = _mm256_loadu2_m128i(seed2 + 1, seed1 + 1);
    __m256i blk2 = _mm256_loadu2_m128i(seed2 + 2, seed1 + 2);
    __m256i blk3 = _mm256_loadu2_m128i(seed2 + 3, seed1 + 3);
    __m256i blk4 = _mm256_loadu2_m128i(seed2 + 4, seed1 + 4);
    __m256i blk5 = _mm256_loadu2_m128i(seed2 + 5, seed1 + 5);
    __m256i blk6 = _mm256_loadu2_m128i(seed2 + 6, seed1 + 6);
    __m256i blk7 = _mm256_loadu2_m128i(seed2 + 7, seed1 + 7);

    constexpr int line_words = 64 / sizeof(__m128i);      // 128-bit words per 64 bytes
    constexpr int lookahead  = 2048 / sizeof(__m128i);    // prefetch distance in words

    __m128i* stop = dst1 + word_count - lookahead;
    __m128i* ahead1 = dst1 + lookahead;
    __m128i* ahead2 = dst2 + lookahead;

    // Pass 0 writes everything except the last `lookahead` words while prefetching
    // 2048 bytes ahead. Pass 1 writes that tail with the prefetch pointers reset to the
    // write positions. Termination is tracked on the ctx1 pointer only; both advance together.
    for (int pass = 0; pass < 2; ++pass) {
        do {
            _mm_prefetch(reinterpret_cast<const char*>(ahead1), _MM_HINT_T0);
            _mm_prefetch(reinterpret_cast<const char*>(ahead1 + line_words), _MM_HINT_T0);
            _mm_prefetch(reinterpret_cast<const char*>(ahead2), _MM_HINT_T0);
            _mm_prefetch(reinterpret_cast<const char*>(ahead2 + line_words), _MM_HINT_T0);

            vaes_round(k0, blk0, blk1, blk2, blk3, blk4, blk5, blk6, blk7);
            vaes_round(k1, blk0, blk1, blk2, blk3, blk4, blk5, blk6, blk7);
            vaes_round(k2, blk0, blk1, blk2, blk3, blk4, blk5, blk6, blk7);
            vaes_round(k3, blk0, blk1, blk2, blk3, blk4, blk5, blk6, blk7);
            vaes_round(k4, blk0, blk1, blk2, blk3, blk4, blk5, blk6, blk7);
            vaes_round(k5, blk0, blk1, blk2, blk3, blk4, blk5, blk6, blk7);
            vaes_round(k6, blk0, blk1, blk2, blk3, blk4, blk5, blk6, blk7);
            vaes_round(k7, blk0, blk1, blk2, blk3, blk4, blk5, blk6, blk7);
            vaes_round(k8, blk0, blk1, blk2, blk3, blk4, blk5, blk6, blk7);
            vaes_round(k9, blk0, blk1, blk2, blk3, blk4, blk5, blk6, blk7);

            // Low lane goes to ctx1's scratchpad, high lane to ctx2's.
            _mm256_storeu2_m128i(dst2 + 0, dst1 + 0, blk0);
            _mm256_storeu2_m128i(dst2 + 1, dst1 + 1, blk1);
            _mm256_storeu2_m128i(dst2 + 2, dst1 + 2, blk2);
            _mm256_storeu2_m128i(dst2 + 3, dst1 + 3, blk3);

            _mm256_storeu2_m128i(dst2 + line_words + 0, dst1 + line_words + 0, blk4);
            _mm256_storeu2_m128i(dst2 + line_words + 1, dst1 + line_words + 1, blk5);
            _mm256_storeu2_m128i(dst2 + line_words + 2, dst1 + line_words + 2, blk6);
            _mm256_storeu2_m128i(dst2 + line_words + 3, dst1 + line_words + 3, blk7);

            dst1 += line_words * 2;
            ahead1 += line_words * 2;
            dst2 += line_words * 2;
            ahead2 += line_words * 2;
        } while (dst1 < stop);

        stop += lookahead;
        ahead1 = dst1;
        ahead2 = dst2;
    }

    // Keep the running blocks so a later call can continue with the second half.
    if (half_mem && ctx1->first_half && ctx2->first_half) {
        __m128i* saved1 = reinterpret_cast<__m128i*>(ctx1->save_state);
        __m128i* saved2 = reinterpret_cast<__m128i*>(ctx2->save_state);
        _mm256_storeu2_m128i(saved2 + 0, saved1 + 0, blk0);
        _mm256_storeu2_m128i(saved2 + 1, saved1 + 1, blk1);
        _mm256_storeu2_m128i(saved2 + 2, saved1 + 2, blk2);
        _mm256_storeu2_m128i(saved2 + 3, saved1 + 3, blk3);
        _mm256_storeu2_m128i(saved2 + 4, saved1 + 4, blk4);
        _mm256_storeu2_m128i(saved2 + 5, saved1 + 5, blk5);
        _mm256_storeu2_m128i(saved2 + 6, saved1 + 6, blk6);
        _mm256_storeu2_m128i(saved2 + 7, saved1 + 7, blk7);
    }

    _mm256_zeroupper();
}
|
||||
|
||||
|
||||
// Folds the scratchpad back into ctx->state: for every 128 bytes of ctx->memory the four
// running 256-bit blocks are XORed with the data and then put through ten AES rounds.
//
// ctx:      ctx->memory is read; ctx->state bytes 64..191 are the running blocks (read
//           at the start, overwritten at the end); the keys come from ctx->state at
//           byte offset 32.
// memory:   scratchpad size in bytes.
// half_mem: when true the buffer holds only memory / 2 bytes at a time. After the first
//           part is consumed, ctx->first_half is cleared and
//           cn_explode_scratchpad_vaes() is called to produce the next half into the
//           same buffer, which is then consumed as the second part.
NOINLINE void cn_implode_scratchpad_vaes(cryptonight_ctx* ctx, size_t memory, bool half_mem)
{
    // Number of 256-bit words consumed per part.
    size_t word_count = memory / sizeof(__m256i);
    if (half_mem) {
        word_count /= 2;
    }

    const __m256i* const scratch_begin = reinterpret_cast<const __m256i*>(ctx->memory);
    __m256i* state = reinterpret_cast<__m256i*>(ctx->state);

    __m256i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
    vaes_genkey(reinterpret_cast<__m128i*>(state) + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);

    __m256i acc01 = _mm256_loadu_si256(state + 2);
    __m256i acc23 = _mm256_loadu_si256(state + 3);
    __m256i acc45 = _mm256_loadu_si256(state + 4);
    __m256i acc67 = _mm256_loadu_si256(state + 5);

    constexpr int line_words = 64 / sizeof(__m256i);   // 256-bit words per 64 bytes

    const __m256i* src = scratch_begin;
    const size_t part_count = half_mem ? 2 : 1;

    for (size_t part = 0; part < part_count; ++part) {
        // A second part only exists in half_mem mode: regenerate the buffer contents
        // and start reading from its beginning again.
        if (part == 1) {
            src = scratch_begin;
            ctx->first_half = false;
            cn_explode_scratchpad_vaes(ctx, memory, half_mem);
        }

        size_t consumed = 0;
        while (consumed < word_count) {
            acc01 = _mm256_xor_si256(acc01, src[0]);
            acc23 = _mm256_xor_si256(acc23, src[1]);

            acc45 = _mm256_xor_si256(acc45, src[line_words]);
            acc67 = _mm256_xor_si256(acc67, src[line_words + 1]);

            src += line_words * 2;
            consumed += 4;

            // Prefetch the next 128 bytes unless this was the last chunk.
            if (consumed < word_count) {
                _mm_prefetch(reinterpret_cast<const char*>(src), _MM_HINT_T0);
                _mm_prefetch(reinterpret_cast<const char*>(src + line_words), _MM_HINT_T0);
            }

            vaes_round(k0, acc01, acc23, acc45, acc67);
            vaes_round(k1, acc01, acc23, acc45, acc67);
            vaes_round(k2, acc01, acc23, acc45, acc67);
            vaes_round(k3, acc01, acc23, acc45, acc67);
            vaes_round(k4, acc01, acc23, acc45, acc67);
            vaes_round(k5, acc01, acc23, acc45, acc67);
            vaes_round(k6, acc01, acc23, acc45, acc67);
            vaes_round(k7, acc01, acc23, acc45, acc67);
            vaes_round(k8, acc01, acc23, acc45, acc67);
            vaes_round(k9, acc01, acc23, acc45, acc67);
        }
    }

    _mm256_storeu_si256(state + 2, acc01);
    _mm256_storeu_si256(state + 3, acc23);
    _mm256_storeu_si256(state + 4, acc45);
    _mm256_storeu_si256(state + 5, acc67);

    _mm256_zeroupper();
}
|
||||
|
||||
|
||||
// Two-hash variant of the scratchpad fold: every 256-bit accumulator carries one 128-bit
// block of ctx1 in its low lane and the matching block of ctx2 in its high lane.
//
// ctx1/ctx2: ctx->memory is read; ctx->state bytes 64..191 are the running blocks (read
//            at the start, overwritten at the end); the keys come from ctx->state at
//            byte offset 32.
// memory:    scratchpad size in bytes (per hash).
// half_mem:  when true each buffer holds only memory / 2 bytes at a time. After the
//            first part is consumed, first_half is cleared on both contexts and
//            cn_explode_scratchpad_vaes_double() produces the next half into the same
//            buffers, which are then consumed as the second part.
NOINLINE void cn_implode_scratchpad_vaes_double(cryptonight_ctx* ctx1, cryptonight_ctx* ctx2, size_t memory, bool half_mem)
{
    // Number of 128-bit words consumed per hash and per part.
    size_t word_count = memory / sizeof(__m128i);
    if (half_mem) {
        word_count /= 2;
    }

    const __m128i* const scratch_begin1 = reinterpret_cast<const __m128i*>(ctx1->memory);
    const __m128i* const scratch_begin2 = reinterpret_cast<const __m128i*>(ctx2->memory);

    __m128i* state1 = reinterpret_cast<__m128i*>(ctx1->state);
    __m128i* state2 = reinterpret_cast<__m128i*>(ctx2->state);

    __m256i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
    vaes_genkey_double(state1 + 2, state2 + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);

    __m256i acc0 = _mm256_loadu2_m128i(state2 + 4, state1 + 4);
    __m256i acc1 = _mm256_loadu2_m128i(state2 + 5, state1 + 5);
    __m256i acc2 = _mm256_loadu2_m128i(state2 + 6, state1 + 6);
    __m256i acc3 = _mm256_loadu2_m128i(state2 + 7, state1 + 7);
    __m256i acc4 = _mm256_loadu2_m128i(state2 + 8, state1 + 8);
    __m256i acc5 = _mm256_loadu2_m128i(state2 + 9, state1 + 9);
    __m256i acc6 = _mm256_loadu2_m128i(state2 + 10, state1 + 10);
    __m256i acc7 = _mm256_loadu2_m128i(state2 + 11, state1 + 11);

    constexpr int line_words = 64 / sizeof(__m128i);   // 128-bit words per 64 bytes

    const __m128i* src1 = scratch_begin1;
    const __m128i* src2 = scratch_begin2;
    const size_t part_count = half_mem ? 2 : 1;

    for (size_t part = 0; part < part_count; ++part) {
        // A second part only exists in half_mem mode: regenerate the buffer contents
        // and start reading from the beginning again.
        if (part == 1) {
            src1 = scratch_begin1;
            src2 = scratch_begin2;
            ctx1->first_half = false;
            ctx2->first_half = false;
            cn_explode_scratchpad_vaes_double(ctx1, ctx2, memory, half_mem);
        }

        size_t consumed = 0;
        while (consumed < word_count) {
            acc0 = _mm256_xor_si256(_mm256_loadu2_m128i(src2 + 0, src1 + 0), acc0);
            acc1 = _mm256_xor_si256(_mm256_loadu2_m128i(src2 + 1, src1 + 1), acc1);
            acc2 = _mm256_xor_si256(_mm256_loadu2_m128i(src2 + 2, src1 + 2), acc2);
            acc3 = _mm256_xor_si256(_mm256_loadu2_m128i(src2 + 3, src1 + 3), acc3);

            acc4 = _mm256_xor_si256(_mm256_loadu2_m128i(src2 + line_words + 0, src1 + line_words + 0), acc4);
            acc5 = _mm256_xor_si256(_mm256_loadu2_m128i(src2 + line_words + 1, src1 + line_words + 1), acc5);
            acc6 = _mm256_xor_si256(_mm256_loadu2_m128i(src2 + line_words + 2, src1 + line_words + 2), acc6);
            acc7 = _mm256_xor_si256(_mm256_loadu2_m128i(src2 + line_words + 3, src1 + line_words + 3), acc7);

            src1 += line_words * 2;
            src2 += line_words * 2;
            consumed += 8;

            // Prefetch the next 128 bytes of each scratchpad unless this was the last chunk.
            if (consumed < word_count) {
                _mm_prefetch(reinterpret_cast<const char*>(src1), _MM_HINT_T0);
                _mm_prefetch(reinterpret_cast<const char*>(src1 + line_words), _MM_HINT_T0);
                _mm_prefetch(reinterpret_cast<const char*>(src2), _MM_HINT_T0);
                _mm_prefetch(reinterpret_cast<const char*>(src2 + line_words), _MM_HINT_T0);
            }

            vaes_round(k0, acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7);
            vaes_round(k1, acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7);
            vaes_round(k2, acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7);
            vaes_round(k3, acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7);
            vaes_round(k4, acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7);
            vaes_round(k5, acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7);
            vaes_round(k6, acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7);
            vaes_round(k7, acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7);
            vaes_round(k8, acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7);
            vaes_round(k9, acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7);
        }
    }

    // Low lane back to ctx1's state, high lane back to ctx2's.
    _mm256_storeu2_m128i(state2 + 4, state1 + 4, acc0);
    _mm256_storeu2_m128i(state2 + 5, state1 + 5, acc1);
    _mm256_storeu2_m128i(state2 + 6, state1 + 6, acc2);
    _mm256_storeu2_m128i(state2 + 7, state1 + 7, acc3);
    _mm256_storeu2_m128i(state2 + 8, state1 + 8, acc4);
    _mm256_storeu2_m128i(state2 + 9, state1 + 9, acc5);
    _mm256_storeu2_m128i(state2 + 10, state1 + 10, acc6);
    _mm256_storeu2_m128i(state2 + 11, state1 + 11, acc7);

    _mm256_zeroupper();
}
|
||||
|
||||
|
||||
} // xmrig
|
||||
48
src/crypto/cn/CryptoNight_x86_vaes.h
Normal file
48
src/crypto/cn/CryptoNight_x86_vaes.h
Normal file
@@ -0,0 +1,48 @@
|
||||
/* XMRig
|
||||
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
|
||||
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
|
||||
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
|
||||
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
|
||||
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
|
||||
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
|
||||
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
|
||||
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
|
||||
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
|
||||
*
|
||||
* This program is free software: you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation, either version 3 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* This program is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with this program. If not, see <http://www.gnu.org/licenses/>.
|
||||
*/
|
||||
|
||||
#ifndef XMRIG_CRYPTONIGHT_X86_VAES_H
|
||||
#define XMRIG_CRYPTONIGHT_X86_VAES_H
|
||||
|
||||
|
||||
#include "crypto/cn/CnAlgo.h"
|
||||
|
||||
|
||||
struct cryptonight_ctx;
|
||||
|
||||
|
||||
namespace xmrig {
|
||||
|
||||
|
||||
void cn_explode_scratchpad_vaes(cryptonight_ctx* ctx, size_t memory, bool half_mem);
|
||||
void cn_explode_scratchpad_vaes_double(cryptonight_ctx* ctx1, cryptonight_ctx* ctx2, size_t memory, bool half_mem);
|
||||
void cn_implode_scratchpad_vaes(cryptonight_ctx* ctx, size_t memory, bool half_mem);
|
||||
void cn_implode_scratchpad_vaes_double(cryptonight_ctx* ctx1, cryptonight_ctx* ctx2, size_t memory, bool half_mem);
|
||||
|
||||
|
||||
} // xmrig
|
||||
|
||||
|
||||
#endif /* XMRIG_CRYPTONIGHT_X86_VAES_H */
|
||||
132
src/crypto/cn/asm/cn1/cnv1_double_main_loop.inc
Normal file
132
src/crypto/cn/asm/cn1/cnv1_double_main_loop.inc
Normal file
@@ -0,0 +1,132 @@
|
||||
/* cnv1 main loop, two hashes interleaved. This fragment has no entry label and no ret */
/* of its own; it is meant to be textually included inside a function body. */
/* Input: rcx = address of an array of two context pointers. */
/* Prologue: rbx/rbp/rsi are stored above the current rsp, rdi and r12-r15 are pushed, */
/* 32 bytes of locals are reserved and xmm6 is saved at [rsp+16]. */
mov QWORD PTR [rsp+8], rbx
|
||||
mov QWORD PTR [rsp+16], rbp
|
||||
mov QWORD PTR [rsp+24], rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 32
|
||||
/* rdx = first context, r8 = second context */
mov rdx, QWORD PTR [rcx]
|
||||
mov r8, QWORD PTR [rcx+8]
|
||||
/* r12 = iteration counter (524288 = 0x80000) */
mov r12d, 524288
|
||||
movaps XMMWORD PTR [rsp+16], xmm6
|
||||
/* rbx:rsi (first hash) and rdi:rbp (second hash) = qwords at context offsets 32/40 */
/* XOR qwords at offsets 0/8. 2097136 = 0x1FFFF0 masks a value to a 16-byte aligned */
/* offset below 2 MiB (r10 for the first hash, r9 for the second). */
mov rbx, QWORD PTR [rdx+32]
|
||||
xor rbx, QWORD PTR [rdx]
|
||||
mov rsi, QWORD PTR [rdx+40]
|
||||
mov r10, rbx
|
||||
xor rsi, QWORD PTR [rdx+8]
|
||||
and r10d, 2097136
|
||||
mov rdi, QWORD PTR [r8+32]
|
||||
xor rdi, QWORD PTR [r8]
|
||||
movq xmm3, rbx
|
||||
mov rbp, QWORD PTR [r8+40]
|
||||
mov r9, rdi
|
||||
xor rbp, QWORD PTR [r8+8]
|
||||
movq xmm0, rsi
|
||||
mov rcx, QWORD PTR [rdx+56]
|
||||
and r9d, 2097136
|
||||
xor rcx, QWORD PTR [rdx+24]
|
||||
movq xmm4, rdi
|
||||
mov rax, QWORD PTR [rdx+48]
|
||||
xor rax, QWORD PTR [rdx+16]
|
||||
/* r14 / r15 = base pointers (context offset 224) for all masked loads and stores; */
/* r13 = dword table pointer (offset 232), taken from the first context and used for */
/* both hashes. */
mov r14, QWORD PTR [rdx+224]
|
||||
mov r13, QWORD PTR [rdx+232]
|
||||
mov r15, QWORD PTR [r8+224]
|
||||
/* xmm3 / xmm4 = 128-bit AES round keys (rbx:rsi and rdi:rbp); */
/* xmm5 / xmm6 = qwords at offsets 48/56 XOR offsets 16/24 of each context */
punpcklqdq xmm3, xmm0
|
||||
movq xmm0, rbp
|
||||
movq xmm5, rax
|
||||
punpcklqdq xmm4, xmm0
|
||||
mov rax, QWORD PTR [r8+48]
|
||||
movq xmm0, rcx
|
||||
xor rax, QWORD PTR [r8+16]
|
||||
mov rcx, QWORD PTR [r8+56]
|
||||
xor rcx, QWORD PTR [r8+24]
|
||||
movdqu xmm1, XMMWORD PTR [r14+r10]
|
||||
movq xmm6, rax
|
||||
punpcklqdq xmm5, xmm0
|
||||
/* [rsp] / [rsp+8] = qword at context offset 240 of the first / second context */
mov rax, QWORD PTR [rdx+240]
|
||||
movq xmm0, rcx
|
||||
movdqu xmm2, XMMWORD PTR [r15+r9]
|
||||
mov QWORD PTR [rsp], rax
|
||||
mov rax, QWORD PTR [r8+240]
|
||||
mov QWORD PTR [rsp+8], rax
|
||||
punpcklqdq xmm6, xmm0
|
||||
|
||||
ALIGN(64)
|
||||
main_loop_cnv1_double:
|
||||
/* One AES round on the 16 bytes loaded for each hash */
aesenc xmm1, xmm3
|
||||
aesenc xmm2, xmm4
|
||||
/* First hash: store (AES result XOR xmm5) at the address it was loaded from. The */
/* high qword is first XORed with a table dword indexed by bits 24..31 of that qword. */
/* xmm5 then becomes the AES result for the next iteration. */
movdqa xmm0, xmm1
|
||||
movq r11, xmm2
|
||||
pxor xmm0, xmm5
|
||||
movdqa xmm5, xmm1
|
||||
movq QWORD PTR [r14+r10], xmm0
|
||||
pextrq rcx, xmm0, 1
|
||||
mov eax, ecx
|
||||
movdqa xmm0, xmm2
|
||||
shr rax, 24
|
||||
pxor xmm0, xmm6
|
||||
movdqa xmm6, xmm2
|
||||
mov eax, DWORD PTR [r13+rax*4]
|
||||
xor rax, rcx
|
||||
mov QWORD PTR [r14+r10+8], rax
|
||||
/* Second hash: same store sequence with xmm6 */
movq QWORD PTR [r15+r9], xmm0
|
||||
pextrq rcx, xmm0, 1
|
||||
mov eax, ecx
|
||||
shr rax, 24
|
||||
mov eax, DWORD PTR [r13+rax*4]
|
||||
xor rax, rcx
|
||||
movq rcx, xmm1
|
||||
mov QWORD PTR [r15+r9+8], rax
|
||||
/* First hash, second half: load 16 bytes at the offset taken from the AES result, */
/* multiply its low qword by the AES result's low qword (rdx:rax = rax * rcx), add */
/* the 128-bit product into rbx:rsi, store (second qword XORed with [rsp]), then */
/* XOR the loaded values into rbx:rsi. */
mov r9, rcx
|
||||
and r9d, 2097136
|
||||
mov r10, QWORD PTR [r14+r9]
|
||||
mov r8, QWORD PTR [r14+r9+8]
|
||||
mov rax, r10
|
||||
mul rcx
|
||||
add rsi, rax
|
||||
add rbx, rdx
|
||||
mov rax, QWORD PTR [rsp]
|
||||
mov QWORD PTR [r14+r9], rbx
|
||||
xor rax, rsi
|
||||
mov QWORD PTR [r14+r9+8], rax
|
||||
xor rsi, r8
|
||||
xor rbx, r10
|
||||
mov r8, r11
|
||||
and r8d, 2097136
|
||||
/* Next-iteration key (xmm3) and 16-byte block (xmm1) for the first hash */
mov r10, rbx
|
||||
and r10d, 2097136
|
||||
movq xmm3, rbx
|
||||
pinsrq xmm3, rsi, 1
|
||||
/* Second hash, second half: same sequence with r11 (low qword of its AES result), */
/* rdi:rbp and [rsp+8] */
mov r9, QWORD PTR [r15+r8]
|
||||
mov rcx, QWORD PTR [r15+r8+8]
|
||||
mov rax, r9
|
||||
movdqu xmm1, XMMWORD PTR [r14+r10]
|
||||
mul r11
|
||||
add rbp, rax
|
||||
add rdi, rdx
|
||||
mov rax, QWORD PTR [rsp+8]
|
||||
mov QWORD PTR [r15+r8], rdi
|
||||
xor rax, rbp
|
||||
xor rdi, r9
|
||||
mov QWORD PTR [r15+r8+8], rax
|
||||
mov r9, rdi
|
||||
xor rbp, rcx
|
||||
and r9d, 2097136
|
||||
movq xmm4, rdi
|
||||
pinsrq xmm4, rbp, 1
|
||||
movdqu xmm2, XMMWORD PTR [r15+r9]
|
||||
sub r12, 1
|
||||
jne main_loop_cnv1_double
|
||||
|
||||
/* Epilogue: rsp is 72 bytes below its entry value here (5 pushes + 32), so */
/* [rsp+80..96] are the slots written by the first three stores. */
mov rbx, QWORD PTR [rsp+80]
|
||||
mov rbp, QWORD PTR [rsp+88]
|
||||
mov rsi, QWORD PTR [rsp+96]
|
||||
movaps xmm6, XMMWORD PTR [rsp+16]
|
||||
add rsp, 32
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
263
src/crypto/cn/asm/cn1/cnv1_quad_main_loop.inc
Normal file
263
src/crypto/cn/asm/cn1/cnv1_quad_main_loop.inc
Normal file
@@ -0,0 +1,263 @@
|
||||
/* cnv1 main loop, four hashes interleaved. This fragment has no entry label and no ret */
/* of its own; it is meant to be textually included inside a function body. */
/* Input: rcx = address of an array of four context pointers. */
/* Prologue: rax keeps the entry rsp; rbx/rbp/rsi are stored above it, rdi and r12-r15 */
/* are pushed and 144 bytes of locals are reserved. */
mov rax, rsp
|
||||
mov QWORD PTR [rax+8], rbx
|
||||
mov QWORD PTR [rax+16], rbp
|
||||
mov QWORD PTR [rax+24], rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 144
|
||||
/* r8, r9, r10, r11 = the four context pointers */
mov r8, QWORD PTR [rcx]
|
||||
mov r9, QWORD PTR [rcx+8]
|
||||
mov r10, QWORD PTR [rcx+16]
|
||||
mov r11, QWORD PTR [rcx+24]
|
||||
/* rbp, r14, r15, r12 = base pointers (context offset 224) for the masked loads and */
/* stores of hashes 0..3; r13 = dword table pointer (offset 232) taken from the first */
/* context and used for all four hashes. */
mov rbp, QWORD PTR [r8+224]
|
||||
mov r13, QWORD PTR [r8+232]
|
||||
mov r14, QWORD PTR [r9+224]
|
||||
mov r15, QWORD PTR [r10+224]
|
||||
mov r12, QWORD PTR [r11+224]
|
||||
/* xmm1..xmm4 = per-hash 128-bit AES keys: qwords at context offsets 32/40 XOR qwords */
/* at offsets 0/8. rbx, rdi, rsi, rax end up holding the low qword masked with */
/* 2097136 (0x1FFFF0), i.e. the first load offset of each hash. */
mov rcx, QWORD PTR [r8+40]
|
||||
xor rcx, QWORD PTR [r8+8]
|
||||
mov rbx, QWORD PTR [r8+32]
|
||||
xor rbx, QWORD PTR [r8]
|
||||
mov rdi, QWORD PTR [r9+32]
|
||||
xor rdi, QWORD PTR [r9]
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [r9+40]
|
||||
xor rcx, QWORD PTR [r9+8]
|
||||
movq xmm1, rbx
|
||||
/* Save xmm6-xmm12 in the local area (rax still holds the entry rsp, which is */
/* rsp+184 at this point, so [rax-56] is [rsp+128] and so on). */
movaps XMMWORD PTR [rax-56], xmm6
|
||||
movaps XMMWORD PTR [rax-72], xmm7
|
||||
movaps XMMWORD PTR [rax-88], xmm8
|
||||
movaps XMMWORD PTR [rax-104], xmm9
|
||||
movaps XMMWORD PTR [rax-120], xmm10
|
||||
movaps XMMWORD PTR [rsp+48], xmm11
|
||||
movaps XMMWORD PTR [rsp+32], xmm12
|
||||
and ebx, 2097136
|
||||
mov rsi, QWORD PTR [r10+32]
|
||||
movq xmm2, rdi
|
||||
/* [rsp], [rsp+8], [rsp+16], [rsp+24] = qword at context offset 240 of hashes 0..3 */
mov rax, QWORD PTR [r8+240]
|
||||
and edi, 2097136
|
||||
xor rsi, QWORD PTR [r10]
|
||||
mov rdx, QWORD PTR [r8+56]
|
||||
xor rdx, QWORD PTR [r8+24]
|
||||
mov QWORD PTR [rsp], rax
|
||||
mov rax, QWORD PTR [r9+240]
|
||||
movq xmm3, rsi
|
||||
mov QWORD PTR [rsp+8], rax
|
||||
and esi, 2097136
|
||||
mov rax, QWORD PTR [r10+240]
|
||||
punpcklqdq xmm1, xmm0
|
||||
movq xmm0, rcx
|
||||
mov rcx, QWORD PTR [r10+40]
|
||||
xor rcx, QWORD PTR [r10+8]
|
||||
mov QWORD PTR [rsp+16], rax
|
||||
mov rax, QWORD PTR [r11+240]
|
||||
punpcklqdq xmm2, xmm0
|
||||
movq xmm0, rcx
|
||||
mov QWORD PTR [rsp+24], rax
|
||||
mov rcx, QWORD PTR [r11+40]
|
||||
xor rcx, QWORD PTR [r11+8]
|
||||
mov rax, QWORD PTR [r11+32]
|
||||
xor rax, QWORD PTR [r11]
|
||||
punpcklqdq xmm3, xmm0
|
||||
movq xmm0, rcx
|
||||
/* xmm5..xmm8 = per-hash qwords at context offsets 48/56 XOR offsets 16/24 */
mov rcx, QWORD PTR [r8+48]
|
||||
xor rcx, QWORD PTR [r8+16]
|
||||
movq xmm4, rax
|
||||
and eax, 2097136
|
||||
punpcklqdq xmm4, xmm0
|
||||
movq xmm0, rdx
|
||||
mov rdx, QWORD PTR [r9+56]
|
||||
xor rdx, QWORD PTR [r9+24]
|
||||
movq xmm5, rcx
|
||||
mov rcx, QWORD PTR [r9+48]
|
||||
xor rcx, QWORD PTR [r9+16]
|
||||
punpcklqdq xmm5, xmm0
|
||||
movq xmm0, rdx
|
||||
mov rdx, QWORD PTR [r10+56]
|
||||
xor rdx, QWORD PTR [r10+24]
|
||||
movq xmm6, rcx
|
||||
mov rcx, QWORD PTR [r10+48]
|
||||
xor rcx, QWORD PTR [r10+16]
|
||||
punpcklqdq xmm6, xmm0
|
||||
movq xmm0, rdx
|
||||
mov rdx, QWORD PTR [r11+56]
|
||||
movq xmm7, rcx
|
||||
punpcklqdq xmm7, xmm0
|
||||
xor rdx, QWORD PTR [r11+24]
|
||||
mov rcx, QWORD PTR [r11+48]
|
||||
xor rcx, QWORD PTR [r11+16]
|
||||
/* r11 is no longer needed as a context pointer: it becomes the iteration counter */
/* (524288 = 0x80000). xmm9..xmm12 = first 16-byte block of each hash. */
mov r11d, 524288
|
||||
movdqu xmm9, XMMWORD PTR [rbp+rbx]
|
||||
movdqu xmm10, XMMWORD PTR [r14+rdi]
|
||||
movq xmm0, rdx
|
||||
movdqu xmm11, XMMWORD PTR [r15+rsi]
|
||||
movdqu xmm12, XMMWORD PTR [r12+rax]
|
||||
movq xmm8, rcx
|
||||
punpcklqdq xmm8, xmm0
|
||||
|
||||
ALIGN(64)
|
||||
main_loop_cnv1_quad:
|
||||
/* One AES round per hash */
aesenc xmm9, xmm1
|
||||
aesenc xmm10, xmm2
|
||||
aesenc xmm11, xmm3
|
||||
aesenc xmm12, xmm4
|
||||
/* Prefetch the locations addressed by the low dword of each AES result; they are */
/* read in the second half of this iteration. */
movd ecx, xmm9
|
||||
and ecx, 2097136
|
||||
prefetcht0 BYTE PTR [rcx+rbp]
|
||||
movd ecx, xmm10
|
||||
and ecx, 2097136
|
||||
prefetcht0 BYTE PTR [rcx+r14]
|
||||
movd ecx, xmm11
|
||||
and ecx, 2097136
|
||||
prefetcht0 BYTE PTR [rcx+r15]
|
||||
movd ecx, xmm12
|
||||
and ecx, 2097136
|
||||
prefetcht0 BYTE PTR [rcx+r12]
|
||||
/* Hash 0: store (AES result XOR xmm5) back at the load address. The high qword is */
/* first XORed with a table dword indexed by bits 24..31 of that qword. xmm5 then */
/* becomes the AES result for the next iteration. */
movdqa xmm0, xmm9
|
||||
pxor xmm0, xmm5
|
||||
movdqa xmm5, xmm9
|
||||
movq QWORD PTR [rbp+rbx], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov ecx, edx
|
||||
movdqa xmm0, xmm10
|
||||
shr rcx, 24
|
||||
pxor xmm0, xmm6
|
||||
mov ecx, DWORD PTR [r13+rcx*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [rbp+rbx+8], rcx
|
||||
/* rbx now switches from "load offset" to "low qword of key xmm1" */
movq rbx, xmm1
|
||||
/* Hash 1: same store sequence with xmm6 */
movq QWORD PTR [r14+rdi], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov ecx, edx
|
||||
movdqa xmm0, xmm11
|
||||
shr rcx, 24
|
||||
pxor xmm0, xmm7
|
||||
mov ecx, DWORD PTR [r13+rcx*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [r14+rdi+8], rcx
|
||||
movq rdi, xmm2
|
||||
/* Hash 2: same store sequence with xmm7 */
movq QWORD PTR [r15+rsi], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov ecx, edx
|
||||
movdqa xmm0, xmm12
|
||||
shr rcx, 24
|
||||
pxor xmm0, xmm8
|
||||
mov ecx, DWORD PTR [r13+rcx*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [r15+rsi+8], rcx
|
||||
/* Hash 3: same store sequence with xmm8 */
movq QWORD PTR [r12+rax], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov ecx, edx
|
||||
shr rcx, 24
|
||||
mov ecx, DWORD PTR [r13+rcx*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [r12+rax+8], rcx
|
||||
/* Hash 0, second half: load 16 bytes at the offset taken from the AES result, */
/* multiply its low qword by the AES result's low qword (rdx:rax = rax * rcx), add */
/* the product into the key value (rbx : high qword of xmm1), store (second qword */
/* XORed with [rsp]), then XOR the loaded values in to form the next key. */
movq rcx, xmm9
|
||||
mov r8, rcx
|
||||
and r8d, 2097136
|
||||
mov r9, QWORD PTR [rbp+r8]
|
||||
mov r10, QWORD PTR [rbp+r8+8]
|
||||
mov rax, r9
|
||||
mul rcx
|
||||
pextrq rcx, xmm1, 1
|
||||
add rcx, rax
|
||||
add rbx, rdx
|
||||
mov rax, QWORD PTR [rsp]
|
||||
mov QWORD PTR [rbp+r8], rbx
|
||||
xor rax, rcx
|
||||
mov QWORD PTR [rbp+r8+8], rax
|
||||
xor rcx, r10
|
||||
xor rbx, r9
|
||||
movq xmm1, rbx
|
||||
and ebx, 2097136
|
||||
pinsrq xmm1, rcx, 1
|
||||
/* Hash 1, second half ([rsp+8]); the next block of hash 0 is loaded meanwhile */
movq rcx, xmm10
|
||||
mov r8, rcx
|
||||
and r8d, 2097136
|
||||
movdqu xmm9, XMMWORD PTR [rbp+rbx]
|
||||
mov r9, QWORD PTR [r14+r8]
|
||||
mov r10, QWORD PTR [r14+r8+8]
|
||||
mov rax, r9
|
||||
mul rcx
|
||||
pextrq rcx, xmm2, 1
|
||||
add rcx, rax
|
||||
add rdi, rdx
|
||||
mov rax, QWORD PTR [rsp+8]
|
||||
mov QWORD PTR [r14+r8], rdi
|
||||
xor rax, rcx
|
||||
xor rdi, r9
|
||||
mov QWORD PTR [r14+r8+8], rax
|
||||
xor rcx, r10
|
||||
movq xmm2, rdi
|
||||
and edi, 2097136
|
||||
pinsrq xmm2, rcx, 1
|
||||
/* Hash 2, second half ([rsp+16]). xmm6..xmm8 take over the AES results of hashes */
/* 1..3 here, before xmm10..xmm12 are reloaded with the next blocks. */
movq rcx, xmm11
|
||||
movq rsi, xmm3
|
||||
mov r8, rcx
|
||||
and r8d, 2097136
|
||||
movdqa xmm6, xmm10
|
||||
movdqa xmm7, xmm11
|
||||
movdqa xmm8, xmm12
|
||||
movdqu xmm10, XMMWORD PTR [r14+rdi]
|
||||
mov r9, QWORD PTR [r15+r8]
|
||||
mov r10, QWORD PTR [r15+r8+8]
|
||||
mov rax, r9
|
||||
mul rcx
|
||||
pextrq rcx, xmm3, 1
|
||||
add rcx, rax
|
||||
add rsi, rdx
|
||||
mov rax, QWORD PTR [rsp+16]
|
||||
xor rax, rcx
|
||||
mov QWORD PTR [r15+r8], rsi
|
||||
mov QWORD PTR [r15+r8+8], rax
|
||||
xor rcx, r10
|
||||
xor rsi, r9
|
||||
movq xmm3, rsi
|
||||
and esi, 2097136
|
||||
pinsrq xmm3, rcx, 1
|
||||
/* Hash 3, second half ([rsp+24]); here the key halves live in rax / rdx */
movq rcx, xmm12
|
||||
mov r8, rcx
|
||||
and r8d, 2097136
|
||||
movdqu xmm11, XMMWORD PTR [r15+rsi]
|
||||
mov r9, QWORD PTR [r12+r8]
|
||||
mov r10, QWORD PTR [r12+r8+8]
|
||||
mov rax, r9
|
||||
mul rcx
|
||||
mov rcx, rax
|
||||
movq rax, xmm4
|
||||
add rax, rdx
|
||||
mov QWORD PTR [r12+r8], rax
|
||||
xor rax, r9
|
||||
pextrq rdx, xmm4, 1
|
||||
add rdx, rcx
|
||||
mov rcx, QWORD PTR [rsp+24]
|
||||
xor rcx, rdx
|
||||
xor rdx, r10
|
||||
movq xmm4, rax
|
||||
mov QWORD PTR [r12+r8+8], rcx
|
||||
and eax, 2097136
|
||||
pinsrq xmm4, rdx, 1
|
||||
movdqu xmm12, XMMWORD PTR [r12+rax]
|
||||
sub r11, 1
|
||||
jne main_loop_cnv1_quad
|
||||
|
||||
/* Epilogue: restore xmm6-xmm12 and rbx/rbp/rsi. r11 = rsp+144 is the stack pointer */
/* value right after the five pushes; [r11+48..64] are the slots written by the first */
/* three stores. */
movaps xmm7, XMMWORD PTR [rsp+112]
|
||||
lea r11, QWORD PTR [rsp+144]
|
||||
mov rbx, QWORD PTR [r11+48]
|
||||
mov rbp, QWORD PTR [r11+56]
|
||||
mov rsi, QWORD PTR [r11+64]
|
||||
movaps xmm6, XMMWORD PTR [r11-16]
|
||||
movaps xmm8, XMMWORD PTR [r11-48]
|
||||
movaps xmm9, XMMWORD PTR [r11-64]
|
||||
movaps xmm10, XMMWORD PTR [r11-80]
|
||||
movaps xmm11, XMMWORD PTR [r11-96]
|
||||
movaps xmm12, XMMWORD PTR [r11-112]
|
||||
mov rsp, r11
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
66
src/crypto/cn/asm/cn1/cnv1_single_main_loop.inc
Normal file
66
src/crypto/cn/asm/cn1/cnv1_single_main_loop.inc
Normal file
@@ -0,0 +1,66 @@
|
||||
/* cnv1 main loop, single hash. This fragment has no entry label and no ret of its own; */
/* it is meant to be textually included inside a function body. */
/* Input: rcx = address of an array whose first element is the context pointer. */
/* Prologue: rbx/rbp/rsi/rdi are stored above the current rsp; r13-r15 are pushed. */
mov QWORD PTR [rsp+8], rbx
|
||||
mov QWORD PTR [rsp+16], rbp
|
||||
mov QWORD PTR [rsp+24], rsi
|
||||
mov QWORD PTR [rsp+32], rdi
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
/* rdx = context pointer; rsi = iteration counter (524288 = 0x80000) */
mov rdx, QWORD PTR [rcx]
|
||||
mov esi, 524288
|
||||
/* r11:rbx = qwords at context offsets 32/40 XOR qwords at offsets 0/8 (the AES key); */
/* rdi = base pointer (offset 224) for all masked loads and stores */
mov r11, QWORD PTR [rdx+32]
|
||||
xor r11, QWORD PTR [rdx]
|
||||
mov rdi, QWORD PTR [rdx+224]
|
||||
mov rbx, QWORD PTR [rdx+40]
|
||||
xor rbx, QWORD PTR [rdx+8]
|
||||
/* xmm2 = qwords at offsets 48/56 XOR offsets 16/24; rbp = qword at offset 240; */
/* r14 = dword table pointer (offset 232) */
mov rcx, QWORD PTR [rdx+56]
|
||||
xor rcx, QWORD PTR [rdx+24]
|
||||
mov rax, QWORD PTR [rdx+48]
|
||||
xor rax, QWORD PTR [rdx+16]
|
||||
mov rbp, QWORD PTR [rdx+240]
|
||||
mov r14, QWORD PTR [rdx+232]
|
||||
movq xmm2, rax
|
||||
pinsrq xmm2, rcx, 1
|
||||
|
||||
ALIGN(64)
|
||||
main_loop_cnv1_single:
|
||||
/* Load 16 bytes at offset (r11 & 0x1FFFF0) and run one AES round keyed by r11:rbx */
mov r8, r11
|
||||
and r8d, 2097136
|
||||
movdqu xmm1, XMMWORD PTR [rdi+r8]
|
||||
movq xmm0, r11
|
||||
pinsrq xmm0, rbx, 1
|
||||
aesenc xmm1, xmm0
|
||||
/* r15 = low qword of the AES result, r9 = the offset derived from it */
movq r15, xmm1
|
||||
mov r9, r15
|
||||
and r9d, 2097136
|
||||
/* Store (AES result XOR xmm2) back at the load address. The high qword is first */
/* XORed with a table dword indexed by bits 24..31 of that qword. xmm2 then becomes */
/* the AES result for the next iteration. */
movdqa xmm0, xmm1
|
||||
pxor xmm0, xmm2
|
||||
movdqa xmm2, xmm1
|
||||
movq QWORD PTR [rdi+r8], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov eax, edx
|
||||
shr rax, 24
|
||||
mov ecx, DWORD PTR [r14+rax*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [rdi+r8+8], rcx
|
||||
/* Second half: load 16 bytes at offset r9, multiply the low qword by r15 */
/* (rdx:rax = rax * r15), add the product into r11:rbx, store (second qword XORed */
/* with rbp), then XOR the loaded values into r11:rbx. */
mov r10, QWORD PTR [rdi+r9]
|
||||
mov r8, QWORD PTR [rdi+r9+8]
|
||||
mov rax, r10
|
||||
mul r15
|
||||
add rbx, rax
|
||||
add r11, rdx
|
||||
mov QWORD PTR [rdi+r9], r11
|
||||
mov rax, rbx
|
||||
xor rbx, r8
|
||||
xor r11, r10
|
||||
xor rax, rbp
|
||||
mov QWORD PTR [rdi+r9+8], rax
|
||||
sub rsi, 1
|
||||
jne main_loop_cnv1_single
|
||||
|
||||
/* Epilogue: undo the pushes, then reload the registers stored at entry */
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
mov rbx, QWORD PTR [rsp+8]
|
||||
mov rbp, QWORD PTR [rsp+16]
|
||||
mov rsi, QWORD PTR [rsp+24]
|
||||
mov rdi, QWORD PTR [rsp+32]
|
||||
@@ -11,6 +11,9 @@
|
||||
# define FN_PREFIX(fn) fn
|
||||
.section .text
|
||||
#endif
|
||||
.global FN_PREFIX(cnv1_single_mainloop_asm)
|
||||
.global FN_PREFIX(cnv1_double_mainloop_asm)
|
||||
.global FN_PREFIX(cnv1_quad_mainloop_asm)
|
||||
.global FN_PREFIX(cnv2_mainloop_ivybridge_asm)
|
||||
.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
|
||||
.global FN_PREFIX(cnv2_mainloop_bulldozer_asm)
|
||||
@@ -19,6 +22,33 @@
|
||||
.global FN_PREFIX(cnv2_rwz_double_mainloop_asm)
|
||||
.global FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm)
|
||||
|
||||
/* Entry point for the single-hash cnv1 main loop (GAS build). */
ALIGN(64)
|
||||
FN_PREFIX(cnv1_single_mainloop_asm):
|
||||
/* Reserve 48 bytes: the included body stores registers at [rsp+8..] on entry */
sub rsp, 48
|
||||
/* The included body reads its argument from rcx; here it arrives in rdi */
mov rcx, rdi
|
||||
#include "cn1/cnv1_single_main_loop.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
/* Never executed (follows ret). 3735929054 = 0xDEADC0DE, presumably a marker for */
/* the end of the function -- confirm against the code that scans for it. */
mov eax, 3735929054
|
||||
|
||||
/* Entry point for the double-hash cnv1 main loop (GAS build). */
ALIGN(64)
|
||||
FN_PREFIX(cnv1_double_mainloop_asm):
|
||||
/* Reserve 48 bytes: the included body stores registers at [rsp+8..] on entry */
sub rsp, 48
|
||||
/* The included body reads its argument from rcx; here it arrives in rdi */
mov rcx, rdi
|
||||
#include "cn1/cnv1_double_main_loop.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
/* Never executed (follows ret). 3735929054 = 0xDEADC0DE, presumably a marker for */
/* the end of the function -- confirm against the code that scans for it. */
mov eax, 3735929054
|
||||
|
||||
/* Entry point for the quad-hash cnv1 main loop (GAS build). */
ALIGN(64)
|
||||
FN_PREFIX(cnv1_quad_mainloop_asm):
|
||||
/* Reserve 48 bytes: the included body stores registers at [rsp+8..] on entry */
sub rsp, 48
|
||||
/* The included body reads its argument from rcx; here it arrives in rdi */
mov rcx, rdi
|
||||
#include "cn1/cnv1_quad_main_loop.inc"
|
||||
add rsp, 48
|
||||
ret 0
|
||||
/* Never executed (follows ret). 3735929054 = 0xDEADC0DE, presumably a marker for */
/* the end of the function -- confirm against the code that scans for it. */
mov eax, 3735929054
|
||||
|
||||
ALIGN(64)
|
||||
FN_PREFIX(cnv2_mainloop_ivybridge_asm):
|
||||
sub rsp, 48
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE
|
||||
PUBLIC cnv1_single_mainloop_asm
|
||||
PUBLIC cnv1_double_mainloop_asm
|
||||
PUBLIC cnv1_quad_mainloop_asm
|
||||
PUBLIC cnv2_mainloop_ivybridge_asm
|
||||
PUBLIC cnv2_mainloop_ryzen_asm
|
||||
PUBLIC cnv2_mainloop_bulldozer_asm
|
||||
@@ -6,6 +9,27 @@ PUBLIC cnv2_double_mainloop_sandybridge_asm
|
||||
PUBLIC cnv2_rwz_mainloop_asm
|
||||
PUBLIC cnv2_rwz_double_mainloop_asm
|
||||
|
||||
ALIGN(64)
|
||||
cnv1_single_mainloop_asm PROC
|
||||
INCLUDE cn1/cnv1_single_main_loop.inc
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
cnv1_single_mainloop_asm ENDP
|
||||
|
||||
ALIGN(64)
|
||||
cnv1_double_mainloop_asm PROC
|
||||
INCLUDE cn1/cnv1_double_main_loop.inc
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
cnv1_double_mainloop_asm ENDP
|
||||
|
||||
ALIGN(64)
|
||||
cnv1_quad_mainloop_asm PROC
|
||||
INCLUDE cn1/cnv1_quad_main_loop.inc
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
cnv1_quad_mainloop_asm ENDP
|
||||
|
||||
ALIGN(64)
|
||||
cnv2_mainloop_ivybridge_asm PROC
|
||||
INCLUDE cn2/cnv2_main_loop_ivybridge.inc
|
||||
|
||||
132
src/crypto/cn/asm/win64/cn1/cnv1_double_main_loop.inc
Normal file
132
src/crypto/cn/asm/win64/cn1/cnv1_double_main_loop.inc
Normal file
@@ -0,0 +1,132 @@
|
||||
mov QWORD PTR [rsp+8], rbx
|
||||
mov QWORD PTR [rsp+16], rbp
|
||||
mov QWORD PTR [rsp+24], rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 32
|
||||
mov rdx, QWORD PTR [rcx]
|
||||
mov r8, QWORD PTR [rcx+8]
|
||||
mov r12d, 524288
|
||||
movaps XMMWORD PTR [rsp+16], xmm6
|
||||
mov rbx, QWORD PTR [rdx+32]
|
||||
xor rbx, QWORD PTR [rdx]
|
||||
mov rsi, QWORD PTR [rdx+40]
|
||||
mov r10, rbx
|
||||
xor rsi, QWORD PTR [rdx+8]
|
||||
and r10d, 2097136
|
||||
mov rdi, QWORD PTR [r8+32]
|
||||
xor rdi, QWORD PTR [r8]
|
||||
movd xmm3, rbx
|
||||
mov rbp, QWORD PTR [r8+40]
|
||||
mov r9, rdi
|
||||
xor rbp, QWORD PTR [r8+8]
|
||||
movd xmm0, rsi
|
||||
mov rcx, QWORD PTR [rdx+56]
|
||||
and r9d, 2097136
|
||||
xor rcx, QWORD PTR [rdx+24]
|
||||
movd xmm4, rdi
|
||||
mov rax, QWORD PTR [rdx+48]
|
||||
xor rax, QWORD PTR [rdx+16]
|
||||
mov r14, QWORD PTR [rdx+224]
|
||||
mov r13, QWORD PTR [rdx+232]
|
||||
mov r15, QWORD PTR [r8+224]
|
||||
punpcklqdq xmm3, xmm0
|
||||
movd xmm0, rbp
|
||||
movd xmm5, rax
|
||||
punpcklqdq xmm4, xmm0
|
||||
mov rax, QWORD PTR [r8+48]
|
||||
movd xmm0, rcx
|
||||
xor rax, QWORD PTR [r8+16]
|
||||
mov rcx, QWORD PTR [r8+56]
|
||||
xor rcx, QWORD PTR [r8+24]
|
||||
movdqu xmm1, XMMWORD PTR [r14+r10]
|
||||
movd xmm6, rax
|
||||
punpcklqdq xmm5, xmm0
|
||||
mov rax, QWORD PTR [rdx+240]
|
||||
movd xmm0, rcx
|
||||
movdqu xmm2, XMMWORD PTR [r15+r9]
|
||||
mov QWORD PTR [rsp], rax
|
||||
mov rax, QWORD PTR [r8+240]
|
||||
mov QWORD PTR [rsp+8], rax
|
||||
punpcklqdq xmm6, xmm0
|
||||
|
||||
ALIGN(64)
|
||||
main_loop_cnv1_double:
|
||||
aesenc xmm1, xmm3
|
||||
aesenc xmm2, xmm4
|
||||
movdqa xmm0, xmm1
|
||||
movd r11, xmm2
|
||||
pxor xmm0, xmm5
|
||||
movdqa xmm5, xmm1
|
||||
movd QWORD PTR [r14+r10], xmm0
|
||||
pextrq rcx, xmm0, 1
|
||||
mov eax, ecx
|
||||
movdqa xmm0, xmm2
|
||||
shr rax, 24
|
||||
pxor xmm0, xmm6
|
||||
movdqa xmm6, xmm2
|
||||
mov eax, DWORD PTR [r13+rax*4]
|
||||
xor rax, rcx
|
||||
mov QWORD PTR [r14+r10+8], rax
|
||||
movd QWORD PTR [r15+r9], xmm0
|
||||
pextrq rcx, xmm0, 1
|
||||
mov eax, ecx
|
||||
shr rax, 24
|
||||
mov eax, DWORD PTR [r13+rax*4]
|
||||
xor rax, rcx
|
||||
movd rcx, xmm1
|
||||
mov QWORD PTR [r15+r9+8], rax
|
||||
mov r9, rcx
|
||||
and r9d, 2097136
|
||||
mov r10, QWORD PTR [r14+r9]
|
||||
mov r8, QWORD PTR [r14+r9+8]
|
||||
mov rax, r10
|
||||
mul rcx
|
||||
add rsi, rax
|
||||
add rbx, rdx
|
||||
mov rax, QWORD PTR [rsp]
|
||||
mov QWORD PTR [r14+r9], rbx
|
||||
xor rax, rsi
|
||||
mov QWORD PTR [r14+r9+8], rax
|
||||
xor rsi, r8
|
||||
xor rbx, r10
|
||||
mov r8, r11
|
||||
and r8d, 2097136
|
||||
mov r10, rbx
|
||||
and r10d, 2097136
|
||||
movd xmm3, rbx
|
||||
pinsrq xmm3, rsi, 1
|
||||
mov r9, QWORD PTR [r15+r8]
|
||||
mov rcx, QWORD PTR [r15+r8+8]
|
||||
mov rax, r9
|
||||
movdqu xmm1, XMMWORD PTR [r14+r10]
|
||||
mul r11
|
||||
add rbp, rax
|
||||
add rdi, rdx
|
||||
mov rax, QWORD PTR [rsp+8]
|
||||
mov QWORD PTR [r15+r8], rdi
|
||||
xor rax, rbp
|
||||
xor rdi, r9
|
||||
mov QWORD PTR [r15+r8+8], rax
|
||||
mov r9, rdi
|
||||
xor rbp, rcx
|
||||
and r9d, 2097136
|
||||
movd xmm4, rdi
|
||||
pinsrq xmm4, rbp, 1
|
||||
movdqu xmm2, XMMWORD PTR [r15+r9]
|
||||
sub r12, 1
|
||||
jne main_loop_cnv1_double
|
||||
|
||||
mov rbx, QWORD PTR [rsp+80]
|
||||
mov rbp, QWORD PTR [rsp+88]
|
||||
mov rsi, QWORD PTR [rsp+96]
|
||||
movaps xmm6, XMMWORD PTR [rsp+16]
|
||||
add rsp, 32
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
263
src/crypto/cn/asm/win64/cn1/cnv1_quad_main_loop.inc
Normal file
263
src/crypto/cn/asm/win64/cn1/cnv1_quad_main_loop.inc
Normal file
@@ -0,0 +1,263 @@
|
||||
mov rax, rsp
|
||||
mov QWORD PTR [rax+8], rbx
|
||||
mov QWORD PTR [rax+16], rbp
|
||||
mov QWORD PTR [rax+24], rsi
|
||||
push rdi
|
||||
push r12
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
sub rsp, 144
|
||||
mov r8, QWORD PTR [rcx]
|
||||
mov r9, QWORD PTR [rcx+8]
|
||||
mov r10, QWORD PTR [rcx+16]
|
||||
mov r11, QWORD PTR [rcx+24]
|
||||
mov rbp, QWORD PTR [r8+224]
|
||||
mov r13, QWORD PTR [r8+232]
|
||||
mov r14, QWORD PTR [r9+224]
|
||||
mov r15, QWORD PTR [r10+224]
|
||||
mov r12, QWORD PTR [r11+224]
|
||||
mov rcx, QWORD PTR [r8+40]
|
||||
xor rcx, QWORD PTR [r8+8]
|
||||
mov rbx, QWORD PTR [r8+32]
|
||||
xor rbx, QWORD PTR [r8]
|
||||
mov rdi, QWORD PTR [r9+32]
|
||||
xor rdi, QWORD PTR [r9]
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [r9+40]
|
||||
xor rcx, QWORD PTR [r9+8]
|
||||
movd xmm1, rbx
|
||||
movaps XMMWORD PTR [rax-56], xmm6
|
||||
movaps XMMWORD PTR [rax-72], xmm7
|
||||
movaps XMMWORD PTR [rax-88], xmm8
|
||||
movaps XMMWORD PTR [rax-104], xmm9
|
||||
movaps XMMWORD PTR [rax-120], xmm10
|
||||
movaps XMMWORD PTR [rsp+48], xmm11
|
||||
movaps XMMWORD PTR [rsp+32], xmm12
|
||||
and ebx, 2097136
|
||||
mov rsi, QWORD PTR [r10+32]
|
||||
movd xmm2, rdi
|
||||
mov rax, QWORD PTR [r8+240]
|
||||
and edi, 2097136
|
||||
xor rsi, QWORD PTR [r10]
|
||||
mov rdx, QWORD PTR [r8+56]
|
||||
xor rdx, QWORD PTR [r8+24]
|
||||
mov QWORD PTR [rsp], rax
|
||||
mov rax, QWORD PTR [r9+240]
|
||||
movd xmm3, rsi
|
||||
mov QWORD PTR [rsp+8], rax
|
||||
and esi, 2097136
|
||||
mov rax, QWORD PTR [r10+240]
|
||||
punpcklqdq xmm1, xmm0
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [r10+40]
|
||||
xor rcx, QWORD PTR [r10+8]
|
||||
mov QWORD PTR [rsp+16], rax
|
||||
mov rax, QWORD PTR [r11+240]
|
||||
punpcklqdq xmm2, xmm0
|
||||
movd xmm0, rcx
|
||||
mov QWORD PTR [rsp+24], rax
|
||||
mov rcx, QWORD PTR [r11+40]
|
||||
xor rcx, QWORD PTR [r11+8]
|
||||
mov rax, QWORD PTR [r11+32]
|
||||
xor rax, QWORD PTR [r11]
|
||||
punpcklqdq xmm3, xmm0
|
||||
movd xmm0, rcx
|
||||
mov rcx, QWORD PTR [r8+48]
|
||||
xor rcx, QWORD PTR [r8+16]
|
||||
movd xmm4, rax
|
||||
and eax, 2097136
|
||||
punpcklqdq xmm4, xmm0
|
||||
movd xmm0, rdx
|
||||
mov rdx, QWORD PTR [r9+56]
|
||||
xor rdx, QWORD PTR [r9+24]
|
||||
movd xmm5, rcx
|
||||
mov rcx, QWORD PTR [r9+48]
|
||||
xor rcx, QWORD PTR [r9+16]
|
||||
punpcklqdq xmm5, xmm0
|
||||
movd xmm0, rdx
|
||||
mov rdx, QWORD PTR [r10+56]
|
||||
xor rdx, QWORD PTR [r10+24]
|
||||
movd xmm6, rcx
|
||||
mov rcx, QWORD PTR [r10+48]
|
||||
xor rcx, QWORD PTR [r10+16]
|
||||
punpcklqdq xmm6, xmm0
|
||||
movd xmm0, rdx
|
||||
mov rdx, QWORD PTR [r11+56]
|
||||
movd xmm7, rcx
|
||||
punpcklqdq xmm7, xmm0
|
||||
xor rdx, QWORD PTR [r11+24]
|
||||
mov rcx, QWORD PTR [r11+48]
|
||||
xor rcx, QWORD PTR [r11+16]
|
||||
mov r11d, 524288
|
||||
movdqu xmm9, XMMWORD PTR [rbp+rbx]
|
||||
movdqu xmm10, XMMWORD PTR [r14+rdi]
|
||||
movd xmm0, rdx
|
||||
movdqu xmm11, XMMWORD PTR [r15+rsi]
|
||||
movdqu xmm12, XMMWORD PTR [r12+rax]
|
||||
movd xmm8, rcx
|
||||
punpcklqdq xmm8, xmm0
|
||||
|
||||
ALIGN(64)
|
||||
main_loop_cnv1_quad:
|
||||
aesenc xmm9, xmm1
|
||||
aesenc xmm10, xmm2
|
||||
aesenc xmm11, xmm3
|
||||
aesenc xmm12, xmm4
|
||||
movd ecx, xmm9
|
||||
and ecx, 2097136
|
||||
prefetcht0 BYTE PTR [rcx+rbp]
|
||||
movd ecx, xmm10
|
||||
and ecx, 2097136
|
||||
prefetcht0 BYTE PTR [rcx+r14]
|
||||
movd ecx, xmm11
|
||||
and ecx, 2097136
|
||||
prefetcht0 BYTE PTR [rcx+r15]
|
||||
movd ecx, xmm12
|
||||
and ecx, 2097136
|
||||
prefetcht0 BYTE PTR [rcx+r12]
|
||||
movdqa xmm0, xmm9
|
||||
pxor xmm0, xmm5
|
||||
movdqa xmm5, xmm9
|
||||
movd QWORD PTR [rbp+rbx], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov ecx, edx
|
||||
movdqa xmm0, xmm10
|
||||
shr rcx, 24
|
||||
pxor xmm0, xmm6
|
||||
mov ecx, DWORD PTR [r13+rcx*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [rbp+rbx+8], rcx
|
||||
movd rbx, xmm1
|
||||
movd QWORD PTR [r14+rdi], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov ecx, edx
|
||||
movdqa xmm0, xmm11
|
||||
shr rcx, 24
|
||||
pxor xmm0, xmm7
|
||||
mov ecx, DWORD PTR [r13+rcx*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [r14+rdi+8], rcx
|
||||
movd rdi, xmm2
|
||||
movd QWORD PTR [r15+rsi], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov ecx, edx
|
||||
movdqa xmm0, xmm12
|
||||
shr rcx, 24
|
||||
pxor xmm0, xmm8
|
||||
mov ecx, DWORD PTR [r13+rcx*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [r15+rsi+8], rcx
|
||||
movd QWORD PTR [r12+rax], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov ecx, edx
|
||||
shr rcx, 24
|
||||
mov ecx, DWORD PTR [r13+rcx*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [r12+rax+8], rcx
|
||||
movd rcx, xmm9
|
||||
mov r8, rcx
|
||||
and r8d, 2097136
|
||||
mov r9, QWORD PTR [rbp+r8]
|
||||
mov r10, QWORD PTR [rbp+r8+8]
|
||||
mov rax, r9
|
||||
mul rcx
|
||||
pextrq rcx, xmm1, 1
|
||||
add rcx, rax
|
||||
add rbx, rdx
|
||||
mov rax, QWORD PTR [rsp]
|
||||
mov QWORD PTR [rbp+r8], rbx
|
||||
xor rax, rcx
|
||||
mov QWORD PTR [rbp+r8+8], rax
|
||||
xor rcx, r10
|
||||
xor rbx, r9
|
||||
movd xmm1, rbx
|
||||
and ebx, 2097136
|
||||
pinsrq xmm1, rcx, 1
|
||||
movd rcx, xmm10
|
||||
mov r8, rcx
|
||||
and r8d, 2097136
|
||||
movdqu xmm9, XMMWORD PTR [rbp+rbx]
|
||||
mov r9, QWORD PTR [r14+r8]
|
||||
mov r10, QWORD PTR [r14+r8+8]
|
||||
mov rax, r9
|
||||
mul rcx
|
||||
pextrq rcx, xmm2, 1
|
||||
add rcx, rax
|
||||
add rdi, rdx
|
||||
mov rax, QWORD PTR [rsp+8]
|
||||
mov QWORD PTR [r14+r8], rdi
|
||||
xor rax, rcx
|
||||
xor rdi, r9
|
||||
mov QWORD PTR [r14+r8+8], rax
|
||||
xor rcx, r10
|
||||
movd xmm2, rdi
|
||||
and edi, 2097136
|
||||
pinsrq xmm2, rcx, 1
|
||||
movd rcx, xmm11
|
||||
movd rsi, xmm3
|
||||
mov r8, rcx
|
||||
and r8d, 2097136
|
||||
movdqa xmm6, xmm10
|
||||
movdqa xmm7, xmm11
|
||||
movdqa xmm8, xmm12
|
||||
movdqu xmm10, XMMWORD PTR [r14+rdi]
|
||||
mov r9, QWORD PTR [r15+r8]
|
||||
mov r10, QWORD PTR [r15+r8+8]
|
||||
mov rax, r9
|
||||
mul rcx
|
||||
pextrq rcx, xmm3, 1
|
||||
add rcx, rax
|
||||
add rsi, rdx
|
||||
mov rax, QWORD PTR [rsp+16]
|
||||
xor rax, rcx
|
||||
mov QWORD PTR [r15+r8], rsi
|
||||
mov QWORD PTR [r15+r8+8], rax
|
||||
xor rcx, r10
|
||||
xor rsi, r9
|
||||
movd xmm3, rsi
|
||||
and esi, 2097136
|
||||
pinsrq xmm3, rcx, 1
|
||||
movd rcx, xmm12
|
||||
mov r8, rcx
|
||||
and r8d, 2097136
|
||||
movdqu xmm11, XMMWORD PTR [r15+rsi]
|
||||
mov r9, QWORD PTR [r12+r8]
|
||||
mov r10, QWORD PTR [r12+r8+8]
|
||||
mov rax, r9
|
||||
mul rcx
|
||||
mov rcx, rax
|
||||
movd rax, xmm4
|
||||
add rax, rdx
|
||||
mov QWORD PTR [r12+r8], rax
|
||||
xor rax, r9
|
||||
pextrq rdx, xmm4, 1
|
||||
add rdx, rcx
|
||||
mov rcx, QWORD PTR [rsp+24]
|
||||
xor rcx, rdx
|
||||
xor rdx, r10
|
||||
movd xmm4, rax
|
||||
mov QWORD PTR [r12+r8+8], rcx
|
||||
and eax, 2097136
|
||||
pinsrq xmm4, rdx, 1
|
||||
movdqu xmm12, XMMWORD PTR [r12+rax]
|
||||
sub r11, 1
|
||||
jne main_loop_cnv1_quad
|
||||
|
||||
movaps xmm7, XMMWORD PTR [rsp+112]
|
||||
lea r11, QWORD PTR [rsp+144]
|
||||
mov rbx, QWORD PTR [r11+48]
|
||||
mov rbp, QWORD PTR [r11+56]
|
||||
mov rsi, QWORD PTR [r11+64]
|
||||
movaps xmm6, XMMWORD PTR [r11-16]
|
||||
movaps xmm8, XMMWORD PTR [r11-48]
|
||||
movaps xmm9, XMMWORD PTR [r11-64]
|
||||
movaps xmm10, XMMWORD PTR [r11-80]
|
||||
movaps xmm11, XMMWORD PTR [r11-96]
|
||||
movaps xmm12, XMMWORD PTR [r11-112]
|
||||
mov rsp, r11
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
pop r12
|
||||
pop rdi
|
||||
66
src/crypto/cn/asm/win64/cn1/cnv1_single_main_loop.inc
Normal file
66
src/crypto/cn/asm/win64/cn1/cnv1_single_main_loop.inc
Normal file
@@ -0,0 +1,66 @@
|
||||
mov QWORD PTR [rsp+8], rbx
|
||||
mov QWORD PTR [rsp+16], rbp
|
||||
mov QWORD PTR [rsp+24], rsi
|
||||
mov QWORD PTR [rsp+32], rdi
|
||||
push r13
|
||||
push r14
|
||||
push r15
|
||||
mov rdx, QWORD PTR [rcx]
|
||||
mov esi, 524288
|
||||
mov r11, QWORD PTR [rdx+32]
|
||||
xor r11, QWORD PTR [rdx]
|
||||
mov rdi, QWORD PTR [rdx+224]
|
||||
mov rbx, QWORD PTR [rdx+40]
|
||||
xor rbx, QWORD PTR [rdx+8]
|
||||
mov rcx, QWORD PTR [rdx+56]
|
||||
xor rcx, QWORD PTR [rdx+24]
|
||||
mov rax, QWORD PTR [rdx+48]
|
||||
xor rax, QWORD PTR [rdx+16]
|
||||
mov rbp, QWORD PTR [rdx+240]
|
||||
mov r14, QWORD PTR [rdx+232]
|
||||
movd xmm2, rax
|
||||
pinsrq xmm2, rcx, 1
|
||||
|
||||
ALIGN(64)
|
||||
main_loop_cnv1_single:
|
||||
mov r8, r11
|
||||
and r8d, 2097136
|
||||
movdqu xmm1, XMMWORD PTR [rdi+r8]
|
||||
movd xmm0, r11
|
||||
pinsrq xmm0, rbx, 1
|
||||
aesenc xmm1, xmm0
|
||||
movd r15, xmm1
|
||||
mov r9, r15
|
||||
and r9d, 2097136
|
||||
movdqa xmm0, xmm1
|
||||
pxor xmm0, xmm2
|
||||
movdqa xmm2, xmm1
|
||||
movd QWORD PTR [rdi+r8], xmm0
|
||||
pextrq rdx, xmm0, 1
|
||||
mov eax, edx
|
||||
shr rax, 24
|
||||
mov ecx, DWORD PTR [r14+rax*4]
|
||||
xor rcx, rdx
|
||||
mov QWORD PTR [rdi+r8+8], rcx
|
||||
mov r10, QWORD PTR [rdi+r9]
|
||||
mov r8, QWORD PTR [rdi+r9+8]
|
||||
mov rax, r10
|
||||
mul r15
|
||||
add rbx, rax
|
||||
add r11, rdx
|
||||
mov QWORD PTR [rdi+r9], r11
|
||||
mov rax, rbx
|
||||
xor rbx, r8
|
||||
xor r11, r10
|
||||
xor rax, rbp
|
||||
mov QWORD PTR [rdi+r9+8], rax
|
||||
sub rsi, 1
|
||||
jne main_loop_cnv1_single
|
||||
|
||||
pop r15
|
||||
pop r14
|
||||
pop r13
|
||||
mov rbx, QWORD PTR [rsp+8]
|
||||
mov rbp, QWORD PTR [rsp+16]
|
||||
mov rsi, QWORD PTR [rsp+24]
|
||||
mov rdi, QWORD PTR [rsp+32]
|
||||
@@ -1,6 +1,9 @@
|
||||
#define ALIGN(x) .align 64
|
||||
.intel_syntax noprefix
|
||||
.section .text
|
||||
.global cnv1_single_mainloop_asm
|
||||
.global cnv1_double_mainloop_asm
|
||||
.global cnv1_quad_mainloop_asm
|
||||
.global cnv2_mainloop_ivybridge_asm
|
||||
.global cnv2_mainloop_ryzen_asm
|
||||
.global cnv2_mainloop_bulldozer_asm
|
||||
@@ -9,6 +12,24 @@
|
||||
.global cnv2_rwz_double_mainloop_asm
|
||||
.global cnv2_upx_double_mainloop_zen3_asm
|
||||
|
||||
ALIGN(64)
|
||||
cnv1_single_mainloop_asm:
|
||||
#include "../cn1/cnv1_single_main_loop.inc"
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
|
||||
ALIGN(64)
|
||||
cnv1_double_mainloop_asm:
|
||||
#include "../cn1/cnv1_double_main_loop.inc"
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
|
||||
ALIGN(64)
|
||||
cnv1_quad_mainloop_asm:
|
||||
#include "../cn1/cnv1_quad_main_loop.inc"
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
|
||||
ALIGN(64)
|
||||
cnv2_mainloop_ivybridge_asm:
|
||||
#include "../cn2/cnv2_main_loop_ivybridge.inc"
|
||||
|
||||
@@ -1,4 +1,7 @@
|
||||
_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE
|
||||
PUBLIC cnv1_single_mainloop_asm
|
||||
PUBLIC cnv1_double_mainloop_asm
|
||||
PUBLIC cnv1_quad_mainloop_asm
|
||||
PUBLIC cnv2_mainloop_ivybridge_asm
|
||||
PUBLIC cnv2_mainloop_ryzen_asm
|
||||
PUBLIC cnv2_mainloop_bulldozer_asm
|
||||
@@ -6,28 +9,49 @@ PUBLIC cnv2_double_mainloop_sandybridge_asm
|
||||
PUBLIC cnv2_rwz_mainloop_asm
|
||||
PUBLIC cnv2_rwz_double_mainloop_asm
|
||||
|
||||
ALIGN 64
|
||||
ALIGN(64)
|
||||
cnv1_single_mainloop_asm PROC
|
||||
INCLUDE cn1/cnv1_single_main_loop.inc
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
cnv1_single_mainloop_asm ENDP
|
||||
|
||||
ALIGN(64)
|
||||
cnv1_double_mainloop_asm PROC
|
||||
INCLUDE cn1/cnv1_double_main_loop.inc
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
cnv1_double_mainloop_asm ENDP
|
||||
|
||||
ALIGN(64)
|
||||
cnv1_quad_mainloop_asm PROC
|
||||
INCLUDE cn1/cnv1_quad_main_loop.inc
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
cnv1_quad_mainloop_asm ENDP
|
||||
|
||||
ALIGN(64)
|
||||
cnv2_mainloop_ivybridge_asm PROC
|
||||
INCLUDE cn2/cnv2_main_loop_ivybridge.inc
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
cnv2_mainloop_ivybridge_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
ALIGN(64)
|
||||
cnv2_mainloop_ryzen_asm PROC
|
||||
INCLUDE cn2/cnv2_main_loop_ryzen.inc
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
cnv2_mainloop_ryzen_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
ALIGN(64)
|
||||
cnv2_mainloop_bulldozer_asm PROC
|
||||
INCLUDE cn2/cnv2_main_loop_bulldozer.inc
|
||||
ret 0
|
||||
mov eax, 3735929054
|
||||
cnv2_mainloop_bulldozer_asm ENDP
|
||||
|
||||
ALIGN 64
|
||||
ALIGN(64)
|
||||
cnv2_double_mainloop_sandybridge_asm PROC
|
||||
INCLUDE cn2/cnv2_double_main_loop_sandybridge.inc
|
||||
ret 0
|
||||
|
||||
@@ -42,14 +42,40 @@ set(SOURCES
|
||||
ghostrider.cpp
|
||||
)
|
||||
|
||||
if (CMAKE_C_COMPILER_ID MATCHES GNU)
|
||||
# gcc 11.2.0 crashes with -ftree-vrp
|
||||
set_source_files_properties(sph_jh.c PROPERTIES COMPILE_FLAGS "-Ofast -fno-tree-vrp")
|
||||
|
||||
# gcc 11.2.0 creates incorrect code with -O3
|
||||
set_source_files_properties(sph_sha2.c PROPERTIES COMPILE_FLAGS "-O2")
|
||||
|
||||
set_source_files_properties(sph_luffa.c PROPERTIES COMPILE_FLAGS "-Ofast -Wno-unused-const-variable")
|
||||
if (CMAKE_C_COMPILER_ID MATCHES MSVC)
|
||||
set_source_files_properties(sph_blake.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_bmw.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_cubehash.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_echo.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_fugue.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_groestl.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_hamsi.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_jh.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_keccak.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_luffa.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_shabal.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_shavite.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_simd.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_sha2.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_skein.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
set_source_files_properties(sph_whirlpool.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
|
||||
elseif (CMAKE_C_COMPILER_ID MATCHES GNU OR CMAKE_C_COMPILER_ID MATCHES Clang)
|
||||
set_source_files_properties(sph_blake.c PROPERTIES COMPILE_FLAGS "-Os")
|
||||
set_source_files_properties(sph_bmw.c PROPERTIES COMPILE_FLAGS "-Os")
|
||||
set_source_files_properties(sph_cubehash.c PROPERTIES COMPILE_FLAGS "-Os")
|
||||
set_source_files_properties(sph_echo.c PROPERTIES COMPILE_FLAGS "-Os")
|
||||
set_source_files_properties(sph_fugue.c PROPERTIES COMPILE_FLAGS "-Os")
|
||||
set_source_files_properties(sph_groestl.c PROPERTIES COMPILE_FLAGS "-Os")
|
||||
set_source_files_properties(sph_hamsi.c PROPERTIES COMPILE_FLAGS "-Os")
|
||||
set_source_files_properties(sph_jh.c PROPERTIES COMPILE_FLAGS "-Os -fno-tree-vrp")
|
||||
set_source_files_properties(sph_keccak.c PROPERTIES COMPILE_FLAGS "-Os")
|
||||
set_source_files_properties(sph_luffa.c PROPERTIES COMPILE_FLAGS "-Os -Wno-unused-const-variable")
|
||||
set_source_files_properties(sph_shabal.c PROPERTIES COMPILE_FLAGS "-Os")
|
||||
set_source_files_properties(sph_shavite.c PROPERTIES COMPILE_FLAGS "-Os")
|
||||
set_source_files_properties(sph_simd.c PROPERTIES COMPILE_FLAGS "-Os")
|
||||
set_source_files_properties(sph_sha2.c PROPERTIES COMPILE_FLAGS "-Os")
|
||||
set_source_files_properties(sph_skein.c PROPERTIES COMPILE_FLAGS "-Os")
|
||||
set_source_files_properties(sph_whirlpool.c PROPERTIES COMPILE_FLAGS "-Os")
|
||||
endif()
|
||||
|
||||
include_directories(.)
|
||||
|
||||
@@ -4,8 +4,6 @@
|
||||
|
||||
No tuning is required - auto-config works well on most CPUs!
|
||||
|
||||
**Note for Windows users: MSVC binary is ~5% faster than GCC binary!**
|
||||
|
||||
### Sample command line (non-SSL port)
|
||||
```
|
||||
xmrig -a gr -o raptoreumemporium.com:3008 -u WALLET_ADDRESS
|
||||
@@ -18,22 +16,24 @@ xmrig -a gr -o us.flockpool.com:5555 --tls -u WALLET_ADDRESS
|
||||
|
||||
You can use **rtm_ghostrider_example.cmd** as a template and put pool URL and your wallet address there. The general XMRig documentation is available [here](https://xmrig.com/docs/miner).
|
||||
|
||||
**Using `--threads` or `-t` option is NOT recommended because it turns off advanced built-in config.** If you want to tweak the number of threads used for GhostRider, it's recommended to start using config.json instead of command line. The most suitable command line option for this is `--cpu-max-threads-hint=N` where N can be between 0 and 100.
|
||||
|
||||
## Performance
|
||||
|
||||
While individual algorithm implementations are a bit unoptimized, XMRig achieves higher hashrates by employing better auto-config and more fine-grained thread scheduling: it can calculate a single batch of hashes using 2 threads for parts that don't require much cache. For example, on a typical Intel CPU (2 MB cache per core) it will use 1 thread per core for cn/fast, and 2 threads per core for other Cryptonight variants while calculating the same batch of hashes, always achieving more than 50% CPU load.
|
||||
|
||||
For the same reason, XMRig can sometimes use less than 100% CPU on Ryzen 3000/5000 CPUs if it finds that running 1 thread per core is faster for some Cryptonight variants on your system. Also, this is why it reports using only half the threads at startup - it's actually 2 threads per each reported thread.
|
||||
For the same reason, XMRig can sometimes use less than 100% CPU on Ryzen 3000/5000 CPUs if it finds that running 1 thread per core is faster for some Cryptonight variants on your system.
|
||||
|
||||
**Windows** (detailed results [here](https://imgur.com/a/GCjEWpl))
|
||||
CPU|cpuminer-gr-avx2 (tuned), h/s|XMRig (MSVC build), h/s|Speedup
|
||||
**Windows** (detailed results [here](https://imgur.com/a/0njIVVW))
|
||||
CPU|cpuminer-gr-avx2 1.2.4.1 (tuned), h/s|XMRig v6.16.2 (MSVC build), h/s|Speedup
|
||||
-|-|-|-
|
||||
AMD Ryzen 7 4700U|632.6|731|+15.5%
|
||||
Intel Core i7-2600|496.4|533.6|+7.5%
|
||||
AMD Ryzen 7 3700X @ 4.1 GHz|2453.0|2469.1|+0.65%
|
||||
AMD Ryzen 5 5600X @ 4.65 GHz|2112.6|2221.2|+5.1%
|
||||
AMD Ryzen 7 4700U|632.6|733.1|+15.89%
|
||||
Intel Core i7-2600|496.4|554.6|+11.72%
|
||||
AMD Ryzen 7 3700X @ 4.1 GHz|2453.0|2496.5|+1.77%
|
||||
AMD Ryzen 5 5600X @ 4.65 GHz|2112.6|2337.5|+10.65%
|
||||
|
||||
**Linux** (tested by **Delgon**, detailed results [here](https://cdn.discordapp.com/attachments/604375870236524574/913167614749048872/unknown.png))
|
||||
CPU|cpuminer-gr-avx2 (tuned), h/s|XMRig (GCC build), h/s|Speedup
|
||||
**Linux (outdated)** (tested by **Delgon**, detailed results [here](https://cdn.discordapp.com/attachments/604375870236524574/913167614749048872/unknown.png))
|
||||
CPU|cpuminer-gr-avx2 1.2.4.1 (tuned), h/s|XMRig v6.16.0 (GCC build), h/s|Speedup
|
||||
-|-|-|-
|
||||
AMD Ryzen 9 3900X|3746.51|3604.89|-3.78%
|
||||
2xIntel Xeon E5-2698v3|2563.4|2638.38|+2.925%
|
||||
|
||||
@@ -36,6 +36,7 @@
|
||||
|
||||
#include "base/io/log/Log.h"
|
||||
#include "base/io/log/Tags.h"
|
||||
#include "base/tools/Chrono.h"
|
||||
#include "backend/cpu/Cpu.h"
|
||||
#include "crypto/cn/CnHash.h"
|
||||
#include "crypto/cn/CnCtx.h"
|
||||
@@ -44,7 +45,6 @@
|
||||
|
||||
#include <thread>
|
||||
#include <atomic>
|
||||
#include <chrono>
|
||||
#include <uv.h>
|
||||
|
||||
#ifdef XMRIG_FEATURE_HWLOC
|
||||
@@ -328,8 +328,6 @@ void benchmark()
|
||||
LOG_VERBOSE("%24s | N | Hashrate", "Algorithm");
|
||||
LOG_VERBOSE("-------------------------|-----|-------------");
|
||||
|
||||
using namespace std::chrono;
|
||||
|
||||
for (uint32_t algo = 0; algo < 6; ++algo) {
|
||||
for (uint64_t step : { 1, 2, 4}) {
|
||||
const size_t cur_scratchpad_size = cn_sizes[algo] * step;
|
||||
@@ -339,26 +337,26 @@ void benchmark()
|
||||
|
||||
auto f = CnHash::fn(cn_hash[algo], av[step], Assembly::AUTO);
|
||||
|
||||
const high_resolution_clock::time_point start_time = high_resolution_clock::now();
|
||||
double start_time = Chrono::highResolutionMSecs();
|
||||
|
||||
double min_dt = 1e10;
|
||||
for (uint32_t iter = 0;; ++iter) {
|
||||
const high_resolution_clock::time_point t1 = high_resolution_clock::now();
|
||||
double t1 = Chrono::highResolutionMSecs();
|
||||
|
||||
// Stop after 15 milliseconds, but only if at least 10 iterations were done
|
||||
if ((iter >= 10) && (duration_cast<milliseconds>(t1 - start_time).count() >= 15)) {
|
||||
if ((iter >= 10) && (t1 - start_time >= 15.0)) {
|
||||
break;
|
||||
}
|
||||
|
||||
f(buf, sizeof(buf), hash, ctx, 0);
|
||||
|
||||
const double dt = duration_cast<nanoseconds>(high_resolution_clock::now() - t1).count() / 1e9;
|
||||
const double dt = Chrono::highResolutionMSecs() - t1;
|
||||
if (dt < min_dt) {
|
||||
min_dt = dt;
|
||||
}
|
||||
}
|
||||
|
||||
const double hashrate = step / min_dt;
|
||||
const double hashrate = step * 1e3 / min_dt;
|
||||
LOG_VERBOSE("%24s | %" PRIu64 "x1 | %.2f h/s", cn_names[algo], step, hashrate);
|
||||
|
||||
if (hashrate > tune8MB[algo].hashrate) {
|
||||
@@ -388,14 +386,14 @@ void benchmark()
|
||||
|
||||
auto f = CnHash::fn(cn_hash[algo], av[step], Assembly::AUTO);
|
||||
|
||||
const high_resolution_clock::time_point start_time = high_resolution_clock::now();
|
||||
double start_time = Chrono::highResolutionMSecs();
|
||||
|
||||
double min_dt = 1e10;
|
||||
for (uint32_t iter = 0;; ++iter) {
|
||||
const high_resolution_clock::time_point t1 = high_resolution_clock::now();
|
||||
double t1 = Chrono::highResolutionMSecs();
|
||||
|
||||
// Stop after 30 milliseconds, but only if at least 10 iterations were done
|
||||
if ((iter >= 10) && (duration_cast<milliseconds>(t1 - start_time).count() >= 30)) {
|
||||
if ((iter >= 10) && (t1 - start_time >= 30.0)) {
|
||||
break;
|
||||
}
|
||||
|
||||
@@ -403,13 +401,13 @@ void benchmark()
|
||||
f(buf, sizeof(buf), hash, ctx, 0);
|
||||
helper->wait();
|
||||
|
||||
const double dt = duration_cast<nanoseconds>(high_resolution_clock::now() - t1).count() / 1e9;
|
||||
const double dt = Chrono::highResolutionMSecs() - t1;
|
||||
if (dt < min_dt) {
|
||||
min_dt = dt;
|
||||
}
|
||||
}
|
||||
|
||||
const double hashrate = step * 2.0 / min_dt * 1.0075;
|
||||
const double hashrate = step * 2e3 / min_dt * 1.0075;
|
||||
LOG_VERBOSE("%24s | %" PRIu64 "x2 | %.2f h/s", cn_names[algo], step, hashrate);
|
||||
|
||||
if (hashrate > tune8MB[algo].hashrate) {
|
||||
@@ -538,7 +536,7 @@ void destroy_helper_thread(HelperThread* t)
|
||||
}
|
||||
|
||||
|
||||
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread* helper)
|
||||
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread* helper, bool verbose)
|
||||
{
|
||||
enum { N = 8 };
|
||||
|
||||
@@ -554,6 +552,7 @@ void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ct
|
||||
uint32_t cn_indices[6];
|
||||
select_indices(cn_indices, data + 4);
|
||||
|
||||
if (verbose) {
|
||||
static uint32_t prev_indices[3];
|
||||
if (memcmp(cn_indices, prev_indices, sizeof(prev_indices)) != 0) {
|
||||
memcpy(prev_indices, cn_indices, sizeof(prev_indices));
|
||||
@@ -561,6 +560,7 @@ void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ct
|
||||
LOG_INFO("%s GhostRider algo %d: %s", Tags::cpu(), i + 1, cn_names[cn_indices[i]]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const CnHash::AlgoVariant* av = Cpu::info()->hasAES() ? av_hw_aes : av_soft_aes;
|
||||
const AlgoTune* tune = (helper && helper->m_is8MB) ? tune8MB : tuneDefault;
|
||||
@@ -765,7 +765,7 @@ HelperThread* create_helper_thread(int64_t, const std::vector<int64_t>&) { retur
|
||||
void destroy_helper_thread(HelperThread*) {}
|
||||
|
||||
|
||||
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread*)
|
||||
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread*, bool verbose)
|
||||
{
|
||||
constexpr uint32_t N = 8;
|
||||
|
||||
@@ -784,6 +784,7 @@ void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ct
|
||||
uint32_t step[6] = { 4, 4, 1, 2, 4, 4 };
|
||||
#endif
|
||||
|
||||
if (verbose) {
|
||||
static uint32_t prev_indices[3];
|
||||
if (memcmp(cn_indices, prev_indices, sizeof(prev_indices)) != 0) {
|
||||
memcpy(prev_indices, cn_indices, sizeof(prev_indices));
|
||||
@@ -791,6 +792,7 @@ void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ct
|
||||
LOG_INFO("%s GhostRider algo %d: %s", Tags::cpu(), i + 1, cn_names[cn_indices[i]]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const CnHash::AlgoVariant* av = Cpu::info()->hasAES() ? av_hw_aes : av_soft_aes;
|
||||
|
||||
|
||||
@@ -41,7 +41,7 @@ struct HelperThread;
|
||||
void benchmark();
|
||||
HelperThread* create_helper_thread(int64_t cpu_index, const std::vector<int64_t>& affinities);
|
||||
void destroy_helper_thread(HelperThread* t);
|
||||
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread* helper);
|
||||
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread* helper, bool verbose = true);
|
||||
|
||||
|
||||
} // namespace ghostrider
|
||||
|
||||
@@ -382,7 +382,7 @@ void SelectSoftAESImpl(size_t threadsCount)
|
||||
double fast_speed = 0.0;
|
||||
for (size_t run = 0; run < 3; ++run) {
|
||||
for (size_t i = 0; i < impl.size(); ++i) {
|
||||
const uint64_t t1 = xmrig::Chrono::highResolutionMSecs();
|
||||
const double t1 = xmrig::Chrono::highResolutionMSecs();
|
||||
std::vector<uint32_t> count(threadsCount, 0);
|
||||
std::vector<std::thread> threads;
|
||||
for (size_t t = 0; t < threadsCount; ++t) {
|
||||
@@ -401,7 +401,7 @@ void SelectSoftAESImpl(size_t threadsCount)
|
||||
threads[t].join();
|
||||
total += count[t];
|
||||
}
|
||||
const uint64_t t2 = xmrig::Chrono::highResolutionMSecs();
|
||||
const double t2 = xmrig::Chrono::highResolutionMSecs();
|
||||
const double speed = total * 1e3 / (t2 - t1);
|
||||
if (speed > fast_speed) {
|
||||
fast_idx = i;
|
||||
|
||||
@@ -295,7 +295,7 @@ typedef void(randomx::JitCompilerX86::* InstructionGeneratorX86_2)(const randomx
|
||||
INST_HANDLE(IMUL_R, ISUB_M);
|
||||
INST_HANDLE(IMUL_M, IMUL_R);
|
||||
|
||||
#if defined(_M_X64) || defined(__x86_64__)
|
||||
#if defined(XMRIG_FEATURE_ASM) && (defined(_M_X64) || defined(__x86_64__))
|
||||
if (hasBMI2) {
|
||||
INST_HANDLE2(IMULH_R, IMULH_R_BMI2, IMUL_M);
|
||||
INST_HANDLE2(IMULH_M, IMULH_M_BMI2, IMULH_R);
|
||||
@@ -337,7 +337,7 @@ typedef void(randomx::JitCompilerX86::* InstructionGeneratorX86_2)(const randomx
|
||||
INST_HANDLE(CBRANCH, FSQRT_R);
|
||||
#endif
|
||||
|
||||
#if defined(_M_X64) || defined(__x86_64__)
|
||||
#if defined(XMRIG_FEATURE_ASM) && (defined(_M_X64) || defined(__x86_64__))
|
||||
if (hasBMI2) {
|
||||
INST_HANDLE2(CFROUND, CFROUND_BMI2, CBRANCH);
|
||||
}
|
||||
|
||||
@@ -22,7 +22,7 @@
|
||||
#define APP_ID "xmrig"
|
||||
#define APP_NAME "XMRig"
|
||||
#define APP_DESC "XMRig miner"
|
||||
#define APP_VERSION "6.16.0"
|
||||
#define APP_VERSION "6.16.2"
|
||||
#define APP_DOMAIN "xmrig.com"
|
||||
#define APP_SITE "www.xmrig.com"
|
||||
#define APP_COPYRIGHT "Copyright (C) 2016-2021 xmrig.com"
|
||||
@@ -30,7 +30,7 @@
|
||||
|
||||
#define APP_VER_MAJOR 6
|
||||
#define APP_VER_MINOR 16
|
||||
#define APP_VER_PATCH 0
|
||||
#define APP_VER_PATCH 2
|
||||
|
||||
#ifdef _MSC_VER
|
||||
# if (_MSC_VER >= 1920)
|
||||
|
||||
Reference in New Issue
Block a user