1
0
mirror of https://github.com/xmrig/xmrig.git synced 2025-12-10 17:12:46 -05:00

Compare commits

..

35 Commits

Author SHA1 Message Date
XMRig
718c7e0fc1 v6.16.2 2021-12-02 20:55:27 +07:00
XMRig
ef7951b91d Merge branch 'dev' 2021-12-02 20:54:50 +07:00
xmrig
214b1f021b Update CHANGELOG.md 2021-12-02 20:52:53 +07:00
XMRig
81b18c0741 #2771 Fixed environment variables support in EthStratumClient. 2021-12-02 19:36:51 +07:00
xmrig
8e83f72456 Merge pull request #2772 from SChernykh/dev
Compilation fixes
2021-12-02 19:36:09 +07:00
SChernykh
c2ae625032 Compilation fixes 2021-12-02 13:34:24 +01:00
xmrig
60566dc84c Merge pull request #2769 from SChernykh/compiler_fix
Performance fixes
2021-12-02 10:00:29 +07:00
SChernykh
4ea8fe694d GhostRider benchmark: added 20 more possible rounds 2021-12-01 20:26:41 +01:00
SChernykh
669d1ab008 Updated changelog and GhostRider readme 2021-12-01 18:14:01 +01:00
SChernykh
e87d5111a2 Compiler fix 2021-12-01 17:08:40 +01:00
xmrig
56158779de Merge pull request #2761 from SChernykh/dev
Refactored Chrono::highResolutionMSecs()
2021-11-30 19:13:24 +07:00
SChernykh
efb322df66 Refactored Chrono::highResolutionMSecs()
Improved precision
2021-11-30 08:11:09 +01:00
xmrig
e673d541c1 Merge pull request #2751 from SChernykh/dev
VAES crash fixes
2021-11-30 09:49:37 +07:00
SChernykh
a98db529fb Explicitly use QueryPerformanceCounter() on Windows 2021-11-29 21:58:24 +01:00
SChernykh
1a9eaaad8f VAES crash fixes 2021-11-29 21:05:51 +01:00
XMRig
be5fbca9b6 v6.16.2-dev 2021-11-29 21:35:42 +07:00
XMRig
2feb264375 Merge branch 'master' into dev 2021-11-29 21:35:02 +07:00
XMRig
00990f2649 v6.16.1 2021-11-29 20:43:17 +07:00
XMRig
d78713be48 Merge branch 'dev' 2021-11-29 20:42:32 +07:00
XMRig
77367abe13 Fixed Clang build. 2021-11-29 16:01:16 +07:00
xmrig
cd046f6fd0 Merge pull request #2747 from SChernykh/dev
Disable VAES in 32-bit builds
2021-11-29 15:50:17 +07:00
SChernykh
63b7ec2887 Check compiler support for VAES 2021-11-29 09:48:15 +01:00
xmrig
a1e8f1c3e5 Merge pull request #2746 from Spudz76/dev-fixVAESCompile
Fix compile for VAES support with GCC<10
2021-11-29 15:38:35 +07:00
SChernykh
6db480a1ab Disable VAES in 32-bit builds 2021-11-29 09:32:00 +01:00
Tony Butler
a7acd9de6d Fix compile for VAES support with GCC<10 2021-11-28 22:11:42 -07:00
XMRig
a64f4d1870 v6.16.1-dev 2021-11-29 09:29:24 +07:00
XMRig
9bfe59b630 Merge branch 'master' into dev 2021-11-29 09:28:43 +07:00
xmrig
1a4bf16521 Merge pull request #2740 from SChernykh/dev
Added VAES support for Cryptonight variants
2021-11-29 09:26:45 +07:00
SChernykh
a4d5d0a75a Added VAES support for Cryptonight variants 2021-11-28 20:49:54 +01:00
xmrig
c40f1f9f66 Merge pull request #2738 from SChernykh/dev
More GhostRider fixes
2021-11-28 18:19:08 +07:00
SChernykh
15e5052dd0 More GhostRider fixes
- Fixed "difficulty is not a number" when diff is high on some pools
- Fixed GhostRider compilation when WITH_KAWPOW=OFF
2021-11-28 12:11:08 +01:00
xmrig
f9f7963453 Merge pull request #2734 from Spudz76/dev-nitpickWhitespace
Slash and burn EOL whitespace everywhere
2021-11-28 10:51:45 +07:00
Tony Butler
02240eff8c Slash and burn EOL whitespace everywhere 2021-11-27 17:59:40 -07:00
xmrig
d64c963e5e Merge pull request #2729 from SChernykh/dev
GhostRider hotfixes
2021-11-27 18:31:19 +07:00
SChernykh
c6292ce9ee GhostRider hotfixes
- Added average hashrate display
- Fixed the number of threads shown at startup
- Fixed `--threads` or `-t` command line option (but `--cpu-max-threads-hint` is recommended to use)
2021-11-27 12:27:26 +01:00
139 changed files with 3331 additions and 1324 deletions

View File

@@ -1,3 +1,25 @@
# v6.16.2
- [#2751](https://github.com/xmrig/xmrig/pull/2751) Fixed crash on CPUs supporting VAES and running GCC-compiled xmrig.
- [#2761](https://github.com/xmrig/xmrig/pull/2761) Fixed broken auto-tuning in GCC Windows build.
- [#2771](https://github.com/xmrig/xmrig/issues/2771) Fixed environment variables support for GhostRider and KawPow.
- [#2769](https://github.com/xmrig/xmrig/pull/2769) Performance fixes:
- Fixed several performance bottlenecks introduced in v6.16.1.
- Fixed overall GCC-compiled build performance; it is now the same speed as the MSVC build.
- **Linux builds are up to 10% faster now compared to v6.16.0 GCC build.**
- **Windows builds are up to 5% faster now compared to v6.16.0 MSVC build.**
# v6.16.1
- [#2729](https://github.com/xmrig/xmrig/pull/2729) GhostRider fixes:
- Added average hashrate display.
- Fixed the number of threads shown at startup.
- Fixed `--threads` or `-t` command line option (but using `--cpu-max-threads-hint` is recommended).
- [#2738](https://github.com/xmrig/xmrig/pull/2738) GhostRider fixes:
- Fixed "difficulty is not a number" error when diff is high on some pools.
- Fixed GhostRider compilation when `WITH_KAWPOW=OFF`.
- [#2740](https://github.com/xmrig/xmrig/pull/2740) Added VAES support for Cryptonight variants **+4% speedup on Zen3**.
- VAES instructions are available on Intel Ice Lake/AMD Zen3 and newer CPUs.
- +4% speedup on Ryzen 5 5600X.
# v6.16.0
- [#2712](https://github.com/xmrig/xmrig/pull/2712) **GhostRider algorithm (Raptoreum) support**: read the [RELEASE NOTES](src/crypto/ghostrider/README.md) for quick start guide and performance comparisons.
- [#2682](https://github.com/xmrig/xmrig/pull/2682) Fixed: use cn-heavy optimization only for Vermeer CPUs.

View File

@@ -28,6 +28,7 @@ option(WITH_STRICT_CACHE "Enable strict checks for OpenCL cache" ON)
option(WITH_INTERLEAVE_DEBUG_LOG "Enable debug log for threads interleave" OFF)
option(WITH_PROFILING "Enable profiling for developers" OFF)
option(WITH_SSE4_1 "Enable SSE 4.1 for Blake2" ON)
option(WITH_VAES "Enable VAES instructions for Cryptonight" ON)
option(WITH_BENCHMARK "Enable builtin RandomX benchmark and stress test" ON)
option(WITH_SECURE_JIT "Enable secure access to JIT memory" OFF)
option(WITH_DMI "Enable DMI/SMBIOS reader" ON)
@@ -133,6 +134,15 @@ if (CMAKE_C_COMPILER_ID MATCHES GNU)
set_source_files_properties(src/crypto/cn/CnHash.cpp PROPERTIES COMPILE_FLAGS "-Ofast -fno-tree-vectorize")
endif()
if (WITH_VAES)
add_definitions(-DXMRIG_VAES)
set(HEADERS_CRYPTO "${HEADERS_CRYPTO}" src/crypto/cn/CryptoNight_x86_vaes.h)
set(SOURCES_CRYPTO "${SOURCES_CRYPTO}" src/crypto/cn/CryptoNight_x86_vaes.cpp)
if (CMAKE_C_COMPILER_ID MATCHES GNU OR CMAKE_C_COMPILER_ID MATCHES Clang)
set_source_files_properties(src/crypto/cn/CryptoNight_x86_vaes.cpp PROPERTIES COMPILE_FLAGS "-Ofast -fno-tree-vectorize -mavx2 -mvaes")
endif()
endif()
if (WITH_HWLOC)
list(APPEND HEADERS_CRYPTO
src/crypto/common/NUMAMemoryPool.h

View File

@@ -9,10 +9,23 @@ if (NOT CMAKE_SYSTEM_PROCESSOR)
message(WARNING "CMAKE_SYSTEM_PROCESSOR not defined")
endif()
include(CheckCXXCompilerFlag)
if (CMAKE_CXX_COMPILER_ID MATCHES MSVC)
set(VAES_SUPPORTED ON)
else()
CHECK_CXX_COMPILER_FLAG("-mavx2 -mvaes" VAES_SUPPORTED)
endif()
if (NOT VAES_SUPPORTED)
set(WITH_VAES OFF)
endif()
if (XMRIG_64_BIT AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64)$")
add_definitions(-DRAPIDJSON_SSE2)
else()
set(WITH_SSE4_1 OFF)
set(WITH_VAES OFF)
endif()
if (NOT ARM_TARGET)
@@ -29,8 +42,6 @@ if (ARM_TARGET AND ARM_TARGET GREATER 6)
message(STATUS "Use ARM_TARGET=${ARM_TARGET} (${CMAKE_SYSTEM_PROCESSOR})")
include(CheckCXXCompilerFlag)
if (ARM_TARGET EQUAL 8)
CHECK_CXX_COMPILER_FLAG(-march=armv8-a+crypto XMRIG_ARM_CRYPTO)

View File

@@ -53,6 +53,9 @@ xmrig::Hashrate::Hashrate(size_t threads) :
m_timestamps[i] = new uint64_t[kBucketSize]();
m_top[i] = 0;
}
m_earliestTimestamp = std::numeric_limits<uint64_t>::max();
m_totalCount = 0;
}
@@ -66,6 +69,14 @@ xmrig::Hashrate::~Hashrate()
delete [] m_counts;
delete [] m_timestamps;
delete [] m_top;
}
// Overall average rate: m_totalCount scaled to a per-second value (x 1e3) and
// divided by the milliseconds elapsed between m_earliestTimestamp and the
// current Chrono::steadyMSecs() reading.
//
// Returns 0.0 when the current time is not strictly later than
// m_earliestTimestamp. That covers the state set by the constructor, where
// m_earliestTimestamp is the uint64_t maximum, and also avoids dividing by a
// zero-length interval.
double xmrig::Hashrate::average() const
{
    const uint64_t now = Chrono::steadyMSecs();
    if (now <= m_earliestTimestamp) {
        return 0.0;
    }

    const uint64_t elapsedMs = now - m_earliestTimestamp;
    return m_totalCount * 1e3 / elapsedMs;
}
@@ -167,4 +178,11 @@ void xmrig::Hashrate::addData(size_t index, uint64_t count, uint64_t timestamp)
m_timestamps[index][top] = timestamp;
m_top[index] = (top + 1) & kBucketMask;
if (index == 0) {
if (m_earliestTimestamp == std::numeric_limits<uint64_t>::max()) {
m_earliestTimestamp = timestamp;
}
m_totalCount = count;
}
}

View File

@@ -53,6 +53,8 @@ public:
inline void add(size_t threadId, uint64_t count, uint64_t timestamp) { addData(threadId + 1U, count, timestamp); }
inline void add(uint64_t count, uint64_t timestamp) { addData(0U, count, timestamp); }
double average() const;
static const char *format(double h, char *buf, size_t size);
static rapidjson::Value normalize(double d);
@@ -72,6 +74,9 @@ private:
uint32_t* m_top;
uint64_t** m_counts;
uint64_t** m_timestamps;
uint64_t m_earliestTimestamp;
uint64_t m_totalCount;
};

View File

@@ -31,6 +31,8 @@ class Worker : public IWorker
public:
Worker(size_t id, int64_t affinity, int priority);
size_t threads() const override { return 1; }
protected:
inline int64_t affinity() const { return m_affinity; }
inline size_t id() const override { return m_id; }

View File

@@ -46,6 +46,7 @@ public:
virtual const VirtualMemory *memory() const = 0;
virtual size_t id() const = 0;
virtual size_t intensity() const = 0;
virtual size_t threads() const = 0;
virtual void hashrateData(uint64_t &hashCount, uint64_t &timeStamp, uint64_t &rawHashes) const = 0;
virtual void jobEarlyNotification(const Job &job) = 0;
virtual void start() = 0;

View File

@@ -88,6 +88,7 @@ public:
{
if (ready) {
m_started++;
m_totalStarted += worker->threads();
if (m_workersMemory.insert(worker->memory()).second) {
m_hugePages += worker->memory()->hugePages();
@@ -112,7 +113,7 @@ public:
LOG_INFO("%s" GREEN_BOLD(" READY") " threads %s%zu/%zu (%zu)" CLEAR " huge pages %s%1.0f%% %zu/%zu" CLEAR " memory " CYAN_BOLD("%zu KB") BLACK_BOLD(" (%" PRIu64 " ms)"),
Tags::cpu(),
m_errors == 0 ? CYAN_BOLD_S : YELLOW_BOLD_S,
m_started, m_threads, m_ways,
m_totalStarted, std::max(m_totalStarted, m_threads), m_ways,
(m_hugePages.isFullyAllocated() ? GREEN_BOLD_S : (m_hugePages.allocated == 0 ? RED_BOLD_S : YELLOW_BOLD_S)),
m_hugePages.percent(),
m_hugePages.allocated, m_hugePages.total,
@@ -127,6 +128,7 @@ private:
size_t m_errors = 0;
size_t m_memory = 0;
size_t m_started = 0;
size_t m_totalStarted = 0;
size_t m_threads = 0;
size_t m_ways = 0;
uint64_t m_ts = 0;

View File

@@ -44,7 +44,7 @@ xmrig::CpuLaunchData::CpuLaunchData(const Miner *miner, const Algorithm &algorit
affinity(thread.affinity()),
miner(miner),
threads(threads),
intensity(std::min<uint32_t>(thread.intensity(), algorithm.maxIntensity())),
intensity(std::max<uint32_t>(std::min<uint32_t>(thread.intensity(), algorithm.maxIntensity()), algorithm.minIntensity())),
affinities(affinities)
{
}

View File

@@ -161,14 +161,14 @@ bool xmrig::CpuWorker<N>::selfTest()
}
# endif
allocateCnCtx();
# ifdef XMRIG_ALGO_GHOSTRIDER
if (m_algorithm.family() == Algorithm::GHOSTRIDER) {
return N == 8;
return (N == 8) && verify(Algorithm::GHOSTRIDER_RTM, test_output_gr);
}
# endif
allocateCnCtx();
if (m_algorithm.family() == Algorithm::CN) {
const bool rc = verify(Algorithm::CN_0, test_output_v0) &&
verify(Algorithm::CN_1, test_output_v1) &&
@@ -397,6 +397,37 @@ bool xmrig::CpuWorker<N>::nextRound()
template<size_t N>
bool xmrig::CpuWorker<N>::verify(const Algorithm &algorithm, const uint8_t *referenceValue)
{
# ifdef XMRIG_ALGO_GHOSTRIDER
if (algorithm == Algorithm::GHOSTRIDER_RTM) {
uint8_t blob[N * 80] = {};
for (size_t i = 0; i < N; ++i) {
blob[i * 80 + 0] = static_cast<uint8_t>(i);
blob[i * 80 + 4] = 0x10;
blob[i * 80 + 5] = 0x02;
}
uint8_t hash1[N * 32] = {};
ghostrider::hash_octa(blob, 80, hash1, m_ctx, 0, false);
for (size_t i = 0; i < N; ++i) {
blob[i * 80 + 0] = static_cast<uint8_t>(i);
blob[i * 80 + 4] = 0x43;
blob[i * 80 + 5] = 0x05;
}
uint8_t hash2[N * 32] = {};
ghostrider::hash_octa(blob, 80, hash2, m_ctx, 0, false);
for (size_t i = 0; i < N * 32; ++i) {
if ((hash1[i] ^ hash2[i]) != referenceValue[i]) {
return false;
}
}
return true;
}
# endif
cn_hash_fun func = fn(algorithm);
if (!func) {
return false;

View File

@@ -52,6 +52,15 @@ public:
CpuWorker(size_t id, const CpuLaunchData &data);
~CpuWorker() override;
// Thread count reported by this worker.
// With XMRIG_ALGO_GHOSTRIDER defined, returns 2 when the algorithm family is
// GHOSTRIDER and m_ghHelper is non-null (presumably the helper counts as a
// second thread - confirm against where m_ghHelper is created).
// In every other case, including builds without GhostRider, returns 1.
size_t threads() const override
{
#   ifdef XMRIG_ALGO_GHOSTRIDER
    if ((m_algorithm.family() == Algorithm::GHOSTRIDER) && m_ghHelper) {
        return 2;
    }
#   endif

    return 1;
}
protected:
bool selfTest() override;
void hashrateData(uint64_t &hashCount, uint64_t &timeStamp, uint64_t &rawHashes) const override;

View File

@@ -61,6 +61,7 @@ public:
enum Flag : uint32_t {
FLAG_AES,
FLAG_VAES,
FLAG_AVX,
FLAG_AVX2,
FLAG_AVX512F,
@@ -90,6 +91,7 @@ public:
virtual Assembly::Id assembly() const = 0;
virtual bool has(Flag feature) const = 0;
virtual bool hasAES() const = 0;
virtual bool hasVAES() const = 0;
virtual bool hasAVX() const = 0;
virtual bool hasAVX2() const = 0;
virtual bool hasBMI2() const = 0;

View File

@@ -30,6 +30,12 @@
#endif
#include "crypto/cn/CryptoNight_monero.h"
#ifdef XMRIG_VAES
# include "crypto/cn/CryptoNight_x86_vaes.h"
#endif
#include "backend/cpu/platform/BasicCpuInfo.h"
#include "3rdparty/rapidjson/document.h"
#include "crypto/common/Assembly.h"
@@ -52,8 +58,8 @@
namespace xmrig {
constexpr size_t kCpuFlagsSize = 14;
static const std::array<const char *, kCpuFlagsSize> flagNames = { "aes", "avx", "avx2", "avx512f", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "sse4.1", "xop", "popcnt", "cat_l3", "vm" };
constexpr size_t kCpuFlagsSize = 15;
static const std::array<const char *, kCpuFlagsSize> flagNames = { "aes", "vaes", "avx", "avx2", "avx512f", "bmi2", "osxsave", "pdpe1gb", "sse2", "ssse3", "sse4.1", "xop", "popcnt", "cat_l3", "vm" };
static_assert(kCpuFlagsSize == ICpuInfo::FLAG_MAX, "kCpuFlagsSize and FLAG_MAX mismatch");
@@ -140,6 +146,7 @@ static inline bool has_osxsave() { return has_feature(PROCESSOR_INFO,
static inline bool has_aes_ni() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 25); }
static inline bool has_avx() { return has_feature(PROCESSOR_INFO, ECX_Reg, 1 << 28) && has_osxsave() && has_xcr_avx(); }
static inline bool has_avx2() { return has_feature(EXTENDED_FEATURES, EBX_Reg, 1 << 5) && has_osxsave() && has_xcr_avx(); }
static inline bool has_vaes() { return has_feature(EXTENDED_FEATURES, ECX_Reg, 1 << 9) && has_osxsave() && has_xcr_avx(); }
static inline bool has_avx512f() { return has_feature(EXTENDED_FEATURES, EBX_Reg, 1 << 16) && has_osxsave() && has_xcr_avx512(); }
static inline bool has_bmi2() { return has_feature(EXTENDED_FEATURES, EBX_Reg, 1 << 8); }
static inline bool has_pdpe1gb() { return has_feature(PROCESSOR_EXT_INFO, EDX_Reg, 1 << 26); }
@@ -178,6 +185,7 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
m_flags.set(FLAG_AES, has_aes_ni());
m_flags.set(FLAG_AVX, has_avx());
m_flags.set(FLAG_AVX2, has_avx2());
m_flags.set(FLAG_VAES, has_vaes());
m_flags.set(FLAG_AVX512F, has_avx512f());
m_flags.set(FLAG_BMI2, has_bmi2());
m_flags.set(FLAG_OSXSAVE, has_osxsave());
@@ -292,6 +300,9 @@ xmrig::BasicCpuInfo::BasicCpuInfo() :
}
}
# endif
cn_sse41_enabled = has(FLAG_SSE41);
cn_vaes_enabled = has(FLAG_VAES);
}

View File

@@ -44,6 +44,7 @@ protected:
inline Assembly::Id assembly() const override { return m_assembly; }
inline bool has(Flag flag) const override { return m_flags.test(flag); }
inline bool hasAES() const override { return has(FLAG_AES); }
inline bool hasVAES() const override { return has(FLAG_VAES); }
inline bool hasAVX() const override { return has(FLAG_AVX); }
inline bool hasAVX2() const override { return has(FLAG_AVX2); }
inline bool hasBMI2() const override { return has(FLAG_BMI2); }

View File

@@ -2,7 +2,7 @@
namespace xmrig {
static const char astrobwt_cl[12493] = {
static const char astrobwt_cl[12489] = {
0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x75,0x63,0x68,0x61,0x72,0x20,0x75,0x69,0x6e,0x74,0x38,0x5f,0x74,0x3b,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x75,
0x73,0x68,0x6f,0x72,0x74,0x20,0x75,0x69,0x6e,0x74,0x31,0x36,0x5f,0x74,0x3b,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x75,0x69,0x6e,0x74,0x20,0x75,0x69,0x6e,
0x74,0x33,0x32,0x5f,0x74,0x3b,0x0a,0x74,0x79,0x70,0x65,0x64,0x65,0x66,0x20,0x75,0x6c,0x6f,0x6e,0x67,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x3b,0x0a,0x74,
@@ -260,140 +260,140 @@ static const char astrobwt_cl[12493] = {
0x6f,0x66,0x66,0x73,0x65,0x74,0x2b,0x6f,0x75,0x74,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x29,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x33,
0x32,0x5f,0x74,0x29,0x5d,0x20,0x26,0x3d,0x20,0x30,0x78,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x46,0x55,0x3e,0x3e,0x28,0x28,0x34,0x2d,0x28,0x6f,0x75,0x74,0x70,0x75,
0x74,0x5f,0x73,0x69,0x7a,0x65,0x26,0x33,0x29,0x29,0x3c,0x3c,0x33,0x29,0x3b,0x0a,0x7d,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x52,0x4f,0x55,0x4e,0x44,0x53,
0x20,0x32,0x34,0x20,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x52,0x36,0x34,0x28,0x61,0x2c,0x62,0x2c,0x63,0x29,0x20,0x28,0x28,0x28,0x61,0x29,0x20,0x3c,0x3c,
0x20,0x62,0x29,0x20,0x7c,0x20,0x28,0x28,0x61,0x29,0x20,0x3e,0x3e,0x20,0x63,0x29,0x29,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,
0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x72,0x63,0x5b,0x32,0x5d,0x5b,0x52,0x4f,0x55,0x4e,0x44,0x53,0x5d,0x3d,0x7b,0x0a,0x7b,0x30,0x78,0x30,
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x31,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
0x38,0x30,0x38,0x32,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x38,0x41,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x38,
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
0x38,0x30,0x38,0x42,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x31,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x38,
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x38,0x31,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
0x38,0x30,0x30,0x39,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x41,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x30,
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x38,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,
0x38,0x30,0x30,0x39,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x41,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x30,
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x38,0x42,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
0x30,0x30,0x38,0x42,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x38,0x39,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x38,
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x33,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
0x38,0x30,0x30,0x32,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x30,
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x41,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,
0x30,0x30,0x30,0x41,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x38,0x31,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x38,
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x38,0x30,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,
0x30,0x30,0x30,0x31,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x30,0x38,0x55,0x4c,0x7d,0x2c,0x0a,0x7b,0x30,
0x20,0x32,0x34,0x0a,0x23,0x64,0x65,0x66,0x69,0x6e,0x65,0x20,0x52,0x36,0x34,0x28,0x61,0x2c,0x62,0x2c,0x63,0x29,0x20,0x28,0x28,0x28,0x61,0x29,0x20,0x3c,0x3c,0x20,
0x62,0x29,0x20,0x7c,0x20,0x28,0x28,0x61,0x29,0x20,0x3e,0x3e,0x20,0x63,0x29,0x29,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,0x73,
0x74,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x72,0x63,0x5b,0x32,0x5d,0x5b,0x52,0x4f,0x55,0x4e,0x44,0x53,0x5d,0x3d,0x7b,0x0a,0x7b,0x30,0x78,0x30,0x30,
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x31,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,
0x30,0x38,0x32,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x38,0x41,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x38,0x30,
0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,
0x30,0x38,0x42,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x31,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x38,0x30,
0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x38,0x31,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,
0x30,0x30,0x39,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x41,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x30,0x30,
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x38,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,
0x30,0x30,0x39,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x41,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x30,0x30,
0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x38,0x42,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
0x30,0x38,0x42,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x38,0x39,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x38,0x30,
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x33,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,
0x30,0x30,0x32,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x30,0x30,
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x41,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x30,
0x30,0x30,0x41,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x38,0x31,0x55,0x4c,0x2c,0x0a,0x30,0x78,0x38,0x30,
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x38,0x30,0x55,0x4c,0x2c,0x30,0x78,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x30,
0x30,0x30,0x31,0x55,0x4c,0x2c,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x38,0x30,0x30,0x30,0x38,0x30,0x30,0x38,0x55,0x4c,0x7d,0x2c,0x0a,0x7b,0x30,0x55,
0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x0a,0x30,
0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x0a,
0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,
0x0a,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,
0x7d,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x72,0x6f,0x5b,0x32,0x35,0x5d,
0x5b,0x32,0x5d,0x3d,0x7b,0x0a,0x7b,0x20,0x30,0x2c,0x36,0x34,0x7d,0x2c,0x7b,0x34,0x34,0x2c,0x32,0x30,0x7d,0x2c,0x7b,0x34,0x33,0x2c,0x32,0x31,0x7d,0x2c,0x7b,0x32,
0x31,0x2c,0x34,0x33,0x7d,0x2c,0x7b,0x31,0x34,0x2c,0x35,0x30,0x7d,0x2c,0x0a,0x7b,0x20,0x31,0x2c,0x36,0x33,0x7d,0x2c,0x7b,0x20,0x36,0x2c,0x35,0x38,0x7d,0x2c,0x7b,
0x32,0x35,0x2c,0x33,0x39,0x7d,0x2c,0x7b,0x20,0x38,0x2c,0x35,0x36,0x7d,0x2c,0x7b,0x31,0x38,0x2c,0x34,0x36,0x7d,0x2c,0x0a,0x7b,0x36,0x32,0x2c,0x32,0x7d,0x2c,0x7b,
0x35,0x35,0x2c,0x39,0x7d,0x2c,0x7b,0x33,0x39,0x2c,0x32,0x35,0x7d,0x2c,0x7b,0x34,0x31,0x2c,0x32,0x33,0x7d,0x2c,0x7b,0x20,0x32,0x2c,0x36,0x32,0x7d,0x2c,0x0a,0x7b,
0x32,0x38,0x2c,0x33,0x36,0x7d,0x2c,0x7b,0x32,0x30,0x2c,0x34,0x34,0x7d,0x2c,0x7b,0x20,0x33,0x2c,0x36,0x31,0x7d,0x2c,0x7b,0x34,0x35,0x2c,0x31,0x39,0x7d,0x2c,0x7b,
0x36,0x31,0x2c,0x33,0x7d,0x2c,0x0a,0x7b,0x32,0x37,0x2c,0x33,0x37,0x7d,0x2c,0x7b,0x33,0x36,0x2c,0x32,0x38,0x7d,0x2c,0x7b,0x31,0x30,0x2c,0x35,0x34,0x7d,0x2c,0x7b,
0x31,0x35,0x2c,0x34,0x39,0x7d,0x2c,0x7b,0x35,0x36,0x2c,0x38,0x7d,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,0x73,
0x74,0x20,0x69,0x6e,0x74,0x20,0x61,0x5b,0x32,0x35,0x5d,0x3d,0x7b,0x0a,0x30,0x2c,0x36,0x2c,0x31,0x32,0x2c,0x31,0x38,0x2c,0x32,0x34,0x2c,0x0a,0x31,0x2c,0x37,0x2c,
0x31,0x33,0x2c,0x31,0x39,0x2c,0x32,0x30,0x2c,0x0a,0x32,0x2c,0x38,0x2c,0x31,0x34,0x2c,0x31,0x35,0x2c,0x32,0x31,0x2c,0x0a,0x33,0x2c,0x39,0x2c,0x31,0x30,0x2c,0x31,
0x36,0x2c,0x32,0x32,0x2c,0x0a,0x34,0x2c,0x35,0x2c,0x31,0x31,0x2c,0x31,0x37,0x2c,0x32,0x33,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,
0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x5b,0x32,0x35,0x5d,0x3d,0x7b,0x0a,0x30,0x2c,0x31,0x2c,0x32,0x2c,0x33,0x2c,0x34,0x2c,0x0a,0x31,0x2c,
0x32,0x2c,0x33,0x2c,0x34,0x2c,0x30,0x2c,0x0a,0x32,0x2c,0x33,0x2c,0x34,0x2c,0x30,0x2c,0x31,0x2c,0x0a,0x33,0x2c,0x34,0x2c,0x30,0x2c,0x31,0x2c,0x32,0x2c,0x0a,0x34,
0x2c,0x30,0x2c,0x31,0x2c,0x32,0x2c,0x33,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,
0x20,0x63,0x5b,0x32,0x35,0x5d,0x5b,0x33,0x5d,0x3d,0x7b,0x0a,0x7b,0x20,0x30,0x2c,0x31,0x2c,0x32,0x7d,0x2c,0x7b,0x20,0x31,0x2c,0x32,0x2c,0x33,0x7d,0x2c,0x7b,0x20,
0x32,0x2c,0x33,0x2c,0x34,0x7d,0x2c,0x7b,0x20,0x33,0x2c,0x34,0x2c,0x30,0x7d,0x2c,0x7b,0x20,0x34,0x2c,0x30,0x2c,0x31,0x7d,0x2c,0x0a,0x7b,0x20,0x35,0x2c,0x36,0x2c,
0x37,0x7d,0x2c,0x7b,0x20,0x36,0x2c,0x37,0x2c,0x38,0x7d,0x2c,0x7b,0x20,0x37,0x2c,0x38,0x2c,0x39,0x7d,0x2c,0x7b,0x20,0x38,0x2c,0x39,0x2c,0x35,0x7d,0x2c,0x7b,0x20,
0x39,0x2c,0x35,0x2c,0x36,0x7d,0x2c,0x0a,0x7b,0x31,0x30,0x2c,0x31,0x31,0x2c,0x31,0x32,0x7d,0x2c,0x7b,0x31,0x31,0x2c,0x31,0x32,0x2c,0x31,0x33,0x7d,0x2c,0x7b,0x31,
0x32,0x2c,0x31,0x33,0x2c,0x31,0x34,0x7d,0x2c,0x7b,0x31,0x33,0x2c,0x31,0x34,0x2c,0x31,0x30,0x7d,0x2c,0x7b,0x31,0x34,0x2c,0x31,0x30,0x2c,0x31,0x31,0x7d,0x2c,0x0a,
0x7b,0x31,0x35,0x2c,0x31,0x36,0x2c,0x31,0x37,0x7d,0x2c,0x7b,0x31,0x36,0x2c,0x31,0x37,0x2c,0x31,0x38,0x7d,0x2c,0x7b,0x31,0x37,0x2c,0x31,0x38,0x2c,0x31,0x39,0x7d,
0x2c,0x7b,0x31,0x38,0x2c,0x31,0x39,0x2c,0x31,0x35,0x7d,0x2c,0x7b,0x31,0x39,0x2c,0x31,0x35,0x2c,0x31,0x36,0x7d,0x2c,0x0a,0x7b,0x32,0x30,0x2c,0x32,0x31,0x2c,0x32,
0x32,0x7d,0x2c,0x7b,0x32,0x31,0x2c,0x32,0x32,0x2c,0x32,0x33,0x7d,0x2c,0x7b,0x32,0x32,0x2c,0x32,0x33,0x2c,0x32,0x34,0x7d,0x2c,0x7b,0x32,0x33,0x2c,0x32,0x34,0x2c,
0x32,0x30,0x7d,0x2c,0x7b,0x32,0x34,0x2c,0x32,0x30,0x2c,0x32,0x31,0x7d,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,
0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x64,0x5b,0x32,0x35,0x5d,0x3d,0x7b,0x0a,0x30,0x2c,0x31,0x2c,0x32,0x2c,0x33,0x2c,0x34,0x2c,0x0a,0x31,0x30,0x2c,0x31,0x31,0x2c,
0x31,0x32,0x2c,0x31,0x33,0x2c,0x31,0x34,0x2c,0x0a,0x32,0x30,0x2c,0x32,0x31,0x2c,0x32,0x32,0x2c,0x32,0x33,0x2c,0x32,0x34,0x2c,0x0a,0x35,0x2c,0x36,0x2c,0x37,0x2c,
0x38,0x2c,0x39,0x2c,0x0a,0x31,0x35,0x2c,0x31,0x36,0x2c,0x31,0x37,0x2c,0x31,0x38,0x2c,0x31,0x39,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x61,0x74,0x74,0x72,0x69,0x62,0x75,
0x74,0x65,0x5f,0x5f,0x28,0x28,0x72,0x65,0x71,0x64,0x5f,0x77,0x6f,0x72,0x6b,0x5f,0x67,0x72,0x6f,0x75,0x70,0x5f,0x73,0x69,0x7a,0x65,0x28,0x33,0x32,0x2c,0x31,0x2c,
0x31,0x29,0x29,0x29,0x0a,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x73,0x68,0x61,0x33,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,
0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x38,0x5f,0x74,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x73,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,
0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x73,0x2c,0x75,0x69,0x6e,0x74,
0x33,0x32,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x74,0x72,0x69,0x64,0x65,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,
0x34,0x5f,0x74,0x2a,0x20,0x68,0x61,0x73,0x68,0x65,0x73,0x29,0x0a,0x7b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x74,0x3d,
0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,
0x67,0x3d,0x67,0x65,0x74,0x5f,0x67,0x72,0x6f,0x75,0x70,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x69,0x66,0x28,0x74,0x3e,0x3d,0x32,0x35,0x29,0x0a,0x72,0x65,0x74,
0x75,0x72,0x6e,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x73,0x3d,0x74,0x20,0x25,0x20,0x35,0x3b,0x0a,0x63,0x6f,0x6e,
0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x3d,0x28,0x28,0x75,0x69,0x6e,0x74,0x36,
0x34,0x5f,0x74,0x29,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x74,0x72,0x69,0x64,0x65,0x29,0x2a,0x67,0x3b,0x0a,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,
0x6e,0x74,0x36,0x34,0x5f,0x74,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x3d,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,
0x2a,0x29,0x28,0x69,0x6e,0x70,0x75,0x74,0x73,0x2b,0x69,0x6e,0x70,0x75,0x74,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,
0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x3d,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x73,0x5b,0x67,
0x5d,0x2b,0x31,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x41,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x5f,0x5f,0x6c,
0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x43,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,
0x74,0x36,0x34,0x5f,0x74,0x20,0x44,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x41,0x5b,0x74,0x5d,0x3d,0x30,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,
0x32,0x5f,0x74,0x20,0x77,0x6f,0x72,0x64,0x73,0x3d,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,
0x36,0x34,0x5f,0x74,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x74,0x61,0x69,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x3d,
0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x20,0x25,0x20,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x3b,0x0a,0x75,
0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x3d,0x30,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x33,0x32,
0x5f,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x77,0x6f,0x72,0x64,0x73,0x3b,0x20,0x2b,0x2b,0x69,0x2c,0x2b,0x2b,0x69,0x6e,0x70,0x75,0x74,0x29,0x0a,0x7b,0x0a,
0x41,0x5b,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x2a,0x69,0x6e,0x70,0x75,0x74,0x3b,0x0a,0x2b,0x2b,0x77,0x6f,0x72,0x64,0x49,0x6e,
0x64,0x65,0x78,0x3b,0x0a,0x69,0x66,0x28,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x3d,0x3d,0x31,0x37,0x29,0x0a,0x7b,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,
0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x20,0x52,0x4f,0x55,0x4e,0x44,0x53,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x52,
0x4f,0x55,0x4e,0x44,0x53,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x0a,0x7b,0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x41,0x5b,0x73,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x35,0x5d,0x5e,0x41,
0x5b,0x73,0x2b,0x31,0x30,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x31,0x35,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x32,0x30,0x5d,0x3b,0x0a,0x44,0x5b,0x74,0x5d,0x3d,0x43,0x5b,0x62,
0x5b,0x32,0x30,0x2b,0x73,0x5d,0x5d,0x5e,0x52,0x36,0x34,0x28,0x43,0x5b,0x62,0x5b,0x35,0x2b,0x73,0x5d,0x5d,0x2c,0x31,0x2c,0x36,0x33,0x29,0x3b,0x0a,0x43,0x5b,0x74,
0x5d,0x3d,0x52,0x36,0x34,0x28,0x41,0x5b,0x61,0x5b,0x74,0x5d,0x5d,0x5e,0x44,0x5b,0x62,0x5b,0x74,0x5d,0x5d,0x2c,0x72,0x6f,0x5b,0x74,0x5d,0x5b,0x30,0x5d,0x2c,0x72,
0x6f,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x29,0x3b,0x0a,0x41,0x5b,0x64,0x5b,0x74,0x5d,0x5d,0x3d,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x30,0x5d,0x5d,0x5e,0x28,0x28,0x7e,
0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x5d,0x29,0x26,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x32,0x5d,0x5d,0x29,0x3b,0x0a,0x41,0x5b,0x74,0x5d,0x20,0x5e,0x3d,
0x20,0x72,0x63,0x5b,0x28,0x74,0x3d,0x3d,0x30,0x29,0x3f,0x30,0x3a,0x31,0x5d,0x5b,0x69,0x5d,0x3b,0x20,0x0a,0x7d,0x0a,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,
0x3d,0x30,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x74,0x61,0x69,0x6c,0x3d,0x30,0x3b,0x0a,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,
0x6c,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x38,0x5f,0x74,0x2a,0x20,0x70,0x3d,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x63,0x6f,0x6e,
0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x38,0x5f,0x74,0x2a,0x29,0x69,0x6e,0x70,0x75,0x74,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,
0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x74,0x61,0x69,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x0a,0x7b,0x0a,0x74,0x61,0x69,0x6c,0x7c,0x3d,
0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x28,0x70,0x5b,0x69,0x5d,0x29,0x3c,0x3c,0x28,0x69,0x2a,0x38,0x29,0x3b,0x0a,0x7d,0x0a,0x41,0x5b,0x77,0x6f,0x72,
0x64,0x49,0x6e,0x64,0x65,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x74,0x61,0x69,0x6c,0x5e,0x28,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x28,0x28,0x28,0x75,0x69,
0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x28,0x30,0x78,0x30,0x32,0x7c,0x28,0x31,0x3c,0x3c,0x32,0x29,0x29,0x29,0x3c,0x3c,0x28,0x74,0x61,0x69,0x6c,0x5f,0x73,0x69,0x7a,
0x65,0x2a,0x38,0x29,0x29,0x29,0x3b,0x0a,0x41,0x5b,0x31,0x36,0x5d,0x20,0x5e,0x3d,0x20,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
0x30,0x30,0x30,0x55,0x4c,0x3b,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x20,0x31,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,
0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x52,0x4f,0x55,0x4e,0x44,0x53,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x0a,0x7b,0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x41,0x5b,0x73,0x5d,
0x5e,0x41,0x5b,0x73,0x2b,0x35,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x31,0x30,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x31,0x35,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x32,0x30,0x5d,0x3b,
0x0a,0x44,0x5b,0x74,0x5d,0x3d,0x43,0x5b,0x62,0x5b,0x32,0x30,0x2b,0x73,0x5d,0x5d,0x5e,0x52,0x36,0x34,0x28,0x43,0x5b,0x62,0x5b,0x35,0x2b,0x73,0x5d,0x5d,0x2c,0x31,
0x2c,0x36,0x33,0x29,0x3b,0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x52,0x36,0x34,0x28,0x41,0x5b,0x61,0x5b,0x74,0x5d,0x5d,0x5e,0x44,0x5b,0x62,0x5b,0x74,0x5d,0x5d,0x2c,0x72,
0x6f,0x5b,0x74,0x5d,0x5b,0x30,0x5d,0x2c,0x72,0x6f,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x29,0x3b,0x0a,0x41,0x5b,0x64,0x5b,0x74,0x5d,0x5d,0x3d,0x43,0x5b,0x63,0x5b,0x74,
0x5d,0x5b,0x30,0x5d,0x5d,0x5e,0x28,0x28,0x7e,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x5d,0x29,0x26,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x32,0x5d,0x5d,0x29,
0x3b,0x0a,0x41,0x5b,0x74,0x5d,0x20,0x5e,0x3d,0x20,0x72,0x63,0x5b,0x28,0x74,0x3d,0x3d,0x30,0x29,0x3f,0x30,0x3a,0x31,0x5d,0x5b,0x69,0x5d,0x3b,0x20,0x0a,0x7d,0x0a,
0x69,0x66,0x28,0x74,0x3c,0x34,0x29,0x0a,0x7b,0x0a,0x68,0x61,0x73,0x68,0x65,0x73,0x2b,0x3d,0x67,0x2a,0x28,0x33,0x32,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,
0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x29,0x3b,0x0a,0x68,0x61,0x73,0x68,0x65,0x73,0x5b,0x74,0x5d,0x3d,0x41,0x5b,0x74,0x5d,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x5f,
0x5f,0x61,0x74,0x74,0x72,0x69,0x62,0x75,0x74,0x65,0x5f,0x5f,0x28,0x28,0x72,0x65,0x71,0x64,0x5f,0x77,0x6f,0x72,0x6b,0x5f,0x67,0x72,0x6f,0x75,0x70,0x5f,0x73,0x69,
0x7a,0x65,0x28,0x33,0x32,0x2c,0x31,0x2c,0x31,0x29,0x29,0x29,0x0a,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x73,0x68,0x61,0x33,0x5f,
0x69,0x6e,0x69,0x74,0x69,0x61,0x6c,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x38,0x5f,0x74,0x2a,0x20,
0x69,0x6e,0x70,0x75,0x74,0x5f,0x64,0x61,0x74,0x61,0x2c,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x2c,0x75,
0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x6e,0x6f,0x6e,0x63,0x65,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x2a,
0x20,0x68,0x61,0x73,0x68,0x65,0x73,0x29,0x0a,0x7b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x74,0x3d,0x67,0x65,0x74,0x5f,
0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x67,0x3d,0x67,0x65,
0x74,0x5f,0x67,0x72,0x6f,0x75,0x70,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x69,0x66,0x28,0x74,0x3e,0x3d,0x32,0x35,0x29,0x0a,0x72,0x65,0x74,0x75,0x72,0x6e,0x3b,
0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x73,0x3d,0x74,0x20,0x25,0x20,0x35,0x3b,0x0a,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,
0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x3d,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,
0x36,0x34,0x5f,0x74,0x2a,0x29,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x64,0x61,0x74,0x61,0x29,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,
0x36,0x34,0x5f,0x74,0x20,0x41,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x43,0x5b,0x32,
0x35,0x5d,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x44,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x41,0x5b,0x74,0x5d,
0x3d,0x28,0x74,0x3c,0x31,0x36,0x29,0x3f,0x69,0x6e,0x70,0x75,0x74,0x5b,0x74,0x5d,0x3a,0x30,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,
0x33,0x32,0x5f,0x74,0x2a,0x20,0x6e,0x6f,0x6e,0x63,0x65,0x5f,0x70,0x6f,0x73,0x3d,0x28,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,
0x74,0x2a,0x29,0x28,0x41,0x29,0x2b,0x39,0x3b,0x0a,0x6e,0x6f,0x6e,0x63,0x65,0x2b,0x3d,0x67,0x3b,0x0a,0x6e,0x6f,0x6e,0x63,0x65,0x5f,0x70,0x6f,0x73,0x5b,0x30,0x5d,
0x3d,0x28,0x6e,0x6f,0x6e,0x63,0x65,0x5f,0x70,0x6f,0x73,0x5b,0x30,0x5d,0x26,0x30,0x78,0x46,0x46,0x46,0x46,0x46,0x46,0x55,0x29,0x7c,0x28,0x28,0x6e,0x6f,0x6e,0x63,
0x65,0x26,0x30,0x78,0x46,0x46,0x29,0x3c,0x3c,0x32,0x34,0x29,0x3b,0x0a,0x6e,0x6f,0x6e,0x63,0x65,0x5f,0x70,0x6f,0x73,0x5b,0x31,0x5d,0x3d,0x28,0x6e,0x6f,0x6e,0x63,
0x65,0x5f,0x70,0x6f,0x73,0x5b,0x31,0x5d,0x26,0x30,0x78,0x46,0x46,0x30,0x30,0x30,0x30,0x30,0x30,0x55,0x29,0x7c,0x28,0x6e,0x6f,0x6e,0x63,0x65,0x3e,0x3e,0x38,0x29,
0x3b,0x0a,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x3d,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x2f,
0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,
0x20,0x74,0x61,0x69,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x3d,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x20,0x25,0x20,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,
0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x3b,0x0a,0x41,0x5b,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x28,0x75,0x69,0x6e,0x74,0x36,
0x34,0x5f,0x74,0x29,0x28,0x28,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x28,0x30,0x78,0x30,0x32,0x7c,0x28,0x31,0x3c,0x3c,0x32,0x29,0x29,0x29,0x3c,0x3c,
0x28,0x74,0x61,0x69,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x2a,0x38,0x29,0x29,0x3b,0x0a,0x41,0x5b,0x31,0x36,0x5d,0x20,0x5e,0x3d,0x20,0x30,0x78,0x38,0x30,0x30,0x30,0x30,
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x55,0x4c,0x3b,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x20,0x52,0x4f,
0x55,0x4e,0x44,0x53,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x52,0x4f,0x55,0x4e,0x44,0x53,0x3b,0x20,0x2b,0x2b,0x69,
0x29,0x0a,0x7b,0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x41,0x5b,0x73,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x35,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x31,0x30,0x5d,0x5e,0x41,0x5b,0x73,
0x2b,0x31,0x35,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x32,0x30,0x5d,0x3b,0x0a,0x44,0x5b,0x74,0x5d,0x3d,0x43,0x5b,0x62,0x5b,0x32,0x30,0x2b,0x73,0x5d,0x5d,0x5e,0x52,0x36,
0x34,0x28,0x43,0x5b,0x62,0x5b,0x35,0x2b,0x73,0x5d,0x5d,0x2c,0x31,0x2c,0x36,0x33,0x29,0x3b,0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x52,0x36,0x34,0x28,0x41,0x5b,0x61,0x5b,
0x74,0x5d,0x5d,0x5e,0x44,0x5b,0x62,0x5b,0x74,0x5d,0x5d,0x2c,0x72,0x6f,0x5b,0x74,0x5d,0x5b,0x30,0x5d,0x2c,0x72,0x6f,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x29,0x3b,0x0a,
0x41,0x5b,0x64,0x5b,0x74,0x5d,0x5d,0x3d,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x30,0x5d,0x5d,0x5e,0x28,0x28,0x7e,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x5d,
0x29,0x26,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x32,0x5d,0x5d,0x29,0x3b,0x0a,0x41,0x5b,0x74,0x5d,0x20,0x5e,0x3d,0x20,0x72,0x63,0x5b,0x28,0x74,0x3d,0x3d,0x30,0x29,
0x3f,0x30,0x3a,0x31,0x5d,0x5b,0x69,0x5d,0x3b,0x20,0x0a,0x7d,0x0a,0x69,0x66,0x28,0x74,0x3c,0x34,0x29,0x0a,0x7b,0x0a,0x68,0x61,0x73,0x68,0x65,0x73,0x2b,0x3d,0x67,
0x2a,0x28,0x33,0x32,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x29,0x3b,0x0a,0x68,0x61,0x73,0x68,0x65,0x73,0x5b,0x74,
0x5d,0x3d,0x41,0x5b,0x74,0x5d,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x00
0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x2c,0x30,0x55,0x4c,0x7d,
0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x72,0x6f,0x5b,0x32,0x35,0x5d,0x5b,
0x32,0x5d,0x3d,0x7b,0x0a,0x7b,0x20,0x30,0x2c,0x36,0x34,0x7d,0x2c,0x7b,0x34,0x34,0x2c,0x32,0x30,0x7d,0x2c,0x7b,0x34,0x33,0x2c,0x32,0x31,0x7d,0x2c,0x7b,0x32,0x31,
0x2c,0x34,0x33,0x7d,0x2c,0x7b,0x31,0x34,0x2c,0x35,0x30,0x7d,0x2c,0x0a,0x7b,0x20,0x31,0x2c,0x36,0x33,0x7d,0x2c,0x7b,0x20,0x36,0x2c,0x35,0x38,0x7d,0x2c,0x7b,0x32,
0x35,0x2c,0x33,0x39,0x7d,0x2c,0x7b,0x20,0x38,0x2c,0x35,0x36,0x7d,0x2c,0x7b,0x31,0x38,0x2c,0x34,0x36,0x7d,0x2c,0x0a,0x7b,0x36,0x32,0x2c,0x32,0x7d,0x2c,0x7b,0x35,
0x35,0x2c,0x39,0x7d,0x2c,0x7b,0x33,0x39,0x2c,0x32,0x35,0x7d,0x2c,0x7b,0x34,0x31,0x2c,0x32,0x33,0x7d,0x2c,0x7b,0x20,0x32,0x2c,0x36,0x32,0x7d,0x2c,0x0a,0x7b,0x32,
0x38,0x2c,0x33,0x36,0x7d,0x2c,0x7b,0x32,0x30,0x2c,0x34,0x34,0x7d,0x2c,0x7b,0x20,0x33,0x2c,0x36,0x31,0x7d,0x2c,0x7b,0x34,0x35,0x2c,0x31,0x39,0x7d,0x2c,0x7b,0x36,
0x31,0x2c,0x33,0x7d,0x2c,0x0a,0x7b,0x32,0x37,0x2c,0x33,0x37,0x7d,0x2c,0x7b,0x33,0x36,0x2c,0x32,0x38,0x7d,0x2c,0x7b,0x31,0x30,0x2c,0x35,0x34,0x7d,0x2c,0x7b,0x31,
0x35,0x2c,0x34,0x39,0x7d,0x2c,0x7b,0x35,0x36,0x2c,0x38,0x7d,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,0x73,0x74,
0x20,0x69,0x6e,0x74,0x20,0x61,0x5b,0x32,0x35,0x5d,0x3d,0x7b,0x0a,0x30,0x2c,0x36,0x2c,0x31,0x32,0x2c,0x31,0x38,0x2c,0x32,0x34,0x2c,0x0a,0x31,0x2c,0x37,0x2c,0x31,
0x33,0x2c,0x31,0x39,0x2c,0x32,0x30,0x2c,0x0a,0x32,0x2c,0x38,0x2c,0x31,0x34,0x2c,0x31,0x35,0x2c,0x32,0x31,0x2c,0x0a,0x33,0x2c,0x39,0x2c,0x31,0x30,0x2c,0x31,0x36,
0x2c,0x32,0x32,0x2c,0x0a,0x34,0x2c,0x35,0x2c,0x31,0x31,0x2c,0x31,0x37,0x2c,0x32,0x33,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,
0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,0x62,0x5b,0x32,0x35,0x5d,0x3d,0x7b,0x0a,0x30,0x2c,0x31,0x2c,0x32,0x2c,0x33,0x2c,0x34,0x2c,0x0a,0x31,0x2c,0x32,
0x2c,0x33,0x2c,0x34,0x2c,0x30,0x2c,0x0a,0x32,0x2c,0x33,0x2c,0x34,0x2c,0x30,0x2c,0x31,0x2c,0x0a,0x33,0x2c,0x34,0x2c,0x30,0x2c,0x31,0x2c,0x32,0x2c,0x0a,0x34,0x2c,
0x30,0x2c,0x31,0x2c,0x32,0x2c,0x33,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x69,0x6e,0x74,0x20,
0x63,0x5b,0x32,0x35,0x5d,0x5b,0x33,0x5d,0x3d,0x7b,0x0a,0x7b,0x20,0x30,0x2c,0x31,0x2c,0x32,0x7d,0x2c,0x7b,0x20,0x31,0x2c,0x32,0x2c,0x33,0x7d,0x2c,0x7b,0x20,0x32,
0x2c,0x33,0x2c,0x34,0x7d,0x2c,0x7b,0x20,0x33,0x2c,0x34,0x2c,0x30,0x7d,0x2c,0x7b,0x20,0x34,0x2c,0x30,0x2c,0x31,0x7d,0x2c,0x0a,0x7b,0x20,0x35,0x2c,0x36,0x2c,0x37,
0x7d,0x2c,0x7b,0x20,0x36,0x2c,0x37,0x2c,0x38,0x7d,0x2c,0x7b,0x20,0x37,0x2c,0x38,0x2c,0x39,0x7d,0x2c,0x7b,0x20,0x38,0x2c,0x39,0x2c,0x35,0x7d,0x2c,0x7b,0x20,0x39,
0x2c,0x35,0x2c,0x36,0x7d,0x2c,0x0a,0x7b,0x31,0x30,0x2c,0x31,0x31,0x2c,0x31,0x32,0x7d,0x2c,0x7b,0x31,0x31,0x2c,0x31,0x32,0x2c,0x31,0x33,0x7d,0x2c,0x7b,0x31,0x32,
0x2c,0x31,0x33,0x2c,0x31,0x34,0x7d,0x2c,0x7b,0x31,0x33,0x2c,0x31,0x34,0x2c,0x31,0x30,0x7d,0x2c,0x7b,0x31,0x34,0x2c,0x31,0x30,0x2c,0x31,0x31,0x7d,0x2c,0x0a,0x7b,
0x31,0x35,0x2c,0x31,0x36,0x2c,0x31,0x37,0x7d,0x2c,0x7b,0x31,0x36,0x2c,0x31,0x37,0x2c,0x31,0x38,0x7d,0x2c,0x7b,0x31,0x37,0x2c,0x31,0x38,0x2c,0x31,0x39,0x7d,0x2c,
0x7b,0x31,0x38,0x2c,0x31,0x39,0x2c,0x31,0x35,0x7d,0x2c,0x7b,0x31,0x39,0x2c,0x31,0x35,0x2c,0x31,0x36,0x7d,0x2c,0x0a,0x7b,0x32,0x30,0x2c,0x32,0x31,0x2c,0x32,0x32,
0x7d,0x2c,0x7b,0x32,0x31,0x2c,0x32,0x32,0x2c,0x32,0x33,0x7d,0x2c,0x7b,0x32,0x32,0x2c,0x32,0x33,0x2c,0x32,0x34,0x7d,0x2c,0x7b,0x32,0x33,0x2c,0x32,0x34,0x2c,0x32,
0x30,0x7d,0x2c,0x7b,0x32,0x34,0x2c,0x32,0x30,0x2c,0x32,0x31,0x7d,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x63,0x6f,0x6e,0x73,0x74,0x61,0x6e,0x74,0x20,0x63,0x6f,0x6e,0x73,
0x74,0x20,0x69,0x6e,0x74,0x20,0x64,0x5b,0x32,0x35,0x5d,0x3d,0x7b,0x0a,0x30,0x2c,0x31,0x2c,0x32,0x2c,0x33,0x2c,0x34,0x2c,0x0a,0x31,0x30,0x2c,0x31,0x31,0x2c,0x31,
0x32,0x2c,0x31,0x33,0x2c,0x31,0x34,0x2c,0x0a,0x32,0x30,0x2c,0x32,0x31,0x2c,0x32,0x32,0x2c,0x32,0x33,0x2c,0x32,0x34,0x2c,0x0a,0x35,0x2c,0x36,0x2c,0x37,0x2c,0x38,
0x2c,0x39,0x2c,0x0a,0x31,0x35,0x2c,0x31,0x36,0x2c,0x31,0x37,0x2c,0x31,0x38,0x2c,0x31,0x39,0x0a,0x7d,0x3b,0x0a,0x5f,0x5f,0x61,0x74,0x74,0x72,0x69,0x62,0x75,0x74,
0x65,0x5f,0x5f,0x28,0x28,0x72,0x65,0x71,0x64,0x5f,0x77,0x6f,0x72,0x6b,0x5f,0x67,0x72,0x6f,0x75,0x70,0x5f,0x73,0x69,0x7a,0x65,0x28,0x33,0x32,0x2c,0x31,0x2c,0x31,
0x29,0x29,0x29,0x0a,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x73,0x68,0x61,0x33,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,
0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x38,0x5f,0x74,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x73,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x63,
0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x73,0x2c,0x75,0x69,0x6e,0x74,0x33,
0x32,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x74,0x72,0x69,0x64,0x65,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,
0x5f,0x74,0x2a,0x20,0x68,0x61,0x73,0x68,0x65,0x73,0x29,0x0a,0x7b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x74,0x3d,0x67,
0x65,0x74,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x67,
0x3d,0x67,0x65,0x74,0x5f,0x67,0x72,0x6f,0x75,0x70,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x69,0x66,0x28,0x74,0x3e,0x3d,0x32,0x35,0x29,0x0a,0x72,0x65,0x74,0x75,
0x72,0x6e,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x73,0x3d,0x74,0x20,0x25,0x20,0x35,0x3b,0x0a,0x63,0x6f,0x6e,0x73,
0x74,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x3d,0x28,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,
0x5f,0x74,0x29,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x74,0x72,0x69,0x64,0x65,0x29,0x2a,0x67,0x3b,0x0a,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,
0x74,0x36,0x34,0x5f,0x74,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x3d,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x2a,
0x29,0x28,0x69,0x6e,0x70,0x75,0x74,0x73,0x2b,0x69,0x6e,0x70,0x75,0x74,0x5f,0x6f,0x66,0x66,0x73,0x65,0x74,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,
0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x3d,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x73,0x5b,0x67,0x5d,
0x2b,0x31,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x41,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,
0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x43,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,
0x36,0x34,0x5f,0x74,0x20,0x44,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x41,0x5b,0x74,0x5d,0x3d,0x30,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,
0x5f,0x74,0x20,0x77,0x6f,0x72,0x64,0x73,0x3d,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x36,
0x34,0x5f,0x74,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x74,0x61,0x69,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x3d,0x69,
0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x20,0x25,0x20,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x3b,0x0a,0x75,0x69,
0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x3d,0x30,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,
0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x77,0x6f,0x72,0x64,0x73,0x3b,0x20,0x2b,0x2b,0x69,0x2c,0x2b,0x2b,0x69,0x6e,0x70,0x75,0x74,0x29,0x0a,0x7b,0x0a,0x41,
0x5b,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x2a,0x69,0x6e,0x70,0x75,0x74,0x3b,0x0a,0x2b,0x2b,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,
0x65,0x78,0x3b,0x0a,0x69,0x66,0x28,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x3d,0x3d,0x31,0x37,0x29,0x0a,0x7b,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,
0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x20,0x52,0x4f,0x55,0x4e,0x44,0x53,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x52,0x4f,
0x55,0x4e,0x44,0x53,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x0a,0x7b,0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x41,0x5b,0x73,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x35,0x5d,0x5e,0x41,0x5b,
0x73,0x2b,0x31,0x30,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x31,0x35,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x32,0x30,0x5d,0x3b,0x0a,0x44,0x5b,0x74,0x5d,0x3d,0x43,0x5b,0x62,0x5b,
0x32,0x30,0x2b,0x73,0x5d,0x5d,0x5e,0x52,0x36,0x34,0x28,0x43,0x5b,0x62,0x5b,0x35,0x2b,0x73,0x5d,0x5d,0x2c,0x31,0x2c,0x36,0x33,0x29,0x3b,0x0a,0x43,0x5b,0x74,0x5d,
0x3d,0x52,0x36,0x34,0x28,0x41,0x5b,0x61,0x5b,0x74,0x5d,0x5d,0x5e,0x44,0x5b,0x62,0x5b,0x74,0x5d,0x5d,0x2c,0x72,0x6f,0x5b,0x74,0x5d,0x5b,0x30,0x5d,0x2c,0x72,0x6f,
0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x29,0x3b,0x0a,0x41,0x5b,0x64,0x5b,0x74,0x5d,0x5d,0x3d,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x30,0x5d,0x5d,0x5e,0x28,0x28,0x7e,0x43,
0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x5d,0x29,0x26,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x32,0x5d,0x5d,0x29,0x3b,0x0a,0x41,0x5b,0x74,0x5d,0x20,0x5e,0x3d,0x20,
0x72,0x63,0x5b,0x28,0x74,0x3d,0x3d,0x30,0x29,0x3f,0x30,0x3a,0x31,0x5d,0x5b,0x69,0x5d,0x3b,0x0a,0x7d,0x0a,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x3d,0x30,
0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x74,0x61,0x69,0x6c,0x3d,0x30,0x3b,0x0a,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,
0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x38,0x5f,0x74,0x2a,0x20,0x70,0x3d,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x63,0x6f,0x6e,0x73,0x74,
0x20,0x75,0x69,0x6e,0x74,0x38,0x5f,0x74,0x2a,0x29,0x69,0x6e,0x70,0x75,0x74,0x3b,0x0a,0x66,0x6f,0x72,0x20,0x28,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x69,
0x3d,0x30,0x3b,0x20,0x69,0x3c,0x74,0x61,0x69,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x0a,0x7b,0x0a,0x74,0x61,0x69,0x6c,0x7c,0x3d,0x28,0x75,
0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x28,0x70,0x5b,0x69,0x5d,0x29,0x3c,0x3c,0x28,0x69,0x2a,0x38,0x29,0x3b,0x0a,0x7d,0x0a,0x41,0x5b,0x77,0x6f,0x72,0x64,0x49,
0x6e,0x64,0x65,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x74,0x61,0x69,0x6c,0x5e,0x28,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x28,0x28,0x28,0x75,0x69,0x6e,0x74,
0x36,0x34,0x5f,0x74,0x29,0x28,0x30,0x78,0x30,0x32,0x7c,0x28,0x31,0x3c,0x3c,0x32,0x29,0x29,0x29,0x3c,0x3c,0x28,0x74,0x61,0x69,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x2a,
0x38,0x29,0x29,0x29,0x3b,0x0a,0x41,0x5b,0x31,0x36,0x5d,0x20,0x5e,0x3d,0x20,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
0x30,0x55,0x4c,0x3b,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x20,0x31,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,
0x3d,0x30,0x3b,0x20,0x69,0x3c,0x52,0x4f,0x55,0x4e,0x44,0x53,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x0a,0x7b,0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x41,0x5b,0x73,0x5d,0x5e,0x41,
0x5b,0x73,0x2b,0x35,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x31,0x30,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x31,0x35,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x32,0x30,0x5d,0x3b,0x0a,0x44,
0x5b,0x74,0x5d,0x3d,0x43,0x5b,0x62,0x5b,0x32,0x30,0x2b,0x73,0x5d,0x5d,0x5e,0x52,0x36,0x34,0x28,0x43,0x5b,0x62,0x5b,0x35,0x2b,0x73,0x5d,0x5d,0x2c,0x31,0x2c,0x36,
0x33,0x29,0x3b,0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x52,0x36,0x34,0x28,0x41,0x5b,0x61,0x5b,0x74,0x5d,0x5d,0x5e,0x44,0x5b,0x62,0x5b,0x74,0x5d,0x5d,0x2c,0x72,0x6f,0x5b,
0x74,0x5d,0x5b,0x30,0x5d,0x2c,0x72,0x6f,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x29,0x3b,0x0a,0x41,0x5b,0x64,0x5b,0x74,0x5d,0x5d,0x3d,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,
0x30,0x5d,0x5d,0x5e,0x28,0x28,0x7e,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x5d,0x29,0x26,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x32,0x5d,0x5d,0x29,0x3b,0x0a,
0x41,0x5b,0x74,0x5d,0x20,0x5e,0x3d,0x20,0x72,0x63,0x5b,0x28,0x74,0x3d,0x3d,0x30,0x29,0x3f,0x30,0x3a,0x31,0x5d,0x5b,0x69,0x5d,0x3b,0x0a,0x7d,0x0a,0x69,0x66,0x28,
0x74,0x3c,0x34,0x29,0x0a,0x7b,0x0a,0x68,0x61,0x73,0x68,0x65,0x73,0x2b,0x3d,0x67,0x2a,0x28,0x33,0x32,0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,
0x36,0x34,0x5f,0x74,0x29,0x29,0x3b,0x0a,0x68,0x61,0x73,0x68,0x65,0x73,0x5b,0x74,0x5d,0x3d,0x41,0x5b,0x74,0x5d,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x5f,0x5f,0x61,0x74,
0x74,0x72,0x69,0x62,0x75,0x74,0x65,0x5f,0x5f,0x28,0x28,0x72,0x65,0x71,0x64,0x5f,0x77,0x6f,0x72,0x6b,0x5f,0x67,0x72,0x6f,0x75,0x70,0x5f,0x73,0x69,0x7a,0x65,0x28,
0x33,0x32,0x2c,0x31,0x2c,0x31,0x29,0x29,0x29,0x0a,0x5f,0x5f,0x6b,0x65,0x72,0x6e,0x65,0x6c,0x20,0x76,0x6f,0x69,0x64,0x20,0x73,0x68,0x61,0x33,0x5f,0x69,0x6e,0x69,
0x74,0x69,0x61,0x6c,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x38,0x5f,0x74,0x2a,0x20,0x69,0x6e,0x70,
0x75,0x74,0x5f,0x64,0x61,0x74,0x61,0x2c,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x2c,0x75,0x69,0x6e,0x74,
0x33,0x32,0x5f,0x74,0x20,0x6e,0x6f,0x6e,0x63,0x65,0x2c,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x2a,0x20,0x68,0x61,
0x73,0x68,0x65,0x73,0x29,0x0a,0x7b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x74,0x3d,0x67,0x65,0x74,0x5f,0x6c,0x6f,0x63,
0x61,0x6c,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x67,0x3d,0x67,0x65,0x74,0x5f,0x67,
0x72,0x6f,0x75,0x70,0x5f,0x69,0x64,0x28,0x30,0x29,0x3b,0x0a,0x69,0x66,0x28,0x74,0x3e,0x3d,0x32,0x35,0x29,0x0a,0x72,0x65,0x74,0x75,0x72,0x6e,0x3b,0x0a,0x63,0x6f,
0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x73,0x3d,0x74,0x20,0x25,0x20,0x35,0x3b,0x0a,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,
0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x2a,0x20,0x69,0x6e,0x70,0x75,0x74,0x3d,0x28,0x5f,0x5f,0x67,0x6c,0x6f,0x62,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,
0x74,0x2a,0x29,0x28,0x69,0x6e,0x70,0x75,0x74,0x5f,0x64,0x61,0x74,0x61,0x29,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,
0x74,0x20,0x41,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x43,0x5b,0x32,0x35,0x5d,0x3b,
0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x20,0x44,0x5b,0x32,0x35,0x5d,0x3b,0x0a,0x41,0x5b,0x74,0x5d,0x3d,0x28,0x74,
0x3c,0x31,0x36,0x29,0x3f,0x69,0x6e,0x70,0x75,0x74,0x5b,0x74,0x5d,0x3a,0x30,0x3b,0x0a,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,
0x74,0x2a,0x20,0x6e,0x6f,0x6e,0x63,0x65,0x5f,0x70,0x6f,0x73,0x3d,0x28,0x5f,0x5f,0x6c,0x6f,0x63,0x61,0x6c,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x2a,0x29,
0x28,0x41,0x29,0x2b,0x39,0x3b,0x0a,0x6e,0x6f,0x6e,0x63,0x65,0x2b,0x3d,0x67,0x3b,0x0a,0x6e,0x6f,0x6e,0x63,0x65,0x5f,0x70,0x6f,0x73,0x5b,0x30,0x5d,0x3d,0x28,0x6e,
0x6f,0x6e,0x63,0x65,0x5f,0x70,0x6f,0x73,0x5b,0x30,0x5d,0x26,0x30,0x78,0x46,0x46,0x46,0x46,0x46,0x46,0x55,0x29,0x7c,0x28,0x28,0x6e,0x6f,0x6e,0x63,0x65,0x26,0x30,
0x78,0x46,0x46,0x29,0x3c,0x3c,0x32,0x34,0x29,0x3b,0x0a,0x6e,0x6f,0x6e,0x63,0x65,0x5f,0x70,0x6f,0x73,0x5b,0x31,0x5d,0x3d,0x28,0x6e,0x6f,0x6e,0x63,0x65,0x5f,0x70,
0x6f,0x73,0x5b,0x31,0x5d,0x26,0x30,0x78,0x46,0x46,0x30,0x30,0x30,0x30,0x30,0x30,0x55,0x29,0x7c,0x28,0x6e,0x6f,0x6e,0x63,0x65,0x3e,0x3e,0x38,0x29,0x3b,0x0a,0x75,
0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x3d,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x2f,0x73,0x69,0x7a,
0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x3b,0x0a,0x63,0x6f,0x6e,0x73,0x74,0x20,0x75,0x69,0x6e,0x74,0x33,0x32,0x5f,0x74,0x20,0x74,0x61,
0x69,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x3d,0x69,0x6e,0x70,0x75,0x74,0x5f,0x73,0x69,0x7a,0x65,0x20,0x25,0x20,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,
0x36,0x34,0x5f,0x74,0x29,0x3b,0x0a,0x41,0x5b,0x77,0x6f,0x72,0x64,0x49,0x6e,0x64,0x65,0x78,0x5d,0x20,0x5e,0x3d,0x20,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,
0x29,0x28,0x28,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x28,0x30,0x78,0x30,0x32,0x7c,0x28,0x31,0x3c,0x3c,0x32,0x29,0x29,0x29,0x3c,0x3c,0x28,0x74,0x61,
0x69,0x6c,0x5f,0x73,0x69,0x7a,0x65,0x2a,0x38,0x29,0x29,0x3b,0x0a,0x41,0x5b,0x31,0x36,0x5d,0x20,0x5e,0x3d,0x20,0x30,0x78,0x38,0x30,0x30,0x30,0x30,0x30,0x30,0x30,
0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x30,0x55,0x4c,0x3b,0x0a,0x23,0x70,0x72,0x61,0x67,0x6d,0x61,0x20,0x75,0x6e,0x72,0x6f,0x6c,0x6c,0x20,0x52,0x4f,0x55,0x4e,0x44,
0x53,0x0a,0x66,0x6f,0x72,0x20,0x28,0x69,0x6e,0x74,0x20,0x69,0x3d,0x30,0x3b,0x20,0x69,0x3c,0x52,0x4f,0x55,0x4e,0x44,0x53,0x3b,0x20,0x2b,0x2b,0x69,0x29,0x0a,0x7b,
0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x41,0x5b,0x73,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x35,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x31,0x30,0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x31,0x35,
0x5d,0x5e,0x41,0x5b,0x73,0x2b,0x32,0x30,0x5d,0x3b,0x0a,0x44,0x5b,0x74,0x5d,0x3d,0x43,0x5b,0x62,0x5b,0x32,0x30,0x2b,0x73,0x5d,0x5d,0x5e,0x52,0x36,0x34,0x28,0x43,
0x5b,0x62,0x5b,0x35,0x2b,0x73,0x5d,0x5d,0x2c,0x31,0x2c,0x36,0x33,0x29,0x3b,0x0a,0x43,0x5b,0x74,0x5d,0x3d,0x52,0x36,0x34,0x28,0x41,0x5b,0x61,0x5b,0x74,0x5d,0x5d,
0x5e,0x44,0x5b,0x62,0x5b,0x74,0x5d,0x5d,0x2c,0x72,0x6f,0x5b,0x74,0x5d,0x5b,0x30,0x5d,0x2c,0x72,0x6f,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x29,0x3b,0x0a,0x41,0x5b,0x64,
0x5b,0x74,0x5d,0x5d,0x3d,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x30,0x5d,0x5d,0x5e,0x28,0x28,0x7e,0x43,0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x31,0x5d,0x5d,0x29,0x26,0x43,
0x5b,0x63,0x5b,0x74,0x5d,0x5b,0x32,0x5d,0x5d,0x29,0x3b,0x0a,0x41,0x5b,0x74,0x5d,0x20,0x5e,0x3d,0x20,0x72,0x63,0x5b,0x28,0x74,0x3d,0x3d,0x30,0x29,0x3f,0x30,0x3a,
0x31,0x5d,0x5b,0x69,0x5d,0x3b,0x0a,0x7d,0x0a,0x69,0x66,0x28,0x74,0x3c,0x34,0x29,0x0a,0x7b,0x0a,0x68,0x61,0x73,0x68,0x65,0x73,0x2b,0x3d,0x67,0x2a,0x28,0x33,0x32,
0x2f,0x73,0x69,0x7a,0x65,0x6f,0x66,0x28,0x75,0x69,0x6e,0x74,0x36,0x34,0x5f,0x74,0x29,0x29,0x3b,0x0a,0x68,0x61,0x73,0x68,0x65,0x73,0x5b,0x74,0x5d,0x3d,0x41,0x5b,
0x74,0x5d,0x3b,0x0a,0x7d,0x0a,0x7d,0x0a,0x00
};
} // namespace xmrig

View File

@@ -132,6 +132,7 @@ set(SOURCES_BASE
src/base/net/tools/LineReader.cpp
src/base/net/tools/NetBuffer.cpp
src/base/tools/Arguments.cpp
src/base/tools/Chrono.cpp
src/base/tools/cryptonote/BlockTemplate.cpp
src/base/tools/cryptonote/crypto-ops-data.c
src/base/tools/cryptonote/crypto-ops.c
@@ -244,7 +245,7 @@ else()
endif()
if (WITH_KAWPOW)
if (WITH_KAWPOW OR WITH_GHOSTRIDER)
list(APPEND HEADERS_BASE
src/base/net/stratum/AutoClient.h
src/base/net/stratum/EthStratumClient.h

View File

@@ -189,6 +189,7 @@ public:
inline Id id() const { return m_id; }
inline size_t l2() const { return l2(m_id); }
inline uint32_t family() const { return family(m_id); }
inline uint32_t minIntensity() const { return ((m_id == GHOSTRIDER_RTM) ? 8 : 1); };
inline uint32_t maxIntensity() const { return isCN() ? 5 : ((m_id == GHOSTRIDER_RTM) ? 8 : 1); };
inline size_t l3() const

View File

@@ -73,7 +73,7 @@ int64_t xmrig::EthStratumClient::submit(const JobResult& result)
auto& allocator = doc.GetAllocator();
Value params(kArrayType);
params.PushBack(m_pool.user().toJSON(), allocator);
params.PushBack(m_user.toJSON(), allocator);
params.PushBack(result.jobId.toJSON(), allocator);
# ifdef XMRIG_ALGO_GHOSTRIDER
@@ -213,12 +213,13 @@ void xmrig::EthStratumClient::parseNotification(const char *method, const rapidj
return;
}
if (!arr[0].IsDouble()) {
if (!arr[0].IsDouble() && !arr[0].IsUint64()) {
LOG_ERR("%s " RED("invalid mining.set_difficulty notification: difficulty is not a number"), tag());
return;
}
m_nextDifficulty = static_cast<uint64_t>(ceil(arr[0].GetDouble() * 65536.0));
const double diff = arr[0].IsDouble() ? arr[0].GetDouble() : arr[0].GetUint64();
m_nextDifficulty = static_cast<uint64_t>(ceil(diff * 65536.0));
}
# endif
@@ -470,8 +471,8 @@ void xmrig::EthStratumClient::authorize()
auto &allocator = doc.GetAllocator();
Value params(kArrayType);
params.PushBack(m_pool.user().toJSON(), allocator);
params.PushBack(m_pool.password().toJSON(), allocator);
params.PushBack(m_user.toJSON(), allocator);
params.PushBack(m_password.toJSON(), allocator);
JsonRequest::create(doc, m_sequence, "mining.authorize", params);

View File

@@ -31,7 +31,7 @@
#include "base/kernel/Platform.h"
#include "base/net/stratum/Client.h"
#ifdef XMRIG_ALGO_KAWPOW
#if defined XMRIG_ALGO_KAWPOW || defined XMRIG_ALGO_GHOSTRIDER
# include "base/net/stratum/AutoClient.h"
# include "base/net/stratum/EthStratumClient.h"
#endif
@@ -218,7 +218,7 @@ xmrig::IClient *xmrig::Pool::createClient(int id, IClientListener *listener) con
IClient *client = nullptr;
if (m_mode == MODE_POOL) {
# ifdef XMRIG_ALGO_KAWPOW
# if defined XMRIG_ALGO_KAWPOW || defined XMRIG_ALGO_GHOSTRIDER
const uint32_t f = m_algorithm.family();
if ((f == Algorithm::KAWPOW) || (f == Algorithm::GHOSTRIDER) || (m_coin == Coin::RAVEN)) {
client = new EthStratumClient(id, Platform::userAgent(), listener);
@@ -237,7 +237,7 @@ xmrig::IClient *xmrig::Pool::createClient(int id, IClientListener *listener) con
client = new SelfSelectClient(id, Platform::userAgent(), listener, m_submitToOrigin);
}
# endif
# ifdef XMRIG_ALGO_KAWPOW
# if defined XMRIG_ALGO_KAWPOW || defined XMRIG_ALGO_GHOSTRIDER
else if (m_mode == MODE_AUTO_ETH) {
client = new AutoClient(id, Platform::userAgent(), listener);
}

View File

@@ -50,6 +50,7 @@ xmrig::BenchClient::BenchClient(const std::shared_ptr<BenchConfig> &benchmark, I
# ifdef XMRIG_ALGO_GHOSTRIDER
if (m_benchmark->algorithm() == Algorithm::GHOSTRIDER_RTM) {
const uint32_t q = (benchmark->rotation() / 20) & 1;
const uint32_t r = benchmark->rotation() % 20;
static constexpr uint32_t indices[20][3] = {
@@ -75,9 +76,9 @@ xmrig::BenchClient::BenchClient(const std::shared_ptr<BenchConfig> &benchmark, I
{ 3, 4, 5 },
};
blob[ 8] = '0' + indices[r][1];
blob[ 8] = '0' + indices[r][q ? 2 : 1];
blob[ 9] = '0' + indices[r][0];
blob[11] = '0' + indices[r][2];
blob[11] = '0' + indices[r][q ? 1 : 2];
}
# endif

44
src/base/tools/Chrono.cpp Normal file
View File

@@ -0,0 +1,44 @@
/* XMRig
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "Chrono.h"
#ifdef XMRIG_OS_WIN
# include <Windows.h>
#endif
namespace xmrig {
/**
 * Returns a high-resolution timestamp expressed in milliseconds.
 *
 * The value is a double so that sub-millisecond fractions are kept.
 * On Windows it is computed from QueryPerformanceCounter() ticks scaled by
 * the QueryPerformanceFrequency() tick rate; elsewhere it is the
 * std::chrono::high_resolution_clock time since epoch, taken in nanoseconds
 * and divided down to milliseconds.
 */
double Chrono::highResolutionMSecs()
{
#   ifdef XMRIG_OS_WIN
    LARGE_INTEGER frequency;
    LARGE_INTEGER counter;

    QueryPerformanceFrequency(&frequency);
    QueryPerformanceCounter(&counter);

    // ticks * 1000 / ticks-per-second == milliseconds
    const double ticks = static_cast<double>(counter.QuadPart);
    return ticks * 1e3 / frequency.QuadPart;
#   else
    const auto sinceEpoch = std::chrono::high_resolution_clock::now().time_since_epoch();
    const auto nanos      = std::chrono::duration_cast<std::chrono::nanoseconds>(sinceEpoch).count();

    // nanoseconds / 1e6 == milliseconds (fractional part preserved)
    return static_cast<uint64_t>(nanos) / 1e6;
#   endif
}
} /* namespace xmrig */

View File

@@ -29,12 +29,7 @@ namespace xmrig {
class Chrono
{
public:
static inline uint64_t highResolutionMSecs()
{
using namespace std::chrono;
return static_cast<uint64_t>(time_point_cast<milliseconds>(high_resolution_clock::now()).time_since_epoch().count());
}
static double highResolutionMSecs();
static inline uint64_t steadyMSecs()

View File

@@ -287,10 +287,12 @@ public:
void printHashrate(bool details)
{
char num[16 * 4] = { 0 };
char num[16 * 5] = { 0 };
double speed[3] = { 0.0 };
uint32_t count = 0;
double avg_hashrate = 0.0;
for (auto backend : backends) {
const auto hashrate = backend->hashrate();
if (hashrate) {
@@ -299,6 +301,8 @@ public:
speed[0] += hashrate->calc(Hashrate::ShortInterval);
speed[1] += hashrate->calc(Hashrate::MediumInterval);
speed[2] += hashrate->calc(Hashrate::LargeInterval);
avg_hashrate += hashrate->average();
}
backend->printHashrate(details);
@@ -318,12 +322,22 @@ public:
h = "MH/s";
}
LOG_INFO("%s " WHITE_BOLD("speed") " 10s/60s/15m " CYAN_BOLD("%s") CYAN(" %s %s ") CYAN_BOLD("%s") " max " CYAN_BOLD("%s %s"),
char avg_hashrate_buf[64];
avg_hashrate_buf[0] = '\0';
# ifdef XMRIG_ALGO_GHOSTRIDER
if (algorithm.family() == Algorithm::GHOSTRIDER) {
snprintf(avg_hashrate_buf, sizeof(avg_hashrate_buf), " avg " CYAN_BOLD("%s %s"), Hashrate::format(avg_hashrate * scale, num + 16 * 4, 16), h);
}
# endif
LOG_INFO("%s " WHITE_BOLD("speed") " 10s/60s/15m " CYAN_BOLD("%s") CYAN(" %s %s ") CYAN_BOLD("%s") " max " CYAN_BOLD("%s %s") "%s",
Tags::miner(),
Hashrate::format(speed[0] * scale, num, sizeof(num) / 4),
Hashrate::format(speed[1] * scale, num + 16, sizeof(num) / 4),
Hashrate::format(speed[2] * scale, num + 16 * 2, sizeof(num) / 4), h,
Hashrate::format(maxHashrate[algorithm] * scale, num + 16 * 3, sizeof(num) / 4), h
Hashrate::format(speed[0] * scale, num, 16),
Hashrate::format(speed[1] * scale, num + 16, 16),
Hashrate::format(speed[2] * scale, num + 16 * 2, 16), h,
Hashrate::format(maxHashrate[algorithm] * scale, num + 16 * 3, 16), h,
avg_hashrate_buf
);
# ifdef XMRIG_FEATURE_BENCHMARK

View File

@@ -55,6 +55,10 @@
} while (0)
bool cn_sse41_enabled = false;
bool cn_vaes_enabled = false;
#ifdef XMRIG_FEATURE_ASM
# define ADD_FN_ASM(algo) do { \
m_map[algo]->data[AV_SINGLE][Assembly::INTEL] = cryptonight_single_hash_asm<algo, Assembly::INTEL>; \
@@ -97,6 +101,27 @@ cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm = nullptr;
cn_mainloop_fun cn_upx2_mainloop_asm = nullptr;
cn_mainloop_fun cn_upx2_double_mainloop_asm = nullptr;
cn_mainloop_fun cn_gr0_single_mainloop_asm = nullptr;
cn_mainloop_fun cn_gr1_single_mainloop_asm = nullptr;
cn_mainloop_fun cn_gr2_single_mainloop_asm = nullptr;
cn_mainloop_fun cn_gr3_single_mainloop_asm = nullptr;
cn_mainloop_fun cn_gr4_single_mainloop_asm = nullptr;
cn_mainloop_fun cn_gr5_single_mainloop_asm = nullptr;
cn_mainloop_fun cn_gr0_double_mainloop_asm = nullptr;
cn_mainloop_fun cn_gr1_double_mainloop_asm = nullptr;
cn_mainloop_fun cn_gr2_double_mainloop_asm = nullptr;
cn_mainloop_fun cn_gr3_double_mainloop_asm = nullptr;
cn_mainloop_fun cn_gr4_double_mainloop_asm = nullptr;
cn_mainloop_fun cn_gr5_double_mainloop_asm = nullptr;
cn_mainloop_fun cn_gr0_quad_mainloop_asm = nullptr;
cn_mainloop_fun cn_gr1_quad_mainloop_asm = nullptr;
cn_mainloop_fun cn_gr2_quad_mainloop_asm = nullptr;
cn_mainloop_fun cn_gr3_quad_mainloop_asm = nullptr;
cn_mainloop_fun cn_gr4_quad_mainloop_asm = nullptr;
cn_mainloop_fun cn_gr5_quad_mainloop_asm = nullptr;
template<Algorithm::Id SOURCE_ALGO = Algorithm::CN_2, typename T, typename U>
static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t mask = CnAlgo<Algorithm::CN_HALF>().mask())
@@ -136,7 +161,7 @@ static void patchCode(T dst, U src, const uint32_t iterations, const uint32_t ma
static void patchAsmVariants()
{
const int allocation_size = 131072;
constexpr size_t allocation_size = 0x20000;
auto base = static_cast<uint8_t *>(VirtualMemory::allocateExecutableMemory(allocation_size, false));
cn_half_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x0000);
@@ -173,6 +198,29 @@ static void patchAsmVariants()
cn_upx2_double_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x15000);
# endif
# ifdef XMRIG_ALGO_GHOSTRIDER
cn_gr0_single_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x16000);
cn_gr1_single_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x16800);
cn_gr2_single_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x17000);
cn_gr3_single_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x17800);
cn_gr4_single_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x18000);
cn_gr5_single_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x18800);
cn_gr0_double_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x19000);
cn_gr1_double_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x19800);
cn_gr2_double_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1A000);
cn_gr3_double_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1A800);
cn_gr4_double_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1B000);
cn_gr5_double_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1B800);
cn_gr0_quad_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1C000);
cn_gr1_quad_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1C800);
cn_gr2_quad_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1D000);
cn_gr3_quad_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1D800);
cn_gr4_quad_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1E000);
cn_gr5_quad_mainloop_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1E800);
# endif
{
constexpr uint32_t ITER = CnAlgo<Algorithm::CN_HALF>().iterations();
@@ -232,6 +280,29 @@ static void patchAsmVariants()
}
# endif
# ifdef XMRIG_ALGO_GHOSTRIDER
patchCode<Algorithm::CN_1>(cn_gr0_single_mainloop_asm, cnv1_single_mainloop_asm, CnAlgo<Algorithm::CN_GR_0>().iterations(), CnAlgo<Algorithm::CN_GR_0>().mask());
patchCode<Algorithm::CN_1>(cn_gr1_single_mainloop_asm, cnv1_single_mainloop_asm, CnAlgo<Algorithm::CN_GR_1>().iterations(), CnAlgo<Algorithm::CN_GR_1>().mask());
patchCode<Algorithm::CN_1>(cn_gr2_single_mainloop_asm, cnv1_single_mainloop_asm, CnAlgo<Algorithm::CN_GR_2>().iterations(), CnAlgo<Algorithm::CN_GR_2>().mask());
patchCode<Algorithm::CN_1>(cn_gr3_single_mainloop_asm, cnv1_single_mainloop_asm, CnAlgo<Algorithm::CN_GR_3>().iterations(), CnAlgo<Algorithm::CN_GR_3>().mask());
patchCode<Algorithm::CN_1>(cn_gr4_single_mainloop_asm, cnv1_single_mainloop_asm, CnAlgo<Algorithm::CN_GR_4>().iterations(), CnAlgo<Algorithm::CN_GR_4>().mask());
patchCode<Algorithm::CN_1>(cn_gr5_single_mainloop_asm, cnv1_single_mainloop_asm, CnAlgo<Algorithm::CN_GR_5>().iterations(), CnAlgo<Algorithm::CN_GR_5>().mask());
patchCode<Algorithm::CN_1>(cn_gr0_double_mainloop_asm, cnv1_double_mainloop_asm, CnAlgo<Algorithm::CN_GR_0>().iterations(), CnAlgo<Algorithm::CN_GR_0>().mask());
patchCode<Algorithm::CN_1>(cn_gr1_double_mainloop_asm, cnv1_double_mainloop_asm, CnAlgo<Algorithm::CN_GR_1>().iterations(), CnAlgo<Algorithm::CN_GR_1>().mask());
patchCode<Algorithm::CN_1>(cn_gr2_double_mainloop_asm, cnv1_double_mainloop_asm, CnAlgo<Algorithm::CN_GR_2>().iterations(), CnAlgo<Algorithm::CN_GR_2>().mask());
patchCode<Algorithm::CN_1>(cn_gr3_double_mainloop_asm, cnv1_double_mainloop_asm, CnAlgo<Algorithm::CN_GR_3>().iterations(), CnAlgo<Algorithm::CN_GR_3>().mask());
patchCode<Algorithm::CN_1>(cn_gr4_double_mainloop_asm, cnv1_double_mainloop_asm, CnAlgo<Algorithm::CN_GR_4>().iterations(), CnAlgo<Algorithm::CN_GR_4>().mask());
patchCode<Algorithm::CN_1>(cn_gr5_double_mainloop_asm, cnv1_double_mainloop_asm, CnAlgo<Algorithm::CN_GR_5>().iterations(), CnAlgo<Algorithm::CN_GR_5>().mask());
patchCode<Algorithm::CN_1>(cn_gr0_quad_mainloop_asm, cnv1_quad_mainloop_asm, CnAlgo<Algorithm::CN_GR_0>().iterations(), CnAlgo<Algorithm::CN_GR_0>().mask());
patchCode<Algorithm::CN_1>(cn_gr1_quad_mainloop_asm, cnv1_quad_mainloop_asm, CnAlgo<Algorithm::CN_GR_1>().iterations(), CnAlgo<Algorithm::CN_GR_1>().mask());
patchCode<Algorithm::CN_1>(cn_gr2_quad_mainloop_asm, cnv1_quad_mainloop_asm, CnAlgo<Algorithm::CN_GR_2>().iterations(), CnAlgo<Algorithm::CN_GR_2>().mask());
patchCode<Algorithm::CN_1>(cn_gr3_quad_mainloop_asm, cnv1_quad_mainloop_asm, CnAlgo<Algorithm::CN_GR_3>().iterations(), CnAlgo<Algorithm::CN_GR_3>().mask());
patchCode<Algorithm::CN_1>(cn_gr4_quad_mainloop_asm, cnv1_quad_mainloop_asm, CnAlgo<Algorithm::CN_GR_4>().iterations(), CnAlgo<Algorithm::CN_GR_4>().mask());
patchCode<Algorithm::CN_1>(cn_gr5_quad_mainloop_asm, cnv1_quad_mainloop_asm, CnAlgo<Algorithm::CN_GR_5>().iterations(), CnAlgo<Algorithm::CN_GR_5>().mask());
# endif
VirtualMemory::protectRX(base, allocation_size);
VirtualMemory::flushInstructionCache(base, allocation_size);
}
@@ -348,7 +419,7 @@ xmrig::cn_hash_fun xmrig::CnHash::fn(const Algorithm &algorithm, AlgoVariant av,
# ifdef XMRIG_ALGO_CN_HEAVY
// cn-heavy optimization for Zen3 CPUs
if ((av == AV_SINGLE) && (assembly != Assembly::NONE) && (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3)) {
if ((av == AV_SINGLE) && (assembly != Assembly::NONE) && (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3) && (Cpu::info()->model() == 0x21)) {
switch (algorithm.id()) {
case Algorithm::CN_HEAVY_0:
return cryptonight_single_hash<Algorithm::CN_HEAVY_0, false, 3>;

View File

@@ -52,8 +52,10 @@ struct cryptonight_r_data {
struct cryptonight_ctx {
alignas(16) uint8_t state[224];
alignas(16) uint8_t *memory;
const uint32_t* tweak1_table;
uint64_t tweak1_2;
uint8_t unused[40];
uint8_t unused[24];
const uint32_t *saes_table;
cn_mainloop_fun_ms_abi generated_code;

View File

@@ -204,4 +204,7 @@
v4_random_math(code##part, r##part); \
}
extern bool cn_sse41_enabled;
extern bool cn_vaes_enabled;
#endif /* XMRIG_CRYPTONIGHT_MONERO_H */

View File

@@ -450,6 +450,29 @@ const static uint8_t astrobwt_dero_test_out[256] = {
#endif
#ifdef XMRIG_ALGO_GHOSTRIDER
// "GhostRider"
// 256-byte reference table for the GhostRider algorithm.
// NOTE(review): presumably the expected hash bytes that the self-test compares
// against — confirm against the code that reads this table.
const static uint8_t test_output_gr[256] = {
0x42, 0x17, 0x0C, 0xC1, 0x85, 0xE6, 0x76, 0x3C, 0xC7, 0xCB, 0x27, 0xC4, 0x17, 0x39, 0x2D, 0xE2,
0x29, 0x6B, 0x40, 0x66, 0x85, 0xA4, 0xE3, 0xD3, 0x8C, 0xE9, 0xA5, 0x8F, 0x10, 0xFC, 0x81, 0xE4,
0x90, 0x56, 0xF2, 0x9E, 0x00, 0xD0, 0xF8, 0xA1, 0x88, 0x82, 0x86, 0xC0, 0x86, 0x04, 0x6B, 0x0E,
0x9A, 0xDB, 0xDB, 0xFD, 0x23, 0x16, 0x77, 0x94, 0xFE, 0x58, 0x93, 0x05, 0x10, 0x3F, 0x27, 0x75,
0x51, 0x44, 0xF3, 0x5F, 0xE2, 0xF9, 0x61, 0xBE, 0xC0, 0x30, 0xB5, 0x8E, 0xB1, 0x1B, 0xA1, 0xF7,
0x06, 0x4E, 0xF1, 0x6A, 0xFD, 0xA5, 0x44, 0x8E, 0x64, 0x47, 0x8C, 0x67, 0x51, 0xE2, 0x5C, 0x55,
0x3E, 0x39, 0xA6, 0xA5, 0xF7, 0xB8, 0xD0, 0x5E, 0xE2, 0xBF, 0x92, 0x44, 0xD9, 0xAA, 0x76, 0x22,
0xE3, 0x3E, 0x15, 0x96, 0xD8, 0x6A, 0x78, 0x2D, 0xA9, 0x77, 0x24, 0x1A, 0x4B, 0xE7, 0x5A, 0x2E,
0x89, 0x77, 0xAE, 0x92, 0xE4, 0xA4, 0x2D, 0xAF, 0x0B, 0x27, 0x09, 0xB2, 0x5F, 0x95, 0x61, 0xA9,
0xA8, 0xBE, 0x5D, 0x39, 0xBE, 0x41, 0x5F, 0x9C, 0x67, 0x28, 0x48, 0x4F, 0xAE, 0x2A, 0x50, 0x2B,
0xB8, 0xC7, 0x42, 0x73, 0x51, 0x60, 0x59, 0xD8, 0x9C, 0xBA, 0x22, 0x2F, 0x8E, 0x34, 0xDE, 0xC8,
0x1B, 0xAE, 0x9E, 0xBD, 0xF7, 0xE8, 0xFD, 0x8A, 0x97, 0xBE, 0xF0, 0x47, 0xAC, 0x27, 0xDD, 0x28,
0xC9, 0x28, 0xA8, 0x7B, 0x2A, 0xB8, 0x90, 0x3E, 0xCA, 0xB4, 0x78, 0x44, 0xCE, 0xCD, 0x91, 0xEC,
0xC2, 0x5A, 0x17, 0x59, 0x7C, 0x14, 0xF8, 0x95, 0x28, 0x14, 0xC3, 0xAD, 0xC4, 0xE1, 0x13, 0x5A,
0xC4, 0xA7, 0xC7, 0x77, 0xAD, 0xF8, 0x09, 0x61, 0x16, 0xBB, 0xAA, 0x7E, 0xAB, 0xC3, 0x00, 0x25,
0xBA, 0xA8, 0x97, 0xC7, 0x7D, 0x38, 0x46, 0x0E, 0x59, 0xAC, 0xCB, 0xAE, 0xFE, 0x3C, 0x6F, 0x01
};
#endif
} // namespace xmrig

View File

@@ -43,6 +43,11 @@
#include "crypto/cn/soft_aes.h"
#ifdef XMRIG_VAES
# include "crypto/cn/CryptoNight_x86_vaes.h"
#endif
extern "C"
{
#include "crypto/cn/c_groestl.h"
@@ -289,6 +294,13 @@ static NOINLINE void cn_explode_scratchpad(cryptonight_ctx *ctx)
{
constexpr CnAlgo<ALGO> props;
# ifdef XMRIG_VAES
if (!SOFT_AES && !props.isHeavy() && cn_vaes_enabled) {
cn_explode_scratchpad_vaes(ctx, props.memory(), props.half_mem());
return;
}
# endif
constexpr size_t N = (props.memory() / sizeof(__m128i)) / (props.half_mem() ? 2 : 1);
__m128i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
@@ -341,7 +353,7 @@ static NOINLINE void cn_explode_scratchpad(cryptonight_ctx *ctx)
constexpr int output_increment = (64 << interleave) / sizeof(__m128i);
constexpr int prefetch_dist = 2048 / sizeof(__m128i);
__m128i* e = output + N - prefetch_dist;
__m128i* e = output + (N << interleave) - prefetch_dist;
__m128i* prefetch_ptr = output + prefetch_dist;
for (int i = 0; i < 2; ++i) {
@@ -396,6 +408,13 @@ static NOINLINE void cn_implode_scratchpad(cryptonight_ctx *ctx)
{
constexpr CnAlgo<ALGO> props;
# ifdef XMRIG_VAES
if (!SOFT_AES && !props.isHeavy() && cn_vaes_enabled) {
cn_implode_scratchpad_vaes(ctx, props.memory(), props.half_mem());
return;
}
# endif
constexpr bool IS_HEAVY = props.isHeavy();
constexpr size_t N = (props.memory() / sizeof(__m128i)) / (props.half_mem() ? 2 : 1);
@@ -615,9 +634,35 @@ static inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var)
cx = _mm_xor_si128(cx, _mm_cvttps_epi32(nc));
}
#ifdef XMRIG_FEATURE_ASM
template<Algorithm::Id ALGO>
static void cryptonight_single_hash_gr_sse41(const uint8_t* __restrict__ input, size_t size, uint8_t* __restrict__ output, cryptonight_ctx** __restrict__ ctx, uint64_t height);
#endif
template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
{
# ifdef XMRIG_FEATURE_ASM
if (!SOFT_AES) {
switch (ALGO) {
case Algorithm::CN_GR_0:
case Algorithm::CN_GR_1:
case Algorithm::CN_GR_2:
case Algorithm::CN_GR_3:
case Algorithm::CN_GR_4:
case Algorithm::CN_GR_5:
if (cn_sse41_enabled) {
cryptonight_single_hash_gr_sse41<ALGO>(input, size, output, ctx, height);
return;
}
break;
default:
break;
}
}
# endif
constexpr CnAlgo<ALGO> props;
constexpr size_t MASK = props.mask();
constexpr Algorithm::Id BASE = props.base();
@@ -803,6 +848,9 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
#ifdef XMRIG_FEATURE_ASM
extern "C" void cnv1_single_mainloop_asm(cryptonight_ctx * *ctx);
extern "C" void cnv1_double_mainloop_asm(cryptonight_ctx **ctx);
extern "C" void cnv1_quad_mainloop_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx **ctx);
@@ -846,6 +894,28 @@ extern cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm;
extern cn_mainloop_fun cn_upx2_mainloop_asm;
extern cn_mainloop_fun cn_upx2_double_mainloop_asm;
extern cn_mainloop_fun cn_gr0_single_mainloop_asm;
extern cn_mainloop_fun cn_gr1_single_mainloop_asm;
extern cn_mainloop_fun cn_gr2_single_mainloop_asm;
extern cn_mainloop_fun cn_gr3_single_mainloop_asm;
extern cn_mainloop_fun cn_gr4_single_mainloop_asm;
extern cn_mainloop_fun cn_gr5_single_mainloop_asm;
extern cn_mainloop_fun cn_gr0_double_mainloop_asm;
extern cn_mainloop_fun cn_gr1_double_mainloop_asm;
extern cn_mainloop_fun cn_gr2_double_mainloop_asm;
extern cn_mainloop_fun cn_gr3_double_mainloop_asm;
extern cn_mainloop_fun cn_gr4_double_mainloop_asm;
extern cn_mainloop_fun cn_gr5_double_mainloop_asm;
extern cn_mainloop_fun cn_gr0_quad_mainloop_asm;
extern cn_mainloop_fun cn_gr1_quad_mainloop_asm;
extern cn_mainloop_fun cn_gr2_quad_mainloop_asm;
extern cn_mainloop_fun cn_gr3_quad_mainloop_asm;
extern cn_mainloop_fun cn_gr4_quad_mainloop_asm;
extern cn_mainloop_fun cn_gr5_quad_mainloop_asm;
} // namespace xmrig
@@ -996,8 +1066,17 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
ctx[0]->first_half = true;
ctx[1]->first_half = true;
}
# ifdef XMRIG_VAES
if (!props.isHeavy() && cn_vaes_enabled) {
cn_explode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
}
else
# endif
{
cn_explode_scratchpad<ALGO, false, 0>(ctx[0]);
cn_explode_scratchpad<ALGO, false, 0>(ctx[1]);
}
if (ALGO == Algorithm::CN_2) {
cnv2_double_mainloop_sandybridge_asm(ctx);
@@ -1036,8 +1115,16 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
ctx[0]->generated_code(ctx);
}
# ifdef XMRIG_VAES
if (!props.isHeavy() && cn_vaes_enabled) {
cn_implode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
}
else
# endif
{
cn_implode_scratchpad<ALGO, false, 0>(ctx[0]);
cn_implode_scratchpad<ALGO, false, 0>(ctx[1]);
}
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
keccakf(reinterpret_cast<uint64_t*>(ctx[1]->state), 24);
@@ -1054,9 +1141,130 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
namespace xmrig {
#ifdef XMRIG_FEATURE_ASM
// Single-hash path for the CN_GR_0..CN_GR_5 variants: the main loop is executed by
// the matching cn_grN_single_mainloop_asm routine instead of the C++ loop.
//
// input/size - message to hash.
// output     - receives the result (the early-exit path below zeroes 32 bytes).
// ctx        - context array; only ctx[0] is used here.
// height     - not referenced anywhere in this body.
template<Algorithm::Id ALGO>
static NOINLINE void cryptonight_single_hash_gr_sse41(const uint8_t* __restrict__ input, size_t size, uint8_t* __restrict__ output, cryptonight_ctx** __restrict__ ctx, uint64_t height)
{
constexpr CnAlgo<ALGO> props;
constexpr Algorithm::Id BASE = props.base();
// Variants whose base is CN_1 reject inputs shorter than 43 bytes: output is zeroed and nothing is hashed.
if (BASE == Algorithm::CN_1 && size < 43) {
memset(output, 0, 32);
return;
}
// Run keccak() over the input, passing ctx[0]->state as the state buffer.
keccak(input, size, ctx[0]->state);
if (props.half_mem()) {
ctx[0]->first_half = true;
}
cn_explode_scratchpad<ALGO, false, 0>(ctx[0]);
// VARIANT1_INIT is a macro defined outside this view; presumably it declares
// tweak1_2_0, which is read on the next line - TODO confirm against the macro.
VARIANT1_INIT(0);
// Publish the tweak value and the tweak table through the context before the asm main loop runs.
ctx[0]->tweak1_2 = tweak1_2_0;
ctx[0]->tweak1_table = tweak1_table;
// ALGO is a template parameter, so at most one of these comparisons is true for a given instantiation.
if (ALGO == Algorithm::CN_GR_0) cn_gr0_single_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_1) cn_gr1_single_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_2) cn_gr2_single_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_3) cn_gr3_single_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_4) cn_gr4_single_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_5) cn_gr5_single_mainloop_asm(ctx);
cn_implode_scratchpad<ALGO, false, 0>(ctx[0]);
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
// The low two bits of the first state element pick one of the extra_hashes entries.
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
// Two-way path for the CN_GR_0..CN_GR_5 variants: hashes two inputs at once and
// runs the main loop through the matching cn_grN_double_mainloop_asm routine.
//
// input/size - two messages of `size` bytes each, stored back to back.
// output     - receives two results, the second at output + 32
//              (the early-exit path below zeroes 64 bytes).
// ctx        - context array; ctx[0] and ctx[1] are used.
// height     - not referenced anywhere in this body.
template<Algorithm::Id ALGO>
static NOINLINE void cryptonight_double_hash_gr_sse41(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
{
constexpr CnAlgo<ALGO> props;
constexpr Algorithm::Id BASE = props.base();
// Variants whose base is CN_1 reject inputs shorter than 43 bytes: both outputs are zeroed.
if (BASE == Algorithm::CN_1 && size < 43) {
memset(output, 0, 64);
return;
}
// The second message starts `size` bytes after the first.
keccak(input, size, ctx[0]->state);
keccak(input + size, size, ctx[1]->state);
if (props.half_mem()) {
ctx[0]->first_half = true;
ctx[1]->first_half = true;
}
# ifdef XMRIG_VAES
// With VAES available (and a non-heavy algorithm) both scratchpads are filled by one paired call.
if (!props.isHeavy() && cn_vaes_enabled) {
cn_explode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
}
else
# endif
{
cn_explode_scratchpad<ALGO, false, 0>(ctx[0]);
cn_explode_scratchpad<ALGO, false, 0>(ctx[1]);
}
// VARIANT1_INIT is a macro defined outside this view; presumably it declares
// tweak1_2_0 / tweak1_2_1, which are read below - TODO confirm against the macro.
VARIANT1_INIT(0);
VARIANT1_INIT(1);
ctx[0]->tweak1_2 = tweak1_2_0;
ctx[1]->tweak1_2 = tweak1_2_1;
// NOTE(review): only ctx[0] receives tweak1_table; presumably the asm routine
// reads the table through ctx[0] for both lanes - confirm against the asm source.
ctx[0]->tweak1_table = tweak1_table;
// ALGO is a template parameter, so at most one of these comparisons is true for a given instantiation.
if (ALGO == Algorithm::CN_GR_0) cn_gr0_double_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_1) cn_gr1_double_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_2) cn_gr2_double_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_3) cn_gr3_double_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_4) cn_gr4_double_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_5) cn_gr5_double_mainloop_asm(ctx);
# ifdef XMRIG_VAES
// Same selection as for the explode step above.
if (!props.isHeavy() && cn_vaes_enabled) {
cn_implode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
}
else
# endif
{
cn_implode_scratchpad<ALGO, false, 0>(ctx[0]);
cn_implode_scratchpad<ALGO, false, 0>(ctx[1]);
}
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
keccakf(reinterpret_cast<uint64_t*>(ctx[1]->state), 24);
// For each lane, the low two bits of the first state element pick one of the extra_hashes entries.
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
}
#endif
template<Algorithm::Id ALGO, bool SOFT_AES>
inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
{
# ifdef XMRIG_FEATURE_ASM
if (!SOFT_AES) {
switch (ALGO) {
case Algorithm::CN_GR_0:
case Algorithm::CN_GR_1:
case Algorithm::CN_GR_2:
case Algorithm::CN_GR_3:
case Algorithm::CN_GR_4:
case Algorithm::CN_GR_5:
if (cn_sse41_enabled) {
cryptonight_double_hash_gr_sse41<ALGO>(input, size, output, ctx, height);
return;
}
break;
default:
break;
}
}
# endif
constexpr CnAlgo<ALGO> props;
constexpr size_t MASK = props.mask();
constexpr Algorithm::Id BASE = props.base();
@@ -1092,8 +1300,17 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
ctx[0]->first_half = true;
ctx[1]->first_half = true;
}
# ifdef XMRIG_VAES
if (!SOFT_AES && !props.isHeavy() && cn_vaes_enabled) {
cn_explode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
}
else
# endif
{
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
}
uint64_t al0 = h0[0] ^ h0[4];
uint64_t al1 = h1[0] ^ h1[4];
@@ -1288,8 +1505,16 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
bx10 = cx1;
}
# ifdef XMRIG_VAES
if (!SOFT_AES && !props.isHeavy() && cn_vaes_enabled) {
cn_implode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
}
else
# endif
{
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
}
keccakf(h0, 24);
keccakf(h1, 24);
@@ -1299,27 +1524,15 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
}
// Writes bx0 ^ cx to mem_out as two 64-bit words. The low word is stored as is;
// the high word is additionally XORed with the tweak1_table entry selected by
// bits 24..31 of that high word. The l, idx and ax0 arguments are not used.
static inline void cryptonight_monero_tweak_gr(uint64_t* mem_out, const uint8_t* l, uint64_t idx, __m128i ax0, __m128i bx0, __m128i cx)
{
    const __m128i mixed = _mm_xor_si128(bx0, cx);

    // Move the upper 64 bits of `mixed` down into the lower half so they can be extracted.
    const __m128 mixedAsFloat = _mm_castsi128_ps(mixed);
    const __m128i upperHalf = _mm_castps_si128(_mm_movehl_ps(mixedAsFloat, mixedAsFloat));

    mem_out[0] = _mm_cvtsi128_si64(mixed);

    const uint64_t vh = _mm_cvtsi128_si64(upperHalf);
    mem_out[1] = vh ^ tweak1_table[static_cast<uint32_t>(vh) >> 24];
}
template<Algorithm::Id ALGO, bool SOFT_AES>
void cryptonight_quad_hash_zen(const uint8_t* __restrict__ input, size_t size, uint8_t* __restrict__ output, cryptonight_ctx** __restrict__ ctx, uint64_t height)
#ifdef XMRIG_FEATURE_ASM
template<Algorithm::Id ALGO>
static NOINLINE void cryptonight_quad_hash_gr_sse41(const uint8_t* __restrict__ input, size_t size, uint8_t* __restrict__ output, cryptonight_ctx** __restrict__ ctx, uint64_t height)
{
constexpr CnAlgo<ALGO> props;
constexpr size_t MASK = props.mask();
constexpr Algorithm::Id BASE = props.base();
if (BASE == Algorithm::CN_1 && size < 43) {
memset(output, 0, 64);
memset(output, 0, 32 * 4);
return;
}
@@ -1328,21 +1541,6 @@ void cryptonight_quad_hash_zen(const uint8_t* __restrict__ input, size_t size, u
keccak(input + size * 2, size, ctx[2]->state);
keccak(input + size * 3, size, ctx[3]->state);
uint8_t* l0 = ctx[0]->memory;
uint8_t* l1 = ctx[1]->memory;
uint8_t* l2 = ctx[2]->memory;
uint8_t* l3 = ctx[3]->memory;
uint64_t* h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
uint64_t* h1 = reinterpret_cast<uint64_t*>(ctx[1]->state);
uint64_t* h2 = reinterpret_cast<uint64_t*>(ctx[2]->state);
uint64_t* h3 = reinterpret_cast<uint64_t*>(ctx[3]->state);
VARIANT1_INIT(0);
VARIANT1_INIT(1);
VARIANT1_INIT(2);
VARIANT1_INIT(3);
if (props.half_mem()) {
ctx[0]->first_half = true;
ctx[1]->first_half = true;
@@ -1350,145 +1548,59 @@ void cryptonight_quad_hash_zen(const uint8_t* __restrict__ input, size_t size, u
ctx[3]->first_half = true;
}
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
uint64_t al0 = h0[0] ^ h0[4];
uint64_t al1 = h1[0] ^ h1[4];
uint64_t al2 = h2[0] ^ h2[4];
uint64_t al3 = h3[0] ^ h3[4];
uint64_t ah0 = h0[1] ^ h0[5];
uint64_t ah1 = h1[1] ^ h1[5];
uint64_t ah2 = h2[1] ^ h2[5];
uint64_t ah3 = h3[1] ^ h3[5];
__m128i bx00 = _mm_set_epi64x(h0[3] ^ h0[7], h0[2] ^ h0[6]);
__m128i bx10 = _mm_set_epi64x(h1[3] ^ h1[7], h1[2] ^ h1[6]);
__m128i bx20 = _mm_set_epi64x(h2[3] ^ h2[7], h2[2] ^ h2[6]);
__m128i bx30 = _mm_set_epi64x(h3[3] ^ h3[7], h3[2] ^ h3[6]);
uint64_t idx0 = al0;
uint64_t idx1 = al1;
uint64_t idx2 = al2;
uint64_t idx3 = al3;
__m128i cx0, cx1, cx2, cx3;
if (!SOFT_AES) {
cx0 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[idx0 & MASK]));
cx1 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l1[idx1 & MASK]));
cx2 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l2[idx2 & MASK]));
cx3 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l3[idx3 & MASK]));
# ifdef XMRIG_VAES
if (!props.isHeavy() && cn_vaes_enabled) {
cn_explode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
cn_explode_scratchpad_vaes_double(ctx[2], ctx[3], props.memory(), props.half_mem());
}
else
# endif
{
cn_explode_scratchpad<ALGO, false, 0>(ctx[0]);
cn_explode_scratchpad<ALGO, false, 0>(ctx[1]);
cn_explode_scratchpad<ALGO, false, 0>(ctx[2]);
cn_explode_scratchpad<ALGO, false, 0>(ctx[3]);
}
for (size_t i = 0; i < props.iterations(); i++) {
const __m128i ax0 = _mm_set_epi64x(ah0, al0);
const __m128i ax1 = _mm_set_epi64x(ah1, al1);
const __m128i ax2 = _mm_set_epi64x(ah2, al2);
const __m128i ax3 = _mm_set_epi64x(ah3, al3);
VARIANT1_INIT(0); ctx[0]->tweak1_2 = tweak1_2_0;
VARIANT1_INIT(1); ctx[1]->tweak1_2 = tweak1_2_1;
VARIANT1_INIT(2); ctx[2]->tweak1_2 = tweak1_2_2;
VARIANT1_INIT(3); ctx[3]->tweak1_2 = tweak1_2_3;
if (SOFT_AES) {
cx0 = soft_aesenc(&l0[idx0 & MASK], ax0, reinterpret_cast<const uint32_t*>(saes_table));
cx1 = soft_aesenc(&l1[idx1 & MASK], ax1, reinterpret_cast<const uint32_t*>(saes_table));
cx2 = soft_aesenc(&l2[idx2 & MASK], ax2, reinterpret_cast<const uint32_t*>(saes_table));
cx3 = soft_aesenc(&l3[idx3 & MASK], ax3, reinterpret_cast<const uint32_t*>(saes_table));
}
else {
cx0 = _mm_aesenc_si128(cx0, ax0);
cx1 = _mm_aesenc_si128(cx1, ax1);
cx2 = _mm_aesenc_si128(cx2, ax2);
cx3 = _mm_aesenc_si128(cx3, ax3);
if (MASK > 131072) {
_mm_prefetch((const char*)(&l0[_mm_cvtsi128_si32(cx0) & MASK]), _MM_HINT_T0);
_mm_prefetch((const char*)(&l1[_mm_cvtsi128_si32(cx1) & MASK]), _MM_HINT_T0);
_mm_prefetch((const char*)(&l2[_mm_cvtsi128_si32(cx2) & MASK]), _MM_HINT_T0);
_mm_prefetch((const char*)(&l3[_mm_cvtsi128_si32(cx3) & MASK]), _MM_HINT_T0);
ctx[0]->tweak1_table = tweak1_table;
if (ALGO == Algorithm::CN_GR_0) cn_gr0_quad_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_1) cn_gr1_quad_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_2) cn_gr2_quad_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_3) cn_gr3_quad_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_4) cn_gr4_quad_mainloop_asm(ctx);
if (ALGO == Algorithm::CN_GR_5) cn_gr5_quad_mainloop_asm(ctx);
# ifdef XMRIG_VAES
if (!props.isHeavy() && cn_vaes_enabled) {
cn_implode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
cn_implode_scratchpad_vaes_double(ctx[2], ctx[3], props.memory(), props.half_mem());
}
else
# endif
{
cn_implode_scratchpad<ALGO, false, 0>(ctx[0]);
cn_implode_scratchpad<ALGO, false, 0>(ctx[1]);
cn_implode_scratchpad<ALGO, false, 0>(ctx[2]);
cn_implode_scratchpad<ALGO, false, 0>(ctx[3]);
}
cryptonight_monero_tweak_gr((uint64_t*)&l0[idx0 & MASK], l0, idx0 & MASK, ax0, bx00, cx0);
cryptonight_monero_tweak_gr((uint64_t*)&l1[idx1 & MASK], l1, idx1 & MASK, ax1, bx10, cx1);
cryptonight_monero_tweak_gr((uint64_t*)&l2[idx2 & MASK], l2, idx2 & MASK, ax2, bx20, cx2);
cryptonight_monero_tweak_gr((uint64_t*)&l3[idx3 & MASK], l3, idx3 & MASK, ax3, bx30, cx3);
idx0 = _mm_cvtsi128_si64(cx0);
idx1 = _mm_cvtsi128_si64(cx1);
idx2 = _mm_cvtsi128_si64(cx2);
idx3 = _mm_cvtsi128_si64(cx3);
uint64_t hi, lo, cl, ch;
cl = ((uint64_t*)&l0[idx0 & MASK])[0];
ch = ((uint64_t*)&l0[idx0 & MASK])[1];
lo = __umul128(idx0, cl, &hi);
al0 += hi;
ah0 += lo;
((uint64_t*)&l0[idx0 & MASK])[0] = al0;
((uint64_t*)&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
al0 ^= cl;
ah0 ^= ch;
idx0 = al0;
bx00 = cx0;
if (!SOFT_AES) cx0 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[idx0 & MASK]));
cl = ((uint64_t*)&l1[idx1 & MASK])[0];
ch = ((uint64_t*)&l1[idx1 & MASK])[1];
lo = __umul128(idx1, cl, &hi);
al1 += hi;
ah1 += lo;
((uint64_t*)&l1[idx1 & MASK])[0] = al1;
((uint64_t*)&l1[idx1 & MASK])[1] = ah1 ^ tweak1_2_1;
al1 ^= cl;
ah1 ^= ch;
idx1 = al1;
bx10 = cx1;
if (!SOFT_AES) cx1 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l1[idx1 & MASK]));
cl = ((uint64_t*)&l2[idx2 & MASK])[0];
ch = ((uint64_t*)&l2[idx2 & MASK])[1];
lo = __umul128(idx2, cl, &hi);
al2 += hi;
ah2 += lo;
((uint64_t*)&l2[idx2 & MASK])[0] = al2;
((uint64_t*)&l2[idx2 & MASK])[1] = ah2 ^ tweak1_2_2;
al2 ^= cl;
ah2 ^= ch;
idx2 = al2;
bx20 = cx2;
if (!SOFT_AES) cx2 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l2[idx2 & MASK]));
cl = ((uint64_t*)&l3[idx3 & MASK])[0];
ch = ((uint64_t*)&l3[idx3 & MASK])[1];
lo = __umul128(idx3, cl, &hi);
al3 += hi;
ah3 += lo;
((uint64_t*)&l3[idx3 & MASK])[0] = al3;
((uint64_t*)&l3[idx3 & MASK])[1] = ah3 ^ tweak1_2_3;
al3 ^= cl;
ah3 ^= ch;
idx3 = al3;
bx30 = cx3;
if (!SOFT_AES) cx3 = _mm_load_si128(reinterpret_cast<const __m128i*>(&l3[idx3 & MASK]));
}
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
keccakf(h0, 24);
keccakf(h1, 24);
keccakf(h2, 24);
keccakf(h3, 24);
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
keccakf(reinterpret_cast<uint64_t*>(ctx[1]->state), 24);
keccakf(reinterpret_cast<uint64_t*>(ctx[2]->state), 24);
keccakf(reinterpret_cast<uint64_t*>(ctx[3]->state), 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
extra_hashes[ctx[1]->state[0] & 3](ctx[1]->state, 200, output + 32);
extra_hashes[ctx[2]->state[0] & 3](ctx[2]->state, 200, output + 64);
extra_hashes[ctx[3]->state[0] & 3](ctx[3]->state, 200, output + 96);
}
#endif
#define CN_STEP1(a, b0, b1, c, l, ptr, idx, conc_var) \
@@ -1684,13 +1796,26 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
template<Algorithm::Id ALGO, bool SOFT_AES>
inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
{
const auto arch = Cpu::info()->arch();
if ((arch >= ICpuInfo::ARCH_ZEN) && (arch <= ICpuInfo::ARCH_ZEN3)) {
if ((ALGO == Algorithm::CN_GR_0) || (ALGO == Algorithm::CN_GR_1) || (ALGO == Algorithm::CN_GR_2) || (ALGO == Algorithm::CN_GR_3) || (ALGO == Algorithm::CN_GR_4) || (ALGO == Algorithm::CN_GR_5)) {
cryptonight_quad_hash_zen<ALGO, SOFT_AES>(input, size, output, ctx, height);
# ifdef XMRIG_FEATURE_ASM
if (!SOFT_AES) {
switch (ALGO) {
case Algorithm::CN_GR_0:
case Algorithm::CN_GR_1:
case Algorithm::CN_GR_2:
case Algorithm::CN_GR_3:
case Algorithm::CN_GR_4:
case Algorithm::CN_GR_5:
if (cn_sse41_enabled) {
cryptonight_quad_hash_gr_sse41<ALGO>(input, size, output, ctx, height);
return;
}
break;
default:
break;
}
}
# endif
constexpr CnAlgo<ALGO> props;
constexpr size_t MASK = props.mask();
@@ -1714,7 +1839,20 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
if (props.half_mem()) {
ctx[i]->first_half = true;
}
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[i]);
}
# ifdef XMRIG_VAES
if (!SOFT_AES && !props.isHeavy() && cn_vaes_enabled) {
cn_explode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
cn_explode_scratchpad_vaes_double(ctx[2], ctx[3], props.memory(), props.half_mem());
}
else
# endif
{
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
}
uint8_t* l0 = ctx[0]->memory;
@@ -1766,8 +1904,21 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
CN_STEP4(3, ax3, bx30, bx31, cx3, l3, mc3, ptr3, idx3);
}
# ifdef XMRIG_VAES
if (!SOFT_AES && !props.isHeavy() && cn_vaes_enabled) {
cn_implode_scratchpad_vaes_double(ctx[0], ctx[1], props.memory(), props.half_mem());
cn_implode_scratchpad_vaes_double(ctx[2], ctx[3], props.memory(), props.half_mem());
}
else
# endif
{
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[0]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[1]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[2]);
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[3]);
}
for (size_t i = 0; i < 4; i++) {
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(ctx[i]);
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
}

View File

@@ -0,0 +1,478 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "CryptoNight_x86_vaes.h"
#include "CryptoNight_monero.h"
#include "CryptoNight.h"
#ifdef __GNUC__
# include <x86intrin.h>
#if !defined(__clang__) && !defined(__ICC) && __GNUC__ < 10
// Fallback for compilers whose headers lack _mm256_loadu2_m128i: builds a 256-bit
// value from two unaligned 128-bit loads, *loaddr in the low half and *hiaddr in
// the high half.
static inline __m256i
__attribute__((__always_inline__))
_mm256_loadu2_m128i(const __m128i* const hiaddr, const __m128i* const loaddr)
{
    const __m128i lowLane = _mm_loadu_si128(loaddr);
    const __m128i highLane = _mm_loadu_si128(hiaddr);
    const __m256i widened = _mm256_castsi128_si256(lowLane);
    return _mm256_inserti128_si256(widened, highLane, 1);
}
// Fallback for compilers whose headers lack _mm256_storeu2_m128i: writes the low
// 128 bits of `a` to *loaddr and then the high 128 bits to *hiaddr, both unaligned.
static inline void
__attribute__((__always_inline__))
_mm256_storeu2_m128i(__m128i* const hiaddr, __m128i* const loaddr, const __m256i a)
{
    const __m128i lowLane = _mm256_castsi256_si128(a);
    _mm_storeu_si128(loaddr, lowLane);

    const __m128i highLane = _mm256_extracti128_si256(a, 1);
    _mm_storeu_si128(hiaddr, highLane);
}
#endif
#else
# include <intrin.h>
#endif
// Running XOR over the four 32-bit elements of tmp1:
// sl_xor(a1 a2 a3 a4) = a1 (a2^a1) (a3^a2^a1) (a4^a3^a2^a1)
// Implemented by XORing the input with copies of itself byte-shifted left by 4, 8 and 12 bytes.
static FORCEINLINE __m128i sl_xor(__m128i tmp1)
{
    const __m128i shiftedBy4 = _mm_slli_si128(tmp1, 0x04);
    const __m128i shiftedBy8 = _mm_slli_si128(shiftedBy4, 0x04);
    const __m128i shiftedBy12 = _mm_slli_si128(shiftedBy8, 0x04);

    return _mm_xor_si128(_mm_xor_si128(_mm_xor_si128(tmp1, shiftedBy4), shiftedBy8), shiftedBy12);
}
// Advances a pair of 128-bit key words in place by one key-expansion step.
// *xout0 is updated first, from the old *xout2 and the round constant `rcon`;
// *xout2 is then updated from the freshly written *xout0 with a zero round constant.
template<uint8_t rcon>
static FORCEINLINE void aes_genkey_sub(__m128i* xout0, __m128i* xout2)
{
    // 0xFF broadcasts the 4th 32-bit element of the assist result (see PSHUFD).
    const __m128i assistForFirst = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(*xout2, rcon), 0xFF);
    *xout0 = _mm_xor_si128(sl_xor(*xout0), assistForFirst);

    // 0xAA broadcasts the 3rd 32-bit element of the assist result (see PSHUFD).
    const __m128i assistForSecond = _mm_shuffle_epi32(_mm_aeskeygenassist_si128(*xout0, 0x00), 0xAA);
    *xout2 = _mm_xor_si128(sl_xor(*xout2), assistForSecond);
}
// Derives ten round keys from the two 128-bit values at memory[0] and memory[1]
// (aligned loads). Every output k0..k9 holds the same 128-bit round key in both of
// its halves; k0/k1 are the unmodified inputs, and each following pair comes from
// one more aes_genkey_sub step with round constants 0x01, 0x02, 0x04, 0x08.
static NOINLINE void vaes_genkey(const __m128i* memory, __m256i* k0, __m256i* k1, __m256i* k2, __m256i* k3, __m256i* k4, __m256i* k5, __m256i* k6, __m256i* k7, __m256i* k8, __m256i* k9)
{
    __m128i keyEven = _mm_load_si128(memory);
    __m128i keyOdd = _mm_load_si128(memory + 1);

    // Stores the current key pair, each one duplicated into both 128-bit halves.
    const auto emit = [&keyEven, &keyOdd](__m256i* evenOut, __m256i* oddOut) {
        *evenOut = _mm256_set_m128i(keyEven, keyEven);
        *oddOut = _mm256_set_m128i(keyOdd, keyOdd);
    };

    emit(k0, k1);

    aes_genkey_sub<0x01>(&keyEven, &keyOdd);
    emit(k2, k3);

    aes_genkey_sub<0x02>(&keyEven, &keyOdd);
    emit(k4, k5);

    aes_genkey_sub<0x04>(&keyEven, &keyOdd);
    emit(k6, k7);

    aes_genkey_sub<0x08>(&keyEven, &keyOdd);
    emit(k8, k9);
}
// Derives ten round keys for two independent key sources at once. In every output
// k0..k9 the low 128 bits hold the round key derived from memory1 and the high
// 128 bits hold the one derived from memory2. k0/k1 are the unmodified inputs;
// each following pair comes from one more aes_genkey_sub step per source with
// round constants 0x01, 0x02, 0x04, 0x08.
static NOINLINE void vaes_genkey_double(const __m128i* memory1, const __m128i* memory2, __m256i* k0, __m256i* k1, __m256i* k2, __m256i* k3, __m256i* k4, __m256i* k5, __m256i* k6, __m256i* k7, __m256i* k8, __m256i* k9)
{
    __m128i firstEven = _mm_load_si128(memory1);
    __m128i firstOdd = _mm_load_si128(memory1 + 1);
    __m128i secondEven = _mm_load_si128(memory2);
    __m128i secondOdd = _mm_load_si128(memory2 + 1);

    // Stores the current key pair: second source in the high half, first source in the low half.
    const auto emit = [&](__m256i* evenOut, __m256i* oddOut) {
        *evenOut = _mm256_set_m128i(secondEven, firstEven);
        *oddOut = _mm256_set_m128i(secondOdd, firstOdd);
    };

    emit(k0, k1);

    aes_genkey_sub<0x01>(&firstEven, &firstOdd);
    aes_genkey_sub<0x01>(&secondEven, &secondOdd);
    emit(k2, k3);

    aes_genkey_sub<0x02>(&firstEven, &firstOdd);
    aes_genkey_sub<0x02>(&secondEven, &secondOdd);
    emit(k4, k5);

    aes_genkey_sub<0x04>(&firstEven, &firstOdd);
    aes_genkey_sub<0x04>(&secondEven, &secondOdd);
    emit(k6, k7);

    aes_genkey_sub<0x08>(&firstEven, &firstOdd);
    aes_genkey_sub<0x08>(&secondEven, &secondOdd);
    emit(k8, k9);
}
// Applies one AES encryption round with `key` to four 256-bit registers in place
// (each register carries two 128-bit blocks).
static FORCEINLINE void vaes_round(__m256i key, __m256i& x01, __m256i& x23, __m256i& x45, __m256i& x67)
{
    const auto encryptInPlace = [key](__m256i& blocks) {
        blocks = _mm256_aesenc_epi128(blocks, key);
    };

    encryptInPlace(x01);
    encryptInPlace(x23);
    encryptInPlace(x45);
    encryptInPlace(x67);
}
// Applies one AES encryption round with `key` to eight 256-bit registers in place,
// in order x0..x7, by running the four-register overload on each half.
static FORCEINLINE void vaes_round(__m256i key, __m256i& x0, __m256i& x1, __m256i& x2, __m256i& x3, __m256i& x4, __m256i& x5, __m256i& x6, __m256i& x7)
{
    vaes_round(key, x0, x1, x2, x3);
    vaes_round(key, x4, x5, x6, x7);
}
namespace xmrig {
// Fills ctx->memory by repeatedly running ten AES rounds over eight 128-bit blocks
// (held in four 256-bit registers) and writing out the 128 bytes after each pass.
//
// ctx      - supplies the state (key + initial blocks), the scratchpad and, for
//            half_mem, the save_state buffer and the first_half flag.
// memory   - scratchpad size in bytes.
// half_mem - when true only half of `memory` is filled per call; the running
//            blocks are carried between the two halves through ctx->save_state.
NOINLINE void cn_explode_scratchpad_vaes(cryptonight_ctx* ctx, size_t memory, bool half_mem)
{
// Number of __m256i elements to write in this call.
const size_t N = (memory / sizeof(__m256i)) / (half_mem ? 2 : 1);
__m256i xin01, xin23, xin45, xin67;
__m256i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
const __m128i* input = reinterpret_cast<const __m128i*>(ctx->state);
__m256i* output = reinterpret_cast<__m256i*>(ctx->memory);
// Round keys come from the first two 128-bit words of the state.
vaes_genkey(input, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
if (half_mem && !ctx->first_half) {
// Second half: resume from the blocks saved at the end of the first half.
const __m256i* p = reinterpret_cast<const __m256i*>(ctx->save_state);
xin01 = _mm256_loadu_si256(p + 0);
xin23 = _mm256_loadu_si256(p + 1);
xin45 = _mm256_loadu_si256(p + 2);
xin67 = _mm256_loadu_si256(p + 3);
}
else {
// Fresh start: the eight initial blocks are the 128 bytes of state beginning at
// 128-bit word 4 (byte offset 64).
xin01 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(input + 4));
xin23 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(input + 6));
xin45 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(input + 8));
xin67 = _mm256_loadu_si256(reinterpret_cast<const __m256i*>(input + 10));
}
// Both constants are expressed in __m256i elements: 64 bytes and 2048 bytes.
constexpr int output_increment = 64 / sizeof(__m256i);
constexpr int prefetch_dist = 2048 / sizeof(__m256i);
// Pass 0 stops prefetch_dist elements before the end, prefetching 2048 bytes ahead
// of the write position.
__m256i* e = output + N - prefetch_dist;
__m256i* prefetch_ptr = output + prefetch_dist;
for (int i = 0; i < 2; ++i) {
// do/while: the body runs at least once per pass.
do {
_mm_prefetch((const char*)(prefetch_ptr), _MM_HINT_T0);
_mm_prefetch((const char*)(prefetch_ptr + output_increment), _MM_HINT_T0);
vaes_round(k0, xin01, xin23, xin45, xin67);
vaes_round(k1, xin01, xin23, xin45, xin67);
vaes_round(k2, xin01, xin23, xin45, xin67);
vaes_round(k3, xin01, xin23, xin45, xin67);
vaes_round(k4, xin01, xin23, xin45, xin67);
vaes_round(k5, xin01, xin23, xin45, xin67);
vaes_round(k6, xin01, xin23, xin45, xin67);
vaes_round(k7, xin01, xin23, xin45, xin67);
vaes_round(k8, xin01, xin23, xin45, xin67);
vaes_round(k9, xin01, xin23, xin45, xin67);
// Aligned stores: these require ctx->memory to be 32-byte aligned.
_mm256_store_si256(output + 0, xin01);
_mm256_store_si256(output + 1, xin23);
_mm256_store_si256(output + output_increment + 0, xin45);
_mm256_store_si256(output + output_increment + 1, xin67);
output += output_increment * 2;
prefetch_ptr += output_increment * 2;
} while (output < e);
// Pass 1 covers the remaining prefetch_dist elements; prefetch_ptr is reset to the
// write position, so the prefetched addresses stay inside the N-element region.
e += prefetch_dist;
prefetch_ptr = output;
}
if (half_mem && ctx->first_half) {
// First half finished: save the running blocks so the second half can resume from them.
__m256i* p = reinterpret_cast<__m256i*>(ctx->save_state);
_mm256_storeu_si256(p + 0, xin01);
_mm256_storeu_si256(p + 1, xin23);
_mm256_storeu_si256(p + 2, xin45);
_mm256_storeu_si256(p + 3, xin67);
}
// Zero the upper halves of the YMM registers before returning to the caller.
_mm256_zeroupper();
}
// Two-context variant of the scratchpad fill: every 256-bit register carries one
// 128-bit block of ctx1 in its low half and the corresponding block of ctx2 in its
// high half, so both scratchpads are produced by the same sequence of AES rounds.
//
// ctx1/ctx2 - the two contexts (state, memory, save_state, first_half).
// memory    - scratchpad size in bytes (per context).
// half_mem  - when true only half of `memory` is filled per call; the running
//             blocks are carried between the halves through save_state.
NOINLINE void cn_explode_scratchpad_vaes_double(cryptonight_ctx* ctx1, cryptonight_ctx* ctx2, size_t memory, bool half_mem)
{
// Number of __m128i elements to write per context in this call.
const size_t N = (memory / sizeof(__m128i)) / (half_mem ? 2 : 1);
__m256i xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7;
__m256i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
const __m128i* input1 = reinterpret_cast<const __m128i*>(ctx1->state);
const __m128i* input2 = reinterpret_cast<const __m128i*>(ctx2->state);
__m128i* output1 = reinterpret_cast<__m128i*>(ctx1->memory);
__m128i* output2 = reinterpret_cast<__m128i*>(ctx2->memory);
// Round keys: low half derived from ctx1's state, high half from ctx2's state.
vaes_genkey_double(input1, input2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
{
// The saved blocks are used only when half_mem is set and BOTH contexts are past
// their first half; otherwise both start from state word 4 (byte offset 64).
const bool b = half_mem && !ctx1->first_half && !ctx2->first_half;
const __m128i* p1 = b ? reinterpret_cast<const __m128i*>(ctx1->save_state) : (input1 + 4);
const __m128i* p2 = b ? reinterpret_cast<const __m128i*>(ctx2->save_state) : (input2 + 4);
xin0 = _mm256_loadu2_m128i(p2 + 0, p1 + 0);
xin1 = _mm256_loadu2_m128i(p2 + 1, p1 + 1);
xin2 = _mm256_loadu2_m128i(p2 + 2, p1 + 2);
xin3 = _mm256_loadu2_m128i(p2 + 3, p1 + 3);
xin4 = _mm256_loadu2_m128i(p2 + 4, p1 + 4);
xin5 = _mm256_loadu2_m128i(p2 + 5, p1 + 5);
xin6 = _mm256_loadu2_m128i(p2 + 6, p1 + 6);
xin7 = _mm256_loadu2_m128i(p2 + 7, p1 + 7);
}
// Both constants are expressed in __m128i elements: 64 bytes and 2048 bytes.
constexpr int output_increment = 64 / sizeof(__m128i);
constexpr int prefetch_dist = 2048 / sizeof(__m128i);
// The loop bound tracks output1 only; output2 advances in lockstep with it.
__m128i* e = output1 + N - prefetch_dist;
__m128i* prefetch_ptr1 = output1 + prefetch_dist;
__m128i* prefetch_ptr2 = output2 + prefetch_dist;
for (int i = 0; i < 2; ++i) {
// do/while: the body runs at least once per pass.
do {
_mm_prefetch((const char*)(prefetch_ptr1), _MM_HINT_T0);
_mm_prefetch((const char*)(prefetch_ptr1 + output_increment), _MM_HINT_T0);
_mm_prefetch((const char*)(prefetch_ptr2), _MM_HINT_T0);
_mm_prefetch((const char*)(prefetch_ptr2 + output_increment), _MM_HINT_T0);
vaes_round(k0, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
vaes_round(k1, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
vaes_round(k2, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
vaes_round(k3, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
vaes_round(k4, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
vaes_round(k5, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
vaes_round(k6, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
vaes_round(k7, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
vaes_round(k8, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
vaes_round(k9, xin0, xin1, xin2, xin3, xin4, xin5, xin6, xin7);
// Split each register: low half goes to ctx1's scratchpad, high half to ctx2's.
_mm256_storeu2_m128i(output2 + 0, output1 + 0, xin0);
_mm256_storeu2_m128i(output2 + 1, output1 + 1, xin1);
_mm256_storeu2_m128i(output2 + 2, output1 + 2, xin2);
_mm256_storeu2_m128i(output2 + 3, output1 + 3, xin3);
_mm256_storeu2_m128i(output2 + output_increment + 0, output1 + output_increment + 0, xin4);
_mm256_storeu2_m128i(output2 + output_increment + 1, output1 + output_increment + 1, xin5);
_mm256_storeu2_m128i(output2 + output_increment + 2, output1 + output_increment + 2, xin6);
_mm256_storeu2_m128i(output2 + output_increment + 3, output1 + output_increment + 3, xin7);
output1 += output_increment * 2;
prefetch_ptr1 += output_increment * 2;
output2 += output_increment * 2;
prefetch_ptr2 += output_increment * 2;
} while (output1 < e);
// Pass 1 covers the remaining prefetch_dist elements; the prefetch pointers are
// reset to the write positions, so prefetched addresses stay inside the filled region.
e += prefetch_dist;
prefetch_ptr1 = output1;
prefetch_ptr2 = output2;
}
// Save the running blocks only when half_mem is set and BOTH contexts are in their first half.
if (half_mem && ctx1->first_half && ctx2->first_half) {
__m128i* p1 = reinterpret_cast<__m128i*>(ctx1->save_state);
__m128i* p2 = reinterpret_cast<__m128i*>(ctx2->save_state);
_mm256_storeu2_m128i(p2 + 0, p1 + 0, xin0);
_mm256_storeu2_m128i(p2 + 1, p1 + 1, xin1);
_mm256_storeu2_m128i(p2 + 2, p1 + 2, xin2);
_mm256_storeu2_m128i(p2 + 3, p1 + 3, xin3);
_mm256_storeu2_m128i(p2 + 4, p1 + 4, xin4);
_mm256_storeu2_m128i(p2 + 5, p1 + 5, xin5);
_mm256_storeu2_m128i(p2 + 6, p1 + 6, xin6);
_mm256_storeu2_m128i(p2 + 7, p1 + 7, xin7);
}
// Zero the upper halves of the YMM registers before returning to the caller.
_mm256_zeroupper();
}
// Folds the scratchpad back into the state: 128 bytes of ctx->memory at a time are
// XORed into eight running 128-bit blocks (four 256-bit registers), followed by ten
// AES rounds; the final blocks are written back to the state.
//
// ctx      - supplies the scratchpad (read) and the state (read and written).
// memory   - scratchpad size in bytes.
// half_mem - when true the scratchpad holds only half of `memory` at a time, so it
//            is consumed, regenerated via cn_explode_scratchpad_vaes, and consumed again.
NOINLINE void cn_implode_scratchpad_vaes(cryptonight_ctx* ctx, size_t memory, bool half_mem)
{
// Number of __m256i elements consumed per part.
const size_t N = (memory / sizeof(__m256i)) / (half_mem ? 2 : 1);
__m256i xout01, xout23, xout45, xout67;
__m256i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
const __m256i* input = reinterpret_cast<const __m256i*>(ctx->memory);
__m256i* output = reinterpret_cast<__m256i*>(ctx->state);
// Round keys come from 128-bit words 2 and 3 of the state (byte offset 32).
vaes_genkey(reinterpret_cast<__m128i*>(output) + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);
// Running blocks start as the 128 bytes of state at byte offset 64
// (output is a __m256i pointer, so +2 is 64 bytes).
xout01 = _mm256_loadu_si256(output + 2);
xout23 = _mm256_loadu_si256(output + 3);
xout45 = _mm256_loadu_si256(output + 4);
xout67 = _mm256_loadu_si256(output + 5);
const __m256i* input_begin = input;
for (size_t part = 0; part < (half_mem ? 2 : 1); ++part) {
if (half_mem && (part == 1)) {
// Second part: rewind to the start of the scratchpad and regenerate its contents.
// With first_half cleared, the explode routine resumes from ctx->save_state.
input = input_begin;
ctx->first_half = false;
cn_explode_scratchpad_vaes(ctx, memory, half_mem);
}
// i counts __m256i elements; each iteration consumes four of them (128 bytes).
for (size_t i = 0; i < N;) {
xout01 = _mm256_xor_si256(xout01, input[0]);
xout23 = _mm256_xor_si256(xout23, input[1]);
constexpr int input_increment = 64 / sizeof(__m256i);
xout45 = _mm256_xor_si256(xout45, input[input_increment]);
xout67 = _mm256_xor_si256(xout67, input[input_increment + 1]);
input += input_increment * 2;
i += 4;
// Prefetch the next 128 bytes only if another iteration will read them.
if (i < N) {
_mm_prefetch((const char*)(input), _MM_HINT_T0);
_mm_prefetch((const char*)(input + input_increment), _MM_HINT_T0);
}
vaes_round(k0, xout01, xout23, xout45, xout67);
vaes_round(k1, xout01, xout23, xout45, xout67);
vaes_round(k2, xout01, xout23, xout45, xout67);
vaes_round(k3, xout01, xout23, xout45, xout67);
vaes_round(k4, xout01, xout23, xout45, xout67);
vaes_round(k5, xout01, xout23, xout45, xout67);
vaes_round(k6, xout01, xout23, xout45, xout67);
vaes_round(k7, xout01, xout23, xout45, xout67);
vaes_round(k8, xout01, xout23, xout45, xout67);
vaes_round(k9, xout01, xout23, xout45, xout67);
}
}
// Write the final blocks back to the same 128 bytes of state they were loaded from.
_mm256_storeu_si256(output + 2, xout01);
_mm256_storeu_si256(output + 3, xout23);
_mm256_storeu_si256(output + 4, xout45);
_mm256_storeu_si256(output + 5, xout67);
// Zero the upper halves of the YMM registers before returning to the caller.
_mm256_zeroupper();
}
/**
 * Implode step of the VAES code path for two CryptoNight contexts at once.
 *
 * Each 256-bit accumulator carries a 128-bit block of ctx1 in its low lane and
 * the matching block of ctx2 in its high lane. The accumulators are seeded
 * from bytes 64..191 of both states, XOR-ed with successive 128-byte chunks of
 * both scratchpads, passed through ten vaes_round() calls per chunk, and
 * finally written back to the same state bytes.
 *
 * @param ctx1     first context (scratchpad in ->memory, hash state in ->state)
 * @param ctx2     second context, processed in lockstep with ctx1
 * @param memory   total amount of scratchpad data to fold, in bytes
 * @param half_mem when true the data is folded in two passes of half the block
 *                 count each, both starting at the scratchpad base; before the
 *                 second pass first_half is cleared on both contexts and the
 *                 explode step is run again
 */
NOINLINE void cn_implode_scratchpad_vaes_double(cryptonight_ctx* ctx1, cryptonight_ctx* ctx2, size_t memory, bool half_mem)
{
    // One 64-byte line expressed in __m128i elements.
    constexpr int line_step = 64 / sizeof(__m128i);

    const size_t pass_count      = half_mem ? 2 : 1;
    const size_t blocks_per_pass = (memory / sizeof(__m128i)) / pass_count;

    const __m128i* const scratch_base1 = reinterpret_cast<const __m128i*>(ctx1->memory);
    const __m128i* const scratch_base2 = reinterpret_cast<const __m128i*>(ctx2->memory);
    __m128i* const state1 = reinterpret_cast<__m128i*>(ctx1->state);
    __m128i* const state2 = reinterpret_cast<__m128i*>(ctx2->state);

    // Round keys are derived from the 32 bytes that start at state offset 32.
    __m256i k0, k1, k2, k3, k4, k5, k6, k7, k8, k9;
    vaes_genkey_double(state1 + 2, state2 + 2, &k0, &k1, &k2, &k3, &k4, &k5, &k6, &k7, &k8, &k9);

    // Low lane <- ctx1, high lane <- ctx2.
    __m256i acc0 = _mm256_loadu2_m128i(state2 + 4, state1 + 4);
    __m256i acc1 = _mm256_loadu2_m128i(state2 + 5, state1 + 5);
    __m256i acc2 = _mm256_loadu2_m128i(state2 + 6, state1 + 6);
    __m256i acc3 = _mm256_loadu2_m128i(state2 + 7, state1 + 7);
    __m256i acc4 = _mm256_loadu2_m128i(state2 + 8, state1 + 8);
    __m256i acc5 = _mm256_loadu2_m128i(state2 + 9, state1 + 9);
    __m256i acc6 = _mm256_loadu2_m128i(state2 + 10, state1 + 10);
    __m256i acc7 = _mm256_loadu2_m128i(state2 + 11, state1 + 11);

    const __m128i* src1 = scratch_base1;
    const __m128i* src2 = scratch_base2;

    for (size_t pass = 0; pass < pass_count; ++pass) {
        if (half_mem && (pass == 1)) {
            // Second half: rewind to the scratchpad base and regenerate its contents.
            src1 = scratch_base1;
            src2 = scratch_base2;
            ctx1->first_half = false;
            ctx2->first_half = false;
            cn_explode_scratchpad_vaes_double(ctx1, ctx2, memory, half_mem);
        }

        size_t consumed = 0;
        while (consumed < blocks_per_pass) {
            // First 64-byte line of the current 128-byte chunk.
            acc0 = _mm256_xor_si256(_mm256_loadu2_m128i(src2 + 0, src1 + 0), acc0);
            acc1 = _mm256_xor_si256(_mm256_loadu2_m128i(src2 + 1, src1 + 1), acc1);
            acc2 = _mm256_xor_si256(_mm256_loadu2_m128i(src2 + 2, src1 + 2), acc2);
            acc3 = _mm256_xor_si256(_mm256_loadu2_m128i(src2 + 3, src1 + 3), acc3);

            // Second 64-byte line of the chunk.
            const __m128i* const upper1 = src1 + line_step;
            const __m128i* const upper2 = src2 + line_step;
            acc4 = _mm256_xor_si256(_mm256_loadu2_m128i(upper2 + 0, upper1 + 0), acc4);
            acc5 = _mm256_xor_si256(_mm256_loadu2_m128i(upper2 + 1, upper1 + 1), acc5);
            acc6 = _mm256_xor_si256(_mm256_loadu2_m128i(upper2 + 2, upper1 + 2), acc6);
            acc7 = _mm256_xor_si256(_mm256_loadu2_m128i(upper2 + 3, upper1 + 3), acc7);

            src1 += line_step * 2;
            src2 += line_step * 2;
            consumed += 8;

            // Prefetch the next chunk only while one remains in this pass.
            if (consumed < blocks_per_pass) {
                _mm_prefetch(reinterpret_cast<const char*>(src1), _MM_HINT_T0);
                _mm_prefetch(reinterpret_cast<const char*>(src1 + line_step), _MM_HINT_T0);
                _mm_prefetch(reinterpret_cast<const char*>(src2), _MM_HINT_T0);
                _mm_prefetch(reinterpret_cast<const char*>(src2 + line_step), _MM_HINT_T0);
            }

            vaes_round(k0, acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7);
            vaes_round(k1, acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7);
            vaes_round(k2, acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7);
            vaes_round(k3, acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7);
            vaes_round(k4, acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7);
            vaes_round(k5, acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7);
            vaes_round(k6, acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7);
            vaes_round(k7, acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7);
            vaes_round(k8, acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7);
            vaes_round(k9, acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7);
        }
    }

    // Split the lanes again: low half back to ctx1, high half back to ctx2.
    _mm256_storeu2_m128i(state2 + 4, state1 + 4, acc0);
    _mm256_storeu2_m128i(state2 + 5, state1 + 5, acc1);
    _mm256_storeu2_m128i(state2 + 6, state1 + 6, acc2);
    _mm256_storeu2_m128i(state2 + 7, state1 + 7, acc3);
    _mm256_storeu2_m128i(state2 + 8, state1 + 8, acc4);
    _mm256_storeu2_m128i(state2 + 9, state1 + 9, acc5);
    _mm256_storeu2_m128i(state2 + 10, state1 + 10, acc6);
    _mm256_storeu2_m128i(state2 + 11, state1 + 11, acc7);

    _mm256_zeroupper();
}
} // xmrig

View File

@@ -0,0 +1,48 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2019 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#ifndef XMRIG_CRYPTONIGHT_X86_VAES_H
#define XMRIG_CRYPTONIGHT_X86_VAES_H
#include "crypto/cn/CnAlgo.h"
// Forward declaration only: this header passes the context around by pointer.
struct cryptonight_ctx;
namespace xmrig {
// Scratchpad explode/implode entry points of the VAES code path.
// The *_double variants take two contexts and process them together.
//
// memory   - amount of scratchpad data to process, in bytes
// half_mem - selects the two-pass mode; for the double implode this means two
//            passes over half the block count with an explode in between
//            (NOTE(review): exact effect on the other three is defined in the
//            implementation file - confirm there)
void cn_explode_scratchpad_vaes(cryptonight_ctx* ctx, size_t memory, bool half_mem);
void cn_explode_scratchpad_vaes_double(cryptonight_ctx* ctx1, cryptonight_ctx* ctx2, size_t memory, bool half_mem);
void cn_implode_scratchpad_vaes(cryptonight_ctx* ctx, size_t memory, bool half_mem);
void cn_implode_scratchpad_vaes_double(cryptonight_ctx* ctx1, cryptonight_ctx* ctx2, size_t memory, bool half_mem);
} // xmrig
#endif /* XMRIG_CRYPTONIGHT_X86_VAES_H */

View File

@@ -0,0 +1,132 @@
mov QWORD PTR [rsp+8], rbx
mov QWORD PTR [rsp+16], rbp
mov QWORD PTR [rsp+24], rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 32
mov rdx, QWORD PTR [rcx]
mov r8, QWORD PTR [rcx+8]
mov r12d, 524288
movaps XMMWORD PTR [rsp+16], xmm6
mov rbx, QWORD PTR [rdx+32]
xor rbx, QWORD PTR [rdx]
mov rsi, QWORD PTR [rdx+40]
mov r10, rbx
xor rsi, QWORD PTR [rdx+8]
and r10d, 2097136
mov rdi, QWORD PTR [r8+32]
xor rdi, QWORD PTR [r8]
movq xmm3, rbx
mov rbp, QWORD PTR [r8+40]
mov r9, rdi
xor rbp, QWORD PTR [r8+8]
movq xmm0, rsi
mov rcx, QWORD PTR [rdx+56]
and r9d, 2097136
xor rcx, QWORD PTR [rdx+24]
movq xmm4, rdi
mov rax, QWORD PTR [rdx+48]
xor rax, QWORD PTR [rdx+16]
mov r14, QWORD PTR [rdx+224]
mov r13, QWORD PTR [rdx+232]
mov r15, QWORD PTR [r8+224]
punpcklqdq xmm3, xmm0
movq xmm0, rbp
movq xmm5, rax
punpcklqdq xmm4, xmm0
mov rax, QWORD PTR [r8+48]
movq xmm0, rcx
xor rax, QWORD PTR [r8+16]
mov rcx, QWORD PTR [r8+56]
xor rcx, QWORD PTR [r8+24]
movdqu xmm1, XMMWORD PTR [r14+r10]
movq xmm6, rax
punpcklqdq xmm5, xmm0
mov rax, QWORD PTR [rdx+240]
movq xmm0, rcx
movdqu xmm2, XMMWORD PTR [r15+r9]
mov QWORD PTR [rsp], rax
mov rax, QWORD PTR [r8+240]
mov QWORD PTR [rsp+8], rax
punpcklqdq xmm6, xmm0
ALIGN(64)
main_loop_cnv1_double:
aesenc xmm1, xmm3
aesenc xmm2, xmm4
movdqa xmm0, xmm1
movq r11, xmm2
pxor xmm0, xmm5
movdqa xmm5, xmm1
movq QWORD PTR [r14+r10], xmm0
pextrq rcx, xmm0, 1
mov eax, ecx
movdqa xmm0, xmm2
shr rax, 24
pxor xmm0, xmm6
movdqa xmm6, xmm2
mov eax, DWORD PTR [r13+rax*4]
xor rax, rcx
mov QWORD PTR [r14+r10+8], rax
movq QWORD PTR [r15+r9], xmm0
pextrq rcx, xmm0, 1
mov eax, ecx
shr rax, 24
mov eax, DWORD PTR [r13+rax*4]
xor rax, rcx
movq rcx, xmm1
mov QWORD PTR [r15+r9+8], rax
mov r9, rcx
and r9d, 2097136
mov r10, QWORD PTR [r14+r9]
mov r8, QWORD PTR [r14+r9+8]
mov rax, r10
mul rcx
add rsi, rax
add rbx, rdx
mov rax, QWORD PTR [rsp]
mov QWORD PTR [r14+r9], rbx
xor rax, rsi
mov QWORD PTR [r14+r9+8], rax
xor rsi, r8
xor rbx, r10
mov r8, r11
and r8d, 2097136
mov r10, rbx
and r10d, 2097136
movq xmm3, rbx
pinsrq xmm3, rsi, 1
mov r9, QWORD PTR [r15+r8]
mov rcx, QWORD PTR [r15+r8+8]
mov rax, r9
movdqu xmm1, XMMWORD PTR [r14+r10]
mul r11
add rbp, rax
add rdi, rdx
mov rax, QWORD PTR [rsp+8]
mov QWORD PTR [r15+r8], rdi
xor rax, rbp
xor rdi, r9
mov QWORD PTR [r15+r8+8], rax
mov r9, rdi
xor rbp, rcx
and r9d, 2097136
movq xmm4, rdi
pinsrq xmm4, rbp, 1
movdqu xmm2, XMMWORD PTR [r15+r9]
sub r12, 1
jne main_loop_cnv1_double
mov rbx, QWORD PTR [rsp+80]
mov rbp, QWORD PTR [rsp+88]
mov rsi, QWORD PTR [rsp+96]
movaps xmm6, XMMWORD PTR [rsp+16]
add rsp, 32
pop r15
pop r14
pop r13
pop r12
pop rdi

View File

@@ -0,0 +1,263 @@
mov rax, rsp
mov QWORD PTR [rax+8], rbx
mov QWORD PTR [rax+16], rbp
mov QWORD PTR [rax+24], rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 144
mov r8, QWORD PTR [rcx]
mov r9, QWORD PTR [rcx+8]
mov r10, QWORD PTR [rcx+16]
mov r11, QWORD PTR [rcx+24]
mov rbp, QWORD PTR [r8+224]
mov r13, QWORD PTR [r8+232]
mov r14, QWORD PTR [r9+224]
mov r15, QWORD PTR [r10+224]
mov r12, QWORD PTR [r11+224]
mov rcx, QWORD PTR [r8+40]
xor rcx, QWORD PTR [r8+8]
mov rbx, QWORD PTR [r8+32]
xor rbx, QWORD PTR [r8]
mov rdi, QWORD PTR [r9+32]
xor rdi, QWORD PTR [r9]
movq xmm0, rcx
mov rcx, QWORD PTR [r9+40]
xor rcx, QWORD PTR [r9+8]
movq xmm1, rbx
movaps XMMWORD PTR [rax-56], xmm6
movaps XMMWORD PTR [rax-72], xmm7
movaps XMMWORD PTR [rax-88], xmm8
movaps XMMWORD PTR [rax-104], xmm9
movaps XMMWORD PTR [rax-120], xmm10
movaps XMMWORD PTR [rsp+48], xmm11
movaps XMMWORD PTR [rsp+32], xmm12
and ebx, 2097136
mov rsi, QWORD PTR [r10+32]
movq xmm2, rdi
mov rax, QWORD PTR [r8+240]
and edi, 2097136
xor rsi, QWORD PTR [r10]
mov rdx, QWORD PTR [r8+56]
xor rdx, QWORD PTR [r8+24]
mov QWORD PTR [rsp], rax
mov rax, QWORD PTR [r9+240]
movq xmm3, rsi
mov QWORD PTR [rsp+8], rax
and esi, 2097136
mov rax, QWORD PTR [r10+240]
punpcklqdq xmm1, xmm0
movq xmm0, rcx
mov rcx, QWORD PTR [r10+40]
xor rcx, QWORD PTR [r10+8]
mov QWORD PTR [rsp+16], rax
mov rax, QWORD PTR [r11+240]
punpcklqdq xmm2, xmm0
movq xmm0, rcx
mov QWORD PTR [rsp+24], rax
mov rcx, QWORD PTR [r11+40]
xor rcx, QWORD PTR [r11+8]
mov rax, QWORD PTR [r11+32]
xor rax, QWORD PTR [r11]
punpcklqdq xmm3, xmm0
movq xmm0, rcx
mov rcx, QWORD PTR [r8+48]
xor rcx, QWORD PTR [r8+16]
movq xmm4, rax
and eax, 2097136
punpcklqdq xmm4, xmm0
movq xmm0, rdx
mov rdx, QWORD PTR [r9+56]
xor rdx, QWORD PTR [r9+24]
movq xmm5, rcx
mov rcx, QWORD PTR [r9+48]
xor rcx, QWORD PTR [r9+16]
punpcklqdq xmm5, xmm0
movq xmm0, rdx
mov rdx, QWORD PTR [r10+56]
xor rdx, QWORD PTR [r10+24]
movq xmm6, rcx
mov rcx, QWORD PTR [r10+48]
xor rcx, QWORD PTR [r10+16]
punpcklqdq xmm6, xmm0
movq xmm0, rdx
mov rdx, QWORD PTR [r11+56]
movq xmm7, rcx
punpcklqdq xmm7, xmm0
xor rdx, QWORD PTR [r11+24]
mov rcx, QWORD PTR [r11+48]
xor rcx, QWORD PTR [r11+16]
mov r11d, 524288
movdqu xmm9, XMMWORD PTR [rbp+rbx]
movdqu xmm10, XMMWORD PTR [r14+rdi]
movq xmm0, rdx
movdqu xmm11, XMMWORD PTR [r15+rsi]
movdqu xmm12, XMMWORD PTR [r12+rax]
movq xmm8, rcx
punpcklqdq xmm8, xmm0
ALIGN(64)
main_loop_cnv1_quad:
aesenc xmm9, xmm1
aesenc xmm10, xmm2
aesenc xmm11, xmm3
aesenc xmm12, xmm4
movd ecx, xmm9
and ecx, 2097136
prefetcht0 BYTE PTR [rcx+rbp]
movd ecx, xmm10
and ecx, 2097136
prefetcht0 BYTE PTR [rcx+r14]
movd ecx, xmm11
and ecx, 2097136
prefetcht0 BYTE PTR [rcx+r15]
movd ecx, xmm12
and ecx, 2097136
prefetcht0 BYTE PTR [rcx+r12]
movdqa xmm0, xmm9
pxor xmm0, xmm5
movdqa xmm5, xmm9
movq QWORD PTR [rbp+rbx], xmm0
pextrq rdx, xmm0, 1
mov ecx, edx
movdqa xmm0, xmm10
shr rcx, 24
pxor xmm0, xmm6
mov ecx, DWORD PTR [r13+rcx*4]
xor rcx, rdx
mov QWORD PTR [rbp+rbx+8], rcx
movq rbx, xmm1
movq QWORD PTR [r14+rdi], xmm0
pextrq rdx, xmm0, 1
mov ecx, edx
movdqa xmm0, xmm11
shr rcx, 24
pxor xmm0, xmm7
mov ecx, DWORD PTR [r13+rcx*4]
xor rcx, rdx
mov QWORD PTR [r14+rdi+8], rcx
movq rdi, xmm2
movq QWORD PTR [r15+rsi], xmm0
pextrq rdx, xmm0, 1
mov ecx, edx
movdqa xmm0, xmm12
shr rcx, 24
pxor xmm0, xmm8
mov ecx, DWORD PTR [r13+rcx*4]
xor rcx, rdx
mov QWORD PTR [r15+rsi+8], rcx
movq QWORD PTR [r12+rax], xmm0
pextrq rdx, xmm0, 1
mov ecx, edx
shr rcx, 24
mov ecx, DWORD PTR [r13+rcx*4]
xor rcx, rdx
mov QWORD PTR [r12+rax+8], rcx
movq rcx, xmm9
mov r8, rcx
and r8d, 2097136
mov r9, QWORD PTR [rbp+r8]
mov r10, QWORD PTR [rbp+r8+8]
mov rax, r9
mul rcx
pextrq rcx, xmm1, 1
add rcx, rax
add rbx, rdx
mov rax, QWORD PTR [rsp]
mov QWORD PTR [rbp+r8], rbx
xor rax, rcx
mov QWORD PTR [rbp+r8+8], rax
xor rcx, r10
xor rbx, r9
movq xmm1, rbx
and ebx, 2097136
pinsrq xmm1, rcx, 1
movq rcx, xmm10
mov r8, rcx
and r8d, 2097136
movdqu xmm9, XMMWORD PTR [rbp+rbx]
mov r9, QWORD PTR [r14+r8]
mov r10, QWORD PTR [r14+r8+8]
mov rax, r9
mul rcx
pextrq rcx, xmm2, 1
add rcx, rax
add rdi, rdx
mov rax, QWORD PTR [rsp+8]
mov QWORD PTR [r14+r8], rdi
xor rax, rcx
xor rdi, r9
mov QWORD PTR [r14+r8+8], rax
xor rcx, r10
movq xmm2, rdi
and edi, 2097136
pinsrq xmm2, rcx, 1
movq rcx, xmm11
movq rsi, xmm3
mov r8, rcx
and r8d, 2097136
movdqa xmm6, xmm10
movdqa xmm7, xmm11
movdqa xmm8, xmm12
movdqu xmm10, XMMWORD PTR [r14+rdi]
mov r9, QWORD PTR [r15+r8]
mov r10, QWORD PTR [r15+r8+8]
mov rax, r9
mul rcx
pextrq rcx, xmm3, 1
add rcx, rax
add rsi, rdx
mov rax, QWORD PTR [rsp+16]
xor rax, rcx
mov QWORD PTR [r15+r8], rsi
mov QWORD PTR [r15+r8+8], rax
xor rcx, r10
xor rsi, r9
movq xmm3, rsi
and esi, 2097136
pinsrq xmm3, rcx, 1
movq rcx, xmm12
mov r8, rcx
and r8d, 2097136
movdqu xmm11, XMMWORD PTR [r15+rsi]
mov r9, QWORD PTR [r12+r8]
mov r10, QWORD PTR [r12+r8+8]
mov rax, r9
mul rcx
mov rcx, rax
movq rax, xmm4
add rax, rdx
mov QWORD PTR [r12+r8], rax
xor rax, r9
pextrq rdx, xmm4, 1
add rdx, rcx
mov rcx, QWORD PTR [rsp+24]
xor rcx, rdx
xor rdx, r10
movq xmm4, rax
mov QWORD PTR [r12+r8+8], rcx
and eax, 2097136
pinsrq xmm4, rdx, 1
movdqu xmm12, XMMWORD PTR [r12+rax]
sub r11, 1
jne main_loop_cnv1_quad
movaps xmm7, XMMWORD PTR [rsp+112]
lea r11, QWORD PTR [rsp+144]
mov rbx, QWORD PTR [r11+48]
mov rbp, QWORD PTR [r11+56]
mov rsi, QWORD PTR [r11+64]
movaps xmm6, XMMWORD PTR [r11-16]
movaps xmm8, XMMWORD PTR [r11-48]
movaps xmm9, XMMWORD PTR [r11-64]
movaps xmm10, XMMWORD PTR [r11-80]
movaps xmm11, XMMWORD PTR [r11-96]
movaps xmm12, XMMWORD PTR [r11-112]
mov rsp, r11
pop r15
pop r14
pop r13
pop r12
pop rdi

View File

@@ -0,0 +1,66 @@
mov QWORD PTR [rsp+8], rbx
mov QWORD PTR [rsp+16], rbp
mov QWORD PTR [rsp+24], rsi
mov QWORD PTR [rsp+32], rdi
push r13
push r14
push r15
mov rdx, QWORD PTR [rcx]
mov esi, 524288
mov r11, QWORD PTR [rdx+32]
xor r11, QWORD PTR [rdx]
mov rdi, QWORD PTR [rdx+224]
mov rbx, QWORD PTR [rdx+40]
xor rbx, QWORD PTR [rdx+8]
mov rcx, QWORD PTR [rdx+56]
xor rcx, QWORD PTR [rdx+24]
mov rax, QWORD PTR [rdx+48]
xor rax, QWORD PTR [rdx+16]
mov rbp, QWORD PTR [rdx+240]
mov r14, QWORD PTR [rdx+232]
movq xmm2, rax
pinsrq xmm2, rcx, 1
ALIGN(64)
main_loop_cnv1_single:
mov r8, r11
and r8d, 2097136
movdqu xmm1, XMMWORD PTR [rdi+r8]
movq xmm0, r11
pinsrq xmm0, rbx, 1
aesenc xmm1, xmm0
movq r15, xmm1
mov r9, r15
and r9d, 2097136
movdqa xmm0, xmm1
pxor xmm0, xmm2
movdqa xmm2, xmm1
movq QWORD PTR [rdi+r8], xmm0
pextrq rdx, xmm0, 1
mov eax, edx
shr rax, 24
mov ecx, DWORD PTR [r14+rax*4]
xor rcx, rdx
mov QWORD PTR [rdi+r8+8], rcx
mov r10, QWORD PTR [rdi+r9]
mov r8, QWORD PTR [rdi+r9+8]
mov rax, r10
mul r15
add rbx, rax
add r11, rdx
mov QWORD PTR [rdi+r9], r11
mov rax, rbx
xor rbx, r8
xor r11, r10
xor rax, rbp
mov QWORD PTR [rdi+r9+8], rax
sub rsi, 1
jne main_loop_cnv1_single
pop r15
pop r14
pop r13
mov rbx, QWORD PTR [rsp+8]
mov rbp, QWORD PTR [rsp+16]
mov rsi, QWORD PTR [rsp+24]
mov rdi, QWORD PTR [rsp+32]

View File

@@ -11,6 +11,9 @@
# define FN_PREFIX(fn) fn
.section .text
#endif
.global FN_PREFIX(cnv1_single_mainloop_asm)
.global FN_PREFIX(cnv1_double_mainloop_asm)
.global FN_PREFIX(cnv1_quad_mainloop_asm)
.global FN_PREFIX(cnv2_mainloop_ivybridge_asm)
.global FN_PREFIX(cnv2_mainloop_ryzen_asm)
.global FN_PREFIX(cnv2_mainloop_bulldozer_asm)
@@ -19,6 +22,33 @@
.global FN_PREFIX(cnv2_rwz_double_mainloop_asm)
.global FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm)
ALIGN(64)
/* Entry point that wraps the shared cnv1 single main-loop body. */
FN_PREFIX(cnv1_single_mainloop_asm):
/* Reserve scratch space: the included body starts by storing rbx/rbp/rsi/rdi at [rsp+8]..[rsp+32]. */
sub rsp, 48
/* The included body reads its argument through rcx; rdi carries the first integer argument under the System V AMD64 ABI. */
mov rcx, rdi
#include "cn1/cnv1_single_main_loop.inc"
add rsp, 48
ret 0
/* 3735929054 == 0xDEADC0DE. Follows ret, so it is never executed; presumably an end-of-routine marker - TODO confirm what scans for it. */
mov eax, 3735929054
ALIGN(64)
/* Entry point that wraps the shared cnv1 double main-loop body (two contexts, read from [rcx] and [rcx+8]). */
FN_PREFIX(cnv1_double_mainloop_asm):
/* Reserve scratch space: the included body starts by storing rbx/rbp/rsi at [rsp+8]..[rsp+24]. */
sub rsp, 48
/* The included body reads its argument through rcx; rdi carries the first integer argument under the System V AMD64 ABI. */
mov rcx, rdi
#include "cn1/cnv1_double_main_loop.inc"
add rsp, 48
ret 0
/* 3735929054 == 0xDEADC0DE. Follows ret, so it is never executed; presumably an end-of-routine marker - TODO confirm what scans for it. */
mov eax, 3735929054
ALIGN(64)
/* Entry point that wraps the shared cnv1 quad main-loop body (four contexts, read from [rcx]..[rcx+24]). */
FN_PREFIX(cnv1_quad_mainloop_asm):
/* Reserve scratch space: the included body copies rsp to rax and stores rbx/rbp/rsi at [rax+8]..[rax+24]. */
sub rsp, 48
/* The included body reads its argument through rcx; rdi carries the first integer argument under the System V AMD64 ABI. */
mov rcx, rdi
#include "cn1/cnv1_quad_main_loop.inc"
add rsp, 48
ret 0
/* 3735929054 == 0xDEADC0DE. Follows ret, so it is never executed; presumably an end-of-routine marker - TODO confirm what scans for it. */
mov eax, 3735929054
ALIGN(64)
FN_PREFIX(cnv2_mainloop_ivybridge_asm):
sub rsp, 48

View File

@@ -1,4 +1,7 @@
_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE
PUBLIC cnv1_single_mainloop_asm
PUBLIC cnv1_double_mainloop_asm
PUBLIC cnv1_quad_mainloop_asm
PUBLIC cnv2_mainloop_ivybridge_asm
PUBLIC cnv2_mainloop_ryzen_asm
PUBLIC cnv2_mainloop_bulldozer_asm
@@ -6,6 +9,27 @@ PUBLIC cnv2_double_mainloop_sandybridge_asm
PUBLIC cnv2_rwz_mainloop_asm
PUBLIC cnv2_rwz_double_mainloop_asm
ALIGN(64)
; cnv1 single main loop: the whole body comes from the shared include file.
cnv1_single_mainloop_asm PROC
INCLUDE cn1/cnv1_single_main_loop.inc
ret 0
; 3735929054 == 0xDEADC0DE. Follows ret, so it is never executed;
; presumably an end-of-routine marker - TODO confirm what scans for it.
mov eax, 3735929054
cnv1_single_mainloop_asm ENDP
ALIGN(64)
; cnv1 double main loop: the whole body comes from the shared include file.
cnv1_double_mainloop_asm PROC
INCLUDE cn1/cnv1_double_main_loop.inc
ret 0
; 3735929054 == 0xDEADC0DE. Follows ret, so it is never executed;
; presumably an end-of-routine marker - TODO confirm what scans for it.
mov eax, 3735929054
cnv1_double_mainloop_asm ENDP
ALIGN(64)
; cnv1 quad main loop: the whole body comes from the shared include file.
cnv1_quad_mainloop_asm PROC
INCLUDE cn1/cnv1_quad_main_loop.inc
ret 0
; 3735929054 == 0xDEADC0DE. Follows ret, so it is never executed;
; presumably an end-of-routine marker - TODO confirm what scans for it.
mov eax, 3735929054
cnv1_quad_mainloop_asm ENDP
ALIGN(64)
cnv2_mainloop_ivybridge_asm PROC
INCLUDE cn2/cnv2_main_loop_ivybridge.inc

View File

@@ -0,0 +1,132 @@
mov QWORD PTR [rsp+8], rbx
mov QWORD PTR [rsp+16], rbp
mov QWORD PTR [rsp+24], rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 32
mov rdx, QWORD PTR [rcx]
mov r8, QWORD PTR [rcx+8]
mov r12d, 524288
movaps XMMWORD PTR [rsp+16], xmm6
mov rbx, QWORD PTR [rdx+32]
xor rbx, QWORD PTR [rdx]
mov rsi, QWORD PTR [rdx+40]
mov r10, rbx
xor rsi, QWORD PTR [rdx+8]
and r10d, 2097136
mov rdi, QWORD PTR [r8+32]
xor rdi, QWORD PTR [r8]
movd xmm3, rbx
mov rbp, QWORD PTR [r8+40]
mov r9, rdi
xor rbp, QWORD PTR [r8+8]
movd xmm0, rsi
mov rcx, QWORD PTR [rdx+56]
and r9d, 2097136
xor rcx, QWORD PTR [rdx+24]
movd xmm4, rdi
mov rax, QWORD PTR [rdx+48]
xor rax, QWORD PTR [rdx+16]
mov r14, QWORD PTR [rdx+224]
mov r13, QWORD PTR [rdx+232]
mov r15, QWORD PTR [r8+224]
punpcklqdq xmm3, xmm0
movd xmm0, rbp
movd xmm5, rax
punpcklqdq xmm4, xmm0
mov rax, QWORD PTR [r8+48]
movd xmm0, rcx
xor rax, QWORD PTR [r8+16]
mov rcx, QWORD PTR [r8+56]
xor rcx, QWORD PTR [r8+24]
movdqu xmm1, XMMWORD PTR [r14+r10]
movd xmm6, rax
punpcklqdq xmm5, xmm0
mov rax, QWORD PTR [rdx+240]
movd xmm0, rcx
movdqu xmm2, XMMWORD PTR [r15+r9]
mov QWORD PTR [rsp], rax
mov rax, QWORD PTR [r8+240]
mov QWORD PTR [rsp+8], rax
punpcklqdq xmm6, xmm0
ALIGN(64)
main_loop_cnv1_double:
aesenc xmm1, xmm3
aesenc xmm2, xmm4
movdqa xmm0, xmm1
movd r11, xmm2
pxor xmm0, xmm5
movdqa xmm5, xmm1
movd QWORD PTR [r14+r10], xmm0
pextrq rcx, xmm0, 1
mov eax, ecx
movdqa xmm0, xmm2
shr rax, 24
pxor xmm0, xmm6
movdqa xmm6, xmm2
mov eax, DWORD PTR [r13+rax*4]
xor rax, rcx
mov QWORD PTR [r14+r10+8], rax
movd QWORD PTR [r15+r9], xmm0
pextrq rcx, xmm0, 1
mov eax, ecx
shr rax, 24
mov eax, DWORD PTR [r13+rax*4]
xor rax, rcx
movd rcx, xmm1
mov QWORD PTR [r15+r9+8], rax
mov r9, rcx
and r9d, 2097136
mov r10, QWORD PTR [r14+r9]
mov r8, QWORD PTR [r14+r9+8]
mov rax, r10
mul rcx
add rsi, rax
add rbx, rdx
mov rax, QWORD PTR [rsp]
mov QWORD PTR [r14+r9], rbx
xor rax, rsi
mov QWORD PTR [r14+r9+8], rax
xor rsi, r8
xor rbx, r10
mov r8, r11
and r8d, 2097136
mov r10, rbx
and r10d, 2097136
movd xmm3, rbx
pinsrq xmm3, rsi, 1
mov r9, QWORD PTR [r15+r8]
mov rcx, QWORD PTR [r15+r8+8]
mov rax, r9
movdqu xmm1, XMMWORD PTR [r14+r10]
mul r11
add rbp, rax
add rdi, rdx
mov rax, QWORD PTR [rsp+8]
mov QWORD PTR [r15+r8], rdi
xor rax, rbp
xor rdi, r9
mov QWORD PTR [r15+r8+8], rax
mov r9, rdi
xor rbp, rcx
and r9d, 2097136
movd xmm4, rdi
pinsrq xmm4, rbp, 1
movdqu xmm2, XMMWORD PTR [r15+r9]
sub r12, 1
jne main_loop_cnv1_double
mov rbx, QWORD PTR [rsp+80]
mov rbp, QWORD PTR [rsp+88]
mov rsi, QWORD PTR [rsp+96]
movaps xmm6, XMMWORD PTR [rsp+16]
add rsp, 32
pop r15
pop r14
pop r13
pop r12
pop rdi

View File

@@ -0,0 +1,263 @@
mov rax, rsp
mov QWORD PTR [rax+8], rbx
mov QWORD PTR [rax+16], rbp
mov QWORD PTR [rax+24], rsi
push rdi
push r12
push r13
push r14
push r15
sub rsp, 144
mov r8, QWORD PTR [rcx]
mov r9, QWORD PTR [rcx+8]
mov r10, QWORD PTR [rcx+16]
mov r11, QWORD PTR [rcx+24]
mov rbp, QWORD PTR [r8+224]
mov r13, QWORD PTR [r8+232]
mov r14, QWORD PTR [r9+224]
mov r15, QWORD PTR [r10+224]
mov r12, QWORD PTR [r11+224]
mov rcx, QWORD PTR [r8+40]
xor rcx, QWORD PTR [r8+8]
mov rbx, QWORD PTR [r8+32]
xor rbx, QWORD PTR [r8]
mov rdi, QWORD PTR [r9+32]
xor rdi, QWORD PTR [r9]
movd xmm0, rcx
mov rcx, QWORD PTR [r9+40]
xor rcx, QWORD PTR [r9+8]
movd xmm1, rbx
movaps XMMWORD PTR [rax-56], xmm6
movaps XMMWORD PTR [rax-72], xmm7
movaps XMMWORD PTR [rax-88], xmm8
movaps XMMWORD PTR [rax-104], xmm9
movaps XMMWORD PTR [rax-120], xmm10
movaps XMMWORD PTR [rsp+48], xmm11
movaps XMMWORD PTR [rsp+32], xmm12
and ebx, 2097136
mov rsi, QWORD PTR [r10+32]
movd xmm2, rdi
mov rax, QWORD PTR [r8+240]
and edi, 2097136
xor rsi, QWORD PTR [r10]
mov rdx, QWORD PTR [r8+56]
xor rdx, QWORD PTR [r8+24]
mov QWORD PTR [rsp], rax
mov rax, QWORD PTR [r9+240]
movd xmm3, rsi
mov QWORD PTR [rsp+8], rax
and esi, 2097136
mov rax, QWORD PTR [r10+240]
punpcklqdq xmm1, xmm0
movd xmm0, rcx
mov rcx, QWORD PTR [r10+40]
xor rcx, QWORD PTR [r10+8]
mov QWORD PTR [rsp+16], rax
mov rax, QWORD PTR [r11+240]
punpcklqdq xmm2, xmm0
movd xmm0, rcx
mov QWORD PTR [rsp+24], rax
mov rcx, QWORD PTR [r11+40]
xor rcx, QWORD PTR [r11+8]
mov rax, QWORD PTR [r11+32]
xor rax, QWORD PTR [r11]
punpcklqdq xmm3, xmm0
movd xmm0, rcx
mov rcx, QWORD PTR [r8+48]
xor rcx, QWORD PTR [r8+16]
movd xmm4, rax
and eax, 2097136
punpcklqdq xmm4, xmm0
movd xmm0, rdx
mov rdx, QWORD PTR [r9+56]
xor rdx, QWORD PTR [r9+24]
movd xmm5, rcx
mov rcx, QWORD PTR [r9+48]
xor rcx, QWORD PTR [r9+16]
punpcklqdq xmm5, xmm0
movd xmm0, rdx
mov rdx, QWORD PTR [r10+56]
xor rdx, QWORD PTR [r10+24]
movd xmm6, rcx
mov rcx, QWORD PTR [r10+48]
xor rcx, QWORD PTR [r10+16]
punpcklqdq xmm6, xmm0
movd xmm0, rdx
mov rdx, QWORD PTR [r11+56]
movd xmm7, rcx
punpcklqdq xmm7, xmm0
xor rdx, QWORD PTR [r11+24]
mov rcx, QWORD PTR [r11+48]
xor rcx, QWORD PTR [r11+16]
mov r11d, 524288
movdqu xmm9, XMMWORD PTR [rbp+rbx]
movdqu xmm10, XMMWORD PTR [r14+rdi]
movd xmm0, rdx
movdqu xmm11, XMMWORD PTR [r15+rsi]
movdqu xmm12, XMMWORD PTR [r12+rax]
movd xmm8, rcx
punpcklqdq xmm8, xmm0
ALIGN(64)
main_loop_cnv1_quad:
aesenc xmm9, xmm1
aesenc xmm10, xmm2
aesenc xmm11, xmm3
aesenc xmm12, xmm4
movd ecx, xmm9
and ecx, 2097136
prefetcht0 BYTE PTR [rcx+rbp]
movd ecx, xmm10
and ecx, 2097136
prefetcht0 BYTE PTR [rcx+r14]
movd ecx, xmm11
and ecx, 2097136
prefetcht0 BYTE PTR [rcx+r15]
movd ecx, xmm12
and ecx, 2097136
prefetcht0 BYTE PTR [rcx+r12]
movdqa xmm0, xmm9
pxor xmm0, xmm5
movdqa xmm5, xmm9
movd QWORD PTR [rbp+rbx], xmm0
pextrq rdx, xmm0, 1
mov ecx, edx
movdqa xmm0, xmm10
shr rcx, 24
pxor xmm0, xmm6
mov ecx, DWORD PTR [r13+rcx*4]
xor rcx, rdx
mov QWORD PTR [rbp+rbx+8], rcx
movd rbx, xmm1
movd QWORD PTR [r14+rdi], xmm0
pextrq rdx, xmm0, 1
mov ecx, edx
movdqa xmm0, xmm11
shr rcx, 24
pxor xmm0, xmm7
mov ecx, DWORD PTR [r13+rcx*4]
xor rcx, rdx
mov QWORD PTR [r14+rdi+8], rcx
movd rdi, xmm2
movd QWORD PTR [r15+rsi], xmm0
pextrq rdx, xmm0, 1
mov ecx, edx
movdqa xmm0, xmm12
shr rcx, 24
pxor xmm0, xmm8
mov ecx, DWORD PTR [r13+rcx*4]
xor rcx, rdx
mov QWORD PTR [r15+rsi+8], rcx
movd QWORD PTR [r12+rax], xmm0
pextrq rdx, xmm0, 1
mov ecx, edx
shr rcx, 24
mov ecx, DWORD PTR [r13+rcx*4]
xor rcx, rdx
mov QWORD PTR [r12+rax+8], rcx
movd rcx, xmm9
mov r8, rcx
and r8d, 2097136
mov r9, QWORD PTR [rbp+r8]
mov r10, QWORD PTR [rbp+r8+8]
mov rax, r9
mul rcx
pextrq rcx, xmm1, 1
add rcx, rax
add rbx, rdx
mov rax, QWORD PTR [rsp]
mov QWORD PTR [rbp+r8], rbx
xor rax, rcx
mov QWORD PTR [rbp+r8+8], rax
xor rcx, r10
xor rbx, r9
movd xmm1, rbx
and ebx, 2097136
pinsrq xmm1, rcx, 1
movd rcx, xmm10
mov r8, rcx
and r8d, 2097136
movdqu xmm9, XMMWORD PTR [rbp+rbx]
mov r9, QWORD PTR [r14+r8]
mov r10, QWORD PTR [r14+r8+8]
mov rax, r9
mul rcx
pextrq rcx, xmm2, 1
add rcx, rax
add rdi, rdx
mov rax, QWORD PTR [rsp+8]
mov QWORD PTR [r14+r8], rdi
xor rax, rcx
xor rdi, r9
mov QWORD PTR [r14+r8+8], rax
xor rcx, r10
movd xmm2, rdi
and edi, 2097136
pinsrq xmm2, rcx, 1
movd rcx, xmm11
movd rsi, xmm3
mov r8, rcx
and r8d, 2097136
movdqa xmm6, xmm10
movdqa xmm7, xmm11
movdqa xmm8, xmm12
movdqu xmm10, XMMWORD PTR [r14+rdi]
mov r9, QWORD PTR [r15+r8]
mov r10, QWORD PTR [r15+r8+8]
mov rax, r9
mul rcx
pextrq rcx, xmm3, 1
add rcx, rax
add rsi, rdx
mov rax, QWORD PTR [rsp+16]
xor rax, rcx
mov QWORD PTR [r15+r8], rsi
mov QWORD PTR [r15+r8+8], rax
xor rcx, r10
xor rsi, r9
movd xmm3, rsi
and esi, 2097136
pinsrq xmm3, rcx, 1
movd rcx, xmm12
mov r8, rcx
and r8d, 2097136
movdqu xmm11, XMMWORD PTR [r15+rsi]
mov r9, QWORD PTR [r12+r8]
mov r10, QWORD PTR [r12+r8+8]
mov rax, r9
mul rcx
mov rcx, rax
movd rax, xmm4
add rax, rdx
mov QWORD PTR [r12+r8], rax
xor rax, r9
pextrq rdx, xmm4, 1
add rdx, rcx
mov rcx, QWORD PTR [rsp+24]
xor rcx, rdx
xor rdx, r10
movd xmm4, rax
mov QWORD PTR [r12+r8+8], rcx
and eax, 2097136
pinsrq xmm4, rdx, 1
movdqu xmm12, XMMWORD PTR [r12+rax]
sub r11, 1
jne main_loop_cnv1_quad
movaps xmm7, XMMWORD PTR [rsp+112]
lea r11, QWORD PTR [rsp+144]
mov rbx, QWORD PTR [r11+48]
mov rbp, QWORD PTR [r11+56]
mov rsi, QWORD PTR [r11+64]
movaps xmm6, XMMWORD PTR [r11-16]
movaps xmm8, XMMWORD PTR [r11-48]
movaps xmm9, XMMWORD PTR [r11-64]
movaps xmm10, XMMWORD PTR [r11-80]
movaps xmm11, XMMWORD PTR [r11-96]
movaps xmm12, XMMWORD PTR [r11-112]
mov rsp, r11
pop r15
pop r14
pop r13
pop r12
pop rdi

View File

@@ -0,0 +1,66 @@
mov QWORD PTR [rsp+8], rbx
mov QWORD PTR [rsp+16], rbp
mov QWORD PTR [rsp+24], rsi
mov QWORD PTR [rsp+32], rdi
push r13
push r14
push r15
mov rdx, QWORD PTR [rcx]
mov esi, 524288
mov r11, QWORD PTR [rdx+32]
xor r11, QWORD PTR [rdx]
mov rdi, QWORD PTR [rdx+224]
mov rbx, QWORD PTR [rdx+40]
xor rbx, QWORD PTR [rdx+8]
mov rcx, QWORD PTR [rdx+56]
xor rcx, QWORD PTR [rdx+24]
mov rax, QWORD PTR [rdx+48]
xor rax, QWORD PTR [rdx+16]
mov rbp, QWORD PTR [rdx+240]
mov r14, QWORD PTR [rdx+232]
movd xmm2, rax
pinsrq xmm2, rcx, 1
ALIGN(64)
main_loop_cnv1_single:
mov r8, r11
and r8d, 2097136
movdqu xmm1, XMMWORD PTR [rdi+r8]
movd xmm0, r11
pinsrq xmm0, rbx, 1
aesenc xmm1, xmm0
movd r15, xmm1
mov r9, r15
and r9d, 2097136
movdqa xmm0, xmm1
pxor xmm0, xmm2
movdqa xmm2, xmm1
movd QWORD PTR [rdi+r8], xmm0
pextrq rdx, xmm0, 1
mov eax, edx
shr rax, 24
mov ecx, DWORD PTR [r14+rax*4]
xor rcx, rdx
mov QWORD PTR [rdi+r8+8], rcx
mov r10, QWORD PTR [rdi+r9]
mov r8, QWORD PTR [rdi+r9+8]
mov rax, r10
mul r15
add rbx, rax
add r11, rdx
mov QWORD PTR [rdi+r9], r11
mov rax, rbx
xor rbx, r8
xor r11, r10
xor rax, rbp
mov QWORD PTR [rdi+r9+8], rax
sub rsi, 1
jne main_loop_cnv1_single
pop r15
pop r14
pop r13
mov rbx, QWORD PTR [rsp+8]
mov rbp, QWORD PTR [rsp+16]
mov rsi, QWORD PTR [rsp+24]
mov rdi, QWORD PTR [rsp+32]

View File

@@ -1,6 +1,9 @@
#define ALIGN(x) .align 64
.intel_syntax noprefix
.section .text
.global cnv1_single_mainloop_asm
.global cnv1_double_mainloop_asm
.global cnv1_quad_mainloop_asm
.global cnv2_mainloop_ivybridge_asm
.global cnv2_mainloop_ryzen_asm
.global cnv2_mainloop_bulldozer_asm
@@ -9,6 +12,24 @@
.global cnv2_rwz_double_mainloop_asm
.global cnv2_upx_double_mainloop_zen3_asm
ALIGN(64)
/* cnv1 single main loop: the included body is used as-is and takes its argument in rcx. */
cnv1_single_mainloop_asm:
#include "../cn1/cnv1_single_main_loop.inc"
ret 0
/* 3735929054 == 0xDEADC0DE. Follows ret, so it is never executed; presumably an end-of-routine marker - TODO confirm what scans for it. */
mov eax, 3735929054
ALIGN(64)
/* cnv1 double main loop: the included body is used as-is and takes its argument in rcx. */
cnv1_double_mainloop_asm:
#include "../cn1/cnv1_double_main_loop.inc"
ret 0
/* 3735929054 == 0xDEADC0DE. Follows ret, so it is never executed; presumably an end-of-routine marker - TODO confirm what scans for it. */
mov eax, 3735929054
ALIGN(64)
/* cnv1 quad main loop: the included body is used as-is and takes its argument in rcx. */
cnv1_quad_mainloop_asm:
#include "../cn1/cnv1_quad_main_loop.inc"
ret 0
/* 3735929054 == 0xDEADC0DE. Follows ret, so it is never executed; presumably an end-of-routine marker - TODO confirm what scans for it. */
mov eax, 3735929054
ALIGN(64)
cnv2_mainloop_ivybridge_asm:
#include "../cn2/cnv2_main_loop_ivybridge.inc"

View File

@@ -1,4 +1,7 @@
_TEXT_CNV2_MAINLOOP SEGMENT PAGE READ EXECUTE
PUBLIC cnv1_single_mainloop_asm
PUBLIC cnv1_double_mainloop_asm
PUBLIC cnv1_quad_mainloop_asm
PUBLIC cnv2_mainloop_ivybridge_asm
PUBLIC cnv2_mainloop_ryzen_asm
PUBLIC cnv2_mainloop_bulldozer_asm
@@ -6,28 +9,49 @@ PUBLIC cnv2_double_mainloop_sandybridge_asm
PUBLIC cnv2_rwz_mainloop_asm
PUBLIC cnv2_rwz_double_mainloop_asm
ALIGN 64
ALIGN(64)
; cnv1 single main loop: the whole body comes from the shared include file.
cnv1_single_mainloop_asm PROC
INCLUDE cn1/cnv1_single_main_loop.inc
ret 0
; 3735929054 == 0xDEADC0DE. Follows ret, so it is never executed;
; presumably an end-of-routine marker - TODO confirm what scans for it.
mov eax, 3735929054
cnv1_single_mainloop_asm ENDP
ALIGN(64)
; cnv1 double main loop: the whole body comes from the shared include file.
cnv1_double_mainloop_asm PROC
INCLUDE cn1/cnv1_double_main_loop.inc
ret 0
; 3735929054 == 0xDEADC0DE. Follows ret, so it is never executed;
; presumably an end-of-routine marker - TODO confirm what scans for it.
mov eax, 3735929054
cnv1_double_mainloop_asm ENDP
ALIGN(64)
; cnv1 quad main loop: the whole body comes from the shared include file.
cnv1_quad_mainloop_asm PROC
INCLUDE cn1/cnv1_quad_main_loop.inc
ret 0
; 3735929054 == 0xDEADC0DE. Follows ret, so it is never executed;
; presumably an end-of-routine marker - TODO confirm what scans for it.
mov eax, 3735929054
cnv1_quad_mainloop_asm ENDP
ALIGN(64)
cnv2_mainloop_ivybridge_asm PROC
INCLUDE cn2/cnv2_main_loop_ivybridge.inc
ret 0
mov eax, 3735929054
cnv2_mainloop_ivybridge_asm ENDP
ALIGN 64
ALIGN(64)
cnv2_mainloop_ryzen_asm PROC
INCLUDE cn2/cnv2_main_loop_ryzen.inc
ret 0
mov eax, 3735929054
cnv2_mainloop_ryzen_asm ENDP
ALIGN 64
ALIGN(64)
cnv2_mainloop_bulldozer_asm PROC
INCLUDE cn2/cnv2_main_loop_bulldozer.inc
ret 0
mov eax, 3735929054
cnv2_mainloop_bulldozer_asm ENDP
ALIGN 64
ALIGN(64)
cnv2_double_mainloop_sandybridge_asm PROC
INCLUDE cn2/cnv2_double_main_loop_sandybridge.inc
ret 0

View File

@@ -42,14 +42,40 @@ set(SOURCES
ghostrider.cpp
)
if (CMAKE_C_COMPILER_ID MATCHES GNU)
# gcc 11.2.0 crashes with -ftree-vrp
set_source_files_properties(sph_jh.c PROPERTIES COMPILE_FLAGS "-Ofast -fno-tree-vrp")
# gcc 11.2.0 creates incorrect code with -O3
set_source_files_properties(sph_sha2.c PROPERTIES COMPILE_FLAGS "-O2")
set_source_files_properties(sph_luffa.c PROPERTIES COMPILE_FLAGS "-Ofast -Wno-unused-const-variable")
if (CMAKE_C_COMPILER_ID MATCHES MSVC)
set_source_files_properties(sph_blake.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_bmw.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_cubehash.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_echo.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_fugue.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_groestl.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_hamsi.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_jh.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_keccak.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_luffa.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_shabal.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_shavite.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_simd.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_sha2.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_skein.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
set_source_files_properties(sph_whirlpool.c PROPERTIES COMPILE_FLAGS "/O1 /Oi /Os")
elseif (CMAKE_C_COMPILER_ID MATCHES GNU OR CMAKE_C_COMPILER_ID MATCHES Clang)
set_source_files_properties(sph_blake.c PROPERTIES COMPILE_FLAGS "-Os")
set_source_files_properties(sph_bmw.c PROPERTIES COMPILE_FLAGS "-Os")
set_source_files_properties(sph_cubehash.c PROPERTIES COMPILE_FLAGS "-Os")
set_source_files_properties(sph_echo.c PROPERTIES COMPILE_FLAGS "-Os")
set_source_files_properties(sph_fugue.c PROPERTIES COMPILE_FLAGS "-Os")
set_source_files_properties(sph_groestl.c PROPERTIES COMPILE_FLAGS "-Os")
set_source_files_properties(sph_hamsi.c PROPERTIES COMPILE_FLAGS "-Os")
set_source_files_properties(sph_jh.c PROPERTIES COMPILE_FLAGS "-Os -fno-tree-vrp")
set_source_files_properties(sph_keccak.c PROPERTIES COMPILE_FLAGS "-Os")
set_source_files_properties(sph_luffa.c PROPERTIES COMPILE_FLAGS "-Os -Wno-unused-const-variable")
set_source_files_properties(sph_shabal.c PROPERTIES COMPILE_FLAGS "-Os")
set_source_files_properties(sph_shavite.c PROPERTIES COMPILE_FLAGS "-Os")
set_source_files_properties(sph_simd.c PROPERTIES COMPILE_FLAGS "-Os")
set_source_files_properties(sph_sha2.c PROPERTIES COMPILE_FLAGS "-Os")
set_source_files_properties(sph_skein.c PROPERTIES COMPILE_FLAGS "-Os")
set_source_files_properties(sph_whirlpool.c PROPERTIES COMPILE_FLAGS "-Os")
endif()
include_directories(.)

View File

@@ -4,8 +4,6 @@
No tuning is required - auto-config works well on most CPUs!
**Note for Windows users: MSVC binary is ~5% faster than GCC binary!**
### Sample command line (non-SSL port)
```
xmrig -a gr -o raptoreumemporium.com:3008 -u WALLET_ADDRESS
@@ -18,22 +16,24 @@ xmrig -a gr -o us.flockpool.com:5555 --tls -u WALLET_ADDRESS
You can use **rtm_ghostrider_example.cmd** as a template and put pool URL and your wallet address there. The general XMRig documentation is available [here](https://xmrig.com/docs/miner).
**Using the `--threads` or `-t` option is NOT recommended because it turns off advanced built-in config.** If you want to tweak the number of threads used for GhostRider, it's recommended to start using config.json instead of command line. The most suitable command-line option for this is `--cpu-max-threads-hint=N` where N can be between 0 and 100.
## Performance
While individual algorithm implementations are a bit unoptimized, XMRig achieves higher hashrates by employing better auto-config and more fine-grained thread scheduling: it can calculate a single batch of hashes using 2 threads for parts that don't require much cache. For example, on a typical Intel CPU (2 MB cache per core) it will use 1 thread per core for cn/fast, and 2 threads per core for other Cryptonight variants while calculating the same batch of hashes, always achieving more than 50% CPU load.
For the same reason, XMRig can sometimes use less than 100% CPU on Ryzen 3000/5000 CPUs if it finds that running 1 thread per core is faster for some Cryptonight variants on your system. Also, this is why it reports using only half the threads at startup - it's actually 2 threads per each reported thread.
For the same reason, XMRig can sometimes use less than 100% CPU on Ryzen 3000/5000 CPUs if it finds that running 1 thread per core is faster for some Cryptonight variants on your system.
**Windows** (detailed results [here](https://imgur.com/a/GCjEWpl))
CPU|cpuminer-gr-avx2 (tuned), h/s|XMRig (MSVC build), h/s|Speedup
**Windows** (detailed results [here](https://imgur.com/a/0njIVVW))
CPU|cpuminer-gr-avx2 1.2.4.1 (tuned), h/s|XMRig v6.16.2 (MSVC build), h/s|Speedup
-|-|-|-
AMD Ryzen 7 4700U|632.6|731|+15.5%
Intel Core i7-2600|496.4|533.6|+7.5%
AMD Ryzen 7 3700X @ 4.1 GHz|2453.0|2469.1|+0.65%
AMD Ryzen 5 5600X @ 4.65 GHz|2112.6|2221.2|+5.1%
AMD Ryzen 7 4700U|632.6|733.1|+15.89%
Intel Core i7-2600|496.4|554.6|+11.72%
AMD Ryzen 7 3700X @ 4.1 GHz|2453.0|2496.5|+1.77%
AMD Ryzen 5 5600X @ 4.65 GHz|2112.6|2337.5|+10.65%
**Linux** (tested by **Delgon**, detailed results [here](https://cdn.discordapp.com/attachments/604375870236524574/913167614749048872/unknown.png))
CPU|cpuminer-gr-avx2 (tuned), h/s|XMRig (GCC build), h/s|Speedup
**Linux (outdated)** (tested by **Delgon**, detailed results [here](https://cdn.discordapp.com/attachments/604375870236524574/913167614749048872/unknown.png))
CPU|cpuminer-gr-avx2 1.2.4.1 (tuned), h/s|XMRig v6.16.0 (GCC build), h/s|Speedup
-|-|-|-
AMD Ryzen 9 3900X|3746.51|3604.89|-3.78%
2xIntel Xeon E5-2698v3|2563.4|2638.38|+2.925%

View File

@@ -36,6 +36,7 @@
#include "base/io/log/Log.h"
#include "base/io/log/Tags.h"
#include "base/tools/Chrono.h"
#include "backend/cpu/Cpu.h"
#include "crypto/cn/CnHash.h"
#include "crypto/cn/CnCtx.h"
@@ -44,7 +45,6 @@
#include <thread>
#include <atomic>
#include <chrono>
#include <uv.h>
#ifdef XMRIG_FEATURE_HWLOC
@@ -328,8 +328,6 @@ void benchmark()
LOG_VERBOSE("%24s | N | Hashrate", "Algorithm");
LOG_VERBOSE("-------------------------|-----|-------------");
using namespace std::chrono;
for (uint32_t algo = 0; algo < 6; ++algo) {
for (uint64_t step : { 1, 2, 4}) {
const size_t cur_scratchpad_size = cn_sizes[algo] * step;
@@ -339,26 +337,26 @@ void benchmark()
auto f = CnHash::fn(cn_hash[algo], av[step], Assembly::AUTO);
const high_resolution_clock::time_point start_time = high_resolution_clock::now();
double start_time = Chrono::highResolutionMSecs();
double min_dt = 1e10;
for (uint32_t iter = 0;; ++iter) {
const high_resolution_clock::time_point t1 = high_resolution_clock::now();
double t1 = Chrono::highResolutionMSecs();
// Stop after 15 milliseconds, but only if at least 10 iterations were done
if ((iter >= 10) && (duration_cast<milliseconds>(t1 - start_time).count() >= 15)) {
if ((iter >= 10) && (t1 - start_time >= 15.0)) {
break;
}
f(buf, sizeof(buf), hash, ctx, 0);
const double dt = duration_cast<nanoseconds>(high_resolution_clock::now() - t1).count() / 1e9;
const double dt = Chrono::highResolutionMSecs() - t1;
if (dt < min_dt) {
min_dt = dt;
}
}
const double hashrate = step / min_dt;
const double hashrate = step * 1e3 / min_dt;
LOG_VERBOSE("%24s | %" PRIu64 "x1 | %.2f h/s", cn_names[algo], step, hashrate);
if (hashrate > tune8MB[algo].hashrate) {
@@ -388,14 +386,14 @@ void benchmark()
auto f = CnHash::fn(cn_hash[algo], av[step], Assembly::AUTO);
const high_resolution_clock::time_point start_time = high_resolution_clock::now();
double start_time = Chrono::highResolutionMSecs();
double min_dt = 1e10;
for (uint32_t iter = 0;; ++iter) {
const high_resolution_clock::time_point t1 = high_resolution_clock::now();
double t1 = Chrono::highResolutionMSecs();
// Stop after 30 milliseconds, but only if at least 10 iterations were done
if ((iter >= 10) && (duration_cast<milliseconds>(t1 - start_time).count() >= 30)) {
if ((iter >= 10) && (t1 - start_time >= 30.0)) {
break;
}
@@ -403,13 +401,13 @@ void benchmark()
f(buf, sizeof(buf), hash, ctx, 0);
helper->wait();
const double dt = duration_cast<nanoseconds>(high_resolution_clock::now() - t1).count() / 1e9;
const double dt = Chrono::highResolutionMSecs() - t1;
if (dt < min_dt) {
min_dt = dt;
}
}
const double hashrate = step * 2.0 / min_dt * 1.0075;
const double hashrate = step * 2e3 / min_dt * 1.0075;
LOG_VERBOSE("%24s | %" PRIu64 "x2 | %.2f h/s", cn_names[algo], step, hashrate);
if (hashrate > tune8MB[algo].hashrate) {
@@ -538,7 +536,7 @@ void destroy_helper_thread(HelperThread* t)
}
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread* helper)
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread* helper, bool verbose)
{
enum { N = 8 };
@@ -554,6 +552,7 @@ void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ct
uint32_t cn_indices[6];
select_indices(cn_indices, data + 4);
if (verbose) {
static uint32_t prev_indices[3];
if (memcmp(cn_indices, prev_indices, sizeof(prev_indices)) != 0) {
memcpy(prev_indices, cn_indices, sizeof(prev_indices));
@@ -561,6 +560,7 @@ void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ct
LOG_INFO("%s GhostRider algo %d: %s", Tags::cpu(), i + 1, cn_names[cn_indices[i]]);
}
}
}
const CnHash::AlgoVariant* av = Cpu::info()->hasAES() ? av_hw_aes : av_soft_aes;
const AlgoTune* tune = (helper && helper->m_is8MB) ? tune8MB : tuneDefault;
@@ -765,7 +765,7 @@ HelperThread* create_helper_thread(int64_t, const std::vector<int64_t>&) { retur
void destroy_helper_thread(HelperThread*) {}
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread*)
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread*, bool verbose)
{
constexpr uint32_t N = 8;
@@ -784,6 +784,7 @@ void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ct
uint32_t step[6] = { 4, 4, 1, 2, 4, 4 };
#endif
if (verbose) {
static uint32_t prev_indices[3];
if (memcmp(cn_indices, prev_indices, sizeof(prev_indices)) != 0) {
memcpy(prev_indices, cn_indices, sizeof(prev_indices));
@@ -791,6 +792,7 @@ void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ct
LOG_INFO("%s GhostRider algo %d: %s", Tags::cpu(), i + 1, cn_names[cn_indices[i]]);
}
}
}
const CnHash::AlgoVariant* av = Cpu::info()->hasAES() ? av_hw_aes : av_soft_aes;

View File

@@ -41,7 +41,7 @@ struct HelperThread;
void benchmark();
HelperThread* create_helper_thread(int64_t cpu_index, const std::vector<int64_t>& affinities);
void destroy_helper_thread(HelperThread* t);
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread* helper);
void hash_octa(const uint8_t* data, size_t size, uint8_t* output, cryptonight_ctx** ctx, HelperThread* helper, bool verbose = true);
} // namespace ghostrider

View File

@@ -382,7 +382,7 @@ void SelectSoftAESImpl(size_t threadsCount)
double fast_speed = 0.0;
for (size_t run = 0; run < 3; ++run) {
for (size_t i = 0; i < impl.size(); ++i) {
const uint64_t t1 = xmrig::Chrono::highResolutionMSecs();
const double t1 = xmrig::Chrono::highResolutionMSecs();
std::vector<uint32_t> count(threadsCount, 0);
std::vector<std::thread> threads;
for (size_t t = 0; t < threadsCount; ++t) {
@@ -401,7 +401,7 @@ void SelectSoftAESImpl(size_t threadsCount)
threads[t].join();
total += count[t];
}
const uint64_t t2 = xmrig::Chrono::highResolutionMSecs();
const double t2 = xmrig::Chrono::highResolutionMSecs();
const double speed = total * 1e3 / (t2 - t1);
if (speed > fast_speed) {
fast_idx = i;

View File

@@ -295,7 +295,7 @@ typedef void(randomx::JitCompilerX86::* InstructionGeneratorX86_2)(const randomx
INST_HANDLE(IMUL_R, ISUB_M);
INST_HANDLE(IMUL_M, IMUL_R);
#if defined(_M_X64) || defined(__x86_64__)
#if defined(XMRIG_FEATURE_ASM) && (defined(_M_X64) || defined(__x86_64__))
if (hasBMI2) {
INST_HANDLE2(IMULH_R, IMULH_R_BMI2, IMUL_M);
INST_HANDLE2(IMULH_M, IMULH_M_BMI2, IMULH_R);
@@ -337,7 +337,7 @@ typedef void(randomx::JitCompilerX86::* InstructionGeneratorX86_2)(const randomx
INST_HANDLE(CBRANCH, FSQRT_R);
#endif
#if defined(_M_X64) || defined(__x86_64__)
#if defined(XMRIG_FEATURE_ASM) && (defined(_M_X64) || defined(__x86_64__))
if (hasBMI2) {
INST_HANDLE2(CFROUND, CFROUND_BMI2, CBRANCH);
}

View File

@@ -22,7 +22,7 @@
#define APP_ID "xmrig"
#define APP_NAME "XMRig"
#define APP_DESC "XMRig miner"
#define APP_VERSION "6.16.0"
#define APP_VERSION "6.16.2"
#define APP_DOMAIN "xmrig.com"
#define APP_SITE "www.xmrig.com"
#define APP_COPYRIGHT "Copyright (C) 2016-2021 xmrig.com"
@@ -30,7 +30,7 @@
#define APP_VER_MAJOR 6
#define APP_VER_MINOR 16
#define APP_VER_PATCH 0
#define APP_VER_PATCH 2
#ifdef _MSC_VER
# if (_MSC_VER >= 1920)