1
0
mirror of https://github.com/xmrig/xmrig.git synced 2025-12-08 16:33:32 -05:00

Compare commits

..

38 Commits

Author SHA1 Message Date
XMRig
8bfaddd3fc v6.8.2 2021-02-12 18:47:16 +07:00
XMRig
dabafaaadb Merge branch 'dev' 2021-02-12 18:46:41 +07:00
xmrig
5cda714254 Update CHANGELOG.md 2021-02-12 18:35:43 +07:00
xmrig
91151ce4a1 Merge pull request #2089 from SChernykh/dev
Optimized cn-heavy for Zen3
2021-02-08 16:24:16 +07:00
SChernykh
dc1443f3b8 Cryptonight: add prefetching to interleaved mode 2021-02-07 23:29:54 +01:00
SChernykh
8af8df25aa Optimized cn-heavy for Zen3
- Uses scratchpad interleaving to access only the closest L3 slice from each CPU core.
- Also activates MSR mod for cn-heavy because CPU prefetchers get confused with interleaving
- 7-8% speedup on Zen3
2021-02-07 22:05:11 +01:00
XMRig
b1e14dc1d3 Always disable kawpow for CPU backend. 2021-02-07 18:49:54 +07:00
XMRig
f460d76f8d Add missing option to config example. 2021-02-06 16:17:53 +07:00
xmrig
1c63e9efba Merge pull request #2080 from SChernykh/dev 2021-02-04 04:29:59 +07:00
SChernykh
21abbe4e84 Fix compile error in Termux 2021-02-03 19:05:05 +01:00
XMRig
3080f47cd6 v6.8.2-dev 2021-02-03 18:01:14 +07:00
XMRig
f4ebdaa8e5 Merge branch 'master' into dev 2021-02-03 18:00:42 +07:00
XMRig
1bcfd0cdea v6.8.1 2021-02-03 07:00:39 +07:00
XMRig
9396ecf93d Merge branch 'dev' 2021-02-03 06:57:11 +07:00
xmrig
a4af964696 Update CHANGELOG.md 2021-02-03 06:04:30 +07:00
XMRig
2c8d8ee2ab Fixed macOS build and compile warning. 2021-02-02 13:53:45 +07:00
xmrig
631a8ca802 Merge pull request #2077 from SChernykh/dev
Fix for illegal instruction crash on ARM
2021-02-02 04:57:36 +07:00
SChernykh
346892e170 Update jit_compiler_a64.cpp 2021-02-01 22:52:02 +01:00
SChernykh
db03573804 ARM JIT: added missing cache flush 2021-02-01 22:42:35 +01:00
SChernykh
e74573f81f Fixed code allocation for ARM 2021-02-01 22:36:11 +01:00
xmrig
0e70974d7d Merge pull request #2076 from xmrig/feature-flexible-hugepages
Added support for flexible huge page sizes on Linux.
2021-02-02 04:07:41 +07:00
xmrig
3a3ee91324 Merge pull request #2075 from SChernykh/dev
Fixed crashes on ARM
2021-02-02 03:06:58 +07:00
SChernykh
4108428872 Fixed crashes on ARM 2021-02-01 17:07:45 +01:00
XMRig
4c3425a958 Added "--hugepage-size" command line option. 2021-02-01 05:06:24 +07:00
XMRig
09624c4f9b Added support for flexible huge page sizes on Linux. 2021-01-31 23:38:57 +07:00
XMRig
8faef28e7d Detect Apple M1 on Linux. 2021-01-31 05:41:32 +07:00
XMRig
62450f4ed8 Update ARM CPUs names. 2021-01-31 03:53:22 +07:00
XMRig
2c52a5a352 #2066 Fixed AMD GPUs health data readings. 2021-01-30 02:42:59 +07:00
XMRig
7d52bd7454 Extend normalization rules. 2021-01-29 18:22:24 +07:00
XMRig
f68b105bd9 Normalize DMI memory slot name. 2021-01-29 04:23:50 +07:00
XMRig
9ca1a6129b #2066 Quick fix for AMD GPUs health data. 2021-01-29 01:23:35 +07:00
xmrig
7a3df1c0bb Merge pull request #2067 from SChernykh/dev
Fix compilation error when RandomX and Argon2 are disabled
2021-01-28 20:44:03 +07:00
SChernykh
22a1b8d82d Fix compilation error when RandomX and Argon2 are disabled 2021-01-28 14:38:28 +01:00
xmrig
0a462fbef5 Merge pull request #2064 from SChernykh/dev
Added documentation for config.json CPU options
2021-01-28 19:41:15 +07:00
SChernykh
f302b4b0ef Added documentation for config.json CPU options 2021-01-28 13:37:27 +01:00
XMRig
65fe26dc6c Don't print empty memory slots if the total count above 8. 2021-01-28 00:00:00 +07:00
XMRig
e6d4921e21 v6.8.1-dev 2021-01-26 16:40:10 +07:00
XMRig
f82d67e76e Merge branch 'master' into dev 2021-01-26 16:38:37 +07:00
37 changed files with 558 additions and 304 deletions

View File

@@ -1,3 +1,14 @@
# v6.8.2
- [#2080](https://github.com/xmrig/xmrig/pull/2080) Fixed compile error in Termux.
- [#2089](https://github.com/xmrig/xmrig/pull/2089) Optimized CryptoNight-Heavy for Zen3, 7-8% speedup.
# v6.8.1
- [#2064](https://github.com/xmrig/xmrig/pull/2064) Added documentation for config.json CPU options.
- [#2066](https://github.com/xmrig/xmrig/issues/2066) Fixed AMD GPUs health data readings on Linux.
- [#2067](https://github.com/xmrig/xmrig/pull/2067) Fixed compilation error when RandomX and Argon2 are disabled.
- [#2076](https://github.com/xmrig/xmrig/pull/2076) Added support for flexible huge page sizes on Linux.
- [#2077](https://github.com/xmrig/xmrig/pull/2077) Fixed `illegal instruction` crash on ARM.
# v6.8.0
- [#2052](https://github.com/xmrig/xmrig/pull/2052) Added DMI/SMBIOS reader.
- Added information about memory modules on the miner startup and for online benchmark.

View File

@@ -75,6 +75,35 @@ Each number represent one thread and means CPU affinity, this is default format
```
Internal format, but can be user defined.
## RandomX options
#### `init`
Thread count to initialize RandomX dataset. Auto-detect (`-1`) or any number greater than 0 to use that many threads.
#### `init-avx2`
Use AVX2 for dataset initialization. Faster on some CPUs. Auto-detect (`-1`), disabled (`0`), always enabled on CPUs that support AVX2 (`1`).
#### `mode`
RandomX mining mode: `auto`, `fast` (2 GB memory), `light` (256 MB memory).
#### `1gb-pages`
Use 1GB hugepages for RandomX dataset (Linux only). Enabled (`true`) or disabled (`false`). It gives 1-3% speedup.
#### `rdmsr`
Restore MSR register values to their original values on exit. Used together with `wrmsr`. Enabled (`true`) or disabled (`false`).
#### `wrmsr`
[MSR mod](https://xmrig.com/docs/miner/randomx-optimization-guide/msr). Enabled (`true`) or disabled (`false`). It gives up to 15% speedup depending on your system.
#### `cache_qos`
[Cache QoS](https://xmrig.com/docs/miner/randomx-optimization-guide/qos). Enabled (`true`) or disabled (`false`). It's useful when you can't or don't want to mine on all CPU cores to make mining hashrate more stable.
#### `numa`
NUMA support (better hashrate on multi-CPU servers and Ryzen Threadripper 1xxx/2xxx). Enabled (`true`) or disabled (`false`).
#### `scratchpad_prefetch_mode`
Which instruction to use in RandomX loop to prefetch data from scratchpad. `1` is default and fastest in most cases. Can be off (`0`), `prefetcht0` instruction (`1`), `prefetchnta` instruction (`2`, a bit faster on Coffee Lake and a few other CPUs), `mov` instruction (`3`).
## Shared options
#### `enabled`
@@ -83,23 +112,32 @@ Enable (`true`) or disable (`false`) CPU backend, by default `true`.
#### `huge-pages`
Enable (`true`) or disable (`false`) huge pages support, by default `true`.
#### `huge-pages-jit`
Enable (`true`) or disable (`false`) huge pages support for RandomX JIT code, by default `false`. It gives a very small boost on Ryzen CPUs, but hashrate is unstable between launches. Use with caution.
#### `hw-aes`
Force enable (`true`) or disable (`false`) hardware AES support. Default value `null` means miner autodetect this feature. Usually don't need change this option, this option useful for some rare cases when miner can't detect hardware AES, but it available. If you force enable this option, but your hardware not support it, miner will crash.
#### `priority`
Mining threads priority, value from `1` (lowest priority) to `5` (highest possible priority). Default value `null` means miner don't change threads priority at all.
Mining threads priority, value from `1` (lowest priority) to `5` (highest possible priority). Default value `null` means miner don't change threads priority at all. Setting priority higher than 2 can make your PC unresponsive.
#### `memory-pool` (since v4.3.0)
Use continuous, persistent memory block for mining threads, useful for preserve huge pages allocation while algorithm swithing. Possible values `false` (feature disabled, by default) or `true` or specific count of 2 MB huge pages. It helps to avoid loosing huge pages for scratchpads when RandomX dataset is updated and mining threads restart after a 2-3 days of mining.
#### `yield` (since v5.1.1)
Prefer system better system response/stability `true` (default value) or maximum hashrate `false`.
#### `asm`
Enable/configure or disable ASM optimizations. Possible values: `true`, `false`, `"intel"`, `"ryzen"`, `"bulldozer"`.
#### `argon2-impl` (since v3.1.0)
Allow override automatically detected Argon2 implementation, this option added mostly for debug purposes, default value `null` means autodetect. Other possible values: `"x86_64"`, `"SSE2"`, `"SSSE3"`, `"XOP"`, `"AVX2"`, `"AVX-512F"`. Manual selection has no safe guards, if you CPU not support required instuctions, miner will crash.
Allow override automatically detected Argon2 implementation, this option added mostly for debug purposes, default value `null` means autodetect. This is used in RandomX dataset initialization and also in some other mining algorithms. Other possible values: `"x86_64"`, `"SSE2"`, `"SSSE3"`, `"XOP"`, `"AVX2"`, `"AVX-512F"`. Manual selection has no safe guards - if your CPU doesn't support required instuctions, miner will crash.
#### `astrobwt-max-size`
AstroBWT algorithm: skip hashes with large stage 2 size, default: `550`, min: `400`, max: `1200`. Optimal value depends on your CPU/GPU
#### `astrobwt-avx2`
AstroBWT algorithm: use AVX2 code. It's faster on some CPUs and slower on other
#### `max-threads-hint` (since v4.2.0)
Maximum CPU threads count (in percentage) hint for autoconfig. [CPU_MAX_USAGE.md](CPU_MAX_USAGE.md)
#### `memory-pool` (since v4.3.0)
Use continuous, persistent memory block for mining threads, useful for preserve huge pages allocation while algorithm swithing. Possible values `false` (feature disabled, by default) or `true` or specific count of 2 MB huge pages.
#### `yield` (since v5.1.1)
Prefer system better system response/stability `true` (default value) or maximum hashrate `false`.

View File

@@ -1,5 +0,0 @@
6bb1a2e3a0fbca5195be6022f2a9fbff8a353c37c7542e7ab89420cb45b64505 xmrig-5.0.1-gcc-win32.zip
24dba9ec281acfb2ea2c401ebd0e4e2d1f1ee5fd557da5ff3c7049020c1f78b6 xmrig-5.0.1-gcc-win64.zip
86d65c6693ec9e35cd7547329580638b85c9eb0cf8383892a1c15199de5b556f xmrig-5.0.1-msvc-cuda10_1-win64.zip
0fbfe518b1c4b6993b0f66ff01302626375b15620ccf8f64d6fb97845068ffca xmrig-5.0.1-msvc-win64.zip
aa34890738a3494de2fa0e44db346937fea7339852f5f10b5d4655f95e2d8f1f xmrig-5.0.1-xenial-x64.tar.gz

View File

@@ -1,11 +0,0 @@
-----BEGIN PGP SIGNATURE-----
iQEzBAABCgAdFiEEmsTOqOZuNaXHzdwbRGpTY4vpRAkFAl3VcsoACgkQRGpTY4vp
RAm9vQgA1MyTUU2jley2TCYLUzQy2Fffc8fbXYv64r44jbWOjC/6qo2iIlRgPhIc
oVyPKr5TYS3QjDzCEm8IvozS0YudS6soESbPzqDonboK8pd0K4bsML9TQY2feV7A
NL5vln0rfVHp1wxLLrQpfBqAgvJUXEyaHece6gFQN79JOGhEo2bHL2NyrOl+FViS
b2BaMtXq410Fh+XT6ShnOaG/2EuO8ZqSGdCO6A/2LHQw1UY+mZiCvue6P6B06HmB
WD/urOv38V389v+V+Sp4UlEW6VpBOOjvtChoVWtLt+tKzydrnt2EmoWWWg475pka
4G6whHuMWS8CTt5/PDhJpvVXNQTIOw==
=C764
-----END PGP SIGNATURE-----

View File

@@ -146,7 +146,7 @@ static void print_memory(const Config *config)
return;
}
const bool vm = Cpu::info()->isVM();
const bool printEmpty = reader.memory().size() <= 8;
for (const auto &memory : reader.memory()) {
if (!memory.isValid()) {
@@ -155,14 +155,14 @@ static void print_memory(const Config *config)
if (memory.size()) {
Log::print(WHITE_BOLD(" %-13s") "%s: " CYAN_BOLD("%" PRIu64) CYAN(" GB ") WHITE_BOLD("%s @ %" PRIu64 " MHz ") BLACK_BOLD("%s"),
"", memory.slot().data(), memory.size() / oneGiB, memory.type(), memory.speed() / 1000000ULL, memory.product().data());
"", memory.id().data(), memory.size() / oneGiB, memory.type(), memory.speed() / 1000000ULL, memory.product().data());
}
else if (!vm) {
else if (printEmpty) {
Log::print(WHITE_BOLD(" %-13s") "%s: " BLACK_BOLD("<empty>"), "", memory.slot().data());
}
}
const auto &board = vm ? reader.system() : reader.board();
const auto &board = Cpu::info()->isVM() ? reader.system() : reader.board();
if (board.isValid()) {
Log::print(GREEN_BOLD(" * ") WHITE_BOLD("%-13s") WHITE_BOLD("%s") " - " WHITE_BOLD("%s"), "MOTHERBOARD", board.vendor().data(), board.product().data());

View File

@@ -1,13 +1,6 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018 Lee Clagett <https://github.com/vtnerd>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -42,10 +35,15 @@ public:
PciTopology() = default;
PciTopology(uint32_t bus, uint32_t device, uint32_t function) : m_valid(true), m_bus(bus), m_device(device), m_function(function) {}
inline bool isValid() const { return m_valid; }
inline uint8_t bus() const { return m_bus; }
inline uint8_t device() const { return m_device; }
inline uint8_t function() const { return m_function; }
inline bool isEqual(const PciTopology &other) const { return m_valid == other.m_valid && toUint32() == other.toUint32(); }
inline bool isValid() const { return m_valid; }
inline uint8_t bus() const { return m_bus; }
inline uint8_t device() const { return m_device; }
inline uint8_t function() const { return m_function; }
inline bool operator!=(const PciTopology &other) const { return !isEqual(other); }
inline bool operator<(const PciTopology &other) const { return toUint32() < other.toUint32(); }
inline bool operator==(const PciTopology &other) const { return isEqual(other); }
String toString() const
{
@@ -60,6 +58,8 @@ public:
}
private:
inline uint32_t toUint32() const { return m_bus << 16 | m_device << 8 | m_function; }
bool m_valid = false;
uint8_t m_bus = 0;
uint8_t m_device = 0;

View File

@@ -81,6 +81,7 @@ public:
inline void start(const std::vector<CpuLaunchData> &threads, size_t memory)
{
m_workersMemory.clear();
m_hugePages.reset();
m_memory = memory;
m_started = 0;
@@ -95,8 +96,10 @@ public:
if (ready) {
m_started++;
m_hugePages += worker->memory()->hugePages();
m_ways += worker->intensity();
if (m_workersMemory.insert(worker->memory()).second) {
m_hugePages += worker->memory()->hugePages();
}
m_ways += worker->intensity();
}
else {
m_errors++;
@@ -126,6 +129,7 @@ public:
}
private:
std::set<const VirtualMemory*> m_workersMemory;
HugePagesInfo m_hugePages;
size_t m_errors = 0;
size_t m_memory = 0;

View File

@@ -1,12 +1,6 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -77,7 +71,7 @@ rapidjson::Value xmrig::CpuConfig::toJSON(rapidjson::Document &doc) const
Value obj(kObjectType);
obj.AddMember(StringRef(kEnabled), m_enabled, allocator);
obj.AddMember(StringRef(kHugePages), m_hugePages, allocator);
obj.AddMember(StringRef(kHugePages), m_hugePageSize == 0 || m_hugePageSize == kDefaultHugePageSizeKb ? Value(isHugePages()) : Value(static_cast<uint32_t>(m_hugePageSize)), allocator);
obj.AddMember(StringRef(kHugePagesJit), m_hugePagesJit, allocator);
obj.AddMember(StringRef(kHwAes), m_aes == AES_AUTO ? Value(kNullType) : Value(m_aes == AES_HW), allocator);
obj.AddMember(StringRef(kPriority), priority() != -1 ? Value(priority()) : Value(kNullType), allocator);
@@ -109,12 +103,16 @@ rapidjson::Value xmrig::CpuConfig::toJSON(rapidjson::Document &doc) const
size_t xmrig::CpuConfig::memPoolSize() const
{
return m_memoryPool < 0 ? Cpu::info()->threads() : m_memoryPool;
return m_memoryPool < 0 ? std::max(Cpu::info()->threads(), Cpu::info()->L3() >> 21) : m_memoryPool;
}
std::vector<xmrig::CpuLaunchData> xmrig::CpuConfig::get(const Miner *miner, const Algorithm &algorithm) const
{
if (algorithm.family() == Algorithm::KAWPOW) {
return {};
}
std::vector<CpuLaunchData> out;
const auto &threads = m_threads.get(algorithm);
@@ -137,14 +135,14 @@ void xmrig::CpuConfig::read(const rapidjson::Value &value)
{
if (value.IsObject()) {
m_enabled = Json::getBool(value, kEnabled, m_enabled);
m_hugePages = Json::getBool(value, kHugePages, m_hugePages);
m_hugePagesJit = Json::getBool(value, kHugePagesJit, m_hugePagesJit);
m_limit = Json::getUint(value, kMaxThreadsHint, m_limit);
m_yield = Json::getBool(value, kYield, m_yield);
setAesMode(Json::getValue(value, kHwAes));
setPriority(Json::getInt(value, kPriority, -1));
setHugePages(Json::getValue(value, kHugePages));
setMemoryPool(Json::getValue(value, kMemoryPool));
setPriority(Json::getInt(value, kPriority, -1));
# ifdef XMRIG_FEATURE_ASM
m_assembly = Json::getValue(value, kAsm);
@@ -218,6 +216,19 @@ void xmrig::CpuConfig::setAesMode(const rapidjson::Value &value)
}
void xmrig::CpuConfig::setHugePages(const rapidjson::Value &value)
{
if (value.IsBool()) {
m_hugePageSize = value.GetBool() ? kDefaultHugePageSizeKb : 0U;
}
else if (value.IsUint()) {
const uint32_t size = value.GetUint();
m_hugePageSize = size < kOneGbPageSizeKb ? size : kDefaultHugePageSizeKb;
}
}
void xmrig::CpuConfig::setMemoryPool(const rapidjson::Value &value)
{
if (value.IsBool()) {

View File

@@ -1,12 +1,6 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -75,8 +69,9 @@ public:
std::vector<CpuLaunchData> get(const Miner *miner, const Algorithm &algorithm) const;
void read(const rapidjson::Value &value);
inline bool astrobwtAVX2() const { return m_astrobwtAVX2; }
inline bool isEnabled() const { return m_enabled; }
inline bool isHugePages() const { return m_hugePages; }
inline bool isHugePages() const { return m_hugePageSize > 0; }
inline bool isHugePagesJit() const { return m_hugePagesJit; }
inline bool isShouldSave() const { return m_shouldSave; }
inline bool isYield() const { return m_yield; }
@@ -84,13 +79,17 @@ public:
inline const String &argon2Impl() const { return m_argon2Impl; }
inline const Threads<CpuThreads> &threads() const { return m_threads; }
inline int astrobwtMaxSize() const { return m_astrobwtMaxSize; }
inline bool astrobwtAVX2() const { return m_astrobwtAVX2; }
inline int priority() const { return m_priority; }
inline size_t hugePageSize() const { return m_hugePageSize * 1024U; }
inline uint32_t limit() const { return m_limit; }
private:
constexpr static size_t kDefaultHugePageSizeKb = 2048U;
constexpr static size_t kOneGbPageSizeKb = 1048576U;
void generate();
void setAesMode(const rapidjson::Value &value);
void setHugePages(const rapidjson::Value &value);
void setMemoryPool(const rapidjson::Value &value);
inline void setPriority(int priority) { m_priority = (priority >= -1 && priority <= 5) ? priority : -1; }
@@ -99,13 +98,13 @@ private:
Assembly m_assembly;
bool m_astrobwtAVX2 = false;
bool m_enabled = true;
bool m_hugePages = true;
bool m_hugePagesJit = false;
bool m_shouldSave = false;
bool m_yield = true;
int m_astrobwtMaxSize = 550;
int m_memoryPool = 0;
int m_priority = -1;
size_t m_hugePageSize = kDefaultHugePageSizeKb;
String m_argon2Impl;
Threads<CpuThreads> m_threads;
uint32_t m_limit = 100;

View File

@@ -19,8 +19,10 @@
#include <cassert>
#include <thread>
#include <mutex>
#include "backend/cpu/Cpu.h"
#include "backend/cpu/CpuWorker.h"
#include "base/tools/Chrono.h"
#include "core/config/Config.h"
@@ -55,6 +57,12 @@ namespace xmrig {
static constexpr uint32_t kReserveCount = 32768;
#ifdef XMRIG_ALGO_CN_HEAVY
static std::mutex cn_heavyZen3MemoryMutex;
VirtualMemory* cn_heavyZen3Memory = nullptr;
#endif
} // namespace xmrig
@@ -73,7 +81,20 @@ xmrig::CpuWorker<N>::CpuWorker(size_t id, const CpuLaunchData &data) :
m_threads(data.threads),
m_ctx()
{
m_memory = new VirtualMemory(m_algorithm.l3() * N, data.hugePages, false, true, node());
# ifdef XMRIG_ALGO_CN_HEAVY
// cn-heavy optimization for Zen3 CPUs
if ((N == 1) && (m_av == CnHash::AV_SINGLE) && (m_algorithm.family() == Algorithm::CN_HEAVY) && (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3)) {
std::lock_guard<std::mutex> lock(cn_heavyZen3MemoryMutex);
if (!cn_heavyZen3Memory) {
cn_heavyZen3Memory = new VirtualMemory(m_algorithm.l3() * m_threads, data.hugePages, false, false, node());
}
m_memory = cn_heavyZen3Memory;
}
else
# endif
{
m_memory = new VirtualMemory(m_algorithm.l3() * N, data.hugePages, false, true, node());
}
}
@@ -85,7 +106,13 @@ xmrig::CpuWorker<N>::~CpuWorker()
# endif
CnCtx::release(m_ctx, N);
delete m_memory;
# ifdef XMRIG_ALGO_CN_HEAVY
if (m_memory != cn_heavyZen3Memory)
# endif
{
delete m_memory;
}
}
@@ -387,7 +414,16 @@ template<size_t N>
void xmrig::CpuWorker<N>::allocateCnCtx()
{
if (m_ctx[0] == nullptr) {
CnCtx::create(m_ctx, m_memory->scratchpad(), m_algorithm.l3(), N);
int shift = 0;
# ifdef XMRIG_ALGO_CN_HEAVY
// cn-heavy optimization for Zen3 CPUs
if (m_memory == cn_heavyZen3Memory) {
shift = (id() / 8) * m_algorithm.l3() * 8 + (id() % 8) * 64;
}
# endif
CnCtx::create(m_ctx, m_memory->scratchpad() + shift, m_algorithm.l3(), N);
}
}

View File

@@ -363,10 +363,14 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
return;
}
std::vector<std::pair<int64_t, int32_t>> threads_data;
threads_data.reserve(cores.size());
size_t pu_id = 0;
while (cacheHashes > 0 && PUs > 0) {
bool allocated_pu = false;
threads_data.clear();
for (hwloc_obj_t core : cores) {
const std::vector<hwloc_obj_t> units = findByType(core, HWLOC_OBJ_PU);
if (units.size() <= pu_id) {
@@ -377,18 +381,31 @@ void xmrig::HwlocCpuInfo::processTopLevelCache(hwloc_obj_t cache, const Algorith
PUs--;
allocated_pu = true;
threads.add(units[pu_id]->os_index, intensity);
threads_data.emplace_back(units[pu_id]->os_index, intensity);
if (cacheHashes == 0) {
break;
}
}
// Reversing of "threads_data" and "cores" is done to fill in virtual cores starting from the last one, but still in order
// For example, cn-heavy threads on 6-core Zen2/Zen3 will have affinity [0,2,4,6,8,10,9,11]
// This is important for Zen3 cn-heavy optimization
if (pu_id & 1) {
std::reverse(threads_data.begin(), threads_data.end());
}
for (const auto& t : threads_data) {
threads.add(t.first, t.second);
}
if (!allocated_pu) {
break;
}
pu_id++;
std::reverse(cores.begin(), cores.end());
}
# endif
}

View File

@@ -1,7 +1,7 @@
/* XMRig
* Copyright (c) 2018 Riku Voipio <riku.voipio@iki.fi>
* Copyright (c) 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2020 XMRig <support@xmrig.com>
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -87,16 +87,22 @@ static const id_part arm_part[] = {
{ 0xd03, "Cortex-A53" },
{ 0xd04, "Cortex-A35" },
{ 0xd05, "Cortex-A55" },
{ 0xd06, "Cortex-A65" },
{ 0xd07, "Cortex-A57" },
{ 0xd08, "Cortex-A72" },
{ 0xd09, "Cortex-A73" },
{ 0xd0a, "Cortex-A75" },
{ 0xd0b, "Cortex-A76" },
{ 0xd0c, "Neoverse-N1" },
{ 0xd0d, "Cortex-A77" },
{ 0xd0e, "Cortex-A76AE" },
{ 0xd13, "Cortex-R52" },
{ 0xd20, "Cortex-M23" },
{ 0xd21, "Cortex-M33" },
{ 0xd41, "Cortex-A78" },
{ 0xd42, "Cortex-A78AE" },
{ 0xd4a, "Neoverse-E1" },
{ 0xd4b, "Cortex-A78C" },
{ -1, nullptr },
};
@@ -150,6 +156,7 @@ static const id_part samsung_part[] = {
static const id_part nvidia_part[] = {
{ 0x000, "Denver" },
{ 0x003, "Denver 2" },
{ 0x004, "Carmel" },
{ -1, nullptr },
};
@@ -191,23 +198,36 @@ static const id_part intel_part[] = {
{ -1, nullptr },
};
static const struct id_part fujitsu_part[] = {
{ 0x001, "A64FX" },
{ -1, "unknown" },
};
static const id_part hisi_part[] = {
{ 0xd01, "Kunpeng-920" }, /* aka tsv110 */
{ -1, nullptr },
};
static const id_part apple_part[] = {
{ 0x022, "M1" },
{ 0x023, "M1" },
{ -1, nullptr },
};
static const hw_impl hw_implementer[] = {
{ 0x41, arm_part, "ARM" },
{ 0x42, brcm_part, "Broadcom" },
{ 0x43, cavium_part, "Cavium" },
{ 0x44, dec_part, "DEC" },
{ 0x46, fujitsu_part, "FUJITSU" },
{ 0x48, hisi_part, "HiSilicon" },
{ 0x4e, nvidia_part, "Nvidia" },
{ 0x50, apm_part, "APM" },
{ 0x51, qcom_part, "Qualcomm" },
{ 0x53, samsung_part, "Samsung" },
{ 0x56, marvell_part, "Marvell" },
{ 0x61, apple_part, "Apple" },
{ 0x66, faraday_part, "Faraday" },
{ 0x69, intel_part, "Intel" }
};

View File

@@ -1,8 +1,8 @@
/* XMRig
* Copyright 2008-2018 Advanced Micro Devices, Inc.
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright 2020 Patrick Bollinger <https://github.com/pjbollinger>
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright (c) 2008-2018 Advanced Micro Devices, Inc.
* Copyright (c) 2020 Patrick Bollinger <https://github.com/pjbollinger>
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -20,10 +20,13 @@
#include "backend/opencl/wrappers/AdlLib.h"
#include "3rdparty/fmt/core.h"
#include "backend/opencl/wrappers/OclDevice.h"
#include <dirent.h>
#include <fstream>
#include <map>
#include <string>
#include <sys/stat.h>
#include <sys/types.h>
@@ -35,18 +38,27 @@ namespace xmrig {
bool AdlLib::m_initialized = false;
bool AdlLib::m_ready = false;
static const std::string kPrefix = "/sys/bus/pci/drivers/amdgpu/";
static std::map<PciTopology, std::string> hwmon_cache;
static inline bool sysfs_is_file(const std::string &path)
static inline bool sysfs_is_file(const char *path)
{
struct stat sb;
return stat(path.c_str(), &sb) == 0 && ((sb.st_mode & S_IFMT) == S_IFREG);
return stat(path, &sb) == 0 && ((sb.st_mode & S_IFMT) == S_IFREG);
}
static inline bool sysfs_is_amdgpu(const std::string &path)
static inline int dir_filter(const struct dirent *dirp)
{
return strlen(dirp->d_name) > 5 ? 1 : 0;
}
static bool sysfs_is_amdgpu(const char *path, char *buf, const char *filename)
{
strcpy(buf, filename);
if (!sysfs_is_file(path)) {
return false;
}
@@ -63,8 +75,10 @@ static inline bool sysfs_is_amdgpu(const std::string &path)
}
uint32_t sysfs_read(const std::string &path)
static uint32_t sysfs_read(const char *path, char *buf, const char *filename)
{
strcpy(buf, filename);
std::ifstream file(path);
if (!file.is_open()) {
return 0;
@@ -77,18 +91,44 @@ uint32_t sysfs_read(const std::string &path)
}
static inline std::string sysfs_prefix(const PciTopology &topology)
static size_t sysfs_prefix(char path[PATH_MAX], const PciTopology &topology)
{
const std::string path = kPrefix + "0000:" + topology.toString().data() + "/hwmon/hwmon";
const auto it = hwmon_cache.find(topology);
if (it != hwmon_cache.end()) {
strcpy(path, it->second.data());
for (uint32_t i = 1; i < 10; ++i) {
const std::string prefix = path + std::to_string(i) + "/";
if (sysfs_is_amdgpu(prefix + "name") && (sysfs_read(prefix + "temp1_input") || sysfs_read(prefix + "power1_average"))) {
return prefix;
}
return it->second.size();
}
return {};
char *base = fmt::format_to(path, "{}0000:{}/hwmon/", kPrefix, topology.toString());
*base = '\0';
char *end = nullptr;
struct dirent **namelist;
int n = scandir(path, &namelist, dir_filter, nullptr);
if (n < 0) {
return {};
}
while (n--) {
if (!end) {
char *tmp = fmt::format_to(base, "{}/", namelist[n]->d_name);
end = (sysfs_is_amdgpu(path, tmp, "name") && (sysfs_read(path, tmp, "temp1_input") || sysfs_read(path, tmp, "power1_average"))) ? tmp : nullptr;
}
free(namelist[n]);
}
free(namelist);
if (end) {
*end = '\0';
hwmon_cache.insert({ topology, path });
return end - path;
}
return 0;
}
@@ -124,20 +164,22 @@ AdlHealth xmrig::AdlLib::health(const OclDevice &device)
return {};
}
const auto prefix = sysfs_prefix(device.topology());
if (prefix.empty()) {
static char path[PATH_MAX]{};
char *buf = path + sysfs_prefix(path, device.topology());
if (buf == path) {
return {};
}
AdlHealth health;
health.clock = sysfs_read(prefix + "freq1_input") / 1000000;
health.memClock = sysfs_read(prefix + "freq2_input") / 1000000;
health.power = sysfs_read(prefix + "power1_average") / 1000000;
health.rpm = sysfs_read(prefix + "fan1_input");
health.temperature = sysfs_read(prefix + "temp2_input") / 1000;
health.clock = sysfs_read(path, buf, "freq1_input") / 1000000;
health.memClock = sysfs_read(path, buf, "freq2_input") / 1000000;
health.power = sysfs_read(path, buf, "power1_average") / 1000000;
health.rpm = sysfs_read(path, buf, "fan1_input");
health.temperature = sysfs_read(path, buf, "temp2_input") / 1000;
if (!health.temperature) {
health.temperature = sysfs_read(prefix + "temp1_input") / 1000;
health.temperature = sysfs_read(path, buf, "temp1_input") / 1000;
}
return health;

View File

@@ -85,6 +85,7 @@ public:
BenchHashKey = 1047,
BenchTokenKey = 1048,
DmiKey = 1049,
HugePageSizeKey = 1050,
// xmrig common
CPUPriorityKey = 1021,

View File

@@ -37,9 +37,9 @@
"asm": true,
"argon2-impl": null,
"astrobwt-max-size": 550,
"astrobwt-avx2": false,
"cn/0": false,
"cn-lite/0": false,
"kawpow": false
"cn-lite/0": false
},
"opencl": {
"enabled": false,

View File

@@ -50,7 +50,7 @@ int xmrig::Controller::init()
{
Base::init();
VirtualMemory::init(config()->cpu().memPoolSize(), config()->cpu().isHugePages());
VirtualMemory::init(config()->cpu().memPoolSize(), config()->cpu().hugePageSize());
m_network = std::make_shared<Network>(this);

View File

@@ -125,9 +125,10 @@ void xmrig::ConfigTransform::transform(rapidjson::Document &doc, int key, const
BaseTransform::transform(doc, key, arg);
switch (key) {
case IConfig::AVKey: /* --av */
case IConfig::CPUPriorityKey: /* --cpu-priority */
case IConfig::ThreadsKey: /* --threads */
case IConfig::AVKey: /* --av */
case IConfig::CPUPriorityKey: /* --cpu-priority */
case IConfig::ThreadsKey: /* --threads */
case IConfig::HugePageSizeKey: /* --hugepage-size */
return transformUint64(doc, key, static_cast<uint64_t>(strtol(arg, nullptr, 10)));
case IConfig::HugePagesKey: /* --no-huge-pages */
@@ -149,8 +150,10 @@ void xmrig::ConfigTransform::transform(rapidjson::Document &doc, int key, const
case IConfig::YieldKey: /* --cpu-no-yield */
return set(doc, CpuConfig::kField, CpuConfig::kYield, false);
# ifdef XMRIG_ALGO_ARGON2
case IConfig::Argon2ImplKey: /* --argon2-impl */
return set(doc, CpuConfig::kField, CpuConfig::kArgon2Impl, arg);
# endif
# ifdef XMRIG_FEATURE_ASM
case IConfig::AssemblyKey: /* --asm */
@@ -304,6 +307,9 @@ void xmrig::ConfigTransform::transformUint64(rapidjson::Document &doc, int key,
case IConfig::CPUPriorityKey: /* --cpu-priority */
return set(doc, CpuConfig::kField, CpuConfig::kPriority, arg);
case IConfig::HugePageSizeKey: /* --hugepage-size */
return set(doc, CpuConfig::kField, CpuConfig::kHugePages, arg);
default:
break;
}

View File

@@ -67,9 +67,9 @@ R"===(
"asm": true,
"argon2-impl": null,
"astrobwt-max-size": 550,
"astrobwt-avx2": false,
"cn/0": false,
"cn-lite/0": false,
"kawpow": false
"cn-lite/0": false
},
"opencl": {
"enabled": false,

View File

@@ -71,6 +71,8 @@ static const option options[] = {
{ "nicehash", 0, nullptr, IConfig::NicehashKey },
{ "no-color", 0, nullptr, IConfig::ColorKey },
{ "no-huge-pages", 0, nullptr, IConfig::HugePagesKey },
{ "no-hugepages", 0, nullptr, IConfig::HugePagesKey },
{ "hugepage-size", 1, nullptr, IConfig::HugePageSizeKey },
{ "pass", 1, nullptr, IConfig::PasswordKey },
{ "print-time", 1, nullptr, IConfig::PrintTimeKey },
{ "retries", 1, nullptr, IConfig::RetriesKey },

View File

@@ -83,6 +83,9 @@ static inline const std::string &usage()
u += " --cpu-memory-pool=N number of 2 MB pages for persistent memory pool, -1 (auto), 0 (disable)\n";
u += " --cpu-no-yield prefer maximum hashrate rather than system response/stability\n";
u += " --no-huge-pages disable huge pages support\n";
# ifdef XMRIG_OS_LINUX
u += " --hugepage-size=N custom hugepage size in kB\n";
# endif
u += " --asm=ASM ASM optimizations, possible values: auto, none, intel, ryzen, bulldozer\n";
# if defined(__x86_64__) || defined(_M_AMD64)
@@ -155,7 +158,7 @@ static inline const std::string &usage()
u += " -l, --log-file=FILE log all output to a file\n";
u += " --print-time=N print hashrate report every N seconds\n";
# ifdef XMRIG_FEATURE_NVML
# if defined(XMRIG_FEATURE_NVML) || defined(XMRIG_FEATURE_ADL)
u += " --health-print-time=N print health report every N seconds\n";
# endif
u += " --no-color disable colored output\n";

View File

@@ -49,15 +49,15 @@
#define ADD_FN(algo) \
m_map[algo][AV_SINGLE][Assembly::NONE] = cryptonight_single_hash<algo, false>; \
m_map[algo][AV_SINGLE_SOFT][Assembly::NONE] = cryptonight_single_hash<algo, true>; \
m_map[algo][AV_DOUBLE][Assembly::NONE] = cryptonight_double_hash<algo, false>; \
m_map[algo][AV_DOUBLE_SOFT][Assembly::NONE] = cryptonight_double_hash<algo, true>; \
m_map[algo][AV_TRIPLE][Assembly::NONE] = cryptonight_triple_hash<algo, false>; \
m_map[algo][AV_TRIPLE_SOFT][Assembly::NONE] = cryptonight_triple_hash<algo, true>; \
m_map[algo][AV_QUAD][Assembly::NONE] = cryptonight_quad_hash<algo, false>; \
m_map[algo][AV_QUAD_SOFT][Assembly::NONE] = cryptonight_quad_hash<algo, true>; \
m_map[algo][AV_PENTA][Assembly::NONE] = cryptonight_penta_hash<algo, false>; \
m_map[algo][AV_SINGLE][Assembly::NONE] = cryptonight_single_hash<algo, false, 0>; \
m_map[algo][AV_SINGLE_SOFT][Assembly::NONE] = cryptonight_single_hash<algo, true, 0>; \
m_map[algo][AV_DOUBLE][Assembly::NONE] = cryptonight_double_hash<algo, false>; \
m_map[algo][AV_DOUBLE_SOFT][Assembly::NONE] = cryptonight_double_hash<algo, true>; \
m_map[algo][AV_TRIPLE][Assembly::NONE] = cryptonight_triple_hash<algo, false>; \
m_map[algo][AV_TRIPLE_SOFT][Assembly::NONE] = cryptonight_triple_hash<algo, true>; \
m_map[algo][AV_QUAD][Assembly::NONE] = cryptonight_quad_hash<algo, false>; \
m_map[algo][AV_QUAD_SOFT][Assembly::NONE] = cryptonight_quad_hash<algo, true>; \
m_map[algo][AV_PENTA][Assembly::NONE] = cryptonight_penta_hash<algo, false>; \
m_map[algo][AV_PENTA_SOFT][Assembly::NONE] = cryptonight_penta_hash<algo, true>;
@@ -298,6 +298,22 @@ xmrig::cn_hash_fun xmrig::CnHash::fn(const Algorithm &algorithm, AlgoVariant av,
return nullptr;
}
# ifdef XMRIG_ALGO_CN_HEAVY
// cn-heavy optimization for Zen3 CPUs
if ((av == AV_SINGLE) && (xmrig::Cpu::info()->arch() == xmrig::ICpuInfo::ARCH_ZEN3)) {
switch (algorithm.id()) {
case xmrig::Algorithm::CN_HEAVY_0:
return cryptonight_single_hash<xmrig::Algorithm::CN_HEAVY_0, false, 3>;
case xmrig::Algorithm::CN_HEAVY_TUBE:
return cryptonight_single_hash<xmrig::Algorithm::CN_HEAVY_TUBE, false, 3>;
case xmrig::Algorithm::CN_HEAVY_XHV:
return cryptonight_single_hash<xmrig::Algorithm::CN_HEAVY_XHV, false, 3>;
default:
break;
}
}
# endif
# ifdef XMRIG_FEATURE_ASM
cn_hash_fun fun = cnHash.m_map[algorithm][av][Cpu::assembly(assembly)];
if (fun) {

View File

@@ -431,7 +431,7 @@ static inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var)
}
template<Algorithm::Id ALGO, bool SOFT_AES>
template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
{
constexpr CnAlgo<ALGO> props;

View File

@@ -306,7 +306,21 @@ inline void mix_and_propagate(__m128i& x0, __m128i& x1, __m128i& x2, __m128i& x3
namespace xmrig {
template<Algorithm::Id ALGO, bool SOFT_AES>
template<int interleave>
static inline constexpr uint64_t interleaved_index(uint64_t k)
{
return ((k & ~63ULL) << interleave) | (k & 63);
}
template<>
inline constexpr uint64_t interleaved_index<0>(uint64_t k)
{
return k;
}
template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
{
constexpr CnAlgo<ALGO> props;
@@ -343,6 +357,11 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
}
for (size_t i = 0; i < props.memory() / sizeof(__m128i); i += 8) {
if (interleave > 0) {
_mm_prefetch((const char*)(output), _MM_HINT_T0);
_mm_prefetch((const char*)(output + (64 << interleave) / sizeof(__m128i)), _MM_HINT_T0);
}
aes_round<SOFT_AES>(k0, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round<SOFT_AES>(k1, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round<SOFT_AES>(k2, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
@@ -354,19 +373,21 @@ static inline void cn_explode_scratchpad(const __m128i *input, __m128i *output)
aes_round<SOFT_AES>(k8, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
aes_round<SOFT_AES>(k9, &xin0, &xin1, &xin2, &xin3, &xin4, &xin5, &xin6, &xin7);
_mm_store_si128(output + i + 0, xin0);
_mm_store_si128(output + i + 1, xin1);
_mm_store_si128(output + i + 2, xin2);
_mm_store_si128(output + i + 3, xin3);
_mm_store_si128(output + i + 4, xin4);
_mm_store_si128(output + i + 5, xin5);
_mm_store_si128(output + i + 6, xin6);
_mm_store_si128(output + i + 7, xin7);
_mm_store_si128(output + 0, xin0);
_mm_store_si128(output + 1, xin1);
_mm_store_si128(output + 2, xin2);
_mm_store_si128(output + 3, xin3);
output += (64 << interleave) / sizeof(__m128i);
_mm_store_si128(output + 0, xin4);
_mm_store_si128(output + 1, xin5);
_mm_store_si128(output + 2, xin6);
_mm_store_si128(output + 3, xin7);
output += (64 << interleave) / sizeof(__m128i);
}
}
template<Algorithm::Id ALGO, bool SOFT_AES>
template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
{
constexpr CnAlgo<ALGO> props;
@@ -387,15 +408,25 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
xout6 = _mm_load_si128(output + 10);
xout7 = _mm_load_si128(output + 11);
for (size_t i = 0; i < props.memory() / sizeof(__m128i); i += 8) {
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
const __m128i* input_begin = input;
for (size_t i = 0; i < props.memory() / sizeof(__m128i);) {
xout0 = _mm_xor_si128(_mm_load_si128(input + 0), xout0);
xout1 = _mm_xor_si128(_mm_load_si128(input + 1), xout1);
xout2 = _mm_xor_si128(_mm_load_si128(input + 2), xout2);
xout3 = _mm_xor_si128(_mm_load_si128(input + 3), xout3);
input += (64 << interleave) / sizeof(__m128i);
xout4 = _mm_xor_si128(_mm_load_si128(input + 0), xout4);
xout5 = _mm_xor_si128(_mm_load_si128(input + 1), xout5);
xout6 = _mm_xor_si128(_mm_load_si128(input + 2), xout6);
xout7 = _mm_xor_si128(_mm_load_si128(input + 3), xout7);
input += (64 << interleave) / sizeof(__m128i);
i += 8;
if ((interleave > 0) && (i < props.memory() / sizeof(__m128i))) {
_mm_prefetch((const char*)(input), _MM_HINT_T0);
_mm_prefetch((const char*)(input + (64 << interleave) / sizeof(__m128i)), _MM_HINT_T0);
}
aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
@@ -414,15 +445,25 @@ static inline void cn_implode_scratchpad(const __m128i *input, __m128i *output)
}
if (IS_HEAVY) {
for (size_t i = 0; i < props.memory() / sizeof(__m128i); i += 8) {
xout0 = _mm_xor_si128(_mm_load_si128(input + i + 0), xout0);
xout1 = _mm_xor_si128(_mm_load_si128(input + i + 1), xout1);
xout2 = _mm_xor_si128(_mm_load_si128(input + i + 2), xout2);
xout3 = _mm_xor_si128(_mm_load_si128(input + i + 3), xout3);
xout4 = _mm_xor_si128(_mm_load_si128(input + i + 4), xout4);
xout5 = _mm_xor_si128(_mm_load_si128(input + i + 5), xout5);
xout6 = _mm_xor_si128(_mm_load_si128(input + i + 6), xout6);
xout7 = _mm_xor_si128(_mm_load_si128(input + i + 7), xout7);
input = input_begin;
for (size_t i = 0; i < props.memory() / sizeof(__m128i);) {
xout0 = _mm_xor_si128(_mm_load_si128(input + 0), xout0);
xout1 = _mm_xor_si128(_mm_load_si128(input + 1), xout1);
xout2 = _mm_xor_si128(_mm_load_si128(input + 2), xout2);
xout3 = _mm_xor_si128(_mm_load_si128(input + 3), xout3);
input += (64 << interleave) / sizeof(__m128i);
xout4 = _mm_xor_si128(_mm_load_si128(input + 0), xout4);
xout5 = _mm_xor_si128(_mm_load_si128(input + 1), xout5);
xout6 = _mm_xor_si128(_mm_load_si128(input + 2), xout6);
xout7 = _mm_xor_si128(_mm_load_si128(input + 3), xout7);
input += (64 << interleave) / sizeof(__m128i);
i += 8;
if ((interleave > 0) && (i < props.memory() / sizeof(__m128i))) {
_mm_prefetch((const char*)(input), _MM_HINT_T0);
_mm_prefetch((const char*)(input + (64 << interleave) / sizeof(__m128i)), _MM_HINT_T0);
}
aes_round<SOFT_AES>(k0, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
aes_round<SOFT_AES>(k1, &xout0, &xout1, &xout2, &xout3, &xout4, &xout5, &xout6, &xout7);
@@ -558,7 +599,7 @@ static inline void cryptonight_conceal_tweak(__m128i& cx, __m128& conc_var)
cx = _mm_xor_si128(cx, _mm_cvttps_epi32(nc));
}
template<Algorithm::Id ALGO, bool SOFT_AES>
template<Algorithm::Id ALGO, bool SOFT_AES, int interleave>
inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t size, uint8_t *__restrict__ output, cryptonight_ctx **__restrict__ ctx, uint64_t height)
{
constexpr CnAlgo<ALGO> props;
@@ -577,7 +618,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
}
keccak(input, size, ctx[0]->state);
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(ctx[0]->state), reinterpret_cast<__m128i *>(ctx[0]->memory));
cn_explode_scratchpad<ALGO, SOFT_AES, interleave>(reinterpret_cast<const __m128i *>(ctx[0]->state), reinterpret_cast<__m128i *>(ctx[0]->memory));
uint64_t *h0 = reinterpret_cast<uint64_t*>(ctx[0]->state);
uint8_t *l0 = ctx[0]->memory;
@@ -620,7 +661,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
for (size_t i = 0; i < props.iterations(); i++) {
__m128i cx;
if (IS_CN_HEAVY_TUBE || !SOFT_AES) {
cx = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[idx0 & MASK]));
cx = _mm_load_si128(reinterpret_cast<const __m128i *>(&l0[interleaved_index<interleave>(idx0 & MASK)]));
if (ALGO == Algorithm::CN_CCX) {
cryptonight_conceal_tweak(cx, conc_var);
}
@@ -632,12 +673,12 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
}
else if (SOFT_AES) {
if (ALGO == Algorithm::CN_CCX) {
cx = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[idx0 & MASK]));
cx = _mm_load_si128(reinterpret_cast<const __m128i*>(&l0[interleaved_index<interleave>(idx0 & MASK)]));
cryptonight_conceal_tweak(cx, conc_var);
cx = soft_aesenc(&cx, ax0, reinterpret_cast<const uint32_t*>(saes_table));
}
else {
cx = soft_aesenc(&l0[idx0 & MASK], ax0, reinterpret_cast<const uint32_t*>(saes_table));
cx = soft_aesenc(&l0[interleaved_index<interleave>(idx0 & MASK)], ax0, reinterpret_cast<const uint32_t*>(saes_table));
}
}
else {
@@ -645,16 +686,16 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
}
if (BASE == Algorithm::CN_1 || BASE == Algorithm::CN_2) {
cryptonight_monero_tweak<ALGO>(reinterpret_cast<uint64_t*>(&l0[idx0 & MASK]), l0, idx0 & MASK, ax0, bx0, bx1, cx);
cryptonight_monero_tweak<ALGO>(reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)]), l0, idx0 & MASK, ax0, bx0, bx1, cx);
} else {
_mm_store_si128(reinterpret_cast<__m128i *>(&l0[idx0 & MASK]), _mm_xor_si128(bx0, cx));
_mm_store_si128(reinterpret_cast<__m128i *>(&l0[interleaved_index<interleave>(idx0 & MASK)]), _mm_xor_si128(bx0, cx));
}
idx0 = static_cast<uint64_t>(_mm_cvtsi128_si64(cx));
uint64_t hi, lo, cl, ch;
cl = (reinterpret_cast<uint64_t*>(&l0[idx0 & MASK]))[0];
ch = (reinterpret_cast<uint64_t*>(&l0[idx0 & MASK]))[1];
cl = (reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)]))[0];
ch = (reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)]))[1];
if (BASE == Algorithm::CN_2) {
if (props.isR()) {
@@ -681,14 +722,14 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
al0 += hi;
ah0 += lo;
reinterpret_cast<uint64_t*>(&l0[idx0 & MASK])[0] = al0;
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[0] = al0;
if (IS_CN_HEAVY_TUBE || ALGO == Algorithm::CN_RTO) {
reinterpret_cast<uint64_t*>(&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0 ^ al0;
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[1] = ah0 ^ tweak1_2_0 ^ al0;
} else if (BASE == Algorithm::CN_1) {
reinterpret_cast<uint64_t*>(&l0[idx0 & MASK])[1] = ah0 ^ tweak1_2_0;
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[1] = ah0 ^ tweak1_2_0;
} else {
reinterpret_cast<uint64_t*>(&l0[idx0 & MASK])[1] = ah0;
reinterpret_cast<uint64_t*>(&l0[interleaved_index<interleave>(idx0 & MASK)])[1] = ah0;
}
al0 ^= cl;
@@ -697,11 +738,11 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
# ifdef XMRIG_ALGO_CN_HEAVY
if (props.isHeavy()) {
int64_t n = ((int64_t*)&l0[idx0 & MASK])[0];
int32_t d = ((int32_t*)&l0[idx0 & MASK])[2];
int64_t n = ((int64_t*)&l0[interleaved_index<interleave>(idx0 & MASK)])[0];
int32_t d = ((int32_t*)&l0[interleaved_index<interleave>(idx0 & MASK)])[2];
int64_t q = n / (d | 0x5);
((int64_t*)&l0[idx0 & MASK])[0] = n ^ q;
((int64_t*)&l0[interleaved_index<interleave>(idx0 & MASK)])[0] = n ^ q;
if (ALGO == Algorithm::CN_HEAVY_XHV) {
d = ~d;
@@ -722,7 +763,7 @@ inline void cryptonight_single_hash(const uint8_t *__restrict__ input, size_t si
}
# endif
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(ctx[0]->memory), reinterpret_cast<__m128i *>(ctx[0]->state));
cn_implode_scratchpad<ALGO, SOFT_AES, interleave>(reinterpret_cast<const __m128i *>(ctx[0]->memory), reinterpret_cast<__m128i *>(ctx[0]->state));
keccakf(h0, 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
@@ -810,7 +851,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
}
keccak(input, size, ctx[0]->state);
cn_explode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
cn_explode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
if (ALGO == Algorithm::CN_2) {
if (ASM == Assembly::INTEL) {
@@ -887,7 +928,7 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
ctx[0]->generated_code(ctx);
}
cn_implode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
cn_implode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
extra_hashes[ctx[0]->state[0] & 3](ctx[0]->state, 200, output);
}
@@ -909,8 +950,8 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
keccak(input, size, ctx[0]->state);
keccak(input + size, size, ctx[1]->state);
cn_explode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
cn_explode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[1]->state), reinterpret_cast<__m128i*>(ctx[1]->memory));
cn_explode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->state), reinterpret_cast<__m128i*>(ctx[0]->memory));
cn_explode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[1]->state), reinterpret_cast<__m128i*>(ctx[1]->memory));
if (ALGO == Algorithm::CN_2) {
cnv2_double_mainloop_sandybridge_asm(ctx);
@@ -939,8 +980,8 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
ctx[0]->generated_code(ctx);
}
cn_implode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
cn_implode_scratchpad<ALGO, false>(reinterpret_cast<const __m128i*>(ctx[1]->memory), reinterpret_cast<__m128i*>(ctx[1]->state));
cn_implode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[0]->memory), reinterpret_cast<__m128i*>(ctx[0]->state));
cn_implode_scratchpad<ALGO, false, 0>(reinterpret_cast<const __m128i*>(ctx[1]->memory), reinterpret_cast<__m128i*>(ctx[1]->state));
keccakf(reinterpret_cast<uint64_t*>(ctx[0]->state), 24);
keccakf(reinterpret_cast<uint64_t*>(ctx[1]->state), 24);
@@ -991,8 +1032,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
VARIANT4_RANDOM_MATH_INIT(0);
VARIANT4_RANDOM_MATH_INIT(1);
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(h0), reinterpret_cast<__m128i *>(l0));
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(h1), reinterpret_cast<__m128i *>(l1));
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(h0), reinterpret_cast<__m128i *>(l0));
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(h1), reinterpret_cast<__m128i *>(l1));
uint64_t al0 = h0[0] ^ h0[4];
uint64_t al1 = h1[0] ^ h1[4];
@@ -1187,8 +1228,8 @@ inline void cryptonight_double_hash(const uint8_t *__restrict__ input, size_t si
bx10 = cx1;
}
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(l0), reinterpret_cast<__m128i *>(h0));
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i *>(l1), reinterpret_cast<__m128i *>(h1));
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(l0), reinterpret_cast<__m128i *>(h0));
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i *>(l1), reinterpret_cast<__m128i *>(h1));
keccakf(h0, 24);
keccakf(h1, 24);
@@ -1333,7 +1374,7 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
for (size_t i = 0; i < 3; i++) {
keccak(input + size * i, size, ctx[i]->state);
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
}
uint8_t* l0 = ctx[0]->memory;
@@ -1378,7 +1419,7 @@ inline void cryptonight_triple_hash(const uint8_t *__restrict__ input, size_t si
}
for (size_t i = 0; i < 3; i++) {
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
}
@@ -1407,7 +1448,7 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
for (size_t i = 0; i < 4; i++) {
keccak(input + size * i, size, ctx[i]->state);
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
}
uint8_t* l0 = ctx[0]->memory;
@@ -1460,7 +1501,7 @@ inline void cryptonight_quad_hash(const uint8_t *__restrict__ input, size_t size
}
for (size_t i = 0; i < 4; i++) {
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
}
@@ -1489,7 +1530,7 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
for (size_t i = 0; i < 5; i++) {
keccak(input + size * i, size, ctx[i]->state);
cn_explode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
cn_explode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->state), reinterpret_cast<__m128i*>(ctx[i]->memory));
}
uint8_t* l0 = ctx[0]->memory;
@@ -1550,7 +1591,7 @@ inline void cryptonight_penta_hash(const uint8_t *__restrict__ input, size_t siz
}
for (size_t i = 0; i < 5; i++) {
cn_implode_scratchpad<ALGO, SOFT_AES>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
cn_implode_scratchpad<ALGO, SOFT_AES, 0>(reinterpret_cast<const __m128i*>(ctx[i]->memory), reinterpret_cast<__m128i*>(ctx[i]->state));
keccakf(reinterpret_cast<uint64_t*>(ctx[i]->state), 24);
extra_hashes[ctx[i]->state[0] & 3](ctx[i]->state, 200, output + 32 * i);
}

View File

@@ -1,12 +1,6 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -27,24 +21,16 @@
#include "crypto/common/VirtualMemory.h"
namespace xmrig {
constexpr size_t twoMiB = 2U * 1024U * 1024U;
constexpr size_t oneGiB = 1024U * 1024U * 1024U;
} // namespace xmrig
xmrig::HugePagesInfo::HugePagesInfo(const VirtualMemory *memory)
{
if (memory->isOneGbPages()) {
size = VirtualMemory::align(memory->size(), oneGiB);
total = size / oneGiB;
allocated = size / oneGiB;
size = VirtualMemory::align(memory->size(), VirtualMemory::kOneGiB);
total = size / VirtualMemory::kOneGiB;
allocated = size / VirtualMemory::kOneGiB;
}
else {
size = memory->size();
total = size / twoMiB;
size = VirtualMemory::alignToHugePageSize(memory->size());
total = size / VirtualMemory::hugePageSize();
allocated = memory->isHugePages() ? total : 0;
}
}

View File

@@ -1,12 +1,6 @@
/* XMRig
* Copyright 2010 Jeff Garzik <jgarzik@pobox.com>
* Copyright 2012-2014 pooler <pooler@litecoinpool.org>
* Copyright 2014 Lucas Jones <https://github.com/lucasjones>
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2019 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2019 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by

View File

@@ -1,6 +1,6 @@
/* XMRig
* Copyright (c) 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -18,8 +18,6 @@
#include "crypto/common/LinuxMemory.h"
#include "3rdparty/fmt/core.h"
#include "backend/cpu/Cpu.h"
#include "base/io/log/Log.h"
#include "crypto/common/VirtualMemory.h"
@@ -37,33 +35,32 @@ constexpr size_t twoMiB = 2U * 1024U * 1024U;
constexpr size_t oneGiB = 1024U * 1024U * 1024U;
static inline std::string sysfs_path(uint32_t node, bool oneGbPages, bool nr)
static inline std::string sysfs_path(uint32_t node, size_t hugePageSize, bool nr)
{
return fmt::format("/sys/devices/system/node/node{}/hugepages/hugepages-{}kB/{}_hugepages", node, oneGbPages ? "1048576" : "2048", nr ? "nr" : "free");
return fmt::format("/sys/devices/system/node/node{}/hugepages/hugepages-{}kB/{}_hugepages", node, hugePageSize / 1024, nr ? "nr" : "free");
}
static inline bool write_nr_hugepages(uint32_t node, bool oneGbPages, uint64_t count) { return LinuxMemory::write(sysfs_path(node, oneGbPages, true).c_str(), count); }
static inline int64_t free_hugepages(uint32_t node, bool oneGbPages) { return LinuxMemory::read(sysfs_path(node, oneGbPages, false).c_str()); }
static inline int64_t nr_hugepages(uint32_t node, bool oneGbPages) { return LinuxMemory::read(sysfs_path(node, oneGbPages, true).c_str()); }
static inline bool write_nr_hugepages(uint32_t node, size_t hugePageSize, uint64_t count) { return LinuxMemory::write(sysfs_path(node, hugePageSize, true).c_str(), count); }
static inline int64_t free_hugepages(uint32_t node, size_t hugePageSize) { return LinuxMemory::read(sysfs_path(node, hugePageSize, false).c_str()); }
static inline int64_t nr_hugepages(uint32_t node, size_t hugePageSize) { return LinuxMemory::read(sysfs_path(node, hugePageSize, true).c_str()); }
} // namespace xmrig
bool xmrig::LinuxMemory::reserve(size_t size, uint32_t node, bool oneGbPages)
bool xmrig::LinuxMemory::reserve(size_t size, uint32_t node, size_t hugePageSize)
{
std::lock_guard<std::mutex> lock(mutex);
const size_t pageSize = oneGbPages ? oneGiB : twoMiB;
const size_t required = VirtualMemory::align(size, pageSize) / pageSize;
const size_t required = VirtualMemory::align(size, hugePageSize) / hugePageSize;
const auto available = free_hugepages(node, oneGbPages);
const auto available = free_hugepages(node, hugePageSize);
if (available < 0 || static_cast<size_t>(available) >= required) {
return false;
}
return write_nr_hugepages(node, oneGbPages, std::max<size_t>(nr_hugepages(node, oneGbPages), 0) + (required - available));
return write_nr_hugepages(node, hugePageSize, std::max<size_t>(nr_hugepages(node, hugePageSize), 0) + (required - available));
}

View File

@@ -1,6 +1,6 @@
/* XMRig
* Copyright (c) 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -30,7 +30,7 @@ namespace xmrig {
class LinuxMemory
{
public:
static bool reserve(size_t size, uint32_t node, bool oneGbPages = false);
static bool reserve(size_t size, uint32_t node, size_t hugePageSize);
static bool write(const char *path, uint64_t value);
static int64_t read(const char *path);

View File

@@ -1,7 +1,7 @@
/* XMRig
* Copyright (c) 2018-2020 tevador <tevador@gmail.com>
* Copyright (c) 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -36,16 +36,19 @@
namespace xmrig {
static IMemoryPool *pool = nullptr;
size_t VirtualMemory::m_hugePageSize = VirtualMemory::kDefaultHugePageSize;
static IMemoryPool *pool = nullptr;
static std::mutex mutex;
} // namespace xmrig
xmrig::VirtualMemory::VirtualMemory(size_t size, bool hugePages, bool oneGbPages, bool usePool, uint32_t node, size_t alignSize) :
m_size(align(size)),
m_capacity(m_size),
m_node(node)
m_size(alignToHugePageSize(size)),
m_node(node),
m_capacity(m_size)
{
if (usePool) {
std::lock_guard<std::mutex> lock(mutex);
@@ -114,18 +117,18 @@ void xmrig::VirtualMemory::destroy()
}
void xmrig::VirtualMemory::init(size_t poolSize, bool hugePages)
void xmrig::VirtualMemory::init(size_t poolSize, size_t hugePageSize)
{
if (!pool) {
osInit(hugePages);
osInit(hugePageSize);
}
# ifdef XMRIG_FEATURE_HWLOC
if (Cpu::info()->nodes() > 1) {
pool = new NUMAMemoryPool(align(poolSize, Cpu::info()->nodes()), hugePages);
pool = new NUMAMemoryPool(align(poolSize, Cpu::info()->nodes()), hugePageSize > 0);
} else
# endif
{
pool = new MemoryPool(poolSize, hugePages);
pool = new MemoryPool(poolSize, hugePageSize > 0);
}
}

View File

@@ -1,7 +1,7 @@
/* XMRig
* Copyright (c) 2018-2020 tevador <tevador@gmail.com>
* Copyright (c) 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -39,6 +39,9 @@ class VirtualMemory
public:
XMRIG_DISABLE_COPY_MOVE_DEFAULT(VirtualMemory)
constexpr static size_t kDefaultHugePageSize = 2U * 1024U * 1024U;
constexpr static size_t kOneGiB = 1024U * 1024U * 1024U;
VirtualMemory(size_t size, bool hugePages, bool oneGbPages, bool usePool, uint32_t node = 0, size_t alignSize = 64);
~VirtualMemory();
@@ -65,9 +68,11 @@ public:
static void destroy();
static void flushInstructionCache(void *p, size_t size);
static void freeLargePagesMemory(void *p, size_t size);
static void init(size_t poolSize, bool hugePages);
static void init(size_t poolSize, size_t hugePageSize);
static inline constexpr size_t align(size_t pos, size_t align = 2097152) { return ((pos - 1) / align + 1) * align; }
static inline constexpr size_t align(size_t pos, size_t align = kDefaultHugePageSize) { return ((pos - 1) / align + 1) * align; }
static inline size_t alignToHugePageSize(size_t pos) { return align(pos, hugePageSize()); }
static inline size_t hugePageSize() { return m_hugePageSize; }
private:
enum Flags {
@@ -78,15 +83,17 @@ private:
FLAG_MAX
};
static void osInit(bool hugePages);
static void osInit(size_t hugePageSize);
bool allocateLargePagesMemory();
bool allocateOneGbPagesMemory();
void freeLargePagesMemory();
static size_t m_hugePageSize;
const size_t m_size;
size_t m_capacity;
const uint32_t m_node;
size_t m_capacity;
std::bitset<FLAG_MAX> m_flags;
uint8_t *m_scratchpad = nullptr;
};

View File

@@ -1,7 +1,7 @@
/* XMRig
* Copyright (c) 2018-2020 tevador <tevador@gmail.com>
* Copyright (c) 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -18,13 +18,14 @@
*/
#include <cstdlib>
#include <sys/mman.h>
#include "crypto/common/VirtualMemory.h"
#include "backend/cpu/Cpu.h"
#include "crypto/common/portable/mm_malloc.h"
#include "crypto/common/VirtualMemory.h"
#include <cmath>
#include <cstdlib>
#include <sys/mman.h>
#ifdef XMRIG_OS_APPLE
@@ -42,14 +43,21 @@
#endif
#if defined(XMRIG_OS_LINUX)
# if (defined(MAP_HUGE_1GB) || defined(MAP_HUGE_SHIFT))
# define XMRIG_HAS_1GB_PAGES
# endif
#ifdef XMRIG_OS_LINUX
# include "crypto/common/LinuxMemory.h"
#endif
#ifndef MAP_HUGE_SHIFT
# define MAP_HUGE_SHIFT 26
#endif
#ifndef MAP_HUGE_MASK
# define MAP_HUGE_MASK 0x3f
#endif
#ifdef XMRIG_SECURE_JIT
# define SECURE_PROT_EXEC 0
#else
@@ -57,6 +65,14 @@
#endif
#if defined(XMRIG_OS_LINUX) || (!defined(XMRIG_OS_APPLE) && !defined(__FreeBSD__))
static inline int hugePagesFlag(size_t size)
{
return (static_cast<int>(log2(size)) & MAP_HUGE_MASK) << MAP_HUGE_SHIFT;
}
#endif
bool xmrig::VirtualMemory::isHugepagesAvailable()
{
# if defined(XMRIG_OS_MACOS) && defined(XMRIG_ARM)
@@ -69,7 +85,7 @@ bool xmrig::VirtualMemory::isHugepagesAvailable()
bool xmrig::VirtualMemory::isOneGbPagesAvailable()
{
# ifdef XMRIG_HAS_1GB_PAGES
# ifdef XMRIG_OS_LINUX
return Cpu::info()->hasOneGbPages();
# else
return false;
@@ -126,18 +142,10 @@ void *xmrig::VirtualMemory::allocateExecutableMemory(size_t size, bool hugePages
# else
# if defined(MAP_HUGE_2MB)
constexpr int flag_2mb = MAP_HUGE_2MB;
# elif defined(MAP_HUGE_SHIFT)
constexpr int flag_2mb = (21 << MAP_HUGE_SHIFT);
# else
constexpr int flag_2mb = 0;
# endif
void *mem = nullptr;
if (hugePages) {
mem = mmap(0, align(size), PROT_READ | PROT_WRITE | SECURE_PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE | flag_2mb, -1, 0);
mem = mmap(0, align(size), PROT_READ | PROT_WRITE | SECURE_PROT_EXEC, MAP_PRIVATE | MAP_ANONYMOUS | MAP_POPULATE | hugePagesFlag(hugePageSize()), -1, 0);
}
if (!mem) {
@@ -152,22 +160,12 @@ void *xmrig::VirtualMemory::allocateExecutableMemory(size_t size, bool hugePages
void *xmrig::VirtualMemory::allocateLargePagesMemory(size_t size)
{
# if defined(__APPLE__)
# if defined(XMRIG_OS_APPLE)
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, VM_FLAGS_SUPERPAGE_SIZE_2MB, 0);
# elif defined(__FreeBSD__)
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_ALIGNED_SUPER | MAP_PREFAULT_READ, -1, 0);
# else
# if defined(MAP_HUGE_2MB)
constexpr int flag_2mb = MAP_HUGE_2MB;
# elif defined(MAP_HUGE_SHIFT)
constexpr int flag_2mb = (21 << MAP_HUGE_SHIFT);
# else
constexpr int flag_2mb = 0;
# endif
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE | flag_2mb, 0, 0);
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE | hugePagesFlag(hugePageSize()), 0, 0);
# endif
return mem == MAP_FAILED ? nullptr : mem;
@@ -176,17 +174,9 @@ void *xmrig::VirtualMemory::allocateLargePagesMemory(size_t size)
void *xmrig::VirtualMemory::allocateOneGbPagesMemory(size_t size)
{
# ifdef XMRIG_HAS_1GB_PAGES
# ifdef XMRIG_OS_LINUX
if (isOneGbPagesAvailable()) {
# if defined(MAP_HUGE_1GB)
constexpr int flag_1gb = MAP_HUGE_1GB;
# elif defined(MAP_HUGE_SHIFT)
constexpr int flag_1gb = (30 << MAP_HUGE_SHIFT);
# else
constexpr int flag_1gb = 0;
# endif
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE | flag_1gb, 0, 0);
void *mem = mmap(0, size, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANONYMOUS | MAP_HUGETLB | MAP_POPULATE | hugePagesFlag(kOneGiB), 0, 0);
return mem == MAP_FAILED ? nullptr : mem;
}
@@ -212,15 +202,18 @@ void xmrig::VirtualMemory::freeLargePagesMemory(void *p, size_t size)
}
void xmrig::VirtualMemory::osInit(bool)
void xmrig::VirtualMemory::osInit(size_t hugePageSize)
{
if (hugePageSize) {
m_hugePageSize = hugePageSize;
}
}
bool xmrig::VirtualMemory::allocateLargePagesMemory()
{
# if defined(XMRIG_OS_LINUX)
LinuxMemory::reserve(m_size, m_node);
# ifdef XMRIG_OS_LINUX
LinuxMemory::reserve(m_size, m_node, hugePageSize());
# endif
m_scratchpad = static_cast<uint8_t*>(allocateLargePagesMemory(m_size));
@@ -242,8 +235,8 @@ bool xmrig::VirtualMemory::allocateLargePagesMemory()
bool xmrig::VirtualMemory::allocateOneGbPagesMemory()
{
# if defined(XMRIG_HAS_1GB_PAGES)
LinuxMemory::reserve(m_size, m_node, true);
# ifdef XMRIG_OS_LINUX
LinuxMemory::reserve(m_size, m_node, kOneGiB);
# endif
m_scratchpad = static_cast<uint8_t*>(allocateOneGbPagesMemory(m_size));

View File

@@ -1,7 +1,7 @@
/* XMRig
* Copyright (c) 2018-2020 tevador <tevador@gmail.com>
* Copyright (c) 2018-2020 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
@@ -24,9 +24,9 @@
#include <tchar.h>
#include "crypto/common/VirtualMemory.h"
#include "base/io/log/Log.h"
#include "crypto/common/portable/mm_malloc.h"
#include "crypto/common/VirtualMemory.h"
#ifdef XMRIG_SECURE_JIT
@@ -233,9 +233,9 @@ void xmrig::VirtualMemory::freeLargePagesMemory(void *p, size_t)
}
void xmrig::VirtualMemory::osInit(bool hugePages)
void xmrig::VirtualMemory::osInit(size_t hugePageSize)
{
if (hugePages) {
if (hugePageSize) {
hugepagesAvailable = TrySetLockPagesPrivilege();
}
}

View File

@@ -171,7 +171,7 @@ void JitCompilerA64::generateProgram(Program& program, ProgramConfiguration& con
emit32(ARMV8A::EOR | 10 | (IntRegMap[config.readReg0] << 5) | (IntRegMap[config.readReg1] << 16), code, codePos);
# ifndef XMRIG_OS_APPLE
xmrig::VirtualMemory::flushInstructionCache(reinterpret_cast<char*>(code + MainLoopBegin), reinterpret_cast<char*>(code + codePos));
xmrig::VirtualMemory::flushInstructionCache(reinterpret_cast<char*>(code + MainLoopBegin), codePos - MainLoopBegin);
# endif
}
@@ -237,7 +237,7 @@ void JitCompilerA64::generateProgramLight(Program& program, ProgramConfiguration
emit32(ARMV8A::ADD_IMM_HI | 2 | (2 << 5) | (imm_hi << 10), code, codePos);
# ifndef XMRIG_OS_APPLE
xmrig::VirtualMemory::flushInstructionCache(reinterpret_cast<char*>(code + MainLoopBegin), reinterpret_cast<char*>(code + codePos));
xmrig::VirtualMemory::flushInstructionCache(reinterpret_cast<char*>(code + MainLoopBegin), codePos - MainLoopBegin);
# endif
}
@@ -364,7 +364,7 @@ void JitCompilerA64::generateSuperscalarHash(SuperscalarProgram(&programs)[N])
codePos += p2 - p1;
# ifndef XMRIG_OS_APPLE
xmrig::VirtualMemory::flushInstructionCache(reinterpret_cast<char*>(code + CodeSize), reinterpret_cast<char*>(code + codePos));
xmrig::VirtualMemory::flushInstructionCache(reinterpret_cast<char*>(code + CodeSize), codePos - MainLoopBegin);
# endif
}
@@ -401,6 +401,10 @@ void JitCompilerA64::allocate(size_t size)
code = static_cast<uint8_t*>(allocExecutableMemory(allocatedSize, hugePages));
memcpy(code, reinterpret_cast<const void *>(randomx_program_aarch64), CodeSize);
# ifndef XMRIG_OS_APPLE
xmrig::VirtualMemory::flushInstructionCache(reinterpret_cast<char*>(code), CodeSize);
# endif
}

View File

@@ -88,7 +88,12 @@ void xmrig::Rx::init(IRxListener *listener)
template<typename T>
bool xmrig::Rx::init(const T &seed, const RxConfig &config, const CpuConfig &cpu)
{
if (seed.algorithm().family() != Algorithm::RANDOM_X) {
const Algorithm::Family f = seed.algorithm().family();
if ((f != Algorithm::RANDOM_X)
# ifdef XMRIG_ALGO_CN_HEAVY
&& (f != Algorithm::CN_HEAVY)
# endif
) {
# ifdef XMRIG_FEATURE_MSR
RxMsr::destroy();
# endif
@@ -96,16 +101,22 @@ bool xmrig::Rx::init(const T &seed, const RxConfig &config, const CpuConfig &cpu
return true;
}
randomx_set_scratchpad_prefetch_mode(config.scratchpadPrefetchMode());
randomx_set_huge_pages_jit(cpu.isHugePagesJit());
randomx_set_optimized_dataset_init(config.initDatasetAVX2());
# ifdef XMRIG_FEATURE_MSR
if (!RxMsr::isInitialized()) {
RxMsr::init(config, cpu.threads().get(seed.algorithm()).data());
}
# endif
# ifdef XMRIG_ALGO_CN_HEAVY
if (f == Algorithm::CN_HEAVY) {
return true;
}
# endif
randomx_set_scratchpad_prefetch_mode(config.scratchpadPrefetchMode());
randomx_set_huge_pages_jit(cpu.isHugePagesJit());
randomx_set_optimized_dataset_init(config.initDatasetAVX2());
if (!osInitialized) {
# ifdef XMRIG_FIX_RYZEN
RxFix::setupMainLoopExceptionFrame();

View File

@@ -113,7 +113,6 @@ private:
Mode readMode(const rapidjson::Value &value) const;
bool m_numa = true;
bool m_oneGbPages = false;
bool m_rdmsr = true;
int m_threads = -1;
@@ -123,6 +122,7 @@ private:
ScratchpadPrefetchMode m_scratchpadPrefetchMode = ScratchpadPrefetchT0;
# ifdef XMRIG_FEATURE_HWLOC
bool m_numa = true;
std::vector<uint32_t> m_nodeset;
# endif

View File

@@ -20,17 +20,22 @@
#include "hw/dmi/DmiMemory.h"
#include "3rdparty/fmt/format.h"
#include "3rdparty/rapidjson/document.h"
#include "hw/dmi/DmiTools.h"
#include <algorithm>
#include <array>
#include <regex>
namespace xmrig {
static const char *kIdFormat = "DIMM_{}{}";
static inline uint16_t dmi_memory_device_width(uint16_t code)
{
return (code == 0xFFFF || code == 0) ? 0 : code;
@@ -143,9 +148,9 @@ xmrig::DmiMemory::DmiMemory(dmi_header *h)
m_size = (1024ULL * (size & 0x7FFF) * ((size & 0x8000) ? 1 : 1024ULL));
}
setId(dmi_string(h, 0x10), dmi_string(h, 0x11));
m_formFactor = h->data[0x0E];
m_slot = dmi_string(h, 0x10);
m_bank = dmi_string(h, 0x11);
m_type = h->data[0x12];
if (!m_size || h->length < 0x17) {
@@ -201,6 +206,7 @@ rapidjson::Value xmrig::DmiMemory::toJSON(rapidjson::Document &doc) const
auto &allocator = doc.GetAllocator();
Value out(kObjectType);
out.AddMember("id", id().toJSON(doc), allocator);
out.AddMember("slot", m_slot.toJSON(doc), allocator);
out.AddMember("type", StringRef(type()), allocator);
out.AddMember("form_factor", StringRef(formFactor()), allocator);
@@ -217,3 +223,21 @@ rapidjson::Value xmrig::DmiMemory::toJSON(rapidjson::Document &doc) const
return out;
}
#endif
void xmrig::DmiMemory::setId(const char *slot, const char *bank)
{
m_slot = slot;
m_bank = bank;
std::cmatch cm;
if (std::regex_match(slot, cm, std::regex("^Channel([A-Z])[-_]DIMM(\\d+)$", std::regex_constants::icase))) {
m_id = fmt::format(kIdFormat, cm.str(1), cm.str(2)).c_str();
}
else if (std::regex_search(bank, cm, std::regex("CHANNEL ([A-Z])$"))) {
std::cmatch cm2;
if (std::regex_match(slot, cm2, std::regex("^DIMM (\\d+)$"))) {
m_id = fmt::format(kIdFormat, cm.str(1), cm2.str(1)).c_str();
}
}
}

View File

@@ -39,6 +39,7 @@ public:
inline bool isValid() const { return !m_slot.isEmpty(); }
inline const String &bank() const { return m_bank; }
inline const String &id() const { return m_id.isNull() ? m_slot : m_id; }
inline const String &product() const { return m_product; }
inline const String &slot() const { return m_slot; }
inline const String &vendor() const { return m_vendor; }
@@ -57,7 +58,10 @@ public:
# endif
private:
void setId(const char *slot, const char *bank);
String m_bank;
String m_id;
String m_product;
String m_slot;
String m_vendor;

View File

@@ -28,7 +28,7 @@
#define APP_ID "xmrig"
#define APP_NAME "XMRig"
#define APP_DESC "XMRig miner"
#define APP_VERSION "6.8.0"
#define APP_VERSION "6.8.2"
#define APP_DOMAIN "xmrig.com"
#define APP_SITE "www.xmrig.com"
#define APP_COPYRIGHT "Copyright (C) 2016-2021 xmrig.com"
@@ -36,7 +36,7 @@
#define APP_VER_MAJOR 6
#define APP_VER_MINOR 8
#define APP_VER_PATCH 0
#define APP_VER_PATCH 2
#ifdef _MSC_VER
# if (_MSC_VER >= 1920)