1
0
mirror of https://github.com/xmrig/xmrig.git synced 2025-12-09 16:52:40 -05:00

Compare commits

...

4 Commits

Author SHA1 Message Date
Tony Butler
55e2c4119e Merge a776ebf394 into cd2fd9d7a6 2024-11-22 06:57:55 +00:00
XMRig
cd2fd9d7a6 Simplified getting PCI topology for the OpenCL backend. 2024-11-08 13:03:35 +07:00
XMRig
064cd3ef20 Fixed and simplified OpenCL GPU type detection. 2024-11-08 07:09:35 +07:00
Tony Butler
a776ebf394 Make AMD assembly completely optional through WITH_ASM_AMD (default ON) 2023-07-12 02:06:53 -06:00
16 changed files with 275 additions and 154 deletions

View File

@@ -14,7 +14,9 @@ option(WITH_HTTP "Enable HTTP protocol support (client/server)" ON)
option(WITH_DEBUG_LOG "Enable debug log output" OFF) option(WITH_DEBUG_LOG "Enable debug log output" OFF)
option(WITH_TLS "Enable OpenSSL support" ON) option(WITH_TLS "Enable OpenSSL support" ON)
option(WITH_ASM "Enable ASM PoW implementations" ON) option(WITH_ASM "Enable ASM PoW implementations" ON)
option(WITH_MSR "Enable MSR mod & 1st-gen Ryzen fix" ON) option(WITH_ASM_AMD "Enable ASM for AMD processors" ON)
option(WITH_MSR "Enable MSR mod" ON)
option(WITH_MSR_ZEN "Enable MSR mod for AMD Zen-based processors" ON)
option(WITH_ENV_VARS "Enable environment variables support in config file" ON) option(WITH_ENV_VARS "Enable environment variables support in config file" ON)
option(WITH_EMBEDDED_CONFIG "Enable internal embedded JSON config" OFF) option(WITH_EMBEDDED_CONFIG "Enable internal embedded JSON config" OFF)
option(WITH_OPENCL "Enable OpenCL backend" ON) option(WITH_OPENCL "Enable OpenCL backend" ON)

View File

@@ -44,9 +44,17 @@ if (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C) set_property(TARGET ${XMRIG_ASM_LIBRARY} PROPERTY LINKER_LANGUAGE C)
add_definitions(/DXMRIG_FEATURE_ASM) add_definitions(/DXMRIG_FEATURE_ASM)
if (WITH_ASM_AMD)
add_definitions(/DXMRIG_FEATURE_ASM_AMD)
message("-- WITH_ASM=ON (+amd)")
else()
message("-- WITH_ASM=ON (-amd)")
endif()
else() else()
set(XMRIG_ASM_SOURCES "") set(XMRIG_ASM_SOURCES "")
set(XMRIG_ASM_LIBRARY "") set(XMRIG_ASM_LIBRARY "")
remove_definitions(/DXMRIG_FEATURE_ASM) remove_definitions(/DXMRIG_FEATURE_ASM)
remove_definitions(/DXMRIG_FEATURE_ASM_AMD)
message("-- WITH_ASM=OFF")
endif() endif()

View File

@@ -104,8 +104,13 @@ if (WITH_RANDOMX)
if (WITH_MSR AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8 AND (XMRIG_OS_WIN OR XMRIG_OS_LINUX)) if (WITH_MSR AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8 AND (XMRIG_OS_WIN OR XMRIG_OS_LINUX))
add_definitions(/DXMRIG_FEATURE_MSR) add_definitions(/DXMRIG_FEATURE_MSR)
add_definitions(/DXMRIG_FIX_RYZEN) if (WITH_MSR_ZEN)
message("-- WITH_MSR=ON") add_definitions(/DXMRIG_FIX_RYZEN)
message("-- WITH_MSR=ON (+zen)")
else()
remove_definitions(/DXMRIG_FIX_RYZEN)
message("-- WITH_MSR=ON (-zen)")
endif()
if (XMRIG_OS_WIN) if (XMRIG_OS_WIN)
list(APPEND SOURCES_CRYPTO list(APPEND SOURCES_CRYPTO

View File

@@ -1,6 +1,6 @@
/* XMRig /* XMRig
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh> * Copyright (c) 2018-2024 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright (c) 2016-2024 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -19,10 +19,8 @@
#ifndef XMRIG_PCITOPOLOGY_H #ifndef XMRIG_PCITOPOLOGY_H
#define XMRIG_PCITOPOLOGY_H #define XMRIG_PCITOPOLOGY_H
#include <cstdio> #include <cstdio>
#include "base/tools/String.h" #include "base/tools/String.h"
@@ -33,7 +31,14 @@ class PciTopology
{ {
public: public:
PciTopology() = default; PciTopology() = default;
PciTopology(uint32_t bus, uint32_t device, uint32_t function) : m_valid(true), m_bus(bus), m_device(device), m_function(function) {}
template<typename T>
inline PciTopology(T bus, T device, T function)
: m_valid(true),
m_bus(static_cast<uint8_t>(bus)),
m_device(static_cast<uint8_t>(device)),
m_function(static_cast<uint8_t>(function))
{}
inline bool isEqual(const PciTopology &other) const { return m_valid == other.m_valid && toUint32() == other.toUint32(); } inline bool isEqual(const PciTopology &other) const { return m_valid == other.m_valid && toUint32() == other.toUint32(); }
inline bool isValid() const { return m_valid; } inline bool isValid() const { return m_valid; }
@@ -70,4 +75,4 @@ private:
} // namespace xmrig } // namespace xmrig
#endif /* XMRIG_PCITOPOLOGY_H */ #endif // XMRIG_PCITOPOLOGY_H

View File

@@ -1,6 +1,6 @@
/* XMRig /* XMRig
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh> * Copyright (c) 2018-2024 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright (c) 2016-2024 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -227,7 +227,7 @@ public:
# endif # endif
Log::print("|" CYAN_BOLD("%3zu") " |" CYAN_BOLD("%4u") " |" YELLOW(" %7s") " |" CYAN_BOLD("%10d") " |" CYAN_BOLD("%8d") " |" Log::print("|" CYAN_BOLD("%3zu") " |" CYAN_BOLD("%4u") " |" YELLOW(" %7s") " |" CYAN_BOLD("%10d") " |" CYAN_BOLD("%8d") " |"
CYAN_BOLD("%7d") " |" CYAN_BOLD("%3d") " |" CYAN_BOLD("%4d") " |" CYAN("%7zu") " | " GREEN("%s"), CYAN_BOLD("%7d") " |" CYAN_BOLD("%3d") " |" CYAN_BOLD("%4d") " |" CYAN("%7zu") " | " GREEN_BOLD("%s"),
i, i,
data.thread.index(), data.thread.index(),
data.device.topology().toString().data(), data.device.topology().toString().data(),

View File

@@ -5,8 +5,8 @@
* Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet> * Copyright 2014-2016 Wolf9466 <https://github.com/OhGodAPet>
* Copyright 2016 Jay D Dee <jayddee246@gmail.com> * Copyright 2016 Jay D Dee <jayddee246@gmail.com>
* Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt> * Copyright 2017-2018 XMR-Stak <https://github.com/fireice-uk>, <https://github.com/psychocrypt>
* Copyright 2018-2020 SChernykh <https://github.com/SChernykh> * Copyright 2018-2024 SChernykh <https://github.com/SChernykh>
* Copyright 2016-2020 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright 2016-2024 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -22,7 +22,6 @@
* along with this program. If not, see <http://www.gnu.org/licenses/>. * along with this program. If not, see <http://www.gnu.org/licenses/>.
*/ */
#include "backend/cuda/wrappers/CudaDevice.h" #include "backend/cuda/wrappers/CudaDevice.h"
#include "3rdparty/rapidjson/document.h" #include "3rdparty/rapidjson/document.h"
#include "backend/cuda/CudaThreads.h" #include "backend/cuda/CudaThreads.h"
@@ -41,7 +40,7 @@
xmrig::CudaDevice::CudaDevice(uint32_t index, int32_t bfactor, int32_t bsleep) : xmrig::CudaDevice::CudaDevice(uint32_t index, int32_t bfactor, int32_t bsleep) :
m_index(index) m_index(index)
{ {
auto ctx = CudaLib::alloc(index, bfactor, bsleep); auto *ctx = CudaLib::alloc(index, bfactor, bsleep);
if (!CudaLib::deviceInfo(ctx, 0, 0, Algorithm::INVALID)) { if (!CudaLib::deviceInfo(ctx, 0, 0, Algorithm::INVALID)) {
CudaLib::release(ctx); CudaLib::release(ctx);
@@ -50,7 +49,7 @@ xmrig::CudaDevice::CudaDevice(uint32_t index, int32_t bfactor, int32_t bsleep) :
m_ctx = ctx; m_ctx = ctx;
m_name = CudaLib::deviceName(ctx); m_name = CudaLib::deviceName(ctx);
m_topology = PciTopology(CudaLib::deviceUint(ctx, CudaLib::DevicePciBusID), CudaLib::deviceUint(ctx, CudaLib::DevicePciDeviceID), 0); m_topology = { CudaLib::deviceUint(ctx, CudaLib::DevicePciBusID), CudaLib::deviceUint(ctx, CudaLib::DevicePciDeviceID), 0U };
} }

View File

@@ -5,13 +5,7 @@ if (BUILD_STATIC AND XMRIG_OS_UNIX AND WITH_OPENCL)
endif() endif()
if (WITH_OPENCL) if (WITH_OPENCL)
add_definitions(/DXMRIG_FEATURE_OPENCL) add_definitions(/DXMRIG_FEATURE_OPENCL /DCL_USE_DEPRECATED_OPENCL_1_2_APIS)
add_definitions(/DCL_USE_DEPRECATED_OPENCL_1_2_APIS)
if (XMRIG_OS_APPLE)
add_definitions(/DCL_TARGET_OPENCL_VERSION=120)
elseif (WITH_OPENCL_VERSION)
add_definitions(/DCL_TARGET_OPENCL_VERSION=${WITH_OPENCL_VERSION})
endif()
set(HEADERS_BACKEND_OPENCL set(HEADERS_BACKEND_OPENCL
src/backend/opencl/cl/OclSource.h src/backend/opencl/cl/OclSource.h
@@ -71,6 +65,13 @@ if (WITH_OPENCL)
src/backend/opencl/wrappers/OclPlatform.cpp src/backend/opencl/wrappers/OclPlatform.cpp
) )
if (XMRIG_OS_APPLE)
add_definitions(/DCL_TARGET_OPENCL_VERSION=120)
list(APPEND SOURCES_BACKEND_OPENCL src/backend/opencl/wrappers/OclDevice_mac.cpp)
elseif (WITH_OPENCL_VERSION)
add_definitions(/DCL_TARGET_OPENCL_VERSION=${WITH_OPENCL_VERSION})
endif()
if (WIN32) if (WIN32)
list(APPEND SOURCES_BACKEND_OPENCL src/backend/opencl/OclCache_win.cpp) list(APPEND SOURCES_BACKEND_OPENCL src/backend/opencl/OclCache_win.cpp)
else() else()

View File

@@ -1,6 +1,7 @@
/* XMRig /* XMRig
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh> * Copyright (c) 2021 Spudz76 <https://github.com/Spudz76>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright (c) 2018-2024 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2024 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -17,6 +18,7 @@
*/ */
#include "backend/opencl/wrappers/OclDevice.h" #include "backend/opencl/wrappers/OclDevice.h"
#include "3rdparty/fmt/core.h"
#include "3rdparty/rapidjson/document.h" #include "3rdparty/rapidjson/document.h"
#include "backend/opencl/OclGenerator.h" #include "backend/opencl/OclGenerator.h"
#include "backend/opencl/OclThreads.h" #include "backend/opencl/OclThreads.h"
@@ -30,19 +32,21 @@
#include <algorithm> #include <algorithm>
#include <map>
// NOLINTNEXTLINE(modernize-use-using)
typedef union
{
struct { cl_uint type; cl_uint data[5]; } raw;
struct { cl_uint type; cl_char unused[17]; cl_char bus; cl_char device; cl_char function; } pcie;
} topology_amd;
namespace xmrig { namespace xmrig {
struct topology_amd {
cl_uint type;
cl_char unused[17];
cl_char bus;
cl_char device;
cl_char function;
};
#ifdef XMRIG_ALGO_RANDOMX #ifdef XMRIG_ALGO_RANDOMX
extern bool ocl_generic_rx_generator(const OclDevice &device, const Algorithm &algorithm, OclThreads &threads); extern bool ocl_generic_rx_generator(const OclDevice &device, const Algorithm &algorithm, OclThreads &threads);
#endif #endif
@@ -81,9 +85,11 @@ static OclVendor getPlatformVendorId(const String &vendor, const String &extensi
return OCL_VENDOR_INTEL; return OCL_VENDOR_INTEL;
} }
# ifdef XMRIG_OS_APPLE
if (extensions.contains("cl_APPLE_") || vendor.contains("Apple")) { if (extensions.contains("cl_APPLE_") || vendor.contains("Apple")) {
return OCL_VENDOR_APPLE; return OCL_VENDOR_APPLE;
} }
# endif
return OCL_VENDOR_UNKNOWN; return OCL_VENDOR_UNKNOWN;
} }
@@ -103,117 +109,16 @@ static OclVendor getVendorId(const String &vendor)
return OCL_VENDOR_INTEL; return OCL_VENDOR_INTEL;
} }
# ifdef XMRIG_OS_APPLE
if (vendor.contains("Apple")) { if (vendor.contains("Apple")) {
return OCL_VENDOR_APPLE; return OCL_VENDOR_APPLE;
} }
# endif
return OCL_VENDOR_UNKNOWN; return OCL_VENDOR_UNKNOWN;
} }
static OclDevice::Type getType(const String &name, const OclVendor platformVendorId)
{
if (platformVendorId == OCL_VENDOR_APPLE) {
// Apple Platform: uses product names, not gfx# or codenames
if (name.contains("AMD Radeon")) {
if (name.contains(" 450 ") ||
name.contains(" 455 ") ||
name.contains(" 460 ")) {
return OclDevice::Baffin;
}
if (name.contains(" 555 ") || name.contains(" 555X ") ||
name.contains(" 560 ") || name.contains(" 560X ") ||
name.contains(" 570 ") || name.contains(" 570X ") ||
name.contains(" 575 ") || name.contains(" 575X ")) {
return OclDevice::Polaris;
}
if (name.contains(" 580 ") || name.contains(" 580X ")) {
return OclDevice::Ellesmere;
}
if (name.contains(" Vega ")) {
if (name.contains(" 48 ") ||
name.contains(" 56 ") ||
name.contains(" 64 ") ||
name.contains(" 64X ")) {
return OclDevice::Vega_10;
}
if (name.contains(" 16 ") ||
name.contains(" 20 ") ||
name.contains(" II ")) {
return OclDevice::Vega_20;
}
}
if (name.contains(" 5700 ") || name.contains(" W5700X ")) {
return OclDevice::Navi_10;
}
if (name.contains(" 5600 ") || name.contains(" 5600M ")) {
return OclDevice::Navi_12;
}
if (name.contains(" 5300 ") || name.contains(" 5300M ") ||
name.contains(" 5500 ") || name.contains(" 5500M ")) {
return OclDevice::Navi_14;
}
if (name.contains(" W6800 ") || name.contains(" W6900X ")) {
return OclDevice::Navi_21;
}
}
}
if (name == "gfx900" || name == "gfx901") {
return OclDevice::Vega_10;
}
if (name == "gfx902" || name == "gfx903") {
return OclDevice::Raven;
}
if (name == "gfx906" || name == "gfx907") {
return OclDevice::Vega_20;
}
if (name == "gfx1010") {
return OclDevice::Navi_10;
}
if (name == "gfx1011") {
return OclDevice::Navi_12;
}
if (name == "gfx1012") {
return OclDevice::Navi_14;
}
if (name == "gfx1030") {
return OclDevice::Navi_21;
}
if (name == "gfx804") {
return OclDevice::Lexa;
}
if (name == "Baffin") {
return OclDevice::Baffin;
}
if (name.contains("Ellesmere")) {
return OclDevice::Ellesmere;
}
if (name == "gfx803" || name.contains("polaris")) {
return OclDevice::Polaris;
}
return OclDevice::Unknown;
}
} // namespace xmrig } // namespace xmrig
@@ -231,21 +136,21 @@ xmrig::OclDevice::OclDevice(uint32_t index, cl_device_id id, cl_platform_id plat
{ {
m_vendorId = getVendorId(m_vendor); m_vendorId = getVendorId(m_vendor);
m_platformVendorId = getPlatformVendorId(m_platformVendor, m_extensions); m_platformVendorId = getPlatformVendorId(m_platformVendor, m_extensions);
m_type = getType(m_name, m_platformVendorId); m_type = getType(m_name);
if (m_extensions.contains("cl_amd_device_attribute_query")) { if (m_extensions.contains("cl_amd_device_attribute_query")) {
topology_amd topology; topology_amd topology{};
if (OclLib::getDeviceInfo(id, CL_DEVICE_TOPOLOGY_AMD, sizeof(topology), &topology) == CL_SUCCESS && topology.type == CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD) {
if (OclLib::getDeviceInfo(id, CL_DEVICE_TOPOLOGY_AMD, sizeof(topology), &topology, nullptr) == CL_SUCCESS && topology.raw.type == CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD) { m_topology = { topology.bus, topology.device, topology.function };
m_topology = PciTopology(static_cast<uint32_t>(topology.pcie.bus), static_cast<uint32_t>(topology.pcie.device), static_cast<uint32_t>(topology.pcie.function));
} }
m_board = OclLib::getString(id, CL_DEVICE_BOARD_NAME_AMD); m_board = OclLib::getString(id, CL_DEVICE_BOARD_NAME_AMD);
} }
else if (m_extensions.contains("cl_nv_device_attribute_query")) { else if (m_extensions.contains("cl_nv_device_attribute_query")) {
cl_uint bus = 0; cl_uint bus = 0;
if (OclLib::getDeviceInfo(id, CL_DEVICE_PCI_BUS_ID_NV, sizeof (bus), &bus, nullptr) == CL_SUCCESS) { if (OclLib::getDeviceInfo(id, CL_DEVICE_PCI_BUS_ID_NV, sizeof(bus), &bus) == CL_SUCCESS) {
cl_uint slot = OclLib::getUint(id, CL_DEVICE_PCI_SLOT_ID_NV); cl_uint slot = OclLib::getUint(id, CL_DEVICE_PCI_SLOT_ID_NV);
m_topology = PciTopology(bus, (slot >> 3) & 0xff, slot & 7); m_topology = { bus, (slot >> 3) & 0xff, slot & 7 };
} }
} }
} }
@@ -253,17 +158,11 @@ xmrig::OclDevice::OclDevice(uint32_t index, cl_device_id id, cl_platform_id plat
xmrig::String xmrig::OclDevice::printableName() const xmrig::String xmrig::OclDevice::printableName() const
{ {
const size_t size = m_board.size() + m_name.size() + 64;
char *buf = new char[size]();
if (m_board.isNull()) { if (m_board.isNull()) {
snprintf(buf, size, GREEN_BOLD("%s"), m_name.data()); return fmt::format(GREEN_BOLD("{}"), m_name).c_str();
}
else {
snprintf(buf, size, GREEN_BOLD("%s") " (" CYAN_BOLD("%s") ")", m_board.data(), m_name.data());
} }
return buf; return fmt::format(GREEN_BOLD("{}") " (" CYAN_BOLD("{}") ")", m_board, m_name).c_str();
} }
@@ -311,3 +210,35 @@ void xmrig::OclDevice::toJSON(rapidjson::Value &out, rapidjson::Document &doc) c
# endif # endif
} }
#endif #endif
#ifndef XMRIG_OS_APPLE
xmrig::OclDevice::Type xmrig::OclDevice::getType(const String &name)
{
static std::map<const char *, OclDevice::Type> types = {
{ "gfx900", Vega_10 },
{ "gfx901", Vega_10 },
{ "gfx902", Raven },
{ "gfx903", Raven },
{ "gfx906", Vega_20 },
{ "gfx907", Vega_20 },
{ "gfx1010", Navi_10 },
{ "gfx1011", Navi_12 },
{ "gfx1012", Navi_14 },
{ "gfx1030", Navi_21 },
{ "gfx804", Lexa },
{ "Baffin", Baffin },
{ "Ellesmere", Ellesmere },
{ "gfx803", Polaris },
{ "polaris", Polaris },
};
for (auto &kv : types) {
if (name.contains(kv.first)) {
return kv.second;
}
}
return OclDevice::Unknown;
}
#endif

View File

@@ -1,6 +1,6 @@
/* XMRig /* XMRig
* Copyright (c) 2018-2021 SChernykh <https://github.com/SChernykh> * Copyright (c) 2018-2024 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2021 XMRig <https://github.com/xmrig>, <support@xmrig.com> * Copyright (c) 2016-2024 XMRig <https://github.com/xmrig>, <support@xmrig.com>
* *
* This program is free software: you can redistribute it and/or modify * This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by * it under the terms of the GNU General Public License as published by
@@ -86,6 +86,8 @@ public:
# endif # endif
private: private:
static OclDevice::Type getType(const String &name);
cl_device_id m_id = nullptr; cl_device_id m_id = nullptr;
cl_platform_id m_platform = nullptr; cl_platform_id m_platform = nullptr;
const String m_platformVendor; const String m_platformVendor;

View File

@@ -0,0 +1,77 @@
/* XMRig
* Copyright (c) 2021 Spudz76 <https://github.com/Spudz76>
* Copyright (c) 2018-2024 SChernykh <https://github.com/SChernykh>
* Copyright (c) 2016-2024 XMRig <https://github.com/xmrig>, <support@xmrig.com>
*
* This program is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or
* (at your option) any later version.
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
#include "backend/opencl/wrappers/OclDevice.h"
xmrig::OclDevice::Type xmrig::OclDevice::getType(const String &name)
{
// Apple Platform: uses product names, not gfx# or codenames
if (name.contains("AMD Radeon")) {
if (name.contains(" 450 ") ||
name.contains(" 455 ") ||
name.contains(" 460 ")) {
return Baffin;
}
if (name.contains(" 555 ") || name.contains(" 555X ") ||
name.contains(" 560 ") || name.contains(" 560X ") ||
name.contains(" 570 ") || name.contains(" 570X ") ||
name.contains(" 575 ") || name.contains(" 575X ")) {
return Polaris;
}
if (name.contains(" 580 ") || name.contains(" 580X ")) {
return Ellesmere;
}
if (name.contains(" Vega ")) {
if (name.contains(" 48 ") ||
name.contains(" 56 ") ||
name.contains(" 64 ") ||
name.contains(" 64X ")) {
return Vega_10;
}
if (name.contains(" 16 ") ||
name.contains(" 20 ") ||
name.contains(" II ")) {
return Vega_20;
}
}
if (name.contains(" 5700 ") || name.contains(" W5700X ")) {
return Navi_10;
}
if (name.contains(" 5600 ") || name.contains(" 5600M ")) {
return Navi_12;
}
if (name.contains(" 5300 ") || name.contains(" 5300M ") ||
name.contains(" 5500 ") || name.contains(" 5500M ")) {
return Navi_14;
}
if (name.contains(" W6800 ") || name.contains(" W6900X ")) {
return Navi_21;
}
}
return OclDevice::Unknown;
}

View File

@@ -94,7 +94,13 @@ static inline const std::string &usage()
# ifdef XMRIG_ALGO_RANDOMX # ifdef XMRIG_ALGO_RANDOMX
u += " --huge-pages-jit enable huge pages support for RandomX JIT code\n"; u += " --huge-pages-jit enable huge pages support for RandomX JIT code\n";
# endif # endif
# ifdef XMRIG_FEATURE_ASM
# ifdef XMRIG_FEATURE_ASM_AMD
u += " --asm=ASM ASM optimizations, possible values: auto, none, intel, ryzen, bulldozer\n"; u += " --asm=ASM ASM optimizations, possible values: auto, none, intel, ryzen, bulldozer\n";
# else
u += " --asm=ASM ASM optimizations, possible values: auto, none, intel\n";
# endif
# endif
# if defined(__x86_64__) || defined(_M_AMD64) # if defined(__x86_64__) || defined(_M_AMD64)
u += " --argon2-impl=IMPL argon2 implementation: x86_64, SSE2, SSSE3, XOP, AVX2, AVX-512F\n"; u += " --argon2-impl=IMPL argon2 implementation: x86_64, SSE2, SSSE3, XOP, AVX2, AVX-512F\n";

View File

@@ -55,6 +55,7 @@ bool cn_vaes_enabled = false;
#ifdef XMRIG_FEATURE_ASM #ifdef XMRIG_FEATURE_ASM
#ifdef XMRIG_FEATURE_ASM_AMD
# define ADD_FN_ASM(algo) do { \ # define ADD_FN_ASM(algo) do { \
m_map[algo]->data[AV_SINGLE][Assembly::INTEL] = cryptonight_single_hash_asm<algo, Assembly::INTEL>; \ m_map[algo]->data[AV_SINGLE][Assembly::INTEL] = cryptonight_single_hash_asm<algo, Assembly::INTEL>; \
m_map[algo]->data[AV_SINGLE][Assembly::RYZEN] = cryptonight_single_hash_asm<algo, Assembly::RYZEN>; \ m_map[algo]->data[AV_SINGLE][Assembly::RYZEN] = cryptonight_single_hash_asm<algo, Assembly::RYZEN>; \
@@ -63,34 +64,50 @@ bool cn_vaes_enabled = false;
m_map[algo]->data[AV_DOUBLE][Assembly::RYZEN] = cryptonight_double_hash_asm<algo, Assembly::RYZEN>; \ m_map[algo]->data[AV_DOUBLE][Assembly::RYZEN] = cryptonight_double_hash_asm<algo, Assembly::RYZEN>; \
m_map[algo]->data[AV_DOUBLE][Assembly::BULLDOZER] = cryptonight_double_hash_asm<algo, Assembly::BULLDOZER>; \ m_map[algo]->data[AV_DOUBLE][Assembly::BULLDOZER] = cryptonight_double_hash_asm<algo, Assembly::BULLDOZER>; \
} while (0) } while (0)
#else
# define ADD_FN_ASM(algo) do { \
m_map[algo]->data[AV_SINGLE][Assembly::INTEL] = cryptonight_single_hash_asm<algo, Assembly::INTEL>; \
m_map[algo]->data[AV_DOUBLE][Assembly::INTEL] = cryptonight_double_hash_asm<algo, Assembly::INTEL>; \
} while (0)
#endif
namespace xmrig { namespace xmrig {
cn_mainloop_fun cn_half_mainloop_ivybridge_asm = nullptr; cn_mainloop_fun cn_half_mainloop_ivybridge_asm = nullptr;
#ifdef XMRIG_FEATURE_ASM_AMD
cn_mainloop_fun cn_half_mainloop_ryzen_asm = nullptr; cn_mainloop_fun cn_half_mainloop_ryzen_asm = nullptr;
cn_mainloop_fun cn_half_mainloop_bulldozer_asm = nullptr; cn_mainloop_fun cn_half_mainloop_bulldozer_asm = nullptr;
#endif
cn_mainloop_fun cn_half_double_mainloop_sandybridge_asm = nullptr; cn_mainloop_fun cn_half_double_mainloop_sandybridge_asm = nullptr;
cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm = nullptr; cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm = nullptr;
#ifdef XMRIG_FEATURE_ASM_AMD
cn_mainloop_fun cn_trtl_mainloop_ryzen_asm = nullptr; cn_mainloop_fun cn_trtl_mainloop_ryzen_asm = nullptr;
cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm = nullptr; cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm = nullptr;
#endif
cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm = nullptr; cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm = nullptr;
cn_mainloop_fun cn_tlo_mainloop_ivybridge_asm = nullptr; cn_mainloop_fun cn_tlo_mainloop_ivybridge_asm = nullptr;
#ifdef XMRIG_FEATURE_ASM_AMD
cn_mainloop_fun cn_tlo_mainloop_ryzen_asm = nullptr; cn_mainloop_fun cn_tlo_mainloop_ryzen_asm = nullptr;
cn_mainloop_fun cn_tlo_mainloop_bulldozer_asm = nullptr; cn_mainloop_fun cn_tlo_mainloop_bulldozer_asm = nullptr;
#endif
cn_mainloop_fun cn_tlo_double_mainloop_sandybridge_asm = nullptr; cn_mainloop_fun cn_tlo_double_mainloop_sandybridge_asm = nullptr;
cn_mainloop_fun cn_zls_mainloop_ivybridge_asm = nullptr; cn_mainloop_fun cn_zls_mainloop_ivybridge_asm = nullptr;
#ifdef XMRIG_FEATURE_ASM_AMD
cn_mainloop_fun cn_zls_mainloop_ryzen_asm = nullptr; cn_mainloop_fun cn_zls_mainloop_ryzen_asm = nullptr;
cn_mainloop_fun cn_zls_mainloop_bulldozer_asm = nullptr; cn_mainloop_fun cn_zls_mainloop_bulldozer_asm = nullptr;
#endif
cn_mainloop_fun cn_zls_double_mainloop_sandybridge_asm = nullptr; cn_mainloop_fun cn_zls_double_mainloop_sandybridge_asm = nullptr;
cn_mainloop_fun cn_double_mainloop_ivybridge_asm = nullptr; cn_mainloop_fun cn_double_mainloop_ivybridge_asm = nullptr;
#ifdef XMRIG_FEATURE_ASM_AMD
cn_mainloop_fun cn_double_mainloop_ryzen_asm = nullptr; cn_mainloop_fun cn_double_mainloop_ryzen_asm = nullptr;
cn_mainloop_fun cn_double_mainloop_bulldozer_asm = nullptr; cn_mainloop_fun cn_double_mainloop_bulldozer_asm = nullptr;
#endif
cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm = nullptr; cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm = nullptr;
cn_mainloop_fun cn_upx2_mainloop_asm = nullptr; cn_mainloop_fun cn_upx2_mainloop_asm = nullptr;
@@ -160,31 +177,41 @@ static void patchAsmVariants()
auto base = static_cast<uint8_t *>(VirtualMemory::allocateExecutableMemory(allocation_size, false)); auto base = static_cast<uint8_t *>(VirtualMemory::allocateExecutableMemory(allocation_size, false));
cn_half_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x0000); cn_half_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x0000);
# ifdef XMRIG_FEATURE_ASM_AMD
cn_half_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1000); cn_half_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x1000);
cn_half_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x2000); cn_half_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x2000);
# endif
cn_half_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x3000); cn_half_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x3000);
# ifdef XMRIG_ALGO_CN_PICO # ifdef XMRIG_ALGO_CN_PICO
cn_trtl_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x4000); cn_trtl_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x4000);
# ifdef XMRIG_FEATURE_ASM_AMD
cn_trtl_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x5000); cn_trtl_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x5000);
cn_trtl_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x6000); cn_trtl_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x6000);
# endif
cn_trtl_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x7000); cn_trtl_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x7000);
# endif # endif
cn_zls_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x8000); cn_zls_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x8000);
# ifdef XMRIG_FEATURE_ASM_AMD
cn_zls_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x9000); cn_zls_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x9000);
cn_zls_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xA000); cn_zls_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xA000);
# endif
cn_zls_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xB000); cn_zls_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xB000);
cn_double_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xC000); cn_double_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xC000);
# ifdef XMRIG_FEATURE_ASM_AMD
cn_double_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xD000); cn_double_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xD000);
cn_double_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xE000); cn_double_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xE000);
# endif
cn_double_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xF000); cn_double_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0xF000);
# ifdef XMRIG_ALGO_CN_PICO # ifdef XMRIG_ALGO_CN_PICO
cn_tlo_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x10000); cn_tlo_mainloop_ivybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x10000);
# ifdef XMRIG_FEATURE_ASM_AMD
cn_tlo_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x11000); cn_tlo_mainloop_ryzen_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x11000);
cn_tlo_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x12000); cn_tlo_mainloop_bulldozer_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x12000);
# endif
cn_tlo_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x13000); cn_tlo_double_mainloop_sandybridge_asm = reinterpret_cast<cn_mainloop_fun> (base + 0x13000);
# endif # endif
@@ -220,8 +247,10 @@ static void patchAsmVariants()
constexpr uint32_t ITER = CnAlgo<Algorithm::CN_HALF>().iterations(); constexpr uint32_t ITER = CnAlgo<Algorithm::CN_HALF>().iterations();
patchCode(cn_half_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER); patchCode(cn_half_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER);
# ifdef XMRIG_FEATURE_ASM_AMD
patchCode(cn_half_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER); patchCode(cn_half_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER);
patchCode(cn_half_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER); patchCode(cn_half_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER);
# endif
patchCode(cn_half_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER); patchCode(cn_half_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER);
} }
@@ -231,8 +260,10 @@ static void patchAsmVariants()
constexpr uint32_t MASK = CnAlgo<Algorithm::CN_PICO_0>().mask(); constexpr uint32_t MASK = CnAlgo<Algorithm::CN_PICO_0>().mask();
patchCode(cn_trtl_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER, MASK); patchCode(cn_trtl_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER, MASK);
# ifdef XMRIG_FEATURE_ASM_AMD
patchCode(cn_trtl_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER, MASK); patchCode(cn_trtl_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER, MASK);
patchCode(cn_trtl_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER, MASK); patchCode(cn_trtl_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER, MASK);
# endif
patchCode(cn_trtl_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER, MASK); patchCode(cn_trtl_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER, MASK);
} }
@@ -241,8 +272,10 @@ static void patchAsmVariants()
constexpr uint32_t MASK = CnAlgo<Algorithm::CN_PICO_TLO>().mask(); constexpr uint32_t MASK = CnAlgo<Algorithm::CN_PICO_TLO>().mask();
patchCode(cn_tlo_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER, MASK); patchCode(cn_tlo_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER, MASK);
# ifdef XMRIG_FEATURE_ASM_AMD
patchCode(cn_tlo_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER, MASK); patchCode(cn_tlo_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER, MASK);
patchCode(cn_tlo_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER, MASK); patchCode(cn_tlo_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER, MASK);
# endif
patchCode(cn_tlo_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER, MASK); patchCode(cn_tlo_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER, MASK);
} }
# endif # endif
@@ -251,8 +284,10 @@ static void patchAsmVariants()
constexpr uint32_t ITER = CnAlgo<Algorithm::CN_ZLS>().iterations(); constexpr uint32_t ITER = CnAlgo<Algorithm::CN_ZLS>().iterations();
patchCode(cn_zls_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER); patchCode(cn_zls_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER);
# ifdef XMRIG_FEATURE_ASM_AMD
patchCode(cn_zls_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER); patchCode(cn_zls_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER);
patchCode(cn_zls_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER); patchCode(cn_zls_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER);
# endif
patchCode(cn_zls_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER); patchCode(cn_zls_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER);
} }
@@ -260,8 +295,10 @@ static void patchAsmVariants()
constexpr uint32_t ITER = CnAlgo<Algorithm::CN_DOUBLE>().iterations(); constexpr uint32_t ITER = CnAlgo<Algorithm::CN_DOUBLE>().iterations();
patchCode(cn_double_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER); patchCode(cn_double_mainloop_ivybridge_asm, cnv2_mainloop_ivybridge_asm, ITER);
# ifdef XMRIG_FEATURE_ASM_AMD
patchCode(cn_double_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER); patchCode(cn_double_mainloop_ryzen_asm, cnv2_mainloop_ryzen_asm, ITER);
patchCode(cn_double_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER); patchCode(cn_double_mainloop_bulldozer_asm, cnv2_mainloop_bulldozer_asm, ITER);
# endif
patchCode(cn_double_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER); patchCode(cn_double_double_mainloop_sandybridge_asm, cnv2_double_mainloop_sandybridge_asm, ITER);
} }

View File

@@ -852,12 +852,16 @@ extern "C" void cnv1_single_mainloop_asm(cryptonight_ctx * *ctx);
extern "C" void cnv1_double_mainloop_asm(cryptonight_ctx **ctx); extern "C" void cnv1_double_mainloop_asm(cryptonight_ctx **ctx);
extern "C" void cnv1_quad_mainloop_asm(cryptonight_ctx **ctx); extern "C" void cnv1_quad_mainloop_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx **ctx); extern "C" void cnv2_mainloop_ivybridge_asm(cryptonight_ctx **ctx);
#ifdef XMRIG_FEATURE_ASM_AMD
extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx **ctx); extern "C" void cnv2_mainloop_ryzen_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx **ctx); extern "C" void cnv2_mainloop_bulldozer_asm(cryptonight_ctx **ctx);
#endif
extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx **ctx); extern "C" void cnv2_double_mainloop_sandybridge_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_rwz_mainloop_asm(cryptonight_ctx **ctx); extern "C" void cnv2_rwz_mainloop_asm(cryptonight_ctx **ctx);
extern "C" void cnv2_rwz_double_mainloop_asm(cryptonight_ctx **ctx); extern "C" void cnv2_rwz_double_mainloop_asm(cryptonight_ctx **ctx);
#ifdef XMRIG_FEATURE_ASM_AMD
extern "C" void cnv2_upx_double_mainloop_zen3_asm(cryptonight_ctx **ctx); extern "C" void cnv2_upx_double_mainloop_zen3_asm(cryptonight_ctx **ctx);
#endif
namespace xmrig { namespace xmrig {
@@ -867,28 +871,38 @@ typedef void (*cn_mainloop_fun)(cryptonight_ctx **ctx);
extern cn_mainloop_fun cn_half_mainloop_ivybridge_asm; extern cn_mainloop_fun cn_half_mainloop_ivybridge_asm;
#ifdef XMRIG_FEATURE_ASM_AMD
extern cn_mainloop_fun cn_half_mainloop_ryzen_asm; extern cn_mainloop_fun cn_half_mainloop_ryzen_asm;
extern cn_mainloop_fun cn_half_mainloop_bulldozer_asm; extern cn_mainloop_fun cn_half_mainloop_bulldozer_asm;
#endif
extern cn_mainloop_fun cn_half_double_mainloop_sandybridge_asm; extern cn_mainloop_fun cn_half_double_mainloop_sandybridge_asm;
extern cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm; extern cn_mainloop_fun cn_trtl_mainloop_ivybridge_asm;
#ifdef XMRIG_FEATURE_ASM_AMD
extern cn_mainloop_fun cn_trtl_mainloop_ryzen_asm; extern cn_mainloop_fun cn_trtl_mainloop_ryzen_asm;
extern cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm; extern cn_mainloop_fun cn_trtl_mainloop_bulldozer_asm;
#endif
extern cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm; extern cn_mainloop_fun cn_trtl_double_mainloop_sandybridge_asm;
extern cn_mainloop_fun cn_tlo_mainloop_ivybridge_asm; extern cn_mainloop_fun cn_tlo_mainloop_ivybridge_asm;
#ifdef XMRIG_FEATURE_ASM_AMD
extern cn_mainloop_fun cn_tlo_mainloop_ryzen_asm; extern cn_mainloop_fun cn_tlo_mainloop_ryzen_asm;
extern cn_mainloop_fun cn_tlo_mainloop_bulldozer_asm; extern cn_mainloop_fun cn_tlo_mainloop_bulldozer_asm;
#endif
extern cn_mainloop_fun cn_tlo_double_mainloop_sandybridge_asm; extern cn_mainloop_fun cn_tlo_double_mainloop_sandybridge_asm;
extern cn_mainloop_fun cn_zls_mainloop_ivybridge_asm; extern cn_mainloop_fun cn_zls_mainloop_ivybridge_asm;
#ifdef XMRIG_FEATURE_ASM_AMD
extern cn_mainloop_fun cn_zls_mainloop_ryzen_asm; extern cn_mainloop_fun cn_zls_mainloop_ryzen_asm;
extern cn_mainloop_fun cn_zls_mainloop_bulldozer_asm; extern cn_mainloop_fun cn_zls_mainloop_bulldozer_asm;
#endif
extern cn_mainloop_fun cn_zls_double_mainloop_sandybridge_asm; extern cn_mainloop_fun cn_zls_double_mainloop_sandybridge_asm;
extern cn_mainloop_fun cn_double_mainloop_ivybridge_asm; extern cn_mainloop_fun cn_double_mainloop_ivybridge_asm;
#ifdef XMRIG_FEATURE_ASM_AMD
extern cn_mainloop_fun cn_double_mainloop_ryzen_asm; extern cn_mainloop_fun cn_double_mainloop_ryzen_asm;
extern cn_mainloop_fun cn_double_mainloop_bulldozer_asm; extern cn_mainloop_fun cn_double_mainloop_bulldozer_asm;
#endif
extern cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm; extern cn_mainloop_fun cn_double_double_mainloop_sandybridge_asm;
extern cn_mainloop_fun cn_upx2_mainloop_asm; extern cn_mainloop_fun cn_upx2_mainloop_asm;
@@ -964,46 +978,54 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
if (ASM == Assembly::INTEL) { if (ASM == Assembly::INTEL) {
cnv2_mainloop_ivybridge_asm(ctx); cnv2_mainloop_ivybridge_asm(ctx);
} }
# ifdef XMRIG_FEATURE_ASM_AMD
else if (ASM == Assembly::RYZEN) { else if (ASM == Assembly::RYZEN) {
cnv2_mainloop_ryzen_asm(ctx); cnv2_mainloop_ryzen_asm(ctx);
} }
else { else {
cnv2_mainloop_bulldozer_asm(ctx); cnv2_mainloop_bulldozer_asm(ctx);
} }
# endif
} }
else if (ALGO == Algorithm::CN_HALF) { else if (ALGO == Algorithm::CN_HALF) {
if (ASM == Assembly::INTEL) { if (ASM == Assembly::INTEL) {
cn_half_mainloop_ivybridge_asm(ctx); cn_half_mainloop_ivybridge_asm(ctx);
} }
# ifdef XMRIG_FEATURE_ASM_AMD
else if (ASM == Assembly::RYZEN) { else if (ASM == Assembly::RYZEN) {
cn_half_mainloop_ryzen_asm(ctx); cn_half_mainloop_ryzen_asm(ctx);
} }
else { else {
cn_half_mainloop_bulldozer_asm(ctx); cn_half_mainloop_bulldozer_asm(ctx);
} }
# endif
} }
# ifdef XMRIG_ALGO_CN_PICO # ifdef XMRIG_ALGO_CN_PICO
else if (ALGO == Algorithm::CN_PICO_0) { else if (ALGO == Algorithm::CN_PICO_0) {
if (ASM == Assembly::INTEL) { if (ASM == Assembly::INTEL) {
cn_trtl_mainloop_ivybridge_asm(ctx); cn_trtl_mainloop_ivybridge_asm(ctx);
} }
# ifdef XMRIG_FEATURE_ASM_AMD
else if (ASM == Assembly::RYZEN) { else if (ASM == Assembly::RYZEN) {
cn_trtl_mainloop_ryzen_asm(ctx); cn_trtl_mainloop_ryzen_asm(ctx);
} }
else { else {
cn_trtl_mainloop_bulldozer_asm(ctx); cn_trtl_mainloop_bulldozer_asm(ctx);
} }
# endif
} }
else if (ALGO == Algorithm::CN_PICO_TLO) { else if (ALGO == Algorithm::CN_PICO_TLO) {
if (ASM == Assembly::INTEL) { if (ASM == Assembly::INTEL) {
cn_tlo_mainloop_ivybridge_asm(ctx); cn_tlo_mainloop_ivybridge_asm(ctx);
} }
# ifdef XMRIG_FEATURE_ASM_AMD
else if (ASM == Assembly::RYZEN) { else if (ASM == Assembly::RYZEN) {
cn_tlo_mainloop_ryzen_asm(ctx); cn_tlo_mainloop_ryzen_asm(ctx);
} }
else { else {
cn_tlo_mainloop_bulldozer_asm(ctx); cn_tlo_mainloop_bulldozer_asm(ctx);
} }
# endif
} }
# endif # endif
else if (ALGO == Algorithm::CN_RWZ) { else if (ALGO == Algorithm::CN_RWZ) {
@@ -1013,23 +1035,27 @@ inline void cryptonight_single_hash_asm(const uint8_t *__restrict__ input, size_
if (ASM == Assembly::INTEL) { if (ASM == Assembly::INTEL) {
cn_zls_mainloop_ivybridge_asm(ctx); cn_zls_mainloop_ivybridge_asm(ctx);
} }
# ifdef XMRIG_FEATURE_ASM_AMD
else if (ASM == Assembly::RYZEN) { else if (ASM == Assembly::RYZEN) {
cn_zls_mainloop_ryzen_asm(ctx); cn_zls_mainloop_ryzen_asm(ctx);
} }
else { else {
cn_zls_mainloop_bulldozer_asm(ctx); cn_zls_mainloop_bulldozer_asm(ctx);
} }
# endif
} }
else if (ALGO == Algorithm::CN_DOUBLE) { else if (ALGO == Algorithm::CN_DOUBLE) {
if (ASM == Assembly::INTEL) { if (ASM == Assembly::INTEL) {
cn_double_mainloop_ivybridge_asm(ctx); cn_double_mainloop_ivybridge_asm(ctx);
} }
# ifdef XMRIG_FEATURE_ASM_AMD
else if (ASM == Assembly::RYZEN) { else if (ASM == Assembly::RYZEN) {
cn_double_mainloop_ryzen_asm(ctx); cn_double_mainloop_ryzen_asm(ctx);
} }
else { else {
cn_double_mainloop_bulldozer_asm(ctx); cn_double_mainloop_bulldozer_asm(ctx);
} }
# endif
} }
# ifdef XMRIG_ALGO_CN_FEMTO # ifdef XMRIG_ALGO_CN_FEMTO
else if (ALGO == Algorithm::CN_UPX2) { else if (ALGO == Algorithm::CN_UPX2) {
@@ -1094,12 +1120,16 @@ inline void cryptonight_double_hash_asm(const uint8_t *__restrict__ input, size_
# endif # endif
# ifdef XMRIG_ALGO_CN_FEMTO # ifdef XMRIG_ALGO_CN_FEMTO
else if (ALGO == Algorithm::CN_UPX2) { else if (ALGO == Algorithm::CN_UPX2) {
# ifdef XMRIG_FEATURE_ASM_AMD
if (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3) { if (Cpu::info()->arch() == ICpuInfo::ARCH_ZEN3) {
cnv2_upx_double_mainloop_zen3_asm(ctx); cnv2_upx_double_mainloop_zen3_asm(ctx);
} }
else { else {
cn_upx2_double_mainloop_asm(ctx); cn_upx2_double_mainloop_asm(ctx);
} }
# else
cn_upx2_double_mainloop_asm(ctx);
# endif
} }
# endif # endif
else if (ALGO == Algorithm::CN_RWZ) { else if (ALGO == Algorithm::CN_RWZ) {

View File

@@ -15,12 +15,16 @@
.global FN_PREFIX(cnv1_double_mainloop_asm) .global FN_PREFIX(cnv1_double_mainloop_asm)
.global FN_PREFIX(cnv1_quad_mainloop_asm) .global FN_PREFIX(cnv1_quad_mainloop_asm)
.global FN_PREFIX(cnv2_mainloop_ivybridge_asm) .global FN_PREFIX(cnv2_mainloop_ivybridge_asm)
#ifdef XMRIG_FEATURE_ASM_AMD
.global FN_PREFIX(cnv2_mainloop_ryzen_asm) .global FN_PREFIX(cnv2_mainloop_ryzen_asm)
.global FN_PREFIX(cnv2_mainloop_bulldozer_asm) .global FN_PREFIX(cnv2_mainloop_bulldozer_asm)
#endif
.global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm) .global FN_PREFIX(cnv2_double_mainloop_sandybridge_asm)
.global FN_PREFIX(cnv2_rwz_mainloop_asm) .global FN_PREFIX(cnv2_rwz_mainloop_asm)
.global FN_PREFIX(cnv2_rwz_double_mainloop_asm) .global FN_PREFIX(cnv2_rwz_double_mainloop_asm)
#ifdef XMRIG_FEATURE_ASM_AMD
.global FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm) .global FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm)
#endif
ALIGN(64) ALIGN(64)
FN_PREFIX(cnv1_single_mainloop_asm): FN_PREFIX(cnv1_single_mainloop_asm):
@@ -58,6 +62,7 @@ FN_PREFIX(cnv2_mainloop_ivybridge_asm):
ret 0 ret 0
mov eax, 3735929054 mov eax, 3735929054
#ifdef XMRIG_FEATURE_ASM_AMD
ALIGN(64) ALIGN(64)
FN_PREFIX(cnv2_mainloop_ryzen_asm): FN_PREFIX(cnv2_mainloop_ryzen_asm):
sub rsp, 48 sub rsp, 48
@@ -75,6 +80,7 @@ FN_PREFIX(cnv2_mainloop_bulldozer_asm):
add rsp, 48 add rsp, 48
ret 0 ret 0
mov eax, 3735929054 mov eax, 3735929054
#endif
ALIGN(64) ALIGN(64)
FN_PREFIX(cnv2_double_mainloop_sandybridge_asm): FN_PREFIX(cnv2_double_mainloop_sandybridge_asm):
@@ -103,6 +109,7 @@ FN_PREFIX(cnv2_rwz_double_mainloop_asm):
ret 0 ret 0
mov eax, 3735929054 mov eax, 3735929054
#ifdef XMRIG_FEATURE_ASM_AMD
ALIGN(64) ALIGN(64)
FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm): FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm):
sub rsp, 48 sub rsp, 48
@@ -111,6 +118,7 @@ FN_PREFIX(cnv2_upx_double_mainloop_zen3_asm):
add rsp, 48 add rsp, 48
ret 0 ret 0
mov eax, 3735929054 mov eax, 3735929054
#endif
#if defined(__linux__) && defined(__ELF__) #if defined(__linux__) && defined(__ELF__)
.section .note.GNU-stack,"",%progbits .section .note.GNU-stack,"",%progbits

View File

@@ -5,12 +5,16 @@
.global cnv1_double_mainloop_asm .global cnv1_double_mainloop_asm
.global cnv1_quad_mainloop_asm .global cnv1_quad_mainloop_asm
.global cnv2_mainloop_ivybridge_asm .global cnv2_mainloop_ivybridge_asm
#ifdef XMRIG_FEATURE_ASM_AMD
.global cnv2_mainloop_ryzen_asm .global cnv2_mainloop_ryzen_asm
.global cnv2_mainloop_bulldozer_asm .global cnv2_mainloop_bulldozer_asm
#endif
.global cnv2_double_mainloop_sandybridge_asm .global cnv2_double_mainloop_sandybridge_asm
.global cnv2_rwz_mainloop_asm .global cnv2_rwz_mainloop_asm
.global cnv2_rwz_double_mainloop_asm .global cnv2_rwz_double_mainloop_asm
#ifdef XMRIG_FEATURE_ASM_AMD
.global cnv2_upx_double_mainloop_zen3_asm .global cnv2_upx_double_mainloop_zen3_asm
#endif
ALIGN(64) ALIGN(64)
cnv1_single_mainloop_asm: cnv1_single_mainloop_asm:
@@ -36,6 +40,7 @@ cnv2_mainloop_ivybridge_asm:
ret 0 ret 0
mov eax, 3735929054 mov eax, 3735929054
#ifdef XMRIG_FEATURE_ASM_AMD
ALIGN(64) ALIGN(64)
cnv2_mainloop_ryzen_asm: cnv2_mainloop_ryzen_asm:
#include "../cn2/cnv2_main_loop_ryzen.inc" #include "../cn2/cnv2_main_loop_ryzen.inc"
@@ -47,6 +52,7 @@ cnv2_mainloop_bulldozer_asm:
#include "../cn2/cnv2_main_loop_bulldozer.inc" #include "../cn2/cnv2_main_loop_bulldozer.inc"
ret 0 ret 0
mov eax, 3735929054 mov eax, 3735929054
#endif
ALIGN(64) ALIGN(64)
cnv2_double_mainloop_sandybridge_asm: cnv2_double_mainloop_sandybridge_asm:
@@ -66,8 +72,10 @@ cnv2_rwz_double_mainloop_asm:
ret 0 ret 0
mov eax, 3735929054 mov eax, 3735929054
#ifdef XMRIG_FEATURE_ASM_AMD
ALIGN(64) ALIGN(64)
cnv2_upx_double_mainloop_zen3_asm: cnv2_upx_double_mainloop_zen3_asm:
#include "cn2/cnv2_upx_double_mainloop_zen3.inc" #include "cn2/cnv2_upx_double_mainloop_zen3.inc"
ret 0 ret 0
mov eax, 3735929054 mov eax, 3735929054
#endif

View File

@@ -41,10 +41,12 @@ randomx_vm *xmrig::RxVm::create(RxDataset *dataset, uint8_t *scratchpad, bool so
flags |= RANDOMX_FLAG_JIT; flags |= RANDOMX_FLAG_JIT;
} }
# ifdef XMRIG_FEATURE_ASM_AMD
const auto asmId = assembly == Assembly::AUTO ? Cpu::info()->assembly() : assembly.id(); const auto asmId = assembly == Assembly::AUTO ? Cpu::info()->assembly() : assembly.id();
if ((asmId == Assembly::RYZEN) || (asmId == Assembly::BULLDOZER)) { if ((asmId == Assembly::RYZEN) || (asmId == Assembly::BULLDOZER)) {
flags |= RANDOMX_FLAG_AMD; flags |= RANDOMX_FLAG_AMD;
} }
# endif
return randomx_create_vm(static_cast<randomx_flags>(flags), !dataset->get() ? dataset->cache()->get() : nullptr, dataset->get(), scratchpad, node); return randomx_create_vm(static_cast<randomx_flags>(flags), !dataset->get() ? dataset->cache()->get() : nullptr, dataset->get(), scratchpad, node);
} }