1
0
mirror of https://github.com/xmrig/xmrig.git synced 2025-12-07 16:05:05 -05:00

Compare commits

...

888 Commits

Author SHA1 Message Date
XMRig
9138690126 v6.23.0-dev 2025-06-16 02:05:43 +07:00
XMRig
d58061c903 Add detection for _aligned_malloc. 2025-06-15 20:06:19 +07:00
XMRig
3b863cf88f Fixed __umul128 for MSVC ARM64. 2025-06-15 04:58:03 +07:00
XMRig
9c7468df64 Fixed user agent string. 2025-06-15 00:21:23 +07:00
xmrig
a18fa269a6 Merge pull request #3666 from SChernykh/dev
Better detection of aligned malloc functions
2025-06-14 23:09:05 +07:00
SChernykh
bcc5581535 Better detection of aligned malloc functions 2025-06-14 18:00:27 +02:00
XMRig
dba336aa04 Update hwloc for MSVC. 2025-06-14 22:11:33 +07:00
XMRig
3ff41f7c94 Fixed UTF-8 paths support for the config file with Clang compiler on Windows ARM64. 2025-06-14 15:38:25 +07:00
XMRig
faa3d55123 Remove deprecated -Ofast for Clang. 2025-06-13 21:53:03 +07:00
XMRig
9e7cf69ac3 Detect CPU name and AES instructions on Windows ARM64. 2025-06-13 21:02:10 +07:00
XMRig
57a4998ae2 Fix Linux build. 2025-06-13 04:05:30 +07:00
XMRig
34b4448a81 Split BasicCpuInfo_arm. 2025-06-13 03:57:13 +07:00
XMRig
650d794fb1 Initial Windows ARM64 support via MSYS2. 2025-06-13 03:00:34 +07:00
XMRig
064a61988a Update deps scripts. 2025-06-12 00:52:49 +07:00
xmrig
2ab7f85ccd Merge pull request #3665 from SChernykh/dev
Tweaked autoconfig for AMD CPUs with < 2 MB L3 cache per thread
2025-06-11 23:40:46 +07:00
SChernykh
e4c30eb0dd Tweaked autoconfig for AMD CPUs with < 2 MB L3 cache per thread 2025-06-11 18:34:50 +02:00
XMRig
d4e57d9427 Fix LLHTTP_EXPORT 2025-06-10 03:13:34 +07:00
XMRig
9a71190ca1 Update llhttp to 9.3.0 2025-06-09 03:02:26 +07:00
XMRig
a7dcbb143e Bump minimum CMake version to 3.10 2025-06-08 23:23:40 +07:00
XMRig
a6a0f80b12 Fix header path. 2025-06-06 14:42:49 +07:00
XMRig
682834b87d Universal fix for NaN and Infinity in JSON output 2025-06-06 14:36:21 +07:00
XMRig
184d6100dc Update rapidjson 2025-06-05 01:22:31 +07:00
XMRig
0c52d789a9 v6.22.4-dev 2025-06-04 18:59:39 +07:00
XMRig
e33334f11a Merge branch 'master' into dev 2025-06-04 18:58:55 +07:00
XMRig
6184224a66 v6.22.3 2025-06-04 18:11:51 +07:00
XMRig
f499155032 Merge branch 'dev' 2025-06-04 18:11:14 +07:00
xmrig
a32b688dcf Update CHANGELOG.md 2025-06-04 01:47:57 +07:00
XMRig
35b334d58a Fixed compile warning. 2025-05-31 01:12:00 +07:00
XMRig
33623492fe Allow run generate_cl.js from the scripts directory. 2025-05-30 01:47:08 +07:00
xmrig
77009bd0d1 Merge pull request #3662 from ybh1998/keccak_f800
Fix type of `keccak_f800`
2025-05-30 01:04:22 +07:00
ybh1998
46572dcb3d Fix type of keccak_f800 2025-05-30 01:57:08 +08:00
xmrig
0d9af3347d Merge pull request #3652 from SChernykh/dev
Fixed HttpsClient::flush logic
2025-04-17 16:12:31 +07:00
SChernykh
d24e13e605 Fixed HttpsClient::flush logic
- Don't write empty buffers
- Don't write if an error was returned
2025-04-17 10:32:14 +02:00
xmrig
36fdfa2694 Merge pull request #3646 from SChernykh/dev
Optimized autoconfig for AMD CPUs with < 2 MB L3 cache per thread
2025-03-22 18:36:09 +07:00
SChernykh
6cfc02d24f Optimized autoconfig for AMD CPUs with < 2 MB L3 cache per thread 2025-03-22 11:34:23 +01:00
XMRig
16ecb8f085 Allow use of the previous CUDA plugin version with a warning. 2024-12-23 23:14:06 +07:00
xmrig
0229c65232 Merge pull request #3605 from SChernykh/dev
CUDA backend: update RandomX dataset when it changes
2024-12-18 22:36:08 +07:00
SChernykh
4a13a8a75c CUDA backend: update RandomX dataset when it changes 2024-12-18 13:45:10 +01:00
XMRig
cd2fd9d7a6 Simplified getting PCI topology for the OpenCL backend. 2024-11-08 13:03:35 +07:00
XMRig
064cd3ef20 Fixed and simplified OpenCL GPU type detection. 2024-11-08 07:09:35 +07:00
XMRig
e8bbd134f9 v6.22.3-dev 2024-11-03 15:06:54 +07:00
XMRig
cf86a1e05c Merge branch 'master' into dev 2024-11-03 15:06:22 +07:00
XMRig
f9e990d0f0 v6.22.2 2024-11-03 14:38:44 +07:00
XMRig
200f23bba7 Merge branch 'dev' 2024-11-03 14:38:00 +07:00
xmrig
4234b20e21 Update CHANGELOG.md 2024-11-03 14:31:17 +07:00
xmrig
c5d8b8265b Merge pull request #3571 from SChernykh/dev
Fix number of threads on the new Intel Core Ultra CPUs
2024-10-25 20:55:35 +07:00
SChernykh
77c14c8362 Fix number of threads on the new Intel Core Ultra CPUs 2024-10-25 13:44:24 +02:00
xmrig
8b03750806 Merge pull request #3569 from SChernykh/dev
Fix: don't use NaN in hashrate calculations
2024-10-23 17:18:36 +07:00
SChernykh
40949f2767 Fix: don't use NaN in hashrate calculations 2024-10-23 11:40:27 +02:00
XMRig
56c447e02a v6.22.2-dev 2024-10-23 13:36:56 +07:00
XMRig
21c206f05d Merge branch 'master' into dev 2024-10-23 13:36:19 +07:00
XMRig
ee65b3d159 v6.22.1 2024-10-23 12:53:06 +07:00
XMRig
1f75d198d8 Merge branch 'dev' 2024-10-23 12:52:16 +07:00
xmrig
5cf2422766 Update CHANGELOG.md 2024-10-22 17:34:07 +07:00
XMRig
a32f9b5b04 Fixed --version output on ARM. 2024-10-21 08:48:58 +07:00
XMRig
8a4792f638 Update hwloc for MSVC. 2024-10-21 08:31:52 +07:00
XMRig
e32731b60b Update deps 2024-10-20 09:49:06 +07:00
xmrig
e1ae367084 Merge pull request #3540 from SChernykh/dev
Detect AMD engineering samples in randomx_boost.sh
2024-08-29 19:50:43 +07:00
SChernykh
bc1c8358c4 Detect AMD engineering samples in randomx_boost.sh 2024-08-29 14:47:30 +02:00
xmrig
e0af8f0c6b Merge pull request #3539 from SChernykh/dev
Added Zen5 to randomx_boost.sh
2024-08-28 18:51:39 +07:00
SChernykh
29f9c8cf4c Added Zen5 to randomx_boost.sh 2024-08-28 13:49:27 +02:00
xmrig
26f4936f6f Merge pull request #3535 from SChernykh/dev
RandomX: tweaks for Zen5
2024-08-20 06:47:30 +07:00
SChernykh
a411ee3565 RandomX: tweaks for Zen5 2024-08-19 21:01:49 +02:00
xmrig
01bd0d48a1 Merge pull request #3534 from SChernykh/dev
Fixed threads auto-config on Zen5
2024-08-17 06:23:49 +07:00
SChernykh
20d555668b Fixed threads auto-config on Zen5 2024-08-16 23:36:22 +02:00
xmrig
56baec762f Merge pull request #3531 from SChernykh/dev
Always reset nonce on RandomX dataset change
2024-08-14 22:16:34 +07:00
SChernykh
17a52fb418 Always reset nonce on RandomX dataset change
Also never get a new job when mining is paused
2024-08-14 16:41:03 +02:00
XMRig
7e4caa8929 Merge remote-tracking branch 'remotes/origin/master' into dev 2024-08-12 03:02:19 +07:00
xmrig
ef14d55aa5 Merge pull request #3529 from eltociear/patch-1
docs: update ghostrider/README.md
2024-08-12 03:01:13 +07:00
XMRig
5776fdcc20 v6.22.1-dev 2024-08-12 02:15:08 +07:00
XMRig
fe0f69031b Merge branch 'master' into dev 2024-08-12 02:14:40 +07:00
Ikko Eltociear Ashimine
e682f89298 docs: update ghostrider/README.md
nubmer -> number
2024-08-12 03:54:26 +09:00
XMRig
544c393f78 v6.22.0 2024-08-12 01:13:51 +07:00
XMRig
9da6ea07bd Merge branch 'dev' 2024-08-12 01:13:29 +07:00
XMRig
62bcd6e5dc v6.22.0-dev 2024-08-10 22:00:42 +07:00
xmrig
c5f98fc5c7 Merge pull request #3528 from SChernykh/dev
Added rx/yada OpenCL support
2024-08-07 13:36:55 +07:00
SChernykh
ecb3ec0317 Added rx/yada OpenCL support 2024-08-07 00:18:51 +02:00
XMRig
3dfeed475f Sync changes with the proxy. 2024-08-06 23:32:20 +07:00
XMRig
98c775703e Don't generate "rx/yada" profile, use the "rx" profile by default. 2024-08-04 20:00:12 +07:00
XMRig
8da49f2650 More clean target parse. 2024-08-04 19:51:11 +07:00
xmrig
4570187459 Merge pull request #3525 from SChernykh/dev
Added Zen5 detection
2024-08-03 22:58:00 +07:00
SChernykh
748365d6e3 Added Zen5 detection
Preliminary Zen5 support, MSR mod is not ready yet.
2024-08-03 11:01:18 +02:00
xmrig
dd7e0e520d Merge pull request #3524 from SChernykh/dev
Fixed ARMv8 compilation
2024-08-02 23:47:21 +07:00
SChernykh
ef6fb728b5 Fixed ARMv8 compilation 2024-08-02 17:51:08 +02:00
xmrig
92ffcd34d6 Merge pull request #2411 from pdxwebdev/feature/yadacoin
Added support for Yada (rx/yada algorithm)
2024-08-02 16:22:50 +07:00
Matthew Vogel
b108845627 fix yada nonce offset 2024-08-01 15:10:20 -07:00
Matthew Vogel
046b2a17d3 finish updating for yadacoin 2024-08-01 00:01:09 -07:00
Matthew Vogel
5342f25fbf update constants for yadacoin 2024-07-31 23:45:34 -07:00
Matthew Vogel
5f6bcfe949 add yada constants 2024-07-31 23:26:37 -07:00
xmrig
ecef382326 Merge pull request #3522 from SChernykh/dev
Removed rx/keva
2024-07-31 15:41:25 +07:00
SChernykh
86f5db19d2 Removed rx/keva
Keva coin is too small now.
2024-07-31 08:28:05 +02:00
xmrig
b4a47d6ed0 Merge pull request #3518 from SChernykh/dev
Make Json::normalize more strict
2024-07-29 22:27:29 +07:00
SChernykh
f5095247e8 Make Json::normalize more strict
Rounding a regular FP value can give an invalid result - check the result too.
2024-07-29 17:14:21 +02:00
XMRig
2bb07fe633 #3515 Update build scripts for OpenSSL. 2024-07-24 21:02:53 +07:00
XMRig
a7be8cb80c Remove chdir call after fork. 2024-06-05 03:45:37 +07:00
XMRig
2ce16df423 Create signal handles after fork() call, replace #3492. 2024-06-05 03:23:58 +07:00
XMRig
5eaa6c152e v6.21.4-dev 2024-04-23 16:51:58 +07:00
XMRig
6972f727c1 Merge branch 'master' into dev 2024-04-23 16:50:58 +07:00
XMRig
7897f10c48 v6.21.3 2024-04-23 16:27:24 +07:00
XMRig
da2fb331b3 Merge branch 'dev' 2024-04-23 16:26:18 +07:00
xmrig
57f3e9c3da Update CHANGELOG.md 2024-04-23 16:17:26 +07:00
xmrig
1efe7e9562 Merge pull request #3462 from SChernykh/dev
RandomX: correct memcpy size for JIT initialization
2024-04-14 17:01:16 +07:00
SChernykh
caae7c64f0 RandomX: correct memcpy size for JIT initialization
No buffer overflow, better fix for `_FORTIFY_SOURCE`
2024-04-14 09:13:00 +02:00
xmrig
9fbdcc0ef0 Merge pull request #3461 from SChernykh/dev
RandomX: check pointer sizes during JIT initialization
2024-04-14 05:38:53 +07:00
SChernykh
c7c26d97fe RandomX: check pointer sizes during JIT initialization 2024-04-13 20:32:16 +02:00
XMRig
1f7e635b04 Use internal logger for error message. 2024-03-26 21:46:18 +07:00
XMRig
1c5786e3c5 v6.21.3-dev 2024-03-23 16:21:54 +07:00
XMRig
44eb4f0038 Merge branch 'master' into dev 2024-03-23 16:20:24 +07:00
XMRig
4ab9329dda v6.21.2 2024-03-23 13:38:42 +07:00
XMRig
0c2ee013a7 Merge branch 'dev' 2024-03-23 13:38:05 +07:00
xmrig
3347537635 Update CHANGELOG.md 2024-03-23 00:46:15 +07:00
XMRig
7a85257ad4 Update hwloc for MSVC builds. 2024-03-22 18:14:39 +07:00
XMRig
850b43c079 Fix build with recent libuv. 2024-03-22 01:22:54 +07:00
XMRig
b8e4eaac87 Fix rapidjson assert. 2024-03-21 21:03:35 +07:00
xmrig
b9dd5e3eae Merge pull request #3450 from SChernykh/dev
Fix RandomX crash when compiled with fortify_source
2024-03-21 04:09:05 +07:00
SChernykh
032c28d50a Merge remote-tracking branch 'upstream/dev' into dev 2024-03-20 21:24:58 +01:00
SChernykh
f6c50b5393 Fix RandomX crash when compiled with fortify_source 2024-03-20 21:24:02 +01:00
SChernykh
e65e283aac Merge remote-tracking branch 'upstream/dev' into dev 2024-03-20 21:22:11 +01:00
XMRig
5552e1f864 Fix scripts for systems without bash. 2024-03-21 02:13:01 +07:00
XMRig
3beccae136 Merge branch 'goodmost-master' into dev 2024-03-20 14:11:53 +07:00
XMRig
ef9bf2aa8c Merge branch 'master' of https://github.com/goodmost/xmrig into goodmost-master 2024-03-20 14:11:28 +07:00
XMRig
42f645fa3b Merge branch 'dev' of github.com:xmrig/xmrig into dev 2024-03-20 00:25:21 +07:00
XMRig
1fb5be6c1d Update deps. 2024-03-20 00:24:46 +07:00
goodmost
08c43b7e58 chore: remove repetitive words
Signed-off-by: goodmost <zhaohaiyang@outlook.com>
2024-03-19 23:19:36 +08:00
xmrig
7b016fd9ce Merge pull request #3436 from SChernykh/dev
Thread-safe FileLogWriter
2024-03-14 21:46:45 +07:00
SChernykh
688d4f5ee1 Thread-safe FileLogWriter 2024-03-04 08:45:22 +01:00
xmrig
64913e3163 Merge pull request #3434 from SChernykh/dev
Update bug_report.md
2024-02-29 14:33:07 +07:00
SChernykh
48fa095e3e Update bug_report.md 2024-02-29 08:31:16 +01:00
XMRig
c9b9ef51ee #2800 Fixed donation with ghostrider algorithm for builds without KawPow algorithm. 2024-02-29 09:38:47 +07:00
xmrig
dd782c7001 Merge pull request #3431 from SChernykh/dev
Stratum: better check of the login response
2024-02-28 11:25:34 +07:00
SChernykh
b49197f808 Stratum: better check of the login response 2024-02-27 23:39:23 +01:00
XMRig
f9c4c57216 v6.21.2-dev 2024-02-25 23:00:45 +07:00
XMRig
a5b8b85967 Merge branch 'master' into dev 2024-02-25 23:00:11 +07:00
XMRig
a5aa2c9042 v6.21.1 2024-02-25 22:26:52 +07:00
XMRig
fa35a32eee Merge branch 'dev' 2024-02-25 22:25:41 +07:00
XMRig
7b6ce59821 Update CHANGELOG.md. 2024-02-22 03:26:41 +07:00
XMRig
33315ba2ef Merge branch 'Daviey-HTTPRebindSegFault' into dev 2024-02-12 14:51:34 +07:00
XMRig
2c9c40d623 Merge branch 'HTTPRebindSegFault' of https://github.com/Daviey/xmrig into Daviey-HTTPRebindSegFault 2024-02-12 14:50:48 +07:00
Dave Walker (Daviey)
daa6328418 Fix segfault in HTTP API rebind
Previously, with the HTTP API enabled on a benchmarking run, it was possible
to cause a segfault due to an issue handling the m_httpd pointer and
rebinding.

  - Initialize m_httpd to nullptr to indicate when it's not in use.
  - Safely delete m_httpd in Api's destructor to prevent use-after-free
    issues.
  - Add checks to ensure m_httpd is not nullptr before usage in start,
    stop, and tick methods.
  - Log errors for HTTP server start failures to aid in debugging.

Fixes MoneroOcean/xmrig#120

Signed-off-by: Dave Walker (Daviey) <email@daviey.com>
2024-02-11 17:52:36 +00:00
XMRig
8afd4d5f2f Cleanup. 2024-01-17 00:31:16 +07:00
xmrig
77e2f3a028 Merge pull request #3399 from SChernykh/dev
Fixed Zephyr mining (OpenCL)
2024-01-14 09:01:44 +07:00
SChernykh
206295c6cb Fixed Zephyr mining (OpenCL) 2024-01-13 20:14:08 +01:00
XMRig
07e1e77c4f Code style cleanup. 2023-12-29 21:17:19 +07:00
xmrig
50a98a4bb1 Merge pull request #3391 from moneromooo-monero/tf-dev
add support for townforge (monero fork using randomx)
2023-12-27 23:13:54 +07:00
moneromooo-monero
c50369d65d add support for townforge (monero fork using randomx) 2023-12-23 15:31:05 +00:00
XMRig
592b0c9c76 v6.21.1-dev 2023-11-23 21:19:36 +07:00
XMRig
89eab0eff2 Merge branch 'master' into dev 2023-11-23 21:18:21 +07:00
XMRig
8084ff37a5 v6.21.0 2023-11-23 20:40:58 +07:00
XMRig
7cf3db7750 Merge branch 'dev' 2023-11-23 20:40:34 +07:00
XMRig
4bda6e054d v6.21.0-dev 2023-11-23 19:51:41 +07:00
xmrig
64a0ed413b Merge pull request #3358 from SChernykh/dev
Zephyr solo mining: handle multiple outputs
2023-11-15 22:36:35 +07:00
SChernykh
0b59b7eb43 Zephyr solo mining: handle multiple outputs 2023-11-15 16:18:05 +01:00
xmrig
ae6b10b5a4 Merge pull request #3356 from SChernykh/dev
Updated pricing record size for Zephyr solo mining
2023-11-15 08:27:02 +07:00
SChernykh
705a7eac0c Updated pricing record size for Zephyr solo mining 2023-11-14 13:06:10 +01:00
xmrig
10bfffe033 Merge pull request #3348 from SChernykh/dev
Update to latest sse2neon.h
2023-10-31 11:52:38 +07:00
SChernykh
4131aa4754 Update sse2neon.h 2023-10-30 20:07:03 +01:00
xmrig
fee51b20fa Merge pull request #3346 from SChernykh/dev
ARM64 JIT: don't use `x18` register
2023-10-20 07:36:12 +07:00
SChernykh
5e66efabcf ARM64 JIT: don't use x18 register
From https://developer.apple.com/documentation/xcode/writing-arm64-code-for-apple-platforms
> The platforms reserve register x18. Don’t use this register.

This PR fixes invalid hashes when running on Apple silicon with the latest macOS SDK.
2023-10-19 17:45:15 +02:00
XMRig
08901a9a4b Merge branch 'JacksonZ03-main' into dev 2023-10-09 15:15:32 +07:00
XMRig
a19f590ee6 Merge branch 'main' of https://github.com/JacksonZ03/xmrig into JacksonZ03-main 2023-10-09 15:14:50 +07:00
Jackson Zheng
2fa754825d Update cn_main_loop.asm
Found this line to be missing. I looked through the history and seemed like the original author of the commit missed it out.
2023-10-08 23:29:52 +01:00
Jackson Zheng
f3446c0a94 Update cn_main_loop.asm
I was scanning the code and found this line to be missing. Not sure if this was a mistake or if it was intentionally left out?
2023-10-08 23:12:58 +01:00
xmrig
71209d4cd7 Merge pull request #3339 from SChernykh/dev
Added SNI option for TLS connections
2023-09-29 19:15:29 +07:00
SChernykh
0a3313cb76 Added SNI option for TLS connections
Disabled by default, add `"sni": true,` to pool config to enable it.
2023-09-29 08:33:49 +02:00
xmrig
e855723cd9 Merge pull request #3320 from SChernykh/dev
Add "built for OS/architecture/bits" to "ABOUT"
2023-08-21 19:00:14 +07:00
SChernykh
6e294bd046 Add "built for OS/architecture/bits" to "ABOUT"
To make it more clear what binary it is on some XMRig screenshot.
2023-08-21 13:49:21 +02:00
XMRig
dfe70d9ea7 Fixed huge pages availability info on Linux. 2023-08-08 17:48:44 +07:00
XMRig
2ecf10cdcb Make Platform::hasKeepalive() constexpr where always supported and code cleanup. 2023-08-06 20:26:07 +07:00
xmrig
b55ca8e547 Merge pull request #3312 from SChernykh/dev
Disable TCP keepalive before closing socket
2023-08-06 20:14:37 +07:00
SChernykh
12577df7ba Disable TCP keepalive before closing socket 2023-08-06 14:51:25 +02:00
xmrig
64f5bb467a Merge pull request #3302 from SChernykh/dev
Enabled keepalive for Windows (>= Vista)
2023-07-17 17:17:39 +07:00
SChernykh
5717e72367 Enabled keepalive for Windows (>= Vista) 2023-07-17 09:49:10 +02:00
XMRig
e7de104d88 v6.20.1-dev 2023-07-03 18:47:55 +07:00
XMRig
3b5e04b1b7 Merge branch 'master' into dev 2023-07-03 18:47:22 +07:00
XMRig
2e77faa80c v6.20.0 2023-07-03 12:42:00 +07:00
XMRig
6e63a246bf Merge branch 'dev' 2023-07-03 12:41:35 +07:00
XMRig
09abc81255 v6.20.0-dev 2023-07-03 12:37:36 +07:00
xmrig
fc698f7bcf Merge pull request #3291 from SChernykh/dev
Zephyr solo mining: fix for blocks with transactions
2023-06-24 20:22:53 +07:00
SChernykh
cb2f8fd453 Zephyr solo mining: fix for blocks with transactions 2023-06-24 15:15:37 +02:00
xmrig
59c6c42ceb Merge pull request #3290 from SChernykh/dev
Zephyr coin support
2023-06-24 19:53:54 +07:00
SChernykh
6c10cc5a4b Zephyr coin support
Solo mining will require `--coin Zephyr` in command line, or `"coin": "Zephyr",` in `pools` section of config.json
2023-06-24 14:37:20 +02:00
xmrig
d5a8f8a5ae Merge pull request #3288 from SChernykh/dev
KawPow: fixed data race when building programs
2023-06-19 17:40:24 +07:00
SChernykh
d94d052e6c KawPow: fixed data race when building programs
`uv_queue_work` can't be called from other threads, only `uv_async_send` is thread-safe.
2023-06-19 12:32:28 +02:00
XMRig
ae2b7e3348 Merge branch 'Spudz76-dev-addApiRebind' into dev 2023-06-07 20:49:34 +07:00
XMRig
7d7f30701f Code cleanup. 2023-06-07 20:48:56 +07:00
XMRig
e80fc25789 Merge branch 'dev-addApiRebind' of https://github.com/Spudz76/xmrig into Spudz76-dev-addApiRebind 2023-06-07 20:12:58 +07:00
XMRig
ff53be5f3b Merge branch 'benthetechguy-readme' into dev 2023-06-07 00:52:37 +07:00
XMRig
6981e68ae3 Merge branch 'readme' of https://github.com/benthetechguy/xmrig into benthetechguy-readme 2023-06-07 00:52:03 +07:00
XMRig
c7e541d84f Disallow direct use of HwlocCpuInfo class. 2023-06-07 00:32:09 +07:00
XMRig
a2ae17b4c4 Code cleanup. 2023-06-06 23:15:58 +07:00
XMRig
554b60966b Fixed compatibility with hwloc 1.11. 2023-06-06 02:30:10 +07:00
xmrig
0378aa8df4 Merge pull request #3236 from MrFoxPro/dev
fix(cuda): receive CUDA loader error on linux too.
2023-06-05 23:07:38 +07:00
XMRig
6dbd46a891 Added new CMake options ARM_V8 and ARM_V7. 2023-06-04 20:32:05 +07:00
XMRig
055db83142 Added new ARM CPU names. 2023-06-04 19:36:53 +07:00
XMRig
cdd5dff337 v6.19.4-dev 2023-06-03 21:14:26 +07:00
XMRig
bc5fe8f456 Merge branch 'master' into dev 2023-06-03 21:13:51 +07:00
XMRig
0bc87345c4 v6.19.3 2023-06-03 19:59:18 +07:00
XMRig
f17d31e61a Merge branch 'dev' 2023-06-03 19:57:36 +07:00
xmrig
e6bf4c0077 Update CHANGELOG.md 2023-06-02 22:12:18 +07:00
xmrig
ff79b8fce4 Merge pull request #3280 from SChernykh/dev
Updated example scripts
2023-06-02 17:47:13 +07:00
SChernykh
af87369e4f Updated example scripts
- Hashvault is top 1 pool now, so changed it to a smaller pool
- node.xmr.to doesn't exist anymore
2023-06-02 09:34:26 +02:00
xmrig
65fc16d5ac Merge pull request #3275 from SChernykh/dev
RandomX: fixed `jccErratum` list
2023-05-26 18:25:57 +07:00
SChernykh
826e23b4c4 Fixed jccErratum list 2023-05-26 12:46:59 +02:00
Tony Butler
548fbb9f71 Add API rebind polling 2023-05-23 16:49:43 -06:00
xmrig
02d45834e1 Merge pull request #3273 from SChernykh/dev
RandomX: fixed undefined behavior
2023-05-23 20:18:32 +07:00
SChernykh
1252a4710e RandomX: fixed undefined behavior
Using an inactive member of a `union` is undefined behavior in C++
2023-05-23 14:40:12 +02:00
xmrig
5891f1f06b Merge pull request #3271 from SChernykh/opt_genprog
RandomX: optimized program generation
2023-05-22 05:25:32 +07:00
SChernykh
5dcbab7e3a RandomX: optimized program generation 2023-05-21 17:44:20 +02:00
xmrig
7b51e23aa0 Merge pull request #3254 from SChernykh/dev
Tweaked auto-tuning for Intel CPUs
2023-04-19 12:29:58 +07:00
SChernykh
7f7fc363e1 Tweaked auto-tuning for Intel CPUs
Alder Lake and newer CPUs have exclusive L3 cache and benefit from more threads until L3+L2 is filled.
2023-04-18 21:20:45 +02:00
XMRig
c4e1363148 #3245 Improved algorithm negotiation for donation rounds by sending extra information about current mining job. 2023-04-07 23:35:05 +07:00
XMRig
a2e9b3456d v6.19.3-dev 2023-04-04 00:34:54 +07:00
XMRig
4790318685 Merge branch 'master' into dev 2023-04-04 00:34:22 +07:00
XMRig
038c4fbe34 v6.19.2 2023-04-03 22:15:40 +07:00
XMRig
d65d34ef36 Merge branch 'dev' 2023-04-03 22:14:58 +07:00
xmrig
af6647f377 Update CHANGELOG.md 2023-04-03 20:34:35 +07:00
xmrig
8f9adc02c0 Merge pull request #3241 from SChernykh/dev
Sync with changes from proxy
2023-04-03 20:28:38 +07:00
SChernykh
5e0079f012 Sync with changes from proxy 2023-04-03 15:01:40 +02:00
xmrig
dc5e341778 Merge pull request #3240 from koitsu/dev-improve-cmd-files
Improve .cmd files when run by shortcuts on another drive
2023-04-01 12:30:33 +07:00
Jeremy Chadwick
0f81ab4c67 Improve .cmd files when run by shortcuts on another drive 2023-03-31 20:16:00 -07:00
Dmitriy Nikiforov
62a3a98e7d fix(cuda): receive CUDA loader error on linux too. 2023-03-27 18:48:13 +05:00
XMRig
d31b3b7c76 Code style cleanup. 2023-03-25 20:56:25 +07:00
xmrig
e352109431 Merge pull request #3232 from moneromooo-monero/xhd-dev
DaemonClient: new X-Hash-Difficulty HTTP header optimization
2023-03-25 20:51:09 +07:00
moneromooo-monero
88b0385bfe DaemonClient: new X-Hash-Difficulty HTTP header optimization
If the caller knows the difficulty of a PoW hash a given nonce
yields, it can tell the callee via the X-Hash-Difficulty, which
may allow the callee to skip some processing if the difficulty
does not meet some criterion.

In my case, a merge mining proxy can know it's pointless trying
to submit the nonce to a chain with higher difficulty when the
nonce only meets the difficulty for a lower difficulty chain.
2023-03-25 09:48:54 +00:00
xmrig
9508332258 Merge pull request #3230 from SChernykh/dev
Fixed parsing of TX_EXTRA_MERGE_MINING_TAG
2023-03-25 12:39:04 +07:00
SChernykh
bc5c1f7e65 Fixed parsing of TX_EXTRA_MERGE_MINING_TAG 2023-03-24 22:42:26 +01:00
XMRig
22118330e3 v6.19.2-dev 2023-03-23 20:41:00 +07:00
XMRig
240f2450af Merge branch 'master' into dev 2023-03-23 20:40:23 +07:00
XMRig
6e856ca39c v6.19.1 2023-03-23 19:03:09 +07:00
XMRig
6047786f43 Merge branch 'dev' 2023-03-23 19:02:24 +07:00
xmrig
7b8ba9ac09 Update CHANGELOG.md 2023-03-23 18:10:43 +07:00
xmrig
02259fec05 Merge pull request #3228 from SChernykh/dev
Fix build with gcc 13
2023-03-23 18:02:47 +07:00
Matthew Smith
51728b2d55 Fix build with gcc 13
Now some header files are not included transitively with the new
libstdc++.

Bug: https://bugs.gentoo.org/895226
2023-03-23 12:01:15 +01:00
XMRig
ebe818a5fb Resolved deprecated methods warnings with OpenSSL 3.0. 2023-03-07 23:51:03 +07:00
xmrig
790a71b030 Merge pull request #3218 from SChernykh/dev
Fix: `--randomx-wrmsr=-1` worked only on Intel
2023-02-27 11:17:01 +07:00
SChernykh
c62622b114 Fix: --randomx-wrmsr=-1 worked only on Intel 2023-02-26 22:31:55 +01:00
xmrig
fc643e2936 Merge pull request #3213 from SChernykh/dev
Fix for 32-bit clang 15
2023-02-19 15:47:28 +07:00
SChernykh
12b9b62ef7 Fix for 32-bit clang 15
Don't define `_mm_cvtsi128_si64` and `_mm_cvtsi64_si128` because clang 15 already has them in its headers.
2023-02-19 09:42:16 +01:00
XMRig
667f636c62 Fixed DnsUvBackend storage cleanup. 2023-02-09 21:45:50 +07:00
XMRig
81e87a6931 Revert changes to fix MSVC build. 2023-02-09 21:28:39 +07:00
XMRig
540b223eab Cleanup. 2023-02-09 13:55:11 +07:00
XMRig
75474be060 Fix warning. 2023-02-03 23:46:58 +07:00
XMRig
49f34e59a6 Partially resolved deprecated methods warnings in OpenSSL 3.0. 2023-02-03 23:08:54 +07:00
XMRig
223add4e22 v6.19.1-dev 2023-02-02 12:27:33 +07:00
XMRig
435fc86120 Merge branch 'master' into dev 2023-02-02 12:27:08 +07:00
XMRig
c0143b90ce v6.19.0 2023-02-02 11:51:11 +07:00
XMRig
c3cdffe86d Merge branch 'dev' 2023-02-02 11:50:29 +07:00
XMRig
8a4da33bea Update scripts/build.*.sh. 2023-01-30 00:19:55 +07:00
XMRig
1c7a339527 v6.19.0-dev (new config options added). 2023-01-29 11:16:37 +07:00
xmrig
490acd6e55 Update CHANGELOG.md 2023-01-29 11:13:28 +07:00
xmrig
6ecf57959b Merge pull request #3202 from SChernykh/dev
Solo mining: added job timeout (default is 15 seconds)
2023-01-29 11:12:22 +07:00
SChernykh
e2c58126e9 Solo mining: added job timeout (default is 15 seconds)
It's important to update jobs frequently to get new transactions into the block template. See https://rucknium.me/posts/monero-pool-transaction-delay/ for more details.
2023-01-28 19:42:02 +01:00
XMRig
0ed4b35cd3 Update hwloc for MSVC builds to 2.9.0. 2023-01-27 01:07:58 +07:00
xmrig
afe2aa4402 Update CHANGELOG.md 2023-01-23 20:54:46 +07:00
XMRig
3f7533a645 Update to latest sse2neon.h. 2023-01-23 20:45:02 +07:00
xmrig
6ef0409086 Merge pull request #3198 from SChernykh/dev
Fixed broken RandomX light mode mining
2023-01-21 22:05:25 +07:00
SChernykh
64b0d9562e Fixed broken RandomX light mode mining
RandomX VMs didn't get updated properly in light mode.
2023-01-21 16:02:47 +01:00
XMRig
770b71c69a #3185 Fixed macOS DMI reader. 2023-01-19 22:09:59 +07:00
xmrig
44642643f8 Merge pull request #3196 from SChernykh/dev
Show IP address for failed connections
2023-01-11 17:02:15 +07:00
SChernykh
273bb84df8 Show IP address for failed connections 2023-01-11 09:28:16 +01:00
xmrig
4d0b8c9daf Merge pull request #3182 from SChernykh/dev
DragonflyBSD compilation fixes
2022-12-17 20:24:01 +07:00
SChernykh
7d4d48e83b DragonflyBSD compilation fixes 2022-12-17 13:11:14 +01:00
xmrig
2ea37cdf37 Merge pull request #3180 from SChernykh/dev
Added ifdefs for DragonflyBSD
2022-12-16 21:29:24 +07:00
SChernykh
a02afe6d4f Added ifdefs for DragonflyBSD
Possible fix for #3179
2022-12-16 15:26:37 +01:00
XMRig
6e86dddc65 Bump the minimum CMake version in other places too. 2022-12-09 16:07:42 +07:00
xmrig
0171faffe7 Merge pull request #3176 from SChernykh/dev
Update cmake required version to 3.1
2022-12-09 15:24:54 +07:00
SChernykh
25decd1b7f Update cmake required version to 3.1
`set(CMAKE_CXX_STANDARD 11)` only works properly starting from cmake 3.1, see #3174
2022-12-09 09:21:40 +01:00
xmrig
354b9ddb34 Merge pull request #3163 from SChernykh/dev
Improved Zen 3 MSR mod
2022-11-18 11:38:45 +07:00
SChernykh
3ad6ab56a5 Improved Zen 3 MSR mod
+0.5% speedup on Ryzen 5 5600X
2022-11-17 23:32:36 +01:00
xmrig
1aa0e37b54 Merge pull request #3161 from SChernykh/dev
MSVC build: enabled parallel compilation
2022-11-15 13:20:41 +06:30
SChernykh
807c64ddb1 MSVC build: enabled parallel compilation 2022-11-15 07:45:54 +01:00
XMRig
5bf90704a6 #2869 2022-10-29 23:51:42 +07:00
xmrig
912d1e362b Merge pull request #3144 from Spudz76/dev-updateSSE2NEON
Update to latest sse2neon.h from github:DLTcollab/sse2neon
2022-10-24 13:50:54 +07:00
Tony Butler
eeb459506c Update to latest sse2neon.h from github:DLTcollab/sse2neon 2022-10-23 15:27:14 -06:00
XMRig
f4ec0287c4 v6.18.2-dev 2022-10-23 23:19:50 +07:00
XMRig
483d6ada3d Merge branch 'master' into dev 2022-10-23 23:19:06 +07:00
XMRig
28e81bd7c0 v6.18.1 2022-10-23 17:44:24 +07:00
XMRig
54e75bc7c4 Merge branch 'dev' 2022-10-23 17:43:38 +07:00
xmrig
c388113a30 Update CHANGELOG.md 2022-10-23 17:14:57 +07:00
xmrig
36afeec225 Merge pull request #3134 from SChernykh/dev
Added Zen4 to randomx_boost.sh
2022-10-10 03:37:06 +07:00
SChernykh
4b5e56416d Added Zen4 to randomx_boost.sh 2022-10-09 22:02:50 +02:00
xmrig
0d314d0469 Merge pull request #3132 from SChernykh/dev
RandomX: added MSR mod for Zen 4
2022-10-01 23:40:04 +07:00
SChernykh
7fc45dfb2d RandomX: added MSR mod for Zen 4
+0.8% faster on Ryzen 9 7950X
2022-10-01 18:33:04 +02:00
xmrig
2ba40edee0 Update CHANGELOG.md 2022-09-25 17:01:33 +07:00
xmrig
bc4dd11761 Merge pull request #3129 from SChernykh/dev
Fix: protectRX flushed CPU cache only on MacOS/iOS
2022-09-22 07:02:28 +07:00
SChernykh
7b52a41459 Fix: protectRX flushed CPU cache only on MacOS/iOS 2022-09-21 15:18:06 +02:00
xmrig
b5de214ff9 Merge pull request #3126 from SChernykh/dev
Don't reset when pool sends the same job blob
2022-09-19 19:03:17 +07:00
SChernykh
8bd3b393ef Update m_size only if blob was set successfully 2022-09-19 10:42:08 +02:00
SChernykh
9223c2f027 Don't reset when pool sends the same job blob 2022-09-19 10:35:36 +02:00
xmrig
6346d36d1b Merge pull request #3120 from SChernykh/dev
RandomX: optimized CFROUND elimination more
2022-09-16 22:50:51 +07:00
SChernykh
93c07e1d34 RandomX: optimized CFROUND elimination more 2022-09-16 14:11:27 +02:00
xmrig
0ba3000982 Merge pull request #3119 from SChernykh/dev
RandomX: optimized CFROUND elimination
2022-09-16 01:04:32 +07:00
SChernykh
f0e7de8c71 RandomX: optimized CFROUND elimination 2022-09-15 19:57:34 +02:00
xmrig
1c4eb6c5fe Merge pull request #3109 from SChernykh/dev
RandomX: added Blake2 AVX2 version
2022-08-26 01:52:22 +07:00
SChernykh
63e21dfe63 RandomX: added Blake2 AVX2 version
+0.1% speedup on AMD Zen2/Zen3 and Intel CPUs which support AVX2.
2022-08-25 20:39:54 +02:00
xmrig
b2d9dab2e3 Merge pull request #3075 from dev-0x7C6/master
Recognize armv7ve as valid ARMv7 target.
2022-08-19 02:30:27 +07:00
xmrig
4c57b60e59 Merge pull request #3082 from SChernykh/dev
Fixed GCC 12 warnings
2022-07-03 16:59:26 +07:00
SChernykh
e6c81d7166 Fixed GCC 12 warnings 2022-07-03 11:51:46 +02:00
xmrig
94840c70d8 Update README.md 2022-07-02 22:27:51 +07:00
XMRig
e1478bfa94 v6.18.1-dev 2022-06-26 18:32:12 +07:00
XMRig
6df6e15267 Merge branch 'master' into dev 2022-06-26 18:31:40 +07:00
XMRig
834ea44507 v6.18.0 2022-06-23 20:04:00 +07:00
XMRig
73dc0ffb7e Merge branch 'dev' 2022-06-23 20:03:29 +07:00
XMRig
e57641d6b1 v6.18.0-dev 2022-06-23 17:36:31 +07:00
XMRig
b324e34444 Update hwloc for msvc. 2022-06-23 16:45:54 +07:00
Bartłomiej Burdukiewicz
7e49fc828d Recognize armv7ve as valid ARMv7 target.
Docs: https://gcc.gnu.org/onlinedocs/gcc/ARM-Options.html

'armv7ve' - The extended version of the ARMv7-A architecture with support for virtualization.

Signed-off-by: Bartłomiej Burdukiewicz <bartlomiej.burdukiewicz@gmail.com>
2022-06-21 18:31:24 +02:00
XMRig
fdfbb60840 Update deps. 2022-06-19 13:39:32 +07:00
xmrig
ee51dec499 Merge pull request #3068 from SChernykh/dev
Better fix for daemon solo mining with ZMQ
2022-06-13 03:37:56 +07:00
SChernykh
575742078c Better fix for daemon solo mining with ZMQ 2022-06-12 22:32:50 +02:00
xmrig
6bab67bced Merge pull request #3067 from SChernykh/dev
Monero v15 network upgrade support and more house keeping
2022-06-13 01:16:34 +07:00
SChernykh
db9069897d Improved daemon ZMQ mining stability 2022-06-12 14:41:47 +02:00
SChernykh
30641b1bdf Fixed ZMQ debug log 2022-06-12 12:33:09 +02:00
SChernykh
45061f40d8 Monero v15 network upgrade support 2022-06-12 11:49:54 +02:00
SChernykh
9f70752090 Fixed debug GhostRider build 2022-06-12 11:47:56 +02:00
SChernykh
22d6a7525e Removed deprecated AstroBWTv1 and v2 2022-06-12 11:47:36 +02:00
benthetechguy
c0bce256e1 Add x86 to README 2022-05-31 21:15:37 -04:00
xmrig
09a7219651 Merge pull request #3055 from benthetechguy/patch-1
Add armv7 to README
2022-05-21 15:50:01 +07:00
benthetechguy
97869f3347 Add armv7 to supported architectures 2022-05-20 23:19:34 -04:00
xmrig
1bbbff7d17 Merge pull request #3054 from SChernykh/dev
Fixes for 32-bit ARM
2022-05-21 09:57:17 +07:00
SChernykh
97683e5719 Fixes for 32-bit ARM 2022-05-20 21:16:10 +02:00
xmrig
059d5d8421 Merge pull request #3051 from SChernykh/dev
Fixed unaligned memory read in DMI
2022-05-20 09:11:26 +07:00
SChernykh
285719cde4 Fixed unaligned memory read in DMI 2022-05-19 20:56:19 +02:00
xmrig
c877ba8145 Merge pull request #3042 from SChernykh/dev
Fixed being unable to resume from pause-on-battery
2022-05-06 02:14:52 +07:00
SChernykh
6793981066 Fixed being unable to resume from pause-on-battery
Fixes #3041
2022-05-05 21:13:02 +02:00
xmrig
1ae9a4e428 Merge pull request #3031 from SChernykh/dev
Fixed --cpu-priority not working sometimes
2022-04-20 07:44:33 +07:00
SChernykh
0e57053c5a Fixed --cpu-priority not working sometimes 2022-04-19 19:57:12 +02:00
xmrig
232d2d6dc5 Merge pull request #3020 from SChernykh/dev
Removed old AstroBWT algorithm
2022-04-15 16:03:26 +07:00
SChernykh
a3cb74f29b Removed old AstroBWT algorithm
It's not used anywhere now.
2022-04-15 10:59:31 +02:00
XMRig
56753d7c4a v6.17.1-dev 2022-04-06 01:58:24 +07:00
XMRig
f7b9e3ca67 Merge branch 'master' into dev 2022-04-06 01:58:03 +07:00
XMRig
56c95703a5 v6.17.0 2022-04-05 21:46:01 +07:00
XMRig
eadf272425 Merge branch 'dev' 2022-04-05 21:45:26 +07:00
XMRig
cb227a0a79 Merge branch 'dev' of github.com:xmrig/xmrig into dev 2022-04-05 15:15:47 +07:00
XMRig
4c171bea1e Disable donate for astrobwt/v2. 2022-04-05 15:15:03 +07:00
xmrig
e55a854314 Update CHANGELOG.md 2022-04-04 20:33:39 +07:00
XMRig
5bdfafd719 v6.17.0-dev 2022-04-04 20:17:40 +07:00
xmrig
15a2091837 Merge pull request #2991 from SChernykh/dev
Fixed compilation error
2022-03-24 22:46:33 +07:00
SChernykh
48bd09f730 Fixed compilation error 2022-03-24 16:38:47 +01:00
xmrig
21fb970949 Merge pull request #2990 from SChernykh/dev
Optimized keccak
2022-03-24 22:22:21 +07:00
SChernykh
23c12fc351 Optimized keccak
Big astrobwt/v2 speedup on non-AVX2 CPUs: **Core i7-2600 +64% (17 -> 28 kh/s)**
2022-03-24 13:10:03 +01:00
xmrig
71d193676a Merge pull request #2974 from SChernykh/dev
Fixed AstroBWT OpenCL config generation
2022-03-16 16:19:08 +07:00
SChernykh
baef34ba8c Fixed AstroBWT OpenCL config generation 2022-03-16 10:15:38 +01:00
xmrig
95a739d821 Merge pull request #2969 from SChernykh/dev
Dero HE (astrobwt/v2) OpenCL support
2022-03-15 08:51:46 +07:00
SChernykh
7b9135aadc Dero HE (astrobwt/v2) OpenCL support 2022-03-14 20:13:31 +01:00
xmrig
e6f694ca9e Merge pull request #2958 from SChernykh/dev
Fixed out of bounds access in astrobwt/v2
2022-03-10 06:40:34 +07:00
xmrig
afd79e7537 Merge pull request #2961 from SChernykh/derohe_cuda
Dero HE (astrobwt/v2) CUDA config generator
2022-03-10 06:37:30 +07:00
SChernykh
a2728af4f7 Dero HE (astrobwt/v2) CUDA config generator 2022-03-10 00:24:49 +01:00
SChernykh
65dbded9c4 Fixed out of bounds access in astrobwt/v2 2022-03-08 22:31:34 +01:00
XMRig
f25e65b5ac Update hwloc for MSVC builds. 2022-03-07 04:29:13 +07:00
XMRig
bbb19ea2f9 #2941 Update deps scripts. 2022-03-07 00:27:49 +07:00
xmrig
1c5b332add Merge pull request #2954 from SChernykh/dev
Dero HE fork support (astrobwt/v2 algorithm)
2022-03-06 04:18:43 +07:00
SChernykh
87fd0ea94a Added alternative algo names for Dero HE 2022-03-05 13:32:16 +01:00
SChernykh
4a42dca2cb Show block/miniblock counters more often 2022-03-05 10:00:39 +01:00
SChernykh
b674fafa0f DaemonClient: fixed broken coin setting 2022-03-05 09:56:30 +01:00
SChernykh
b5da73389f Dero HE fork support (astrobwt/v2 algorithm) 2022-03-05 00:31:18 +01:00
XMRig
bf5e38545c Fixed displayed DMI memory information for empty slots. 2022-03-01 02:50:30 +07:00
xmrig
f7543ada60 Merge pull request #2932 from SChernykh/dev
Fixed GhostRider with hwloc disabled
2022-02-16 19:23:26 +07:00
SChernykh
95e1705fc8 Fixed GhostRider with hwloc disabled 2022-02-16 08:13:48 +01:00
XMRig
2d0b07afbc v6.16.5-dev 2022-02-05 16:22:55 +07:00
XMRig
b33ccf0e0b Merge branch 'master' into dev 2022-02-05 16:21:19 +07:00
XMRig
4f5f9bdffb v6.16.4 2022-02-04 16:11:37 +07:00
XMRig
4d3e3daa6a Merge branch 'dev' 2022-02-04 16:10:58 +07:00
xmrig
802029e5f5 Update CHANGELOG.md 2022-02-04 15:14:46 +07:00
XMRig
14117e9658 #2910 Fixed donation for GhostRider/RTM. 2022-01-31 14:29:41 +07:00
xmrig
7ccb1d65f0 Merge pull request #2908 from Spudz76/dev-addMSVC2022
Add MSVC/2022 to version.h
2022-01-31 10:14:47 +07:00
Tony Butler
15de3cc16c Add MSVC/2022 to version.h 2022-01-28 21:09:24 -07:00
xmrig
124daa4afd Merge pull request #2898 from SChernykh/armv7
Fixed armv7 compilation
2022-01-26 23:38:45 +07:00
xmrig
5de1609b7d Merge pull request #2904 from SChernykh/dev
Fixed unaligned memory accesses
2022-01-26 23:26:03 +07:00
SChernykh
644f4cc017 Fixed unaligned memory accesses 2022-01-26 17:18:18 +01:00
XMRig
41a3f97060 v6.16.4-dev 2022-01-25 23:21:54 +07:00
XMRig
452080cfbd Merge branch 'master' into dev 2022-01-25 23:21:21 +07:00
XMRig
4f103b6b45 v6.16.3 2022-01-25 21:53:47 +07:00
XMRig
39609c9183 Merge branch 'dev' 2022-01-25 21:53:19 +07:00
xmrig
2adb7b2b74 Update CHANGELOG.md 2022-01-25 20:57:06 +07:00
SChernykh
3673137df6 Fixed armv7 compilation
Fix for error `Unsupported target. Must be either ARMv7-A+NEON or ARMv8-A.`
2022-01-25 12:37:41 +01:00
xmrig
faa7095865 Merge pull request #2893 from SChernykh/dev
KawPow OpenCL: use separate UV loop for building programs
2022-01-24 19:30:24 +07:00
SChernykh
e0701f9dad KawPow OpenCL: build next period only when it's not in cache 2022-01-24 13:28:58 +01:00
SChernykh
14aacf8636 KawPow OpenCL: use separate UV loop for building programs
Fixes #2890: uv_default_loop() can't be used there because UV loops are not thread safe.
2022-01-24 13:20:04 +01:00
xmrig
c764441337 Update CHANGELOG.md 2022-01-22 00:05:54 +07:00
xmrig
05fae12a63 Merge pull request #2882 from benthetechguy/armv7-gcc
armv7 compilation fix
2022-01-21 23:52:59 +07:00
XMRig
8059ce67f9 Add missing DNS options to config example. 2022-01-21 20:17:00 +07:00
benthetechguy
10111fd7f9 armv7 compilation fix
Compilation fails for armv7 on gcc 11, and updating the version in that line fixes it.
2022-01-20 20:47:26 -05:00
xmrig
2d25bec2df Merge pull request #2873 from SChernykh/dev
Fixed GhostRider benchmark on single-core systems
2022-01-18 10:13:16 +07:00
SChernykh
cab244d468 Fixed GhostRider benchmark on single-core systems
Fixes #2871
2022-01-17 19:59:21 +01:00
xmrig
4001488888 Merge pull request #2856 from SChernykh/dev
Fix for short responses from some Raptoreum pools
2022-01-05 10:22:39 +07:00
SChernykh
9bec1521b8 Fix for short responses from some Raptoreum pools 2022-01-04 23:54:59 +01:00
xmrig
7bde3ed5f7 Merge pull request #2848 from Spudz76/dev-addClientReconnect
Add support for client.reconnect method
2021-12-30 20:44:22 +07:00
Tony Butler
2e738509bb Add support for client.reconnect method 2021-12-30 06:26:52 -07:00
xmrig
f5447088cb Merge pull request #2837 from SChernykh/dev
RandomX: don't restart mining threads when the seed changes
2021-12-26 18:03:48 +07:00
SChernykh
7f2f50a8d9 RandomX: don't restart mining threads when the seed changes
It helps to not lose huge pages when the seed changes (every 2048 blocks, ~2.8 days).
2021-12-25 13:39:15 +01:00
xmrig
5747ccfafc Merge pull request #2827 from SChernykh/dev
GhostRider: set correct priority for helper threads
2021-12-20 18:07:23 +07:00
SChernykh
93081eb1f6 GhostRider: set correct priority for helper threads
Fixes #2825
2021-12-20 12:05:17 +01:00
xmrig
4bf65c8669 Update README.md 2021-12-19 22:26:50 +07:00
xmrig
1a6fc3a665 Merge pull request #2815 from SChernykh/dev
Fixed cn-heavy in 32-bit builds
2021-12-16 10:44:16 +07:00
SChernykh
8dede14ac8 Fixed cn-heavy in 32-bit builds 2021-12-15 21:17:25 +00:00
xmrig
20687a397e Merge pull request #2782 from SChernykh/dev
Updated GhostRider documentation
2021-12-03 22:16:33 +07:00
SChernykh
454f97fa0f Updated GhostRider documentation
Added examples for SSL port command line that don't use #1 pool.
2021-12-03 12:30:09 +01:00
xmrig
8149fc7dcb Merge pull request #2778 from SChernykh/dev
Fixed "READY threads X/X" display after algo switching
2021-12-03 15:02:58 +07:00
SChernykh
a39ab89236 Fixed "READY threads X/X" display after algo switching 2021-12-03 07:44:23 +01:00
XMRig
5b8501fb57 v6.16.3-dev 2021-12-02 22:10:57 +07:00
XMRig
039be2ab75 Merge branch 'master' into dev 2021-12-02 22:10:23 +07:00
XMRig
718c7e0fc1 v6.16.2 2021-12-02 20:55:27 +07:00
XMRig
ef7951b91d Merge branch 'dev' 2021-12-02 20:54:50 +07:00
xmrig
214b1f021b Update CHANGELOG.md 2021-12-02 20:52:53 +07:00
XMRig
81b18c0741 #2771 Fixed environment variables support in EthStratumClient. 2021-12-02 19:36:51 +07:00
xmrig
8e83f72456 Merge pull request #2772 from SChernykh/dev
Compilation fixes
2021-12-02 19:36:09 +07:00
SChernykh
c2ae625032 Compilation fixes 2021-12-02 13:34:24 +01:00
xmrig
60566dc84c Merge pull request #2769 from SChernykh/compiler_fix
Performance fixes
2021-12-02 10:00:29 +07:00
SChernykh
4ea8fe694d GhostRider benchmark: added 20 more possible rounds 2021-12-01 20:26:41 +01:00
SChernykh
669d1ab008 Updated changelog and GhostRider readme 2021-12-01 18:14:01 +01:00
SChernykh
e87d5111a2 Compiler fix 2021-12-01 17:08:40 +01:00
xmrig
56158779de Merge pull request #2761 from SChernykh/dev
Refactored Chrono::highResolutionMSecs()
2021-11-30 19:13:24 +07:00
SChernykh
efb322df66 Refactored Chrono::highResolutionMSecs()
Improved precision
2021-11-30 08:11:09 +01:00
xmrig
e673d541c1 Merge pull request #2751 from SChernykh/dev
VAES crash fixes
2021-11-30 09:49:37 +07:00
SChernykh
a98db529fb Explicitly use QueryPerformanceCounter() on Windows 2021-11-29 21:58:24 +01:00
SChernykh
1a9eaaad8f VAES crash fixes 2021-11-29 21:05:51 +01:00
XMRig
be5fbca9b6 v6.16.2-dev 2021-11-29 21:35:42 +07:00
XMRig
2feb264375 Merge branch 'master' into dev 2021-11-29 21:35:02 +07:00
XMRig
00990f2649 v6.16.1 2021-11-29 20:43:17 +07:00
XMRig
d78713be48 Merge branch 'dev' 2021-11-29 20:42:32 +07:00
XMRig
77367abe13 Fixed Clang build. 2021-11-29 16:01:16 +07:00
xmrig
cd046f6fd0 Merge pull request #2747 from SChernykh/dev
Disable VAES in 32-bit builds
2021-11-29 15:50:17 +07:00
SChernykh
63b7ec2887 Check compiler support for VAES 2021-11-29 09:48:15 +01:00
xmrig
a1e8f1c3e5 Merge pull request #2746 from Spudz76/dev-fixVAESCompile
Fix compile for VAES support with GCC<10
2021-11-29 15:38:35 +07:00
SChernykh
6db480a1ab Disable VAES in 32-bit builds 2021-11-29 09:32:00 +01:00
Tony Butler
a7acd9de6d Fix compile for VAES support with GCC<10 2021-11-28 22:11:42 -07:00
XMRig
a64f4d1870 v6.16.1-dev 2021-11-29 09:29:24 +07:00
XMRig
9bfe59b630 Merge branch 'master' into dev 2021-11-29 09:28:43 +07:00
xmrig
1a4bf16521 Merge pull request #2740 from SChernykh/dev
Added VAES support for Cryptonight variants
2021-11-29 09:26:45 +07:00
SChernykh
a4d5d0a75a Added VAES support for Cryptonight variants 2021-11-28 20:49:54 +01:00
xmrig
c40f1f9f66 Merge pull request #2738 from SChernykh/dev
More GhostRider fixes
2021-11-28 18:19:08 +07:00
SChernykh
15e5052dd0 More GhostRider fixes
- Fixed "difficulty is not a number" when diff is high on some pools
- Fixed GhostRider compilation when WITH_KAWPOW=OFF
2021-11-28 12:11:08 +01:00
xmrig
f9f7963453 Merge pull request #2734 from Spudz76/dev-nitpickWhitespace
Slash and burn EOL whitespace everywhere
2021-11-28 10:51:45 +07:00
Tony Butler
02240eff8c Slash and burn EOL whitespace everywhere 2021-11-27 17:59:40 -07:00
xmrig
d64c963e5e Merge pull request #2729 from SChernykh/dev
GhostRider hotfixes
2021-11-27 18:31:19 +07:00
SChernykh
c6292ce9ee GhostRider hotfixes
- Added average hashrate display
- Fixed the number of threads shown at startup
- Fixed `--threads` or `-t` command line option (but `--cpu-max-threads-hint` is recommended to use)
2021-11-27 12:27:26 +01:00
XMRig
cd652e2644 v6.16.0 2021-11-26 18:57:07 +07:00
XMRig
6f5ef0fe0f Merge branch 'dev' 2021-11-26 18:51:53 +07:00
xmrig
01fa968763 Update CHANGELOG.md 2021-11-26 18:50:34 +07:00
xmrig
8e6f3ad99e Merge pull request #2719 from SChernykh/dev
Added GhostRider release notes
2021-11-25 19:21:50 +07:00
SChernykh
b1f2479ec1 Added GhostRider release notes 2021-11-25 13:19:01 +01:00
XMRig
ecceba8ecd Add GhostRider support for AutoClient. 2021-11-25 17:44:36 +07:00
xmrig
cb5f4a9c17 Merge pull request #2716 from Spudz76/dev-initGR
Only initGhostRider() when job is in the family
2021-11-25 09:09:16 +07:00
Tony Butler
3a8ebfdcb6 Only initGhostRider() when job is in the family 2021-11-24 13:04:03 -07:00
xmrig
0dcafeb571 Merge pull request #2715 from SChernykh/dev
Benchmark support for GhostRider (offline only)
2021-11-24 22:05:44 +07:00
SChernykh
a1d7ee4c6b Benchmark support for GhostRider (offline only)
Command line:
```
./xmrig --bench=250K -a gr --rotation 15
```
Where `rotation` is an integer between 0 and 19 (inclusive).
2021-11-24 15:54:09 +01:00
XMRig
03e70ba2ed v6.16.0-dev 2021-11-24 19:49:21 +07:00
xmrig
19ef8c5d65 Merge pull request #2714 from SChernykh/gh3
GhostRider: fixed invalid hashes on ARMv8
2021-11-24 19:40:48 +07:00
SChernykh
63baa9e263 GhostRider: fixed invalid hashes on ARMv8 2021-11-24 13:39:55 +01:00
xmrig
1248bd5859 Merge pull request #2713 from SChernykh/gh3
Optimized quad hash for Ryzens
2021-11-24 18:21:01 +07:00
SChernykh
5c951ddb8a Optimized quad hash for Ryzens 2021-11-24 08:16:41 +01:00
xmrig
4ab0ad928d Merge pull request #2712 from SChernykh/gh3
GhostRider algorithm (Raptoreum) support
2021-11-24 09:49:34 +07:00
SChernykh
e67eb47796 Faster quad hash for GhostRider algos (Ryzen CPUs) 2021-11-23 22:14:46 +01:00
SChernykh
a6656a8c49 Fixed broken difficulty adjustment on some Raptoreum pools 2021-11-23 18:02:58 +01:00
SChernykh
a903d0a5bd Fixed compilation error 2021-11-23 08:52:30 +01:00
SChernykh
ceaebfd877 GhostRider algorithm (Raptoreum) support 2021-11-23 08:14:01 +01:00
xmrig
5156ff11a8 Merge pull request #2684 from SChernykh/fix-183
MSR mod: fix for error 183
2021-11-11 17:50:08 +07:00
xmrig
e0143a92a8 Merge pull request #2682 from SChernykh/dev
Fix: use cn-heavy optimization only for Vermeer CPUs
2021-11-11 17:49:51 +07:00
SChernykh
f682d9a2e9 MSR mod: fix for error 183
When WinRing0 driver starts, but some other version already created "\\.\WinRing0_1_2_0", it returns error 183 ERROR_ALREADY_EXISTS - "Cannot create a file when that file already exists."
2021-11-11 10:26:38 +01:00
SChernykh
3bece0ff40 Fix: use cn-heavy optimization only for Vermeer CPUs
Fixes #2680
2021-11-11 07:57:05 +01:00
XMRig
e6c456a970 v6.15.4-dev 2021-11-02 18:26:44 +07:00
XMRig
923d1d712f Merge branch 'master' into dev 2021-11-02 18:26:12 +07:00
XMRig
ae8459bd35 v6.15.3 2021-11-01 19:59:05 +07:00
XMRig
3a7be07c62 Merge branch 'dev' 2021-11-01 19:58:30 +07:00
xmrig
e1cc0000c6 Update CHANGELOG.md 2021-11-01 12:27:10 +07:00
xmrig
1210e8e95c Merge pull request #2644 from Spudz76/dev-fixMemleaks
Patch a couple minor leaks
2021-10-25 20:33:10 +07:00
xmrig
a45fbd9cae Merge pull request #2646 from SChernykh/dev
Fix MSVC compilation error
2021-10-25 20:31:12 +07:00
Tony Butler
f6d45f7990 Fix various memory leaks 2021-10-25 04:06:49 -06:00
SChernykh
b9464f993b Fix MSVC compilation error 2021-10-25 10:26:44 +02:00
xmrig
f8f73b0cd7 Merge pull request #2641 from SChernykh/dev
AstroBWT: fixed rare incorrect hashes
2021-10-20 07:24:15 +07:00
SChernykh
df6ab2edd8 AstroBWT: fixed rare incorrect hashes 2021-10-19 19:08:56 +02:00
xmrig
8bf7600154 Merge pull request #2639 from SChernykh/dev
AstroBWT even bigger speedup (up to +35%)
2021-10-19 22:50:07 +07:00
SChernykh
a30501956f AstroBWT even bigger speedup 2021-10-19 17:37:45 +02:00
xmrig
c287a40a20 Merge pull request #2636 from SChernykh/dev
AstroBWT speedup (up to +7%)
2021-10-19 07:58:24 +07:00
SChernykh
04f50c24e2 AstroBWT speedup 2021-10-18 18:05:51 +02:00
xmrig
7627b23212 Merge pull request #2614 from Spudz76/dev-fixAppleOpenCL
OpenCL fixes for non-AMD platforms
2021-10-13 06:20:53 +07:00
XMRig
e90e7febfb Merge branch 'StriderDM-merge_mining_tag_fix' into dev 2021-10-13 05:43:27 +07:00
XMRig
733b85a132 Code cleanup. 2021-10-13 05:43:05 +07:00
XMRig
35ba786e63 Merge branch 'merge_mining_tag_fix' of https://github.com/StriderDM/xmrig into StriderDM-merge_mining_tag_fix 2021-10-13 05:33:34 +07:00
David Main
446810a837 fix: expand validation of tx_extra for merge mining tag 2021-10-12 11:17:37 +02:00
Tony Butler
c6a68c3e51 Cap max threads to 4096 with nVidia OpenCL 2021-10-11 04:17:01 -06:00
Tony Butler
ca8bef3ade Adjust API version logic 2021-10-11 04:17:01 -06:00
Tony Butler
d735caa334 Adjust definitions and replace literal 0x4038 2021-10-11 04:17:01 -06:00
Tony Butler
eb54cc0e0f Revert amd_bitalign/amd_bfe polyfills 2021-10-11 04:17:01 -06:00
Tony Butler
84c67c37cd Apply "no-static-without-amd" fixes 2021-10-11 04:17:01 -06:00
Tony Butler
b44f38a362 Attempt repair of cn/r output-array access problem 2021-10-11 04:17:01 -06:00
Tony Butler
8ed4088d0a Second try at fixing cn/r atomic_inc() call 2021-10-11 04:17:01 -06:00
Tony Butler
cdcea2a4f9 Attempt fix for cn/r on Apple-AMD 2021-10-11 04:17:01 -06:00
Tony Butler
f0d80326ec Add Ellesmere correctly (still just a Polaris alias) 2021-10-11 04:17:01 -06:00
Tony Butler
cb8fc26cbe Add every Apple AMD GPU type 2021-10-11 04:17:01 -06:00
Tony Butler
5ec5b5ed00 Possibly fix problem with clGetProgramInfo crash 2021-10-11 04:17:01 -06:00
Tony Butler
67e29c1af1 Readjust OclDevice logic and add OCL_VENDOR_APPLE 2021-10-11 04:17:01 -06:00
xmrig
4bd94a79a4 Merge pull request #2623 from Spudz76/dev-fixWithoutKawpow
Fix #2583 compiling without kawpow (string ref is nonexistent then)
2021-10-11 16:41:44 +07:00
Tony Butler
80e597d951 Fix #2583 compiling without kawpow (string ref is nonexistent then) 2021-10-11 03:31:28 -06:00
XMRig
2e269f5b8c v6.15.3-dev 2021-10-06 02:01:29 +07:00
XMRig
57b8e35903 Merge branch 'master' into dev 2021-10-06 02:00:49 +07:00
XMRig
53be5765e6 v6.15.2 2021-10-05 23:28:29 +07:00
XMRig
68741c925b Merge branch 'dev' 2021-10-05 23:28:06 +07:00
xmrig
9ce207e667 Update CHANGELOG.md 2021-10-05 22:24:58 +07:00
XMRig
07e0966517 Added "--versions" alias. 2021-10-05 21:49:03 +07:00
XMRig
a9d4c2a923 Removed uv_os_gethostname call for all OS. 2021-09-28 23:56:33 +07:00
xmrig
dc02e1feaa Merge pull request #2606 from SChernykh/dev
Fix: AstroBWT auto-config ignored max-threads-hint
2021-09-26 18:51:47 +07:00
SChernykh
7daff331dc Fix: AstroBWT auto-config ignored max-threads-hint 2021-09-26 12:22:58 +02:00
XMRig
058a2fb0f4 v6.15.2-dev 2021-09-22 19:13:07 +07:00
XMRig
4fff3b946e Merge branch 'master' into dev 2021-09-22 19:12:38 +07:00
XMRig
f7aa5e781b v6.15.1 2021-09-22 13:08:00 +07:00
XMRig
298c5cccfa Merge branch 'dev' 2021-09-22 13:05:36 +07:00
xmrig
2985571620 Update CHANGELOG.md 2021-09-21 18:59:47 +07:00
xmrig
279d29cd7f Merge pull request #2594 from SChernykh/dev
Added Windows taskbar icon colors
2021-09-20 23:07:00 +07:00
SChernykh
387320ad6d Added Windows taskbar icon colors
- Red when there's no connection to any pool
- Yellow when mining is paused
- No color during normal mining
2021-09-20 18:03:22 +02:00
XMRig
76cd83edb2 Merge branch 'Spudz76-dev-fixAsteriskProfiling' into dev 2021-09-20 20:56:11 +07:00
XMRig
7f4d667351 Remove unnecessary string. 2021-09-20 20:53:36 +07:00
Tony Butler
8027716264 Fix --threads generates "*" profile without "kawpow":false to negate it. 2021-09-20 06:49:17 -06:00
xmrig
a459dd7741 Merge pull request #2591 from Spudz76/dev-fixCompileNoRX
Fix compile warning/crash when WITH_RANDOMX=OFF
2021-09-20 10:50:00 +07:00
Tony Butler
ef6011ac12 Fix compile warning when WITH_RANDOMX=OFF 2021-09-19 18:12:46 -06:00
xmrig
6d66051d92 Merge pull request #2586 from SChernykh/dev
Fixed Windows 7 compatibility
2021-09-17 17:11:09 +07:00
SChernykh
b2cc2ef0d7 Fixed Windows 7 compatibility
Fixes #2585
2021-09-17 12:05:37 +02:00
xmrig
9805320517 Merge pull request #2582 from Spudz76/dev-fixupRXnaming
Fixup RandomX naming consistency
2021-09-17 08:03:03 +07:00
Tony Butler
582d17bb84 Fixup RandomX naming consistency 2021-09-16 08:24:37 -06:00
XMRig
9e5f5b35a6 v6.15.1-dev 2021-08-31 18:57:08 +07:00
XMRig
9a9c69ff50 Merge branch 'master' into dev 2021-08-31 18:56:31 +07:00
XMRig
5c1f3f395c v6.15.0 2021-08-31 14:42:43 +07:00
XMRig
23cefffe43 Merge branch 'dev' 2021-08-31 14:41:47 +07:00
XMRig
d048d5a639 Fixed class/struct inconsistency. 2021-08-31 03:32:36 +07:00
xmrig
9a6f773dea Update CHANGELOG.md 2021-08-29 20:19:41 +07:00
XMRig
cd7c7902a9 Fixed clang build. 2021-08-29 18:52:11 +07:00
xmrig
fd3dad920d Merge pull request #2565 from SChernykh/dev
AstroBWT: add AVX2 Salsa20 implementation
2021-08-29 15:42:00 +07:00
SChernykh
3dc192f63e AstroBWT: add AVX2 Salsa20 implementation
+4.5% speedup on Ryzen 5 5600X
2021-08-29 10:35:43 +02:00
XMRig
123c7ab140 Added support for new CUDA plugin API. 2021-08-29 14:22:19 +07:00
XMRig
838996a0fc v6.15.0-dev 2021-08-28 19:53:28 +07:00
XMRig
6e4fea34a4 #2555 Update deps. 2021-08-28 13:10:48 +07:00
XMRig
b52c289931 Increase RANDOMX_PROGRAM_MAX_SIZE 2021-08-28 12:32:57 +07:00
XMRig
4dbb5b89da Update hwloc for MSVC. 2021-08-28 12:16:41 +07:00
XMRig
84d0212e79 Merge branch 'pr2563' into dev 2021-08-28 11:54:11 +07:00
XMRig
35acb3f00b Merge branch 'GraftRandomX' of https://github.com/Stardock2018/xmrig into pr2563 2021-08-28 11:50:17 +07:00
Chris
7f2771b466 Fixed Algorithm id
Algorithm id should be 0x72151267, second and third byte encode L3 and L2 size.
0x72 = 'r'
0x15 = 1 << 0x15 (L3 size)
0x12 = 1 << 0x12 (L2 size)
0x67 = 'g'
2021-08-27 10:31:36 -06:00
Chris
5fdf5516ff Added Graft RandomX 2021-08-27 08:19:54 -06:00
XMRig
234de96784 Update rapidjson. 2021-08-27 18:51:59 +07:00
XMRig
df4532d9a1 Cleanup ARM code. 2021-08-27 12:36:08 +07:00
XMRig
c27f535768 Fixed build on Linux. 2021-08-25 18:52:54 +07:00
XMRig
c7ac314110 Code cleanup based on Clang-Tidy. 2021-08-25 18:45:15 +07:00
XMRig
3215403815 Add missing files. 2021-08-23 18:43:14 +07:00
XMRig
bea2a6cf5b Update BlockTemplate class. 2021-08-23 18:32:58 +07:00
xmrig
a28f411339 Merge pull request #2548 from xmrig/feature-auto-coin
Added automatic coin detection for daemon mining
2021-08-19 15:35:37 +07:00
XMRig
460d9c75c5 Add global wallet address parser for DaemonClient. 2021-08-18 13:36:50 +07:00
XMRig
d1033abbe5 Update Coin, BlobReader and WalletAddress. 2021-08-17 08:17:21 +07:00
XMRig
9eac9dd30a v6.14.2-dev 2021-08-15 02:12:33 +07:00
XMRig
8d7b6adf98 Merge branch 'master' into dev 2021-08-15 02:11:29 +07:00
XMRig
230ff87634 v6.14.1 2021-08-15 00:42:47 +07:00
XMRig
19adf2630a Merge branch 'dev' 2021-08-15 00:42:12 +07:00
xmrig
3de4b16117 Update CHANGELOG.md 2021-08-15 00:37:23 +07:00
XMRig
602e3a7587 Fix algorithms order. 2021-08-14 05:48:37 +07:00
XMRig
4f6ffb67c1 Cleanup. 2021-08-14 04:58:01 +07:00
xmrig
a0194ddd18 Merge pull request #2537 from SChernykh/dev
Fixed Termux build
2021-08-13 21:14:35 +07:00
SChernykh
30f7e876a2 Update CnHash.cpp 2021-08-13 16:03:15 +02:00
SChernykh
5958490c23 Fixed Termux build 2021-08-13 12:02:03 +02:00
XMRig
f92ad4423d Fix Job::getNumTransactions. 2021-08-12 22:30:47 +07:00
XMRig
e0749a82c2 Fix cn-pico name. 2021-08-12 17:52:52 +07:00
xmrig
440aa003af Merge pull request #2532 from xmrig/feature-stable-algo-id
Refactoring: Stable (persistent) algorithms IDs.
2021-08-12 02:03:39 +07:00
XMRig
9580f5395f Removed shortName. 2021-08-11 22:26:34 +07:00
XMRig
e9ae4deb91 Removed duplicate strings. 2021-08-11 22:07:43 +07:00
XMRig
aee0762424 Fix typo. 2021-08-11 16:27:36 +07:00
XMRig
e6332eff2b Implemented stable algorithm ids. 2021-08-11 03:46:34 +07:00
XMRig
d0a632f557 Optimize CnHash storage. 2021-08-10 14:54:35 +07:00
XMRig
f4cdc527b0 #2527 Fix narrowing conversion. 2021-08-10 01:40:36 +07:00
XMRig
661dc515ab namespace cleanup. 2021-08-09 23:51:07 +07:00
XMRig
6d9bafe068 v6.14.1-dev 2021-08-09 17:20:15 +07:00
XMRig
202c8aaee8 Merge branch 'master' into dev 2021-08-09 17:19:48 +07:00
XMRig
410084384e v6.14.0 2021-08-09 16:09:15 +07:00
XMRig
43e98c509a Merge branch 'dev' 2021-08-09 16:08:20 +07:00
XMRig
08d79ddcdc v6.14.0-dev 2021-08-08 19:36:54 +07:00
xmrig
0fdf063760 Merge pull request #2512 from SChernykh/dev
Show the number of transactions in pool job
2021-08-08 00:52:06 +07:00
SChernykh
929205536c Show the number of transactions in pool job
Useful to check if pool/proxy is working properly and can also be used to compare different pools.
2021-08-07 19:38:31 +02:00
XMRig
d24581c963 #2492 Add missing --huge-pages-jit command line option. 2021-07-24 12:27:48 +07:00
xmrig
2eb2e90631 Merge pull request #2484 from SChernykh/zmq
ZeroMQ support for solo mining
2021-07-17 17:14:42 +07:00
SChernykh
0842e6b9d2 ZeroMQ support for solo mining
Gets new blocks from daemon immediately without polling, saving ~0.5 seconds on average when daemon gets new block from the network. Also saves some CPU cycles because it doesn't need to poll daemon every second.

Testing: add "daemon-zmq-port": 28083 to xmrig's pool config in config.json and run ./monerod --testnet --zmq-pub tcp://127.0.0.1:28083
2021-07-15 11:13:14 +02:00
XMRig
93805cd167 #2476 Fixed crash in DMI memory reader. 2021-07-06 23:07:31 +07:00
xmrig
755fe28bc3 Merge pull request #2472 from SChernykh/dev
Updates from xmrig-proxy
2021-07-05 19:13:14 +07:00
SChernykh
59d780169f Merge branch 'dev' of https://github.com/SChernykh/xmrig into dev 2021-07-05 13:57:16 +02:00
SChernykh
a30ede04f3 Updates from xmrig-proxy 2021-07-05 13:56:37 +02:00
XMRig
3f2dfa4279 Sync with proxy. 2021-07-05 02:31:29 +07:00
XMRig
7177b42903 v6.13.2-dev 2021-07-03 16:00:30 +07:00
XMRig
21638c2f58 Merge branch 'master' into dev 2021-07-03 16:00:05 +07:00
XMRig
02b2b87bb6 v6.13.1 2021-07-03 15:29:49 +07:00
XMRig
c8a9dba8fd Merge branch 'dev' 2021-07-03 15:29:14 +07:00
xmrig
9a77d39a3f Update CHANGELOG.md 2021-07-03 15:16:47 +07:00
xmrig
28a1d0fe1e Merge pull request #2468 from SChernykh/dev
Fix: don't send miner signature during regular mining
2021-07-02 00:37:04 +07:00
XMRig
0243789c04 v6.13.1-dev 2021-07-02 00:36:32 +07:00
XMRig
45dd58f808 Merge branch 'master' into dev 2021-07-02 00:35:52 +07:00
SChernykh
1b4abe1e98 Fix: don't send miner signature during regular mining 2021-07-01 19:31:55 +02:00
XMRig
9f778742a6 v6.13.0 2021-07-01 20:03:52 +07:00
XMRig
015f8aeed4 Merge branch 'dev' 2021-07-01 20:02:38 +07:00
xmrig
9e6311a7e0 Update CHANGELOG.md 2021-07-01 13:54:09 +07:00
XMRig
0af9d2e75b v6.13.0-dev 2021-06-28 19:02:48 +07:00
xmrig
6e2a84a46c Merge pull request #2445 from SChernykh/miner_signature
Support for solo mining with miner signatures (Wownero)
2021-06-28 18:11:13 +07:00
SChernykh
6bb8913066 Correct handling of block submit responses for Dero 2021-06-24 18:06:07 +02:00
SChernykh
cf104ebdc5 Update signing algorithm 2021-06-21 23:49:13 +02:00
XMRig
ecba750442 Add token support for online benchmark. 2021-06-20 09:28:39 +07:00
SChernykh
3967badc55 Added profiling 2021-06-19 16:19:16 +02:00
SChernykh
3f3f9b0661 Fixed GCC warnings 2021-06-19 14:54:03 +02:00
SChernykh
e3fc78a66c Fix Wownero hardfork version 2021-06-18 13:52:24 +02:00
SChernykh
e6d833c227 Proxy miner signature support 2021-06-17 22:48:08 +02:00
SChernykh
ebe299902c Proxy miner signature support (WIP) 2021-06-17 16:58:18 +02:00
SChernykh
bc63b63a2a More sanity checks 2021-06-17 13:18:34 +02:00
SChernykh
e739e7d704 More error handling in DaemonClient::parseJob 2021-06-17 10:39:22 +02:00
SChernykh
1bae083587 Fixed CalculateMerkleTreeHash 2021-06-17 10:26:17 +02:00
SChernykh
88959bd703 BlockTemplate: miner tx and root hash updating 2021-06-16 23:44:05 +02:00
SChernykh
93e689d601 Fix buffer size in generateMinerSignature 2021-06-16 18:20:11 +02:00
SChernykh
a136790bee Added support for solo mining with miner signatures (Wownero) 2021-06-16 18:07:36 +02:00
SChernykh
29f2dd4b9e Cleanup 2021-06-16 11:47:17 +02:00
SChernykh
3003c067d3 Fixed random32_unbiased 2021-06-16 00:19:49 +02:00
SChernykh
89bc6418b1 Secret key derivation 2021-06-16 00:10:34 +02:00
SChernykh
8458b4ee39 Added signature functions 2021-06-15 15:51:29 +02:00
SChernykh
7bfb801ce2 Cryptonote tools WIP 2021-06-15 00:28:32 +02:00
xmrig
4567499905 Merge pull request #2433 from candrews/patch-2
Fix shellcheck warnings in randomx_boost.sh
2021-06-09 23:41:58 +07:00
Craig Andrews
9b63955b09 Fix shellcheck warnings in randomx_boost.sh
Checked using www.shellcheck.net

Specific issues addressed:
* https://github.com/koalaman/shellcheck/wiki/SC2002
* Use POSIX instead of bash for wider compatibility
* Fail on error
2021-06-08 21:56:02 -04:00
XMRig
0414511de0 v6.12.3-dev 2021-05-31 13:51:41 +07:00
XMRig
b61dad128c Merge branch 'master' into dev 2021-05-31 13:49:35 +07:00
XMRig
80ae339343 v6.12.2 2021-05-31 12:58:30 +07:00
XMRig
4d87555398 Merge branch 'dev' 2021-05-31 12:57:55 +07:00
xmrig
bef82c5de6 Update CHANGELOG.md 2021-05-30 21:28:28 +07:00
xmrig
b069ad5dd1 Merge pull request #2358 from zzjzxq33/patch-1
Update openssl version to 1.1.1k
2021-05-30 17:53:53 +07:00
xmrig
f6a0646271 Merge pull request #2401 from SChernykh/dev
RandomX: fix broken light mode mining
2021-05-22 18:54:29 +07:00
SChernykh
b5f1a1feae RandomX: fix broken light mode mining
It broke after #2395
2021-05-22 13:49:22 +02:00
XMRig
1ce059da1c Add "argon2/ninja" algorithm alias. 2021-05-22 15:10:50 +07:00
xmrig
2929451ee1 Merge pull request #2398 from SChernykh/dev
RandomX ARMv8: optimized dataset read
2021-05-21 09:58:54 +07:00
SChernykh
94fecb5e92 RandomX ARMv8: optimized dataset read
Break dependency from readReg2 and readReg3. It should run faster on superscalar and out-of-order CPUs i.e. Apple M1.
2021-05-20 21:24:28 +02:00
xmrig
3bfa5ea038 Merge pull request #2395 from SChernykh/dev
RandomX: rewrote dataset read code
2021-05-20 18:58:48 +07:00
SChernykh
ff82ca57f2 RandomX: rewrote dataset read code
Unified code for AMD and Intel
1% faster on Intel
0.15% faster on AMD Ryzen
2021-05-20 12:45:42 +02:00
xmrig
7f7b1fb073 Merge pull request #2393 from SChernykh/dev
RandomX: added BMI2 version for scratchpad prefetch
2021-05-19 22:54:58 +07:00
SChernykh
d443dd86f1 RandomX: added BMI2 version for scratchpad prefetch
Saves 1 instruction and 1 byte in the main loop.
2021-05-19 17:52:16 +02:00
xmrig
3ac8f6b23a Merge pull request #2386 from SChernykh/dev
Enabled IMUL_RCP optimization for light mode mining
2021-05-17 16:36:23 +07:00
SChernykh
9b1f020a8b Enabled IMUL_RCP optimization for light mode mining
Better fix for #2377
2021-05-17 11:26:40 +02:00
XMRig
8bf88a4e74 Merge branch 'Spudz76-dev-fixCLKawPowPlatformHandling' into dev 2021-05-16 10:10:33 +07:00
XMRig
08a2c143f5 Regenerate OpenCL headers. 2021-05-16 10:09:29 +07:00
Tony Butler
4eb9a1aad5 Fix CL code for KawPow where it assumes everything is AMD 2021-05-15 20:34:57 -06:00
xmrig
c8c40586a1 Merge pull request #2378 from SChernykh/dev
Fixed broken light mode mining on x86
2021-05-16 07:03:57 +07:00
SChernykh
29cb416107 Fixed broken light mode mining on x86 2021-05-15 21:41:39 +02:00
xmrig
465169ff12 Merge pull request #2375 from Spudz76/dev-fixMacOSCudaLoader
Fixup MacOS CUDA backend default loader name
2021-05-14 18:48:09 +07:00
Tony Butler
df2bcd8192 Fixup MacOS CUDA backend default loader name 2021-05-14 05:28:31 -06:00
zzjzxq33
d89bb56964 Update openssl version to 1.1.1k 2021-05-09 11:11:46 +08:00
XMRig
87a0864e3b ...and --cpu-affinity. 2021-05-08 04:36:09 +07:00
XMRig
ecf5579f36 #2351 Fixed help output for --cpu-priority option. 2021-05-08 04:34:22 +07:00
xmrig
d5523d819f Merge pull request #2341 from SChernykh/dev
Update sse2neon.h
2021-05-03 23:17:12 +07:00
SChernykh
dbda2e9ccd Update sse2neon.h 2021-05-03 18:08:59 +02:00
xmrig
8babd7bc0a Merge pull request #2340 from SChernykh/dev
Fix AES detection on FreeBSD on ARM
2021-05-03 19:06:49 +07:00
SChernykh
27ced139a6 Fix AES detection on FreeBSD on ARM 2021-05-03 09:57:43 +02:00
xmrig
b46849e813 Merge pull request #2322 from SChernykh/dev
Update randomx_boost.sh
2021-04-28 19:12:37 +07:00
SChernykh
a96a6108ff Update randomx_boost.sh
- Support builtin MSR, see #2283
- Added detection of AMD EPYC CPUs
2021-04-28 14:10:30 +02:00
xmrig
c50c78b700 Merge pull request #2312 from SChernykh/dev
Add missing allow_writes=on to randomx_boost.sh
2021-04-25 20:46:07 +07:00
SChernykh
cd7ab2c79f Add missing allow_writes=on to randomx_boost.sh 2021-04-25 15:31:30 +02:00
XMRig
695fbc013b #2280 Disable GPU backends in benchmark mode. 2021-04-25 15:28:45 +07:00
XMRig
a403c53543 Merge branch 'jsonboss-patch-1' into dev 2021-04-24 23:22:56 +07:00
XMRig
e26fbc96e9 Removed unnecessary system call. 2021-04-24 23:22:10 +07:00
XMRig
259c165e60 Merge branch 'patch-1' of https://github.com/jsonboss/xmrig into jsonboss-patch-1 2021-04-24 22:14:59 +07:00
XMRig
7897bf02dc v6.12.2-dev 2021-04-24 01:53:07 +07:00
XMRig
05f62c5ccc Merge branch 'master' into dev 2021-04-24 01:52:37 +07:00
XMRig
d82e100e30 v6.12.1 2021-04-23 19:43:12 +07:00
XMRig
5f869a414c Merge branch 'dev' 2021-04-23 19:42:29 +07:00
xmrig
7fd6be7d83 Update CHANGELOG.md 2021-04-23 18:54:42 +07:00
xmrig
ae6c536e98 Merge pull request #2296 from SChernykh/dev
Fixed Zen3 asm for cn/upx2
2021-04-21 19:52:52 +07:00
XMRig
c66c593123 v6.12.1-dev 2021-04-21 19:51:03 +07:00
XMRig
b3788b2ba3 Merge branch 'master' into dev 2021-04-21 19:49:54 +07:00
SChernykh
b7adb34c37 Fixed Zen3 asm for cn/upx2
- Invalid rounding mode was used which caused rejected shares sometimes
- Also optimized CN implode/explode functions a bit.
2021-04-21 13:22:25 +02:00
XMRig
ace8409a56 v6.12.0 2021-04-20 20:55:58 +07:00
XMRig
e2c757d9dd Merge branch 'dev' 2021-04-20 20:55:35 +07:00
xmrig
da35de993f Update CHANGELOG.md 2021-04-19 23:20:10 +07:00
xmrig
854b7618ef Merge pull request #2289 from SChernykh/dev
RandomX: optimized IMUL_RCP instruction
2021-04-19 22:54:02 +07:00
SChernykh
3477f9fbc1 RandomX: optimized IMUL_RCP instruction
+0.4% on AMD Zen2
+0.3% on AMD Zen3
+0.1% on Intel SandyBridge
+0.3% on rx/wow on Intel SandyBridge
2021-04-19 17:43:58 +02:00
xmrig
5799744f2f Update CHANGELOG.md 2021-04-19 20:56:45 +07:00
xmrig
61d165a314 Merge pull request #2287 from SChernykh/dev
Fixed rounding mode after running cn/upx
2021-04-19 18:06:16 +07:00
SChernykh
69186f2470 Optimized cn/upx for Zen3
0.9% faster
2021-04-19 12:29:44 +02:00
SChernykh
730d4a6cee Fix division by zero check in percent() 2021-04-19 12:05:07 +02:00
SChernykh
54bc91d5e3 Fixed rounding mode after running cn/upx 2021-04-19 12:02:57 +02:00
jsonboss
2012ffb231 support builtin msr 2021-04-19 10:38:27 +08:00
XMRig
5f9e0ebc6c v6.12.0-dev 2021-04-18 20:12:03 +07:00
xmrig
f314c69a70 Merge pull request #2278 from SChernykh/dev
Optimized cn/upx2
2021-04-17 23:41:26 +07:00
SChernykh
16fe462cad Optimized cn/upx2 for Ryzen CPUs 2021-04-17 18:18:26 +02:00
xmrig
e6e2987ddf Merge pull request #2276 from SChernykh/dev
Added support for Uplexa (cn/upx2 algorithm)
2021-04-17 20:10:54 +07:00
SChernykh
ed456b02cf Update CnHash.cpp 2021-04-17 15:06:31 +02:00
SChernykh
da7f5826cb Added support for Uplexa (cn/upx2 algorithm) 2021-04-17 14:53:42 +02:00
XMRig
6cb398bb42 Merge branch 'dev' of github.com:xmrig/xmrig into dev 2021-04-14 23:44:42 +07:00
XMRig
748be760e8 Added support for --user command line option for the benchmark. 2021-04-14 23:43:31 +07:00
xmrig
4a4118bb8e Merge pull request #2261 from SChernykh/dev
Show total hashrate if compiled without OpenCL
2021-04-13 19:06:42 +07:00
SChernykh
77f1bf0861 Show total hashrate if compiled without OpenCL 2021-04-13 14:02:29 +02:00
XMRig
6bb29b3e7b v6.11.3-dev 2021-04-11 21:13:39 +07:00
XMRig
f720772338 Merge branch 'master' into dev 2021-04-11 21:13:08 +07:00
XMRig
e53e48b88c v6.11.2 2021-04-11 17:24:46 +07:00
XMRig
ecf36ee891 Merge branch 'dev' 2021-04-11 17:24:23 +07:00
xmrig
23ef949dd3 Update CHANGELOG.md 2021-04-11 11:45:07 +07:00
XMRig
92e708c6e7 Update llhttp to v5.1.0 2021-04-10 21:23:32 +07:00
XMRig
30cfcc27db #2207 Fixed regression in HTTP parser. 2021-04-10 21:02:59 +07:00
XMRig
3c6077fb02 v6.11.2-dev 2021-04-08 00:33:01 +07:00
XMRig
63883b4fa7 Merge branch 'master' into dev 2021-04-08 00:32:21 +07:00
XMRig
0f83b5e06c v6.11.1 2021-04-07 10:34:37 +07:00
XMRig
637a333197 Merge branch 'dev' 2021-04-07 10:33:48 +07:00
xmrig
3171b06048 Update CHANGELOG.md 2021-04-07 10:32:17 +07:00
xmrig
2a66a0fa2f Merge pull request #2239 from SChernykh/dev
Fixed broken "coin" setting functionality
2021-04-07 10:30:28 +07:00
SChernykh
c080d5b962 Fixed broken "coin" setting functionality 2021-04-06 23:02:10 +02:00
XMRig
0133107f14 v6.11.0 2021-04-06 21:11:44 +07:00
XMRig
253e349ef9 Merge branch 'dev' 2021-04-06 21:11:13 +07:00
xmrig
5126cc1414 Update CHANGELOG.md 2021-04-06 15:48:18 +07:00
XMRig
ea1245026d #2234 Use const_cast. 2021-04-06 12:07:06 +07:00
xmrig
2158adb711 Merge pull request #2234 from esrrhs/dev
fix build error on gcc 4.8.5
2021-04-06 12:00:36 +07:00
xmrig
8554bb4d9c Merge pull request #2235 from SChernykh/dev
Fixed cn-heavy for GCC-8
2021-04-04 18:09:09 +07:00
SChernykh
1741354498 Fixed cn-heavy for GCC-8 2021-04-04 10:18:27 +02:00
esrrhs
866e97efcf fix build error on gcc 9.3.0
FileLogWriter.h:34:41: error: array used as initializer
2021-04-04 12:42:14 +08:00
xmrig
277352d072 Merge pull request #2233 from SChernykh/dev
Fixed compilation for ARM
2021-04-03 23:03:05 +07:00
SChernykh
8cae605e1f Update randomx.cmake 2021-04-03 17:59:28 +02:00
SChernykh
59c85eaf6a Fixed compilation for ARM 2021-04-03 17:50:52 +02:00
xmrig
864233c110 Merge pull request #2228 from esrrhs/dev
remove useless v4_random_math_init if algo is not cn/r
2021-04-02 15:49:53 +07:00
xmrig
e9b32b3009 Merge pull request #2229 from SChernykh/dev
Don't use RandomX JIT if WITH_ASM=OFF
2021-04-02 15:47:51 +07:00
SChernykh
ec608bbd05 Don't use RandomX JIT if WITH_ASM=OFF
Because RandomX JIT uses asm code
2021-04-02 10:05:46 +02:00
esrrhs
ec2793bcc9 remove useless v4_random_math_init if algo is not cn/r 2021-04-02 14:59:09 +08:00
xmrig
eb40f07552 Merge pull request #2225 from gentoo-monero/fix-2224
Add missing include
2021-04-01 17:27:53 +07:00
Matthew Smith
28f268aeba Add missing include
memory header ends up not being included when built without OpenCL
support.

Closes: https://github.com/xmrig/xmrig/issues/2224
2021-04-01 11:01:55 +01:00
XMRig
bad5458d40 Merge branch 'pr2217' into dev 2021-03-29 18:17:33 +07:00
XMRig
b72e21fc3c Merge branch 'master' of https://github.com/esrrhs/xmrig into pr2217 2021-03-29 18:16:45 +07:00
esrrhs
d578a3828f setBlob should run after setAlgorithm 2021-03-29 12:11:03 +08:00
xmrig
6c417eb9af Merge pull request #2216 from SChernykh/dev
Optimize cn-heavy in GCC builds
2021-03-28 21:13:45 +07:00
SChernykh
dc70893e6b Optimize cn-heavy in GCC builds
+0.7% in GCC builds, but GCC is still slower than MSVC on cn-heavy.
2021-03-28 16:12:09 +02:00
xmrig
c5c958743e Merge pull request #2214 from SChernykh/cn-heavy-opt
Optimized cn-heavy
2021-03-28 09:56:22 +07:00
xmrig
89f2fa6818 Merge pull request #2213 from SChernykh/dev
Fixed use-after-free bug when exiting
2021-03-28 09:55:50 +07:00
SChernykh
bcfd9edaa5 Optimized cn-heavy
- Remove unnecessary type conversion when doing `idx0 = d ^ q;`
- Saves 1 CPU cycle in the main loop
- 0.2% speedup on Ryzen 5 5600X, results on other CPUs may vary
2021-03-27 22:21:01 +01:00
SChernykh
e0f774d6dd Fixed use-after-free bug when exiting 2021-03-27 21:53:40 +01:00
XMRig
955cc366d1 v6.11.0-dev 2021-03-20 13:42:46 +07:00
xmrig
bc4f6249be Merge pull request #2196 from xmrig/feature-dns2
Improved DNS subsystem
2021-03-20 12:50:53 +07:00
XMRig
0d45600b0e Added command line options --dns-ipv6 and --dns-ttl. 2021-03-20 11:12:09 +07:00
XMRig
2c8f7f692c Added DNS config. 2021-03-20 00:09:59 +07:00
XMRig
3e41bdc552 New DNS implementation. 2021-03-16 22:24:37 +07:00
XMRig
5b189696d7 Added DnsRecords class. 2021-03-14 09:44:56 +07:00
XMRig
c6bcea3811 Improved DnsRecord class. 2021-03-13 20:30:52 +07:00
xmrig
900dd13c45 Merge pull request #2177 from SChernykh/dev
Fix `vld1q_u8_x4` compilation error with GCC 10.2
2021-03-13 08:30:44 +07:00
SChernykh
2876f17f65 Fix vld1q_u8_x4 compilation error with GCC 10.2 2021-03-12 16:26:02 +01:00
xmrig
b2563ca8a6 Merge pull request #2172 from bisand/patch-1
Added reference to limits.h in AdlLib_linux.cpp
2021-03-11 18:07:23 +07:00
André Biseth
7c0d60ac68 Added reference to limits.h in AdlLib_linux.cpp
Suggested solution to bug https://github.com/xmrig/xmrig/issues/2171
2021-03-11 11:50:05 +01:00
xmrig
813a1885cb Merge pull request #2169 from SChernykh/dev
Fix wrong type in Handle::deleteLater()
2021-03-11 06:26:27 +07:00
SChernykh
54bcf05b1d Fix wrong type in Handle::deleteLater()
Bug found by Address Sanitizer
2021-03-10 14:55:06 +01:00
XMRig
bbea8810a7 v6.10.1-dev 2021-03-08 06:04:59 +07:00
XMRig
b6514957f1 Merge branch 'master' into dev 2021-03-08 06:04:32 +07:00
XMRig
69590f9777 v6.10.0 2021-03-08 04:05:27 +07:00
xmrig
576ff120e5 Merge pull request #2128 from ianmaddox/patch-1
Minor verbiage tweak
2021-03-08 04:02:02 +07:00
xmrig
2d52118c1b Merge pull request #2161 from coolhaircut/patch-1
Added Userspace MSR permissions clarification in CPU.md
2021-03-08 04:01:15 +07:00
xmrig
28ad59d828 Merge pull request #2129 from felixonmars/patch-1
Correct a typo in doc/CPU.md
2021-03-08 04:00:38 +07:00
XMRig
e0c630f34f Merge branch 'dev' 2021-03-08 03:59:09 +07:00
XMRig
b8f9a326aa 6.10.0-dev 2021-03-07 01:44:38 +07:00
Cool Dude (with a cool haircut)
542617b6db Update CPU.md 2021-03-05 22:54:03 +00:00
XMRig
f5db50c9d7 Sync with the proxy. 2021-03-06 05:32:54 +07:00
XMRig
856c8e6bcd Fixed build without TLS support. 2021-03-06 02:07:10 +07:00
XMRig
b3dbf6e23f http-parser replaced to llhttp. 2021-03-06 01:46:49 +07:00
xmrig
a11c57226b Merge pull request #2158 from SChernykh/dev
Fix GCC compilation
2021-03-04 16:48:05 +07:00
SChernykh
94d2cac775 Fix GCC compilation 2021-03-04 10:45:39 +01:00
XMRig
548a7d46e1 Add note about CPU affinity. 2021-03-04 16:19:06 +07:00
xmrig
bebc163e25 Merge pull request #2157 from SChernykh/dev
Fix crash in cn-heavy on Zen3 with manual thread count
2021-03-04 16:03:54 +07:00
SChernykh
70cddc06ba Fix crash in cn-heavy on Zen3 with manual thread count 2021-03-04 10:02:35 +01:00
XMRig
1f9cdc0564 Update hwloc for MSVC. 2021-03-04 03:23:26 +07:00
XMRig
a5a7ee716d Update build scripts. 2021-03-03 19:38:54 +07:00
xmrig
d2f24d94b9 Merge pull request #2150 from TheGreatMcPain/dev
Update sse2neon.h to the latest master. Fixes build on armv7.
2021-03-02 19:41:11 +07:00
TheGreatMcPain
ba3299b61b Update sse2neon.h to the latest master. Fixes build on armv7.
A few days after this header was introduced. Upstream updated it with
armv7 versions of `_mm_aesenc_si128` which allows xmrig to build
on armv7.
2021-03-02 01:33:25 -06:00
xmrig
ca5dfe7c12 Merge pull request #2147 from SChernykh/dev
Fixed many "new job" messages when solo mining
2021-03-01 23:49:03 +07:00
SChernykh
91ad6fcf3d Fixed many "new job" messages when solo mining
Fix for https://github.com/xmrig/xmrig/issues/2127
2021-03-01 17:46:05 +01:00
XMRig
0b7dfaabe0 Code cleanup. 2021-03-01 19:04:03 +07:00
XMRig
6f8ffb7660 Fixed possible out of order write to log file. 2021-03-01 18:54:20 +07:00
XMRig
4a8e7510e1 #2123 Ignore regex exception. 2021-02-27 15:29:14 +07:00
Felix Yan
32876dd01d Correct a typo in doc/CPU.md 2021-02-24 04:36:27 +08:00
Ian Maddox
37df513b32 Minor verbiage tweak
Fixing mixed phrasing in error message
2021-02-23 11:34:10 -08:00
xmrig
31a5d05dc1 Merge pull request #2122 from SChernykh/dev
Fixed pause logic when both pause on battery and user activity are en…
2021-02-21 22:36:32 +07:00
SChernykh
d478d737c4 Fixed pause logic when both pause on battery and user activity are enabled 2021-02-21 16:33:57 +01:00
XMRig
e20daff4eb v6.9.1-dev 2021-02-21 22:28:15 +07:00
XMRig
1ccdcb1645 Merge branch 'master' into dev 2021-02-21 22:27:36 +07:00
XMRig
072881e1a1 v6.9.0 2021-02-21 21:23:48 +07:00
XMRig
0c4a3cfc30 Merge branch 'dev' 2021-02-21 21:23:15 +07:00
xmrig
cffd0f50a4 Update CPU.md 2021-02-21 20:22:06 +07:00
XMRig
4b1857114e v6.9.0-dev 2021-02-20 14:28:20 +07:00
XMRig
b49fb27e84 Added idle time detection for macOS. 2021-02-20 13:18:31 +07:00
XMRig
ee341118ce #2104 Added user configurable idle time. 2021-02-19 23:35:30 +07:00
XMRig
f599807bbb Simplified code, fixed broken pause. 2021-02-19 16:26:31 +07:00
xmrig
a2ad626012 Merge pull request #2117 from SChernykh/dev
Fixed crash when GPU mining cn-heavy on Zen3 system
2021-02-18 21:08:44 +07:00
SChernykh
e8a99809b6 Fixed crash when GPU mining cn-heavy on Zen3 system 2021-02-18 14:49:37 +01:00
XMRig
0fe20fe88c Merge remote-tracking branch 'remotes/origin/pr2112' into dev 2021-02-18 15:35:59 +07:00
XMRig
d1d1517b4f Fixed macOS build. 2021-02-18 15:22:39 +07:00
XMRig
5980675876 Code and copyright cleanup. 2021-02-18 12:56:39 +07:00
Hansie Odendaal
3b87cd97ce Allow result submission to origin daemon with self-select
With `self-select` mode enabled, the `submit-to-origin` config option
will let the `SelfSelectClient` submit the solution to both
the daemon where it got the template from as well as to
the connected pool, for miners that want to do pool mining
with Monero and solo mining with an altcoin (merged mining variant).

Thank you and special credit to @StriderDM (https://github.com/StriderDM)!
2021-02-17 18:05:13 +02:00
xmrig
d2f01cfa86 Merge pull request #2104 from SChernykh/dev
Added `pause-on-active` option
2021-02-15 11:04:14 +07:00
SChernykh
82830e359a Added pause-on-active option
Windows only for now. When set to true, pauses mining when user touches mouse or keyboard.
2021-02-14 15:32:18 +01:00
XMRig
8e3fec5768 v6.8.3 2021-02-12 22:51:26 +07:00
XMRig
4fd23a1bf4 Merge branch 'master' into dev 2021-02-12 22:50:52 +07:00
XMRig
8bfaddd3fc v6.8.2 2021-02-12 18:47:16 +07:00
XMRig
dabafaaadb Merge branch 'dev' 2021-02-12 18:46:41 +07:00
xmrig
5cda714254 Update CHANGELOG.md 2021-02-12 18:35:43 +07:00
xmrig
91151ce4a1 Merge pull request #2089 from SChernykh/dev
Optimized cn-heavy for Zen3
2021-02-08 16:24:16 +07:00
SChernykh
dc1443f3b8 Cryptonight: add prefetching to interleaved mode 2021-02-07 23:29:54 +01:00
SChernykh
8af8df25aa Optimized cn-heavy for Zen3
- Uses scratchpad interleaving to access only the closest L3 slice from each CPU core.
- Also activates MSR mod for cn-heavy because CPU prefetchers get confused with interleaving
- 7-8% speedup on Zen3
2021-02-07 22:05:11 +01:00
XMRig
b1e14dc1d3 Always disable kawpow for CPU backend. 2021-02-07 18:49:54 +07:00
XMRig
f460d76f8d Add missing option to config example. 2021-02-06 16:17:53 +07:00
xmrig
1c63e9efba Merge pull request #2080 from SChernykh/dev 2021-02-04 04:29:59 +07:00
SChernykh
21abbe4e84 Fix compile error in Termux 2021-02-03 19:05:05 +01:00
XMRig
3080f47cd6 v6.8.2-dev 2021-02-03 18:01:14 +07:00
XMRig
f4ebdaa8e5 Merge branch 'master' into dev 2021-02-03 18:00:42 +07:00
XMRig
1bcfd0cdea v6.8.1 2021-02-03 07:00:39 +07:00
XMRig
9396ecf93d Merge branch 'dev' 2021-02-03 06:57:11 +07:00
xmrig
a4af964696 Update CHANGELOG.md 2021-02-03 06:04:30 +07:00
XMRig
2c8d8ee2ab Fixed macOS build and compile warning. 2021-02-02 13:53:45 +07:00
xmrig
631a8ca802 Merge pull request #2077 from SChernykh/dev
Fix for illegal instruction crash on ARM
2021-02-02 04:57:36 +07:00
SChernykh
346892e170 Update jit_compiler_a64.cpp 2021-02-01 22:52:02 +01:00
SChernykh
db03573804 ARM JIT: added missing cache flush 2021-02-01 22:42:35 +01:00
SChernykh
e74573f81f Fixed code allocation for ARM 2021-02-01 22:36:11 +01:00
xmrig
0e70974d7d Merge pull request #2076 from xmrig/feature-flexible-hugepages
Added support for flexible huge page sizes on Linux.
2021-02-02 04:07:41 +07:00
xmrig
3a3ee91324 Merge pull request #2075 from SChernykh/dev
Fixed crashes on ARM
2021-02-02 03:06:58 +07:00
SChernykh
4108428872 Fixed crashes on ARM 2021-02-01 17:07:45 +01:00
XMRig
4c3425a958 Added "--hugepage-size" command line option. 2021-02-01 05:06:24 +07:00
XMRig
09624c4f9b Added support for flexible huge page sizes on Linux. 2021-01-31 23:38:57 +07:00
XMRig
8faef28e7d Detect Apple M1 on Linux. 2021-01-31 05:41:32 +07:00
XMRig
62450f4ed8 Update ARM CPUs names. 2021-01-31 03:53:22 +07:00
XMRig
2c52a5a352 #2066 Fixed AMD GPUs health data readings. 2021-01-30 02:42:59 +07:00
XMRig
7d52bd7454 Extend normalization rules. 2021-01-29 18:22:24 +07:00
XMRig
f68b105bd9 Normalize DMI memory slot name. 2021-01-29 04:23:50 +07:00
XMRig
9ca1a6129b #2066 Quick fix for AMD GPUs health data. 2021-01-29 01:23:35 +07:00
xmrig
7a3df1c0bb Merge pull request #2067 from SChernykh/dev
Fix compilation error when RandomX and Argon2 are disabled
2021-01-28 20:44:03 +07:00
SChernykh
22a1b8d82d Fix compilation error when RandomX and Argon2 are disabled 2021-01-28 14:38:28 +01:00
xmrig
0a462fbef5 Merge pull request #2064 from SChernykh/dev
Added documentation for config.json CPU options
2021-01-28 19:41:15 +07:00
SChernykh
f302b4b0ef Added documentation for config.json CPU options 2021-01-28 13:37:27 +01:00
XMRig
65fe26dc6c Don't print empty memory slots if the total count is above 8. 2021-01-28 00:00:00 +07:00
XMRig
e6d4921e21 v6.8.1-dev 2021-01-26 16:40:10 +07:00
XMRig
f82d67e76e Merge branch 'master' into dev 2021-01-26 16:38:37 +07:00
XMRig
4e671a945d v6.8.0 2021-01-26 15:26:16 +07:00
XMRig
e38d277143 Merge branch 'dev' 2021-01-26 15:25:20 +07:00
XMRig
8eb9b4d37a Update default config example. 2021-01-26 15:15:08 +07:00
xmrig
2d45cc64c1 Update CHANGELOG.md 2021-01-26 15:08:05 +07:00
XMRig
b9081e992b Code cleanup 2021-01-25 22:00:42 +07:00
XMRig
1424b2975f Fixed DMI memory speed. 2021-01-24 15:56:02 +07:00
XMRig
0fa5db8fa3 Code cleanup. 2021-01-24 15:02:22 +07:00
xmrig
5999dccd57 Merge pull request #2058 from SChernykh/dev
RandomX JIT x86: remove unnecessary instructions
2021-01-24 13:59:56 +07:00
SChernykh
78922a0772 RandomX JIT x86: remove unnecessary instructions
Adopted from https://github.com/tevador/RandomX/pull/201
2021-01-23 22:28:50 +01:00
XMRig
bc3914883a Merge branch 'alvv-z-patch-1' into dev 2021-01-24 02:30:22 +07:00
XMRig
86dae9e149 Merge branch 'patch-1' of https://github.com/alvv-z/xmrig into alvv-z-patch-1 2021-01-24 02:30:05 +07:00
xmrig
05b2260393 Merge pull request #2057 from xmrig/feature-msr2
Improved MSR subsystem code quality
2021-01-24 02:28:54 +07:00
XMRig
672f6df6c1 Fixed Cache QoS restore on exit where it is not supported. 2021-01-24 02:23:27 +07:00
XMRig
9dae559b73 Added RxMsr class. 2021-01-23 23:23:39 +07:00
XMRig
b9d813c403 Move Ryzen related fixes to RxFix class. 2021-01-23 00:27:56 +07:00
XMRig
c48e2e6af8 Added new class Msr. 2021-01-22 23:50:25 +07:00
xmrig
76fba819fe Merge pull request #2055 from GoDzM4TT3O/patch-1
Add missing "cstdio" library
2021-01-22 22:19:41 +07:00
GoDzM4TT3O
6bab624885 Add missing "cstdio" library
Compilation fails if the above library is missing. This fixes a compilation error.
2021-01-22 14:18:28 +01:00
XMRig
3730bcd434 Merge branch 'master' into feature-msr2 2021-01-22 16:55:57 +07:00
XMRig
3b7d30a91d v6.8.0-dev 2021-01-22 00:27:38 +07:00
XMRig
c8588903e3 Enable DMI reader by default. 2021-01-22 00:12:34 +07:00
xmrig
0b4fec15dd Merge pull request #2052 from xmrig/feature-dmi
Added DMI/SMBIOS reader
2021-01-22 00:09:10 +07:00
XMRig
ef8cc28f3f Added DMI data to online benchmark. 2021-01-21 23:22:01 +07:00
XMRig
8471f7fad3 Added "GET /2/dmi" API endpoint. 2021-01-20 22:54:02 +07:00
alvv-z
b99dc440af Spelling Check
agaiin -> again
2021-01-20 12:36:47 +01:00
XMRig
9a02007900 Added config option "dmi" and command line option "--no-dmi". 2021-01-20 16:02:48 +07:00
XMRig
efc5e5d811 Fix summary. 2021-01-20 00:45:36 +07:00
XMRig
dea5be0a57 Added basic system reader. 2021-01-20 00:43:01 +07:00
XMRig
24c290963a Added DMI reader for macOS. 2021-01-19 14:16:03 +07:00
XMRig
9dffcdaddd Enable FreeBSD support. 2021-01-19 01:45:17 +07:00
XMRig
3df47052ed Added legacy DMI readers for Linux. 2021-01-19 01:23:09 +07:00
XMRig
3b8d081c8c Add support for older DMI formats on Linux. 2021-01-18 22:56:57 +07:00
XMRig
05e6f66169 Added basic Linux support. 2021-01-18 16:53:42 +07:00
XMRig
11e0d3de3a Added DMI reader (Windows only). 2021-01-18 11:23:29 +07:00
XMRig
ea367da064 #2043 Fix compile warning. 2021-01-17 17:48:35 +07:00
xmrig
a999a56775 Merge pull request #2041 from coldiron/typo-fixes
fixed grammar in a couple of awkward error messages
2021-01-16 10:15:29 +07:00
Richard Mitsuk Lavitt
590252bd5e fixed grammar in a couple of awkward error messages 2021-01-15 14:33:38 -06:00
XMRig
cc2de4f768 v6.7.3-dev 2021-01-15 20:11:28 +07:00
XMRig
aeea0e0a6c Merge branch 'master' into dev 2021-01-15 20:09:26 +07:00
XMRig
82d698a1e5 v6.7.2 2021-01-15 19:31:41 +07:00
XMRig
95b2b5e028 Merge branch 'dev' 2021-01-15 19:31:09 +07:00
xmrig
eae84d47e7 Update CHANGELOG.md 2021-01-15 19:30:22 +07:00
XMRig
45d12314f4 Sync changes. 2021-01-15 19:18:52 +07:00
xmrig
fa11cb623d Merge pull request #2039 from SChernykh/dev
Fixed solo mining
2021-01-15 18:49:04 +07:00
SChernykh
7da04c6a2c Always use cvt_bin2hex 2021-01-15 12:46:27 +01:00
SChernykh
5c449913af Fixed solo mining
It was broken since 6.7.0
2021-01-15 11:18:36 +01:00
XMRig
af019fed8e v6.7.2-dev 2021-01-11 18:29:56 +07:00
XMRig
8872630c46 Merge branch 'master' into dev 2021-01-11 18:29:06 +07:00
XMRig
d3ec21cbf5 v6.7.1 2021-01-11 16:13:29 +07:00
XMRig
395dd4086b Merge branch 'dev' 2021-01-11 16:12:14 +07:00
XMRig
a7f9808621 Fixed HOSTNAME environment variable. 2021-01-11 11:42:32 +07:00
xmrig
88862b617f Update CHANGELOG.md 2021-01-10 07:53:44 +07:00
xmrig
39bfa0c420 Merge pull request #2028 from SChernykh/dev
RandomX x86 JIT: remove redundant CFROUND
2021-01-08 04:58:25 +07:00
SChernykh
f62f4e6108 RandomX x86 JIT: remove redundant CFROUND 2021-01-07 16:20:00 +01:00
xmrig
9f128d1182 Merge pull request #2009 from SChernykh/dev
AstroBWT OpenCL fixes
2020-12-27 22:56:58 +07:00
SChernykh
2f2b33c82b AstroBWT OpenCL fixes
- Rewrote main BWT kernel to work properly on Navi
- Fixed nonce iterations in OclWorker
- Fixed memory allocation for AstroBWT
2020-12-27 16:44:35 +01:00
xmrig
56280cb1d5 Merge pull request #2007 from Frago9876543210/dev
Added scripts/{build, deps} into .gitignore
2020-12-26 00:13:19 +07:00
Frago9876543210
07127c6e87 Added scripts/{build, deps} into .gitignore 2020-12-25 20:05:18 +03:00
xmrig
3dabc77a09 Merge pull request #1998 from SChernykh/dev
Show hashrate in the benchmark finished message
2020-12-23 21:04:11 +07:00
SChernykh
66349e3d23 Show hashrate in the benchmark finished message 2020-12-23 14:31:38 +01:00
XMRig
85a78ce537 #1995 Fixed log initialization. 2020-12-22 21:41:39 +07:00
XMRig
0d9f17670e v6.7.1-dev 2020-12-21 20:59:00 +07:00
XMRig
deb561a410 Merge branch 'master' into dev 2020-12-21 20:57:49 +07:00
640 changed files with 130277 additions and 27016 deletions

View File

@@ -17,6 +17,9 @@ Steps to reproduce the behavior.
A clear and concise description of what you expected to happen.
**Required data**
- XMRig version
- Either the exact link to a release you downloaded from https://github.com/xmrig/xmrig/releases
- Or the exact command lines that you used to build XMRig
- Miner log as text or screenshot
- Config file or command line (without wallets)
- OS: [e.g. Windows]

2
.gitignore vendored
View File

@@ -1,4 +1,6 @@
/build
scripts/build
scripts/deps
/CMakeLists.txt.user
/.idea
/src/backend/opencl/cl/cn/cryptonight_gen.cl

View File

@@ -1,3 +1,286 @@
# v6.22.3
- [#3605](https://github.com/xmrig/xmrig/pull/3605) CUDA backend: added missing RandomX dataset update.
- [#3646](https://github.com/xmrig/xmrig/pull/3646) Optimized auto-config for AMD CPUs with less than 2 MB L3 cache per thread.
- [#3652](https://github.com/xmrig/xmrig/pull/3652) Fixed possible crash when submitting RandomX benchmark.
- [#3662](https://github.com/xmrig/xmrig/pull/3662) Fixed OpenCL kernel compilation error on some platforms.
# v6.22.2
- [#3569](https://github.com/xmrig/xmrig/pull/3569) Fixed corrupted API output in some rare conditions.
- [#3571](https://github.com/xmrig/xmrig/pull/3571) Fixed number of threads on the new Intel Core Ultra CPUs.
# v6.22.1
- [#3531](https://github.com/xmrig/xmrig/pull/3531) Always reset nonce on RandomX dataset change.
- [#3534](https://github.com/xmrig/xmrig/pull/3534) Fixed threads auto-config on Zen5.
- [#3535](https://github.com/xmrig/xmrig/pull/3535) RandomX: tweaks for Zen5.
- [#3539](https://github.com/xmrig/xmrig/pull/3539) Added Zen5 to `randomx_boost.sh`.
- [#3540](https://github.com/xmrig/xmrig/pull/3540) Detect AMD engineering samples in `randomx_boost.sh`.
# v6.22.0
- [#2411](https://github.com/xmrig/xmrig/pull/2411) Added support for [Yada](https://yadacoin.io/) (`rx/yada` algorithm).
- [#3492](https://github.com/xmrig/xmrig/pull/3492) Fixed `--background` option on Unix systems.
- [#3518](https://github.com/xmrig/xmrig/pull/3518) Possible fix for corrupted API output in rare cases.
- [#3522](https://github.com/xmrig/xmrig/pull/3522) Removed `rx/keva` algorithm.
- [#3525](https://github.com/xmrig/xmrig/pull/3525) Added Zen5 detection.
- [#3528](https://github.com/xmrig/xmrig/pull/3528) Added `rx/yada` OpenCL support.
# v6.21.3
- [#3462](https://github.com/xmrig/xmrig/pull/3462) RandomX: correct memcpy size for JIT initialization.
# v6.21.2
- The dependencies of all prebuilt releases have been updated. Support for old Ubuntu releases has been dropped.
- [#2800](https://github.com/xmrig/xmrig/issues/2800) Fixed donation with GhostRider algorithm for builds without KawPow algorithm.
- [#3436](https://github.com/xmrig/xmrig/pull/3436) Fixed: the file log writer was not thread-safe.
- [#3450](https://github.com/xmrig/xmrig/pull/3450) Fixed RandomX crash when compiled with fortify_source.
# v6.21.1
- [#3391](https://github.com/xmrig/xmrig/pull/3391) Added support for townforge (monero fork using randomx).
- [#3399](https://github.com/xmrig/xmrig/pull/3399) Fixed Zephyr mining (OpenCL).
- [#3420](https://github.com/xmrig/xmrig/pull/3420) Fixed segfault in HTTP API rebind.
# v6.21.0
- [#3302](https://github.com/xmrig/xmrig/pull/3302) [#3312](https://github.com/xmrig/xmrig/pull/3312) Enabled keepalive for Windows (>= Vista).
- [#3320](https://github.com/xmrig/xmrig/pull/3320) Added "built for OS/architecture/bits" to "ABOUT".
- [#3339](https://github.com/xmrig/xmrig/pull/3339) Added SNI option for TLS connections.
- [#3342](https://github.com/xmrig/xmrig/pull/3342) Update `cn_main_loop.asm`.
- [#3346](https://github.com/xmrig/xmrig/pull/3346) ARM64 JIT: don't use `x18` register.
- [#3348](https://github.com/xmrig/xmrig/pull/3348) Update to latest `sse2neon.h`.
- [#3356](https://github.com/xmrig/xmrig/pull/3356) Updated pricing record size for **Zephyr** solo mining.
- [#3358](https://github.com/xmrig/xmrig/pull/3358) **Zephyr** solo mining: handle multiple outputs.
# v6.20.0
- Added new ARM CPU names.
- [#2394](https://github.com/xmrig/xmrig/pull/2394) Added new CMake options `ARM_V8` and `ARM_V7`.
- [#2830](https://github.com/xmrig/xmrig/pull/2830) Added API rebind polling.
- [#2927](https://github.com/xmrig/xmrig/pull/2927) Fixed compatibility with hwloc 1.11.x.
- [#3060](https://github.com/xmrig/xmrig/pull/3060) Added x86 to `README.md`.
- [#3236](https://github.com/xmrig/xmrig/pull/3236) Fixed: receive CUDA loader error on Linux too.
- [#3290](https://github.com/xmrig/xmrig/pull/3290) Added [Zephyr](https://www.zephyrprotocol.com/) coin support for solo mining.
# v6.19.3
- [#3245](https://github.com/xmrig/xmrig/issues/3245) Improved algorithm negotiation for donation rounds by sending extra information about current mining job.
- [#3254](https://github.com/xmrig/xmrig/pull/3254) Tweaked auto-tuning for Intel CPUs.
- [#3271](https://github.com/xmrig/xmrig/pull/3271) RandomX: optimized program generation.
- [#3273](https://github.com/xmrig/xmrig/pull/3273) RandomX: fixed undefined behavior.
- [#3275](https://github.com/xmrig/xmrig/pull/3275) RandomX: fixed `jccErratum` list.
- [#3280](https://github.com/xmrig/xmrig/pull/3280) Updated example scripts.
# v6.19.2
- [#3230](https://github.com/xmrig/xmrig/pull/3230) Fixed parsing of `TX_EXTRA_MERGE_MINING_TAG`.
- [#3232](https://github.com/xmrig/xmrig/pull/3232) Added new `X-Hash-Difficulty` HTTP header.
- [#3240](https://github.com/xmrig/xmrig/pull/3240) Improved .cmd files when run by shortcuts on another drive.
- [#3241](https://github.com/xmrig/xmrig/pull/3241) Added view tag calculation (fixes Wownero solo mining issue).
# v6.19.1
- Resolved deprecated methods warnings with OpenSSL 3.0.
- [#3213](https://github.com/xmrig/xmrig/pull/3213) Fixed build with 32-bit clang 15.
- [#3218](https://github.com/xmrig/xmrig/pull/3218) Fixed: `--randomx-wrmsr=-1` worked only on Intel.
- [#3228](https://github.com/xmrig/xmrig/pull/3228) Fixed build with gcc 13.
# v6.19.0
- [#3144](https://github.com/xmrig/xmrig/pull/3144) Update to latest `sse2neon.h`.
- [#3161](https://github.com/xmrig/xmrig/pull/3161) MSVC build: enabled parallel compilation.
- [#3163](https://github.com/xmrig/xmrig/pull/3163) Improved Zen 3 MSR mod.
- [#3176](https://github.com/xmrig/xmrig/pull/3176) Update cmake required version to 3.1.
- [#3182](https://github.com/xmrig/xmrig/pull/3182) DragonflyBSD compilation fixes.
- [#3196](https://github.com/xmrig/xmrig/pull/3196) Show IP address for failed connections.
- [#3185](https://github.com/xmrig/xmrig/issues/3185) Fixed macOS DMI reader.
- [#3198](https://github.com/xmrig/xmrig/pull/3198) Fixed broken RandomX light mode mining.
- [#3202](https://github.com/xmrig/xmrig/pull/3202) Solo mining: added job timeout (default is 15 seconds).
# v6.18.1
- [#3129](https://github.com/xmrig/xmrig/pull/3129) Fix: protectRX flushed CPU cache only on MacOS/iOS.
- [#3126](https://github.com/xmrig/xmrig/pull/3126) Don't reset when pool sends the same job blob.
- [#3120](https://github.com/xmrig/xmrig/pull/3120) RandomX: optimized `CFROUND` elimination.
- [#3109](https://github.com/xmrig/xmrig/pull/3109) RandomX: added Blake2 AVX2 version.
- [#3082](https://github.com/xmrig/xmrig/pull/3082) Fixed GCC 12 warnings.
- [#3075](https://github.com/xmrig/xmrig/pull/3075) Recognize `armv7ve` as valid ARMv7 target.
- [#3132](https://github.com/xmrig/xmrig/pull/3132) RandomX: added MSR mod for Zen 4.
- [#3134](https://github.com/xmrig/xmrig/pull/3134) Added Zen4 to `randomx_boost.sh`.
# v6.18.0
- [#3067](https://github.com/xmrig/xmrig/pull/3067) Monero v15 network upgrade support and more housekeeping.
- Removed deprecated AstroBWTv1 and v2.
- Fixed debug GhostRider build.
- Monero v15 network upgrade support.
- Fixed ZMQ debug log.
- Improved daemon ZMQ mining stability.
- [#3054](https://github.com/xmrig/xmrig/pull/3054) Fixes for 32-bit ARM.
- [#3042](https://github.com/xmrig/xmrig/pull/3042) Fixed being unable to resume from `pause-on-battery`.
- [#3031](https://github.com/xmrig/xmrig/pull/3031) Fixed `--cpu-priority` not working sometimes.
- [#3020](https://github.com/xmrig/xmrig/pull/3020) Removed old AstroBWT algorithm.
# v6.17.0
- [#2954](https://github.com/xmrig/xmrig/pull/2954) **Dero HE fork support (`astrobwt/v2` algorithm).**
- [#2961](https://github.com/xmrig/xmrig/pull/2961) Dero HE (`astrobwt/v2`) CUDA config generator.
- [#2969](https://github.com/xmrig/xmrig/pull/2969) Dero HE (`astrobwt/v2`) OpenCL support.
- Fixed displayed DMI memory information for empty slots.
- [#2932](https://github.com/xmrig/xmrig/pull/2932) Fixed GhostRider with hwloc disabled.
# v6.16.4
- [#2904](https://github.com/xmrig/xmrig/pull/2904) Fixed unaligned memory accesses.
- [#2908](https://github.com/xmrig/xmrig/pull/2908) Added MSVC/2022 to `version.h`.
- [#2910](https://github.com/xmrig/xmrig/issues/2910) Fixed donation for GhostRider/RTM.
# v6.16.3
- [#2778](https://github.com/xmrig/xmrig/pull/2778) Fixed `READY threads X/X` display after algorithm switching.
- [#2782](https://github.com/xmrig/xmrig/pull/2782) Updated GhostRider documentation.
- [#2815](https://github.com/xmrig/xmrig/pull/2815) Fixed `cn-heavy` in 32-bit builds.
- [#2827](https://github.com/xmrig/xmrig/pull/2827) GhostRider: set correct priority for helper threads.
- [#2837](https://github.com/xmrig/xmrig/pull/2837) RandomX: don't restart mining threads when the seed changes.
- [#2848](https://github.com/xmrig/xmrig/pull/2848) GhostRider: added support for `client.reconnect` method.
- [#2856](https://github.com/xmrig/xmrig/pull/2856) Fix for short responses from some Raptoreum pools.
- [#2873](https://github.com/xmrig/xmrig/pull/2873) Fixed GhostRider benchmark on single-core systems.
- [#2882](https://github.com/xmrig/xmrig/pull/2882) Fixed ARMv7 compilation.
- [#2893](https://github.com/xmrig/xmrig/pull/2893) KawPow OpenCL: use separate UV loop for building programs.
# v6.16.2
- [#2751](https://github.com/xmrig/xmrig/pull/2751) Fixed crash on CPUs supporting VAES and running GCC-compiled xmrig.
- [#2761](https://github.com/xmrig/xmrig/pull/2761) Fixed broken auto-tuning in GCC Windows build.
- [#2771](https://github.com/xmrig/xmrig/issues/2771) Fixed environment variables support for GhostRider and KawPow.
- [#2769](https://github.com/xmrig/xmrig/pull/2769) Performance fixes:
- Fixed several performance bottlenecks introduced in v6.16.1.
- Fixed overall GCC-compiled build performance, it's the same speed as MSVC build now.
- **Linux builds are up to 10% faster now compared to v6.16.0 GCC build.**
- **Windows builds are up to 5% faster now compared to v6.16.0 MSVC build.**
# v6.16.1
- [#2729](https://github.com/xmrig/xmrig/pull/2729) GhostRider fixes:
- Added average hashrate display.
- Fixed the number of threads shown at startup.
- Fixed `--threads` or `-t` command line option (but `--cpu-max-threads-hint` is recommended to use).
- [#2738](https://github.com/xmrig/xmrig/pull/2738) GhostRider fixes:
- Fixed "difficulty is not a number" error when diff is high on some pools.
- Fixed GhostRider compilation when `WITH_KAWPOW=OFF`.
- [#2740](https://github.com/xmrig/xmrig/pull/2740) Added VAES support for Cryptonight variants **+4% speedup on Zen3**.
- VAES instructions are available on Intel Ice Lake/AMD Zen3 and newer CPUs.
- +4% speedup on Ryzen 5 5600X.
# v6.16.0
- [#2712](https://github.com/xmrig/xmrig/pull/2712) **GhostRider algorithm (Raptoreum) support**: read the [RELEASE NOTES](src/crypto/ghostrider/README.md) for quick start guide and performance comparisons.
- [#2682](https://github.com/xmrig/xmrig/pull/2682) Fixed: use cn-heavy optimization only for Vermeer CPUs.
- [#2684](https://github.com/xmrig/xmrig/pull/2684) MSR mod: fix for error 183.
# v6.15.3
- [#2614](https://github.com/xmrig/xmrig/pull/2614) OpenCL fixes for non-AMD platforms.
- [#2623](https://github.com/xmrig/xmrig/pull/2623) Fixed compiling without kawpow.
- [#2636](https://github.com/xmrig/xmrig/pull/2636) [#2639](https://github.com/xmrig/xmrig/pull/2639) AstroBWT speedup (up to +35%).
- [#2646](https://github.com/xmrig/xmrig/pull/2646) Fixed MSVC compilation error.
# v6.15.2
- [#2606](https://github.com/xmrig/xmrig/pull/2606) Fixed: AstroBWT auto-config ignored `max-threads-hint`.
- Fixed possible crash on Windows (regression in v6.15.1).
# v6.15.1
- [#2586](https://github.com/xmrig/xmrig/pull/2586) Fixed Windows 7 compatibility.
- [#2594](https://github.com/xmrig/xmrig/pull/2594) Added Windows taskbar icon colors.
# v6.15.0
- [#2548](https://github.com/xmrig/xmrig/pull/2548) Added automatic coin detection for daemon mining.
- [#2563](https://github.com/xmrig/xmrig/pull/2563) Added new algorithm RandomX Graft (`rx/graft`).
- [#2565](https://github.com/xmrig/xmrig/pull/2565) AstroBWT: added AVX2 Salsa20 implementation.
- Added support for new CUDA plugin API (previous API still supported).
# v6.14.1
- [#2532](https://github.com/xmrig/xmrig/pull/2532) Refactoring: stable (persistent) algorithms IDs.
- [#2537](https://github.com/xmrig/xmrig/pull/2537) Fixed Termux build.
# v6.14.0
- [#2484](https://github.com/xmrig/xmrig/pull/2484) Added ZeroMQ support for solo mining.
- [#2476](https://github.com/xmrig/xmrig/issues/2476) Fixed crash in DMI memory reader.
- [#2492](https://github.com/xmrig/xmrig/issues/2492) Added missing `--huge-pages-jit` command line option.
- [#2512](https://github.com/xmrig/xmrig/pull/2512) Added display of the number of transactions in a pool job.
# v6.13.1
- [#2468](https://github.com/xmrig/xmrig/pull/2468) Fixed regression in previous version: don't send miner signature during regular mining.
# v6.13.0
- [#2445](https://github.com/xmrig/xmrig/pull/2445) Added support for solo mining with miner signatures for the upcoming Wownero fork.
# v6.12.2
- [#2280](https://github.com/xmrig/xmrig/issues/2280) GPU backends are now disabled in benchmark mode.
- [#2322](https://github.com/xmrig/xmrig/pull/2322) Improved MSR compatibility with recent Linux kernels and updated `randomx_boost.sh`.
- [#2340](https://github.com/xmrig/xmrig/pull/2340) Fixed AES detection on FreeBSD on ARM.
- [#2341](https://github.com/xmrig/xmrig/pull/2341) `sse2neon` updated to the latest version.
- [#2351](https://github.com/xmrig/xmrig/issues/2351) Fixed help output for `--cpu-priority` and `--cpu-affinity` option.
- [#2375](https://github.com/xmrig/xmrig/pull/2375) Fixed macOS CUDA backend default loader name.
- [#2378](https://github.com/xmrig/xmrig/pull/2378) Fixed broken light mode mining on x86.
- [#2379](https://github.com/xmrig/xmrig/pull/2379) Fixed CL code for KawPow where it assumes everything is AMD.
- [#2386](https://github.com/xmrig/xmrig/pull/2386) RandomX: enabled `IMUL_RCP` optimization for light mode mining.
- [#2393](https://github.com/xmrig/xmrig/pull/2393) RandomX: added BMI2 version for scratchpad prefetch.
- [#2395](https://github.com/xmrig/xmrig/pull/2395) RandomX: rewrote dataset read code.
- [#2398](https://github.com/xmrig/xmrig/pull/2398) RandomX: optimized ARMv8 dataset read.
- Added `argon2/ninja` alias for `argon2/wrkz` algorithm.
# v6.12.1
- [#2296](https://github.com/xmrig/xmrig/pull/2296) Fixed Zen3 assembly code for `cn/upx2` algorithm.
# v6.12.0
- [#2276](https://github.com/xmrig/xmrig/pull/2276) Added support for Uplexa (`cn/upx2` algorithm).
- [#2261](https://github.com/xmrig/xmrig/pull/2261) Show total hashrate if compiled without OpenCL.
- [#2289](https://github.com/xmrig/xmrig/pull/2289) RandomX: optimized `IMUL_RCP` instruction.
- Added support for `--user` command line option for online benchmark.
# v6.11.2
- [#2207](https://github.com/xmrig/xmrig/issues/2207) Fixed regression in HTTP parser and llhttp updated to v5.1.0.
# v6.11.1
- [#2239](https://github.com/xmrig/xmrig/pull/2239) Fixed broken `coin` setting functionality.
# v6.11.0
- [#2196](https://github.com/xmrig/xmrig/pull/2196) Improved DNS subsystem and added new DNS specific options.
- [#2172](https://github.com/xmrig/xmrig/pull/2172) Fixed build on Alpine 3.13.
- [#2177](https://github.com/xmrig/xmrig/pull/2177) Fixed ARM specific compilation error with GCC 10.2.
- [#2214](https://github.com/xmrig/xmrig/pull/2214) [#2216](https://github.com/xmrig/xmrig/pull/2216) [#2235](https://github.com/xmrig/xmrig/pull/2235) Optimized `cn-heavy` algorithm.
- [#2217](https://github.com/xmrig/xmrig/pull/2217) Fixed mining job creation sequence.
- [#2225](https://github.com/xmrig/xmrig/pull/2225) Fixed build without OpenCL support on some systems.
- [#2229](https://github.com/xmrig/xmrig/pull/2229) Don't use RandomX JIT if `WITH_ASM=OFF`.
- [#2228](https://github.com/xmrig/xmrig/pull/2228) Removed useless code for cryptonight algorithms.
- [#2234](https://github.com/xmrig/xmrig/pull/2234) Fixed build error on gcc 4.8.
# v6.10.0
- [#2122](https://github.com/xmrig/xmrig/pull/2122) Fixed pause logic when both pause on battery and user activity are enabled.
- [#2123](https://github.com/xmrig/xmrig/issues/2123) Fixed compatibility with gcc 4.8.
- [#2147](https://github.com/xmrig/xmrig/pull/2147) Fixed many `new job` messages when solo mining.
- [#2150](https://github.com/xmrig/xmrig/pull/2150) Updated `sse2neon.h` to the latest master, fixes build on ARMv7.
- [#2157](https://github.com/xmrig/xmrig/pull/2157) Fixed crash in `cn-heavy` on Zen3 with manual thread count.
- Fixed possible out of order write to log file.
- [http-parser](https://github.com/nodejs/http-parser) replaced with [llhttp](https://github.com/nodejs/llhttp).
- For official builds: libuv, hwloc and OpenSSL updated to latest versions.
# v6.9.0
- [#2104](https://github.com/xmrig/xmrig/pull/2104) Added [pause-on-active](https://xmrig.com/docs/miner/config/misc#pause-on-active) config option and `--pause-on-active=N` command line option.
- [#2112](https://github.com/xmrig/xmrig/pull/2112) Added support for [Tari merge mining](https://github.com/tari-project/tari/blob/development/README.md#tari-merge-mining).
- [#2117](https://github.com/xmrig/xmrig/pull/2117) Fixed crash when GPU mining `cn-heavy` on Zen3 system.
# v6.8.2
- [#2080](https://github.com/xmrig/xmrig/pull/2080) Fixed compile error in Termux.
- [#2089](https://github.com/xmrig/xmrig/pull/2089) Optimized CryptoNight-Heavy for Zen3, 7-8% speedup.
# v6.8.1
- [#2064](https://github.com/xmrig/xmrig/pull/2064) Added documentation for config.json CPU options.
- [#2066](https://github.com/xmrig/xmrig/issues/2066) Fixed AMD GPUs health data readings on Linux.
- [#2067](https://github.com/xmrig/xmrig/pull/2067) Fixed compilation error when RandomX and Argon2 are disabled.
- [#2076](https://github.com/xmrig/xmrig/pull/2076) Added support for flexible huge page sizes on Linux.
- [#2077](https://github.com/xmrig/xmrig/pull/2077) Fixed `illegal instruction` crash on ARM.
# v6.8.0
- [#2052](https://github.com/xmrig/xmrig/pull/2052) Added DMI/SMBIOS reader.
- Added information about memory modules on the miner startup and for online benchmark.
- Added new HTTP API endpoint: `GET /2/dmi`.
- Added new command line option `--no-dmi` or config option `"dmi"`.
- Added new CMake option `-DWITH_DMI=OFF`.
- [#2057](https://github.com/xmrig/xmrig/pull/2057) Improved MSR subsystem code quality.
- [#2058](https://github.com/xmrig/xmrig/pull/2058) RandomX JIT x86: removed unnecessary instructions.
# v6.7.2
- [#2039](https://github.com/xmrig/xmrig/pull/2039) Fixed solo mining.
# v6.7.1
- [#1995](https://github.com/xmrig/xmrig/issues/1995) Fixed log initialization.
- [#1998](https://github.com/xmrig/xmrig/pull/1998) Added hashrate in the benchmark finished message.
- [#2009](https://github.com/xmrig/xmrig/pull/2009) AstroBWT OpenCL fixes.
- [#2028](https://github.com/xmrig/xmrig/pull/2028) RandomX x86 JIT: removed redundant `CFROUND`.
# v6.7.0 # v6.7.0
- **[#1991](https://github.com/xmrig/xmrig/issues/1991) Added Apple M1 processor support.** - **[#1991](https://github.com/xmrig/xmrig/issues/1991) Added Apple M1 processor support.**
- **[#1986](https://github.com/xmrig/xmrig/pull/1986) Up to 20-30% faster RandomX dataset initialization with AVX2 on some CPUs.** - **[#1986](https://github.com/xmrig/xmrig/pull/1986) Up to 20-30% faster RandomX dataset initialization with AVX2 on some CPUs.**

View File

@@ -1,14 +1,15 @@
cmake_minimum_required(VERSION 2.8.12) cmake_minimum_required(VERSION 3.10)
project(xmrig) project(xmrig)
option(WITH_HWLOC "Enable hwloc support" ON) option(WITH_HWLOC "Enable hwloc support" ON)
option(WITH_CN_LITE "Enable CryptoNight-Lite algorithms family" ON) option(WITH_CN_LITE "Enable CryptoNight-Lite algorithms family" ON)
option(WITH_CN_HEAVY "Enable CryptoNight-Heavy algorithms family" ON) option(WITH_CN_HEAVY "Enable CryptoNight-Heavy algorithms family" ON)
option(WITH_CN_PICO "Enable CryptoNight-Pico algorithm" ON) option(WITH_CN_PICO "Enable CryptoNight-Pico algorithm" ON)
option(WITH_CN_FEMTO "Enable CryptoNight-UPX2 algorithm" ON)
option(WITH_RANDOMX "Enable RandomX algorithms family" ON) option(WITH_RANDOMX "Enable RandomX algorithms family" ON)
option(WITH_ARGON2 "Enable Argon2 algorithms family" ON) option(WITH_ARGON2 "Enable Argon2 algorithms family" ON)
option(WITH_ASTROBWT "Enable AstroBWT algorithms family" ON)
option(WITH_KAWPOW "Enable KawPow algorithms family" ON) option(WITH_KAWPOW "Enable KawPow algorithms family" ON)
option(WITH_GHOSTRIDER "Enable GhostRider algorithm" ON)
option(WITH_HTTP "Enable HTTP protocol support (client/server)" ON) option(WITH_HTTP "Enable HTTP protocol support (client/server)" ON)
option(WITH_DEBUG_LOG "Enable debug log output" OFF) option(WITH_DEBUG_LOG "Enable debug log output" OFF)
option(WITH_TLS "Enable OpenSSL support" ON) option(WITH_TLS "Enable OpenSSL support" ON)
@@ -17,6 +18,8 @@ option(WITH_MSR "Enable MSR mod & 1st-gen Ryzen fix" ON)
option(WITH_ENV_VARS "Enable environment variables support in config file" ON) option(WITH_ENV_VARS "Enable environment variables support in config file" ON)
option(WITH_EMBEDDED_CONFIG "Enable internal embedded JSON config" OFF) option(WITH_EMBEDDED_CONFIG "Enable internal embedded JSON config" OFF)
option(WITH_OPENCL "Enable OpenCL backend" ON) option(WITH_OPENCL "Enable OpenCL backend" ON)
set(WITH_OPENCL_VERSION 200 CACHE STRING "Target OpenCL version")
set_property(CACHE WITH_OPENCL_VERSION PROPERTY STRINGS 120 200 210 220)
option(WITH_CUDA "Enable CUDA backend" ON) option(WITH_CUDA "Enable CUDA backend" ON)
option(WITH_NVML "Enable NVML (NVIDIA Management Library) support (only if CUDA backend enabled)" ON) option(WITH_NVML "Enable NVML (NVIDIA Management Library) support (only if CUDA backend enabled)" ON)
option(WITH_ADL "Enable ADL (AMD Display Library) or sysfs support (only if OpenCL backend enabled)" ON) option(WITH_ADL "Enable ADL (AMD Display Library) or sysfs support (only if OpenCL backend enabled)" ON)
@@ -24,11 +27,15 @@ option(WITH_STRICT_CACHE "Enable strict checks for OpenCL cache" ON)
option(WITH_INTERLEAVE_DEBUG_LOG "Enable debug log for threads interleave" OFF) option(WITH_INTERLEAVE_DEBUG_LOG "Enable debug log for threads interleave" OFF)
option(WITH_PROFILING "Enable profiling for developers" OFF) option(WITH_PROFILING "Enable profiling for developers" OFF)
option(WITH_SSE4_1 "Enable SSE 4.1 for Blake2" ON) option(WITH_SSE4_1 "Enable SSE 4.1 for Blake2" ON)
option(WITH_AVX2 "Enable AVX2 for Blake2" ON)
option(WITH_VAES "Enable VAES instructions for Cryptonight" ON)
option(WITH_BENCHMARK "Enable builtin RandomX benchmark and stress test" ON) option(WITH_BENCHMARK "Enable builtin RandomX benchmark and stress test" ON)
option(WITH_SECURE_JIT "Enable secure access to JIT memory" OFF) option(WITH_SECURE_JIT "Enable secure access to JIT memory" OFF)
option(WITH_DMI "Enable DMI/SMBIOS reader" ON)
option(BUILD_STATIC "Build static binary" OFF) option(BUILD_STATIC "Build static binary" OFF)
option(ARM_TARGET "Force use specific ARM target 8 or 7" 0) option(ARM_V8 "Force ARMv8 (64 bit) architecture, use with caution if automatic detection fails, but you sure it may work" OFF)
option(ARM_V7 "Force ARMv7 (32 bit) architecture, use with caution if automatic detection fails, but you sure it may work" OFF)
option(HWLOC_DEBUG "Enable hwloc debug helpers and log" OFF) option(HWLOC_DEBUG "Enable hwloc debug helpers and log" OFF)
@@ -54,6 +61,7 @@ set(HEADERS
src/core/config/usage.h src/core/config/usage.h
src/core/Controller.h src/core/Controller.h
src/core/Miner.h src/core/Miner.h
src/core/Taskbar.h
src/net/interfaces/IJobResultListener.h src/net/interfaces/IJobResultListener.h
src/net/JobResult.h src/net/JobResult.h
src/net/JobResults.h src/net/JobResults.h
@@ -102,6 +110,7 @@ set(SOURCES
src/core/config/ConfigTransform.cpp src/core/config/ConfigTransform.cpp
src/core/Controller.cpp src/core/Controller.cpp
src/core/Miner.cpp src/core/Miner.cpp
src/core/Taskbar.cpp
src/net/JobResults.cpp src/net/JobResults.cpp
src/net/Network.cpp src/net/Network.cpp
src/net/strategies/DonateStrategy.cpp src/net/strategies/DonateStrategy.cpp
@@ -122,6 +131,19 @@ set(SOURCES_CRYPTO
src/crypto/common/VirtualMemory.cpp src/crypto/common/VirtualMemory.cpp
) )
if (CMAKE_C_COMPILER_ID MATCHES GNU)
set_source_files_properties(src/crypto/cn/CnHash.cpp PROPERTIES COMPILE_FLAGS "-Ofast -fno-tree-vectorize")
endif()
if (WITH_VAES)
add_definitions(-DXMRIG_VAES)
set(HEADERS_CRYPTO "${HEADERS_CRYPTO}" src/crypto/cn/CryptoNight_x86_vaes.h)
set(SOURCES_CRYPTO "${SOURCES_CRYPTO}" src/crypto/cn/CryptoNight_x86_vaes.cpp)
if (CMAKE_C_COMPILER_ID MATCHES GNU OR CMAKE_C_COMPILER_ID MATCHES Clang)
set_source_files_properties(src/crypto/cn/CryptoNight_x86_vaes.cpp PROPERTIES COMPILE_FLAGS "-Ofast -fno-tree-vectorize -mavx2 -mvaes")
endif()
endif()
if (WITH_HWLOC) if (WITH_HWLOC)
list(APPEND HEADERS_CRYPTO list(APPEND HEADERS_CRYPTO
src/crypto/common/NUMAMemoryPool.h src/crypto/common/NUMAMemoryPool.h
@@ -140,14 +162,16 @@ if (XMRIG_OS_WIN)
src/crypto/common/VirtualMemory_win.cpp src/crypto/common/VirtualMemory_win.cpp
) )
set(EXTRA_LIBS ws2_32 psapi iphlpapi userenv) set(EXTRA_LIBS ws2_32 psapi iphlpapi userenv dbghelp)
elseif (XMRIG_OS_APPLE) elseif (XMRIG_OS_APPLE)
list(APPEND SOURCES_OS list(APPEND SOURCES_OS
src/App_unix.cpp src/App_unix.cpp
src/crypto/common/VirtualMemory_unix.cpp src/crypto/common/VirtualMemory_unix.cpp
) )
find_library(IOKIT_LIBRARY IOKit) find_library(IOKIT_LIBRARY IOKit)
set(EXTRA_LIBS ${IOKIT_LIBRARY}) find_library(CORESERVICES_LIBRARY CoreServices)
set(EXTRA_LIBS ${IOKIT_LIBRARY} ${CORESERVICES_LIBRARY})
else() else()
list(APPEND SOURCES_OS list(APPEND SOURCES_OS
src/App_unix.cpp src/App_unix.cpp
@@ -169,15 +193,15 @@ else()
endif() endif()
add_definitions(-DXMRIG_MINER_PROJECT -DXMRIG_JSON_SINGLE_LINE_ARRAY) add_definitions(-DXMRIG_MINER_PROJECT -DXMRIG_JSON_SINGLE_LINE_ARRAY)
add_definitions(-D__STDC_FORMAT_MACROS -DUNICODE) add_definitions(-D__STDC_FORMAT_MACROS -DUNICODE -D_FILE_OFFSET_BITS=64)
find_package(UV REQUIRED) find_package(UV REQUIRED)
include(cmake/flags.cmake) include(cmake/flags.cmake)
include(cmake/randomx.cmake) include(cmake/randomx.cmake)
include(cmake/argon2.cmake) include(cmake/argon2.cmake)
include(cmake/astrobwt.cmake)
include(cmake/kawpow.cmake) include(cmake/kawpow.cmake)
include(cmake/ghostrider.cmake)
include(cmake/OpenSSL.cmake) include(cmake/OpenSSL.cmake)
include(cmake/asm.cmake) include(cmake/asm.cmake)
@@ -193,10 +217,17 @@ if (WITH_CN_PICO)
add_definitions(/DXMRIG_ALGO_CN_PICO) add_definitions(/DXMRIG_ALGO_CN_PICO)
endif() endif()
if (WITH_CN_FEMTO)
add_definitions(/DXMRIG_ALGO_CN_FEMTO)
endif()
if (WITH_EMBEDDED_CONFIG) if (WITH_EMBEDDED_CONFIG)
add_definitions(/DXMRIG_FEATURE_EMBEDDED_CONFIG) add_definitions(/DXMRIG_FEATURE_EMBEDDED_CONFIG)
endif() endif()
include(src/hw/api/api.cmake)
include(src/hw/dmi/dmi.cmake)
include_directories(src) include_directories(src)
include_directories(src/3rdparty) include_directories(src/3rdparty)
include_directories(${UV_INCLUDE_DIR}) include_directories(${UV_INCLUDE_DIR})
@@ -206,16 +237,20 @@ if (WITH_DEBUG_LOG)
endif() endif()
add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${TLS_SOURCES} ${XMRIG_ASM_SOURCES}) add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${TLS_SOURCES} ${XMRIG_ASM_SOURCES})
target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${EXTRA_LIBS} ${CPUID_LIB} ${ARGON2_LIBRARY} ${ETHASH_LIBRARY}) target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${EXTRA_LIBS} ${CPUID_LIB} ${ARGON2_LIBRARY} ${ETHASH_LIBRARY} ${GHOSTRIDER_LIBRARY})
if (WIN32) if (WIN32)
add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/bin/WinRing0/WinRing0x64.sys" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>) if (NOT ARM_TARGET)
add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/bin/WinRing0/WinRing0x64.sys" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>)
endif()
add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/scripts/benchmark_1M.cmd" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>) add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/scripts/benchmark_1M.cmd" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>)
add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/scripts/benchmark_10M.cmd" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>) add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/scripts/benchmark_10M.cmd" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>)
add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/scripts/pool_mine_example.cmd" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>) add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/scripts/pool_mine_example.cmd" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>)
add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/scripts/solo_mine_example.cmd" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>) add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/scripts/solo_mine_example.cmd" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>)
add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/scripts/rtm_ghostrider_example.cmd" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>)
endif() endif()
if (CMAKE_CXX_COMPILER_ID MATCHES Clang AND CMAKE_BUILD_TYPE STREQUAL Release AND NOT CMAKE_GENERATOR STREQUAL Xcode) if (CMAKE_CXX_COMPILER_ID MATCHES Clang AND CMAKE_BUILD_TYPE STREQUAL Release AND NOT CMAKE_GENERATOR STREQUAL Xcode)
add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_STRIP} ${CMAKE_PROJECT_NAME}) add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_STRIP} "$<TARGET_FILE:${CMAKE_PROJECT_NAME}>")
endif() endif()

View File

@@ -7,10 +7,10 @@
[![GitHub stars](https://img.shields.io/github/stars/xmrig/xmrig.svg)](https://github.com/xmrig/xmrig/stargazers) [![GitHub stars](https://img.shields.io/github/stars/xmrig/xmrig.svg)](https://github.com/xmrig/xmrig/stargazers)
[![GitHub forks](https://img.shields.io/github/forks/xmrig/xmrig.svg)](https://github.com/xmrig/xmrig/network) [![GitHub forks](https://img.shields.io/github/forks/xmrig/xmrig.svg)](https://github.com/xmrig/xmrig/network)
XMRig is a high performance, open source, cross platform RandomX, KawPow, CryptoNight and AstroBWT unified CPU/GPU miner and [RandomX benchmark](https://xmrig.com/benchmark). Official binaries are available for Windows, Linux, macOS and FreeBSD. XMRig is a high performance, open source, cross platform RandomX, KawPow, CryptoNight and [GhostRider](https://github.com/xmrig/xmrig/tree/master/src/crypto/ghostrider#readme) unified CPU/GPU miner and [RandomX benchmark](https://xmrig.com/benchmark). Official binaries are available for Windows, Linux, macOS and FreeBSD.
## Mining backends ## Mining backends
- **CPU** (x64/ARMv8) - **CPU** (x86/x64/ARMv7/ARMv8)
- **OpenCL** for AMD GPUs. - **OpenCL** for AMD GPUs.
- **CUDA** for NVIDIA GPUs via external [CUDA plugin](https://github.com/xmrig/xmrig-cuda). - **CUDA** for NVIDIA GPUs via external [CUDA plugin](https://github.com/xmrig/xmrig-cuda).
@@ -19,7 +19,7 @@ XMRig is a high performance, open source, cross platform RandomX, KawPow, Crypto
* **[Build from source](https://xmrig.com/docs/miner/build)** * **[Build from source](https://xmrig.com/docs/miner/build)**
## Usage ## Usage
The preferred way to configure the miner is the [JSON config file](src/config.json) as it is more flexible and human friendly. The [command line interface](https://xmrig.com/docs/miner/command-line-options) does not cover all features, such as mining profiles for different algorithms. Important options can be changed during runtime without miner restart by editing the config file or executing API calls. The preferred way to configure the miner is the [JSON config file](https://xmrig.com/docs/miner/config) as it is more flexible and human friendly. The [command line interface](https://xmrig.com/docs/miner/command-line-options) does not cover all features, such as mining profiles for different algorithms. Important options can be changed during runtime without miner restart by editing the config file or executing [API](https://xmrig.com/docs/miner/api) calls.
* **[Wizard](https://xmrig.com/wizard)** helps you create initial configuration for the miner. * **[Wizard](https://xmrig.com/wizard)** helps you create initial configuration for the miner.
* **[Workers](http://workers.xmrig.info)** helps manage your miners via HTTP API. * **[Workers](http://workers.xmrig.info)** helps manage your miners via HTTP API.

View File

@@ -1,45 +0,0 @@
# AstroBWT algorithm build configuration.
# When WITH_ASTROBWT is set, defines XMRIG_ALGO_ASTROBWT and appends the
# algorithm's headers/sources to HEADERS_CRYPTO / SOURCES_CRYPTO; otherwise the
# definition is removed.
if (WITH_ASTROBWT)
add_definitions(/DXMRIG_ALGO_ASTROBWT)
list(APPEND HEADERS_CRYPTO
src/crypto/astrobwt/AstroBWT.h
)
list(APPEND SOURCES_CRYPTO
src/crypto/astrobwt/AstroBWT.cpp
)
# ARM builds use the salsa20_ref C sources (ecrypt-* headers + salsa20.c).
if (XMRIG_ARM)
list(APPEND HEADERS_CRYPTO
src/crypto/astrobwt/salsa20_ref/ecrypt-config.h
src/crypto/astrobwt/salsa20_ref/ecrypt-machine.h
src/crypto/astrobwt/salsa20_ref/ecrypt-portable.h
src/crypto/astrobwt/salsa20_ref/ecrypt-sync.h
)
list(APPEND SOURCES_CRYPTO
src/crypto/astrobwt/salsa20_ref/salsa20.c
)
else()
# Non-ARM, 64-bit pointer size only: define ASTROBWT_AVX2 and add the
# SHA3-256 AVX2 assembly source.
if (CMAKE_SIZEOF_VOID_P EQUAL 8)
add_definitions(/DASTROBWT_AVX2)
# MSVC gets the MASM (.asm) file; every other compiler gets the .S file.
if (CMAKE_C_COMPILER_ID MATCHES MSVC)
enable_language(ASM_MASM)
list(APPEND SOURCES_CRYPTO src/crypto/astrobwt/sha3_256_avx2.asm)
else()
enable_language(ASM)
list(APPEND SOURCES_CRYPTO src/crypto/astrobwt/sha3_256_avx2.S)
endif()
endif()
# Non-ARM builds (any pointer size) use the C++ Salsa20 implementation.
list(APPEND HEADERS_CRYPTO
src/crypto/astrobwt/Salsa20.hpp
)
list(APPEND SOURCES_CRYPTO
src/crypto/astrobwt/Salsa20.cpp
)
endif()
else()
remove_definitions(/DXMRIG_ALGO_ASTROBWT)
endif()

View File

@@ -1,47 +1,72 @@
if (CMAKE_SIZEOF_VOID_P EQUAL 8)
set(XMRIG_64_BIT ON)
add_definitions(-DXMRIG_64_BIT)
else()
set(XMRIG_64_BIT OFF)
endif()
if (NOT CMAKE_SYSTEM_PROCESSOR) if (NOT CMAKE_SYSTEM_PROCESSOR)
message(WARNING "CMAKE_SYSTEM_PROCESSOR not defined") message(WARNING "CMAKE_SYSTEM_PROCESSOR not defined")
endif() endif()
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64)$" AND CMAKE_SIZEOF_VOID_P EQUAL 8) include(CheckCXXCompilerFlag)
add_definitions(/DRAPIDJSON_SSE2)
if (CMAKE_CXX_COMPILER_ID MATCHES MSVC)
set(VAES_SUPPORTED ON)
else()
CHECK_CXX_COMPILER_FLAG("-mavx2 -mvaes" VAES_SUPPORTED)
endif()
if (NOT VAES_SUPPORTED)
set(WITH_VAES OFF)
endif()
if (XMRIG_64_BIT AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64)$")
add_definitions(-DRAPIDJSON_SSE2)
else() else()
set(WITH_SSE4_1 OFF) set(WITH_SSE4_1 OFF)
set(WITH_AVX2 OFF)
set(WITH_VAES OFF)
endif()
add_definitions(-DRAPIDJSON_WRITE_DEFAULT_FLAGS=6) # rapidjson::kWriteNanAndInfFlag | rapidjson::kWriteNanAndInfNullFlag
if (ARM_V8)
set(ARM_TARGET 8)
elseif (ARM_V7)
set(ARM_TARGET 7)
endif() endif()
if (NOT ARM_TARGET) if (NOT ARM_TARGET)
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|armv8-a)$") if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|ARM64|armv8-a)$")
set(ARM_TARGET 8) set(ARM_TARGET 8)
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv7|armv7f|armv7s|armv7k|armv7-a|armv7l)$") elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv7|armv7f|armv7s|armv7k|armv7-a|armv7l|armv7ve)$")
set(ARM_TARGET 7) set(ARM_TARGET 7)
endif() endif()
endif() endif()
if (ARM_TARGET AND ARM_TARGET GREATER 6) if (ARM_TARGET AND ARM_TARGET GREATER 6)
set(XMRIG_ARM ON) set(XMRIG_ARM ON)
add_definitions(/DXMRIG_ARM) add_definitions(-DXMRIG_ARM=${ARM_TARGET})
message(STATUS "Use ARM_TARGET=${ARM_TARGET} (${CMAKE_SYSTEM_PROCESSOR})") message(STATUS "Use ARM_TARGET=${ARM_TARGET} (${CMAKE_SYSTEM_PROCESSOR})")
include(CheckCXXCompilerFlag) if (ARM_TARGET EQUAL 8 AND (CMAKE_CXX_COMPILER_ID MATCHES GNU OR CMAKE_CXX_COMPILER_ID MATCHES Clang))
if (ARM_TARGET EQUAL 8)
set(XMRIG_ARMv8 ON)
add_definitions(/DXMRIG_ARMv8)
CHECK_CXX_COMPILER_FLAG(-march=armv8-a+crypto XMRIG_ARM_CRYPTO) CHECK_CXX_COMPILER_FLAG(-march=armv8-a+crypto XMRIG_ARM_CRYPTO)
if (XMRIG_ARM_CRYPTO) if (XMRIG_ARM_CRYPTO)
add_definitions(/DXMRIG_ARM_CRYPTO) add_definitions(-DXMRIG_ARM_CRYPTO)
set(ARM8_CXX_FLAGS "-march=armv8-a+crypto") set(ARM8_CXX_FLAGS "-march=armv8-a+crypto")
else() else()
set(ARM8_CXX_FLAGS "-march=armv8-a") set(ARM8_CXX_FLAGS "-march=armv8-a")
endif() endif()
elseif (ARM_TARGET EQUAL 7)
set(XMRIG_ARMv7 ON)
add_definitions(/DXMRIG_ARMv7)
endif() endif()
endif() endif()
if (WITH_SSE4_1) if (WITH_SSE4_1)
add_definitions(/DXMRIG_FEATURE_SSE4_1) add_definitions(-DXMRIG_FEATURE_SSE4_1)
endif()
if (WITH_AVX2)
add_definitions(-DXMRIG_FEATURE_AVX2)
endif() endif()

View File

@@ -10,7 +10,7 @@ if ("${CMAKE_BUILD_TYPE}" STREQUAL "")
endif() endif()
if (CMAKE_BUILD_TYPE STREQUAL "Release") if (CMAKE_BUILD_TYPE STREQUAL "Release")
add_definitions(/DNDEBUG) add_definitions(-DNDEBUG)
endif() endif()
include(CheckSymbolExists) include(CheckSymbolExists)
@@ -22,17 +22,17 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -fexceptions -fno-rtti -Wno-strict-aliasing -Wno-class-memaccess") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -fexceptions -fno-rtti -Wno-strict-aliasing -Wno-class-memaccess")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast -s") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast -s")
if (XMRIG_ARMv8) if (ARM_TARGET EQUAL 8)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARM8_CXX_FLAGS}") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARM8_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARM8_CXX_FLAGS} -flax-vector-conversions") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARM8_CXX_FLAGS} -flax-vector-conversions")
elseif (XMRIG_ARMv7) elseif (ARM_TARGET EQUAL 7)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv7-a -mfpu=neon")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -flax-vector-conversions") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv7-a -mfpu=neon -flax-vector-conversions")
else() else()
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes")
add_definitions(/DHAVE_ROTR) add_definitions(-DHAVE_ROTR)
endif() endif()
if (WIN32) if (WIN32)
@@ -49,41 +49,29 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static")
endif() endif()
add_definitions(/D_GNU_SOURCE) add_definitions(-D_GNU_SOURCE -DHAVE_BUILTIN_CLEAR_CACHE)
if (${CMAKE_VERSION} VERSION_LESS "3.1.0")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
endif()
#set(CMAKE_C_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -gdwarf-2")
add_definitions(/DHAVE_BUILTIN_CLEAR_CACHE)
elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC) elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC)
set(CMAKE_C_FLAGS_RELEASE "/MT /O2 /Oi /DNDEBUG /GL") set(CMAKE_C_FLAGS_RELEASE "/MP /MT /O2 /Oi /DNDEBUG /GL")
set(CMAKE_CXX_FLAGS_RELEASE "/MT /O2 /Oi /DNDEBUG /GL") set(CMAKE_CXX_FLAGS_RELEASE "/MP /MT /O2 /Oi /DNDEBUG /GL")
set(CMAKE_C_FLAGS_RELWITHDEBINFO "/Ob1 /Zi /DRELWITHDEBINFO") set(CMAKE_C_FLAGS_RELWITHDEBINFO "/MP /Ob1 /Zi /DRELWITHDEBINFO")
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/Ob1 /Zi /DRELWITHDEBINFO") set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/MP /Ob1 /Zi /DRELWITHDEBINFO")
add_definitions(/D_CRT_SECURE_NO_WARNINGS) add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_WARNINGS -DNOMINMAX -DHAVE_ROTR)
add_definitions(/D_CRT_NONSTDC_NO_WARNINGS)
add_definitions(/DNOMINMAX)
add_definitions(/DHAVE_ROTR)
elseif (CMAKE_CXX_COMPILER_ID MATCHES Clang) elseif (CMAKE_CXX_COMPILER_ID MATCHES Clang)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall")
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Ofast -funroll-loops -fmerge-all-constants") set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -funroll-loops -fmerge-all-constants")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -fexceptions -fno-rtti -Wno-missing-braces") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -fexceptions -fno-rtti")
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast -funroll-loops -fmerge-all-constants") set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -funroll-loops -fmerge-all-constants")
if (XMRIG_ARMv8) if (ARM_TARGET EQUAL 8)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARM8_CXX_FLAGS}") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARM8_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARM8_CXX_FLAGS}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARM8_CXX_FLAGS}")
elseif (XMRIG_ARMv7) elseif (ARM_TARGET EQUAL 7)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}") set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}") set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}")
else() else()
@@ -92,19 +80,18 @@ elseif (CMAKE_CXX_COMPILER_ID MATCHES Clang)
check_symbol_exists("_rotr" "x86intrin.h" HAVE_ROTR) check_symbol_exists("_rotr" "x86intrin.h" HAVE_ROTR)
if (HAVE_ROTR) if (HAVE_ROTR)
add_definitions(/DHAVE_ROTR) add_definitions(-DHAVE_ROTR)
endif() endif()
endif() endif()
if (BUILD_STATIC) if ((WIN32 AND ARM_TARGET) OR BUILD_STATIC)
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static") set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static")
endif() endif()
endif() endif()
if (NOT WIN32) if (NOT WIN32)
check_symbol_exists("__builtin___clear_cache" "stdlib.h" HAVE_BUILTIN_CLEAR_CACHE) check_symbol_exists("__builtin___clear_cache" "stdlib.h" HAVE_BUILTIN_CLEAR_CACHE)
if (HAVE_BUILTIN_CLEAR_CACHE) if (HAVE_BUILTIN_CLEAR_CACHE)
add_definitions(/DHAVE_BUILTIN_CLEAR_CACHE) add_definitions(-DHAVE_BUILTIN_CLEAR_CACHE)
endif() endif()
endif() endif()

8
cmake/ghostrider.cmake Normal file
View File

@@ -0,0 +1,8 @@
# GhostRider algorithm build configuration.
# When WITH_GHOSTRIDER is set: defines XMRIG_ALGO_GHOSTRIDER, adds the
# src/crypto/ghostrider subdirectory to the build, and sets GHOSTRIDER_LIBRARY
# to "ghostrider". Otherwise the definition is removed and GHOSTRIDER_LIBRARY
# is set to an empty string.
# NOTE(review): GHOSTRIDER_LIBRARY is presumably passed to target_link_libraries
# by the top-level CMakeLists.txt - confirm against the caller.
if (WITH_GHOSTRIDER)
add_definitions(/DXMRIG_ALGO_GHOSTRIDER)
add_subdirectory(src/crypto/ghostrider)
set(GHOSTRIDER_LIBRARY ghostrider)
else()
remove_definitions(/DXMRIG_ALGO_GHOSTRIDER)
# Empty value so the variable can be referenced unconditionally.
set(GHOSTRIDER_LIBRARY "")
endif()

View File

@@ -15,39 +15,37 @@ else()
set(XMRIG_OS_ANDROID ON) set(XMRIG_OS_ANDROID ON)
elseif(CMAKE_SYSTEM_NAME MATCHES "Linux") elseif(CMAKE_SYSTEM_NAME MATCHES "Linux")
set(XMRIG_OS_LINUX ON) set(XMRIG_OS_LINUX ON)
elseif(CMAKE_SYSTEM_NAME STREQUAL FreeBSD) elseif(CMAKE_SYSTEM_NAME STREQUAL FreeBSD OR CMAKE_SYSTEM_NAME STREQUAL DragonFly)
set(XMRIG_OS_FREEBSD ON) set(XMRIG_OS_FREEBSD ON)
endif() endif()
endif() endif()
if (XMRIG_OS_WIN) if (XMRIG_OS_WIN)
add_definitions(/DWIN32) add_definitions(-DWIN32 -DXMRIG_OS_WIN)
add_definitions(/DXMRIG_OS_WIN)
elseif(XMRIG_OS_APPLE) elseif(XMRIG_OS_APPLE)
add_definitions(/DXMRIG_OS_APPLE) add_definitions(-DXMRIG_OS_APPLE)
if (XMRIG_OS_IOS) if (XMRIG_OS_IOS)
add_definitions(/DXMRIG_OS_IOS) add_definitions(-DXMRIG_OS_IOS)
else() else()
add_definitions(/DXMRIG_OS_MACOS) add_definitions(-DXMRIG_OS_MACOS)
endif() endif()
if (XMRIG_ARM) if (XMRIG_ARM)
set(WITH_SECURE_JIT ON) set(WITH_SECURE_JIT ON)
endif() endif()
elseif(XMRIG_OS_UNIX) elseif(XMRIG_OS_UNIX)
add_definitions(/DXMRIG_OS_UNIX) add_definitions(-DXMRIG_OS_UNIX)
if (XMRIG_OS_ANDROID) if (XMRIG_OS_ANDROID)
add_definitions(/DXMRIG_OS_ANDROID) add_definitions(-DXMRIG_OS_ANDROID)
elseif (XMRIG_OS_LINUX) elseif (XMRIG_OS_LINUX)
add_definitions(/DXMRIG_OS_LINUX) add_definitions(-DXMRIG_OS_LINUX)
elseif (XMRIG_OS_FREEBSD) elseif (XMRIG_OS_FREEBSD)
add_definitions(/DXMRIG_OS_FREEBSD) add_definitions(-DXMRIG_OS_FREEBSD)
endif() endif()
endif() endif()
if (WITH_SECURE_JIT) if (WITH_SECURE_JIT)
add_definitions(/DXMRIG_SECURE_JIT) add_definitions(-DXMRIG_SECURE_JIT)
endif() endif()

View File

@@ -1,4 +1,18 @@
if (WITH_RANDOMX) if (WITH_RANDOMX)
include(CheckSymbolExists)
if (WIN32)
check_symbol_exists(_aligned_malloc "stdlib.h" HAVE_ALIGNED_MALLOC)
if (HAVE_ALIGNED_MALLOC)
add_compile_definitions(HAVE_ALIGNED_MALLOC)
endif()
else()
check_symbol_exists(posix_memalign "stdlib.h" HAVE_POSIX_MEMALIGN)
if (HAVE_POSIX_MEMALIGN)
add_compile_definitions(HAVE_POSIX_MEMALIGN)
endif()
endif()
add_definitions(/DXMRIG_ALGO_RANDOMX) add_definitions(/DXMRIG_ALGO_RANDOMX)
set(WITH_ARGON2 ON) set(WITH_ARGON2 ON)
@@ -42,13 +56,13 @@ if (WITH_RANDOMX)
src/crypto/rx/RxVm.cpp src/crypto/rx/RxVm.cpp
) )
if (CMAKE_C_COMPILER_ID MATCHES MSVC) if (WITH_ASM AND CMAKE_C_COMPILER_ID MATCHES MSVC)
enable_language(ASM_MASM) enable_language(ASM_MASM)
list(APPEND SOURCES_CRYPTO list(APPEND SOURCES_CRYPTO
src/crypto/randomx/jit_compiler_x86_static.asm src/crypto/randomx/jit_compiler_x86_static.asm
src/crypto/randomx/jit_compiler_x86.cpp src/crypto/randomx/jit_compiler_x86.cpp
) )
elseif (NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8) elseif (WITH_ASM AND NOT XMRIG_ARM AND CMAKE_SIZEOF_VOID_P EQUAL 8)
list(APPEND SOURCES_CRYPTO list(APPEND SOURCES_CRYPTO
src/crypto/randomx/jit_compiler_x86_static.S src/crypto/randomx/jit_compiler_x86_static.S
src/crypto/randomx/jit_compiler_x86.cpp src/crypto/randomx/jit_compiler_x86.cpp
@@ -76,7 +90,15 @@ if (WITH_RANDOMX)
list(APPEND SOURCES_CRYPTO src/crypto/randomx/blake2/blake2b_sse41.c) list(APPEND SOURCES_CRYPTO src/crypto/randomx/blake2/blake2b_sse41.c)
if (CMAKE_C_COMPILER_ID MATCHES GNU OR CMAKE_C_COMPILER_ID MATCHES Clang) if (CMAKE_C_COMPILER_ID MATCHES GNU OR CMAKE_C_COMPILER_ID MATCHES Clang)
set_source_files_properties(src/crypto/randomx/blake2/blake2b_sse41.c PROPERTIES COMPILE_FLAGS -msse4.1) set_source_files_properties(src/crypto/randomx/blake2/blake2b_sse41.c PROPERTIES COMPILE_FLAGS "-Ofast -msse4.1")
endif()
endif()
if (WITH_AVX2)
list(APPEND SOURCES_CRYPTO src/crypto/randomx/blake2/avx2/blake2b_avx2.c)
if (CMAKE_C_COMPILER_ID MATCHES GNU OR CMAKE_C_COMPILER_ID MATCHES Clang)
set_source_files_properties(src/crypto/randomx/blake2/avx2/blake2b_avx2.c PROPERTIES COMPILE_FLAGS "-Ofast -mavx2")
endif() endif()
endif() endif()
@@ -100,13 +122,29 @@ if (WITH_RANDOMX)
message("-- WITH_MSR=ON") message("-- WITH_MSR=ON")
if (XMRIG_OS_WIN) if (XMRIG_OS_WIN)
list(APPEND SOURCES_CRYPTO src/crypto/rx/Rx_win.cpp) list(APPEND SOURCES_CRYPTO
src/crypto/rx/RxFix_win.cpp
src/hw/msr/Msr_win.cpp
)
elseif (XMRIG_OS_LINUX) elseif (XMRIG_OS_LINUX)
list(APPEND SOURCES_CRYPTO src/crypto/rx/Rx_linux.cpp) list(APPEND SOURCES_CRYPTO
src/crypto/rx/RxFix_linux.cpp
src/hw/msr/Msr_linux.cpp
)
endif() endif()
list(APPEND HEADERS_CRYPTO src/crypto/rx/msr/MsrItem.h) list(APPEND HEADERS_CRYPTO
list(APPEND SOURCES_CRYPTO src/crypto/rx/msr/MsrItem.cpp) src/crypto/rx/RxFix.h
src/crypto/rx/RxMsr.h
src/hw/msr/Msr.h
src/hw/msr/MsrItem.h
)
list(APPEND SOURCES_CRYPTO
src/crypto/rx/RxMsr.cpp
src/hw/msr/Msr.cpp
src/hw/msr/MsrItem.cpp
)
else() else()
remove_definitions(/DXMRIG_FEATURE_MSR) remove_definitions(/DXMRIG_FEATURE_MSR)
remove_definitions(/DXMRIG_FIX_RYZEN) remove_definitions(/DXMRIG_FIX_RYZEN)

View File

@@ -13,7 +13,6 @@ Option `coin` useful for pools without [algorithm negotiation](https://xmrig.com
| Name | Memory | Version | Description | Notes | | Name | Memory | Version | Description | Notes |
|------|--------|---------|-------------|-------| |------|--------|---------|-------------|-------|
| `kawpow` | - | 6.0.0+ | KawPow (Ravencoin) | GPU only | | `kawpow` | - | 6.0.0+ | KawPow (Ravencoin) | GPU only |
| `rx/keva` | 1 MB | 5.9.0+ | RandomKEVA (RandomX variant for Keva). | |
| `astrobwt` | 20 MB | 5.8.0+ | AstroBWT (Dero). | | | `astrobwt` | 20 MB | 5.8.0+ | AstroBWT (Dero). | |
| `cn-pico/tlo` | 256 KB | 5.5.0+ | CryptoNight-Pico (Talleo). | | | `cn-pico/tlo` | 256 KB | 5.5.0+ | CryptoNight-Pico (Talleo). | |
| `rx/sfx` | 2 MB | 5.4.0+ | RandomSFX (RandomX variant for Safex). | | | `rx/sfx` | 2 MB | 5.4.0+ | RandomSFX (RandomX variant for Safex). | |

View File

@@ -256,7 +256,7 @@
# v2.8.0 # v2.8.0
- **[#753](https://github.com/xmrig/xmrig/issues/753) Added new algorithm [CryptoNight variant 2](https://github.com/xmrig/xmrig/issues/753) for Monero fork, thanks [@SChernykh](https://github.com/SChernykh).** - **[#753](https://github.com/xmrig/xmrig/issues/753) Added new algorithm [CryptoNight variant 2](https://github.com/xmrig/xmrig/issues/753) for Monero fork, thanks [@SChernykh](https://github.com/SChernykh).**
- Added global and per thread option `"asm"` and and command line equivalent. - Added global and per thread option `"asm"` and command line equivalent.
- **[#758](https://github.com/xmrig/xmrig/issues/758) Added SSL/TLS support for secure connections to pools.** - **[#758](https://github.com/xmrig/xmrig/issues/758) Added SSL/TLS support for secure connections to pools.**
- Added per pool options `"tls"` and `"tls-fingerprint"` and command line equivalents. - Added per pool options `"tls"` and `"tls-fingerprint"` and command line equivalents.
- [#767](https://github.com/xmrig/xmrig/issues/767) Added config autosave feature, same with GPU miners. - [#767](https://github.com/xmrig/xmrig/issues/767) Added config autosave feature, same with GPU miners.

View File

@@ -1,3 +1,5 @@
**:warning: The most recent version of this page is available at https://xmrig.com/docs/miner/config/cpu.**
# CPU backend # CPU backend
All CPU related settings contains in one `cpu` object in config file, CPU backend allow specify multiple profiles and allow switch between them without restrictions by pool request or config change. Default auto-configuration create reasonable minimum of profiles which cover all supported algorithms. All CPU related settings contains in one `cpu` object in config file, CPU backend allow specify multiple profiles and allow switch between them without restrictions by pool request or config change. Default auto-configuration create reasonable minimum of profiles which cover all supported algorithms.
@@ -75,6 +77,35 @@ Each number represent one thread and means CPU affinity, this is default format
``` ```
Internal format, but can be user defined. Internal format, but can be user defined.
## RandomX options
#### `init`
Thread count to initialize RandomX dataset. Auto-detect (`-1`) or any number greater than 0 to use that many threads.
#### `init-avx2`
Use AVX2 for dataset initialization. Faster on some CPUs. Auto-detect (`-1`), disabled (`0`), always enabled on CPUs that support AVX2 (`1`).
#### `mode`
RandomX mining mode: `auto`, `fast` (2 GB memory), `light` (256 MB memory).
#### `1gb-pages`
Use 1GB hugepages for RandomX dataset (Linux only). Enabled (`true`) or disabled (`false`). It gives 1-3% speedup.
#### `wrmsr`
[MSR mod](https://xmrig.com/docs/miner/randomx-optimization-guide/msr). Enabled (`true`) or disabled (`false`). It gives up to 15% speedup depending on your system. _(**Note**: Userspace MSR writes are no longer enabled by default; the flag `msr.allow_writes=on` must be set for Linux Kernels 5.9 and after.)_
#### `rdmsr`
Restore MSR register values to their original values on exit. Used together with `wrmsr`. Enabled (`true`) or disabled (`false`).
#### `cache_qos`
[Cache QoS](https://xmrig.com/docs/miner/randomx-optimization-guide/qos). Enabled (`true`) or disabled (`false`). It's useful when you can't or don't want to mine on all CPU cores to make mining hashrate more stable.
#### `numa`
NUMA support (better hashrate on multi-CPU servers and Ryzen Threadripper 1xxx/2xxx). Enabled (`true`) or disabled (`false`).
#### `scratchpad_prefetch_mode`
Which instruction to use in RandomX loop to prefetch data from scratchpad. `1` is default and fastest in most cases. Can be off (`0`), `prefetcht0` instruction (`1`), `prefetchnta` instruction (`2`, a bit faster on Coffee Lake and a few other CPUs), `mov` instruction (`3`).
## Shared options ## Shared options
#### `enabled` #### `enabled`
@@ -83,23 +114,32 @@ Enable (`true`) or disable (`false`) CPU backend, by default `true`.
#### `huge-pages` #### `huge-pages`
Enable (`true`) or disable (`false`) huge pages support, by default `true`. Enable (`true`) or disable (`false`) huge pages support, by default `true`.
#### `huge-pages-jit`
Enable (`true`) or disable (`false`) huge pages support for RandomX JIT code, by default `false`. It gives a very small boost on Ryzen CPUs, but hashrate is unstable between launches. Use with caution.
#### `hw-aes` #### `hw-aes`
Force enable (`true`) or disable (`false`) hardware AES support. Default value `null` means miner autodetect this feature. Usually don't need change this option, this option useful for some rare cases when miner can't detect hardware AES, but it available. If you force enable this option, but your hardware not support it, miner will crash. Force enable (`true`) or disable (`false`) hardware AES support. Default value `null` means miner autodetect this feature. Usually don't need change this option, this option useful for some rare cases when miner can't detect hardware AES, but it available. If you force enable this option, but your hardware not support it, miner will crash.
#### `priority` #### `priority`
Mining threads priority, value from `1` (lowest priority) to `5` (highest possible priority). Default value `null` means miner don't change threads priority at all. Mining threads priority, value from `1` (lowest priority) to `5` (highest possible priority). Default value `null` means miner don't change threads priority at all. Setting priority higher than 2 can make your PC unresponsive.
#### `memory-pool` (since v4.3.0)
Use a continuous, persistent memory block for mining threads, useful for preserving huge pages allocation while switching algorithms. Possible values: `false` (feature disabled, the default), `true`, or a specific count of 2 MB huge pages. It helps to avoid losing huge pages for scratchpads when the RandomX dataset is updated and mining threads restart after 2-3 days of mining.
#### `yield` (since v5.1.1)
Prefer better system response/stability (`true`, the default value) or maximum hashrate (`false`).
#### `asm` #### `asm`
Enable/configure or disable ASM optimizations. Possible values: `true`, `false`, `"intel"`, `"ryzen"`, `"bulldozer"`. Enable/configure or disable ASM optimizations. Possible values: `true`, `false`, `"intel"`, `"ryzen"`, `"bulldozer"`.
#### `argon2-impl` (since v3.1.0) #### `argon2-impl` (since v3.1.0)
Allow override automatically detected Argon2 implementation, this option added mostly for debug purposes, default value `null` means autodetect. Other possible values: `"x86_64"`, `"SSE2"`, `"SSSE3"`, `"XOP"`, `"AVX2"`, `"AVX-512F"`. Manual selection has no safe guards, if you CPU not support required instuctions, miner will crash. Allow override automatically detected Argon2 implementation, this option added mostly for debug purposes, default value `null` means autodetect. This is used in RandomX dataset initialization and also in some other mining algorithms. Other possible values: `"x86_64"`, `"SSE2"`, `"SSSE3"`, `"XOP"`, `"AVX2"`, `"AVX-512F"`. Manual selection has no safe guards - if your CPU doesn't support required instuctions, miner will crash.
#### `astrobwt-max-size`
AstroBWT algorithm: skip hashes with large stage 2 size, default: `550`, min: `400`, max: `1200`. Optimal value depends on your CPU/GPU.
#### `astrobwt-avx2`
AstroBWT algorithm: use AVX2 code. It's faster on some CPUs and slower on others.
#### `max-threads-hint` (since v4.2.0) #### `max-threads-hint` (since v4.2.0)
Maximum CPU threads count (in percentage) hint for autoconfig. [CPU_MAX_USAGE.md](CPU_MAX_USAGE.md) Maximum CPU threads count (in percentage) hint for autoconfig. [CPU_MAX_USAGE.md](CPU_MAX_USAGE.md)
#### `memory-pool` (since v4.3.0)
Use continuous, persistent memory block for mining threads, useful for preserve huge pages allocation while algorithm swithing. Possible values `false` (feature disabled, by default) or `true` or specific count of 2 MB huge pages.
#### `yield` (since v5.1.1)
Prefer system better system response/stability `true` (default value) or maximum hashrate `false`.

View File

@@ -1,5 +0,0 @@
6bb1a2e3a0fbca5195be6022f2a9fbff8a353c37c7542e7ab89420cb45b64505 xmrig-5.0.1-gcc-win32.zip
24dba9ec281acfb2ea2c401ebd0e4e2d1f1ee5fd557da5ff3c7049020c1f78b6 xmrig-5.0.1-gcc-win64.zip
86d65c6693ec9e35cd7547329580638b85c9eb0cf8383892a1c15199de5b556f xmrig-5.0.1-msvc-cuda10_1-win64.zip
0fbfe518b1c4b6993b0f66ff01302626375b15620ccf8f64d6fb97845068ffca xmrig-5.0.1-msvc-win64.zip
aa34890738a3494de2fa0e44db346937fea7339852f5f10b5d4655f95e2d8f1f xmrig-5.0.1-xenial-x64.tar.gz

View File

@@ -1,11 +0,0 @@
-----BEGIN PGP SIGNATURE-----
iQEzBAABCgAdFiEEmsTOqOZuNaXHzdwbRGpTY4vpRAkFAl3VcsoACgkQRGpTY4vp
RAm9vQgA1MyTUU2jley2TCYLUzQy2Fffc8fbXYv64r44jbWOjC/6qo2iIlRgPhIc
oVyPKr5TYS3QjDzCEm8IvozS0YudS6soESbPzqDonboK8pd0K4bsML9TQY2feV7A
NL5vln0rfVHp1wxLLrQpfBqAgvJUXEyaHece6gFQN79JOGhEo2bHL2NyrOl+FViS
b2BaMtXq410Fh+XT6ShnOaG/2EuO8ZqSGdCO6A/2LHQw1UY+mZiCvue6P6B06HmB
WD/urOv38V389v+V+Sp4UlEW6VpBOOjvtChoVWtLt+tKzydrnt2EmoWWWg475pka
4G6whHuMWS8CTt5/PDhJpvVXNQTIOw==
=C764
-----END PGP SIGNATURE-----

View File

@@ -1,4 +1,4 @@
@echo off @echo off
cd %~dp0 cd /d "%~dp0"
xmrig.exe --bench=10M --submit xmrig.exe --bench=10M --submit
pause pause

View File

@@ -1,4 +1,4 @@
@echo off @echo off
cd %~dp0 cd /d "%~dp0"
xmrig.exe --bench=1M --submit xmrig.exe --bench=1M --submit
pause pause

View File

@@ -1,6 +1,10 @@
#!/bin/bash -e #!/bin/sh -e
HWLOC_VERSION="2.4.0" HWLOC_VERSION_MAJOR="2"
HWLOC_VERSION_MINOR="12"
HWLOC_VERSION_PATCH="1"
HWLOC_VERSION="${HWLOC_VERSION_MAJOR}.${HWLOC_VERSION_MINOR}.${HWLOC_VERSION_PATCH}"
mkdir -p deps mkdir -p deps
mkdir -p deps/include mkdir -p deps/include
@@ -8,7 +12,7 @@ mkdir -p deps/lib
mkdir -p build && cd build mkdir -p build && cd build
wget https://download.open-mpi.org/release/hwloc/v2.4/hwloc-${HWLOC_VERSION}.tar.gz -O hwloc-${HWLOC_VERSION}.tar.gz wget https://download.open-mpi.org/release/hwloc/v${HWLOC_VERSION_MAJOR}.${HWLOC_VERSION_MINOR}/hwloc-${HWLOC_VERSION}.tar.gz -O hwloc-${HWLOC_VERSION}.tar.gz
tar -xzf hwloc-${HWLOC_VERSION}.tar.gz tar -xzf hwloc-${HWLOC_VERSION}.tar.gz
cd hwloc-${HWLOC_VERSION} cd hwloc-${HWLOC_VERSION}
@@ -16,4 +20,4 @@ cd hwloc-${HWLOC_VERSION}
make -j$(nproc || sysctl -n hw.ncpu || sysctl -n hw.logicalcpu) make -j$(nproc || sysctl -n hw.ncpu || sysctl -n hw.logicalcpu)
cp -fr include ../../deps cp -fr include ../../deps
cp hwloc/.libs/libhwloc.a ../../deps/lib cp hwloc/.libs/libhwloc.a ../../deps/lib
cd .. cd ..

View File

@@ -1,4 +1,4 @@
#!/bin/bash -e #!/bin/sh -e
HWLOC_VERSION="1.11.13" HWLOC_VERSION="1.11.13"

View File

@@ -1,6 +1,6 @@
#!/bin/bash -e #!/bin/sh -e
LIBRESSL_VERSION="3.0.2" LIBRESSL_VERSION="3.5.2"
mkdir -p deps mkdir -p deps
mkdir -p deps/include mkdir -p deps/include
@@ -17,4 +17,4 @@ make -j$(nproc || sysctl -n hw.ncpu || sysctl -n hw.logicalcpu)
cp -fr include ../../deps cp -fr include ../../deps
cp crypto/.libs/libcrypto.a ../../deps/lib cp crypto/.libs/libcrypto.a ../../deps/lib
cp ssl/.libs/libssl.a ../../deps/lib cp ssl/.libs/libssl.a ../../deps/lib
cd .. cd ..

View File

@@ -1,6 +1,6 @@
#!/bin/bash -e #!/bin/sh -e
OPENSSL_VERSION="1.1.1i" OPENSSL_VERSION="1.1.1u"
mkdir -p deps mkdir -p deps
mkdir -p deps/include mkdir -p deps/include
@@ -8,7 +8,7 @@ mkdir -p deps/lib
mkdir -p build && cd build mkdir -p build && cd build
wget https://www.openssl.org/source/openssl-${OPENSSL_VERSION}.tar.gz -O openssl-${OPENSSL_VERSION}.tar.gz wget https://openssl.org/source/old/1.1.1/openssl-${OPENSSL_VERSION}.tar.gz -O openssl-${OPENSSL_VERSION}.tar.gz
tar -xzf openssl-${OPENSSL_VERSION}.tar.gz tar -xzf openssl-${OPENSSL_VERSION}.tar.gz
cd openssl-${OPENSSL_VERSION} cd openssl-${OPENSSL_VERSION}
@@ -17,4 +17,4 @@ make -j$(nproc || sysctl -n hw.ncpu || sysctl -n hw.logicalcpu)
cp -fr include ../../deps cp -fr include ../../deps
cp libcrypto.a ../../deps/lib cp libcrypto.a ../../deps/lib
cp libssl.a ../../deps/lib cp libssl.a ../../deps/lib
cd .. cd ..

20
scripts/build.openssl3.sh Executable file
View File

@@ -0,0 +1,20 @@
#!/bin/sh -e
# Downloads the OpenSSL ${OPENSSL_VERSION} release tarball from GitHub, builds
# static libraries, and copies the headers plus libcrypto.a / libssl.a into
# ./deps. The -e flag on the shebang makes the script stop at the first
# failing command.
OPENSSL_VERSION="3.0.16"
# Output layout: deps/include for headers, deps/lib for static libraries.
mkdir -p deps
mkdir -p deps/include
mkdir -p deps/lib
# All downloading and compiling happens inside ./build.
mkdir -p build && cd build
wget https://github.com/openssl/openssl/releases/download/openssl-${OPENSSL_VERSION}/openssl-${OPENSSL_VERSION}.tar.gz -O openssl-${OPENSSL_VERSION}.tar.gz
tar -xzf openssl-${OPENSSL_VERSION}.tar.gz
cd openssl-${OPENSSL_VERSION}
# -no-shared: only static libraries are produced; the remaining -no-* options
# switch off the named OpenSSL features.
./config -no-shared -no-asm -no-zlib -no-comp -no-dgram -no-filenames -no-cms
# Parallel job count: try nproc first, then fall back to the sysctl queries.
make -j$(nproc || sysctl -n hw.ncpu || sysctl -n hw.logicalcpu)
# Paths are relative to build/openssl-${OPENSSL_VERSION}, so ../../deps is the
# deps directory created above.
cp -fr include ../../deps
cp libcrypto.a ../../deps/lib
cp libssl.a ../../deps/lib
cd ..

View File

@@ -1,6 +1,6 @@
#!/bin/bash -e #!/bin/sh -e
UV_VERSION="1.40.0" UV_VERSION="1.51.0"
mkdir -p deps mkdir -p deps
mkdir -p deps/include mkdir -p deps/include
@@ -8,13 +8,13 @@ mkdir -p deps/lib
mkdir -p build && cd build mkdir -p build && cd build
wget https://github.com/libuv/libuv/archive/v${UV_VERSION}.tar.gz -O v${UV_VERSION}.tar.gz wget https://dist.libuv.org/dist/v${UV_VERSION}/libuv-v${UV_VERSION}.tar.gz -O v${UV_VERSION}.tar.gz
tar -xzf v${UV_VERSION}.tar.gz tar -xzf v${UV_VERSION}.tar.gz
cd libuv-${UV_VERSION} cd libuv-v${UV_VERSION}
sh autogen.sh sh autogen.sh
./configure --disable-shared ./configure --disable-shared
make -j$(nproc || sysctl -n hw.ncpu || sysctl -n hw.logicalcpu) make -j$(nproc || sysctl -n hw.ncpu || sysctl -n hw.logicalcpu)
cp -fr include ../../deps cp -fr include ../../deps
cp .libs/libuv.a ../../deps/lib cp .libs/libuv.a ../../deps/lib
cd .. cd ..

View File

@@ -1,5 +1,5 @@
#!/bin/bash -e #!/bin/sh -e
./build.uv.sh ./build.uv.sh
./build.hwloc.sh ./build.hwloc.sh
./build.openssl.sh ./build.openssl3.sh

View File

@@ -1,4 +1,4 @@
#!/bin/bash -e #!/bin/sh -e
# https://xmrig.com/docs/miner/hugepages#onegb-huge-pages # https://xmrig.com/docs/miner/hugepages#onegb-huge-pages

View File

@@ -6,7 +6,6 @@ const fs = require('fs');
const path = require('path'); const path = require('path');
const { text2h, text2h_bundle, addIncludes } = require('./js/opencl'); const { text2h, text2h_bundle, addIncludes } = require('./js/opencl');
const { opencl_minify } = require('./js/opencl_minify'); const { opencl_minify } = require('./js/opencl_minify');
const cwd = process.cwd();
function cn() function cn()
@@ -50,7 +49,7 @@ function rx()
'randomx_constants_monero.h', 'randomx_constants_monero.h',
'randomx_constants_wow.h', 'randomx_constants_wow.h',
'randomx_constants_arqma.h', 'randomx_constants_arqma.h',
'randomx_constants_keva.h', 'randomx_constants_graft.h',
'aes.cl', 'aes.cl',
'blake2b.cl', 'blake2b.cl',
'randomx_vm.cl', 'randomx_vm.cl',
@@ -66,15 +65,6 @@ function rx()
} }
function astrobwt()
{
const astrobwt = opencl_minify(addIncludes('astrobwt.cl', [ 'BWT.cl', 'salsa20.cl', 'sha3.cl' ]));
// fs.writeFileSync('astrobwt_gen.cl', astrobwt);
fs.writeFileSync('astrobwt_cl.h', text2h(astrobwt, 'xmrig', 'astrobwt_cl'));
}
function kawpow() function kawpow()
{ {
const kawpow = opencl_minify(addIncludes('kawpow.cl', [ 'defs.h' ])); const kawpow = opencl_minify(addIncludes('kawpow.cl', [ 'defs.h' ]));
@@ -85,23 +75,24 @@ function kawpow()
fs.writeFileSync('kawpow_dag_cl.h', text2h(kawpow_dag, 'xmrig', 'kawpow_dag_cl')); fs.writeFileSync('kawpow_dag_cl.h', text2h(kawpow_dag, 'xmrig', 'kawpow_dag_cl'));
} }
for (let i = 0; i < 2; i++) {
if (fs.existsSync('src/backend/opencl/cl/OclSource.h')) {
break;
}
process.chdir(path.resolve('src/backend/opencl/cl/cn')); process.chdir('..');
}
process.chdir(path.resolve('src/backend/opencl/cl'));
const cwd = process.cwd();
process.chdir(path.resolve(cwd, 'cn'));
cn(); cn();
cn_r(); cn_r();
process.chdir(cwd); process.chdir(path.resolve(cwd, 'rx'));
process.chdir(path.resolve('src/backend/opencl/cl/rx'));
rx(); rx();
process.chdir(cwd); process.chdir(path.resolve(cwd, 'kawpow'));
process.chdir(path.resolve('src/backend/opencl/cl/astrobwt'));
astrobwt();
process.chdir(cwd);
process.chdir(path.resolve('src/backend/opencl/cl/kawpow'));
kawpow(); kawpow();

View File

@@ -15,6 +15,6 @@
:: Choose pools outside of top 5 to help Monero network be more decentralized! :: Choose pools outside of top 5 to help Monero network be more decentralized!
:: Smaller pools also often have smaller fees/payout limits. :: Smaller pools also often have smaller fees/payout limits.
cd %~dp0 cd /d "%~dp0"
xmrig.exe -o pool.hashvault.pro:3333 -u 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD -p x xmrig.exe -o xmrpool.eu:3333 -u 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD -p x
pause pause

View File

@@ -1,28 +1,52 @@
#!/bin/bash #!/bin/sh -e
modprobe msr MSR_FILE=/sys/module/msr/parameters/allow_writes
if cat /proc/cpuinfo | grep "AMD Ryzen" > /dev/null; if test -e "$MSR_FILE"; then
echo on > $MSR_FILE
else
modprobe msr allow_writes=on
fi
if grep -E 'AMD Ryzen|AMD EPYC|AuthenticAMD' /proc/cpuinfo > /dev/null;
then then
if cat /proc/cpuinfo | grep "cpu family[[:space:]]:[[:space:]]25" > /dev/null; if grep "cpu family[[:space:]]\{1,\}:[[:space:]]25" /proc/cpuinfo > /dev/null;
then then
echo "Detected Ryzen (Zen3)" if grep "model[[:space:]]\{1,\}:[[:space:]]97" /proc/cpuinfo > /dev/null;
wrmsr -a 0xc0011020 0x4480000000000 then
wrmsr -a 0xc0011021 0x1c000200000040 echo "Detected Zen4 CPU"
wrmsr -a 0xc0011022 0xc000000401500000 wrmsr -a 0xc0011020 0x4400000000000
wrmsr -a 0xc001102b 0x2000cc14 wrmsr -a 0xc0011021 0x4000000000040
echo "MSR register values for Ryzen (Zen3) applied" wrmsr -a 0xc0011022 0x8680000401570000
wrmsr -a 0xc001102b 0x2040cc10
echo "MSR register values for Zen4 applied"
else
echo "Detected Zen3 CPU"
wrmsr -a 0xc0011020 0x4480000000000
wrmsr -a 0xc0011021 0x1c000200000040
wrmsr -a 0xc0011022 0xc000000401570000
wrmsr -a 0xc001102b 0x2000cc10
echo "MSR register values for Zen3 applied"
fi
elif grep "cpu family[[:space:]]\{1,\}:[[:space:]]26" /proc/cpuinfo > /dev/null;
then
echo "Detected Zen5 CPU"
wrmsr -a 0xc0011020 0x4400000000000
wrmsr -a 0xc0011021 0x4000000000040
wrmsr -a 0xc0011022 0x8680000401570000
wrmsr -a 0xc001102b 0x2040cc10
echo "MSR register values for Zen5 applied"
else else
echo "Detected Ryzen (Zen1/Zen2)" echo "Detected Zen1/Zen2 CPU"
wrmsr -a 0xc0011020 0 wrmsr -a 0xc0011020 0
wrmsr -a 0xc0011021 0x40 wrmsr -a 0xc0011021 0x40
wrmsr -a 0xc0011022 0x1510000 wrmsr -a 0xc0011022 0x1510000
wrmsr -a 0xc001102b 0x2000cc16 wrmsr -a 0xc001102b 0x2000cc16
echo "MSR register values for Ryzen (Zen1/Zen2) applied" echo "MSR register values for Zen1/Zen2 applied"
fi fi
elif cat /proc/cpuinfo | grep "Intel" > /dev/null; elif grep "Intel" /proc/cpuinfo > /dev/null;
then then
echo "Detected Intel" echo "Detected Intel CPU"
wrmsr -a 0x1a4 0xf wrmsr -a 0x1a4 0xf
echo "MSR register values for Intel applied" echo "MSR register values for Intel applied"
else else

View File

@@ -0,0 +1,23 @@
:: Example batch file for mining Raptoreum at a pool
::
:: Format:
:: xmrig.exe -a gr -o <pool address>:<pool port> -u <pool username/wallet> -p <pool password>
::
:: Fields:
:: pool address The host name of the pool stratum or its IP address, for example raptoreumemporium.com
:: pool port The port of the pool's stratum to connect to, for example 3333. Check your pool's getting started page.
:: pool username/wallet For most pools, this is the wallet address you want to mine to. Some pools require a username.
:: pool password For most pools this can be just 'x'. For pools using usernames, you may need to provide a password as configured on the pool.
::
:: List of Raptoreum mining pools:
:: https://miningpoolstats.stream/raptoreum
::
:: Choose pools outside of top 5 to help Raptoreum network be more decentralized!
:: Smaller pools also often have smaller fees/payout limits.
:: Switch to the folder containing this batch file ("%~dp0" expands to its drive and path; /d also changes the drive).
cd /d "%~dp0"
:: Use this command line to connect to non-SSL port (WALLET_ADDRESS is a placeholder for your own wallet address)
xmrig.exe -a gr -o raptoreumemporium.com:3008 -u WALLET_ADDRESS -p x
:: Or use this command line to connect to an SSL port
:: xmrig.exe -a gr -o rtm.suprnova.cc:4273 --tls -u WALLET_ADDRESS -p x
:: Keep the console window open after xmrig.exe exits until a key is pressed.
pause

View File

@@ -11,6 +11,6 @@
:: Mining solo is the best way to help Monero network be more decentralized! :: Mining solo is the best way to help Monero network be more decentralized!
:: But you will only get a payout when you find a block which can take more than a year for a single low-end PC. :: But you will only get a payout when you find a block which can take more than a year for a single low-end PC.
cd %~dp0 cd /d "%~dp0"
xmrig.exe -o node.xmr.to:18081 -a rx/0 -u 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD --daemon xmrig.exe -o YOUR_NODE_IP:18081 -a rx/0 -u 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD --daemon
pause pause

View File

@@ -44,7 +44,7 @@ extern "C" {
typedef cl_uint cl_dx9_media_adapter_type_khr; typedef cl_uint cl_dx9_media_adapter_type_khr;
typedef cl_uint cl_dx9_media_adapter_set_khr; typedef cl_uint cl_dx9_media_adapter_set_khr;
#if defined(_WIN32) #if defined(_WIN32)
#include <d3d9.h> #include <d3d9.h>
typedef struct _cl_dx9_surface_info_khr typedef struct _cl_dx9_surface_info_khr
@@ -105,7 +105,7 @@ typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)(
cl_mem_flags flags, cl_mem_flags flags,
cl_dx9_media_adapter_type_khr adapter_type, cl_dx9_media_adapter_type_khr adapter_type,
void * surface_info, void * surface_info,
cl_uint plane, cl_uint plane,
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2; cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)( typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)(

View File

@@ -35,7 +35,7 @@ extern "C" {
#include <CL/cl_gl.h> #include <CL/cl_gl.h>
/* /*
* cl_khr_gl_event extension * cl_khr_gl_event extension
*/ */
#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D #define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D

View File

@@ -1471,7 +1471,7 @@ typedef enum _ADLProfilePropertyType
#define ADL_HDR_FREESYNC_HDR 0x0004 ///< FreeSync HDR supported #define ADL_HDR_FREESYNC_HDR 0x0004 ///< FreeSync HDR supported
/// @} /// @}
/// \defgroup define_FreesyncFlags ADLDDCInfo2 Freesync HDR flags /// \defgroup define_FreesyncFlags ADLDDCInfo2 Freesync HDR flags
/// @{ /// @{
/// defines for iFreesyncFlags in ADLDDCInfo2 /// defines for iFreesyncFlags in ADLDDCInfo2
#define ADL_HDR_FREESYNC_BACKLIGHT_SUPPORT 0x0001 ///< Global backlight control supported #define ADL_HDR_FREESYNC_BACKLIGHT_SUPPORT 0x0001 ///< Global backlight control supported
@@ -1738,7 +1738,7 @@ enum ADLODNDPMMaskType
ADL_ODN_DPM_MASK = 1 << 2, ADL_ODN_DPM_MASK = 1 << 2,
}; };
//ODN features Bits for ADLODNCapabilitiesX2 //ODN features Bits for ADLODNCapabilitiesX2
enum ADLODNFeatureControl enum ADLODNFeatureControl
{ {
ADL_ODN_SCLK_DPM = 1 << 0, ADL_ODN_SCLK_DPM = 1 << 0,
@@ -1764,7 +1764,7 @@ enum ADLODNFeatureControl
//If any new feature is added, PPLIB only needs to add ext feature ID and Item ID(Seeting ID). These IDs should match the drive defined in CWDDEPM.h //If any new feature is added, PPLIB only needs to add ext feature ID and Item ID(Seeting ID). These IDs should match the drive defined in CWDDEPM.h
enum ADLODNExtFeatureControl enum ADLODNExtFeatureControl
{ {
ADL_ODN_EXT_FEATURE_MEMORY_TIMING_TUNE = 1 << 0, ADL_ODN_EXT_FEATURE_MEMORY_TIMING_TUNE = 1 << 0,
ADL_ODN_EXT_FEATURE_FAN_ZERO_RPM_CONTROL = 1 << 1, ADL_ODN_EXT_FEATURE_FAN_ZERO_RPM_CONTROL = 1 << 1,
ADL_ODN_EXT_FEATURE_AUTO_UV_ENGINE = 1 << 2, //Auto under voltage ADL_ODN_EXT_FEATURE_AUTO_UV_ENGINE = 1 << 2, //Auto under voltage
@@ -1794,7 +1794,7 @@ enum ADLODNExtSettingId
ADL_ODN_PARAMETER_FAN_CURVE_SPEED_5, ADL_ODN_PARAMETER_FAN_CURVE_SPEED_5,
ADL_ODN_POWERGAUGE, ADL_ODN_POWERGAUGE,
ODN_COUNT ODN_COUNT
} ; } ;
//OD8 Capability features bits //OD8 Capability features bits
@@ -1811,7 +1811,7 @@ enum ADLOD8FeatureControl
ADL_OD8_MEMORY_TIMING_TUNE = 1 << 8, ADL_OD8_MEMORY_TIMING_TUNE = 1 << 8,
ADL_OD8_FAN_ZERO_RPM_CONTROL = 1 << 9 , ADL_OD8_FAN_ZERO_RPM_CONTROL = 1 << 9 ,
ADL_OD8_AUTO_UV_ENGINE = 1 << 10, //Auto under voltage ADL_OD8_AUTO_UV_ENGINE = 1 << 10, //Auto under voltage
ADL_OD8_AUTO_OC_ENGINE = 1 << 11, //Auto overclock engine ADL_OD8_AUTO_OC_ENGINE = 1 << 11, //Auto overclock engine
ADL_OD8_AUTO_OC_MEMORY = 1 << 12, //Auto overclock memory ADL_OD8_AUTO_OC_MEMORY = 1 << 12, //Auto overclock memory
ADL_OD8_FAN_CURVE = 1 << 13, //Fan curve ADL_OD8_FAN_CURVE = 1 << 13, //Fan curve
ADL_OD8_WS_AUTO_FAN_ACOUSTIC_LIMIT = 1 << 14, //Workstation Manual Fan controller ADL_OD8_WS_AUTO_FAN_ACOUSTIC_LIMIT = 1 << 14, //Workstation Manual Fan controller
@@ -1888,7 +1888,7 @@ typedef enum _ADLSensorType
PMLOG_TEMPERATURE_VRSOC = 24, PMLOG_TEMPERATURE_VRSOC = 24,
PMLOG_TEMPERATURE_VRMVDD0 = 25, PMLOG_TEMPERATURE_VRMVDD0 = 25,
PMLOG_TEMPERATURE_VRMVDD1 = 26, PMLOG_TEMPERATURE_VRMVDD1 = 26,
PMLOG_TEMPERATURE_HOTSPOT = 27, PMLOG_TEMPERATURE_HOTSPOT = 27,
PMLOG_TEMPERATURE_GFX = 28, PMLOG_TEMPERATURE_GFX = 28,
PMLOG_TEMPERATURE_SOC = 29, PMLOG_TEMPERATURE_SOC = 29,
PMLOG_GFX_POWER = 30, PMLOG_GFX_POWER = 30,

View File

@@ -37,7 +37,7 @@
#define __stdcall #define __stdcall
#endif /* (LINUX) */ #endif /* (LINUX) */
/// Memory Allocation Call back /// Memory Allocation Call back
typedef void* ( __stdcall *ADL_MAIN_MALLOC_CALLBACK )( int ); typedef void* ( __stdcall *ADL_MAIN_MALLOC_CALLBACK )( int );

View File

@@ -1753,7 +1753,7 @@ typedef struct ADLPXConfigCaps
///\brief Enum containing PX or HG type ///\brief Enum containing PX or HG type
/// ///
/// This enum is used to get PX or hG type /// This enum is used to get PX or hG type
/// ///
/// \nosubgrouping /// \nosubgrouping
////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////
enum ADLPxType enum ADLPxType

View File

@@ -1,4 +1,4 @@
cmake_minimum_required(VERSION 2.8.12) cmake_minimum_required(VERSION 3.10)
project(argon2 C) project(argon2 C)
set(CMAKE_C_STANDARD 99) set(CMAKE_C_STANDARD 99)

25
src/3rdparty/epee/LICENSE.txt vendored Normal file
View File

@@ -0,0 +1,25 @@
Copyright (c) 2006-2013, Andrey N. Sabelnikov, www.sabelnikov.net
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
* Neither the name of the Andrey N. Sabelnikov nor the
names of its contributors may be used to endorse or promote products
derived from this software without specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL Andrey N. Sabelnikov BE LIABLE FOR ANY
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

1
src/3rdparty/epee/README.md vendored Normal file
View File

@@ -0,0 +1 @@
epee is a small library of helpers, wrappers, tools, and so on, used to make my life easier.

176
src/3rdparty/epee/span.h vendored Normal file
View File

@@ -0,0 +1,176 @@
// Copyright (c) 2017-2020, The Monero Project
//
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without modification, are
// permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this list of
// conditions and the following disclaimer.
//
// 2. Redistributions in binary form must reproduce the above copyright notice, this list
// of conditions and the following disclaimer in the documentation and/or other
// materials provided with the distribution.
//
// 3. Neither the name of the copyright holder nor the names of its contributors may be
// used to endorse or promote products derived from this software without specific
// prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once
#include <algorithm>
#include <cstdint>
#include <memory>
#include <string>
#include <type_traits>
namespace epee
{
/*!
  \brief Non-owning sequence of data. Does not deep copy

  Inspired by `gsl::span` and/or `boost::iterator_range`. This class is
  intended to be used as a parameter type for functions that need to take a
  writable or read-only sequence of data. Most common cases are `span<char>`
  and `span<std::uint8_t>`. Using as a class member is only recommended if
  clearly documented as not doing a deep-copy. C-arrays are easily convertible
  to this type.

  \note Conversion from C string literal to `span<const char>` will include
    the NULL-terminator.
  \note Never allows derived-to-base pointer conversion; an array of derived
    types is not an array of base types.
  \note Element access through `operator[]` is unchecked.
 */
template<typename T>
class span
{
  //! \return True if a `U*` may initialize this span: `U` is `T`, or `T` is `const U`.
  template<typename U>
  static constexpr bool safe_conversion() noexcept
  {
    // Allow exact matches or `T*` -> `const T*`.
    using with_const = typename std::add_const<U>::type;
    return std::is_same<T, U>() ||
      (std::is_const<T>() && std::is_same<T, with_const>());
  }

public:
  using value_type = T;
  using size_type = std::size_t;
  using difference_type = std::ptrdiff_t;
  using pointer = T*;
  using const_pointer = const T*;
  using reference = T&;
  using const_reference = const T&;
  using iterator = pointer;
  using const_iterator = const_pointer;

  //! Empty span: null data pointer and zero length.
  constexpr span() noexcept : ptr(nullptr), len(0) {}
  //! Same as the default constructor.
  constexpr span(std::nullptr_t) noexcept : span() {}

  //! Prevent derived-to-base conversions; invalid in this context.
  template<typename U, typename = typename std::enable_if<safe_conversion<U>()>::type>
  constexpr span(U* const src_ptr, const std::size_t count) noexcept
    : ptr(src_ptr), len(count) {}

  //! Conversion from C-array. Prevents common bugs with sizeof + arrays.
  template<std::size_t N>
  constexpr span(T (&src)[N]) noexcept : span(src, N) {}

  constexpr span(const span&) noexcept = default;
  span& operator=(const span&) noexcept = default;

  /*! Try to remove `amount` elements from beginning of span.
      The request is clamped to `size()`, so the span never moves past its end.
      \return Number of elements removed. */
  std::size_t remove_prefix(std::size_t amount) noexcept
  {
    amount = std::min(len, amount);
    ptr += amount;
    len -= amount;
    return amount;
  }

  constexpr iterator begin() const noexcept { return ptr; }
  constexpr const_iterator cbegin() const noexcept { return ptr; }

  constexpr iterator end() const noexcept { return begin() + size(); }
  constexpr const_iterator cend() const noexcept { return cbegin() + size(); }

  constexpr bool empty() const noexcept { return size() == 0; }
  constexpr pointer data() const noexcept { return ptr; }
  //! \return Number of elements in the span.
  constexpr std::size_t size() const noexcept { return len; }
  //! \return Number of bytes covered by the span (`size() * sizeof(value_type)`).
  constexpr std::size_t size_bytes() const noexcept { return size() * sizeof(value_type); }

  //! Unchecked element access; `idx` is not compared against `size()`.
  T &operator[](std::size_t idx) noexcept { return ptr[idx]; }
  const T &operator[](std::size_t idx) const noexcept { return ptr[idx]; }

private:
  T* ptr;
  std::size_t len;
};
//! \return Read-only `span<const T::value_type>` over the elements of a STL compatible `src`.
template<typename T>
constexpr span<const typename T::value_type> to_span(const T& src)
{
  using view_type = span<const typename T::value_type>;
  // List-initialization is kept deliberately: the compiler provides a
  // diagnostic if size() is not size_t.
  return view_type{src.data(), src.size()};
}
//! \return Writable `span<T::value_type>` over the elements of a STL compatible `src`.
template<typename T>
constexpr span<typename T::value_type> to_mut_span(T& src)
{
  using view_type = span<typename T::value_type>;
  // List-initialization is kept deliberately: the compiler provides a
  // diagnostic if size() is not size_t.
  return view_type{src.data(), src.size()};
}
//! \return False only when `T` is standard-layout with an alignment of 1;
//!   true for every other type. Used by the byte-span helpers as a
//!   conservative "may have padding" check.
template<typename T>
constexpr bool has_padding() noexcept
{
  return !(std::is_standard_layout<T>::value && alignof(T) == 1);
}
//! \return The elements of `src` viewed as `span<const std::uint8_t>`
//!   (`src.size_bytes()` bytes starting at `src.data()`).
template<typename T>
span<const std::uint8_t> to_byte_span(const span<const T> src) noexcept
{
  static_assert(!has_padding<T>(), "source type may have padding");

  const std::uint8_t* const first_byte = reinterpret_cast<const std::uint8_t*>(src.data());
  const std::size_t byte_count = src.size_bytes();
  return {first_byte, byte_count};
}
//! \return Read-only `span<const std::uint8_t>` covering the `sizeof(T)` bytes at `&src`.
template<typename T>
span<const std::uint8_t> as_byte_span(const T& src) noexcept
{
  static_assert(!std::is_empty<T>::value, "empty types will not work -> sizeof == 1");
  static_assert(!has_padding<T>(), "source type may have padding");

  // std::addressof is used so an overloaded operator& on T cannot interfere.
  const T* const object = std::addressof(src);
  return {reinterpret_cast<const std::uint8_t*>(object), sizeof(T)};
}
//! \return Writable `span<std::uint8_t>` covering the `sizeof(T)` bytes at `&src`.
template<typename T>
span<std::uint8_t> as_mut_byte_span(T& src) noexcept
{
  static_assert(!std::is_empty<T>::value, "empty types will not work -> sizeof == 1");
  static_assert(!has_padding<T>(), "source type may have padding");

  // std::addressof is used so an overloaded operator& on T cannot interfere.
  T* const object = std::addressof(src);
  return {reinterpret_cast<std::uint8_t*>(object), sizeof(T)};
}
//! Make a read-only span over the characters of a std::string.
//! \tparam T Element type of the result; must be `char`, `unsigned char`,
//!   `std::int8_t` or `std::uint8_t` (enforced at compile time).
//! \return `span<const T>` over `s.size()` elements starting at `s.data()`.
//!   The span does not own the data; it is only usable while `s` is alive and unmodified.
template<typename T>
span<const T> strspan(const std::string &s) noexcept
{
  // The fixed-width names are qualified with std:: to match the rest of this
  // header; <cstdint> is only required to declare them in namespace std.
  static_assert(std::is_same<T, char>() || std::is_same<T, unsigned char>() || std::is_same<T, std::int8_t>() || std::is_same<T, std::uint8_t>(), "Unexpected type");
  return {reinterpret_cast<const T*>(s.data()), s.size()};
}
}

View File

@@ -81,7 +81,7 @@ Examples
.. code:: c++ .. code:: c++
#include <fmt/core.h> #include <fmt/core.h>
int main() { int main() {
fmt::print("Hello, world!\n"); fmt::print("Hello, world!\n");
} }
@@ -293,11 +293,11 @@ Projects using this library
An open-source library for mathematical programming An open-source library for mathematical programming
* `Aseprite <https://github.com/aseprite/aseprite>`_: * `Aseprite <https://github.com/aseprite/aseprite>`_:
Animated sprite editor & pixel art tool Animated sprite editor & pixel art tool
* `AvioBook <https://www.aviobook.aero/en>`_: A comprehensive aircraft * `AvioBook <https://www.aviobook.aero/en>`_: A comprehensive aircraft
operations suite operations suite
* `Celestia <https://celestia.space/>`_: Real-time 3D visualization of space * `Celestia <https://celestia.space/>`_: Real-time 3D visualization of space
* `Ceph <https://ceph.com/>`_: A scalable distributed storage system * `Ceph <https://ceph.com/>`_: A scalable distributed storage system
@@ -351,7 +351,7 @@ Projects using this library
* `quasardb <https://www.quasardb.net/>`_: A distributed, high-performance, * `quasardb <https://www.quasardb.net/>`_: A distributed, high-performance,
associative database associative database
* `Quill <https://github.com/odygrd/quill>`_: Asynchronous low-latency logging library * `Quill <https://github.com/odygrd/quill>`_: Asynchronous low-latency logging library
* `QKW <https://github.com/ravijanjam/qkw>`_: Generalizing aliasing to simplify * `QKW <https://github.com/ravijanjam/qkw>`_: Generalizing aliasing to simplify

View File

@@ -3,9 +3,9 @@
* DISCLAIMER * DISCLAIMER
* This file is part of the mingw-w64 runtime package. * This file is part of the mingw-w64 runtime package.
* *
* The mingw-w64 runtime package and its code is distributed in the hope that it * The mingw-w64 runtime package and its code is distributed in the hope that it
* will be useful but WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESSED OR * will be useful but WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESSED OR
* IMPLIED ARE HEREBY DISCLAIMED. This includes but is not limited to * IMPLIED ARE HEREBY DISCLAIMED. This includes but is not limited to
* warranties of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. * warranties of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
*/ */
/* /*
@@ -109,11 +109,7 @@ char *optarg; /* argument associated with option */
extern char __declspec(dllimport) *__progname; extern char __declspec(dllimport) *__progname;
#endif #endif
#ifdef __CYGWIN__
static char EMSG[] = ""; static char EMSG[] = "";
#else
#define EMSG ""
#endif
static int getopt_internal(int, char * const *, const char *, static int getopt_internal(int, char * const *, const char *,
const struct option *, int *, int); const struct option *, int *, int);

View File

@@ -1,68 +0,0 @@
# Authors ordered by first contribution.
Ryan Dahl <ry@tinyclouds.org>
Jeremy Hinegardner <jeremy@hinegardner.org>
Sergey Shepelev <temotor@gmail.com>
Joe Damato <ice799@gmail.com>
tomika <tomika_nospam@freemail.hu>
Phoenix Sol <phoenix@burninglabs.com>
Cliff Frey <cliff@meraki.com>
Ewen Cheslack-Postava <ewencp@cs.stanford.edu>
Santiago Gala <sgala@apache.org>
Tim Becker <tim.becker@syngenio.de>
Jeff Terrace <jterrace@gmail.com>
Ben Noordhuis <info@bnoordhuis.nl>
Nathan Rajlich <nathan@tootallnate.net>
Mark Nottingham <mnot@mnot.net>
Aman Gupta <aman@tmm1.net>
Tim Becker <tim.becker@kuriositaet.de>
Sean Cunningham <sean.cunningham@mandiant.com>
Peter Griess <pg@std.in>
Salman Haq <salman.haq@asti-usa.com>
Cliff Frey <clifffrey@gmail.com>
Jon Kolb <jon@b0g.us>
Fouad Mardini <f.mardini@gmail.com>
Paul Querna <pquerna@apache.org>
Felix Geisendörfer <felix@debuggable.com>
koichik <koichik@improvement.jp>
Andre Caron <andre.l.caron@gmail.com>
Ivo Raisr <ivosh@ivosh.net>
James McLaughlin <jamie@lacewing-project.org>
David Gwynne <loki@animata.net>
Thomas LE ROUX <thomas@november-eleven.fr>
Randy Rizun <rrizun@ortivawireless.com>
Andre Louis Caron <andre.louis.caron@usherbrooke.ca>
Simon Zimmermann <simonz05@gmail.com>
Erik Dubbelboer <erik@dubbelboer.com>
Martell Malone <martellmalone@gmail.com>
Bertrand Paquet <bpaquet@octo.com>
BogDan Vatra <bogdan@kde.org>
Peter Faiman <peter@thepicard.org>
Corey Richardson <corey@octayn.net>
Tóth Tamás <tomika_nospam@freemail.hu>
Cam Swords <cam.swords@gmail.com>
Chris Dickinson <christopher.s.dickinson@gmail.com>
Uli Köhler <ukoehler@btronik.de>
Charlie Somerville <charlie@charliesomerville.com>
Patrik Stutz <patrik.stutz@gmail.com>
Fedor Indutny <fedor.indutny@gmail.com>
runner <runner.mei@gmail.com>
Alexis Campailla <alexis@janeasystems.com>
David Wragg <david@wragg.org>
Vinnie Falco <vinnie.falco@gmail.com>
Alex Butum <alexbutum@linux.com>
Rex Feng <rexfeng@gmail.com>
Alex Kocharin <alex@kocharin.ru>
Mark Koopman <markmontymark@yahoo.com>
Helge Heß <me@helgehess.eu>
Alexis La Goutte <alexis.lagoutte@gmail.com>
George Miroshnykov <george.miroshnykov@gmail.com>
Maciej Małecki <me@mmalecki.com>
Marc O'Morain <github.com@marcomorain.com>
Jeff Pinner <jpinner@twitter.com>
Timothy J Fontaine <tjfontaine@gmail.com>
Akagi201 <akagi201@gmail.com>
Romain Giraud <giraud.romain@gmail.com>
Jay Satiro <raysatiro@yahoo.com>
Arne Steen <Arne.Steen@gmx.de>
Kjell Schubert <kjell.schubert@gmail.com>
Olivier Mengué <dolmen@cpan.org>

View File

@@ -1,19 +0,0 @@
Copyright Joyent, Inc. and other Node contributors.
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to
deal in the Software without restriction, including without limitation the
rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
sell copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
IN THE SOFTWARE.

View File

@@ -1,246 +0,0 @@
HTTP Parser
===========
[![Build Status](https://api.travis-ci.org/nodejs/http-parser.svg?branch=master)](https://travis-ci.org/nodejs/http-parser)
This is a parser for HTTP messages written in C. It parses both requests and
responses. The parser is designed to be used in high-performance HTTP
applications. It does not make any syscalls nor allocations, it does not
buffer data, it can be interrupted at any time. Depending on your
architecture, it only requires about 40 bytes of data per message
stream (in a web server that is per connection).
Features:
* No dependencies
* Handles persistent streams (keep-alive).
* Decodes chunked encoding.
* Upgrade support
* Defends against buffer overflow attacks.
The parser extracts the following information from HTTP messages:
* Header fields and values
* Content-Length
* Request method
* Response status code
* Transfer-Encoding
* HTTP version
* Request URL
* Message body
Usage
-----
One `http_parser` object is used per TCP connection. Initialize the struct
using `http_parser_init()` and set the callbacks. That might look something
like this for a request parser:
```c
http_parser_settings settings;
settings.on_url = my_url_callback;
settings.on_header_field = my_header_field_callback;
/* ... */
http_parser *parser = malloc(sizeof(http_parser));
http_parser_init(parser, HTTP_REQUEST);
parser->data = my_socket;
```
When data is received on the socket execute the parser and check for errors.
```c
size_t len = 80*1024, nparsed;
char buf[len];
ssize_t recved;
recved = recv(fd, buf, len, 0);
if (recved < 0) {
/* Handle error. */
}
/* Start up / continue the parser.
* Note we pass recved==0 to signal that EOF has been received.
*/
nparsed = http_parser_execute(parser, &settings, buf, recved);
if (parser->upgrade) {
/* handle new protocol */
} else if (nparsed != recved) {
/* Handle error. Usually just close the connection. */
}
```
`http_parser` needs to know where the end of the stream is. For example, sometimes
servers send responses without Content-Length and expect the client to
consume input (for the body) until EOF. To tell `http_parser` about EOF, give
`0` as the fourth parameter to `http_parser_execute()`. Callbacks and errors
can still be encountered during an EOF, so one must still be prepared
to receive them.
Scalar valued message information such as `status_code`, `method`, and the
HTTP version are stored in the parser structure. This data is only
temporarily stored in `http_parser` and gets reset on each new message. If
this information is needed later, copy it out of the structure during the
`headers_complete` callback.
The parser decodes the transfer-encoding for both requests and responses
transparently. That is, a chunked encoding is decoded before being sent to
the on_body callback.
The Special Problem of Upgrade
------------------------------
`http_parser` supports upgrading the connection to a different protocol. An
increasingly common example of this is the WebSocket protocol which sends
a request like
GET /demo HTTP/1.1
Upgrade: WebSocket
Connection: Upgrade
Host: example.com
Origin: http://example.com
WebSocket-Protocol: sample
followed by non-HTTP data.
(See [RFC6455](https://tools.ietf.org/html/rfc6455) for more information on the
WebSocket protocol.)
To support this, the parser will treat this as a normal HTTP message without a
body, issuing both on_headers_complete and on_message_complete callbacks. However
http_parser_execute() will stop parsing at the end of the headers and return.
The user is expected to check if `parser->upgrade` has been set to 1 after
`http_parser_execute()` returns. Non-HTTP data begins at the buffer supplied
offset by the return value of `http_parser_execute()`.
Callbacks
---------
During the `http_parser_execute()` call, the callbacks set in
`http_parser_settings` will be executed. The parser maintains state and
never looks behind, so buffering the data is not necessary. If you need to
save certain data for later usage, you can do that from the callbacks.
There are two types of callbacks:
* notification `typedef int (*http_cb) (http_parser*);`
Callbacks: on_message_begin, on_headers_complete, on_message_complete.
* data `typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);`
Callbacks: (requests only) on_url,
(common) on_header_field, on_header_value, on_body;
Callbacks must return 0 on success. Returning a non-zero value indicates
error to the parser, making it exit immediately.
For cases where it is necessary to pass local information to/from a callback,
the `http_parser` object's `data` field can be used.
An example of such a case is when using threads to handle a socket connection,
parse a request, and then give a response over that socket. By instantiation
of a thread-local struct containing relevant data (e.g. accepted socket,
allocated memory for callbacks to write into, etc), a parser's callbacks are
able to communicate data between the scope of the thread and the scope of the
callback in a threadsafe manner. This allows `http_parser` to be used in
multi-threaded contexts.
Example:
```c
typedef struct {
socket_t sock;
void* buffer;
int buf_len;
} custom_data_t;
int my_url_callback(http_parser* parser, const char *at, size_t length) {
/* access to thread local custom_data_t struct.
Use this access to save parsed data for later use into thread local
buffer, or communicate over socket
*/
parser->data;
...
return 0;
}
...
void http_parser_thread(socket_t sock) {
int nparsed = 0;
/* allocate memory for user data */
custom_data_t *my_data = malloc(sizeof(custom_data_t));
/* some information for use by callbacks.
* achieves thread -> callback information flow */
my_data->sock = sock;
/* instantiate a thread-local parser */
http_parser *parser = malloc(sizeof(http_parser));
http_parser_init(parser, HTTP_REQUEST); /* initialise parser */
/* this custom data reference is accessible through the reference to the
parser supplied to callback functions */
parser->data = my_data;
http_parser_settings settings; /* set up callbacks */
settings.on_url = my_url_callback;
/* execute parser */
nparsed = http_parser_execute(parser, &settings, buf, recved);
...
/* parsed information copied from callback.
can now perform action on data copied into thread-local memory from callbacks.
achieves callback -> thread information flow */
my_data->buffer;
...
}
```
In case you parse an HTTP message in chunks (i.e. `read()` request line
from socket, parse, read half headers, parse, etc) your data callbacks
may be called more than once. `http_parser` guarantees that the data pointer is only
valid for the lifetime of the callback. You can also `read()` into a heap allocated
buffer to avoid copying memory around if this fits your application.
Reading headers may be a tricky task if you read/parse headers partially.
Basically, you need to remember whether last header callback was field or value
and apply the following logic:
(on_header_field and on_header_value shortened to on_h_*)
------------------------ ------------ --------------------------------------------
| State (prev. callback) | Callback | Description/action |
------------------------ ------------ --------------------------------------------
| nothing (first call) | on_h_field | Allocate new buffer and copy callback data |
| | | into it |
------------------------ ------------ --------------------------------------------
| value | on_h_field | New header started. |
| | | Copy current name,value buffers to headers |
| | | list and allocate new buffer for new name |
------------------------ ------------ --------------------------------------------
| field | on_h_field | Previous name continues. Reallocate name |
| | | buffer and append callback data to it |
------------------------ ------------ --------------------------------------------
| field | on_h_value | Value for current header started. Allocate |
| | | new buffer and copy callback data to it |
------------------------ ------------ --------------------------------------------
| value | on_h_value | Value continues. Reallocate value buffer |
| | | and append callback data to it |
------------------------ ------------ --------------------------------------------
Parsing URLs
------------
A simplistic zero-copy URL parser is provided as `http_parser_parse_url()`.
Users of this library may wish to use it to parse URLs constructed from
consecutive `on_url` callbacks.
See examples of reading in headers:
* [partial example](http://gist.github.com/155877) in C
* [from http-parser tests](http://github.com/joyent/http-parser/blob/37a0ff8/test.c#L403) in C
* [from Node library](http://github.com/joyent/node/blob/842eaf4/src/http.js#L284) in Javascript

File diff suppressed because it is too large Load Diff

View File

@@ -1,442 +0,0 @@
/* Copyright Joyent, Inc. and other Node contributors. All rights reserved.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to
* deal in the Software without restriction, including without limitation the
* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
* sell copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
* IN THE SOFTWARE.
*/
#ifndef http_parser_h
#define http_parser_h
#ifdef __cplusplus
extern "C" {
#endif
/* Library version, split into its major, minor and patch components.
 * Also update SONAME in the Makefile whenever you change these. */
#define HTTP_PARSER_VERSION_MAJOR 2
#define HTTP_PARSER_VERSION_MINOR 9
#define HTTP_PARSER_VERSION_PATCH 3
#include <stddef.h>
#if defined(_WIN32) && !defined(__MINGW32__) && \
(!defined(_MSC_VER) || _MSC_VER<1600) && !defined(__WINE__)
#include <BaseTsd.h>
typedef __int8 int8_t;
typedef unsigned __int8 uint8_t;
typedef __int16 int16_t;
typedef unsigned __int16 uint16_t;
typedef __int32 int32_t;
typedef unsigned __int32 uint32_t;
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
#else
#include <stdint.h>
#endif
/* Strict-mode switch. It defaults to 1 unless the build already defines it;
 * compile with -DHTTP_PARSER_STRICT=0 to perform fewer checks and run
 * faster.
 */
#if !defined(HTTP_PARSER_STRICT)
#define HTTP_PARSER_STRICT 1
#endif
/* Maximum header size allowed. The default below applies only when the
 * macro has not been defined before this header is included. To pick a
 * different limit, define it in the build environment
 * (e.g. -DHTTP_MAX_HEADER_SIZE=<value>). To effectively remove the limit,
 * define it to a very large number (e.g. -DHTTP_MAX_HEADER_SIZE=0x7fffffff).
 */
#if !defined(HTTP_MAX_HEADER_SIZE)
#define HTTP_MAX_HEADER_SIZE (80*1024)
#endif
/* Forward declarations so the callback types below can refer to the parser. */
typedef struct http_parser http_parser;
typedef struct http_parser_settings http_parser_settings;
/* Callbacks should return non-zero to indicate an error. The parser will
 * then halt execution.
 *
 * The one exception is on_headers_complete. In a HTTP_RESPONSE parser
 * returning '1' from on_headers_complete will tell the parser that it
 * should not expect a body. This is used when receiving a response to a
 * HEAD request which may contain 'Content-Length' or 'Transfer-Encoding:
 * chunked' headers that indicate the presence of a body.
 *
 * Returning `2` from on_headers_complete will tell the parser that it should
 * expect neither a body nor any further responses on this connection. This is
 * useful for handling responses to a CONNECT request which may not contain
 * `Upgrade` or `Connection: upgrade` headers.
 *
 * http_data_cb does not return data chunks. It will be called arbitrarily
 * many times for each string. E.g. you might get 10 callbacks for "on_url",
 * each providing just a few characters more data.
 */
typedef int (*http_data_cb) (http_parser*, const char *at, size_t length);
typedef int (*http_cb) (http_parser*);
/* Status Codes
 *
 * X-macro table: the argument XX is invoked once per status as
 * XX(numeric code, identifier suffix, reason phrase).
 *
 * The last row must NOT end with a line-continuation backslash: a trailing
 * backslash would splice the following `enum http_status` line into the
 * macro body and break the enum definition.
 */
#define HTTP_STATUS_MAP(XX) \
  XX(100, CONTINUE, Continue) \
  XX(101, SWITCHING_PROTOCOLS, Switching Protocols) \
  XX(102, PROCESSING, Processing) \
  XX(200, OK, OK) \
  XX(201, CREATED, Created) \
  XX(202, ACCEPTED, Accepted) \
  XX(203, NON_AUTHORITATIVE_INFORMATION, Non-Authoritative Information) \
  XX(204, NO_CONTENT, No Content) \
  XX(205, RESET_CONTENT, Reset Content) \
  XX(206, PARTIAL_CONTENT, Partial Content) \
  XX(207, MULTI_STATUS, Multi-Status) \
  XX(208, ALREADY_REPORTED, Already Reported) \
  XX(226, IM_USED, IM Used) \
  XX(300, MULTIPLE_CHOICES, Multiple Choices) \
  XX(301, MOVED_PERMANENTLY, Moved Permanently) \
  XX(302, FOUND, Found) \
  XX(303, SEE_OTHER, See Other) \
  XX(304, NOT_MODIFIED, Not Modified) \
  XX(305, USE_PROXY, Use Proxy) \
  XX(307, TEMPORARY_REDIRECT, Temporary Redirect) \
  XX(308, PERMANENT_REDIRECT, Permanent Redirect) \
  XX(400, BAD_REQUEST, Bad Request) \
  XX(401, UNAUTHORIZED, Unauthorized) \
  XX(402, PAYMENT_REQUIRED, Payment Required) \
  XX(403, FORBIDDEN, Forbidden) \
  XX(404, NOT_FOUND, Not Found) \
  XX(405, METHOD_NOT_ALLOWED, Method Not Allowed) \
  XX(406, NOT_ACCEPTABLE, Not Acceptable) \
  XX(407, PROXY_AUTHENTICATION_REQUIRED, Proxy Authentication Required) \
  XX(408, REQUEST_TIMEOUT, Request Timeout) \
  XX(409, CONFLICT, Conflict) \
  XX(410, GONE, Gone) \
  XX(411, LENGTH_REQUIRED, Length Required) \
  XX(412, PRECONDITION_FAILED, Precondition Failed) \
  XX(413, PAYLOAD_TOO_LARGE, Payload Too Large) \
  XX(414, URI_TOO_LONG, URI Too Long) \
  XX(415, UNSUPPORTED_MEDIA_TYPE, Unsupported Media Type) \
  XX(416, RANGE_NOT_SATISFIABLE, Range Not Satisfiable) \
  XX(417, EXPECTATION_FAILED, Expectation Failed) \
  XX(421, MISDIRECTED_REQUEST, Misdirected Request) \
  XX(422, UNPROCESSABLE_ENTITY, Unprocessable Entity) \
  XX(423, LOCKED, Locked) \
  XX(424, FAILED_DEPENDENCY, Failed Dependency) \
  XX(426, UPGRADE_REQUIRED, Upgrade Required) \
  XX(428, PRECONDITION_REQUIRED, Precondition Required) \
  XX(429, TOO_MANY_REQUESTS, Too Many Requests) \
  XX(431, REQUEST_HEADER_FIELDS_TOO_LARGE, Request Header Fields Too Large) \
  XX(451, UNAVAILABLE_FOR_LEGAL_REASONS, Unavailable For Legal Reasons) \
  XX(500, INTERNAL_SERVER_ERROR, Internal Server Error) \
  XX(501, NOT_IMPLEMENTED, Not Implemented) \
  XX(502, BAD_GATEWAY, Bad Gateway) \
  XX(503, SERVICE_UNAVAILABLE, Service Unavailable) \
  XX(504, GATEWAY_TIMEOUT, Gateway Timeout) \
  XX(505, HTTP_VERSION_NOT_SUPPORTED, HTTP Version Not Supported) \
  XX(506, VARIANT_ALSO_NEGOTIATES, Variant Also Negotiates) \
  XX(507, INSUFFICIENT_STORAGE, Insufficient Storage) \
  XX(508, LOOP_DETECTED, Loop Detected) \
  XX(510, NOT_EXTENDED, Not Extended) \
  XX(511, NETWORK_AUTHENTICATION_REQUIRED, Network Authentication Required)

/* HTTP_STATUS_<name> constants; each one equals its numeric status code. */
enum http_status
{
#define XX(num, name, string) HTTP_STATUS_##name = num,
  HTTP_STATUS_MAP(XX)
#undef XX
};
/* Request Methods
 *
 * X-macro table: the argument XX is invoked once per method as
 * XX(numeric value, identifier suffix, method text).
 *
 * The last row must NOT end with a line-continuation backslash: a trailing
 * backslash would splice the following `enum http_method` line into the
 * macro body and break the enum definition.
 */
#define HTTP_METHOD_MAP(XX) \
  XX(0, DELETE, DELETE) \
  XX(1, GET, GET) \
  XX(2, HEAD, HEAD) \
  XX(3, POST, POST) \
  XX(4, PUT, PUT) \
  /* pathological */ \
  XX(5, CONNECT, CONNECT) \
  XX(6, OPTIONS, OPTIONS) \
  XX(7, TRACE, TRACE) \
  /* WebDAV */ \
  XX(8, COPY, COPY) \
  XX(9, LOCK, LOCK) \
  XX(10, MKCOL, MKCOL) \
  XX(11, MOVE, MOVE) \
  XX(12, PROPFIND, PROPFIND) \
  XX(13, PROPPATCH, PROPPATCH) \
  XX(14, SEARCH, SEARCH) \
  XX(15, UNLOCK, UNLOCK) \
  XX(16, BIND, BIND) \
  XX(17, REBIND, REBIND) \
  XX(18, UNBIND, UNBIND) \
  XX(19, ACL, ACL) \
  /* subversion */ \
  XX(20, REPORT, REPORT) \
  XX(21, MKACTIVITY, MKACTIVITY) \
  XX(22, CHECKOUT, CHECKOUT) \
  XX(23, MERGE, MERGE) \
  /* upnp */ \
  XX(24, MSEARCH, M-SEARCH) \
  XX(25, NOTIFY, NOTIFY) \
  XX(26, SUBSCRIBE, SUBSCRIBE) \
  XX(27, UNSUBSCRIBE, UNSUBSCRIBE) \
  /* RFC-5789 */ \
  XX(28, PATCH, PATCH) \
  XX(29, PURGE, PURGE) \
  /* CalDAV */ \
  XX(30, MKCALENDAR, MKCALENDAR) \
  /* RFC-2068, section 19.6.1.2 */ \
  XX(31, LINK, LINK) \
  XX(32, UNLINK, UNLINK) \
  /* icecast */ \
  XX(33, SOURCE, SOURCE)

/* HTTP_<name> constants, numbered as listed in HTTP_METHOD_MAP. */
enum http_method
{
#define XX(num, name, string) HTTP_##name = num,
  HTTP_METHOD_MAP(XX)
#undef XX
};
/* Parser kind, passed to http_parser_init() and kept in http_parser.type. */
enum http_parser_type {
  HTTP_REQUEST,
  HTTP_RESPONSE,
  HTTP_BOTH
};
/* Bit values stored in the http_parser.flags field (one bit per flag). */
enum flags {
  F_CHUNKED               = 0x001,
  F_CONNECTION_KEEP_ALIVE = 0x002,
  F_CONNECTION_CLOSE      = 0x004,
  F_CONNECTION_UPGRADE    = 0x008,
  F_TRAILING              = 0x010,
  F_UPGRADE               = 0x020,
  F_SKIPBODY              = 0x040,
  F_CONTENTLENGTH         = 0x080,
  F_TRANSFER_ENCODING     = 0x100
};
/* Table of errno-related constants.
 *
 * XX must be a two-argument macro; it is invoked once per error as
 * XX(name, description string).
 */
#define HTTP_ERRNO_MAP(XX) \
  /* No error */ \
  XX(OK, "success") \
  /* Callback-related errors */ \
  XX(CB_message_begin, "the on_message_begin callback failed") \
  XX(CB_url, "the on_url callback failed") \
  XX(CB_header_field, "the on_header_field callback failed") \
  XX(CB_header_value, "the on_header_value callback failed") \
  XX(CB_headers_complete, "the on_headers_complete callback failed") \
  XX(CB_body, "the on_body callback failed") \
  XX(CB_message_complete, "the on_message_complete callback failed") \
  XX(CB_status, "the on_status callback failed") \
  XX(CB_chunk_header, "the on_chunk_header callback failed") \
  XX(CB_chunk_complete, "the on_chunk_complete callback failed") \
  /* Parsing-related errors */ \
  XX(INVALID_EOF_STATE, "stream ended at an unexpected time") \
  XX(HEADER_OVERFLOW, "too many header bytes seen; overflow detected") \
  XX(CLOSED_CONNECTION, "data received after completed connection: close message") \
  XX(INVALID_VERSION, "invalid HTTP version") \
  XX(INVALID_STATUS, "invalid HTTP status code") \
  XX(INVALID_METHOD, "invalid HTTP method") \
  XX(INVALID_URL, "invalid URL") \
  XX(INVALID_HOST, "invalid host") \
  XX(INVALID_PORT, "invalid port") \
  XX(INVALID_PATH, "invalid path") \
  XX(INVALID_QUERY_STRING, "invalid query string") \
  XX(INVALID_FRAGMENT, "invalid fragment") \
  XX(LF_EXPECTED, "LF character expected") \
  XX(INVALID_HEADER_TOKEN, "invalid character in header") \
  XX(INVALID_CONTENT_LENGTH, "invalid character in content-length header") \
  XX(UNEXPECTED_CONTENT_LENGTH, "unexpected content-length header") \
  XX(INVALID_CHUNK_SIZE, "invalid character in chunk size header") \
  XX(INVALID_TRANSFER_ENCODING, "request has invalid transfer-encoding") \
  XX(INVALID_CONSTANT, "invalid constant string") \
  XX(INVALID_INTERNAL_STATE, "encountered unexpected internal state") \
  XX(STRICT, "strict mode assertion failed") \
  XX(PAUSED, "parser is paused") \
  XX(UNKNOWN, "an unknown error occurred")

/* One HPE_<name> enumerator per table row, numbered from 0 in table order. */
#define HTTP_ERRNO_GEN(code, text) HPE_##code,
enum http_errno {
  HTTP_ERRNO_MAP(HTTP_ERRNO_GEN)
};
#undef HTTP_ERRNO_GEN

/* Read the http_errno field of the parser `p` as an enum http_errno. */
#define HTTP_PARSER_ERRNO(p) ((enum http_errno) (p)->http_errno)
/* Parser state object. Set up with http_parser_init() and passed to
 * http_parser_execute() and to every callback.
 *
 * Members are grouped by access level: PRIVATE members belong to the
 * parser, READ-ONLY members may be inspected by the user, and PUBLIC
 * members are free for the user to set.
 */
struct http_parser {
/** PRIVATE **/
unsigned int type : 2; /* enum http_parser_type */
unsigned int state : 7; /* enum state from http_parser.c */
unsigned int header_state : 7; /* enum header_state from http_parser.c */
unsigned int index : 7; /* index into current matcher */
unsigned int lenient_http_headers : 1;
unsigned int flags : 16; /* F_* values from 'flags' enum; semi-public */
uint32_t nread; /* # bytes read in various scenarios */
uint64_t content_length; /* # bytes in body (0 if no Content-Length header) */
/** READ-ONLY **/
unsigned short http_major;
unsigned short http_minor;
unsigned int status_code : 16; /* responses only */
unsigned int method : 8; /* requests only */
unsigned int http_errno : 7; /* read it through HTTP_PARSER_ERRNO() */
/* 1 = Upgrade header was present and the parser has exited because of that.
 * 0 = No upgrade header present.
 * Should be checked when http_parser_execute() returns in addition to
 * error checking.
 */
unsigned int upgrade : 1;
/** PUBLIC **/
void *data; /* A pointer to get hook to the "connection" or "socket" object */
};
/* Table of user callbacks passed to http_parser_execute().
 *
 * http_cb members receive only the parser; http_data_cb members also
 * receive a pointer and length, and may be called several times for one
 * string. http_parser_settings_init() sets all members to 0.
 */
struct http_parser_settings {
http_cb on_message_begin;
http_data_cb on_url;
http_data_cb on_status;
http_data_cb on_header_field;
http_data_cb on_header_value;
http_cb on_headers_complete;
http_data_cb on_body;
http_cb on_message_complete;
/* When on_chunk_header is called, the current chunk length is stored
 * in parser->content_length.
 */
http_cb on_chunk_header;
http_cb on_chunk_complete;
};
/* URL component identifiers. UF_MAX is the number of components and is
 * not a component itself. */
enum http_parser_url_fields {
  UF_SCHEMA   = 0,
  UF_HOST     = 1,
  UF_PORT     = 2,
  UF_PATH     = 3,
  UF_QUERY    = 4,
  UF_FRAGMENT = 5,
  UF_USERINFO = 6,
  UF_MAX      = 7
};
/* Result structure for http_parser_parse_url().
 *
 * Callers should index into field_data[] with UF_* values if and only if
 * field_set has the relevant (1 << UF_*) bit set. As a courtesy to clients
 * (and because we probably have padding left over), we convert any port to
 * a uint16_t.
 *
 * http_parser_url_init() sets all members to 0.
 */
struct http_parser_url {
uint16_t field_set; /* Bitmask of (1 << UF_*) values */
uint16_t port; /* Converted UF_PORT string */
/* One entry per UF_* value below UF_MAX. */
struct {
uint16_t off; /* Offset into buffer in which field starts */
uint16_t len; /* Length of run in buffer */
} field_data[UF_MAX];
};
/* Returns the library version. Bits 16-23 contain the major version number,
 * bits 8-15 the minor version number and bits 0-7 the patch level.
 * Usage example:
 *
 * unsigned long version = http_parser_version();
 * unsigned major = (version >> 16) & 255;
 * unsigned minor = (version >> 8) & 255;
 * unsigned patch = version & 255;
 * printf("http_parser v%u.%u.%u\n", major, minor, patch);
 */
unsigned long http_parser_version(void);
/* Initialize `parser` for the given parser type
 * (HTTP_REQUEST, HTTP_RESPONSE or HTTP_BOTH).
 */
void http_parser_init(http_parser *parser, enum http_parser_type type);
/* Initialize http_parser_settings members to 0
 */
void http_parser_settings_init(http_parser_settings *settings);
/* Executes the parser on the `len` bytes at `data`, invoking the callbacks
 * in `settings`. Returns number of parsed bytes. Sets
 * `parser->http_errno` on error. */
size_t http_parser_execute(http_parser *parser,
const http_parser_settings *settings,
const char *data,
size_t len);
/* If http_should_keep_alive() in the on_headers_complete or
 * on_message_complete callback returns 0, then this should be
 * the last message on the connection.
 * If you are the server, respond with the "Connection: close" header.
 * If you are the client, close the connection.
 */
int http_should_keep_alive(const http_parser *parser);
/* Returns a string version of the HTTP method. */
const char *http_method_str(enum http_method m);
/* Returns a string version of the HTTP status code. */
const char *http_status_str(enum http_status s);
/* Return a string name of the given error */
const char *http_errno_name(enum http_errno err);
/* Return a string description of the given error */
const char *http_errno_description(enum http_errno err);
/* Initialize all http_parser_url members to 0 */
void http_parser_url_init(struct http_parser_url *u);
/* Parse a URL of `buflen` bytes at `buf` into `u`; return nonzero on failure */
int http_parser_parse_url(const char *buf, size_t buflen,
int is_connect,
struct http_parser_url *u);
/* Pause or un-pause the parser; a nonzero value pauses */
void http_parser_pause(http_parser *parser, int paused);
/* Checks if this is the final chunk of the body. */
int http_body_is_final(const http_parser *parser);
/* Change the maximum header size provided at compile time. */
void http_parser_set_max_header_size(uint32_t size);
#ifdef __cplusplus
}
#endif
#endif

View File

@@ -1,4 +1,4 @@
cmake_minimum_required (VERSION 2.8.12) cmake_minimum_required(VERSION 3.10)
project (hwloc C) project (hwloc C)
include_directories(include) include_directories(include)

View File

@@ -1,5 +1,5 @@
Copyright © 2009 CNRS Copyright © 2009 CNRS
Copyright © 2009-2020 Inria. All rights reserved. Copyright © 2009-2025 Inria. All rights reserved.
Copyright © 2009-2013 Université Bordeaux Copyright © 2009-2013 Université Bordeaux
Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
Copyright © 2020 Hewlett Packard Enterprise. All rights reserved. Copyright © 2020 Hewlett Packard Enterprise. All rights reserved.
@@ -17,6 +17,418 @@ bug fixes (and other actions) for each version of hwloc since version
0.9. 0.9.
Version 2.12.1
--------------
* Add hwloc-calc's --default-nodes option to hwloc-bind and hwloc-info.
* Improve the --best-memattr "default" fallback, try to use "default"
memory nodes, and add verbose messages and warnings if some
performance info is incomplete or missing.
Thanks to Antoine Morvan for the report.
* Fix CPU and memory binding on different locations,
thanks to Antoine Morvan for the report.
* Add HWLOC_LOCAL_NUMANODE_FLAG_INTERSECT_LOCALITY and enable it by
default in hwloc-calc --local-memory for finding local NUMA nodes
that do not exactly match input locations.
Thanks to Antoine Morvan for the report.
* Fix a possible crash in the x86 backend when Qemu is configured to
expose multicore/thread CPUs that are actually single-core/thread.
Thanks to Georg Pfuetzenreuter.
Version 2.12.0
--------------
* Add hwloc_topology_get_default_nodeset() for the set of default
NUMA nodes.
- hwloc-calc now has --default-nodes option.
* Rework oneAPI LevelZero support to use zesInit() and avoid the need
to set ZES_ENABLE_SYSMAN=1 in the environment.
- zesDriverGetDeviceByUuidExp() is now required in the L0 runtime.
- ZES/Sysman variants were added in hwloc/levelzero.h to specifically
handle ZES/Sysman device handles.
* Fix the locality of AMD GPU partitions, thanks to Edgar Leon for
reporting and debugging the issue.
* Better detect Cray Slingshot NICs, thanks to Edgar Leon.
* Add support for Die objects and Module groups on Windows.
* Only filter-out Dies that are identical to their Packages
when it applies to all Dies.
* Improve hwloc-calc to handle CPU-less NUMA nodes or platforms with
heterogeneous memory without requiring --nodeset-output.
* hwloc-calc now accepts counting/listing cpukinds and memory tiers
with -N and -I cpukind/memorytier.
* The systemd-dbus-api output of hwloc-calc has changed, and
--nodeset-output-format was added, to support NUMA node outputs.
Thanks to Pierre Neyron.
* Update NVLink bandwidth and CUDA capabilities up to NVIDIA Blackwell.
* Fix some NUMA syscalls on Linux for platforms with old libc headers.
* Some minor fixes in distances.
Version 2.11.2
--------------
* Add missing CPU info attrs on aarch64 on Linux.
* Use ACPI CPPC on Linux to get better information about cpukinds,
at least on AMD CPUs.
* Fix crash when manipulating cpukinds after topology
duplication, thanks to Hadrien Grasland for the report.
* Fix missing input target checks in memattr functions,
thanks to Hadrien Grasland for the report.
* Fix a memory leak when ignoring NUMA distances on FreeBSD.
* Fix build failure on old Linux distributions without accessat().
* Fix non-Windows importing of XML topologies and CPUID dumps exported
on Windows.
* hwloc-calc --cpuset-output-format systemd-dbus-api now allows
to generate AllowedCPUs information for systemd slices.
See the hwloc-calc manpage for examples. Thanks to Pierre Neyron.
* Some fixes in manpage EXAMPLES and split them into subsections.
Version 2.11.1
--------------
* Fix bash completions, thanks Tavis Rudd.
Version 2.11.0
--------------
* API
+ Add HWLOC_MEMBIND_WEIGHTED_INTERLEAVE memory binding policy on
Linux 6.9+. Thanks to Honggyu Kim for the patch.
- weighted_interleave_membind is added to membind support bits.
- The "weighted" policy is added to the hwloc-bind tool.
+ Add hwloc_obj_set_subtype(). Thanks to Hadrien Grasland for the report.
* GPU support
+ Don't hide the GPU NUMA node on NVIDIA Grace Hopper.
+ Get Intel GPU OpenCL device locality.
+ Add bandwidths between subdevices in the LevelZero XeLinkBandwidth
matrix.
+ Fix PCI Gen4+ link speed of NVIDIA GPU obtained from NVML,
thanks to Akram Sbaih for the report.
* Windows support
+ Fix Windows support when UNICODE is enabled, several hwloc features
were missing, thanks to Martin for the report.
+ Fix the enabling of CUDA in Windows CMake build,
Thanks to Moritz Kreutzer for the patch.
+ Fix CUDA/OpenCL test source path in Windows CMake.
* Tools
+ Option --best-memattr may now return multiple nodes. Additional
configuration flags may be given to tweak its behavior.
+ hwloc-info has a new --get-attr option to get a single attribute.
+ hwloc-info now supports "levels", "support" and "topology"
special keywords for backward compatibility for hwloc 3.0.
+ The --taskset command-line option is superseded by the new
--cpuset-output-format which also allows to export as list.
+ hwloc-calc may now import bitmasks described as a list of bits
with the new "--cpuset-input-format list".
* Misc
+ The MemoryTiersNr info attribute in the root object now says how many
memory tiers were built. Thanks to Antoine Morvan for the report.
+ Fix the management of infinite cpusets in the bitmap printf/sscanf
API as well as in command-line tools.
+ Add section "Compiling software on top of hwloc's C API" in the
documentation with examples for GNU Make and CMake,
thanks to Florent Pruvost for the help.
Version 2.10.0
--------------
* Heterogeneous Memory core improvements
+ Better heuristics to identify the subtype of memory such as HBM,
DRAM, NVM, CXL-DRAM, etc.
+ Build memory tiers, i.e. sets of NUMA nodes with the same subtype
and similar performance.
- NUMA node tier ranks are exposed in the new MemoryTier info
attribute (starts from 0 for highest bandwidth tier).
+ See the new Heterogeneous Memory section in the documentation.
* API
+ Add hwloc_topology_free_group_object() to discard a Group created
by hwloc_topology_alloc_group_object().
* Linux backend
+ Fix cpukinds on NVIDIA Grace to report identical cores even if they
actually have very small frequency differences.
Thanks to John C. Linford for the report.
+ Add CXLDevice attributes to CXL DAX objects and NUMA nodes to show
which PCI device implements which window.
+ Ignore buggy memory-side caches and memory attributes when fake NUMA
emulation is enabled on the Linux kernel command-line.
+ Add more info attributes in MemoryModule Misc objects,
thanks to Zubiao Xiong for the patch.
+ Get CPUModel and CPUFamily info attributes on LoongArch platforms.
* x86 backend
+ Add support for new AMD CPUID leaf 0x80000026 for better detection
of Core Complex and Die on Zen4 processors.
+ Improve Zhaoxin CPU topology detection.
* Tools
+ Input locations and many command-line options (e.g. hwloc-calc -I -N -H,
lstopo --only) now accept filters such as "NUMA[HBM]" so that only
objects of that type and subtype are considered.
- NUMA[tier=1] is also accepted for selecting NUMA nodes depending
on their MemoryTier info attribute.
+ Add --object-output to hwloc-calc to report the type as a prefix to
object indexes, e.g. Core:2 instead of 2 in the output of -I.
+ hwloc-info --ancestor and --descendants now accept kinds of objects
instead of single types.
- The new --first option only shows the first matching object.
+ Add --children-of-pid to hwloc-ps to show a hierarchy of processes.
Thanks to Antoine Morvan for the suggestion.
+ Add --misc-from to lstopo to add Misc objects described in a file.
- To be combined with the new hwloc-ps --lstopo-misc for a customizable
lstopo --top replacement.
* Misc
+ lstopo may now configure the layout of memory object placed above,
for instance with --children-order memory:above:vert.
+ Fix XML import from memory or stdin when using libxml2 2.12.
+ Fix installation failures when configuring with --target,
thanks to Clement Foyer for the patch.
+ Fix support for 128bit pointer architectures.
+ Remove Netloc.
Version 2.9.3
-------------
* Handle Linux glibc allocation errors in binding routines (CVE-2022-47022).
* Fix hwloc-calc when searching objects on heterogeneous memory platforms,
thanks to Antoine Morvan for the report.
* Fix hwloc_get_next_child() when there are some memory-side caches.
* Don't crash if the topology is empty because Linux cgroups are wrong.
* Improve some hwloc-bind warnings in case of command-line parsing errors.
* Many documentation improvements all over the place, including:
+ hwloc_topology_restrict() and hwloc_topology_insert_group() may reorder
children, causing the logical indexes of objects to change.
Version 2.9.2
-------------
* Don't forget L3i when defining filters for multiple levels of caches
with hwloc_topology_set_cache/icache_types_filter().
* Fix object total_memory after hwloc_topology_insert_group_object().
* Fix the (non-yet) exporting in synthetic description for complex memory
hierarchies with memory-side caches, etc.
* Fix some default size attributes when building synthetic topologies.
* Fix size units in hwloc-annotate.
* Improve bitmap reallocation error management in many functions.
* Documentation improvements:
+ Better document return values of functions.
+ Add "Error reporting" section (in hwloc.h and in the doxygen doc).
+ Add FAQ entry "What may I disable to make hwloc faster?"
+ Improve FAQ entries "Why is lstopo slow?" and
"I only need ..., why should I use hwloc?"
+ Clarify how to deal with cpukinds in hwloc-calc and hwloc-bind
manpages.
Version 2.9.1
-------------
* Don't forget to apply object type filters to "perflevel" caches detected
on recent Mac OS X releases, thanks to Michel Lesoinne for the report.
* Fix a failed assertion in hwloc_topology_restrict() when some NUMA nodes
are removed because of HWLOC_RESTRICT_FLAG_REMOVE_CPULESS but no PUs are.
Thanks to Mark Grondona for reporting the issue.
* Mark HPE Cray Slingshot NICs with subtype "Slingshot".
Version 2.9.0
-------------
* Backends
+ Expose the memory size of CXL memory devices (Type 3) on Linux.
+ The LevelZero backend now reports the "XeLinkBandwidth" distance
matrix between L0 devices (and subdevices) when available.
+ Add support for CUDA compute capability up to 9.0.
* Tools
+ lstopo now switches to console mode when its output is redirected.
Graphical window mode may be forced back with --of window.
+ hwloc-calc now accepts "numa" in -H, and I/O subtypes such as "gpu"
in -I and -N.
Version 2.8.0
-------------
* API
+ Add HWLOC_TOPOLOGY_FLAG_NO_DISTANCES, _NO_MEMATTRS and _NO_CPUKINDS
to reduce the overhead when unneeded.
+ Add separate Read/Write Bandwidth/Latency memory attributes and
implement them on Linux.
* Backends
+ NUMA nodes may now have a subtype such as DRAM, HBM, SPM, or NVM
on heterogeneous memory platforms on Linux.
- Add DAXType and DAXParent attributes on Linux to tell where a
DAX device or its corresponding NUMA node come from (SPM for
Specific-Purpose or NVM for Non-Volatile Memory).
+ Detect heterogeneous caches in hybrid CPUs on MacOS X,
thanks to Paul Bone for the help.
+ Max frequencies are not ignored in Linux cpukinds anymore (they were
ignored in hwloc 2.7.0), but they may be slightly adjusted to avoid
reporting hybrid CPUs because of Intel Turbo Boost Max 3.0.
- See the documentation of environment variable HWLOC_CPUKINDS_MAXFREQ.
+ Hardwire the PCI locality of HPE Cray EX235a nodes.
* Tools
+ lstopo and other tools may now load Linux and x86 cpuid topology files
from a tarball.
+ lstopo may now replace the P# and L# index prefixes with custom strings
thanks to --os-index-prefix and --logical-index-prefix options.
* Misc
+ Add --disable-readme to avoid regenerating the top-level hwloc README
file from the documentation.
Version 2.7.2
-------------
* Fix a crash when LevelZero devices have multiple subdevices,
e.g. on PonteVecchio GPUs, thanks to Jonathan Peyton.
* Fix a leak when importing cpukinds from XML,
thanks to Hui Zhou.
Version 2.7.1
-------------
* Workaround crashes when virtual machines report incoherent x86 CPUID
information about numbers of cores and threads.
Thanks to Peter Bense for the report.
* Use setenv() instead of putenv() when trying to force enable oneAPI L0
support, to avoid issues with applications that touch the environment,
thanks to Josh Hursey for the patch.
* Add some warnings at the end of configure when GPU libraries are
missing on the system or their path is missing in the environment.
Version 2.7.0
-------------
* Backends
+ Add support for NUMA nodes and caches with more than 64 PUs across
multiple processor groups on Windows 11 and Windows Server 2022.
+ Group objects are not created for Windows processor groups anymore,
except if HWLOC_WINDOWS_PROCESSOR_GROUP_OBJS=1 in the environment.
+ Expose "Cluster" group objects on Linux kernel 5.16+ for CPUs
that share some internal cache or bus. This can be equivalent
to the L2 Cache level on some platforms (e.g. x86) or a specific
level between L2 and L3 on others (e.g. ARM Kunpeng 920).
Thanks to Jonathan Cameron for the help.
- HWLOC_DONT_MERGE_CLUSTER_GROUPS=1 may be set in the environment
to prevent these groups from being merged with identical caches, etc.
+ Improve the oneAPI LevelZero backend:
- Expose subdevices such as "ze0.1" inside root OS devices ("ze0")
when the hardware contains multiple subdevices.
- Add many new attributes to describe device type, and the
numbers of slices, subslices, execution units and threads.
- Expose the memory information as LevelZeroHBM/DDR/MemorySize infos.
+ Ignore the max frequencies of cores in Linux cpukinds when the
base frequencies are available (to avoid exposing hybrid CPUs
when Intel Turbo Boost Max 3.0 gives slightly different max
frequencies to CPU cores).
- May be reverted by setting HWLOC_CPUKINDS_MAXFREQ=1 in the environment.
* Tools
+ Add --grey and --palette options to switch lstopo to greyscale or
white-background-only graphics, or to tune individual colors.
* Build
+ Windows CMake builds now support non-MSVC compilers, detect several
features at build time, can build/run tests, etc.
Thanks to Michael Hirsch and Alexander Neumann.
Version 2.6.0
-------------
* Backends
+ Expose two cpukinds for energy-efficient cores (icestorm) and
high-performance cores (firestorm) on Apple M1 on Mac OS X.
+ Use sysfs CPU "capacity" to rank hybrid cores by efficiency
on Linux when available (mostly on recent ARM platforms for now).
+ Improve HWLOC_MEMBIND_BIND (without the STRICT flag) on Linux kernel
>= 5.15: If more than one node is given, the kernel may now use all
of them instead of only the first one before falling back to others.
+ Expose cache os_index when available on Linux, it may be needed
when using resctrl to configure cache partitioning, memory bandwidth
monitoring, etc.
+ Add a "XGMIHops" distances matrix in the RSMI backend for AMD GPU
interconnected through XGMI links.
+ Expose AMD GPU memory information (VRAM and GTT) in the RSMI backend.
+ Add OS devices such as "bxi0" for Atos/Bull BXI HCAs on Linux.
* Tools
+ lstopo has a better placement algorithm with respect to I/O
objects, see --children-order in the manpage for details.
+ hwloc-annotate may now change object subtypes and cache or memory
sizes.
* Build
+ Allow to specify the ROCm installation for building the RSMI backend:
- Use a custom installation path if specified with --with-rocm=<dir>.
- Use /opt/rocm-<version> if specified with --with-rocm-version=<version>
or the ROCM_VERSION environment variable.
- Try /opt/rocm if it exists.
- See "How do I enable ROCm SMI and select which version to use?"
in the FAQ for details.
+ Add a CMakeLists for Windows under contrib/windows-cmake/ .
* Documentation
+ Add FAQ entry "How do I create a custom heterogeneous and
asymmetric topology?"
Version 2.5.0
-------------
* API
+ Add hwloc/windows.h to query Windows processor groups.
+ Add hwloc_get_obj_with_same_locality() to convert between objects
with same locality, for instance NUMA nodes and Packages,
or OS devices within a PCI device.
+ Add hwloc_distances_transform() to modify distances structures.
- hwloc-annotate and lstopo have new distances-transform options.
+ hwloc_distances_add() is replaced with _add_create() followed by
_add_values() and _add_commit(). See hwloc/distances.h for details.
+ Add topology flags to mitigate binding modifications during
hwloc discovery, especially on Windows:
- HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING and _MEMBINDING
restrict discovery to PUs and NUMA nodes inside the binding.
- HWLOC_TOPOLOGY_FLAG_DONT_CHANGE_BINDING prevents from ever
changing the binding during discovery.
* Backends
+ Add a levelzero backend for oneAPI L0 devices, exposed as OS devices
of subtype "LevelZero" and name such as "ze0".
- Add hwloc/levelzero.h for interoperability, i.e. for converting
between L0 API devices and hwloc cpusets or OS devices.
+ Expose NEC Vector Engine cards on Linux as OS devices of subtype
"VectorEngine" and name "ve0", etc.
Thanks to Anara Kozhokanova, Tim Cramer and Erich Focht for the help.
+ Add a NVLinkBandwidth distances structure between NVIDIA GPUs
(and POWER processor or NVSwitches) in the NVML backend,
and a XGMIBandwidth distances structure between AMD GPUs
in the RSMI backends.
- See "Topology Attributes: Distances, Memory Attributes and CPU Kinds"
in the documentation for details about these new distances.
+ Add support for NUMA node 0 being offline in Linux, thanks to Jirka Hladky.
* Build
+ Add --with-cuda-version=<version> or look at the CUDA_VERSION
environment variable to find the appropriate CUDA pkg-config files.
Thanks to Stephen Herbein for the suggestion.
- Also add --with-cuda=<dir> to specify the CUDA installation path
manually (and its NVML and OpenCL components).
Thanks to Andrea Bocci for the suggestion.
- See "How do I enable CUDA and select which CUDA version to use?"
in the FAQ for details.
* Tools
+ lstopo now has a --windows-processor-groups option on Windows.
+ hwloc-ps now has a --short-name option to avoid long/truncated
command path.
+ hwloc-ps now has a --single-ancestor option to return a single
(possibly too large) object where a process is bound.
+ hwloc-ps --pid-cmd may now query environment variables,
including MPI-specific variables to find out process ranks.
Version 2.4.1
-------------
* Fix AMD OpenCL device locality when PCI bus or device number >= 128.
Thanks to Edgar Leon for reporting the issue.
+ Applications using any of the following inline functions must
be recompiled to get the fix: hwloc_opencl_get_device_pci_busid()
hwloc_opencl_get_device_cpuset(), hwloc_opencl_get_device_osdev().
* Fix the ranking of cpukinds on non-Windows systems,
thanks to Ivan Kochin for the report.
* Fix the insertion of custom Groups after loading the topology,
thanks to Scott Hicks.
* Add support for CPU0 being offline in Linux, thanks to Garrett Clay.
* Fix missing x86 Package and Core objects on FreeBSD/NetBSD.
Thanks to Thibault Payet and Yuri Victorovich for the report.
* Fix the import of very large distances with heterogeneous object types.
* Fix a memory leak in the Linux backend,
thanks to Perceval Anichini.
Version 2.4.0 Version 2.4.0
------------- -------------
* API * API

View File

@@ -1,4 +1,8 @@
Introduction This is a truncated and poorly-formatted version of the documentation main page.
See https://www.open-mpi.org/projects/hwloc/doc/ for more.
hwloc Overview
The Hardware Locality (hwloc) software project aims at easing the process of The Hardware Locality (hwloc) software project aims at easing the process of
discovering hardware resources in parallel architectures. It offers discovering hardware resources in parallel architectures. It offers
@@ -8,66 +12,450 @@ high-performance computing (HPC) applications, but is also applicable to any
project seeking to exploit code and/or data locality on modern computing project seeking to exploit code and/or data locality on modern computing
platforms. platforms.
hwloc is actually made of two subprojects distributed together: hwloc provides command line tools and a C API to obtain the hierarchical map of
key computing elements within a node, such as: NUMA memory nodes, shared
caches, processor packages, dies and cores, processing units (logical
processors or "threads") and even I/O devices. hwloc also gathers various
attributes such as cache and memory information, and is portable across a
variety of different operating systems and platforms.
* The original hwloc project for describing the internals of computing nodes. hwloc primarily aims at helping high-performance computing (HPC) applications,
It is described in details starting at section Hardware Locality (hwloc) but is also applicable to any project seeking to exploit code and/or data
Introduction. locality on modern computing platforms.
* The network-oriented companion called netloc (Network Locality), described
in details starting with section Network Locality (netloc).
See also the Related pages tab above for links to other sections. hwloc supports the following operating systems:
Netloc may be disabled, but the original hwloc cannot. Both hwloc and netloc * Linux (with knowledge of cgroups and cpusets, memory targets/initiators,
APIs are documented after these sections. etc.) on all supported hardware, including Intel Xeon Phi, ScaleMP vSMP,
and NumaScale NumaConnect.
* Solaris (with support for processor sets and logical domains)
* AIX
* Darwin / OS X
* FreeBSD and its variants (such as kFreeBSD/GNU)
* NetBSD
* HP-UX
* Microsoft Windows
* IBM BlueGene/Q Compute Node Kernel (CNK)
Installation Since it uses standard Operating System information, hwloc's support is mostly
independent from the processor type (x86, powerpc, ...) and just relies on the
Operating System support. The main exception is BSD operating systems (NetBSD,
FreeBSD, etc.) because they do not provide topology information, hence
hwloc uses an x86-only CPUID-based backend (which can be used for other OSes
too, see the Components and plugins section).
hwloc (https://www.open-mpi.org/projects/hwloc/) is available under the BSD To check whether hwloc works on a particular machine, just try to build it and
license. It is hosted as a sub-project of the overall Open MPI project (https:/ run lstopo or lstopo-no-graphics. If some things do not look right (e.g. bogus
/www.open-mpi.org/). Note that hwloc does not require any functionality from or missing cache information), see Questions and Bugs.
Open MPI -- it is a wholly separate (and much smaller!) project and code base.
It just happens to be hosted as part of the overall Open MPI project.
Basic Installation hwloc only reports the number of processors on unsupported operating systems;
no topology information is available.
Installation is the fairly common GNU-based process: For development and debugging purposes, hwloc also offers the ability to work
on "fake" topologies:
shell$ ./configure --prefix=... * Symmetrical tree of resources generated from a list of level arities, see
shell$ make Synthetic topologies.
shell$ make install * Remote machine simulation through the gathering of topology as XML files,
see Importing and exporting topologies from/to XML files.
hwloc- and netloc-specific configure options and requirements are documented in hwloc can display the topology in a human-readable format, either in graphical
sections hwloc Installation and Netloc Installation respectively. mode (X11), or by exporting in one of several different formats, including:
plain text, LaTeX tikzpicture, PDF, PNG, and FIG (see Command-line Examples
below). Note that some of the export formats require additional support
libraries.
Also note that if you install supplemental libraries in non-standard locations, hwloc offers a programming interface for manipulating topologies and objects.
hwloc's configure script may not be able to find them without some help. You It also brings a powerful CPU bitmap API that is used to describe topology
may need to specify additional CPPFLAGS, LDFLAGS, or PKG_CONFIG_PATH values on objects location on physical/logical processors. See the Programming Interface
the configure command line. below. It may also be used to bind applications onto certain cores or memory
nodes. Several utility programs are also provided to ease command-line
manipulation of topology objects, binding of processes, and so on.
For example, if libpciaccess was installed into /opt/pciaccess, hwloc's Bindings for several other languages are available from the project website.
configure script may not find it be default. Try adding PKG_CONFIG_PATH to the
./configure command line, like this:
./configure PKG_CONFIG_PATH=/opt/pciaccess/lib/pkgconfig ... Command-line Examples
Running the "lstopo" tool is a good way to check as a graphical output whether On a 4-package 2-core machine with hyper-threading, the lstopo tool may show
hwloc properly detected the architecture of your node. Netloc command-line the following graphical output:
tools can be used to display the network topology interconnecting your nodes.
Installing from a Git clone [dudley]
Additionally, the code can be directly cloned from Git: Here's the equivalent output in textual form:
shell$ git clone https://github.com/open-mpi/hwloc.git Machine
shell$ cd hwloc NUMANode L#0 (P#0)
shell$ ./autogen.sh Package L#0 + L3 L#0 (4096KB)
L2 L#0 (1024KB) + L1 L#0 (16KB) + Core L#0
PU L#0 (P#0)
PU L#1 (P#8)
L2 L#1 (1024KB) + L1 L#1 (16KB) + Core L#1
PU L#2 (P#4)
PU L#3 (P#12)
Package L#1 + L3 L#1 (4096KB)
L2 L#2 (1024KB) + L1 L#2 (16KB) + Core L#2
PU L#4 (P#1)
PU L#5 (P#9)
L2 L#3 (1024KB) + L1 L#3 (16KB) + Core L#3
PU L#6 (P#5)
PU L#7 (P#13)
Package L#2 + L3 L#2 (4096KB)
L2 L#4 (1024KB) + L1 L#4 (16KB) + Core L#4
PU L#8 (P#2)
PU L#9 (P#10)
L2 L#5 (1024KB) + L1 L#5 (16KB) + Core L#5
PU L#10 (P#6)
PU L#11 (P#14)
Package L#3 + L3 L#3 (4096KB)
L2 L#6 (1024KB) + L1 L#6 (16KB) + Core L#6
PU L#12 (P#3)
PU L#13 (P#11)
L2 L#7 (1024KB) + L1 L#7 (16KB) + Core L#7
PU L#14 (P#7)
PU L#15 (P#15)
Note that GNU Autoconf >=2.63, Automake >=1.11 and Libtool >=2.2.6 are required Note that there is also an equivalent output in XML that is meant for exporting
when building from a Git clone. /importing topologies but it is hardly readable to human-beings (see Importing
and exporting topologies from/to XML files for details).
Nightly development snapshots are available on the web site, they can be On a 4-package 2-core Opteron NUMA machine (with two cores disallowed by
configured and built without any need for Git or GNU Autotools. the administrator), the lstopo tool may show the following graphical output
(with --disallowed for displaying disallowed objects):
[hagrid]
Here's the equivalent output in textual form:
Machine (32GB total)
Package L#0
NUMANode L#0 (P#0 8190MB)
L2 L#0 (1024KB) + L1 L#0 (64KB) + Core L#0 + PU L#0 (P#0)
L2 L#1 (1024KB) + L1 L#1 (64KB) + Core L#1 + PU L#1 (P#1)
Package L#1
NUMANode L#1 (P#1 8192MB)
L2 L#2 (1024KB) + L1 L#2 (64KB) + Core L#2 + PU L#2 (P#2)
L2 L#3 (1024KB) + L1 L#3 (64KB) + Core L#3 + PU L#3 (P#3)
Package L#2
NUMANode L#2 (P#2 8192MB)
L2 L#4 (1024KB) + L1 L#4 (64KB) + Core L#4 + PU L#4 (P#4)
L2 L#5 (1024KB) + L1 L#5 (64KB) + Core L#5 + PU L#5 (P#5)
Package L#3
NUMANode L#3 (P#3 8192MB)
L2 L#6 (1024KB) + L1 L#6 (64KB) + Core L#6 + PU L#6 (P#6)
L2 L#7 (1024KB) + L1 L#7 (64KB) + Core L#7 + PU L#7 (P#7)
On a 2-package quad-core Xeon (pre-Nehalem, with 2 dual-core dies into each
package):
[emmett]
Here's the same output in textual form:
Machine (total 16GB)
NUMANode L#0 (P#0 16GB)
Package L#0
L2 L#0 (4096KB)
L1 L#0 (32KB) + Core L#0 + PU L#0 (P#0)
L1 L#1 (32KB) + Core L#1 + PU L#1 (P#4)
L2 L#1 (4096KB)
L1 L#2 (32KB) + Core L#2 + PU L#2 (P#2)
L1 L#3 (32KB) + Core L#3 + PU L#3 (P#6)
Package L#1
L2 L#2 (4096KB)
L1 L#4 (32KB) + Core L#4 + PU L#4 (P#1)
L1 L#5 (32KB) + Core L#5 + PU L#5 (P#5)
L2 L#3 (4096KB)
L1 L#6 (32KB) + Core L#6 + PU L#6 (P#3)
L1 L#7 (32KB) + Core L#7 + PU L#7 (P#7)
Programming Interface
The basic interface is available in hwloc.h. Some higher-level functions are
available in hwloc/helper.h to reduce the need to manually manipulate objects
and follow links between them. Documentation for all these is provided later in
this document. Developers may also want to look at hwloc/inlines.h which
contains the actual inline code of some hwloc.h routines, and at this document,
which provides good higher-level topology traversal examples.
To precisely define the vocabulary used by hwloc, a Terms and Definitions
section is available and should probably be read first.
Each hwloc object contains a cpuset describing the list of processing units
that it contains. These bitmaps may be used for CPU binding and Memory binding.
hwloc offers an extensive bitmap manipulation interface in hwloc/bitmap.h.
Moreover, hwloc also comes with additional helpers for interoperability with
several commonly used environments. See the Interoperability With Other
Software section for details.
The complete API documentation is available in a full set of HTML pages, man
pages, and self-contained PDF files (formatted for both US letter and A4
formats) in the source tarball in doc/doxygen-doc/.
NOTE: If you are building the documentation from a Git clone, you will need to
have Doxygen and pdflatex installed -- the documentation will be built during
the normal "make" process. The documentation is installed during "make install"
to $prefix/share/doc/hwloc/ and your system's default man page tree (under
$prefix, of course).
Portability
Operating Systems have varying support for CPU and memory binding, e.g. while
some Operating Systems provide interfaces for all kinds of CPU and memory
bindings, some others provide only interfaces for a limited number of kinds of
CPU and memory binding, and some do not provide any binding interface at all.
Hwloc's binding functions would then simply return the ENOSYS error (Function
not implemented), meaning that the underlying Operating System does not provide
any interface for them. CPU binding and Memory binding provide more information
on which hwloc binding functions should be preferred because interfaces for
them are usually available on the supported Operating Systems.
Similarly, the ability of reporting topology information varies from one
platform to another. As shown in Command-line Examples, hwloc can obtain
information on a wide variety of hardware topologies. However, some platforms
and/or operating system versions will only report a subset of this information.
For example, on a PPC64-based system with 8 cores (each with 2 hardware
threads) running a default 2.6.18-based kernel from RHEL 5.4, hwloc is only
able to glean information about NUMA nodes and processor units (PUs). No
information about caches, packages, or cores is available.
Here's the graphical output from lstopo on this platform when Simultaneous
Multi-Threading (SMT) is enabled:
[ppc64-with]
And here's the graphical output from lstopo on this platform when SMT is
disabled:
[ppc64-with]
Notice that hwloc only sees half the PUs when SMT is disabled. PU L#6, for
example, seems to change location from NUMA node #0 to #1. In reality, no PUs
"moved" -- they were simply re-numbered when hwloc only saw half as many (see
also Logical index in Indexes and Sets). Hence, PU L#6 in the SMT-disabled
picture probably corresponds to PU L#12 in the SMT-enabled picture.
This same "PUs have disappeared" effect can be seen on other platforms -- even
platforms / OSs that provide much more information than the above PPC64 system.
This is an unfortunate side-effect of how operating systems report information
to hwloc.
Note that after upgrading the Linux kernel on the same PPC64 system mentioned above
to 2.6.34, hwloc is able to discover all the topology information. The
following picture shows the entire topology layout when SMT is enabled:
[ppc64-full]
Developers using the hwloc API or XML output for portable applications should
therefore be extremely careful to not make any assumptions about the structure
of data that is returned. For example, per the above reported PPC topology, it
is not safe to assume that PUs will always be descendants of cores.
Additionally, future hardware may insert new topology elements that are not
available in this version of hwloc. Long-lived applications that are meant to
span multiple different hardware platforms should also be careful about making
structure assumptions. For example, a new element may someday exist between a
core and a PU.
API Example
The following small C example (available in the source tree as ``doc/examples/
hwloc-hello.c'') prints the topology of the machine and performs some thread
and memory binding. More examples are available in the doc/examples/ directory
of the source tree.
/* Example hwloc API program.
*
* See other examples under doc/examples/ in the source tree
* for more details.
*
* Copyright (c) 2009-2016 Inria. All rights reserved.
* Copyright (c) 2009-2011 Université Bordeaux
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory.
*
* hwloc-hello.c
*/
#include "hwloc.h"
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/* Print obj and, recursively, every object below it in the topology tree.
 *
 * Each object is written on its own line, indented by two columns per
 * level of depth: its type, then "#<os_index>" unless the OS index is
 * (unsigned) -1, then its attributes in parentheses when the attribute
 * string is not empty.
 */
static void print_children(hwloc_topology_t topology, hwloc_obj_t obj,
                           int depth)
{
    char type_name[32];
    char attr_text[1024];
    unsigned child = 0;

    /* Indentation for this level followed by the object type. */
    hwloc_obj_type_snprintf(type_name, sizeof type_name, obj, 0);
    printf("%*s%s", 2*depth, "", type_name);

    /* The OS index is only shown when it is not (unsigned) -1. */
    if (obj->os_index != (unsigned) -1) {
        printf("#%u", obj->os_index);
    }

    /* Attributes, separated by single spaces, only when there are any. */
    hwloc_obj_attr_snprintf(attr_text, sizeof attr_text, obj, " ", 0);
    if (attr_text[0] != '\0') {
        printf("(%s)", attr_text);
    }
    printf("\n");

    /* Descend into every child, one level deeper. */
    while (child < obj->arity) {
        print_children(topology, obj->children[child], depth + 1);
        child++;
    }
}
/* Entry point of the example: loads the topology of the machine, walks it
 * in several ways, then tries some CPU and memory binding.
 *
 * Returns 0 on success, or EXIT_FAILURE when the topology cannot be
 * initialized or loaded. Binding and allocation failures in the later
 * examples are reported but do not abort the program.
 */
int main(void)
{
    int depth;
    unsigned i, n;
    unsigned long size;
    int levels;
    char string[128];
    int topodepth;
    void *m;
    hwloc_topology_t topology;
    hwloc_cpuset_t cpuset;
    hwloc_obj_t obj;

    /* Allocate and initialize topology object. */
    if (hwloc_topology_init(&topology) < 0) {
        fprintf(stderr, "Failed to initialize the topology\n");
        return EXIT_FAILURE;
    }

    /* ... Optionally, put detection configuration here to ignore
       some objects types, define a synthetic topology, etc....

       The default is to detect all the objects of the machine that
       the caller is allowed to access.  See Configure Topology
       Detection. */

    /* Perform the topology detection. */
    if (hwloc_topology_load(topology) < 0) {
        fprintf(stderr, "Failed to load the topology\n");
        hwloc_topology_destroy(topology);
        return EXIT_FAILURE;
    }

    /* Optionally, get some additional topology information
       in case we need the topology depth later. */
    topodepth = hwloc_topology_get_depth(topology);

    /*****************************************************************
     * First example:
     * Walk the topology with an array style, from level 0 (always
     * the system level) to the lowest level (always the proc level).
     *****************************************************************/
    for (depth = 0; depth < topodepth; depth++) {
        printf("*** Objects at level %d\n", depth);
        for (i = 0; i < hwloc_get_nbobjs_by_depth(topology, depth);
             i++) {
            hwloc_obj_type_snprintf(string, sizeof(string),
                                    hwloc_get_obj_by_depth(topology, depth, i), 0);
            printf("Index %u: %s\n", i, string);
        }
    }

    /*****************************************************************
     * Second example:
     * Walk the topology with a tree style.
     *****************************************************************/
    printf("*** Printing overall tree\n");
    print_children(topology, hwloc_get_root_obj(topology), 0);

    /*****************************************************************
     * Third example:
     * Print the number of packages.
     *****************************************************************/
    depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PACKAGE);
    if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) {
        printf("*** The number of packages is unknown\n");
    } else {
        printf("*** %u package(s)\n",
               hwloc_get_nbobjs_by_depth(topology, depth));
    }

    /*****************************************************************
     * Fourth example:
     * Compute the amount of cache that the first logical processor
     * has above it.
     *****************************************************************/
    levels = 0;
    size = 0;
    for (obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0);
         obj;
         obj = obj->parent) {
        if (hwloc_obj_type_is_cache(obj->type)) {
            levels++;
            size += obj->attr->cache.size;
        }
    }
    printf("*** Logical processor 0 has %d caches totaling %luKB\n",
           levels, size / 1024);

    /*****************************************************************
     * Fifth example:
     * Bind to only one thread of the last core of the machine.
     *
     * First find out where cores are, or else smaller sets of CPUs if
     * the OS doesn't have the notion of a "core".
     *****************************************************************/
    depth = hwloc_get_type_or_below_depth(topology, HWLOC_OBJ_CORE);

    /* Get last core. */
    obj = hwloc_get_obj_by_depth(topology, depth,
                                 hwloc_get_nbobjs_by_depth(topology, depth) - 1);
    if (obj) {
        /* Get a copy of its cpuset that we may modify. */
        cpuset = hwloc_bitmap_dup(obj->cpuset);
        if (cpuset) {
            /* Get only one logical processor (in case the core is
               SMT/hyper-threaded). */
            hwloc_bitmap_singlify(cpuset);

            /* And try to bind ourself there. */
            if (hwloc_set_cpubind(topology, cpuset, 0)) {
                char *str = NULL;
                /* Save errno before any other call may overwrite it. */
                int error = errno;
                /* Report the cpuset we actually tried to bind to, i.e.
                   the singlified copy, not the whole core. */
                if (hwloc_bitmap_asprintf(&str, cpuset) < 0)
                    str = NULL;
                printf("Couldn't bind to cpuset %s: %s\n",
                       str ? str : "(unprintable)", strerror(error));
                free(str);
            }

            /* Free our cpuset copy */
            hwloc_bitmap_free(cpuset);
        } else {
            fprintf(stderr, "Couldn't duplicate the cpuset of the last core\n");
        }
    }

    /*****************************************************************
     * Sixth example:
     * Allocate some memory on the last NUMA node, bind some existing
     * memory to the last NUMA node.
     *****************************************************************/
    /* Get last node. There's always at least one. */
    n = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NUMANODE);
    obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, n - 1);
    if (obj) {
        size = 1024*1024;

        /* Allocate memory that is already bound to the node. */
        m = hwloc_alloc_membind(topology, size, obj->nodeset,
                                HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET);
        if (m) {
            hwloc_free(topology, m, size);
        } else {
            fprintf(stderr, "Couldn't allocate bound memory: %s\n",
                    strerror(errno));
        }

        /* Bind memory that was allocated by other means. */
        m = malloc(size);
        if (m) {
            if (hwloc_set_area_membind(topology, m, size, obj->nodeset,
                                       HWLOC_MEMBIND_BIND,
                                       HWLOC_MEMBIND_BYNODESET) < 0) {
                fprintf(stderr, "Couldn't bind existing memory: %s\n",
                        strerror(errno));
            }
            free(m);
        } else {
            fprintf(stderr, "Couldn't allocate %lu bytes\n", size);
        }
    }

    /* Destroy topology object. */
    hwloc_topology_destroy(topology);

    return 0;
}
hwloc provides a pkg-config executable to obtain relevant compiler and linker
flags. See Compiling software on top of hwloc's C API for details on building
programs on top of hwloc's API using GNU Make or CMake.
On a machine with 2 processor packages -- each package of which has two processing
cores -- the output from running hwloc-hello could be something like the
following:
shell$ ./hwloc-hello
*** Objects at level 0
Index 0: Machine
*** Objects at level 1
Index 0: Package#0
Index 1: Package#1
*** Objects at level 2
Index 0: Core#0
Index 1: Core#1
Index 2: Core#3
Index 3: Core#2
*** Objects at level 3
Index 0: PU#0
Index 1: PU#1
Index 2: PU#2
Index 3: PU#3
*** Printing overall tree
Machine
Package#0
Core#0
PU#0
Core#1
PU#1
Package#1
Core#3
PU#2
Core#2
PU#3
*** 2 package(s)
*** Logical processor 0 has 0 caches totaling 0KB
shell$
Questions and Bugs Questions and Bugs
@@ -78,8 +466,22 @@ debug and report issues.
Questions may be sent to the users or developers mailing lists (https:// Questions may be sent to the users or developers mailing lists (https://
www.open-mpi.org/community/lists/hwloc.php). www.open-mpi.org/community/lists/hwloc.php).
There is also a #hwloc IRC channel on Freenode (irc.freenode.net). There is also a #hwloc IRC channel on Libera Chat (irc.libera.chat).
History / Credits
hwloc is the evolution and merger of the libtopology project and the Portable
Linux Processor Affinity (PLPA) (https://www.open-mpi.org/projects/plpa/)
project. Because of functional and ideological overlap, these two code bases
and ideas were merged and released under the name "hwloc" as an Open MPI
sub-project.
libtopology was initially developed by the Inria Runtime Team-Project. PLPA was
initially developed by the Open MPI development team as a sub-project. Both are
now deprecated in favor of hwloc, which is distributed as an Open MPI
sub-project.
See https://www.open-mpi.org/projects/hwloc/doc/ for more hwloc documentation. See https://www.open-mpi.org/projects/hwloc/doc/ for more hwloc documentation,
actual links to related pages, images, etc.

View File

@@ -8,8 +8,8 @@
# Please update HWLOC_VERSION* in contrib/windows/hwloc_config.h too. # Please update HWLOC_VERSION* in contrib/windows/hwloc_config.h too.
major=2 major=2
minor=4 minor=12
release=0 release=1
# greek is used for alpha or beta release tags. If it is non-empty, # greek is used for alpha or beta release tags. If it is non-empty,
# it will be appended to the version number. It does not have to be # it will be appended to the version number. It does not have to be
@@ -22,7 +22,7 @@ greek=
# The date when this release was created # The date when this release was created
date="Nov 26, 2020" date="May 12, 2025"
# If snapshot=1, then use the value from snapshot_version as the # If snapshot=1, then use the value from snapshot_version as the
# entire hwloc version (i.e., ignore major, minor, release, and # entire hwloc version (i.e., ignore major, minor, release, and
@@ -41,7 +41,6 @@ snapshot_version=${major}.${minor}.${release}${greek}-git
# 2. Version numbers are described in the Libtool current:revision:age # 2. Version numbers are described in the Libtool current:revision:age
# format. # format.
libhwloc_so_version=19:0:4 libhwloc_so_version=25:0:10
libnetloc_so_version=0:0:0
# Please also update the <TargetName> lines in contrib/windows/libhwloc.vcxproj # Please also update the <TargetName> lines in contrib/windows/libhwloc.vcxproj

File diff suppressed because it is too large Load Diff

View File

@@ -1,6 +1,6 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2025 Inria. All rights reserved.
* Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2012 Université Bordeaux
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
@@ -11,10 +11,10 @@
#ifndef HWLOC_CONFIG_H #ifndef HWLOC_CONFIG_H
#define HWLOC_CONFIG_H #define HWLOC_CONFIG_H
#define HWLOC_VERSION "2.4.0" #define HWLOC_VERSION "2.12.1"
#define HWLOC_VERSION_MAJOR 2 #define HWLOC_VERSION_MAJOR 2
#define HWLOC_VERSION_MINOR 4 #define HWLOC_VERSION_MINOR 12
#define HWLOC_VERSION_RELEASE 0 #define HWLOC_VERSION_RELEASE 1
#define HWLOC_VERSION_GREEK "" #define HWLOC_VERSION_GREEK ""
#define __hwloc_restrict #define __hwloc_restrict

View File

@@ -1,6 +1,6 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2024 Inria. All rights reserved.
* Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2012 Université Bordeaux
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
@@ -50,9 +50,10 @@ extern "C" {
* hwloc_bitmap_free(set); * hwloc_bitmap_free(set);
* \endcode * \endcode
* *
* \note Most functions below return an int that may be negative in case of * \note Most functions below return 0 on success and -1 on error.
* error. The usual error case would be an internal failure to realloc/extend * The usual error case would be an internal failure to realloc/extend
* the storage of the bitmap (\p errno would be set to \c ENOMEM). * the storage of the bitmap (\p errno would be set to \c ENOMEM).
* See also \ref hwlocality_api_error_reporting.
* *
* \note Several examples of using the bitmap API are available under the * \note Several examples of using the bitmap API are available under the
* doc/examples/ directory in the source tree. * doc/examples/ directory in the source tree.
@@ -83,7 +84,13 @@ typedef const struct hwloc_bitmap_s * hwloc_const_bitmap_t;
*/ */
HWLOC_DECLSPEC hwloc_bitmap_t hwloc_bitmap_alloc(void) __hwloc_attribute_malloc; HWLOC_DECLSPEC hwloc_bitmap_t hwloc_bitmap_alloc(void) __hwloc_attribute_malloc;
/** \brief Allocate a new full bitmap. */ /** \brief Allocate a new full bitmap.
*
* \returns A valid bitmap or \c NULL.
*
* The bitmap should be freed by a corresponding call to
* hwloc_bitmap_free().
*/
HWLOC_DECLSPEC hwloc_bitmap_t hwloc_bitmap_alloc_full(void) __hwloc_attribute_malloc; HWLOC_DECLSPEC hwloc_bitmap_t hwloc_bitmap_alloc_full(void) __hwloc_attribute_malloc;
/** \brief Free bitmap \p bitmap. /** \brief Free bitmap \p bitmap.
@@ -106,73 +113,134 @@ HWLOC_DECLSPEC int hwloc_bitmap_copy(hwloc_bitmap_t dst, hwloc_const_bitmap_t sr
* Bitmap/String Conversion * Bitmap/String Conversion
*/ */
/** \brief Stringify a bitmap. /** \brief Stringify a bitmap in the default hwloc format.
*
* <b>Note that if the bitmap is a CPU or nodeset, it contains physical indexes.</b>
*
* Print the bits set inside a bitmap as a comma-separated list of hexadecimal 32-bit blocks.
* A bitmap containing bits 1, 33, 34, and all from 64 to 95 is printed as <tt>"0xffffffff,0x00000006,0x00000002"</tt>.
* *
* Up to \p buflen characters may be written in buffer \p buf. * Up to \p buflen characters may be written in buffer \p buf.
* *
* If \p buflen is 0, \p buf may safely be \c NULL. * If \p buflen is 0, \p buf may safely be \c NULL.
* *
* \return the number of character that were actually written if not truncating, * \return the number of characters that were actually written if not truncating,
* or that would have been written (not including the ending \\0). * or that would have been written (not including the ending \c \0).
* \return -1 on error.
*/ */
HWLOC_DECLSPEC int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap); HWLOC_DECLSPEC int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap);
/** \brief Stringify a bitmap into a newly allocated string. /** \brief Stringify a bitmap into a newly allocated string in the default hwloc format.
* *
* \return -1 on error. * <b>Note that if the bitmap is a CPU or nodeset, it contains physical indexes.</b>
*
* Print the bits set inside a bitmap as a comma-separated list of hexadecimal 32-bit blocks.
* A bitmap containing bits 1, 33, 34, and all from 64 to 95 is printed as <tt>"0xffffffff,0x00000006,0x00000002"</tt>.
*
* \return the number of characters that were written (not including the ending \c \0).
* \return -1 on error, for instance with \p errno set to \c ENOMEM on failure to allocate the output string.
*/ */
HWLOC_DECLSPEC int hwloc_bitmap_asprintf(char ** strp, hwloc_const_bitmap_t bitmap); HWLOC_DECLSPEC int hwloc_bitmap_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);
/** \brief Parse a bitmap string and stores it in bitmap \p bitmap. /** \brief Parse a bitmap string as the default hwloc format and stores it in bitmap \p bitmap.
*
* <b>Note that if the bitmap is a CPU or nodeset, the input string must contain physical indexes.</b>
*
* The input string should be a comma-separated list of hexadecimal 32-bit blocks.
* String <tt>"0xffffffff,0x6,0x2"</tt> is parsed as a bitmap containing all bits between 64 and 95,
* and bits 33, 34 and 1.
*
* \return 0 on success, -1 on error.
*/ */
HWLOC_DECLSPEC int hwloc_bitmap_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string); HWLOC_DECLSPEC int hwloc_bitmap_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string);
/** \brief Stringify a bitmap in the list format. /** \brief Stringify a bitmap in the list format.
*
* <b>Note that if the bitmap is a CPU or nodeset, it contains physical indexes.</b>
* *
* Lists are comma-separated indexes or ranges. * Lists are comma-separated indexes or ranges.
* Ranges are dash separated indexes. * Ranges are dash separated indexes.
* The last range may not have an ending indexes if the bitmap is infinitely set. * A bitmap containing bits 1, 33, 34, and all from 64 to 95 is printed as <tt>"1,33-34,64-95"</tt>.
* The last range may not have an ending index if the bitmap is infinitely set.
* *
* Up to \p buflen characters may be written in buffer \p buf. * Up to \p buflen characters may be written in buffer \p buf.
* *
* If \p buflen is 0, \p buf may safely be \c NULL. * If \p buflen is 0, \p buf may safely be \c NULL.
* *
* \return the number of character that were actually written if not truncating, * \return the number of characters that were actually written if not truncating,
* or that would have been written (not including the ending \\0). * or that would have been written (not including the ending \c \0).
* \return -1 on error.
*/ */
HWLOC_DECLSPEC int hwloc_bitmap_list_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap); HWLOC_DECLSPEC int hwloc_bitmap_list_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap);
/** \brief Stringify a bitmap into a newly allocated list string. /** \brief Stringify a bitmap into a newly allocated list string.
* *
* \return -1 on error. * <b>Note that if the bitmap is a CPU or nodeset, it contains physical indexes.</b>
*
* Lists are comma-separated indexes or ranges.
* Ranges are dash separated indexes.
* A bitmap containing bits 1, 33, 34, and all from 64 to 95 is printed as <tt>"1,33-34,64-95"</tt>.
* The last range may not have an ending index if the bitmap is infinitely set.
*
* \return the number of characters that were written (not including the ending \c \0).
* \return -1 on error, for instance with \p errno set to \c ENOMEM on failure to allocate the output string.
*/ */
HWLOC_DECLSPEC int hwloc_bitmap_list_asprintf(char ** strp, hwloc_const_bitmap_t bitmap); HWLOC_DECLSPEC int hwloc_bitmap_list_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);
/** \brief Parse a list string and stores it in bitmap \p bitmap. /** \brief Parse a list string and stores it in bitmap \p bitmap.
*
* <b>Note that if the bitmap is a CPU or nodeset, the input string must contain physical indexes.</b>
*
* Lists are comma-separated indexes or ranges.
* Ranges are dash separated indexes.
* String <tt>"1,33-34,64-95"</tt> is parsed as a bitmap containing bits 1, 33, 34, and all from 64 to 95.
* The last range may not have an ending index if the bitmap is infinitely set.
*
* \return 0 on success, -1 on error.
*/ */
HWLOC_DECLSPEC int hwloc_bitmap_list_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string); HWLOC_DECLSPEC int hwloc_bitmap_list_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string);
/** \brief Stringify a bitmap in the taskset-specific format. /** \brief Stringify a bitmap in the taskset-specific format.
* *
* The taskset command manipulates bitmap strings that contain a single * <b>Note that if the bitmap is a CPU or nodeset, it contains physical indexes.</b>
*
* The taskset program manipulates bitmap strings that contain a single
* (possibly very long) hexadecimal number starting with 0x. * (possibly very long) hexadecimal number starting with 0x.
* A bitmap containing bits 1, 33, 34, and all from 64 to 95 is printed as <tt>"0xffffffff0000000600000002"</tt>.
* *
* Up to \p buflen characters may be written in buffer \p buf. * Up to \p buflen characters may be written in buffer \p buf.
* *
* If \p buflen is 0, \p buf may safely be \c NULL. * If \p buflen is 0, \p buf may safely be \c NULL.
* *
* \return the number of character that were actually written if not truncating, * \return the number of characters that were actually written if not truncating,
* or that would have been written (not including the ending \\0). * or that would have been written (not including the ending \c \0).
* \return -1 on error.
*/ */
HWLOC_DECLSPEC int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap); HWLOC_DECLSPEC int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap);
/** \brief Stringify a bitmap into a newly allocated taskset-specific string. /** \brief Stringify a bitmap into a newly allocated taskset-specific string.
* *
* \return -1 on error. * <b>Note that if the bitmap is a CPU or nodeset, it contains physical indexes.</b>
*
* The taskset program manipulates bitmap strings that contain a single
* (possibly very long) hexadecimal number starting with 0x.
* A bitmap containing bits 1, 33, 34, and all from 64 to 95 is printed as <tt>"0xffffffff0000000600000002"</tt>.
*
* \return the number of characters that were written (not including the ending \c \0).
* \return -1 on error, for instance with \p errno set to \c ENOMEM on failure to allocate the output string.
*/ */
HWLOC_DECLSPEC int hwloc_bitmap_taskset_asprintf(char ** strp, hwloc_const_bitmap_t bitmap); HWLOC_DECLSPEC int hwloc_bitmap_taskset_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);
/** \brief Parse a taskset-specific bitmap string and stores it in bitmap \p bitmap. /** \brief Parse a taskset-specific bitmap string and stores it in bitmap \p bitmap.
*
* <b>Note that if the bitmap is a CPU or nodeset, the input string must contain physical indexes.</b>
*
* The taskset program manipulates bitmap strings that contain a single
* (possibly very long) hexadecimal number starting with 0x.
* String <tt>"0xffffffff0000000600000002"</tt> is parsed as a bitmap containing all bits between 64 and 95,
* and bits 33, 34 and 1.
*
* \return 0 on success, -1 on error.
*/ */
HWLOC_DECLSPEC int hwloc_bitmap_taskset_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string); HWLOC_DECLSPEC int hwloc_bitmap_taskset_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string);
@@ -279,6 +347,7 @@ HWLOC_DECLSPEC int hwloc_bitmap_to_ulongs(hwloc_const_bitmap_t bitmap, unsigned
* When called on the output of hwloc_topology_get_topology_cpuset(), * When called on the output of hwloc_topology_get_topology_cpuset(),
* the returned number is large enough for all cpusets of the topology. * the returned number is large enough for all cpusets of the topology.
* *
* \return the number of unsigned longs required.
* \return -1 if \p bitmap is infinite. * \return -1 if \p bitmap is infinite.
*/ */
HWLOC_DECLSPEC int hwloc_bitmap_nr_ulongs(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; HWLOC_DECLSPEC int hwloc_bitmap_nr_ulongs(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
@@ -305,21 +374,23 @@ HWLOC_DECLSPEC int hwloc_bitmap_isfull(hwloc_const_bitmap_t bitmap) __hwloc_attr
/** \brief Compute the first index (least significant bit) in bitmap \p bitmap /** \brief Compute the first index (least significant bit) in bitmap \p bitmap
* *
* \return -1 if no index is set in \p bitmap. * \return the first index set in \p bitmap.
* \return -1 if \p bitmap is empty.
*/ */
HWLOC_DECLSPEC int hwloc_bitmap_first(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; HWLOC_DECLSPEC int hwloc_bitmap_first(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
/** \brief Compute the next index in bitmap \p bitmap which is after index \p prev /** \brief Compute the next index in bitmap \p bitmap which is after index \p prev
* *
* If \p prev is -1, the first index is returned. * \return the first index set in \p bitmap if \p prev is \c -1.
* * \return the next index set in \p bitmap if \p prev is not \c -1.
* \return -1 if no index with higher index is set in \p bitmap. * \return -1 if no index with higher index is set in \p bitmap.
*/ */
HWLOC_DECLSPEC int hwloc_bitmap_next(hwloc_const_bitmap_t bitmap, int prev) __hwloc_attribute_pure; HWLOC_DECLSPEC int hwloc_bitmap_next(hwloc_const_bitmap_t bitmap, int prev) __hwloc_attribute_pure;
/** \brief Compute the last index (most significant bit) in bitmap \p bitmap /** \brief Compute the last index (most significant bit) in bitmap \p bitmap
* *
* \return -1 if no index is set in \p bitmap, or if \p bitmap is infinitely set. * \return the last index set in \p bitmap.
* \return -1 if \p bitmap is empty, or if \p bitmap is infinitely set.
*/ */
HWLOC_DECLSPEC int hwloc_bitmap_last(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; HWLOC_DECLSPEC int hwloc_bitmap_last(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
@@ -327,28 +398,29 @@ HWLOC_DECLSPEC int hwloc_bitmap_last(hwloc_const_bitmap_t bitmap) __hwloc_attrib
* indexes that are in the bitmap). * indexes that are in the bitmap).
* *
* \return the number of indexes that are in the bitmap. * \return the number of indexes that are in the bitmap.
*
* \return -1 if \p bitmap is infinitely set. * \return -1 if \p bitmap is infinitely set.
*/ */
HWLOC_DECLSPEC int hwloc_bitmap_weight(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; HWLOC_DECLSPEC int hwloc_bitmap_weight(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
/** \brief Compute the first unset index (least significant bit) in bitmap \p bitmap /** \brief Compute the first unset index (least significant bit) in bitmap \p bitmap
* *
* \return -1 if no index is unset in \p bitmap. * \return the first unset index in \p bitmap.
* \return -1 if \p bitmap is full.
*/ */
HWLOC_DECLSPEC int hwloc_bitmap_first_unset(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; HWLOC_DECLSPEC int hwloc_bitmap_first_unset(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
/** \brief Compute the next unset index in bitmap \p bitmap which is after index \p prev /** \brief Compute the next unset index in bitmap \p bitmap which is after index \p prev
* *
* If \p prev is -1, the first unset index is returned. * \return the first index unset in \p bitmap if \p prev is \c -1.
* * \return the next index unset in \p bitmap if \p prev is not \c -1.
* \return -1 if no index with higher index is unset in \p bitmap. * \return -1 if no index with higher index is unset in \p bitmap.
*/ */
HWLOC_DECLSPEC int hwloc_bitmap_next_unset(hwloc_const_bitmap_t bitmap, int prev) __hwloc_attribute_pure; HWLOC_DECLSPEC int hwloc_bitmap_next_unset(hwloc_const_bitmap_t bitmap, int prev) __hwloc_attribute_pure;
/** \brief Compute the last unset index (most significant bit) in bitmap \p bitmap /** \brief Compute the last unset index (most significant bit) in bitmap \p bitmap
* *
* \return -1 if no index is unset in \p bitmap, or if \p bitmap is infinitely set. * \return the last index unset in \p bitmap.
* \return -1 if \p bitmap is full, or if \p bitmap is not infinitely set.
*/ */
HWLOC_DECLSPEC int hwloc_bitmap_last_unset(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure; HWLOC_DECLSPEC int hwloc_bitmap_last_unset(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
@@ -357,11 +429,11 @@ HWLOC_DECLSPEC int hwloc_bitmap_last_unset(hwloc_const_bitmap_t bitmap) __hwloc_
* The loop must start with hwloc_bitmap_foreach_begin() and end * The loop must start with hwloc_bitmap_foreach_begin() and end
* with hwloc_bitmap_foreach_end() followed by a terminating ';'. * with hwloc_bitmap_foreach_end() followed by a terminating ';'.
* *
* \p index is the loop variable; it should be an unsigned int. The * \p id is the loop variable; it should be an unsigned int. The
* first iteration will set \p index to the lowest index in the bitmap. * first iteration will set \p id to the lowest index in the bitmap.
* Successive iterations will iterate through, in order, all remaining * Successive iterations will iterate through, in order, all remaining
* indexes set in the bitmap. To be specific: each iteration will return a * indexes set in the bitmap. To be specific: each iteration will return a
* value for \p index such that hwloc_bitmap_isset(bitmap, index) is true. * value for \p id such that hwloc_bitmap_isset(bitmap, id) is true.
* *
* The assert prevents the loop from being infinite if the bitmap is infinitely set. * The assert prevents the loop from being infinite if the bitmap is infinitely set.
* *
@@ -428,6 +500,8 @@ HWLOC_DECLSPEC int hwloc_bitmap_not (hwloc_bitmap_t res, hwloc_const_bitmap_t bi
/** \brief Test whether bitmaps \p bitmap1 and \p bitmap2 intersect. /** \brief Test whether bitmaps \p bitmap1 and \p bitmap2 intersect.
* *
* \return 1 if bitmaps intersect, 0 otherwise. * \return 1 if bitmaps intersect, 0 otherwise.
*
* \note The empty bitmap does not intersect any other bitmap.
*/ */
HWLOC_DECLSPEC int hwloc_bitmap_intersects (hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure; HWLOC_DECLSPEC int hwloc_bitmap_intersects (hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure;

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright © 2020 Inria. All rights reserved. * Copyright © 2020-2021 Inria. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
@@ -42,18 +42,23 @@ extern "C" {
* (for instance the "CoreType" and "FrequencyMaxMHz", * (for instance the "CoreType" and "FrequencyMaxMHz",
* see \ref topoattrs_cpukinds). * see \ref topoattrs_cpukinds).
* *
* A higher efficiency value means intrinsic greater performance * A higher efficiency value means greater intrinsic performance
* (and possibly less performance/power efficiency). * (and possibly less performance/power efficiency).
* Kinds with lower efficiency are ranked first: * Kinds with lower efficiency values are ranked first:
* Passing 0 as \p kind_index to hwloc_cpukinds_get_info() will * Passing 0 as \p kind_index to hwloc_cpukinds_get_info() will
* return information about the less efficient CPU kind. * return information about the CPU kind with lower performance
* but higher energy-efficiency.
* Higher \p kind_index values would rather return information
* about power-hungry high-performance cores.
* *
* When available, efficiency values are gathered from the operating * When available, efficiency values are gathered from the operating system.
* system (when \p cpukind_efficiency is set in the * If so, \p cpukind_efficiency is set in the struct hwloc_topology_discovery_support array.
* struct hwloc_topology_discovery_support array, only on Windows 10 for now). * This is currently available on Windows 10, Mac OS X (Darwin),
* Otherwise hwloc tries to compute efficiencies * and on some Linux platforms where core "capacity" is exposed in sysfs.
* by comparing CPU kinds using frequencies (on ARM), *
* or core types and frequencies (on other architectures). * If the operating system does not expose core efficiencies natively,
* hwloc tries to compute efficiencies by comparing CPU kinds using
* frequencies (on ARM), or core types and frequencies (on other architectures).
* The environment variable HWLOC_CPUKINDS_RANKING may be used * The environment variable HWLOC_CPUKINDS_RANKING may be used
* to change these heuristics, see \ref envvar. * to change these heuristics, see \ref envvar.
* *

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright © 2010-2020 Inria. All rights reserved. * Copyright © 2010-2023 Inria. All rights reserved.
* Copyright © 2010-2011 Université Bordeaux * Copyright © 2010-2011 Université Bordeaux
* Copyright © 2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
@@ -42,6 +42,9 @@ extern "C" {
/** \brief Return the domain, bus and device IDs of the CUDA device \p cudevice. /** \brief Return the domain, bus and device IDs of the CUDA device \p cudevice.
* *
* Device \p cudevice must match the local machine. * Device \p cudevice must match the local machine.
*
* \return 0 on success.
* \return -1 on error, for instance if device information could not be found.
*/ */
static __hwloc_inline int static __hwloc_inline int
hwloc_cuda_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_cuda_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused,
@@ -75,7 +78,7 @@ hwloc_cuda_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused
/** \brief Get the CPU set of processors that are physically /** \brief Get the CPU set of processors that are physically
* close to device \p cudevice. * close to device \p cudevice.
* *
* Return the CPU set describing the locality of the CUDA device \p cudevice. * Store in \p set the CPU-set describing the locality of the CUDA device \p cudevice.
* *
* Topology \p topology and device \p cudevice must match the local machine. * Topology \p topology and device \p cudevice must match the local machine.
* I/O devices detection and the CUDA component are not needed in the topology. * I/O devices detection and the CUDA component are not needed in the topology.
@@ -87,6 +90,9 @@ hwloc_cuda_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused
* *
* This function is currently only implemented in a meaningful way for * This function is currently only implemented in a meaningful way for
* Linux; other systems will simply get a full cpuset. * Linux; other systems will simply get a full cpuset.
*
* \return 0 on success.
* \return -1 on error, for instance if device information could not be found.
*/ */
static __hwloc_inline int static __hwloc_inline int
hwloc_cuda_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_cuda_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
@@ -120,8 +126,8 @@ hwloc_cuda_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
/** \brief Get the hwloc PCI device object corresponding to the /** \brief Get the hwloc PCI device object corresponding to the
* CUDA device \p cudevice. * CUDA device \p cudevice.
* *
* Return the PCI device object describing the CUDA device \p cudevice. * \return The hwloc PCI device object describing the CUDA device \p cudevice.
* Return NULL if there is none. * \return \c NULL if none could be found.
* *
* Topology \p topology and device \p cudevice must match the local machine. * Topology \p topology and device \p cudevice must match the local machine.
* I/O devices detection must be enabled in topology \p topology. * I/O devices detection must be enabled in topology \p topology.
@@ -140,8 +146,8 @@ hwloc_cuda_get_device_pcidev(hwloc_topology_t topology, CUdevice cudevice)
/** \brief Get the hwloc OS device object corresponding to CUDA device \p cudevice. /** \brief Get the hwloc OS device object corresponding to CUDA device \p cudevice.
* *
* Return the hwloc OS device object that describes the given * \return The hwloc OS device object that describes the given CUDA device \p cudevice.
* CUDA device \p cudevice. Return NULL if there is none. * \return \c NULL if none could be found.
* *
* Topology \p topology and device \p cudevice must match the local machine. * Topology \p topology and device \p cudevice must match the local machine.
* I/O devices detection and the CUDA component must be enabled in the topology. * I/O devices detection and the CUDA component must be enabled in the topology.
@@ -183,8 +189,8 @@ hwloc_cuda_get_device_osdev(hwloc_topology_t topology, CUdevice cudevice)
/** \brief Get the hwloc OS device object corresponding to the /** \brief Get the hwloc OS device object corresponding to the
* CUDA device whose index is \p idx. * CUDA device whose index is \p idx.
* *
* Return the OS device object describing the CUDA device whose * \return The hwloc OS device object describing the CUDA device whose index is \p idx.
* index is \p idx. Return NULL if there is none. * \return \c NULL if none could be found.
* *
* The topology \p topology does not necessarily have to match the current * The topology \p topology does not necessarily have to match the current
* machine. For instance the topology may be an XML import of a remote host. * machine. For instance the topology may be an XML import of a remote host.

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright © 2010-2020 Inria. All rights reserved. * Copyright © 2010-2023 Inria. All rights reserved.
* Copyright © 2010-2011 Université Bordeaux * Copyright © 2010-2011 Université Bordeaux
* Copyright © 2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
@@ -43,6 +43,9 @@ extern "C" {
/** \brief Return the domain, bus and device IDs of the CUDA device whose index is \p idx. /** \brief Return the domain, bus and device IDs of the CUDA device whose index is \p idx.
* *
* Device index \p idx must match the local machine. * Device index \p idx must match the local machine.
*
* \return 0 on success.
* \return -1 on error, for instance if device information could not be found.
*/ */
static __hwloc_inline int static __hwloc_inline int
hwloc_cudart_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_cudart_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused,
@@ -72,7 +75,7 @@ hwloc_cudart_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unus
/** \brief Get the CPU set of processors that are physically /** \brief Get the CPU set of processors that are physically
* close to device \p idx. * close to device \p idx.
* *
* Return the CPU set describing the locality of the CUDA device * Store in \p set the CPU-set describing the locality of the CUDA device
* whose index is \p idx. * whose index is \p idx.
* *
* Topology \p topology and device \p idx must match the local machine. * Topology \p topology and device \p idx must match the local machine.
@@ -84,6 +87,9 @@ hwloc_cudart_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unus
* *
* This function is currently only implemented in a meaningful way for * This function is currently only implemented in a meaningful way for
* Linux; other systems will simply get a full cpuset. * Linux; other systems will simply get a full cpuset.
*
* \return 0 on success.
* \return -1 on error, for instance if device information could not be found.
*/ */
static __hwloc_inline int static __hwloc_inline int
hwloc_cudart_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_cudart_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
@@ -117,8 +123,8 @@ hwloc_cudart_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unuse
/** \brief Get the hwloc PCI device object corresponding to the /** \brief Get the hwloc PCI device object corresponding to the
* CUDA device whose index is \p idx. * CUDA device whose index is \p idx.
* *
* Return the PCI device object describing the CUDA device whose * \return The hwloc PCI device object describing the CUDA device whose index is \p idx.
* index is \p idx. Return NULL if there is none. * \return \c NULL if none could be found.
* *
* Topology \p topology and device \p idx must match the local machine. * Topology \p topology and device \p idx must match the local machine.
* I/O devices detection must be enabled in topology \p topology. * I/O devices detection must be enabled in topology \p topology.
@@ -138,8 +144,8 @@ hwloc_cudart_get_device_pcidev(hwloc_topology_t topology, int idx)
/** \brief Get the hwloc OS device object corresponding to the /** \brief Get the hwloc OS device object corresponding to the
* CUDA device whose index is \p idx. * CUDA device whose index is \p idx.
* *
* Return the OS device object describing the CUDA device whose * \return The hwloc OS device object describing the CUDA device whose index is \p idx.
* index is \p idx. Return NULL if there is none. * \return \c NULL if none could be found.
* *
* The topology \p topology does not necessarily have to match the current * The topology \p topology does not necessarily have to match the current
* machine. For instance the topology may be an XML import of a remote host. * machine. For instance the topology may be an XML import of a remote host.

View File

@@ -1,6 +1,6 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2018 Inria. All rights reserved. * Copyright © 2009-2022 Inria. All rights reserved.
* Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2012 Université Bordeaux
* Copyright © 2009-2010 Cisco Systems, Inc. All rights reserved. * Copyright © 2009-2010 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
@@ -30,6 +30,15 @@ extern "C" {
/* backward compat with v1.10 before Node->NUMANode clarification */ /* backward compat with v1.10 before Node->NUMANode clarification */
#define HWLOC_OBJ_NODE HWLOC_OBJ_NUMANODE #define HWLOC_OBJ_NODE HWLOC_OBJ_NUMANODE
/** \brief Add a distances structure.
*
* Superseded by hwloc_distances_add_create()+hwloc_distances_add_values()+hwloc_distances_add_commit()
* in v2.5.
*/
HWLOC_DECLSPEC int hwloc_distances_add(hwloc_topology_t topology,
unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *values,
unsigned long kind, unsigned long flags) __hwloc_attribute_deprecated;
/** \brief Insert a misc object by parent. /** \brief Insert a misc object by parent.
* *
* Identical to hwloc_topology_insert_misc_object(). * Identical to hwloc_topology_insert_misc_object().
@@ -46,7 +55,7 @@ hwloc_topology_insert_misc_object_by_parent(hwloc_topology_t topology, hwloc_obj
* *
* If \p size is 0, \p string may safely be \c NULL. * If \p size is 0, \p string may safely be \c NULL.
* *
* \return the number of character that were actually written if not truncating, * \return the number of characters that were actually written if not truncating,
* or that would have been written (not including the ending \\0). * or that would have been written (not including the ending \\0).
*/ */
static __hwloc_inline int static __hwloc_inline int

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright © 2013-2020 Inria. All rights reserved. * Copyright © 2013-2024 Inria. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
@@ -222,6 +222,8 @@ enum hwloc_topology_diff_apply_flags_e {
HWLOC_DECLSPEC int hwloc_topology_diff_apply(hwloc_topology_t topology, hwloc_topology_diff_t diff, unsigned long flags); HWLOC_DECLSPEC int hwloc_topology_diff_apply(hwloc_topology_t topology, hwloc_topology_diff_t diff, unsigned long flags);
/** \brief Destroy a list of topology differences. /** \brief Destroy a list of topology differences.
*
* \return 0.
*/ */
HWLOC_DECLSPEC int hwloc_topology_diff_destroy(hwloc_topology_diff_t diff); HWLOC_DECLSPEC int hwloc_topology_diff_destroy(hwloc_topology_diff_t diff);
@@ -233,6 +235,8 @@ HWLOC_DECLSPEC int hwloc_topology_diff_destroy(hwloc_topology_diff_t diff);
* This identifier is usually the name of the other XML file * This identifier is usually the name of the other XML file
* that contains the reference topology. * that contains the reference topology.
* *
* \return 0 on success, -1 on error.
*
* \note the pointer returned in refname should later be freed * \note the pointer returned in refname should later be freed
* by the caller. * by the caller.
*/ */
@@ -246,10 +250,17 @@ HWLOC_DECLSPEC int hwloc_topology_diff_load_xml(const char *xmlpath, hwloc_topol
* This identifier is usually the name of the other XML file * This identifier is usually the name of the other XML file
* that contains the reference topology. * that contains the reference topology.
* This attribute is given back when reading the diff from XML. * This attribute is given back when reading the diff from XML.
*
* \return 0 on success, -1 on error.
*/ */
HWLOC_DECLSPEC int hwloc_topology_diff_export_xml(hwloc_topology_diff_t diff, const char *refname, const char *xmlpath); HWLOC_DECLSPEC int hwloc_topology_diff_export_xml(hwloc_topology_diff_t diff, const char *refname, const char *xmlpath);
/** \brief Load a list of topology differences from an XML buffer. /** \brief Load a list of topology differences from an XML buffer.
*
* Build a list of differences from the XML memory buffer given
* at \p xmlbuffer and of length \p buflen (including an ending \c \0).
* This buffer may have been filled earlier with
* hwloc_topology_diff_export_xmlbuffer().
* *
* If not \c NULL, \p refname will be filled with the identifier * If not \c NULL, \p refname will be filled with the identifier
* string of the reference topology for the difference file, * string of the reference topology for the difference file,
@@ -257,6 +268,8 @@ HWLOC_DECLSPEC int hwloc_topology_diff_export_xml(hwloc_topology_diff_t diff, co
* This identifier is usually the name of the other XML file * This identifier is usually the name of the other XML file
* that contains the reference topology. * that contains the reference topology.
* *
* \return 0 on success, -1 on error.
*
* \note the pointer returned in refname should later be freed * \note the pointer returned in refname should later be freed
* by the caller. * by the caller.
*/ */
@@ -271,9 +284,11 @@ HWLOC_DECLSPEC int hwloc_topology_diff_load_xmlbuffer(const char *xmlbuffer, int
* that contains the reference topology. * that contains the reference topology.
* This attribute is given back when reading the diff from XML. * This attribute is given back when reading the diff from XML.
* *
* The returned buffer ends with a \0 that is included in the returned * The returned buffer ends with a \c \0 that is included in the returned
* length. * length.
* *
* \return 0 on success, -1 on error.
*
* \note The XML buffer should later be freed with hwloc_free_xmlbuffer(). * \note The XML buffer should later be freed with hwloc_free_xmlbuffer().
*/ */
HWLOC_DECLSPEC int hwloc_topology_diff_export_xmlbuffer(hwloc_topology_diff_t diff, const char *refname, char **xmlbuffer, int *buflen); HWLOC_DECLSPEC int hwloc_topology_diff_export_xmlbuffer(hwloc_topology_diff_t diff, const char *refname, char **xmlbuffer, int *buflen);

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright © 2010-2020 Inria. All rights reserved. * Copyright © 2010-2025 Inria. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
@@ -28,16 +28,27 @@ extern "C" {
/** \brief Matrix of distances between a set of objects. /** \brief Matrix of distances between a set of objects.
* *
* This matrix often contains latencies between NUMA nodes * The most common matrix contains latencies between NUMA nodes
* (as reported in the System Locality Distance Information Table (SLIT) * (as reported in the System Locality Distance Information Table (SLIT)
* in the ACPI specification), which may or may not be physically accurate. * in the ACPI specification), which may or may not be physically accurate.
* It corresponds to the latency for accessing the memory of one node * It corresponds to the latency for accessing the memory of one node
* from a core in another node. * from a core in another node.
* The corresponding kind is ::HWLOC_DISTANCES_KIND_FROM_OS | ::HWLOC_DISTANCES_KIND_FROM_USER. * The corresponding kind is ::HWLOC_DISTANCES_KIND_MEANS_LATENCY | ::HWLOC_DISTANCES_KIND_FROM_USER.
* The name of this distances structure is "NUMALatency". * The name of this distances structure is "NUMALatency".
* *
* The matrix may also contain bandwidths between random sets of objects, * The matrix may also contain bandwidths between random sets of objects,
* possibly provided by the user, as specified in the \p kind attribute. * possibly provided by the user, as specified in the \p kind attribute.
* Other common distance structures include "XGMIBandwidth", "XGMIHops",
* "XeLinkBandwidth" and "NVLinkBandwidth".
*
* Pointers \p objs and \p values should not be replaced, reallocated, freed, etc.
* However callers are allowed to modify \p kind as well as the contents
* of \p objs and \p values arrays.
* For instance, if there is a single NUMA node per Package,
* hwloc_get_obj_with_same_locality() may be used to convert between them
* and replace NUMA nodes in the \p objs array with the corresponding Packages.
* See also hwloc_distances_transform() for applying some transformations
* to the structure.
*/ */
struct hwloc_distances_s { struct hwloc_distances_s {
unsigned nbobjs; /**< \brief Number of objects described by the distance matrix. */ unsigned nbobjs; /**< \brief Number of objects described by the distance matrix. */
@@ -59,11 +70,10 @@ struct hwloc_distances_s {
* The \p kind attribute of struct hwloc_distances_s is a OR'ed set * The \p kind attribute of struct hwloc_distances_s is a OR'ed set
* of kinds. * of kinds.
* *
* A kind of format HWLOC_DISTANCES_KIND_FROM_* specifies where the * Each distance matrix may have only one kind among HWLOC_DISTANCES_KIND_FROM_*
* distance information comes from, if known. * specifying where distance information comes from,
* * and one kind among HWLOC_DISTANCES_KIND_MEANS_* specifying
* A kind of format HWLOC_DISTANCES_KIND_MEANS_* specifies whether * whether values are latencies or bandwidths.
* values are latencies or bandwidths, if applicable.
*/ */
enum hwloc_distances_kind_e { enum hwloc_distances_kind_e {
/** \brief These distances were obtained from the operating system or hardware. /** \brief These distances were obtained from the operating system or hardware.
@@ -91,6 +101,8 @@ enum hwloc_distances_kind_e {
HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH = (1UL<<3), HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH = (1UL<<3),
/** \brief This distances structure covers objects of different types. /** \brief This distances structure covers objects of different types.
* This may apply to the "NVLinkBandwidth" structure in presence
* of a NVSwitch or POWER processor NVLink port.
* \hideinitializer * \hideinitializer
*/ */
HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES = (1UL<<4) HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES = (1UL<<4)
@@ -118,6 +130,8 @@ enum hwloc_distances_kind_e {
* *
* Each distance matrix returned in the \p distances array should be released * Each distance matrix returned in the \p distances array should be released
* by the caller using hwloc_distances_release(). * by the caller using hwloc_distances_release().
*
* \return 0 on success, -1 on error.
*/ */
HWLOC_DECLSPEC int HWLOC_DECLSPEC int
hwloc_distances_get(hwloc_topology_t topology, hwloc_distances_get(hwloc_topology_t topology,
@@ -127,6 +141,8 @@ hwloc_distances_get(hwloc_topology_t topology,
/** \brief Retrieve distance matrices for object at a specific depth in the topology. /** \brief Retrieve distance matrices for object at a specific depth in the topology.
* *
* Identical to hwloc_distances_get() with the additional \p depth filter. * Identical to hwloc_distances_get() with the additional \p depth filter.
*
* \return 0 on success, -1 on error.
*/ */
HWLOC_DECLSPEC int HWLOC_DECLSPEC int
hwloc_distances_get_by_depth(hwloc_topology_t topology, int depth, hwloc_distances_get_by_depth(hwloc_topology_t topology, int depth,
@@ -136,6 +152,8 @@ hwloc_distances_get_by_depth(hwloc_topology_t topology, int depth,
/** \brief Retrieve distance matrices for object of a specific type. /** \brief Retrieve distance matrices for object of a specific type.
* *
* Identical to hwloc_distances_get() with the additional \p type filter. * Identical to hwloc_distances_get() with the additional \p type filter.
*
* \return 0 on success, -1 on error.
*/ */
HWLOC_DECLSPEC int HWLOC_DECLSPEC int
hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type, hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type,
@@ -147,6 +165,10 @@ hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type,
* Usually only one distances structure may match a given name. * Usually only one distances structure may match a given name.
* *
* The name of the most common structure is "NUMALatency". * The name of the most common structure is "NUMALatency".
* Others include "XGMIBandwidth", "XGMIHops", "XeLinkBandwidth",
* and "NVLinkBandwidth".
*
* \return 0 on success, -1 on error.
*/ */
HWLOC_DECLSPEC int HWLOC_DECLSPEC int
hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name, hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name,
@@ -156,7 +178,12 @@ hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name,
/** \brief Get a description of what a distances structure contains. /** \brief Get a description of what a distances structure contains.
* *
* For instance "NUMALatency" for hardware-provided NUMA distances (ACPI SLIT), * For instance "NUMALatency" for hardware-provided NUMA distances (ACPI SLIT),
* or NULL if unknown. * or \c NULL if unknown.
*
* \return the constant string with the name of the distance structure.
*
* \note The returned name should not be freed by the caller,
* it belongs to the hwloc library.
*/ */
HWLOC_DECLSPEC const char * HWLOC_DECLSPEC const char *
hwloc_distances_get_name(hwloc_topology_t topology, struct hwloc_distances_s *distances); hwloc_distances_get_name(hwloc_topology_t topology, struct hwloc_distances_s *distances);
@@ -168,6 +195,94 @@ hwloc_distances_get_name(hwloc_topology_t topology, struct hwloc_distances_s *di
HWLOC_DECLSPEC void HWLOC_DECLSPEC void
hwloc_distances_release(hwloc_topology_t topology, struct hwloc_distances_s *distances); hwloc_distances_release(hwloc_topology_t topology, struct hwloc_distances_s *distances);
/** \brief Transformations of distances structures. */
enum hwloc_distances_transform_e {
/** \brief Remove \c NULL objects from the distances structure.
*
* Every object that was replaced with \c NULL in the \p objs array
* is removed and the \p values array is updated accordingly.
*
* At least \c 2 objects must remain, otherwise hwloc_distances_transform()
* will return \c -1 with \p errno set to \c EINVAL.
*
* \p kind will be updated with or without ::HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES
* according to the remaining objects.
*
* \hideinitializer
*/
HWLOC_DISTANCES_TRANSFORM_REMOVE_NULL = 0,
/** \brief Replace bandwidth values with a number of links.
*
* Usually all values will be either \c 0 (no link) or \c 1 (one link).
* However some matrices could get larger values if some pairs of
* peers are connected by different numbers of links.
*
* Values on the diagonal are set to \c 0.
*
* This transformation only applies to bandwidth matrices.
*
* \hideinitializer
*/
HWLOC_DISTANCES_TRANSFORM_LINKS = 1,
/** \brief Merge switches with multiple ports into a single object.
*
* This currently only applies to NVSwitches where GPUs seem connected
* to different switch ports. Switch ports must be objects with subtype
* "NVSwitch" as in the NVLinkBandwidth matrix.
*
* This transformation will replace all ports with only the first one,
* now connected to all GPUs. Other ports are removed by applying
* ::HWLOC_DISTANCES_TRANSFORM_REMOVE_NULL internally.
* \hideinitializer
*/
HWLOC_DISTANCES_TRANSFORM_MERGE_SWITCH_PORTS = 2,
/** \brief Apply a transitive closure to the matrix to connect objects across switches.
*
* All pairs of GPUs will be reported as directly connected instead of GPUs being
* only connected to switches.
*
* Switch ports must be objects with subtype "NVSwitch" as in the NVLinkBandwidth matrix.
* \hideinitializer
*/
HWLOC_DISTANCES_TRANSFORM_TRANSITIVE_CLOSURE = 3
};
/** \brief Apply a transformation to a distances structure.
*
* Modify a distances structure that was previously obtained with
* hwloc_distances_get() or one of its variants.
*
* This modifies the local copy of the distances structures but does
* not modify the distances information stored inside the topology
* (retrieved by another call to hwloc_distances_get() or exported to XML).
* To do so, one should add a new distances structure with same
* name, kind, objects and values (see \ref hwlocality_distances_add)
* and then remove this old one with hwloc_distances_release_remove().
*
* \p transform must be one of the transformations listed
* in ::hwloc_distances_transform_e.
*
* These transformations may modify the contents of the \p objs or \p values arrays.
*
* \p transform_attr must be \c NULL for now.
*
* \p flags must be \c 0 for now.
*
* \return 0 on success, -1 on error for instance if flags are invalid.
*
* \note Objects in distances array \p objs may be directly modified
* in place without using hwloc_distances_transform().
* One may use hwloc_get_obj_with_same_locality() to easily convert
* between similar objects of different types.
*/
HWLOC_DECLSPEC int hwloc_distances_transform(hwloc_topology_t topology, struct hwloc_distances_s *distances,
enum hwloc_distances_transform_e transform,
void *transform_attr,
unsigned long flags);
/** @} */ /** @} */
@@ -178,6 +293,7 @@ hwloc_distances_release(hwloc_topology_t topology, struct hwloc_distances_s *dis
/** \brief Find the index of an object in a distances structure. /** \brief Find the index of an object in a distances structure.
* *
* \return the index of the object in the distances structure if any.
* \return -1 if object \p obj is not involved in structure \p distances. * \return -1 if object \p obj is not involved in structure \p distances.
*/ */
static __hwloc_inline int static __hwloc_inline int
@@ -195,6 +311,7 @@ hwloc_distances_obj_index(struct hwloc_distances_s *distances, hwloc_obj_t obj)
* The distance from \p obj1 to \p obj2 is stored in the value pointed by * The distance from \p obj1 to \p obj2 is stored in the value pointed by
* \p value1to2 and reciprocally. * \p value1to2 and reciprocally.
* *
* \return 0 on success.
* \return -1 if object \p obj1 or \p obj2 is not involved in structure \p distances. * \return -1 if object \p obj1 or \p obj2 is not involved in structure \p distances.
*/ */
static __hwloc_inline int static __hwloc_inline int
@@ -215,13 +332,87 @@ hwloc_distances_obj_pair_values(struct hwloc_distances_s *distances,
/** \defgroup hwlocality_distances_add Add or remove distances between objects /** \defgroup hwlocality_distances_add Add distances between objects
*
* The usual way to add distances is:
* \code
* hwloc_distances_add_handle_t handle;
* int err = -1;
* handle = hwloc_distances_add_create(topology, "name", kind, 0);
* if (handle) {
* err = hwloc_distances_add_values(topology, handle, nbobjs, objs, values, 0);
* if (!err)
* err = hwloc_distances_add_commit(topology, handle, flags);
* }
* \endcode
* If \p err is \c 0 at the end, then addition was successful.
*
* @{ * @{
*/ */
/** \brief Handle to a new distances structure during its addition to the topology. */
typedef void * hwloc_distances_add_handle_t;
/** \brief Create a new empty distances structure.
*
* Create an empty distances structure
* to be filled with hwloc_distances_add_values()
* and then committed with hwloc_distances_add_commit().
*
* Parameter \p name is optional, it may be \c NULL.
* Otherwise, it will be copied internally and may later be freed by the caller.
*
* \p kind specifies the kind of distance as a OR'ed set of ::hwloc_distances_kind_e.
* Only one kind of meaning and one kind of provenance may be given if appropriate
* (e.g. ::HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH and ::HWLOC_DISTANCES_KIND_FROM_USER).
* Kind ::HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES will be automatically set
* according to objects having different types in hwloc_distances_add_values().
*
* \p flags must be \c 0 for now.
*
* \return A hwloc_distances_add_handle_t that should then be passed
* to hwloc_distances_add_values() and hwloc_distances_add_commit().
*
* \return \c NULL on error.
*/
HWLOC_DECLSPEC hwloc_distances_add_handle_t
hwloc_distances_add_create(hwloc_topology_t topology,
const char *name, unsigned long kind,
unsigned long flags);
/** \brief Specify the objects and values in a new empty distances structure.
*
* Specify the objects and values for a new distances structure
* that was returned as a handle by hwloc_distances_add_create().
* The structure must then be committed with hwloc_distances_add_commit().
*
* The number of objects is \p nbobjs and the array of objects is \p objs.
* Distance values are stored as a one-dimension array in \p values.
* The distance from object i to object j is in slot i*nbobjs+j.
*
* \p nbobjs must be at least 2.
*
* Arrays \p objs and \p values will be copied internally,
* they may later be freed by the caller.
*
* On error, the temporary distances structure and its content are destroyed.
*
* \p flags must be \c 0 for now.
*
* \return 0 on success.
* \return -1 on error.
*/
HWLOC_DECLSPEC int hwloc_distances_add_values(hwloc_topology_t topology,
hwloc_distances_add_handle_t handle,
unsigned nbobjs, hwloc_obj_t *objs,
hwloc_uint64_t *values,
unsigned long flags);
/** \brief Flags for adding a new distances to a topology. */ /** \brief Flags for adding a new distances to a topology. */
enum hwloc_distances_add_flag_e { enum hwloc_distances_add_flag_e {
/** \brief Try to group objects based on the newly provided distance information. /** \brief Try to group objects based on the newly provided distance information.
* Grouping is only performed when the distances structure contains latencies,
* and when all objects are of the same type.
* \hideinitializer * \hideinitializer
*/ */
HWLOC_DISTANCES_ADD_FLAG_GROUP = (1UL<<0), HWLOC_DISTANCES_ADD_FLAG_GROUP = (1UL<<0),
@@ -233,23 +424,33 @@ enum hwloc_distances_add_flag_e {
HWLOC_DISTANCES_ADD_FLAG_GROUP_INACCURATE = (1UL<<1) HWLOC_DISTANCES_ADD_FLAG_GROUP_INACCURATE = (1UL<<1)
}; };
/** \brief Provide a new distance matrix. /** \brief Commit a new distances structure.
* *
* Provide the matrix of distances between a set of objects given by \p nbobjs * This function finalizes the distances structure and inserts it in the topology.
* and the \p objs array. \p nbobjs must be at least 2.
* The distances are stored as a one-dimension array in \p values.
* The distance from object i to object j is in slot i*nbobjs+j.
* *
* \p kind specifies the kind of distance as a OR'ed set of ::hwloc_distances_kind_e. * Parameter \p handle was previously returned by hwloc_distances_add_create().
* Kind ::HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES will be automatically added * Then objects and values were specified with hwloc_distances_add_values().
* if objects of different types are given.
* *
* \p flags configures the behavior of the function using an optional OR'ed set of * \p flags configures the behavior of the function using an optional OR'ed set of
* ::hwloc_distances_add_flag_e. * ::hwloc_distances_add_flag_e.
* It may be used to request the grouping of existing objects based on distances.
*
* On error, the temporary distances structure and its content are destroyed.
*
* \return 0 on success.
* \return -1 on error.
*/
HWLOC_DECLSPEC int hwloc_distances_add_commit(hwloc_topology_t topology,
hwloc_distances_add_handle_t handle,
unsigned long flags);
/** @} */
/** \defgroup hwlocality_distances_remove Remove distances between objects
* @{
*/ */
HWLOC_DECLSPEC int hwloc_distances_add(hwloc_topology_t topology,
unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *values,
unsigned long kind, unsigned long flags);
/** \brief Remove all distance matrices from a topology. /** \brief Remove all distance matrices from a topology.
* *
@@ -258,18 +459,24 @@ HWLOC_DECLSPEC int hwloc_distances_add(hwloc_topology_t topology,
* *
* If these distances were used to group objects, these additional * If these distances were used to group objects, these additional
* Group objects are not removed from the topology. * Group objects are not removed from the topology.
*
* \return 0 on success, -1 on error.
*/ */
HWLOC_DECLSPEC int hwloc_distances_remove(hwloc_topology_t topology); HWLOC_DECLSPEC int hwloc_distances_remove(hwloc_topology_t topology);
/** \brief Remove distance matrices for objects at a specific depth in the topology. /** \brief Remove distance matrices for objects at a specific depth in the topology.
* *
* Identical to hwloc_distances_remove() but only applies to one level of the topology. * Identical to hwloc_distances_remove() but only applies to one level of the topology.
*
* \return 0 on success, -1 on error.
*/ */
HWLOC_DECLSPEC int hwloc_distances_remove_by_depth(hwloc_topology_t topology, int depth); HWLOC_DECLSPEC int hwloc_distances_remove_by_depth(hwloc_topology_t topology, int depth);
/** \brief Remove distance matrices for objects of a specific type in the topology. /** \brief Remove distance matrices for objects of a specific type in the topology.
* *
* Identical to hwloc_distances_remove() but only applies to one level of the topology. * Identical to hwloc_distances_remove() but only applies to one level of the topology.
*
* \return 0 on success, -1 on error.
*/ */
static __hwloc_inline int static __hwloc_inline int
hwloc_distances_remove_by_type(hwloc_topology_t topology, hwloc_obj_type_t type) hwloc_distances_remove_by_type(hwloc_topology_t topology, hwloc_obj_type_t type)
@@ -283,6 +490,8 @@ hwloc_distances_remove_by_type(hwloc_topology_t topology, hwloc_obj_type_t type)
/** \brief Release and remove the given distance matrix from the topology. /** \brief Release and remove the given distance matrix from the topology.
* *
* This function includes a call to hwloc_distances_release(). * This function includes a call to hwloc_distances_release().
*
* \return 0 on success, -1 on error.
*/ */
HWLOC_DECLSPEC int hwloc_distances_release_remove(hwloc_topology_t topology, struct hwloc_distances_s *distances); HWLOC_DECLSPEC int hwloc_distances_release_remove(hwloc_topology_t topology, struct hwloc_distances_s *distances);

View File

@@ -55,7 +55,7 @@ enum hwloc_topology_export_xml_flags_e {
* *
* \p flags is a OR'ed set of ::hwloc_topology_export_xml_flags_e. * \p flags is a OR'ed set of ::hwloc_topology_export_xml_flags_e.
* *
* \return -1 if a failure occured. * \return 0 on success, or -1 on error.
* *
* \note See also hwloc_topology_set_userdata_export_callback() * \note See also hwloc_topology_set_userdata_export_callback()
* for exporting application-specific object userdata. * for exporting application-specific object userdata.
@@ -91,7 +91,7 @@ HWLOC_DECLSPEC int hwloc_topology_export_xml(hwloc_topology_t topology, const ch
* *
* \p flags is a OR'ed set of ::hwloc_topology_export_xml_flags_e. * \p flags is a OR'ed set of ::hwloc_topology_export_xml_flags_e.
* *
* \return -1 if a failure occured. * \return 0 on success, or -1 on error.
* *
* \note See also hwloc_topology_set_userdata_export_callback() * \note See also hwloc_topology_set_userdata_export_callback()
* for exporting application-specific object userdata. * for exporting application-specific object userdata.
@@ -145,13 +145,15 @@ HWLOC_DECLSPEC void hwloc_topology_set_userdata_export_callback(hwloc_topology_t
* that were given to the export callback. * that were given to the export callback.
* *
* Only printable characters may be exported to XML string attributes. * Only printable characters may be exported to XML string attributes.
* If a non-printable character is passed in \p name or \p buffer,
* the function returns -1 with errno set to EINVAL.
* *
* If exporting binary data, the application should first encode into * If exporting binary data, the application should first encode into
* printable characters only (or use hwloc_export_obj_userdata_base64()). * printable characters only (or use hwloc_export_obj_userdata_base64()).
* It should also take care of portability issues if the export may * It should also take care of portability issues if the export may
* be reimported on a different architecture. * be reimported on a different architecture.
*
* \return 0 on success.
* \return -1 with errno set to \c EINVAL if a non-printable character is
* passed in \p name or \p buffer.
*/ */
HWLOC_DECLSPEC int hwloc_export_obj_userdata(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length); HWLOC_DECLSPEC int hwloc_export_obj_userdata(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length);
@@ -165,8 +167,14 @@ HWLOC_DECLSPEC int hwloc_export_obj_userdata(void *reserved, hwloc_topology_t to
* This function may only be called from within the export() callback passed * This function may only be called from within the export() callback passed
* to hwloc_topology_set_userdata_export_callback(). * to hwloc_topology_set_userdata_export_callback().
* *
* The name must be made of printable characters for export to XML string attributes.
*
* The function does not take care of portability issues if the export * The function does not take care of portability issues if the export
* may be reimported on a different architecture. * may be reimported on a different architecture.
*
* \return 0 on success.
* \return -1 with errno set to \c EINVAL if a non-printable character is
* passed in \p name.
*/ */
HWLOC_DECLSPEC int hwloc_export_obj_userdata_base64(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length); HWLOC_DECLSPEC int hwloc_export_obj_userdata_base64(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length);

View File

@@ -1,6 +1,6 @@
/* /*
* Copyright © 2012 Blue Brain Project, EPFL. All rights reserved. * Copyright © 2012 Blue Brain Project, EPFL. All rights reserved.
* Copyright © 2012-2013 Inria. All rights reserved. * Copyright © 2012-2023 Inria. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
@@ -39,9 +39,9 @@ extern "C" {
/** \brief Get the hwloc OS device object corresponding to the /** \brief Get the hwloc OS device object corresponding to the
* OpenGL display given by port and device index. * OpenGL display given by port and device index.
* *
* Return the OS device object describing the OpenGL display * \return The hwloc OS device object describing the OpenGL display
* whose port (server) is \p port and device (screen) is \p device. * whose port (server) is \p port and device (screen) is \p device.
* Return NULL if there is none. * \return \c NULL if none could be found.
* *
* The topology \p topology does not necessarily have to match the current * The topology \p topology does not necessarily have to match the current
* machine. For instance the topology may be an XML import of a remote host. * machine. For instance the topology may be an XML import of a remote host.
@@ -70,9 +70,9 @@ hwloc_gl_get_display_osdev_by_port_device(hwloc_topology_t topology,
/** \brief Get the hwloc OS device object corresponding to the /** \brief Get the hwloc OS device object corresponding to the
* OpenGL display given by name. * OpenGL display given by name.
* *
* Return the OS device object describing the OpenGL display * \return The hwloc OS device object describing the OpenGL display
* whose name is \p name, built as ":port.device" such as ":0.0" . * whose name is \p name, built as ":port.device" such as ":0.0" .
* Return NULL if there is none. * \return \c NULL if none could be found.
* *
* The topology \p topology does not necessarily have to match the current * The topology \p topology does not necessarily have to match the current
* machine. For instance the topology may be an XML import of a remote host. * machine. For instance the topology may be an XML import of a remote host.
@@ -99,9 +99,11 @@ hwloc_gl_get_display_osdev_by_name(hwloc_topology_t topology,
/** \brief Get the OpenGL display port and device corresponding /** \brief Get the OpenGL display port and device corresponding
* to the given hwloc OS object. * to the given hwloc OS object.
* *
* Return the OpenGL display port (server) in \p port and device (screen) * Retrieves the OpenGL display port (server) in \p port and device (screen)
* in \p screen that correspond to the given hwloc OS device object. * in \p screen that correspond to the given hwloc OS device object.
* Return \c -1 if there is none. *
* \return 0 on success.
* \return -1 if none could be found.
* *
* The topology \p topology does not necessarily have to match the current * The topology \p topology does not necessarily have to match the current
* machine. For instance the topology may be an XML import of a remote host. * machine. For instance the topology may be an XML import of a remote host.

View File

@@ -1,6 +1,6 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2023 Inria. All rights reserved.
* Copyright © 2009-2011 Université Bordeaux * Copyright © 2009-2011 Université Bordeaux
* Copyright © 2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
@@ -52,6 +52,8 @@ extern "C" {
* that takes a cpu_set_t as input parameter. * that takes a cpu_set_t as input parameter.
* *
* \p schedsetsize should be sizeof(cpu_set_t) unless \p schedset was dynamically allocated with CPU_ALLOC * \p schedsetsize should be sizeof(cpu_set_t) unless \p schedset was dynamically allocated with CPU_ALLOC
*
* \return 0.
*/ */
static __hwloc_inline int static __hwloc_inline int
hwloc_cpuset_to_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t hwlocset, hwloc_cpuset_to_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t hwlocset,
@@ -80,6 +82,9 @@ hwloc_cpuset_to_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribute
* that takes a cpu_set_t as input parameter. * that takes a cpu_set_t as input parameter.
* *
* \p schedsetsize should be sizeof(cpu_set_t) unless \p schedset was dynamically allocated with CPU_ALLOC * \p schedsetsize should be sizeof(cpu_set_t) unless \p schedset was dynamically allocated with CPU_ALLOC
*
* \return 0 on success.
* \return -1 with errno set to \c ENOMEM if some internal reallocation failed.
*/ */
static __hwloc_inline int static __hwloc_inline int
hwloc_cpuset_from_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_cpuset_t hwlocset, hwloc_cpuset_from_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_cpuset_t hwlocset,
@@ -95,7 +100,8 @@ hwloc_cpuset_from_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribu
cpu = 0; cpu = 0;
while (count) { while (count) {
if (CPU_ISSET_S(cpu, schedsetsize, schedset)) { if (CPU_ISSET_S(cpu, schedsetsize, schedset)) {
hwloc_bitmap_set(hwlocset, cpu); if (hwloc_bitmap_set(hwlocset, cpu) < 0)
return -1;
count--; count--;
} }
cpu++; cpu++;
@@ -107,7 +113,8 @@ hwloc_cpuset_from_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribu
assert(schedsetsize == sizeof(cpu_set_t)); assert(schedsetsize == sizeof(cpu_set_t));
for(cpu=0; cpu<CPU_SETSIZE; cpu++) for(cpu=0; cpu<CPU_SETSIZE; cpu++)
if (CPU_ISSET(cpu, schedset)) if (CPU_ISSET(cpu, schedset))
hwloc_bitmap_set(hwlocset, cpu); if (hwloc_bitmap_set(hwlocset, cpu) < 0)
return -1;
#endif /* !CPU_ZERO_S */ #endif /* !CPU_ZERO_S */
return 0; return 0;
} }

File diff suppressed because it is too large Load Diff

View File

@@ -1,136 +0,0 @@
/*
* Copyright © 2013-2016 Inria. All rights reserved.
* See COPYING in top-level directory.
*/
/** \file
* \brief Macros to help interaction between hwloc and Intel Xeon Phi (MIC).
*
* Applications that use both hwloc and Intel Xeon Phi (MIC) may want to
* include this file so as to get topology information for MIC devices.
*/
#ifndef HWLOC_INTEL_MIC_H
#define HWLOC_INTEL_MIC_H
#include "hwloc.h"
#include "hwloc/autogen/config.h"
#include "hwloc/helper.h"
#ifdef HWLOC_LINUX_SYS
#include "hwloc/linux.h"
#include <dirent.h>
#include <string.h>
#endif
#include <stdio.h>
#include <stdlib.h>
#ifdef __cplusplus
extern "C" {
#endif
/** \defgroup hwlocality_intel_mic Interoperability with Intel Xeon Phi (MIC)
*
* This interface offers ways to retrieve topology information about
* Intel Xeon Phi (MIC) devices.
*
* @{
*/
/** \brief Get the CPU set of logical processors that are physically
* close to MIC device whose index is \p idx.
*
* Return the CPU set describing the locality of the MIC device whose index is \p idx.
*
* Topology \p topology and device index \p idx must match the local machine.
* I/O devices detection is not needed in the topology.
*
* The function only returns the locality of the device.
* If more information about the device is needed, OS objects should
* be used instead, see hwloc_intel_mic_get_device_osdev_by_index().
*
* This function is currently only implemented in a meaningful way for
* Linux; other systems will simply get a full cpuset.
*/
static __hwloc_inline int
hwloc_intel_mic_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
                                  int idx __hwloc_attribute_unused,
                                  hwloc_cpuset_t set)
{
  /*
   * Fill `set` with the locality of MIC device number `idx`.
   *
   * Returns 0 once `set` has been filled, or -1 when the topology is not
   * the local system (errno set to EINVAL) or when the sysfs directory of
   * the device cannot be opened.
   */
#ifdef HWLOC_LINUX_SYS
  /* If we're on Linux, use the sysfs mechanism to get the local cpus */
#define HWLOC_INTEL_MIC_DEVICE_SYSFS_PATH_MAX 128
  char path[HWLOC_INTEL_MIC_DEVICE_SYSFS_PATH_MAX];
  DIR *sysdir = NULL;
  struct dirent *dirent;
  unsigned pcibus, pcidev, pcifunc;
  int have_locality = 0;

  if (!hwloc_topology_is_thissystem(topology)) {
    errno = EINVAL;
    return -1;
  }

  snprintf(path, sizeof(path), "/sys/class/mic/mic%d", idx);
  sysdir = opendir(path);
  if (!sysdir)
    return -1;

  /* Only the first "pci_BB:DD.FF" entry of the device directory is used. */
  while ((dirent = readdir(sysdir)) != NULL) {
    if (sscanf(dirent->d_name, "pci_%02x:%02x.%02x", &pcibus, &pcidev, &pcifunc) != 3)
      continue;
    snprintf(path, sizeof(path), "/sys/class/mic/mic%d/pci_%02x:%02x.%02x/local_cpus",
             idx, pcibus, pcidev, pcifunc);
    if (hwloc_linux_read_path_as_cpumask(path, set) >= 0
        && !hwloc_bitmap_iszero(set))
      have_locality = 1;
    break;
  }
  closedir(sysdir);

  /*
   * Fall back to the whole machine when the mask is unreadable or empty,
   * and also when no PCI entry was found at all: previously that last case
   * returned 0 while leaving `set` untouched.
   */
  if (!have_locality)
    hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
#else
  /* Non-Linux systems simply get a full cpuset */
  hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
#endif
  return 0;
}
/** \brief Get the hwloc OS device object corresponding to the
* MIC device for the given index.
*
* Return the OS device object describing the MIC device whose index is \p idx.
* Return NULL if there is none.
*
* The topology \p topology does not necessarily have to match the current
* machine. For instance the topology may be an XML import of a remote host.
* I/O devices detection must be enabled in the topology.
*
* \note The corresponding PCI device object can be obtained by looking
* at the OS device parent object.
*/
static __hwloc_inline hwloc_obj_t
hwloc_intel_mic_get_device_osdev_by_index(hwloc_topology_t topology,
                                          unsigned idx)
{
  /*
   * Scan every OS device of the topology and return the first co-processor
   * whose name is "mic" followed by a number equal to `idx`.
   * Returns NULL when no OS device matches.
   */
  hwloc_obj_t candidate;

  for (candidate = hwloc_get_next_osdev(topology, NULL);
       candidate != NULL;
       candidate = hwloc_get_next_osdev(topology, candidate)) {
    if (candidate->attr->osdev.type != HWLOC_OBJ_OSDEV_COPROC)
      continue;
    if (!candidate->name)
      continue;
    if (strncmp("mic", candidate->name, 3) != 0)
      continue;
    /* the digits after the "mic" prefix are the device index */
    if (atoi(candidate->name + 3) == (int) idx)
      return candidate;
  }
  return NULL;
}
/** @} */
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* HWLOC_INTEL_MIC_H */

View File

@@ -0,0 +1,298 @@
/*
* Copyright © 2021-2024 Inria. All rights reserved.
* See COPYING in top-level directory.
*/
/** \file
* \brief Macros to help interaction between hwloc and the oneAPI Level Zero interface.
*
* Applications that use both hwloc and Level Zero may want to
* include this file so as to get topology information for L0 devices.
*/
#ifndef HWLOC_LEVELZERO_H
#define HWLOC_LEVELZERO_H
#include "hwloc.h"
#include "hwloc/autogen/config.h"
#include "hwloc/helper.h"
#ifdef HWLOC_LINUX_SYS
#include "hwloc/linux.h"
#endif
#include <level_zero/ze_api.h>
#include <level_zero/zes_api.h>
#ifdef __cplusplus
extern "C" {
#endif
/** \defgroup hwlocality_levelzero Interoperability with the oneAPI Level Zero interface.
*
* This interface offers ways to retrieve topology information about
* devices managed by the Level Zero API, both for main Core devices (ZE API)
* and the Sysman devices (ZES API).
*
* @{
*/
/** \brief Get the CPU set of logical processors that are physically
* close to the Level Zero device \p device
*
* Store in \p set the CPU-set describing the locality of
* the Level Zero device \p device.
*
* Topology \p topology and device \p device must match the local machine.
* The Level Zero library must have been initialized with zeInit().
* I/O devices detection and the Level Zero component are not needed in the
* topology.
*
* The function only returns the locality of the device.
* If more information about the device is needed, OS objects should
* be used instead, see hwloc_levelzero_get_device_osdev().
*
* This function is currently only implemented in a meaningful way for
* Linux; other systems will simply get a full cpuset.
*
* \return 0 on success.
* \return -1 on error, for instance if device information could not be found.
*
* \note zeDevicePciGetPropertiesExt() must be supported, or the entire machine
* locality will be returned.
*/
static __hwloc_inline int
hwloc_levelzero_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
                                  ze_device_handle_t device, hwloc_cpuset_t set)
{
  /*
   * Fill `set` with the locality of the Level Zero device `device`.
   *
   * Returns 0 on success. Returns -1 when the topology is not the local
   * system or the PCI properties query fails (errno set to EINVAL), or
   * when copying the fallback cpuset into `set` fails.
   */
#ifdef HWLOC_LINUX_SYS
  /* If we're on Linux, use the sysfs mechanism to get the local cpus */
#define HWLOC_LEVELZERO_DEVICE_SYSFS_PATH_MAX 128
  char path[HWLOC_LEVELZERO_DEVICE_SYSFS_PATH_MAX];
  ze_pci_ext_properties_t pci;
  ze_result_t res;

  if (!hwloc_topology_is_thissystem(topology)) {
    errno = EINVAL;
    return -1;
  }

  /* The PCI address of the device selects the sysfs entry to read. */
  pci.stype = ZE_STRUCTURE_TYPE_PCI_EXT_PROPERTIES;
  pci.pNext = NULL;
  res = zeDevicePciGetPropertiesExt(device, &pci);
  if (res != ZE_RESULT_SUCCESS) {
    /* NOTE(review): the function documentation says the entire machine
     * locality is returned when this call is unsupported, but the code
     * reports an error here -- confirm which behavior is intended.
     */
    errno = EINVAL;
    return -1;
  }

  snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/local_cpus",
           pci.address.domain, pci.address.bus, pci.address.device, pci.address.function);
  if (hwloc_linux_read_path_as_cpumask(path, set) >= 0
      && !hwloc_bitmap_iszero(set))
    return 0;

  /* Unreadable or empty mask: report the whole machine instead,
   * and propagate a failure of the copy rather than claiming success.
   */
  if (hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)) < 0)
    return -1;
#else
  /* Non-Linux systems simply get a full cpuset */
  if (hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)) < 0)
    return -1;
#endif
  return 0;
}
/** \brief Get the CPU set of logical processors that are physically
* close to the Level Zero Sysman device \p device
*
* Store in \p set the CPU-set describing the locality of
* the Level Zero device \p device.
*
* Topology \p topology and device \p device must match the local machine.
* The Level Zero library must have been initialized with Sysman enabled
* with zesInit().
* I/O devices detection and the Level Zero component are not needed in the
* topology.
*
* The function only returns the locality of the device.
* If more information about the device is needed, OS objects should
* be used instead, see hwloc_levelzero_get_sysman_device_osdev().
*
* This function is currently only implemented in a meaningful way for
* Linux; other systems will simply get a full cpuset.
*
* \return 0 on success.
* \return -1 on error, for instance if device information could not be found.
*/
static __hwloc_inline int
hwloc_levelzero_get_sysman_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
                                         zes_device_handle_t device, hwloc_cpuset_t set)
{
  /*
   * Fill `set` with the locality of the Level Zero Sysman device `device`.
   *
   * Returns 0 on success. Returns -1 when the topology is not the local
   * system or the PCI properties query fails (errno set to EINVAL), or
   * when copying the fallback cpuset into `set` fails.
   */
#ifdef HWLOC_LINUX_SYS
  /* If we're on Linux, use the sysfs mechanism to get the local cpus */
#define HWLOC_LEVELZERO_DEVICE_SYSFS_PATH_MAX 128
  char path[HWLOC_LEVELZERO_DEVICE_SYSFS_PATH_MAX];
  zes_pci_properties_t pci;
  ze_result_t res;

  if (!hwloc_topology_is_thissystem(topology)) {
    errno = EINVAL;
    return -1;
  }

  /* NOTE(review): unlike the ZE variant of this helper, `pci` is handed to
   * the driver without any stype/pNext initialization -- confirm against
   * the Level Zero Sysman specification whether zes_pci_properties_t
   * requires it.
   */
  res = zesDevicePciGetProperties(device, &pci);
  if (res != ZE_RESULT_SUCCESS) {
    errno = EINVAL;
    return -1;
  }

  snprintf(path, sizeof(path), "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/local_cpus",
           pci.address.domain, pci.address.bus, pci.address.device, pci.address.function);
  if (hwloc_linux_read_path_as_cpumask(path, set) >= 0
      && !hwloc_bitmap_iszero(set))
    return 0;

  /* Unreadable or empty mask: report the whole machine instead,
   * and propagate a failure of the copy rather than claiming success.
   */
  if (hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)) < 0)
    return -1;
#else
  /* Non-Linux systems simply get a full cpuset */
  if (hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology)) < 0)
    return -1;
#endif
  return 0;
}
/** \brief Get the hwloc OS device object corresponding to Level Zero device
* \p device.
*
* \return The hwloc OS device object that describes the given Level Zero device \p device.
* \return \c NULL if none could be found.
*
* Topology \p topology and device \p device must match the local machine.
* The Level Zero library must have been initialized with zeInit().
* I/O devices detection and the Level Zero component must be enabled in the
* topology. If not, the locality of the object may still be found using
* hwloc_levelzero_get_device_cpuset().
*
* \note If the input ZE device is actually a subdevice, then its parent
* (root device) is actually translated, i.e. the main hwloc OS device
* is returned instead of one of its children.
*
* \note The corresponding hwloc PCI device may be found by looking
* at the result parent pointer (unless PCI devices are filtered out).
*
* \note zeDevicePciGetPropertiesExt() must be supported.
*/
static __hwloc_inline hwloc_obj_t
hwloc_levelzero_get_device_osdev(hwloc_topology_t topology, ze_device_handle_t device)
{
  /*
   * Find the hwloc OS device whose name starts with "ze" and whose PCI
   * device ancestor has the same PCI address as `device`.
   *
   * Returns NULL with errno set to EINVAL when the topology is not the
   * local system or when the PCI properties query fails, and NULL when no
   * OS device matches.
   */
  ze_pci_ext_properties_t pci;
  ze_result_t res;
  hwloc_obj_t osdev;

  if (!hwloc_topology_is_thissystem(topology)) {
    errno = EINVAL;
    return NULL;
  }

  pci.stype = ZE_STRUCTURE_TYPE_PCI_EXT_PROPERTIES;
  pci.pNext = NULL;
  res = zeDevicePciGetPropertiesExt(device, &pci);
  if (res != ZE_RESULT_SUCCESS) {
    errno = EINVAL;
    return NULL;
  }

  osdev = NULL;
  while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
    hwloc_obj_t pcidev;

    /* skip unnamed OS devices instead of handing NULL to strncmp() */
    if (!osdev->name || strncmp(osdev->name, "ze", 2))
      continue;

    /* climb the parent chain up to the enclosing PCI device, if any */
    pcidev = osdev;
    while (pcidev && pcidev->type != HWLOC_OBJ_PCI_DEVICE)
      pcidev = pcidev->parent;
    if (!pcidev)
      continue;

    /* here pcidev is non-NULL and is a PCI device: compare addresses */
    if (pcidev->attr->pcidev.domain == pci.address.domain
        && pcidev->attr->pcidev.bus == pci.address.bus
        && pcidev->attr->pcidev.dev == pci.address.device
        && pcidev->attr->pcidev.func == pci.address.function)
      return osdev;

    /* FIXME: when we'll have serialnumber, try it in case PCI is filtered-out */
  }

  return NULL;
}
/** \brief Get the hwloc OS device object corresponding to Level Zero Sysman device
* \p device.
*
* \return The hwloc OS device object that describes the given Level Zero device \p device.
* \return \c NULL if none could be found.
*
* Topology \p topology and device \p device must match the local machine.
* The Level Zero library must have been initialized with Sysman enabled
* with zesInit().
* I/O devices detection and the Level Zero component must be enabled in the
* topology. If not, the locality of the object may still be found using
* hwloc_levelzero_get_sysman_device_cpuset().
*
* \note If the input ZES device is actually a subdevice, then its parent
* (root device) is actually translated, i.e. the main hwloc OS device
* is returned instead of one of its children.
*
* \note The corresponding hwloc PCI device may be found by looking
* at the result parent pointer (unless PCI devices are filtered out).
*/
static __hwloc_inline hwloc_obj_t
hwloc_levelzero_get_sysman_device_osdev(hwloc_topology_t topology, zes_device_handle_t device)
{
  /*
   * Find the hwloc OS device whose name starts with "ze" and whose PCI
   * device ancestor has the same PCI address as the Sysman device `device`.
   *
   * Returns NULL with errno set to EINVAL when the topology is not the
   * local system or when the PCI properties query fails, and NULL when no
   * OS device matches.
   */
  zes_pci_properties_t pci;
  ze_result_t res;
  hwloc_obj_t osdev;

  if (!hwloc_topology_is_thissystem(topology)) {
    errno = EINVAL;
    return NULL;
  }

  /* NOTE(review): unlike the ZE variant of this helper, `pci` is handed to
   * the driver without any stype/pNext initialization -- confirm against
   * the Level Zero Sysman specification whether zes_pci_properties_t
   * requires it.
   */
  res = zesDevicePciGetProperties(device, &pci);
  if (res != ZE_RESULT_SUCCESS) {
    errno = EINVAL;
    return NULL;
  }

  osdev = NULL;
  while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
    hwloc_obj_t pcidev;

    /* skip unnamed OS devices instead of handing NULL to strncmp() */
    if (!osdev->name || strncmp(osdev->name, "ze", 2))
      continue;

    /* climb the parent chain up to the enclosing PCI device, if any */
    pcidev = osdev;
    while (pcidev && pcidev->type != HWLOC_OBJ_PCI_DEVICE)
      pcidev = pcidev->parent;
    if (!pcidev)
      continue;

    /* here pcidev is non-NULL and is a PCI device: compare addresses */
    if (pcidev->attr->pcidev.domain == pci.address.domain
        && pcidev->attr->pcidev.bus == pci.address.bus
        && pcidev->attr->pcidev.dev == pci.address.device
        && pcidev->attr->pcidev.func == pci.address.function)
      return osdev;

    /* FIXME: when we'll have serialnumber, try it in case PCI is filtered-out */
  }

  return NULL;
}
/** @} */
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* HWLOC_LEVELZERO_H */

View File

@@ -1,6 +1,6 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2017 Inria. All rights reserved. * Copyright © 2009-2023 Inria. All rights reserved.
* Copyright © 2009-2010, 2012 Université Bordeaux * Copyright © 2009-2010, 2012 Université Bordeaux
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
@@ -50,6 +50,8 @@ extern "C" {
* This function may be used before calling set_mempolicy, mbind, migrate_pages * This function may be used before calling set_mempolicy, mbind, migrate_pages
* or any other function that takes an array of unsigned long and a maximal * or any other function that takes an array of unsigned long and a maximal
* node number as input parameter. * node number as input parameter.
*
* \return 0.
*/ */
static __hwloc_inline int static __hwloc_inline int
hwloc_cpuset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset, hwloc_cpuset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset,
@@ -84,6 +86,8 @@ hwloc_cpuset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_cpus
* This function may be used before calling set_mempolicy, mbind, migrate_pages * This function may be used before calling set_mempolicy, mbind, migrate_pages
* or any other function that takes an array of unsigned long and a maximal * or any other function that takes an array of unsigned long and a maximal
* node number as input parameter. * node number as input parameter.
*
* \return 0.
*/ */
static __hwloc_inline int static __hwloc_inline int
hwloc_nodeset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset, hwloc_nodeset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset,
@@ -119,6 +123,9 @@ hwloc_nodeset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_nod
* This function may be used after calling get_mempolicy or any other function * This function may be used after calling get_mempolicy or any other function
* that takes an array of unsigned long as output parameter (and possibly * that takes an array of unsigned long as output parameter (and possibly
* a maximal node number as input parameter). * a maximal node number as input parameter).
*
* \return 0 on success.
* \return -1 on error, for instance if failing an internal reallocation.
*/ */
static __hwloc_inline int static __hwloc_inline int
hwloc_cpuset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_cpuset_t cpuset, hwloc_cpuset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_cpuset_t cpuset,
@@ -130,7 +137,8 @@ hwloc_cpuset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_cpuset_t
while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL)
if (node->os_index < maxnode if (node->os_index < maxnode
&& (mask[node->os_index/sizeof(*mask)/8] & (1UL << (node->os_index % (sizeof(*mask)*8))))) && (mask[node->os_index/sizeof(*mask)/8] & (1UL << (node->os_index % (sizeof(*mask)*8)))))
hwloc_bitmap_or(cpuset, cpuset, node->cpuset); if (hwloc_bitmap_or(cpuset, cpuset, node->cpuset) < 0)
return -1;
return 0; return 0;
} }
@@ -142,6 +150,9 @@ hwloc_cpuset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_cpuset_t
* This function may be used after calling get_mempolicy or any other function * This function may be used after calling get_mempolicy or any other function
* that takes an array of unsigned long as output parameter (and possibly * that takes an array of unsigned long as output parameter (and possibly
* a maximal node number as input parameter). * a maximal node number as input parameter).
*
* \return 0 on success.
* \return -1 with errno set to \c ENOMEM if some internal reallocation failed.
*/ */
static __hwloc_inline int static __hwloc_inline int
hwloc_nodeset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_nodeset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_nodeset_t nodeset,
@@ -153,7 +164,8 @@ hwloc_nodeset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_nodeset
while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL)
if (node->os_index < maxnode if (node->os_index < maxnode
&& (mask[node->os_index/sizeof(*mask)/8] & (1UL << (node->os_index % (sizeof(*mask)*8))))) && (mask[node->os_index/sizeof(*mask)/8] & (1UL << (node->os_index % (sizeof(*mask)*8)))))
hwloc_bitmap_set(nodeset, node->os_index); if (hwloc_bitmap_set(nodeset, node->os_index) < 0)
return -1;
return 0; return 0;
} }
@@ -184,7 +196,7 @@ hwloc_nodeset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_nodeset
* This function may be used before calling many numa_ functions * This function may be used before calling many numa_ functions
* that use a struct bitmask as an input parameter. * that use a struct bitmask as an input parameter.
* *
* \return newly allocated struct bitmask. * \return newly allocated struct bitmask, or \c NULL on error.
*/ */
static __hwloc_inline struct bitmask * static __hwloc_inline struct bitmask *
hwloc_cpuset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset) __hwloc_attribute_malloc; hwloc_cpuset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset) __hwloc_attribute_malloc;
@@ -209,7 +221,7 @@ hwloc_cpuset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_cpu
* This function may be used before calling many numa_ functions * This function may be used before calling many numa_ functions
* that use a struct bitmask as an input parameter. * that use a struct bitmask as an input parameter.
* *
* \return newly allocated struct bitmask. * \return newly allocated struct bitmask, or \c NULL on error.
*/ */
static __hwloc_inline struct bitmask * static __hwloc_inline struct bitmask *
hwloc_nodeset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset) __hwloc_attribute_malloc; hwloc_nodeset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset) __hwloc_attribute_malloc;
@@ -231,6 +243,9 @@ hwloc_nodeset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_no
* *
* This function may be used after calling many numa_ functions * This function may be used after calling many numa_ functions
* that use a struct bitmask as an output parameter. * that use a struct bitmask as an output parameter.
*
* \return 0 on success.
* \return -1 with errno set to \c ENOMEM if some internal reallocation failed.
*/ */
static __hwloc_inline int static __hwloc_inline int
hwloc_cpuset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_cpuset_t cpuset, hwloc_cpuset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_cpuset_t cpuset,
@@ -241,7 +256,8 @@ hwloc_cpuset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_cpuset_
hwloc_bitmap_zero(cpuset); hwloc_bitmap_zero(cpuset);
while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL)
if (numa_bitmask_isbitset(bitmask, node->os_index)) if (numa_bitmask_isbitset(bitmask, node->os_index))
hwloc_bitmap_or(cpuset, cpuset, node->cpuset); if (hwloc_bitmap_or(cpuset, cpuset, node->cpuset) < 0)
return -1;
return 0; return 0;
} }
@@ -249,6 +265,9 @@ hwloc_cpuset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_cpuset_
* *
* This function may be used after calling many numa_ functions * This function may be used after calling many numa_ functions
* that use a struct bitmask as an output parameter. * that use a struct bitmask as an output parameter.
*
* \return 0 on success.
* \return -1 with errno set to \c ENOMEM if some internal reallocation failed.
*/ */
static __hwloc_inline int static __hwloc_inline int
hwloc_nodeset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_nodeset_t nodeset, hwloc_nodeset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_nodeset_t nodeset,
@@ -259,7 +278,8 @@ hwloc_nodeset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_nodese
hwloc_bitmap_zero(nodeset); hwloc_bitmap_zero(nodeset);
while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL) while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL)
if (numa_bitmask_isbitset(bitmask, node->os_index)) if (numa_bitmask_isbitset(bitmask, node->os_index))
hwloc_bitmap_set(nodeset, node->os_index); if (hwloc_bitmap_set(nodeset, node->os_index) < 0)
return -1;
return 0; return 0;
} }

View File

@@ -1,6 +1,6 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2016 Inria. All rights reserved. * Copyright © 2009-2023 Inria. All rights reserved.
* Copyright © 2009-2011 Université Bordeaux * Copyright © 2009-2011 Université Bordeaux
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
@@ -38,22 +38,35 @@ extern "C" {
* The behavior is exactly the same as the Linux sched_setaffinity system call, * The behavior is exactly the same as the Linux sched_setaffinity system call,
* but uses a hwloc cpuset. * but uses a hwloc cpuset.
* *
* \return 0 on success, -1 on error.
*
* \note This is equivalent to calling hwloc_set_proc_cpubind() with * \note This is equivalent to calling hwloc_set_proc_cpubind() with
* HWLOC_CPUBIND_THREAD as flags. * HWLOC_CPUBIND_THREAD as flags.
*/ */
HWLOC_DECLSPEC int hwloc_linux_set_tid_cpubind(hwloc_topology_t topology, pid_t tid, hwloc_const_cpuset_t set); HWLOC_DECLSPEC int hwloc_linux_set_tid_cpubind(hwloc_topology_t topology, pid_t tid, hwloc_const_cpuset_t set);
/** \brief Get the current binding of thread \p tid /** \brief Get the current binding of thread \p tid
*
* The CPU-set \p set (previously allocated by the caller)
* is filled with the list of PUs which the thread
* was last bound to.
* *
* The behavior is exactly the same as the Linux sched_getaffinity system call, * The behavior is exactly the same as the Linux sched_getaffinity system call,
* but uses a hwloc cpuset. * but uses a hwloc cpuset.
* *
* \return 0 on success, -1 on error.
*
* \note This is equivalent to calling hwloc_get_proc_cpubind() with * \note This is equivalent to calling hwloc_get_proc_cpubind() with
* ::HWLOC_CPUBIND_THREAD as flags. * ::HWLOC_CPUBIND_THREAD as flags.
*/ */
HWLOC_DECLSPEC int hwloc_linux_get_tid_cpubind(hwloc_topology_t topology, pid_t tid, hwloc_cpuset_t set); HWLOC_DECLSPEC int hwloc_linux_get_tid_cpubind(hwloc_topology_t topology, pid_t tid, hwloc_cpuset_t set);
/** \brief Get the last physical CPU where thread \p tid ran. /** \brief Get the last physical CPU where thread \p tid ran.
*
* The CPU-set \p set (previously allocated by the caller)
* is filled with the PU which the thread last ran on.
*
* \return 0 on success, -1 on error.
* *
* \note This is equivalent to calling hwloc_get_proc_last_cpu_location() with * \note This is equivalent to calling hwloc_get_proc_last_cpu_location() with
* ::HWLOC_CPUBIND_THREAD as flags. * ::HWLOC_CPUBIND_THREAD as flags.
@@ -65,6 +78,8 @@ HWLOC_DECLSPEC int hwloc_linux_get_tid_last_cpu_location(hwloc_topology_t topolo
* Might be used when reading CPU set from sysfs attributes such as topology * Might be used when reading CPU set from sysfs attributes such as topology
* and caches for processors, or local_cpus for devices. * and caches for processors, or local_cpus for devices.
* *
* \return 0 on success, -1 on error.
*
* \note This function ignores the HWLOC_FSROOT environment variable. * \note This function ignores the HWLOC_FSROOT environment variable.
*/ */
HWLOC_DECLSPEC int hwloc_linux_read_path_as_cpumask(const char *path, hwloc_bitmap_t set); HWLOC_DECLSPEC int hwloc_linux_read_path_as_cpumask(const char *path, hwloc_bitmap_t set);

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright © 2019-2020 Inria. All rights reserved. * Copyright © 2019-2025 Inria. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
@@ -54,6 +54,17 @@ extern "C" {
* Attribute values for these nodes, if any, may then be obtained with * Attribute values for these nodes, if any, may then be obtained with
* hwloc_memattr_get_value() and manually compared with the desired criteria. * hwloc_memattr_get_value() and manually compared with the desired criteria.
* *
* Memory attributes are also used internally to build Memory Tiers which provide
* an easy way to distinguish NUMA nodes of different kinds, as explained
* in \ref heteromem.
*
* Beside tiers, hwloc defines a set of "default" nodes where normal memory
* allocations should be made from (see hwloc_topology_get_default_nodeset()).
* This is also useful for dividing the machine into a set of non-overlapping
* NUMA domains, for instance for binding tasks per domain.
*
* \sa An example is available in doc/examples/memory-attributes.c in the source tree.
*
* \note The API also supports specific objects as initiator, * \note The API also supports specific objects as initiator,
* but it is currently not used internally by hwloc. * but it is currently not used internally by hwloc.
* Users may for instance use it to provide custom performance * Users may for instance use it to provide custom performance
@@ -63,21 +74,26 @@ extern "C" {
* @{ * @{
*/ */
/** \brief Memory node attributes. */ /** \brief Predefined memory attribute IDs.
* See ::hwloc_memattr_id_t for the generic definition of IDs
* for predefined or custom attributes.
*/
enum hwloc_memattr_id_e { enum hwloc_memattr_id_e {
/** \brief "Capacity". /** \brief
* The capacity is returned in bytes * The \"Capacity\" is returned in bytes (local_memory attribute in objects).
* (local_memory attribute in objects).
* *
* Best capacity nodes are nodes with <b>higher capacity</b>. * Best capacity nodes are nodes with <b>higher capacity</b>.
* *
* No initiator is involved when looking at this attribute. * No initiator is involved when looking at this attribute.
* The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST. * The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST.
*
* Capacity values may not be modified using hwloc_memattr_set_value().
* \hideinitializer
*/ */
HWLOC_MEMATTR_ID_CAPACITY = 0, HWLOC_MEMATTR_ID_CAPACITY = 0,
/** \brief "Locality". /** \brief
* The locality is returned as the number of PUs in that locality * The \"Locality\" is returned as the number of PUs in that locality
* (e.g. the weight of its cpuset). * (e.g. the weight of its cpuset).
* *
* Best locality nodes are nodes with <b>smaller locality</b> * Best locality nodes are nodes with <b>smaller locality</b>
@@ -87,34 +103,108 @@ enum hwloc_memattr_id_e {
* *
* No initiator is involved when looking at this attribute. * No initiator is involved when looking at this attribute.
* The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST. * The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST.
* Locality values may not be modified using hwloc_memattr_set_value().
* \hideinitializer
*/ */
HWLOC_MEMATTR_ID_LOCALITY = 1, HWLOC_MEMATTR_ID_LOCALITY = 1,
/** \brief "Bandwidth". /** \brief
* The bandwidth is returned in MiB/s, as seen from the given initiator location. * The \"Bandwidth\" is returned in MiB/s, as seen from the given initiator location.
*
* Best bandwidth nodes are nodes with <b>higher bandwidth</b>. * Best bandwidth nodes are nodes with <b>higher bandwidth</b>.
*
* The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST * The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST
* and ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR. * and ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR.
*
* This is the average bandwidth for read and write accesses. If the platform
* provides individual read and write bandwidths but no explicit average value,
* hwloc computes and returns the average.
* \hideinitializer
*/ */
HWLOC_MEMATTR_ID_BANDWIDTH = 2, HWLOC_MEMATTR_ID_BANDWIDTH = 2,
/** \brief "Latency". /** \brief
* The latency is returned as nanoseconds, as seen from the given initiator location. * The \"ReadBandwidth\" is returned in MiB/s, as seen from the given initiator location.
*
* Best bandwidth nodes are nodes with <b>higher bandwidth</b>.
*
* The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST
* and ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR.
* \hideinitializer
*/
HWLOC_MEMATTR_ID_READ_BANDWIDTH = 4,
/** \brief
* The \"WriteBandwidth\" is returned in MiB/s, as seen from the given initiator location.
*
* Best bandwidth nodes are nodes with <b>higher bandwidth</b>.
*
* The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST
* and ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR.
* \hideinitializer
*/
HWLOC_MEMATTR_ID_WRITE_BANDWIDTH = 5,
/** \brief
* The \"Latency\" is returned as nanoseconds, as seen from the given initiator location.
*
* Best latency nodes are nodes with <b>smaller latency</b>. * Best latency nodes are nodes with <b>smaller latency</b>.
*
* The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_LOWER_FIRST * The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_LOWER_FIRST
* and ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR. * and ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR.
*
* This is the average latency for read and write accesses. If the platform
* provides individual read and write latencies but no explicit average value,
* hwloc computes and returns the average.
* \hideinitializer
*/ */
HWLOC_MEMATTR_ID_LATENCY = 3 HWLOC_MEMATTR_ID_LATENCY = 3,
/* TODO read vs write, persistence? */ /** \brief
* The \"ReadLatency\" is returned as nanoseconds, as seen from the given initiator location.
*
* Best latency nodes are nodes with <b>smaller latency</b>.
*
* The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_LOWER_FIRST
* and ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR.
* \hideinitializer
*/
HWLOC_MEMATTR_ID_READ_LATENCY = 6,
/** \brief
* The \"WriteLatency\" is returned as nanoseconds, as seen from the given initiator location.
*
* Best latency nodes are nodes with <b>smaller latency</b>.
*
* The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_LOWER_FIRST
* and ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR.
* \hideinitializer
*/
HWLOC_MEMATTR_ID_WRITE_LATENCY = 7,
/* TODO persistence? */
HWLOC_MEMATTR_ID_MAX /**< \private
* Sentinel value for predefined attributes.
* Dynamically registered custom attributes start here.
*/
}; };
/** \brief A memory attribute identifier. /** \brief A memory attribute identifier.
* May be either one of ::hwloc_memattr_id_e or a new id returned by hwloc_memattr_register(). *
* hwloc predefines some commonly-used attributes in ::hwloc_memattr_id_e.
* One may then dynamically register custom ones with hwloc_memattr_register(),
* they will be assigned IDs immediately after the predefined ones.
* See \ref hwlocality_memattrs_manage for more information about
* existing attribute IDs.
*/ */
typedef unsigned hwloc_memattr_id_t; typedef unsigned hwloc_memattr_id_t;
/** \brief Return the identifier of the memory attribute with the given name. /** \brief Return the identifier of the memory attribute with the given name.
*
* \return 0 on success.
* \return -1 with errno set to \c EINVAL if no such attribute exists.
*/ */
HWLOC_DECLSPEC int HWLOC_DECLSPEC int
hwloc_memattr_get_by_name(hwloc_topology_t topology, hwloc_memattr_get_by_name(hwloc_topology_t topology,
@@ -160,6 +250,16 @@ enum hwloc_local_numanode_flag_e {
*/ */
HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY = (1UL<<1), HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY = (1UL<<1),
/** \brief Select NUMA nodes whose locality intersects the given cpuset.
* This includes larger and smaller localities as well as localities
* that are partially included.
* For instance, if the locality is one core of both packages, a NUMA node
* local to one package is neither larger nor smaller than this locality,
* but it intersects it.
* \hideinitializer
*/
HWLOC_LOCAL_NUMANODE_FLAG_INTERSECT_LOCALITY = (1UL<<3),
/** \brief Select all NUMA nodes in the topology. /** \brief Select all NUMA nodes in the topology.
* The initiator \p initiator is ignored. * The initiator \p initiator is ignored.
* \hideinitializer * \hideinitializer
@@ -184,6 +284,8 @@ enum hwloc_local_numanode_flag_e {
* or the number of nodes that would have been stored if there were * or the number of nodes that would have been stored if there were
* enough room. * enough room.
* *
* \return 0 on success or -1 on error.
*
* \note Some of these NUMA nodes may not have any memory attribute * \note Some of these NUMA nodes may not have any memory attribute
* values and hence not be reported as actual targets in other functions. * values and hence not be reported as actual targets in other functions.
* *
@@ -203,7 +305,57 @@ hwloc_get_local_numanode_objs(hwloc_topology_t topology,
hwloc_obj_t *nodes, hwloc_obj_t *nodes,
unsigned long flags); unsigned long flags);
/** \brief Return the set of default NUMA nodes
*
* In machines with heterogeneous memory, some NUMA nodes are considered
* the default ones, i.e. where basic allocations should be made from.
* These are usually DRAM nodes.
*
* Other nodes may be reserved for specific use (I/O device memory, e.g. GPU memory),
* small but high performance (HBM), large but slow memory (NVM), etc.
* Buffers should usually not be allocated from there unless explicitly required.
*
* This function fills \p nodeset with the bits of NUMA nodes considered default.
*
* It is guaranteed that these nodes have non-intersecting CPU sets,
* i.e. cores may not have multiple local NUMA nodes anymore.
* Hence this may be used to iterate over the platform divided into separate
* NUMA localities, for instance for binding one task per NUMA domain.
*
* Any core that had some local NUMA node(s) in the initial topology should
* still have one in the default nodeset. Corner cases where this would be
* wrong consist in asymmetric platforms with missing DRAM nodes, or topologies
* that were already restricted to fewer NUMA nodes.
*
* The returned nodeset may be passed to hwloc_topology_restrict() with
* ::HWLOC_RESTRICT_FLAG_BYNODESET to remove all non-default nodes from
* the topology. The resulting topology will be easier to use when iterating
* over (now homogeneous) NUMA nodes.
*
* The heuristics for finding default nodes rely on memory tiers and subtypes
* (see \ref heteromem) as well as the assumption that hardware vendors list
* default nodes first in hardware tables.
*
* \p flags must be \c 0 for now.
*
* \return 0 on success.
* \return -1 on error.
*
* \note The returned nodeset usually contains all nodes from a single memory
* tier, likely the DRAM one.
*
* \note The returned nodeset is included in the list of available nodes
* returned by hwloc_topology_get_topology_nodeset(). It is strictly smaller
* if the machine has heterogeneous memory.
*
* \note The heuristics may return a suboptimal set of nodes if hwloc could
* not guess memory types and/or if some default nodes were removed earlier
* from the topology (e.g. with hwloc_topology_restrict()).
*/
HWLOC_DECLSPEC int
hwloc_topology_get_default_nodeset(hwloc_topology_t topology,
hwloc_nodeset_t nodeset,
unsigned long flags);
/** \brief Return an attribute value for a specific target NUMA node. /** \brief Return an attribute value for a specific target NUMA node.
* *
@@ -211,8 +363,16 @@ hwloc_get_local_numanode_objs(hwloc_topology_t topology,
* (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR), * (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR),
* location \p initiator is ignored and may be \c NULL. * location \p initiator is ignored and may be \c NULL.
* *
* \p target_node cannot be \c NULL. If \p attribute is ::HWLOC_MEMATTR_ID_CAPACITY,
* \p target_node must be a NUMA node. If it is ::HWLOC_MEMATTR_ID_LOCALITY,
* \p target_node must have a CPU set.
*
* \p flags must be \c 0 for now. * \p flags must be \c 0 for now.
* *
* \return 0 on success.
* \return -1 on error, for instance with errno set to \c EINVAL if flags
* are invalid or no such attribute exists.
*
* \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET * \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET
* when refering to accesses performed by CPU cores. * when refering to accesses performed by CPU cores.
* ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc, * ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc,
@@ -244,7 +404,10 @@ hwloc_memattr_get_value(hwloc_topology_t topology,
* *
* \p flags must be \c 0 for now. * \p flags must be \c 0 for now.
* *
* If there are no matching targets, \c -1 is returned with \p errno set to \c ENOENT; * \return 0 on success.
* \return -1 with errno set to \c ENOENT if there are no matching targets.
* \return -1 with errno set to \c EINVAL if flags are invalid,
* or no such attribute exists.
* *
* \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET * \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET
* when refering to accesses performed by CPU cores. * when refering to accesses performed by CPU cores.
@@ -260,10 +423,6 @@ hwloc_memattr_get_best_target(hwloc_topology_t topology,
hwloc_obj_t *best_target, hwloc_uint64_t *value); hwloc_obj_t *best_target, hwloc_uint64_t *value);
/** \brief Return the best initiator for the given attribute and target NUMA node. /** \brief Return the best initiator for the given attribute and target NUMA node.
*
* If the attribute does not relate to a specific initiator
* (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR),
* \c -1 is returned and \p errno is set to \c EINVAL.
* *
* If \p value is non \c NULL, the corresponding value is returned there. * If \p value is non \c NULL, the corresponding value is returned there.
* *
@@ -277,96 +436,22 @@ hwloc_memattr_get_best_target(hwloc_topology_t topology,
* The returned initiator should not be modified or freed, * The returned initiator should not be modified or freed,
* it belongs to the topology. * it belongs to the topology.
* *
* \p target_node cannot be \c NULL.
*
* \p flags must be \c 0 for now. * \p flags must be \c 0 for now.
* *
* If there are no matching initiators, \c -1 is returned with \p errno set to \c ENOENT; * \return 0 on success.
* \return -1 with errno set to \c ENOENT if there are no matching initiators.
* \return -1 with errno set to \c EINVAL if the attribute does not relate to a specific initiator
* (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR).
*/ */
HWLOC_DECLSPEC int HWLOC_DECLSPEC int
hwloc_memattr_get_best_initiator(hwloc_topology_t topology, hwloc_memattr_get_best_initiator(hwloc_topology_t topology,
hwloc_memattr_id_t attribute, hwloc_memattr_id_t attribute,
hwloc_obj_t target, hwloc_obj_t target_node,
unsigned long flags, unsigned long flags,
struct hwloc_location *best_initiator, hwloc_uint64_t *value); struct hwloc_location *best_initiator, hwloc_uint64_t *value);
/** @} */
/** \defgroup hwlocality_memattrs_manage Managing memory attributes
* @{
*/
/** \brief Return the name of a memory attribute.
*/
HWLOC_DECLSPEC int
hwloc_memattr_get_name(hwloc_topology_t topology,
hwloc_memattr_id_t attribute,
const char **name);
/** \brief Return the flags of the given attribute.
*
* Flags are a OR'ed set of ::hwloc_memattr_flag_e.
*/
HWLOC_DECLSPEC int
hwloc_memattr_get_flags(hwloc_topology_t topology,
hwloc_memattr_id_t attribute,
unsigned long *flags);
/** \brief Memory attribute flags.
* Given to hwloc_memattr_register() and returned by hwloc_memattr_get_flags().
*/
enum hwloc_memattr_flag_e {
/** \brief The best nodes for this memory attribute are those with the higher values.
* For instance Bandwidth.
*/
HWLOC_MEMATTR_FLAG_HIGHER_FIRST = (1UL<<0),
/** \brief The best nodes for this memory attribute are those with the lower values.
* For instance Latency.
*/
HWLOC_MEMATTR_FLAG_LOWER_FIRST = (1UL<<1),
/** \brief The value returned for this memory attribute depends on the given initiator.
* For instance Bandwidth and Latency, but not Capacity.
*/
HWLOC_MEMATTR_FLAG_NEED_INITIATOR = (1UL<<2)
};
/** \brief Register a new memory attribute.
*
* Add a specific memory attribute that is not defined in ::hwloc_memattr_id_e.
* Flags are a OR'ed set of ::hwloc_memattr_flag_e. It must contain at least
* one of ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST or ::HWLOC_MEMATTR_FLAG_LOWER_FIRST.
*/
HWLOC_DECLSPEC int
hwloc_memattr_register(hwloc_topology_t topology,
const char *name,
unsigned long flags,
hwloc_memattr_id_t *id);
/** \brief Set an attribute value for a specific target NUMA node.
*
* If the attribute does not relate to a specific initiator
* (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR),
* location \p initiator is ignored and may be \c NULL.
*
* The initiator will be copied into the topology,
* the caller should free anything allocated to store the initiator,
* for instance the cpuset.
*
* \p flags must be \c 0 for now.
*
* \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET
* when refering to accesses performed by CPU cores.
* ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc,
* but users may for instance use it to provide custom information about
* host memory accesses performed by GPUs.
*/
HWLOC_DECLSPEC int
hwloc_memattr_set_value(hwloc_topology_t topology,
hwloc_memattr_id_t attribute,
hwloc_obj_t target_node,
struct hwloc_location *initiator,
unsigned long flags,
hwloc_uint64_t value);
/** \brief Return the target NUMA nodes that have some values for a given attribute. /** \brief Return the target NUMA nodes that have some values for a given attribute.
* *
* Return targets for the given attribute in the \p targets array * Return targets for the given attribute in the \p targets array
@@ -397,8 +482,10 @@ hwloc_memattr_set_value(hwloc_topology_t topology,
* NUMA nodes with hwloc_get_local_numanode_objs() and then look at their attribute * NUMA nodes with hwloc_get_local_numanode_objs() and then look at their attribute
* values. * values.
* *
* \return 0 on success or -1 on error.
*
* \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET * \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET
* when refering to accesses performed by CPU cores. * when referring to accesses performed by CPU cores.
* ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc, * ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc,
* but users may for instance use it to provide custom information about * but users may for instance use it to provide custom information about
* host memory accesses performed by GPUs. * host memory accesses performed by GPUs.
@@ -408,7 +495,7 @@ hwloc_memattr_get_targets(hwloc_topology_t topology,
hwloc_memattr_id_t attribute, hwloc_memattr_id_t attribute,
struct hwloc_location *initiator, struct hwloc_location *initiator,
unsigned long flags, unsigned long flags,
unsigned *nrp, hwloc_obj_t *targets, hwloc_uint64_t *values); unsigned *nr, hwloc_obj_t *targets, hwloc_uint64_t *values);
/** \brief Return the initiators that have values for a given attribute for a specific target NUMA node. /** \brief Return the initiators that have values for a given attribute for a specific target NUMA node.
* *
@@ -428,12 +515,16 @@ hwloc_memattr_get_targets(hwloc_topology_t topology,
* The returned initiators should not be modified or freed, * The returned initiators should not be modified or freed,
* they belong to the topology. * they belong to the topology.
* *
* \p target_node cannot be \c NULL.
*
* \p flags must be \c 0 for now. * \p flags must be \c 0 for now.
* *
* If the attribute does not relate to a specific initiator * If the attribute does not relate to a specific initiator
* (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR), * (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR),
* no initiator is returned. * no initiator is returned.
* *
* \return 0 on success or -1 on error.
*
* \note This function is meant for tools and debugging (listing internal information) * \note This function is meant for tools and debugging (listing internal information)
* rather than for application queries. Applications should rather select useful * rather than for application queries. Applications should rather select useful
* NUMA nodes with hwloc_get_local_numanode_objs() and then look at their attribute * NUMA nodes with hwloc_get_local_numanode_objs() and then look at their attribute
@@ -445,6 +536,131 @@ hwloc_memattr_get_initiators(hwloc_topology_t topology,
hwloc_obj_t target_node, hwloc_obj_t target_node,
unsigned long flags, unsigned long flags,
unsigned *nr, struct hwloc_location *initiators, hwloc_uint64_t *values); unsigned *nr, struct hwloc_location *initiators, hwloc_uint64_t *values);
/** @} */
/** \defgroup hwlocality_memattrs_manage Managing memory attributes
*
* Memory attributes are identified by an ID (::hwloc_memattr_id_t)
* and a name. hwloc_memattr_get_name() and hwloc_memattr_get_by_name()
* convert between them (or return error if the attribute does not exist).
*
* The set of valid ::hwloc_memattr_id_t is a contiguous set starting at \c 0.
* It first contains predefined attributes, as listed
* in ::hwloc_memattr_id_e (from \c 0 to \c HWLOC_MEMATTR_ID_MAX-1).
* Then custom attributes may be dynamically registered with
* hwloc_memattr_register(). They will get the following IDs
* (\c HWLOC_MEMATTR_ID_MAX for the first one, etc.).
*
* To iterate over all valid attributes
* (either predefined or dynamically registered custom ones),
* one may iterate over IDs starting from \c 0 until hwloc_memattr_get_name()
* or hwloc_memattr_get_flags() returns an error.
*
* The values for an existing attribute or for custom dynamically registered ones
* may be set or modified with hwloc_memattr_set_value().
*
* @{
*/
/** \brief Return the name of a memory attribute.
*
* The output pointer \p name cannot be \c NULL.
*
* \return 0 on success.
* \return -1 with errno set to \c EINVAL if the attribute does not exist.
*/
HWLOC_DECLSPEC int
hwloc_memattr_get_name(hwloc_topology_t topology,
hwloc_memattr_id_t attribute,
const char **name);
/** \brief Return the flags of the given attribute.
*
* Flags are a OR'ed set of ::hwloc_memattr_flag_e.
*
* The output pointer \p flags cannot be \c NULL.
*
* \return 0 on success.
* \return -1 with errno set to \c EINVAL if the attribute does not exist.
*/
HWLOC_DECLSPEC int
hwloc_memattr_get_flags(hwloc_topology_t topology,
hwloc_memattr_id_t attribute,
unsigned long *flags);
/** \brief Memory attribute flags.
* Given to hwloc_memattr_register() and returned by hwloc_memattr_get_flags().
*/
enum hwloc_memattr_flag_e {
/** \brief The best nodes for this memory attribute are those with the higher values.
* For instance Bandwidth.
*/
HWLOC_MEMATTR_FLAG_HIGHER_FIRST = (1UL<<0),
/** \brief The best nodes for this memory attribute are those with the lower values.
* For instance Latency.
*/
HWLOC_MEMATTR_FLAG_LOWER_FIRST = (1UL<<1),
/** \brief The value returned for this memory attribute depends on the given initiator.
* For instance Bandwidth and Latency, but not Capacity.
*/
HWLOC_MEMATTR_FLAG_NEED_INITIATOR = (1UL<<2)
};
/** \brief Register a new memory attribute.
*
* Add a new custom memory attribute.
* Flags are a OR'ed set of ::hwloc_memattr_flag_e. It must contain one of
* ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST or ::HWLOC_MEMATTR_FLAG_LOWER_FIRST but not both.
*
* The new attribute \p id is immediately after the last existing attribute ID
* (which is either the ID of the last registered attribute if any,
* or the ID of the last predefined attribute in ::hwloc_memattr_id_e).
*
* \return 0 on success.
* \return -1 with errno set to \c EINVAL if an invalid set of flags is given.
* \return -1 with errno set to \c EBUSY if another attribute already uses this name.
*/
HWLOC_DECLSPEC int
hwloc_memattr_register(hwloc_topology_t topology,
const char *name,
unsigned long flags,
hwloc_memattr_id_t *id);
/** \brief Set an attribute value for a specific target NUMA node.
*
* If the attribute does not relate to a specific initiator
* (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR),
* location \p initiator is ignored and may be \c NULL.
*
* The initiator will be copied into the topology,
* the caller should free anything allocated to store the initiator,
* for instance the cpuset.
*
* \p target_node cannot be \c NULL.
*
* \p attribute cannot be ::HWLOC_MEMATTR_ID_CAPACITY or
* ::HWLOC_MEMATTR_ID_LOCALITY.
*
* \p flags must be \c 0 for now.
*
* \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET
* when referring to accesses performed by CPU cores.
* ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc,
* but users may for instance use it to provide custom information about
* host memory accesses performed by GPUs.
*
* \return 0 on success or -1 on error.
*/
HWLOC_DECLSPEC int
hwloc_memattr_set_value(hwloc_topology_t topology,
hwloc_memattr_id_t attribute,
hwloc_obj_t target_node,
struct hwloc_location *initiator,
unsigned long flags,
hwloc_uint64_t value);
/** @} */ /** @} */
#ifdef __cplusplus #ifdef __cplusplus

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright © 2012-2020 Inria. All rights reserved. * Copyright © 2012-2023 Inria. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
@@ -39,7 +39,7 @@ extern "C" {
/** \brief Get the CPU set of processors that are physically /** \brief Get the CPU set of processors that are physically
* close to NVML device \p device. * close to NVML device \p device.
* *
* Return the CPU set describing the locality of the NVML device \p device. * Store in \p set the CPU-set describing the locality of the NVML device \p device.
* *
* Topology \p topology and device \p device must match the local machine. * Topology \p topology and device \p device must match the local machine.
* I/O devices detection and the NVML component are not needed in the topology. * I/O devices detection and the NVML component are not needed in the topology.
@@ -51,6 +51,9 @@ extern "C" {
* *
* This function is currently only implemented in a meaningful way for * This function is currently only implemented in a meaningful way for
* Linux; other systems will simply get a full cpuset. * Linux; other systems will simply get a full cpuset.
*
* \return 0 on success.
* \return -1 on error, for instance if device information could not be found.
*/ */
static __hwloc_inline int static __hwloc_inline int
hwloc_nvml_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_nvml_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
@@ -88,8 +91,8 @@ hwloc_nvml_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
/** \brief Get the hwloc OS device object corresponding to the /** \brief Get the hwloc OS device object corresponding to the
* NVML device whose index is \p idx. * NVML device whose index is \p idx.
* *
* Return the OS device object describing the NVML device whose * \return The hwloc OS device object describing the NVML device whose index is \p idx.
* index is \p idx. Returns NULL if there is none. * \return \c NULL if none could be found.
* *
* The topology \p topology does not necessarily have to match the current * The topology \p topology does not necessarily have to match the current
* machine. For instance the topology may be an XML import of a remote host. * machine. For instance the topology may be an XML import of a remote host.
@@ -114,8 +117,8 @@ hwloc_nvml_get_device_osdev_by_index(hwloc_topology_t topology, unsigned idx)
/** \brief Get the hwloc OS device object corresponding to NVML device \p device. /** \brief Get the hwloc OS device object corresponding to NVML device \p device.
* *
* Return the hwloc OS device object that describes the given * \return The hwloc OS device object that describes the given NVML device \p device.
* NVML device \p device. Return NULL if there is none. * \return \c NULL if none could be found.
* *
* Topology \p topology and device \p device must match the local machine. * Topology \p topology and device \p device must match the local machine.
* I/O devices detection and the NVML component must be enabled in the topology. * I/O devices detection and the NVML component must be enabled in the topology.

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright © 2012-2020 Inria. All rights reserved. * Copyright © 2012-2023 Inria. All rights reserved.
* Copyright © 2013, 2018 Université Bordeaux. All right reserved. * Copyright © 2013, 2018 Université Bordeaux. All right reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
@@ -41,6 +41,15 @@ extern "C" {
*/ */
/* Copyright (c) 2008-2018 The Khronos Group Inc. */ /* Copyright (c) 2008-2018 The Khronos Group Inc. */
/* needs "cl_khr_pci_bus_info" device extension, but not strictly required for clGetDeviceInfo() */
typedef struct {
cl_uint pci_domain;
cl_uint pci_bus;
cl_uint pci_device;
cl_uint pci_function;
} hwloc_cl_device_pci_bus_info_khr;
#define HWLOC_CL_DEVICE_PCI_BUS_INFO_KHR 0x410F
/* needs "cl_amd_device_attribute_query" device extension, but not strictly required for clGetDeviceInfo() */ /* needs "cl_amd_device_attribute_query" device extension, but not strictly required for clGetDeviceInfo() */
#define HWLOC_CL_DEVICE_TOPOLOGY_AMD 0x4037 #define HWLOC_CL_DEVICE_TOPOLOGY_AMD 0x4037
typedef union { typedef union {
@@ -69,22 +78,36 @@ typedef union {
/** \brief Return the domain, bus and device IDs of the OpenCL device \p device. /** \brief Return the domain, bus and device IDs of the OpenCL device \p device.
* *
* Device \p device must match the local machine. * Device \p device must match the local machine.
*
* \return 0 on success.
* \return -1 on error, for instance if device information could not be found.
*/ */
static __hwloc_inline int static __hwloc_inline int
hwloc_opencl_get_device_pci_busid(cl_device_id device, hwloc_opencl_get_device_pci_busid(cl_device_id device,
unsigned *domain, unsigned *bus, unsigned *dev, unsigned *func) unsigned *domain, unsigned *bus, unsigned *dev, unsigned *func)
{ {
hwloc_cl_device_topology_amd amdtopo; hwloc_cl_device_topology_amd amdtopo;
hwloc_cl_device_pci_bus_info_khr khrbusinfo;
cl_uint nvbus, nvslot, nvdomain; cl_uint nvbus, nvslot, nvdomain;
cl_int clret; cl_int clret;
clret = clGetDeviceInfo(device, HWLOC_CL_DEVICE_PCI_BUS_INFO_KHR, sizeof(khrbusinfo), &khrbusinfo, NULL);
if (CL_SUCCESS == clret) {
*domain = (unsigned) khrbusinfo.pci_domain;
*bus = (unsigned) khrbusinfo.pci_bus;
*dev = (unsigned) khrbusinfo.pci_device;
*func = (unsigned) khrbusinfo.pci_function;
return 0;
}
clret = clGetDeviceInfo(device, HWLOC_CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL); clret = clGetDeviceInfo(device, HWLOC_CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL);
if (CL_SUCCESS == clret if (CL_SUCCESS == clret
&& HWLOC_CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD == amdtopo.raw.type) { && HWLOC_CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD == amdtopo.raw.type) {
*domain = 0; /* can't do anything better */ *domain = 0; /* can't do anything better */
*bus = (unsigned) amdtopo.pcie.bus; /* cl_device_topology_amd stores bus ID in cl_char, dont convert those signed char directly to unsigned int */
*dev = (unsigned) amdtopo.pcie.device; *bus = (unsigned) (unsigned char) amdtopo.pcie.bus;
*func = (unsigned) amdtopo.pcie.function; *dev = (unsigned) (unsigned char) amdtopo.pcie.device;
*func = (unsigned) (unsigned char) amdtopo.pcie.function;
return 0; return 0;
} }
@@ -112,7 +135,7 @@ hwloc_opencl_get_device_pci_busid(cl_device_id device,
/** \brief Get the CPU set of processors that are physically /** \brief Get the CPU set of processors that are physically
* close to OpenCL device \p device. * close to OpenCL device \p device.
* *
* Return the CPU set describing the locality of the OpenCL device \p device. * Store in \p set the CPU-set describing the locality of the OpenCL device \p device.
* *
* Topology \p topology and device \p device must match the local machine. * Topology \p topology and device \p device must match the local machine.
* I/O devices detection and the OpenCL component are not needed in the topology. * I/O devices detection and the OpenCL component are not needed in the topology.
@@ -125,6 +148,9 @@ hwloc_opencl_get_device_pci_busid(cl_device_id device,
* This function is currently only implemented in a meaningful way for * This function is currently only implemented in a meaningful way for
* Linux with the AMD or NVIDIA OpenCL implementation; other systems will simply * Linux with the AMD or NVIDIA OpenCL implementation; other systems will simply
* get a full cpuset. * get a full cpuset.
*
* \return 0 on success.
* \return -1 on error, for instance if the device could not be found.
*/ */
static __hwloc_inline int static __hwloc_inline int
hwloc_opencl_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_opencl_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
@@ -161,10 +187,10 @@ hwloc_opencl_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unuse
/** \brief Get the hwloc OS device object corresponding to the /** \brief Get the hwloc OS device object corresponding to the
* OpenCL device for the given indexes. * OpenCL device for the given indexes.
* *
* Return the OS device object describing the OpenCL device * \return The hwloc OS device object describing the OpenCL device
* whose platform index is \p platform_index, * whose platform index is \p platform_index,
* and whose device index within this platform if \p device_index. * and whose device index within this platform if \p device_index.
* Return NULL if there is none. * \return \c NULL if there is none.
* *
* The topology \p topology does not necessarily have to match the current * The topology \p topology does not necessarily have to match the current
* machine. For instance the topology may be an XML import of a remote host. * machine. For instance the topology may be an XML import of a remote host.
@@ -191,8 +217,9 @@ hwloc_opencl_get_device_osdev_by_index(hwloc_topology_t topology,
/** \brief Get the hwloc OS device object corresponding to OpenCL device \p deviceX. /** \brief Get the hwloc OS device object corresponding to OpenCL device \p deviceX.
* *
* Use OpenCL device attributes to find the corresponding hwloc OS device object. * \return The hwloc OS device object corresponding to the given OpenCL device \p device.
* Return NULL if there is none or if useful attributes are not available. * \return \c NULL if none could be found, for instance
* if required OpenCL attributes are not available.
* *
* This function currently only works on AMD and NVIDIA OpenCL devices that support * This function currently only works on AMD and NVIDIA OpenCL devices that support
* relevant OpenCL extensions. hwloc_opencl_get_device_osdev_by_index() * relevant OpenCL extensions. hwloc_opencl_get_device_osdev_by_index()

View File

@@ -1,6 +1,6 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2023 Inria. All rights reserved.
* Copyright © 2009-2010 Université Bordeaux * Copyright © 2009-2010 Université Bordeaux
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
@@ -44,7 +44,7 @@ extern "C" {
/** \brief Get the CPU set of processors that are physically /** \brief Get the CPU set of processors that are physically
* close to device \p ibdev. * close to device \p ibdev.
* *
* Return the CPU set describing the locality of the OpenFabrics * Store in \p set the CPU-set describing the locality of the OpenFabrics
* device \p ibdev (InfiniBand, etc). * device \p ibdev (InfiniBand, etc).
* *
* Topology \p topology and device \p ibdev must match the local machine. * Topology \p topology and device \p ibdev must match the local machine.
@@ -57,6 +57,9 @@ extern "C" {
* *
* This function is currently only implemented in a meaningful way for * This function is currently only implemented in a meaningful way for
* Linux; other systems will simply get a full cpuset. * Linux; other systems will simply get a full cpuset.
*
* \return 0 on success.
* \return -1 on error, for instance if device information could not be found.
*/ */
static __hwloc_inline int static __hwloc_inline int
hwloc_ibv_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_ibv_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
@@ -88,10 +91,11 @@ hwloc_ibv_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
/** \brief Get the hwloc OS device object corresponding to the OpenFabrics /** \brief Get the hwloc OS device object corresponding to the OpenFabrics
* device named \p ibname. * device named \p ibname.
* *
* Return the OS device object describing the OpenFabrics device * \return The hwloc OS device object describing the OpenFabrics device
* (InfiniBand, Omni-Path, usNIC, etc) whose name is \p ibname * (InfiniBand, Omni-Path, usNIC, etc) whose name is \p ibname
* (mlx5_0, hfi1_0, usnic_0, qib0, etc). * (mlx5_0, hfi1_0, usnic_0, qib0, etc).
* Returns NULL if there is none. * \return \c NULL if none could be found.
*
* The name \p ibname is usually obtained from ibv_get_device_name(). * The name \p ibname is usually obtained from ibv_get_device_name().
* *
* The topology \p topology does not necessarily have to match the current * The topology \p topology does not necessarily have to match the current
@@ -117,8 +121,9 @@ hwloc_ibv_get_device_osdev_by_name(hwloc_topology_t topology,
/** \brief Get the hwloc OS device object corresponding to the OpenFabrics /** \brief Get the hwloc OS device object corresponding to the OpenFabrics
* device \p ibdev. * device \p ibdev.
* *
* Return the OS device object describing the OpenFabrics device \p ibdev * \return The hwloc OS device object describing the OpenFabrics
* (InfiniBand, etc). Returns NULL if there is none. * device \p ibdev (InfiniBand, etc).
* \return \c NULL if none could be found.
* *
* Topology \p topology and device \p ibdev must match the local machine. * Topology \p topology and device \p ibdev must match the local machine.
* I/O devices detection must be enabled in the topology. * I/O devices detection must be enabled in the topology.

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright © 2013-2020 Inria. All rights reserved. * Copyright © 2013-2024 Inria. All rights reserved.
* Copyright © 2016 Cisco Systems, Inc. All rights reserved. * Copyright © 2016 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
@@ -26,7 +26,10 @@ struct hwloc_backend;
/** \defgroup hwlocality_disc_components Components and Plugins: Discovery components /** \defgroup hwlocality_disc_components Components and Plugins: Discovery components and backends
*
* \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified.
*
* @{ * @{
*/ */
@@ -87,15 +90,6 @@ struct hwloc_disc_component {
struct hwloc_disc_component * next; struct hwloc_disc_component * next;
}; };
/** @} */
/** \defgroup hwlocality_disc_backends Components and Plugins: Discovery backends
* @{
*/
/** \brief Discovery phase */ /** \brief Discovery phase */
typedef enum hwloc_disc_phase_e { typedef enum hwloc_disc_phase_e {
/** \brief xml or synthetic, platform-specific components such as bgq. /** \brief xml or synthetic, platform-specific components such as bgq.
@@ -158,7 +152,7 @@ struct hwloc_disc_status {
*/ */
unsigned excluded_phases; unsigned excluded_phases;
/** \brief OR'ed set of hwloc_disc_status_flag_e */ /** \brief OR'ed set of ::hwloc_disc_status_flag_e */
unsigned long flags; unsigned long flags;
}; };
@@ -241,6 +235,9 @@ HWLOC_DECLSPEC int hwloc_backend_enable(struct hwloc_backend *backend);
/** \defgroup hwlocality_generic_components Components and Plugins: Generic components /** \defgroup hwlocality_generic_components Components and Plugins: Generic components
*
* \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified.
*
* @{ * @{
*/ */
@@ -304,18 +301,98 @@ struct hwloc_component {
void * data; void * data;
}; };
/** \brief Make sure that plugins can lookup core symbols.
*
* This is a sanity check to avoid lazy-lookup failures when libhwloc
* is loaded within a plugin, and later tries to load its own plugins.
* This may fail (and abort the program) if libhwloc symbols are in a
* private namespace.
*
* \return 0 on success.
* \return -1 if the plugin cannot be successfully loaded. The caller
* plugin init() callback should return a negative error code as well.
*
* Plugins should call this function in their init() callback to avoid
* later crashes if lazy symbol resolution is used by the upper layer that
* loaded hwloc (e.g. OpenCL implementations using dlopen with RTLD_LAZY).
*
* \note The build system must define HWLOC_INSIDE_PLUGIN if and only if
* building the caller as a plugin.
*
* \note This function should remain inline so plugins can call it even
* when they cannot find libhwloc symbols.
*/
static __hwloc_inline int
hwloc_plugin_check_namespace(const char *pluginname __hwloc_attribute_unused, const char *symbol __hwloc_attribute_unused)
{
#ifdef HWLOC_INSIDE_PLUGIN
  /* HWLOC_PLUGINS_VERBOSE is read from the environment once and then cached. */
  static int verbose_known = 0;
  static int verbose_level = 0;
  void *found;
#ifdef HWLOC_HAVE_LTDL
  lt_dlhandle self = lt_dlopen(NULL);
#else
  void *self = dlopen(NULL, RTLD_NOW|RTLD_LOCAL);
#endif

  if (!self) {
    /* cannot check, assume things will work */
    return 0;
  }

  /* Look the symbol up through the handle obtained above, then release the handle. */
#ifdef HWLOC_HAVE_LTDL
  found = lt_dlsym(self, symbol);
  lt_dlclose(self);
#else
  found = dlsym(self, symbol);
  dlclose(self);
#endif

  if (found)
    return 0;

  /* The symbol is missing: optionally explain why, then report failure. */
  if (!verbose_known) {
    const char *env = getenv("HWLOC_PLUGINS_VERBOSE");
    verbose_level = 0;
    if (env)
      verbose_level = atoi(env);
    verbose_known = 1;
  }
  if (verbose_level)
    fprintf(stderr, "Plugin `%s' disabling itself because it cannot find the `%s' core symbol.\n",
            pluginname, symbol);
  return -1;
#else /* !HWLOC_INSIDE_PLUGIN */
  /* Nothing to check when the caller is not built as a plugin. */
  return 0;
#endif /* HWLOC_INSIDE_PLUGIN */
}
/** @} */ /** @} */
/** \defgroup hwlocality_components_core_funcs Components and Plugins: Core functions to be used by components /** \defgroup hwlocality_components_core_funcs Components and Plugins: Core functions to be used by components
*
* \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified.
*
* @{ * @{
*/ */
/** \brief Check whether insertion errors are hidden */ /** \brief Check whether error messages are hidden.
*
* Callers should print critical error messages
* (e.g. invalid hw topo info, invalid config)
* only if this function returns strictly less than 2.
*
* Callers should print non-critical error messages
* (e.g. failure to initialize CUDA)
* if this function returns 0.
*
* This function returns 1 by default (show critical only),
* 0 in lstopo (show all),
* or anything set in HWLOC_HIDE_ERRORS in the environment.
*
* Use macros HWLOC_SHOW_CRITICAL_ERRORS() and HWLOC_SHOW_ALL_ERRORS()
* for clarity.
*/
HWLOC_DECLSPEC int hwloc_hide_errors(void); HWLOC_DECLSPEC int hwloc_hide_errors(void);
#define HWLOC_SHOW_CRITICAL_ERRORS() (hwloc_hide_errors() < 2)
#define HWLOC_SHOW_ALL_ERRORS() (hwloc_hide_errors() == 0)
/** \brief Add an object to the topology. /** \brief Add an object to the topology.
* *
* Insert new object \p obj in the topology starting under existing object \p root * Insert new object \p obj in the topology starting under existing object \p root
@@ -391,70 +468,15 @@ HWLOC_DECLSPEC int hwloc_obj_add_children_sets(hwloc_obj_t obj);
*/ */
HWLOC_DECLSPEC int hwloc_topology_reconnect(hwloc_topology_t topology, unsigned long flags __hwloc_attribute_unused); HWLOC_DECLSPEC int hwloc_topology_reconnect(hwloc_topology_t topology, unsigned long flags __hwloc_attribute_unused);
/** \brief Make sure that plugins can lookup core symbols.
*
* This is a sanity check to avoid lazy-lookup failures when libhwloc
* is loaded within a plugin, and later tries to load its own plugins.
* This may fail (and abort the program) if libhwloc symbols are in a
* private namespace.
*
* \return 0 on success.
* \return -1 if the plugin cannot be successfully loaded. The caller
* plugin init() callback should return a negative error code as well.
*
* Plugins should call this function in their init() callback to avoid
* later crashes if lazy symbol resolution is used by the upper layer that
* loaded hwloc (e.g. OpenCL implementations using dlopen with RTLD_LAZY).
*
* \note The build system must define HWLOC_INSIDE_PLUGIN if and only if
* building the caller as a plugin.
*
* \note This function should remain inline so plugins can call it even
* when they cannot find libhwloc symbols.
*/
static __hwloc_inline int
hwloc_plugin_check_namespace(const char *pluginname __hwloc_attribute_unused, const char *symbol __hwloc_attribute_unused)
{
#ifdef HWLOC_INSIDE_PLUGIN
  /* HWLOC_PLUGINS_VERBOSE is read from the environment once and then cached. */
  static int verbose_known = 0;
  static int verbose_level = 0;
  void *found;
#ifdef HWLOC_HAVE_LTDL
  lt_dlhandle self = lt_dlopen(NULL);
#else
  void *self = dlopen(NULL, RTLD_NOW|RTLD_LOCAL);
#endif

  if (!self) {
    /* cannot check, assume things will work */
    return 0;
  }

  /* Look the symbol up through the handle obtained above, then release the handle. */
#ifdef HWLOC_HAVE_LTDL
  found = lt_dlsym(self, symbol);
  lt_dlclose(self);
#else
  found = dlsym(self, symbol);
  dlclose(self);
#endif

  if (found)
    return 0;

  /* The symbol is missing: optionally explain why, then report failure. */
  if (!verbose_known) {
    const char *env = getenv("HWLOC_PLUGINS_VERBOSE");
    verbose_level = 0;
    if (env)
      verbose_level = atoi(env);
    verbose_known = 1;
  }
  if (verbose_level)
    fprintf(stderr, "Plugin `%s' disabling itself because it cannot find the `%s' core symbol.\n",
            pluginname, symbol);
  return -1;
#else /* !HWLOC_INSIDE_PLUGIN */
  /* Nothing to check when the caller is not built as a plugin. */
  return 0;
#endif /* HWLOC_INSIDE_PLUGIN */
}
/** @} */ /** @} */
/** \defgroup hwlocality_components_filtering Components and Plugins: Filtering objects /** \defgroup hwlocality_components_filtering Components and Plugins: Filtering objects
*
* \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified.
*
* @{ * @{
*/ */
@@ -469,9 +491,12 @@ hwloc_filter_check_pcidev_subtype_important(unsigned classid)
return (baseclass == 0x03 /* PCI_BASE_CLASS_DISPLAY */ return (baseclass == 0x03 /* PCI_BASE_CLASS_DISPLAY */
|| baseclass == 0x02 /* PCI_BASE_CLASS_NETWORK */ || baseclass == 0x02 /* PCI_BASE_CLASS_NETWORK */
|| baseclass == 0x01 /* PCI_BASE_CLASS_STORAGE */ || baseclass == 0x01 /* PCI_BASE_CLASS_STORAGE */
|| baseclass == 0x00 /* Unclassified, for Atos/Bull BXI */
|| baseclass == 0x0b /* PCI_BASE_CLASS_PROCESSOR */ || baseclass == 0x0b /* PCI_BASE_CLASS_PROCESSOR */
|| classid == 0x0c04 /* PCI_CLASS_SERIAL_FIBER */ || classid == 0x0c04 /* PCI_CLASS_SERIAL_FIBER */
|| classid == 0x0c06 /* PCI_CLASS_SERIAL_INFINIBAND */ || classid == 0x0c06 /* PCI_CLASS_SERIAL_INFINIBAND */
|| classid == 0x0502 /* PCI_CLASS_MEMORY_CXL */
|| baseclass == 0x06 /* PCI_BASE_CLASS_BRIDGE with non-PCI downstream. the core will drop the useless ones later */
|| baseclass == 0x12 /* Processing Accelerators */); || baseclass == 0x12 /* Processing Accelerators */);
} }
@@ -527,6 +552,9 @@ hwloc_filter_check_keep_object(hwloc_topology_t topology, hwloc_obj_t obj)
/** \defgroup hwlocality_components_pcidisc Components and Plugins: helpers for PCI discovery /** \defgroup hwlocality_components_pcidisc Components and Plugins: helpers for PCI discovery
*
* \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified.
*
* @{ * @{
*/ */
@@ -578,18 +606,89 @@ HWLOC_DECLSPEC int hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, st
/** \defgroup hwlocality_components_pcifind Components and Plugins: finding PCI objects during other discoveries /** \defgroup hwlocality_components_pcifind Components and Plugins: finding PCI objects during other discoveries
*
* \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified.
*
* @{ * @{
*/ */
/** \brief Find the normal parent of a PCI bus ID. /** \brief Find the object or a parent of a PCI bus ID.
* *
* Look at PCI affinity to find out where the given PCI bus ID should be attached. * When attaching a new object (typically an OS device) whose locality
* is specified by PCI bus ID, this function returns the PCI object
* to use as a parent for attaching.
* *
* This function should be used to attach an I/O device under the corresponding * If the exact PCI device with this bus ID exists, it is returned.
* PCI object (if any), or under a normal (non-I/O) object with same locality. * Otherwise (for instance if it was filtered out), the function returns
* another object with similar locality (for instance a parent bridge,
* or the local CPU Package).
*/ */
HWLOC_DECLSPEC struct hwloc_obj * hwloc_pci_find_parent_by_busid(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func); HWLOC_DECLSPEC struct hwloc_obj * hwloc_pci_find_parent_by_busid(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func);
/** \brief Find the PCI device or bridge matching a PCI bus ID exactly.
*
* This is useful for adding specific information about some objects
* based on their PCI id. When it comes to attaching objects based on
* PCI locality, hwloc_pci_find_parent_by_busid() should be preferred.
*/
HWLOC_DECLSPEC struct hwloc_obj * hwloc_pci_find_by_busid(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func);
/** @} */
/** \defgroup hwlocality_components_distances Components and Plugins: distances
*
* \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified.
*
* @{
*/
/** \brief Handle to a new distances structure during its addition to the topology. */
typedef void * hwloc_backend_distances_add_handle_t;
/** \brief Create a new empty distances structure.
*
* This is identical to hwloc_distances_add_create()
* but this variant is designed for backend inserting
* distances during topology discovery.
*/
HWLOC_DECLSPEC hwloc_backend_distances_add_handle_t
hwloc_backend_distances_add_create(hwloc_topology_t topology,
const char *name, unsigned long kind,
unsigned long flags);
/** \brief Specify the objects and values in a new empty distances structure.
*
* This is similar to hwloc_distances_add_values()
* but this variant is designed for backend inserting
* distances during topology discovery.
*
* The only semantical difference is that \p objs and \p values
* are not duplicated, but directly attached to the topology.
* On success, these arrays are given to the core and should not
* ever be freed by the caller anymore.
*/
HWLOC_DECLSPEC int
hwloc_backend_distances_add_values(hwloc_topology_t topology,
hwloc_backend_distances_add_handle_t handle,
unsigned nbobjs, hwloc_obj_t *objs,
hwloc_uint64_t *values,
unsigned long flags);
/** \brief Commit a new distances structure.
*
* This is similar to hwloc_distances_add_commit()
* but this variant is designed for backend inserting
* distances during topology discovery.
*/
HWLOC_DECLSPEC int
hwloc_backend_distances_add_commit(hwloc_topology_t topology,
hwloc_backend_distances_add_handle_t handle,
unsigned long flags);
/** @} */ /** @} */

View File

@@ -1,6 +1,6 @@
/* /*
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* Copyright © 2010-2020 Inria. All rights reserved. * Copyright © 2010-2025 Inria. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
@@ -120,6 +120,12 @@ extern "C" {
#define HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IS_THISSYSTEM) #define HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IS_THISSYSTEM)
#define HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES) #define HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES)
#define HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IMPORT_SUPPORT) #define HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IMPORT_SUPPORT)
#define HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING HWLOC_NAME_CAPS(TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING)
#define HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_MEMBINDING HWLOC_NAME_CAPS(TOPOLOGY_FLAG_RESTRICT_TO_MEMBINDING)
#define HWLOC_TOPOLOGY_FLAG_DONT_CHANGE_BINDING HWLOC_NAME_CAPS(TOPOLOGY_FLAG_DONT_CHANGE_BINDING)
#define HWLOC_TOPOLOGY_FLAG_NO_DISTANCES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_NO_DISTANCES)
#define HWLOC_TOPOLOGY_FLAG_NO_MEMATTRS HWLOC_NAME_CAPS(TOPOLOGY_FLAG_NO_MEMATTRS)
#define HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS HWLOC_NAME_CAPS(TOPOLOGY_FLAG_NO_CPUKINDS)
#define hwloc_topology_set_pid HWLOC_NAME(topology_set_pid) #define hwloc_topology_set_pid HWLOC_NAME(topology_set_pid)
#define hwloc_topology_set_synthetic HWLOC_NAME(topology_set_synthetic) #define hwloc_topology_set_synthetic HWLOC_NAME(topology_set_synthetic)
@@ -170,6 +176,7 @@ extern "C" {
#define hwloc_topology_insert_misc_object HWLOC_NAME(topology_insert_misc_object) #define hwloc_topology_insert_misc_object HWLOC_NAME(topology_insert_misc_object)
#define hwloc_topology_alloc_group_object HWLOC_NAME(topology_alloc_group_object) #define hwloc_topology_alloc_group_object HWLOC_NAME(topology_alloc_group_object)
#define hwloc_topology_free_group_object HWLOC_NAME(topology_free_group_object)
#define hwloc_topology_insert_group_object HWLOC_NAME(topology_insert_group_object) #define hwloc_topology_insert_group_object HWLOC_NAME(topology_insert_group_object)
#define hwloc_obj_add_other_obj_sets HWLOC_NAME(obj_add_other_obj_sets) #define hwloc_obj_add_other_obj_sets HWLOC_NAME(obj_add_other_obj_sets)
#define hwloc_topology_refresh HWLOC_NAME(topology_refresh) #define hwloc_topology_refresh HWLOC_NAME(topology_refresh)
@@ -203,6 +210,7 @@ extern "C" {
#define hwloc_obj_get_info_by_name HWLOC_NAME(obj_get_info_by_name) #define hwloc_obj_get_info_by_name HWLOC_NAME(obj_get_info_by_name)
#define hwloc_obj_add_info HWLOC_NAME(obj_add_info) #define hwloc_obj_add_info HWLOC_NAME(obj_add_info)
#define hwloc_obj_set_subtype HWLOC_NAME(obj_set_subtype)
#define HWLOC_CPUBIND_PROCESS HWLOC_NAME_CAPS(CPUBIND_PROCESS) #define HWLOC_CPUBIND_PROCESS HWLOC_NAME_CAPS(CPUBIND_PROCESS)
#define HWLOC_CPUBIND_THREAD HWLOC_NAME_CAPS(CPUBIND_THREAD) #define HWLOC_CPUBIND_THREAD HWLOC_NAME_CAPS(CPUBIND_THREAD)
@@ -225,6 +233,7 @@ extern "C" {
#define HWLOC_MEMBIND_FIRSTTOUCH HWLOC_NAME_CAPS(MEMBIND_FIRSTTOUCH) #define HWLOC_MEMBIND_FIRSTTOUCH HWLOC_NAME_CAPS(MEMBIND_FIRSTTOUCH)
#define HWLOC_MEMBIND_BIND HWLOC_NAME_CAPS(MEMBIND_BIND) #define HWLOC_MEMBIND_BIND HWLOC_NAME_CAPS(MEMBIND_BIND)
#define HWLOC_MEMBIND_INTERLEAVE HWLOC_NAME_CAPS(MEMBIND_INTERLEAVE) #define HWLOC_MEMBIND_INTERLEAVE HWLOC_NAME_CAPS(MEMBIND_INTERLEAVE)
#define HWLOC_MEMBIND_WEIGHTED_INTERLEAVE HWLOC_NAME_CAPS(MEMBIND_WEIGHTED_INTERLEAVE)
#define HWLOC_MEMBIND_NEXTTOUCH HWLOC_NAME_CAPS(MEMBIND_NEXTTOUCH) #define HWLOC_MEMBIND_NEXTTOUCH HWLOC_NAME_CAPS(MEMBIND_NEXTTOUCH)
#define HWLOC_MEMBIND_MIXED HWLOC_NAME_CAPS(MEMBIND_MIXED) #define HWLOC_MEMBIND_MIXED HWLOC_NAME_CAPS(MEMBIND_MIXED)
@@ -356,6 +365,7 @@ extern "C" {
#define hwloc_get_closest_objs HWLOC_NAME(get_closest_objs) #define hwloc_get_closest_objs HWLOC_NAME(get_closest_objs)
#define hwloc_get_obj_below_by_type HWLOC_NAME(get_obj_below_by_type) #define hwloc_get_obj_below_by_type HWLOC_NAME(get_obj_below_by_type)
#define hwloc_get_obj_below_array_by_type HWLOC_NAME(get_obj_below_array_by_type) #define hwloc_get_obj_below_array_by_type HWLOC_NAME(get_obj_below_array_by_type)
#define hwloc_get_obj_with_same_locality HWLOC_NAME(get_obj_with_same_locality)
#define hwloc_distrib_flags_e HWLOC_NAME(distrib_flags_e) #define hwloc_distrib_flags_e HWLOC_NAME(distrib_flags_e)
#define HWLOC_DISTRIB_FLAG_REVERSE HWLOC_NAME_CAPS(DISTRIB_FLAG_REVERSE) #define HWLOC_DISTRIB_FLAG_REVERSE HWLOC_NAME_CAPS(DISTRIB_FLAG_REVERSE)
#define hwloc_distrib HWLOC_NAME(distrib) #define hwloc_distrib HWLOC_NAME(distrib)
@@ -377,6 +387,11 @@ extern "C" {
#define HWLOC_MEMATTR_ID_LOCALITY HWLOC_NAME_CAPS(MEMATTR_ID_LOCALITY) #define HWLOC_MEMATTR_ID_LOCALITY HWLOC_NAME_CAPS(MEMATTR_ID_LOCALITY)
#define HWLOC_MEMATTR_ID_BANDWIDTH HWLOC_NAME_CAPS(MEMATTR_ID_BANDWIDTH) #define HWLOC_MEMATTR_ID_BANDWIDTH HWLOC_NAME_CAPS(MEMATTR_ID_BANDWIDTH)
#define HWLOC_MEMATTR_ID_LATENCY HWLOC_NAME_CAPS(MEMATTR_ID_LATENCY) #define HWLOC_MEMATTR_ID_LATENCY HWLOC_NAME_CAPS(MEMATTR_ID_LATENCY)
#define HWLOC_MEMATTR_ID_READ_BANDWIDTH HWLOC_NAME_CAPS(MEMATTR_ID_READ_BANDWIDTH)
#define HWLOC_MEMATTR_ID_WRITE_BANDWIDTH HWLOC_NAME_CAPS(MEMATTR_ID_WRITE_BANDWIDTH)
#define HWLOC_MEMATTR_ID_READ_LATENCY HWLOC_NAME_CAPS(MEMATTR_ID_READ_LATENCY)
#define HWLOC_MEMATTR_ID_WRITE_LATENCY HWLOC_NAME_CAPS(MEMATTR_ID_WRITE_LATENCY)
#define HWLOC_MEMATTR_ID_MAX HWLOC_NAME_CAPS(MEMATTR_ID_MAX)
#define hwloc_memattr_id_t HWLOC_NAME(memattr_id_t) #define hwloc_memattr_id_t HWLOC_NAME(memattr_id_t)
#define hwloc_memattr_get_by_name HWLOC_NAME(memattr_get_by_name) #define hwloc_memattr_get_by_name HWLOC_NAME(memattr_get_by_name)
@@ -394,8 +409,10 @@ extern "C" {
#define hwloc_local_numanode_flag_e HWLOC_NAME(local_numanode_flag_e) #define hwloc_local_numanode_flag_e HWLOC_NAME(local_numanode_flag_e)
#define HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_LARGER_LOCALITY) #define HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_LARGER_LOCALITY)
#define HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY) #define HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY)
#define HWLOC_LOCAL_NUMANODE_FLAG_INTERSECT_LOCALITY HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_INTERSECT_LOCALITY)
#define HWLOC_LOCAL_NUMANODE_FLAG_ALL HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_ALL) #define HWLOC_LOCAL_NUMANODE_FLAG_ALL HWLOC_NAME_CAPS(LOCAL_NUMANODE_FLAG_ALL)
#define hwloc_get_local_numanode_objs HWLOC_NAME(get_local_numanode_objs) #define hwloc_get_local_numanode_objs HWLOC_NAME(get_local_numanode_objs)
#define hwloc_topology_get_default_nodeset HWLOC_NAME(topology_get_default_nodeset)
#define hwloc_memattr_get_name HWLOC_NAME(memattr_get_name) #define hwloc_memattr_get_name HWLOC_NAME(memattr_get_name)
#define hwloc_memattr_get_flags HWLOC_NAME(memattr_get_flags) #define hwloc_memattr_get_flags HWLOC_NAME(memattr_get_flags)
@@ -454,11 +471,22 @@ extern "C" {
#define hwloc_distances_obj_index HWLOC_NAME(distances_obj_index) #define hwloc_distances_obj_index HWLOC_NAME(distances_obj_index)
#define hwloc_distances_obj_pair_values HWLOC_NAME(distances_pair_values) #define hwloc_distances_obj_pair_values HWLOC_NAME(distances_pair_values)
#define hwloc_distances_transform_e HWLOC_NAME(distances_transform_e)
#define HWLOC_DISTANCES_TRANSFORM_REMOVE_NULL HWLOC_NAME_CAPS(DISTANCES_TRANSFORM_REMOVE_NULL)
#define HWLOC_DISTANCES_TRANSFORM_LINKS HWLOC_NAME_CAPS(DISTANCES_TRANSFORM_LINKS)
#define HWLOC_DISTANCES_TRANSFORM_MERGE_SWITCH_PORTS HWLOC_NAME_CAPS(DISTANCES_TRANSFORM_MERGE_SWITCH_PORTS)
#define HWLOC_DISTANCES_TRANSFORM_TRANSITIVE_CLOSURE HWLOC_NAME_CAPS(DISTANCES_TRANSFORM_TRANSITIVE_CLOSURE)
#define hwloc_distances_transform HWLOC_NAME(distances_transform)
#define hwloc_distances_add_flag_e HWLOC_NAME(distances_add_flag_e) #define hwloc_distances_add_flag_e HWLOC_NAME(distances_add_flag_e)
#define HWLOC_DISTANCES_ADD_FLAG_GROUP HWLOC_NAME_CAPS(DISTANCES_ADD_FLAG_GROUP) #define HWLOC_DISTANCES_ADD_FLAG_GROUP HWLOC_NAME_CAPS(DISTANCES_ADD_FLAG_GROUP)
#define HWLOC_DISTANCES_ADD_FLAG_GROUP_INACCURATE HWLOC_NAME_CAPS(DISTANCES_ADD_FLAG_GROUP_INACCURATE) #define HWLOC_DISTANCES_ADD_FLAG_GROUP_INACCURATE HWLOC_NAME_CAPS(DISTANCES_ADD_FLAG_GROUP_INACCURATE)
#define hwloc_distances_add HWLOC_NAME(distances_add) #define hwloc_distances_add_handle_t HWLOC_NAME(distances_add_handle_t)
#define hwloc_distances_add_create HWLOC_NAME(distances_add_create)
#define hwloc_distances_add_values HWLOC_NAME(distances_add_values)
#define hwloc_distances_add_commit HWLOC_NAME(distances_add_commit)
#define hwloc_distances_remove HWLOC_NAME(distances_remove) #define hwloc_distances_remove HWLOC_NAME(distances_remove)
#define hwloc_distances_remove_by_depth HWLOC_NAME(distances_remove_by_depth) #define hwloc_distances_remove_by_depth HWLOC_NAME(distances_remove_by_depth)
#define hwloc_distances_remove_by_type HWLOC_NAME(distances_remove_by_type) #define hwloc_distances_remove_by_type HWLOC_NAME(distances_remove_by_type)
@@ -523,6 +551,11 @@ extern "C" {
#define hwloc_linux_get_tid_last_cpu_location HWLOC_NAME(linux_get_tid_last_cpu_location) #define hwloc_linux_get_tid_last_cpu_location HWLOC_NAME(linux_get_tid_last_cpu_location)
#define hwloc_linux_read_path_as_cpumask HWLOC_NAME(linux_read_file_cpumask) #define hwloc_linux_read_path_as_cpumask HWLOC_NAME(linux_read_file_cpumask)
/* windows.h */
#define hwloc_windows_get_nr_processor_groups HWLOC_NAME(windows_get_nr_processor_groups)
#define hwloc_windows_get_processor_group_cpuset HWLOC_NAME(windows_get_processor_group_cpuset)
/* openfabrics-verbs.h */ /* openfabrics-verbs.h */
#define hwloc_ibv_get_device_cpuset HWLOC_NAME(ibv_get_device_cpuset) #define hwloc_ibv_get_device_cpuset HWLOC_NAME(ibv_get_device_cpuset)
@@ -531,6 +564,7 @@ extern "C" {
/* opencl.h */ /* opencl.h */
#define hwloc_cl_device_pci_bus_info_khr HWLOC_NAME(cl_device_pci_bus_info_khr)
#define hwloc_cl_device_topology_amd HWLOC_NAME(cl_device_topology_amd) #define hwloc_cl_device_topology_amd HWLOC_NAME(cl_device_topology_amd)
#define hwloc_opencl_get_device_pci_busid HWLOC_NAME(opencl_get_device_pci_ids) #define hwloc_opencl_get_device_pci_busid HWLOC_NAME(opencl_get_device_pci_ids)
#define hwloc_opencl_get_device_cpuset HWLOC_NAME(opencl_get_device_cpuset) #define hwloc_opencl_get_device_cpuset HWLOC_NAME(opencl_get_device_cpuset)
@@ -564,6 +598,13 @@ extern "C" {
#define hwloc_rsmi_get_device_osdev HWLOC_NAME(rsmi_get_device_osdev) #define hwloc_rsmi_get_device_osdev HWLOC_NAME(rsmi_get_device_osdev)
#define hwloc_rsmi_get_device_osdev_by_index HWLOC_NAME(rsmi_get_device_osdev_by_index) #define hwloc_rsmi_get_device_osdev_by_index HWLOC_NAME(rsmi_get_device_osdev_by_index)
/* levelzero.h */
#define hwloc_levelzero_get_device_cpuset HWLOC_NAME(levelzero_get_device_cpuset)
#define hwloc_levelzero_get_sysman_device_cpuset HWLOC_NAME(levelzero_get_sysman_device_cpuset)
#define hwloc_levelzero_get_device_osdev HWLOC_NAME(levelzero_get_device_osdev)
#define hwloc_levelzero_get_sysman_device_osdev HWLOC_NAME(levelzero_get_sysman_device_osdev)
/* gl.h */ /* gl.h */
#define hwloc_gl_get_display_osdev_by_port_device HWLOC_NAME(gl_get_display_osdev_by_port_device) #define hwloc_gl_get_display_osdev_by_port_device HWLOC_NAME(gl_get_display_osdev_by_port_device)
@@ -620,10 +661,18 @@ extern "C" {
#define hwloc_pcidisc_tree_insert_by_busid HWLOC_NAME(pcidisc_tree_insert_by_busid) #define hwloc_pcidisc_tree_insert_by_busid HWLOC_NAME(pcidisc_tree_insert_by_busid)
#define hwloc_pcidisc_tree_attach HWLOC_NAME(pcidisc_tree_attach) #define hwloc_pcidisc_tree_attach HWLOC_NAME(pcidisc_tree_attach)
#define hwloc_pci_find_by_busid HWLOC_NAME(pcidisc_find_by_busid)
#define hwloc_pci_find_parent_by_busid HWLOC_NAME(pcidisc_find_busid_parent) #define hwloc_pci_find_parent_by_busid HWLOC_NAME(pcidisc_find_busid_parent)
#define hwloc_backend_distances_add_handle_t HWLOC_NAME(backend_distances_add_handle_t)
#define hwloc_backend_distances_add_create HWLOC_NAME(backend_distances_add_create)
#define hwloc_backend_distances_add_values HWLOC_NAME(backend_distances_add_values)
#define hwloc_backend_distances_add_commit HWLOC_NAME(backend_distances_add_commit)
/* hwloc/deprecated.h */ /* hwloc/deprecated.h */
#define hwloc_distances_add HWLOC_NAME(distances_add)
#define hwloc_topology_insert_misc_object_by_parent HWLOC_NAME(topology_insert_misc_object_by_parent) #define hwloc_topology_insert_misc_object_by_parent HWLOC_NAME(topology_insert_misc_object_by_parent)
#define hwloc_obj_cpuset_snprintf HWLOC_NAME(obj_cpuset_snprintf) #define hwloc_obj_cpuset_snprintf HWLOC_NAME(obj_cpuset_snprintf)
#define hwloc_obj_type_sscanf HWLOC_NAME(obj_type_sscanf) #define hwloc_obj_type_sscanf HWLOC_NAME(obj_type_sscanf)
@@ -673,6 +722,8 @@ extern "C" {
#define hwloc__obj_type_is_dcache HWLOC_NAME(_obj_type_is_dcache) #define hwloc__obj_type_is_dcache HWLOC_NAME(_obj_type_is_dcache)
#define hwloc__obj_type_is_icache HWLOC_NAME(_obj_type_is_icache) #define hwloc__obj_type_is_icache HWLOC_NAME(_obj_type_is_icache)
#define hwloc__pci_link_speed HWLOC_NAME(_pci_link_speed)
/* private/cpuid-x86.h */ /* private/cpuid-x86.h */
#define hwloc_have_x86_cpuid HWLOC_NAME(have_x86_cpuid) #define hwloc_have_x86_cpuid HWLOC_NAME(have_x86_cpuid)
@@ -733,6 +784,7 @@ extern "C" {
#define hwloc_cuda_component HWLOC_NAME(cuda_component) #define hwloc_cuda_component HWLOC_NAME(cuda_component)
#define hwloc_gl_component HWLOC_NAME(gl_component) #define hwloc_gl_component HWLOC_NAME(gl_component)
#define hwloc_levelzero_component HWLOC_NAME(levelzero_component)
#define hwloc_nvml_component HWLOC_NAME(nvml_component) #define hwloc_nvml_component HWLOC_NAME(nvml_component)
#define hwloc_rsmi_component HWLOC_NAME(rsmi_component) #define hwloc_rsmi_component HWLOC_NAME(rsmi_component)
#define hwloc_opencl_component HWLOC_NAME(opencl_component) #define hwloc_opencl_component HWLOC_NAME(opencl_component)
@@ -765,6 +817,8 @@ extern "C" {
#define hwloc_topology_setup_defaults HWLOC_NAME(topology_setup_defaults) #define hwloc_topology_setup_defaults HWLOC_NAME(topology_setup_defaults)
#define hwloc_topology_clear HWLOC_NAME(topology_clear) #define hwloc_topology_clear HWLOC_NAME(topology_clear)
#define hwloc__reconnect HWLOC_NAME(_reconnect)
#define hwloc__attach_memory_object HWLOC_NAME(insert_memory_object) #define hwloc__attach_memory_object HWLOC_NAME(insert_memory_object)
#define hwloc_get_obj_by_type_and_gp_index HWLOC_NAME(get_obj_by_type_and_gp_index) #define hwloc_get_obj_by_type_and_gp_index HWLOC_NAME(get_obj_by_type_and_gp_index)
@@ -772,7 +826,6 @@ extern "C" {
#define hwloc_pci_discovery_init HWLOC_NAME(pci_discovery_init) #define hwloc_pci_discovery_init HWLOC_NAME(pci_discovery_init)
#define hwloc_pci_discovery_prepare HWLOC_NAME(pci_discovery_prepare) #define hwloc_pci_discovery_prepare HWLOC_NAME(pci_discovery_prepare)
#define hwloc_pci_discovery_exit HWLOC_NAME(pci_discovery_exit) #define hwloc_pci_discovery_exit HWLOC_NAME(pci_discovery_exit)
#define hwloc_pci_find_by_busid HWLOC_NAME(pcidisc_find_by_busid)
#define hwloc_find_insert_io_parent_by_complete_cpuset HWLOC_NAME(hwloc_find_insert_io_parent_by_complete_cpuset) #define hwloc_find_insert_io_parent_by_complete_cpuset HWLOC_NAME(hwloc_find_insert_io_parent_by_complete_cpuset)
#define hwloc__add_info HWLOC_NAME(_add_info) #define hwloc__add_info HWLOC_NAME(_add_info)
@@ -816,7 +869,6 @@ extern "C" {
#define hwloc_internal_distances_dup HWLOC_NAME(internal_distances_dup) #define hwloc_internal_distances_dup HWLOC_NAME(internal_distances_dup)
#define hwloc_internal_distances_refresh HWLOC_NAME(internal_distances_refresh) #define hwloc_internal_distances_refresh HWLOC_NAME(internal_distances_refresh)
#define hwloc_internal_distances_destroy HWLOC_NAME(internal_distances_destroy) #define hwloc_internal_distances_destroy HWLOC_NAME(internal_distances_destroy)
#define hwloc_internal_distances_add HWLOC_NAME(internal_distances_add) #define hwloc_internal_distances_add HWLOC_NAME(internal_distances_add)
#define hwloc_internal_distances_add_by_index HWLOC_NAME(internal_distances_add_by_index) #define hwloc_internal_distances_add_by_index HWLOC_NAME(internal_distances_add_by_index)
#define hwloc_internal_distances_invalidate_cached_objs HWLOC_NAME(hwloc_internal_distances_invalidate_cached_objs) #define hwloc_internal_distances_invalidate_cached_objs HWLOC_NAME(hwloc_internal_distances_invalidate_cached_objs)
@@ -830,6 +882,7 @@ extern "C" {
#define hwloc_internal_memattrs_destroy HWLOC_NAME(internal_memattrs_destroy) #define hwloc_internal_memattrs_destroy HWLOC_NAME(internal_memattrs_destroy)
#define hwloc_internal_memattrs_need_refresh HWLOC_NAME(internal_memattrs_need_refresh) #define hwloc_internal_memattrs_need_refresh HWLOC_NAME(internal_memattrs_need_refresh)
#define hwloc_internal_memattrs_refresh HWLOC_NAME(internal_memattrs_refresh) #define hwloc_internal_memattrs_refresh HWLOC_NAME(internal_memattrs_refresh)
#define hwloc_internal_memattrs_guess_memory_tiers HWLOC_NAME(internal_memattrs_guess_memory_tiers)
#define hwloc_internal_cpukind_s HWLOC_NAME(internal_cpukind_s) #define hwloc_internal_cpukind_s HWLOC_NAME(internal_cpukind_s)
#define hwloc_internal_cpukinds_init HWLOC_NAME(internal_cpukinds_init) #define hwloc_internal_cpukinds_init HWLOC_NAME(internal_cpukinds_init)

206
src/3rdparty/hwloc/include/hwloc/rsmi.h vendored Normal file
View File

@@ -0,0 +1,206 @@
/*
* Copyright © 2012-2023 Inria. All rights reserved.
* Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
* Written by Advanced Micro Devices,
* See COPYING in top-level directory.
*/
/** \file
* \brief Macros to help interaction between hwloc and the ROCm SMI Management Library.
*
* Applications that use both hwloc and the ROCm SMI Management Library may want to
* include this file so as to get topology information for AMD GPU devices.
*/
#ifndef HWLOC_RSMI_H
#define HWLOC_RSMI_H
#include "hwloc.h"
#include "hwloc/autogen/config.h"
#include "hwloc/helper.h"
#ifdef HWLOC_LINUX_SYS
#include "hwloc/linux.h"
#endif
#include <rocm_smi/rocm_smi.h>
#ifdef __cplusplus
extern "C" {
#endif
/** \defgroup hwlocality_rsmi Interoperability with the ROCm SMI Management Library
*
* This interface offers ways to retrieve topology information about
* devices managed by the ROCm SMI Management Library.
*
* @{
*/
/** \brief Get the CPU set of logical processors that are physically
* close to AMD GPU device whose index is \p dv_ind.
*
* Store in \p set the CPU-set describing the locality of the AMD GPU device
* whose index is \p dv_ind.
*
* Topology \p topology and device \p dv_ind must match the local machine.
* I/O devices detection and the ROCm SMI component are not needed in the
* topology.
*
* The function only returns the locality of the device.
* If more information about the device is needed, OS objects should
* be used instead, see hwloc_rsmi_get_device_osdev()
* and hwloc_rsmi_get_device_osdev_by_index().
*
* This function is currently only implemented in a meaningful way for
* Linux; other systems will simply get a full cpuset.
*
* \return 0 on success.
* \return -1 on error, for instance if device information could not be found.
*/
static __hwloc_inline int
hwloc_rsmi_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
                             uint32_t dv_ind, hwloc_cpuset_t set)
{
#ifdef HWLOC_LINUX_SYS
  /* On Linux, the locality is read from the PCI device's sysfs local_cpus file. */
#define HWLOC_RSMI_DEVICE_SYSFS_PATH_MAX 128
  char sysfs_path[HWLOC_RSMI_DEVICE_SYSFS_PATH_MAX];
  uint64_t pci_id = 0;
  unsigned pci_domain, pci_bus, pci_dev;

  if (!hwloc_topology_is_thissystem(topology)) {
    errno = EINVAL;
    return -1;
  }

  if (rsmi_dev_pci_id_get(dv_ind, &pci_id) != RSMI_STATUS_SUCCESS) {
    errno = EINVAL;
    return -1;
  }

  /* Unpack the id: domain from bits 63..32, bus from bits 15..8, device from bits 7..3. */
  pci_domain = (pci_id >> 32) & 0xffffffff;
  pci_bus = (pci_id >> 8) & 0xff;
  pci_dev = (pci_id >> 3) & 0x1f;
  sprintf(sysfs_path, "/sys/bus/pci/devices/%04x:%02x:%02x.0/local_cpus", pci_domain, pci_bus, pci_dev);

  if (hwloc_linux_read_path_as_cpumask(sysfs_path, set) >= 0
      && !hwloc_bitmap_iszero(set))
    return 0;
  /* Reading failed or produced an empty set: fall back to the complete cpuset below. */
#endif
  /* Non-Linux systems (and the Linux fallback path) get the complete cpuset. */
  hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
  return 0;
}
/** \brief Get the hwloc OS device object corresponding to the
* AMD GPU device whose index is \p dv_ind.
*
* \return The hwloc OS device object describing the AMD GPU device whose
* index is \p dv_ind.
* \return \c NULL if none could be found.
*
* The topology \p topology does not necessarily have to match the current
* machine. For instance the topology may be an XML import of a remote host.
* I/O devices detection and the ROCm SMI component must be enabled in the
* topology.
*
* \note The corresponding PCI device object can be obtained by looking
* at the OS device parent object (unless PCI devices are filtered out).
*/
static __hwloc_inline hwloc_obj_t
hwloc_rsmi_get_device_osdev_by_index(hwloc_topology_t topology, uint32_t dv_ind)
{
  hwloc_obj_t osdev = NULL;
  while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
    const char *digits;
    char *end;
    unsigned long idx;

    /* Only GPU OS devices whose name starts with "rsmi" are candidates. */
    if (HWLOC_OBJ_OSDEV_GPU != osdev->attr->osdev.type
        || !osdev->name
        || strncmp("rsmi", osdev->name, 4))
      continue;

    /* Require at least one decimal digit right after the prefix, so that
     * a name without any index (e.g. "rsmi") is not mistaken for index 0.
     */
    digits = osdev->name + 4;
    if (*digits < '0' || *digits > '9')
      continue;

    /* Parse as unsigned and compare without any signed cast of dv_ind;
     * the whole suffix must be consumed for the name to match.
     */
    idx = strtoul(digits, &end, 10);
    if (*end == '\0' && idx == dv_ind)
      return osdev;
  }
  return NULL;
}
/** \brief Get the hwloc OS device object corresponding to AMD GPU device,
* whose index is \p dv_ind.
*
* \return The hwloc OS device object that describes the given
* AMD GPU, whose index is \p dv_ind.
* \return \c NULL if none could be found.
*
* Topology \p topology and device \p dv_ind must match the local machine.
* I/O devices detection and the ROCm SMI component must be enabled in the
* topology. If not, the locality of the object may still be found using
* hwloc_rsmi_get_device_cpuset().
*
* \note The corresponding hwloc PCI device may be found by looking
* at the result parent pointer (unless PCI devices are filtered out).
*/
static __hwloc_inline hwloc_obj_t
hwloc_rsmi_get_device_osdev(hwloc_topology_t topology, uint32_t dv_ind)
{
  hwloc_obj_t osdev;
  rsmi_status_t ret;
  uint64_t bdfid = 0;
  unsigned domain, device, bus, func;
  uint64_t id;
  char uuid[64];

  /* Querying the local RSMI library only makes sense for the local machine. */
  if (!hwloc_topology_is_thissystem(topology)) {
    errno = EINVAL;
    return NULL;
  }

  ret = rsmi_dev_pci_id_get(dv_ind, &bdfid);
  if (RSMI_STATUS_SUCCESS != ret) {
    errno = EINVAL;
    return NULL;
  }

  /* Split the packed PCI identifier: domain in bits 32 and up,
   * bus in bits 8-15, device in bits 3-7, function in bits 0-2.
   */
  domain = (bdfid>>32) & 0xffffffff;
  bus = ((bdfid & 0xffff)>>8) & 0xff;
  device = ((bdfid & 0xff)>>3) & 0x1f;
  func = bdfid & 0x7;

  /* Build the hexadecimal unique ID string used for the AMDUUID comparison
   * below; it is left empty if the ID cannot be retrieved.
   * The value is 64-bit wide, so it is printed as unsigned long long:
   * "%lx" would be a type mismatch wherever long is only 32 bits.
   */
  ret = rsmi_dev_unique_id_get(dv_ind, &id);
  if (RSMI_STATUS_SUCCESS != ret)
    uuid[0] = '\0';
  else
    snprintf(uuid, sizeof(uuid), "%llx", (unsigned long long) id);

  osdev = NULL;
  while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
    hwloc_obj_t pcidev = osdev->parent;
    const char *info;

    /* Skip unnamed OS devices and those that are not "rsmi" devices. */
    if (!osdev->name || strncmp(osdev->name, "rsmi", 4))
      continue;

    /* First try to match through the PCI bus ID of the parent PCI device. */
    if (pcidev
        && pcidev->type == HWLOC_OBJ_PCI_DEVICE
        && pcidev->attr->pcidev.domain == domain
        && pcidev->attr->pcidev.bus == bus
        && pcidev->attr->pcidev.dev == device
        && pcidev->attr->pcidev.func == func)
      return osdev;

    /* Otherwise compare the unique ID with the AMDUUID info attribute. */
    info = hwloc_obj_get_info_by_name(osdev, "AMDUUID");
    if (info && !strcmp(info, uuid))
      return osdev;
  }
  return NULL;
}
/** @} */
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* HWLOC_RSMI_H */

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright © 2013-2018 Inria. All rights reserved. * Copyright © 2013-2023 Inria. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
@@ -48,6 +48,8 @@ extern "C" {
* This length (in bytes) must be used in hwloc_shmem_topology_write() * This length (in bytes) must be used in hwloc_shmem_topology_write()
* and hwloc_shmem_topology_adopt() later. * and hwloc_shmem_topology_adopt() later.
* *
* \return the length, or -1 on error, for instance if flags are invalid.
*
* \note Flags \p flags are currently unused, must be 0. * \note Flags \p flags are currently unused, must be 0.
*/ */
HWLOC_DECLSPEC int hwloc_shmem_topology_get_length(hwloc_topology_t topology, HWLOC_DECLSPEC int hwloc_shmem_topology_get_length(hwloc_topology_t topology,
@@ -74,9 +76,10 @@ HWLOC_DECLSPEC int hwloc_shmem_topology_get_length(hwloc_topology_t topology,
* is not. However the caller may also allocate it manually in shared memory * is not. However the caller may also allocate it manually in shared memory
* to share it as well. * to share it as well.
* *
* \return -1 with errno set to EBUSY if the virtual memory mapping defined * \return 0 on success.
* \return -1 with errno set to \c EBUSY if the virtual memory mapping defined
* by \p mmap_address and \p length isn't available in the process. * by \p mmap_address and \p length isn't available in the process.
* \return -1 with errno set to EINVAL if \p fileoffset, \p mmap_address * \return -1 with errno set to \c EINVAL if \p fileoffset, \p mmap_address
* or \p length aren't page-aligned. * or \p length aren't page-aligned.
*/ */
HWLOC_DECLSPEC int hwloc_shmem_topology_write(hwloc_topology_t topology, HWLOC_DECLSPEC int hwloc_shmem_topology_write(hwloc_topology_t topology,
@@ -112,14 +115,16 @@ HWLOC_DECLSPEC int hwloc_shmem_topology_write(hwloc_topology_t topology,
* *
* \note This function takes care of calling hwloc_topology_abi_check(). * \note This function takes care of calling hwloc_topology_abi_check().
* *
* \return -1 with errno set to EBUSY if the virtual memory mapping defined * \return 0 on success.
*
* \return -1 with errno set to \c EBUSY if the virtual memory mapping defined
* by \p mmap_address and \p length isn't available in the process. * by \p mmap_address and \p length isn't available in the process.
* *
* \return -1 with errno set to EINVAL if \p fileoffset, \p mmap_address * \return -1 with errno set to \c EINVAL if \p fileoffset, \p mmap_address
* or \p length aren't page-aligned, or do not match what was given to * or \p length aren't page-aligned, or do not match what was given to
* hwloc_shmem_topology_write() earlier. * hwloc_shmem_topology_write() earlier.
* *
* \return -1 with errno set to EINVAL if the layout of the topology structure * \return -1 with errno set to \c EINVAL if the layout of the topology structure
* is different between the writer process and the adopter process. * is different between the writer process and the adopter process.
*/ */
HWLOC_DECLSPEC int hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp, HWLOC_DECLSPEC int hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp,

View File

@@ -0,0 +1,76 @@
/*
* Copyright © 2021 Inria. All rights reserved.
* See COPYING in top-level directory.
*/
/** \file
* \brief Macros to help interaction between hwloc and Windows.
*
* Applications that use hwloc on Windows may want to include this file
* for Windows specific hwloc features.
*/
#ifndef HWLOC_WINDOWS_H
#define HWLOC_WINDOWS_H
#include "hwloc.h"
#ifdef __cplusplus
extern "C" {
#endif
/** \defgroup hwlocality_windows Windows-specific helpers
*
* These functions query Windows processor groups.
* These groups partition the operating system into virtual sets
* of up to 64 neighbor PUs.
* Threads and processes may only be bound inside a single group.
* Although Windows processor groups may be exposed in the hwloc
* hierarchy as hwloc Groups, they are also often merged into
* existing hwloc objects such as NUMA nodes or Packages.
* This API provides explicit information about Windows processor
* groups so that applications know whether binding to a large
* set of PUs may fail because it spans over multiple Windows
* processor groups.
*
* @{
*/
/** \brief Get the number of Windows processor groups
*
* \p flags must be 0 for now.
*
* \return at least \c 1 on success.
* \return -1 on error, for instance if the topology does not match
* the current system (e.g. loaded from another machine through XML).
*/
HWLOC_DECLSPEC int hwloc_windows_get_nr_processor_groups(hwloc_topology_t topology, unsigned long flags);
/** \brief Get the CPU-set of a Windows processor group.
*
* Get the set of PU included in the processor group specified
* by \p pg_index.
* \p pg_index must be between \c 0 and the value returned
* by hwloc_windows_get_nr_processor_groups() minus 1.
*
* \p flags must be 0 for now.
*
* \return \c 0 on success.
* \return \c -1 on error, for instance if \p pg_index is invalid,
* or if the topology does not match the current system (e.g. loaded
* from another machine through XML).
*/
HWLOC_DECLSPEC int hwloc_windows_get_processor_group_cpuset(hwloc_topology_t topology, unsigned pg_index, hwloc_cpuset_t cpuset, unsigned long flags);
/** @} */
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* HWLOC_WINDOWS_H */

View File

@@ -1,6 +1,6 @@
/* /*
* Copyright © 2009, 2011, 2012 CNRS. All rights reserved. * Copyright © 2009, 2011, 2012 CNRS. All rights reserved.
* Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2021 Inria. All rights reserved.
* Copyright © 2009, 2011, 2012, 2015 Université Bordeaux. All rights reserved. * Copyright © 2009, 2011, 2012, 2015 Université Bordeaux. All rights reserved.
* Copyright © 2009-2020 Cisco Systems, Inc. All rights reserved. * Copyright © 2009-2020 Cisco Systems, Inc. All rights reserved.
* $COPYRIGHT$ * $COPYRIGHT$
@@ -290,10 +290,6 @@
/* Define to '1' if sysctlbyname is present and usable */ /* Define to '1' if sysctlbyname is present and usable */
/* #undef HAVE_SYSCTLBYNAME */ /* #undef HAVE_SYSCTLBYNAME */
/* Define to 1 if the system has the type
`SYSTEM_LOGICAL_PROCESSOR_INFORMATION'. */
#define HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION 1
/* Define to 1 if the system has the type /* Define to 1 if the system has the type
`SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX'. */ `SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX'. */
#define HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX 1 #define HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX 1

View File

@@ -11,6 +11,22 @@
#ifndef HWLOC_PRIVATE_CPUID_X86_H #ifndef HWLOC_PRIVATE_CPUID_X86_H
#define HWLOC_PRIVATE_CPUID_X86_H #define HWLOC_PRIVATE_CPUID_X86_H
/* A macro for annotating memory as uninitialized when building with MSAN
* (and otherwise having no effect). See below for why this is used with
* our custom assembly.
*/
#ifdef __has_feature
#define HWLOC_HAS_FEATURE(name) __has_feature(name)
#else
#define HWLOC_HAS_FEATURE(name) 0
#endif
#if HWLOC_HAS_FEATURE(memory_sanitizer) || defined(MEMORY_SANITIZER)
#include <sanitizer/msan_interface.h>
#define HWLOC_ANNOTATE_MEMORY_IS_INITIALIZED(ptr, len) __msan_unpoison(ptr, len)
#else
#define HWLOC_ANNOTATE_MEMORY_IS_INITIALIZED(ptr, len)
#endif
#if (defined HWLOC_X86_32_ARCH) && (!defined HWLOC_HAVE_MSVC_CPUIDEX) #if (defined HWLOC_X86_32_ARCH) && (!defined HWLOC_HAVE_MSVC_CPUIDEX)
static __hwloc_inline int hwloc_have_x86_cpuid(void) static __hwloc_inline int hwloc_have_x86_cpuid(void)
{ {
@@ -71,12 +87,18 @@ static __hwloc_inline void hwloc_x86_cpuid(unsigned *eax, unsigned *ebx, unsigne
"movl %k2,%1\n\t" "movl %k2,%1\n\t"
: "+a" (*eax), "=m" (*ebx), "=&r"(sav_rbx), : "+a" (*eax), "=m" (*ebx), "=&r"(sav_rbx),
"+c" (*ecx), "=&d" (*edx)); "+c" (*ecx), "=&d" (*edx));
/* MSAN does not recognize the effect of the above assembly on the memory operand
* (`"=m"(*ebx)`). This may get improved in MSAN at some point in the future, e.g.
* see https://github.com/llvm/llvm-project/pull/77393. */
HWLOC_ANNOTATE_MEMORY_IS_INITIALIZED(ebx, sizeof *ebx);
#elif defined(HWLOC_X86_32_ARCH) #elif defined(HWLOC_X86_32_ARCH)
__asm__( __asm__(
"mov %%ebx,%1\n\t" "mov %%ebx,%1\n\t"
"cpuid\n\t" "cpuid\n\t"
"xchg %%ebx,%1\n\t" "xchg %%ebx,%1\n\t"
: "+a" (*eax), "=&SD" (*ebx), "+c" (*ecx), "=&d" (*edx)); : "+a" (*eax), "=&SD" (*ebx), "+c" (*ecx), "=&d" (*edx));
/* See above. */
HWLOC_ANNOTATE_MEMORY_IS_INITIALIZED(ebx, sizeof *ebx);
#else #else
#error unknown architecture #error unknown architecture
#endif #endif

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright © 2018-2019 Inria. All rights reserved. * Copyright © 2018-2020 Inria. All rights reserved.
* *
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
@@ -31,6 +31,7 @@ HWLOC_DECLSPEC extern const struct hwloc_component hwloc_cuda_component;
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_gl_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_gl_component;
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_nvml_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_nvml_component;
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_rsmi_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_rsmi_component;
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_levelzero_component;
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_opencl_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_opencl_component;
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_pci_component; HWLOC_DECLSPEC extern const struct hwloc_component hwloc_pci_component;

View File

@@ -1,6 +1,6 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2019 Inria. All rights reserved. * Copyright © 2009-2024 Inria. All rights reserved.
* Copyright © 2009-2012 Université Bordeaux * Copyright © 2009-2012 Université Bordeaux
* Copyright © 2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
@@ -186,7 +186,7 @@ hwloc_ffsl_from_ffs32(unsigned long x)
/** /**
* flsl helpers. * flsl helpers.
*/ */
#ifdef __GNUC_____ #ifdef __GNUC__
# if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4)) # if (__GNUC__ >= 4) || ((__GNUC__ == 3) && (__GNUC_MINOR__ >= 4))
# define hwloc_flsl(x) ((x) ? (8*sizeof(long) - __builtin_clzl(x)) : 0) # define hwloc_flsl(x) ((x) ? (8*sizeof(long) - __builtin_clzl(x)) : 0)
@@ -504,7 +504,7 @@ hwloc__obj_type_is_icache(hwloc_obj_type_t type)
} \ } \
} while(0) } while(0)
#else /* HAVE_USELOCALE */ #else /* HAVE_USELOCALE */
#if __HWLOC_HAVE_ATTRIBUTE_UNUSED #if HWLOC_HAVE_ATTRIBUTE_UNUSED
#define hwloc_localeswitch_declare int __dummy_nolocale __hwloc_attribute_unused #define hwloc_localeswitch_declare int __dummy_nolocale __hwloc_attribute_unused
#define hwloc_localeswitch_init() #define hwloc_localeswitch_init()
#else #else
@@ -573,4 +573,35 @@ typedef SSIZE_T ssize_t;
# endif # endif
#endif #endif
static __inline float
hwloc__pci_link_speed(unsigned generation, unsigned lanes)
{
  /*
   * Single-direction bandwidth of a PCIe link, per generation:
   *
   *   Gen1-2 use NRZ signalling with 8/10 encoding:
   *     Gen1 =   2.5 GT/s per lane x 8/10    = 0.25 GB/s per lane
   *     Gen2 =   5   GT/s per lane x 8/10    = 0.5  GB/s per lane
   *   Gen3-5 use NRZ signalling with 128/130 encoding:
   *     Gen3 =   8   GT/s per lane x 128/130 = 1    GB/s per lane
   *     Gen4 =  16   GT/s per lane x 128/130 = 2    GB/s per lane
   *     Gen5 =  32   GT/s per lane x 128/130 = 4    GB/s per lane
   *   Gen6+ use PAM signalling with 242/256 FLIT
   *   (242B payload protected by 8B CRC + 6B FEC):
   *     Gen6 =  64   GT/s per lane x 242/256 = 8    GB/s per lane
   *     Gen7 = 128   GT/s per lane x 242/256 = 16   GB/s per lane
   */
  float gbits_per_lane; /* data rate of one lane, in Gbit/s */

  if (generation <= 2) {
    gbits_per_lane = 2.5f * generation * 0.8f;
  } else {
    /* signal rate doubles with each generation, starting at 8 GT/s for Gen3
     * (Gen8 is assumed to be 256 GT/s and so on)
     */
    float signal_rate = 8.0f * (1<<(generation-3));
    if (generation <= 5)
      gbits_per_lane = signal_rate * 128/130;
    else
      gbits_per_lane = signal_rate * 242/256;
  }

  /* convert to GB/s for the whole link */
  return gbits_per_lane * lanes / 8;
}
#endif /* HWLOC_PRIVATE_MISC_H */ #endif /* HWLOC_PRIVATE_MISC_H */

View File

@@ -1,578 +0,0 @@
/*
* Copyright © 2014 Cisco Systems, Inc. All rights reserved.
* Copyright © 2013-2014 University of Wisconsin-La Crosse.
* All rights reserved.
* Copyright © 2015-2017 Inria. All rights reserved.
*
* $COPYRIGHT$
*
* Additional copyrights may follow
* See COPYING in top-level directory.
*
* $HEADER$
*/
#ifndef _NETLOC_PRIVATE_H_
#define _NETLOC_PRIVATE_H_
#include <hwloc.h>
#include <netloc.h>
#include <netloc/uthash.h>
#include <netloc/utarray.h>
#include <private/autogen/config.h>
#define NETLOCFILE_VERSION 1
#ifdef NETLOC_SCOTCH
#include <stdint.h>
#include <scotch.h>
#define NETLOC_int SCOTCH_Num
#else
#define NETLOC_int int
#endif
/*
* "Import" a few things from hwloc
*/
#define __netloc_attribute_unused __hwloc_attribute_unused
#define __netloc_attribute_malloc __hwloc_attribute_malloc
#define __netloc_attribute_const __hwloc_attribute_const
#define __netloc_attribute_pure __hwloc_attribute_pure
#define __netloc_attribute_deprecated __hwloc_attribute_deprecated
#define __netloc_attribute_may_alias __hwloc_attribute_may_alias
#define NETLOC_DECLSPEC HWLOC_DECLSPEC
/**********************************************************************
* Types
**********************************************************************/
/**
* Definitions for Comparators
* \sa These are the return values from the following functions:
* netloc_network_compare, netloc_dt_edge_t_compare, netloc_dt_node_t_compare
*/
typedef enum {
NETLOC_CMP_SAME = 0, /**< Compared as the Same */
NETLOC_CMP_SIMILAR = -1, /**< Compared as Similar, but not the Same */
NETLOC_CMP_DIFF = -2 /**< Compared as Different */
} netloc_compare_type_t;
/**
* Enumerated type for the various types of supported networks
*/
typedef enum {
NETLOC_NETWORK_TYPE_ETHERNET = 1, /**< Ethernet network */
NETLOC_NETWORK_TYPE_INFINIBAND = 2, /**< InfiniBand network */
NETLOC_NETWORK_TYPE_INVALID = 3 /**< Invalid network */
} netloc_network_type_t;
/**
* Enumerated type for the various types of supported topologies
*/
typedef enum {
NETLOC_TOPOLOGY_TYPE_INVALID = -1, /**< Invalid */
NETLOC_TOPOLOGY_TYPE_TREE = 1, /**< Tree */
} netloc_topology_type_t;
/**
* Enumerated type for the various types of nodes
*/
typedef enum {
NETLOC_NODE_TYPE_HOST = 0, /**< Host (a.k.a., network addressable endpoint - e.g., MAC Address) node */
NETLOC_NODE_TYPE_SWITCH = 1, /**< Switch node */
NETLOC_NODE_TYPE_INVALID = 2 /**< Invalid node */
} netloc_node_type_t;
typedef enum {
NETLOC_ARCH_TREE = 0, /* Fat tree */
} netloc_arch_type_t;
/* Pre declarations to avoid inter dependency problems */
/** \cond IGNORE */
struct netloc_topology_t;
typedef struct netloc_topology_t netloc_topology_t;
struct netloc_node_t;
typedef struct netloc_node_t netloc_node_t;
struct netloc_edge_t;
typedef struct netloc_edge_t netloc_edge_t;
struct netloc_physical_link_t;
typedef struct netloc_physical_link_t netloc_physical_link_t;
struct netloc_path_t;
typedef struct netloc_path_t netloc_path_t;
struct netloc_arch_tree_t;
typedef struct netloc_arch_tree_t netloc_arch_tree_t;
struct netloc_arch_node_t;
typedef struct netloc_arch_node_t netloc_arch_node_t;
struct netloc_arch_node_slot_t;
typedef struct netloc_arch_node_slot_t netloc_arch_node_slot_t;
struct netloc_arch_t;
typedef struct netloc_arch_t netloc_arch_t;
/** \endcond */
/**
* \struct netloc_topology_t
* \brief Netloc Topology Context
*
* An opaque data structure used to reference a network topology.
*
* \note Must be initialized with \ref netloc_topology_construct()
*/
struct netloc_topology_t {
/** Topology path */
char *topopath;
/** Subnet ID */
char *subnet_id;
/** Node List */
netloc_node_t *nodes; /* Hash table of nodes by physical_id */
netloc_node_t *nodesByHostname; /* Hash table of nodes by hostname */
netloc_physical_link_t *physical_links; /* Hash table of physical links */
/** Partition List */
UT_array *partitions;
/** Hwloc topology List */
char *hwlocpath;
UT_array *topos;
hwloc_topology_t *hwloc_topos;
/** Type of the graph */
netloc_topology_type_t type;
};
/**
* \brief Netloc Node Type
*
* Represents the concept of a node (a.k.a., vertex, endpoint) within a network
* graph. This could be a server or a network switch. The \ref node_type parameter
* will distinguish the exact type of node this represents in the graph.
*/
struct netloc_node_t {
UT_hash_handle hh; /* makes this structure hashable with physical_id */
UT_hash_handle hh2; /* makes this structure hashable with hostname */
/** Physical ID of the node */
char physical_id[20];
/** Logical ID of the node (if any) */
int logical_id;
/** Type of the node */
netloc_node_type_t type;
/* Pointer to physical_links */
UT_array *physical_links;
/** Description information from discovery (if any) */
char *description;
/**
* Application-given private data pointer.
* Initialized to NULL, and not used by the netloc library.
*/
void * userdata;
/** Outgoing edges from this node */
netloc_edge_t *edges;
UT_array *subnodes; /* the group of nodes for the virtual nodes */
netloc_path_t *paths;
char *hostname;
UT_array *partitions; /* index in the list from the topology */
hwloc_topology_t hwlocTopo;
int hwlocTopoIdx;
};
/**
* \brief Netloc Edge Type
*
* Represents the concept of a directed edge within a network graph.
*
* \note We do not point to the netloc_node_t structure directly to
* simplify the representation, and allow the information to more easily
* be entered into the data store without circular references.
* \todo JJH Is the note above still true?
*/
struct netloc_edge_t {
UT_hash_handle hh; /* makes this structure hashable */
netloc_node_t *dest;
int id;
/** Pointers to the parent node */
netloc_node_t *node;
/* Pointer to physical_links */
UT_array *physical_links;
/** total gbits of the links */
float total_gbits;
UT_array *partitions; /* index in the list from the topology */
UT_array *subnode_edges; /* for edges going to virtual nodes */
struct netloc_edge_t *other_way;
/**
* Application-given private data pointer.
* Initialized to NULL, and not used by the netloc library.
*/
void * userdata;
};
struct netloc_physical_link_t {
UT_hash_handle hh; /* makes this structure hashable */
int id; // TODO long long
netloc_node_t *src;
netloc_node_t *dest;
int ports[2];
char *width;
char *speed;
netloc_edge_t *edge;
int other_way_id;
struct netloc_physical_link_t *other_way;
UT_array *partitions; /* index in the list from the topology */
/** gbits of the link from speed and width */
float gbits;
/** Description information from discovery (if any) */
char *description;
};
struct netloc_path_t {
UT_hash_handle hh; /* makes this structure hashable */
char dest_id[20];
UT_array *links;
};
/**********************************************************************
* Architecture structures
**********************************************************************/
struct netloc_arch_tree_t {
NETLOC_int num_levels;
NETLOC_int *degrees;
NETLOC_int *cost;
};
struct netloc_arch_node_t {
UT_hash_handle hh; /* makes this structure hashable */
char *name; /* Hash key */
netloc_node_t *node; /* Corresponding node */
int idx_in_topo; /* idx with ghost hosts to have complete topo */
int num_slots; /* it is not the real number of slots but the maximum slot idx */
int *slot_idx; /* corresponding idx in slot_tree */
int *slot_os_idx; /* corresponding os index for each leaf in tree */
netloc_arch_tree_t *slot_tree; /* Tree built from hwloc */
int num_current_slots; /* Number of PUs */
NETLOC_int *current_slots; /* indices in the complete tree */
int *slot_ranks; /* corresponding MPI rank for each leaf in tree */
};
struct netloc_arch_node_slot_t {
netloc_arch_node_t *node;
int slot;
};
struct netloc_arch_t {
netloc_topology_t *topology;
int has_slots; /* if slots are included in the architecture */
netloc_arch_type_t type;
union {
netloc_arch_tree_t *node_tree;
netloc_arch_tree_t *global_tree;
} arch;
netloc_arch_node_t *nodes_by_name;
netloc_arch_node_slot_t *node_slot_by_idx; /* node_slot by index in complete topo */
NETLOC_int num_current_hosts; /* if has_slots, host is a slot, else host is a node */
NETLOC_int *current_hosts; /* indices in the complete topology */
};
/**********************************************************************
* Topology Functions
**********************************************************************/
/**
* Allocate a topology handle.
*
* User is responsible for calling \ref netloc_detach on the topology handle.
* The network parameter information is deep copied into the topology handle, so the
* user may destruct the network handle after calling this function and/or reuse
* the network handle.
*
* \returns NETLOC_SUCCESS on success
* \returns NETLOC_ERROR upon an error.
*/
netloc_topology_t *netloc_topology_construct(char *path);
/**
* Destruct a topology handle
*
* \param topology A valid pointer to a \ref netloc_topology_t handle created
* from a prior call to \ref netloc_topology_construct.
*
* \returns NETLOC_SUCCESS on success
* \returns NETLOC_ERROR upon an error.
*/
int netloc_topology_destruct(netloc_topology_t *topology);
int netloc_topology_find_partition_idx(netloc_topology_t *topology, char *partition_name);
int netloc_topology_read_hwloc(netloc_topology_t *topology, int num_nodes,
netloc_node_t **node_list);
#define netloc_topology_iter_partitions(topology,partition) \
for ((partition) = (char **)utarray_front(topology->partitions); \
(partition) != NULL; \
(partition) = (char **)utarray_next(topology->partitions, partition))
#define netloc_topology_iter_hwloctopos(topology,hwloctopo) \
for ((hwloctopo) = (char **)utarray_front(topology->topos); \
(hwloctopo) != NULL; \
(hwloctopo) = (char **)utarray_next(topology->topos, hwloctopo))
#define netloc_topology_find_node(topology,node_id,node) \
HASH_FIND_STR(topology->nodes, node_id, node)
#define netloc_topology_iter_nodes(topology,node,_tmp) \
HASH_ITER(hh, topology->nodes, node, _tmp)
#define netloc_topology_num_nodes(topology) \
HASH_COUNT(topology->nodes)
/*************************************************/
/**
* Constructor for netloc_node_t
*
* User is responsible for calling the destructor on the handle.
*
* Returns
* A newly allocated pointer to the network information.
*/
netloc_node_t *netloc_node_construct(void);
/**
* Destructor for netloc_node_t
*
* \param node A valid node handle
*
* Returns
* NETLOC_SUCCESS on success
* NETLOC_ERROR on error
*/
int netloc_node_destruct(netloc_node_t *node);
char *netloc_node_pretty_print(netloc_node_t* node);
#define netloc_node_get_num_subnodes(node) \
utarray_len((node)->subnodes)
#define netloc_node_get_subnode(node,i) \
(*(netloc_node_t **)utarray_eltptr((node)->subnodes, (i)))
#define netloc_node_get_num_edges(node) \
utarray_len((node)->edges)
#define netloc_node_get_edge(node,i) \
(*(netloc_edge_t **)utarray_eltptr((node)->edges, (i)))
#define netloc_node_iter_edges(node,edge,_tmp) \
HASH_ITER(hh, node->edges, edge, _tmp)
#define netloc_node_iter_paths(node,path,_tmp) \
HASH_ITER(hh, node->paths, path, _tmp)
#define netloc_node_is_host(node) \
(node->type == NETLOC_NODE_TYPE_HOST)
#define netloc_node_is_switch(node) \
(node->type == NETLOC_NODE_TYPE_SWITCH)
#define netloc_node_iter_paths(node, path,_tmp) \
HASH_ITER(hh, node->paths, path, _tmp)
int netloc_node_is_in_partition(netloc_node_t *node, int partition);
/*************************************************/
/**
* Constructor for netloc_edge_t
*
* User is responsible for calling the destructor on the handle.
*
* Returns
* A newly allocated pointer to the edge information.
*/
netloc_edge_t *netloc_edge_construct(void);
/**
* Destructor for netloc_edge_t
*
* \param edge A valid edge handle
*
* Returns
* NETLOC_SUCCESS on success
* NETLOC_ERROR on error
*/
int netloc_edge_destruct(netloc_edge_t *edge);
char * netloc_edge_pretty_print(netloc_edge_t* edge);
void netloc_edge_reset_uid(void);
int netloc_edge_is_in_partition(netloc_edge_t *edge, int partition);
#define netloc_edge_get_num_links(edge) \
utarray_len((edge)->physical_links)
#define netloc_edge_get_link(edge,i) \
(*(netloc_physical_link_t **)utarray_eltptr((edge)->physical_links, (i)))
#define netloc_edge_get_num_subedges(edge) \
utarray_len((edge)->subnode_edges)
#define netloc_edge_get_subedge(edge,i) \
(*(netloc_edge_t **)utarray_eltptr((edge)->subnode_edges, (i)))
/*************************************************/
/**
* Constructor for netloc_physical_link_t
*
* User is responsible for calling the destructor on the handle.
*
* Returns
* A newly allocated pointer to the physical link information.
*/
netloc_physical_link_t * netloc_physical_link_construct(void);
/**
* Destructor for netloc_physical_link_t
*
* Returns
* NETLOC_SUCCESS on success
* NETLOC_ERROR on error
*/
int netloc_physical_link_destruct(netloc_physical_link_t *link);
char * netloc_link_pretty_print(netloc_physical_link_t* link);
/*************************************************/
netloc_path_t *netloc_path_construct(void);
int netloc_path_destruct(netloc_path_t *path);
/**********************************************************************
* Architecture functions
**********************************************************************/
netloc_arch_t * netloc_arch_construct(void);
int netloc_arch_destruct(netloc_arch_t *arch);
int netloc_arch_build(netloc_arch_t *arch, int add_slots);
int netloc_arch_set_current_resources(netloc_arch_t *arch);
int netloc_arch_set_global_resources(netloc_arch_t *arch);
int netloc_arch_node_get_hwloc_info(netloc_arch_node_t *arch);
void netloc_arch_tree_complete(netloc_arch_tree_t *tree, UT_array **down_degrees_by_level,
int num_hosts, int **parch_idx);
NETLOC_int netloc_arch_tree_num_leaves(netloc_arch_tree_t *tree);
/**********************************************************************
* Access functions of various elements of the topology
**********************************************************************/
#define netloc_get_num_partitions(object) \
utarray_len((object)->partitions)
#define netloc_get_partition(object,i) \
(*(int *)utarray_eltptr((object)->partitions, (i)))
#define netloc_path_iter_links(path,link) \
for ((link) = (netloc_physical_link_t **)utarray_front(path->links); \
(link) != NULL; \
(link) = (netloc_physical_link_t **)utarray_next(path->links, link))
/**********************************************************************
* Misc functions
**********************************************************************/
/**
 * Translate a network type into its short string form.
 *
 * \param net_type A valid member of the \ref netloc_network_type_t type
 *
 * \returns "ETH" for Ethernet, "IB" for InfiniBand
 * \returns NULL for any other value
 */
static inline const char * netloc_network_type_decode(netloc_network_type_t net_type) {
    switch (net_type) {
    case NETLOC_NETWORK_TYPE_ETHERNET:
        return "ETH";
    case NETLOC_NETWORK_TYPE_INFINIBAND:
        return "IB";
    default:
        return NULL;
    }
}
/**
 * Translate a node type into its short string form.
 *
 * \param node_type A valid member of the \ref netloc_node_type_t type
 *
 * \returns "SW" for a switch, "CA" for a host
 * \returns NULL for any other value
 */
static inline const char * netloc_node_type_decode(netloc_node_type_t node_type) {
    switch (node_type) {
    case NETLOC_NODE_TYPE_SWITCH:
        return "SW";
    case NETLOC_NODE_TYPE_HOST:
        return "CA";
    default:
        return NULL;
    }
}
ssize_t netloc_line_get(char **lineptr, size_t *n, FILE *stream);
char *netloc_line_get_next_token(char **string, char c);
int netloc_build_comm_mat(char *filename, int *pn, double ***pmat);
#define STRDUP_IF_NOT_NULL(str) (NULL == str ? NULL : strdup(str))
#define STR_EMPTY_IF_NULL(str) (NULL == str ? "" : str)
#endif // _NETLOC_PRIVATE_H_

View File

@@ -1,6 +1,6 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2025 Inria. All rights reserved.
* Copyright © 2009-2012, 2020 Université Bordeaux * Copyright © 2009-2012, 2020 Université Bordeaux
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* *
@@ -166,6 +166,7 @@ struct hwloc_topology {
unsigned long kind; unsigned long kind;
#define HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID (1U<<0) /* if the objs array is valid below */ #define HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID (1U<<0) /* if the objs array is valid below */
#define HWLOC_INTERNAL_DIST_FLAG_NOT_COMMITTED (1U<<1) /* if the distances isn't in the list yet */
unsigned iflags; unsigned iflags;
/* objects are currently stored in physical_index order */ /* objects are currently stored in physical_index order */
@@ -244,6 +245,12 @@ struct hwloc_topology {
* temporary variables during discovery * temporary variables during discovery
*/ */
/* set to 1 at the beginning of load() if the filter of any cpu cache type (L1 to L3i) is not NONE,
* may be checked by backends before querying caches
* (when they don't know the level of caches they are querying).
*/
int want_some_cpu_caches;
/* machine-wide memory. /* machine-wide memory.
* temporarily stored there by OSes that only provide this without NUMA information, * temporarily stored there by OSes that only provide this without NUMA information,
* and actually used later by the core. * and actually used later by the core.
@@ -258,6 +265,7 @@ struct hwloc_topology {
unsigned bus_first, bus_last; unsigned bus_first, bus_last;
hwloc_bitmap_t cpuset; hwloc_bitmap_t cpuset;
} * pci_forced_locality; } * pci_forced_locality;
hwloc_uint64_t pci_locality_quirks;
/* component blacklisting */ /* component blacklisting */
unsigned nr_blacklisted_components; unsigned nr_blacklisted_components;
@@ -294,6 +302,9 @@ extern void hwloc__reorder_children(hwloc_obj_t parent);
extern void hwloc_topology_setup_defaults(struct hwloc_topology *topology); extern void hwloc_topology_setup_defaults(struct hwloc_topology *topology);
extern void hwloc_topology_clear(struct hwloc_topology *topology); extern void hwloc_topology_clear(struct hwloc_topology *topology);
#define _HWLOC_RECONNECT_FLAG_KEEPSTRUCTURE (1UL<<0)
extern int hwloc__reconnect(struct hwloc_topology *topology, unsigned long flags);
/* insert memory object as memory child of normal parent */ /* insert memory object as memory child of normal parent */
extern struct hwloc_obj * hwloc__attach_memory_object(struct hwloc_topology *topology, hwloc_obj_t parent, extern struct hwloc_obj * hwloc__attach_memory_object(struct hwloc_topology *topology, hwloc_obj_t parent,
hwloc_obj_t obj, const char *reason); hwloc_obj_t obj, const char *reason);
@@ -304,11 +315,6 @@ extern void hwloc_pci_discovery_init(struct hwloc_topology *topology);
extern void hwloc_pci_discovery_prepare(struct hwloc_topology *topology); extern void hwloc_pci_discovery_prepare(struct hwloc_topology *topology);
extern void hwloc_pci_discovery_exit(struct hwloc_topology *topology); extern void hwloc_pci_discovery_exit(struct hwloc_topology *topology);
/* Look for an object matching the given domain/bus/func,
* either exactly or return the smallest container bridge
*/
extern struct hwloc_obj * hwloc_pci_find_by_busid(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func);
/* Look for an object matching complete cpuset exactly, or insert one. /* Look for an object matching complete cpuset exactly, or insert one.
* Return NULL on failure. * Return NULL on failure.
* Return a good fallback (object above) on failure to insert. * Return a good fallback (object above) on failure to insert.
@@ -408,10 +414,14 @@ extern void hwloc_internal_distances_prepare(hwloc_topology_t topology);
extern void hwloc_internal_distances_destroy(hwloc_topology_t topology); extern void hwloc_internal_distances_destroy(hwloc_topology_t topology);
extern int hwloc_internal_distances_dup(hwloc_topology_t new, hwloc_topology_t old); extern int hwloc_internal_distances_dup(hwloc_topology_t new, hwloc_topology_t old);
extern void hwloc_internal_distances_refresh(hwloc_topology_t topology); extern void hwloc_internal_distances_refresh(hwloc_topology_t topology);
extern int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values, unsigned long kind, unsigned long flags);
extern int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, const char *name, hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, unsigned nbobjs, uint64_t *indexes, uint64_t *values, unsigned long kind, unsigned long flags);
extern void hwloc_internal_distances_invalidate_cached_objs(hwloc_topology_t topology); extern void hwloc_internal_distances_invalidate_cached_objs(hwloc_topology_t topology);
/* these distances_add() functions are higher-level than those in hwloc/plugins.h
* but they may change in the future, hence they are not exported to plugins.
*/
extern int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, const char *name, hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, unsigned nbobjs, uint64_t *indexes, uint64_t *values, unsigned long kind, unsigned long flags);
extern int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values, unsigned long kind, unsigned long flags);
extern void hwloc_internal_memattrs_init(hwloc_topology_t topology); extern void hwloc_internal_memattrs_init(hwloc_topology_t topology);
extern void hwloc_internal_memattrs_prepare(hwloc_topology_t topology); extern void hwloc_internal_memattrs_prepare(hwloc_topology_t topology);
extern void hwloc_internal_memattrs_destroy(hwloc_topology_t topology); extern void hwloc_internal_memattrs_destroy(hwloc_topology_t topology);
@@ -419,6 +429,7 @@ extern void hwloc_internal_memattrs_need_refresh(hwloc_topology_t topology);
extern void hwloc_internal_memattrs_refresh(hwloc_topology_t topology); extern void hwloc_internal_memattrs_refresh(hwloc_topology_t topology);
extern int hwloc_internal_memattrs_dup(hwloc_topology_t new, hwloc_topology_t old); extern int hwloc_internal_memattrs_dup(hwloc_topology_t new, hwloc_topology_t old);
extern int hwloc_internal_memattr_set_value(hwloc_topology_t topology, hwloc_memattr_id_t id, hwloc_obj_type_t target_type, hwloc_uint64_t target_gp_index, unsigned target_os_index, struct hwloc_internal_location_s *initiator, hwloc_uint64_t value); extern int hwloc_internal_memattr_set_value(hwloc_topology_t topology, hwloc_memattr_id_t id, hwloc_obj_type_t target_type, hwloc_uint64_t target_gp_index, unsigned target_os_index, struct hwloc_internal_location_s *initiator, hwloc_uint64_t value);
extern int hwloc_internal_memattrs_guess_memory_tiers(hwloc_topology_t topology, int force_subtype);
extern void hwloc_internal_cpukinds_init(hwloc_topology_t topology); extern void hwloc_internal_cpukinds_init(hwloc_topology_t topology);
extern int hwloc_internal_cpukinds_rank(hwloc_topology_t topology); extern int hwloc_internal_cpukinds_rank(hwloc_topology_t topology);
@@ -475,11 +486,13 @@ extern char * hwloc_progname(struct hwloc_topology *topology);
#define HWLOC_GROUP_KIND_INTEL_DIE 104 /* no subkind */ #define HWLOC_GROUP_KIND_INTEL_DIE 104 /* no subkind */
#define HWLOC_GROUP_KIND_S390_BOOK 110 /* subkind 0 is book, subkind 1 is drawer (group of books) */ #define HWLOC_GROUP_KIND_S390_BOOK 110 /* subkind 0 is book, subkind 1 is drawer (group of books) */
#define HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT 120 /* no subkind */ #define HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT 120 /* no subkind */
#define HWLOC_GROUP_KIND_AMD_COMPLEX 121 /* no subkind */
/* then, OS-specific groups */ /* then, OS-specific groups */
#define HWLOC_GROUP_KIND_SOLARIS_PG_HW_PERF 200 /* subkind is group width */ #define HWLOC_GROUP_KIND_SOLARIS_PG_HW_PERF 200 /* subkind is group width */
#define HWLOC_GROUP_KIND_AIX_SDL_UNKNOWN 210 /* subkind is SDL level */ #define HWLOC_GROUP_KIND_AIX_SDL_UNKNOWN 210 /* subkind is SDL level */
#define HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP 220 /* no subkind */ #define HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP 220 /* no subkind */
#define HWLOC_GROUP_KIND_WINDOWS_RELATIONSHIP_UNKNOWN 221 /* no subkind */ #define HWLOC_GROUP_KIND_WINDOWS_RELATIONSHIP_UNKNOWN 221 /* no subkind */
#define HWLOC_GROUP_KIND_LINUX_CLUSTER 222 /* no subkind */
/* distance groups */ /* distance groups */
#define HWLOC_GROUP_KIND_DISTANCE 900 /* subkind is round of adding these groups during distance based grouping */ #define HWLOC_GROUP_KIND_DISTANCE 900 /* subkind is round of adding these groups during distance based grouping */
/* finally, hwloc-specific groups required to insert something else, should disappear as soon as possible */ /* finally, hwloc-specific groups required to insert something else, should disappear as soon as possible */

View File

@@ -0,0 +1,30 @@
/*
* Copyright © 2009 Université Bordeaux
* Copyright © 2020-2022 Inria. All rights reserved.
*
* See COPYING in top-level directory.
*/
#if !defined(HWLOC_PRIVATE_WINDOWS_H)
#define HWLOC_PRIVATE_WINDOWS_H
/* Fallback definitions for the anonymous union/struct helper macros.
 * Each one is only provided when nothing defined it earlier: it expands to
 * __extension__ under GNU-compatible compilers and to nothing otherwise.
 * NOTE(review): presumably meant for Windows-SDK-style declarations used by
 * the Windows backend - confirm against the files including this header.
 */
#if !defined(_ANONYMOUS_UNION)
# if defined(__GNUC__)
#  define _ANONYMOUS_UNION __extension__
# else
#  define _ANONYMOUS_UNION
# endif /* __GNUC__ */
#endif /* _ANONYMOUS_UNION */
#if !defined(_ANONYMOUS_STRUCT)
# if defined(__GNUC__)
#  define _ANONYMOUS_STRUCT __extension__
# else
#  define _ANONYMOUS_STRUCT
# endif /* __GNUC__ */
#endif /* _ANONYMOUS_STRUCT */
/* Placeholder member names: defined empty so the members stay unnamed. */
#define DUMMYUNIONNAME
#define DUMMYSTRUCTNAME
#endif /* HWLOC_PRIVATE_WINDOWS_H */

View File

@@ -19,13 +19,14 @@ HWLOC_DECLSPEC int hwloc__xml_verbose(void);
typedef struct hwloc__xml_import_state_s { typedef struct hwloc__xml_import_state_s {
struct hwloc__xml_import_state_s *parent; struct hwloc__xml_import_state_s *parent;
/* globals shared because the entire stack of states during import */ /* globals shared between the entire stack of states during import */
struct hwloc_xml_backend_data_s *global; struct hwloc_xml_backend_data_s *global;
/* opaque data used to store backend-specific data. /* opaque data used to store backend-specific data.
* statically allocated to allow stack-allocation by the common code without knowing actual backend needs. * statically allocated to allow stack-allocation by the common code without knowing actual backend needs.
* libxml is 3 ptrs. nolibxml is 3 ptr + one int.
*/ */
char data[32]; char data[4 * SIZEOF_VOID_P];
} * hwloc__xml_import_state_t; } * hwloc__xml_import_state_t;
struct hwloc__xml_imported_v1distances_s { struct hwloc__xml_imported_v1distances_s {
@@ -74,8 +75,9 @@ typedef struct hwloc__xml_export_state_s {
/* opaque data used to store backend-specific data. /* opaque data used to store backend-specific data.
* statically allocated to allow stack-allocation by the common code without knowing actual backend needs. * statically allocated to allow stack-allocation by the common code without knowing actual backend needs.
* libxml is 1 ptr. nolibxml is 1 ptr + 2 size_t + 3 ints.
*/ */
char data[40]; char data[6 * SIZEOF_VOID_P];
} * hwloc__xml_export_state_t; } * hwloc__xml_export_state_t;
HWLOC_DECLSPEC void hwloc__xml_export_topology(hwloc__xml_export_state_t parentstate, hwloc_topology_t topology, unsigned long flags); HWLOC_DECLSPEC void hwloc__xml_export_topology(hwloc__xml_export_state_t parentstate, hwloc_topology_t topology, unsigned long flags);

View File

@@ -1,6 +1,6 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2024 Inria. All rights reserved.
* Copyright © 2009-2010, 2012 Université Bordeaux * Copyright © 2009-2010, 2012 Université Bordeaux
* Copyright © 2011-2015 Cisco Systems, Inc. All rights reserved. * Copyright © 2011-2015 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
@@ -287,6 +287,7 @@ static __hwloc_inline int hwloc__check_membind_policy(hwloc_membind_policy_t pol
|| policy == HWLOC_MEMBIND_FIRSTTOUCH || policy == HWLOC_MEMBIND_FIRSTTOUCH
|| policy == HWLOC_MEMBIND_BIND || policy == HWLOC_MEMBIND_BIND
|| policy == HWLOC_MEMBIND_INTERLEAVE || policy == HWLOC_MEMBIND_INTERLEAVE
|| policy == HWLOC_MEMBIND_WEIGHTED_INTERLEAVE
|| policy == HWLOC_MEMBIND_NEXTTOUCH) || policy == HWLOC_MEMBIND_NEXTTOUCH)
return 0; return 0;
return -1; return -1;

View File

@@ -1,6 +1,6 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2024 Inria. All rights reserved.
* Copyright © 2009-2011 Université Bordeaux * Copyright © 2009-2011 Université Bordeaux
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
@@ -245,6 +245,7 @@ int hwloc_bitmap_copy(struct hwloc_bitmap_s * dst, const struct hwloc_bitmap_s *
/* Strings always use 32bit groups */ /* Strings always use 32bit groups */
#define HWLOC_PRIxSUBBITMAP "%08lx" #define HWLOC_PRIxSUBBITMAP "%08lx"
#define HWLOC_BITMAP_SUBSTRING_SIZE 32 #define HWLOC_BITMAP_SUBSTRING_SIZE 32
#define HWLOC_BITMAP_SUBSTRING_FULL_VALUE 0xFFFFFFFFUL
#define HWLOC_BITMAP_SUBSTRING_LENGTH (HWLOC_BITMAP_SUBSTRING_SIZE/4) #define HWLOC_BITMAP_SUBSTRING_LENGTH (HWLOC_BITMAP_SUBSTRING_SIZE/4)
#define HWLOC_BITMAP_STRING_PER_LONG (HWLOC_BITS_PER_LONG/HWLOC_BITMAP_SUBSTRING_SIZE) #define HWLOC_BITMAP_STRING_PER_LONG (HWLOC_BITS_PER_LONG/HWLOC_BITMAP_SUBSTRING_SIZE)
@@ -261,6 +262,7 @@ int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, const stru
const unsigned long accum_mask = ~0UL; const unsigned long accum_mask = ~0UL;
#else /* HWLOC_BITS_PER_LONG != HWLOC_BITMAP_SUBSTRING_SIZE */ #else /* HWLOC_BITS_PER_LONG != HWLOC_BITMAP_SUBSTRING_SIZE */
const unsigned long accum_mask = ((1UL << HWLOC_BITMAP_SUBSTRING_SIZE) - 1) << (HWLOC_BITS_PER_LONG - HWLOC_BITMAP_SUBSTRING_SIZE); const unsigned long accum_mask = ((1UL << HWLOC_BITMAP_SUBSTRING_SIZE) - 1) << (HWLOC_BITS_PER_LONG - HWLOC_BITMAP_SUBSTRING_SIZE);
int merge_with_infinite_prefix = 0;
#endif /* HWLOC_BITS_PER_LONG != HWLOC_BITMAP_SUBSTRING_SIZE */ #endif /* HWLOC_BITS_PER_LONG != HWLOC_BITMAP_SUBSTRING_SIZE */
HWLOC__BITMAP_CHECK(set); HWLOC__BITMAP_CHECK(set);
@@ -279,6 +281,9 @@ int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, const stru
res = size>0 ? (int)size - 1 : 0; res = size>0 ? (int)size - 1 : 0;
tmp += res; tmp += res;
size -= res; size -= res;
#if HWLOC_BITS_PER_LONG > HWLOC_BITMAP_SUBSTRING_SIZE
merge_with_infinite_prefix = 1;
#endif
} }
i=(int) set->ulongs_count-1; i=(int) set->ulongs_count-1;
@@ -294,16 +299,24 @@ int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, const stru
} }
while (i>=0 || accumed) { while (i>=0 || accumed) {
unsigned long value;
/* Refill accumulator */ /* Refill accumulator */
if (!accumed) { if (!accumed) {
accum = set->ulongs[i--]; accum = set->ulongs[i--];
accumed = HWLOC_BITS_PER_LONG; accumed = HWLOC_BITS_PER_LONG;
} }
value = (accum & accum_mask) >> (HWLOC_BITS_PER_LONG - HWLOC_BITMAP_SUBSTRING_SIZE);
if (accum & accum_mask) { #if HWLOC_BITS_PER_LONG > HWLOC_BITMAP_SUBSTRING_SIZE
if (merge_with_infinite_prefix && value == HWLOC_BITMAP_SUBSTRING_FULL_VALUE) {
/* first full subbitmap merged with infinite prefix */
res = 0;
} else
#endif
if (value) {
/* print the whole subset if not empty */ /* print the whole subset if not empty */
res = hwloc_snprintf(tmp, size, needcomma ? ",0x" HWLOC_PRIxSUBBITMAP : "0x" HWLOC_PRIxSUBBITMAP, res = hwloc_snprintf(tmp, size, needcomma ? ",0x" HWLOC_PRIxSUBBITMAP : "0x" HWLOC_PRIxSUBBITMAP, value);
(accum & accum_mask) >> (HWLOC_BITS_PER_LONG - HWLOC_BITMAP_SUBSTRING_SIZE));
needcomma = 1; needcomma = 1;
} else if (i == -1 && accumed == HWLOC_BITMAP_SUBSTRING_SIZE) { } else if (i == -1 && accumed == HWLOC_BITMAP_SUBSTRING_SIZE) {
/* print a single 0 to mark the last subset */ /* print a single 0 to mark the last subset */
@@ -323,6 +336,7 @@ int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, const stru
#else #else
accum <<= HWLOC_BITMAP_SUBSTRING_SIZE; accum <<= HWLOC_BITMAP_SUBSTRING_SIZE;
accumed -= HWLOC_BITMAP_SUBSTRING_SIZE; accumed -= HWLOC_BITMAP_SUBSTRING_SIZE;
merge_with_infinite_prefix = 0;
#endif #endif
if (res >= size) if (res >= size)
@@ -362,7 +376,8 @@ int hwloc_bitmap_sscanf(struct hwloc_bitmap_s *set, const char * __hwloc_restric
{ {
const char * current = string; const char * current = string;
unsigned long accum = 0; unsigned long accum = 0;
int count=0; int count = 0;
int ulongcount;
int infinite = 0; int infinite = 0;
/* count how many substrings there are */ /* count how many substrings there are */
@@ -383,9 +398,20 @@ int hwloc_bitmap_sscanf(struct hwloc_bitmap_s *set, const char * __hwloc_restric
count--; count--;
} }
if (hwloc_bitmap_reset_by_ulongs(set, (count + HWLOC_BITMAP_STRING_PER_LONG - 1) / HWLOC_BITMAP_STRING_PER_LONG) < 0) ulongcount = (count + HWLOC_BITMAP_STRING_PER_LONG - 1) / HWLOC_BITMAP_STRING_PER_LONG;
if (hwloc_bitmap_reset_by_ulongs(set, ulongcount) < 0)
return -1; return -1;
set->infinite = 0;
set->infinite = 0; /* will be updated later */
#if HWLOC_BITS_PER_LONG != HWLOC_BITMAP_SUBSTRING_SIZE
if (infinite && (count % HWLOC_BITMAP_STRING_PER_LONG) != 0) {
/* accumulate substrings of the first ulong that are hidden in the infinite prefix */
int i;
for(i = (count % HWLOC_BITMAP_STRING_PER_LONG); i < HWLOC_BITMAP_STRING_PER_LONG; i++)
accum |= (HWLOC_BITMAP_SUBSTRING_FULL_VALUE << (i*HWLOC_BITMAP_SUBSTRING_SIZE));
}
#endif
while (*current != '\0') { while (*current != '\0') {
unsigned long val; unsigned long val;
@@ -544,6 +570,9 @@ int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, co
ssize_t size = buflen; ssize_t size = buflen;
char *tmp = buf; char *tmp = buf;
int res, ret = 0; int res, ret = 0;
#if HWLOC_BITS_PER_LONG == 64
int merge_with_infinite_prefix = 0;
#endif
int started = 0; int started = 0;
int i; int i;
@@ -563,6 +592,9 @@ int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, co
res = size>0 ? (int)size - 1 : 0; res = size>0 ? (int)size - 1 : 0;
tmp += res; tmp += res;
size -= res; size -= res;
#if HWLOC_BITS_PER_LONG == 64
merge_with_infinite_prefix = 1;
#endif
} }
i=set->ulongs_count-1; i=set->ulongs_count-1;
@@ -582,7 +614,11 @@ int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, co
if (started) { if (started) {
/* print the whole subset */ /* print the whole subset */
#if HWLOC_BITS_PER_LONG == 64 #if HWLOC_BITS_PER_LONG == 64
res = hwloc_snprintf(tmp, size, "%016lx", val); if (merge_with_infinite_prefix && (val & 0xffffffff00000000UL) == 0xffffffff00000000UL) {
res = hwloc_snprintf(tmp, size, "%08lx", val & 0xffffffffUL);
} else {
res = hwloc_snprintf(tmp, size, "%016lx", val);
}
#else #else
res = hwloc_snprintf(tmp, size, "%08lx", val); res = hwloc_snprintf(tmp, size, "%08lx", val);
#endif #endif
@@ -599,6 +635,9 @@ int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, co
res = size>0 ? (int)size - 1 : 0; res = size>0 ? (int)size - 1 : 0;
tmp += res; tmp += res;
size -= res; size -= res;
#if HWLOC_BITS_PER_LONG == 64
merge_with_infinite_prefix = 0;
#endif
} }
/* if didn't display anything, display 0x0 */ /* if didn't display anything, display 0x0 */
@@ -679,6 +718,10 @@ int hwloc_bitmap_taskset_sscanf(struct hwloc_bitmap_s *set, const char * __hwloc
goto failed; goto failed;
set->ulongs[count-1] = val; set->ulongs[count-1] = val;
if (infinite && tmpchars != HWLOC_BITS_PER_LONG/4) {
/* infinite prefix with partial substring, fill remaining bits */
set->ulongs[count-1] |= (~0ULL)<<(4*tmpchars);
}
current += tmpchars; current += tmpchars;
chars -= tmpchars; chars -= tmpchars;

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2022 Inria. All rights reserved.
* Copyright © 2012 Université Bordeaux * Copyright © 2012 Université Bordeaux
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
@@ -94,8 +94,7 @@ static hwloc_dlhandle hwloc_dlopenext(const char *_filename)
{ {
hwloc_dlhandle handle; hwloc_dlhandle handle;
char *filename = NULL; char *filename = NULL;
(void) asprintf(&filename, "%s.so", _filename); if (asprintf(&filename, "%s.so", _filename) < 0)
if (!filename)
return NULL; return NULL;
handle = dlopen(filename, RTLD_NOW|RTLD_LOCAL); handle = dlopen(filename, RTLD_NOW|RTLD_LOCAL);
free(filename); free(filename);
@@ -124,7 +123,7 @@ hwloc_dlforeachfile(const char *_paths,
*colon = '\0'; *colon = '\0';
if (hwloc_plugins_verbose) if (hwloc_plugins_verbose)
fprintf(stderr, " Looking under %s\n", path); fprintf(stderr, "hwloc: Looking under %s\n", path);
dir = opendir(path); dir = opendir(path);
if (!dir) if (!dir)
@@ -198,7 +197,7 @@ hwloc__dlforeach_cb(const char *filename, void *_data __hwloc_attribute_unused)
char *componentsymbolname; char *componentsymbolname;
if (hwloc_plugins_verbose) if (hwloc_plugins_verbose)
fprintf(stderr, "Plugin dlforeach found `%s'\n", filename); fprintf(stderr, "hwloc: Plugin dlforeach found `%s'\n", filename);
basename = strrchr(filename, '/'); basename = strrchr(filename, '/');
if (!basename) if (!basename)
@@ -208,7 +207,7 @@ hwloc__dlforeach_cb(const char *filename, void *_data __hwloc_attribute_unused)
if (hwloc_plugins_blacklist && strstr(hwloc_plugins_blacklist, basename)) { if (hwloc_plugins_blacklist && strstr(hwloc_plugins_blacklist, basename)) {
if (hwloc_plugins_verbose) if (hwloc_plugins_verbose)
fprintf(stderr, "Plugin `%s' is blacklisted in the environment\n", basename); fprintf(stderr, "hwloc: Plugin `%s' is blacklisted in the environment\n", basename);
goto out; goto out;
} }
@@ -216,14 +215,14 @@ hwloc__dlforeach_cb(const char *filename, void *_data __hwloc_attribute_unused)
handle = hwloc_dlopenext(filename); handle = hwloc_dlopenext(filename);
if (!handle) { if (!handle) {
if (hwloc_plugins_verbose) if (hwloc_plugins_verbose)
fprintf(stderr, "Failed to load plugin: %s\n", hwloc_dlerror()); fprintf(stderr, "hwloc: Failed to load plugin: %s\n", hwloc_dlerror());
goto out; goto out;
} }
componentsymbolname = malloc(strlen(basename)+10+1); componentsymbolname = malloc(strlen(basename)+10+1);
if (!componentsymbolname) { if (!componentsymbolname) {
if (hwloc_plugins_verbose) if (hwloc_plugins_verbose)
fprintf(stderr, "Failed to allocation component `%s' symbol\n", fprintf(stderr, "hwloc: Failed to allocation component `%s' symbol\n",
basename); basename);
goto out_with_handle; goto out_with_handle;
} }
@@ -231,38 +230,38 @@ hwloc__dlforeach_cb(const char *filename, void *_data __hwloc_attribute_unused)
component = hwloc_dlsym(handle, componentsymbolname); component = hwloc_dlsym(handle, componentsymbolname);
if (!component) { if (!component) {
if (hwloc_plugins_verbose) if (hwloc_plugins_verbose)
fprintf(stderr, "Failed to find component symbol `%s'\n", fprintf(stderr, "hwloc: Failed to find component symbol `%s'\n",
componentsymbolname); componentsymbolname);
free(componentsymbolname); free(componentsymbolname);
goto out_with_handle; goto out_with_handle;
} }
if (component->abi != HWLOC_COMPONENT_ABI) { if (component->abi != HWLOC_COMPONENT_ABI) {
if (hwloc_plugins_verbose) if (hwloc_plugins_verbose)
fprintf(stderr, "Plugin symbol ABI %u instead of %d\n", fprintf(stderr, "hwloc: Plugin symbol ABI %u instead of %d\n",
component->abi, HWLOC_COMPONENT_ABI); component->abi, HWLOC_COMPONENT_ABI);
free(componentsymbolname); free(componentsymbolname);
goto out_with_handle; goto out_with_handle;
} }
if (hwloc_plugins_verbose) if (hwloc_plugins_verbose)
fprintf(stderr, "Plugin contains expected symbol `%s'\n", fprintf(stderr, "hwloc: Plugin contains expected symbol `%s'\n",
componentsymbolname); componentsymbolname);
free(componentsymbolname); free(componentsymbolname);
if (HWLOC_COMPONENT_TYPE_DISC == component->type) { if (HWLOC_COMPONENT_TYPE_DISC == component->type) {
if (strncmp(basename, "hwloc_", 6)) { if (strncmp(basename, "hwloc_", 6)) {
if (hwloc_plugins_verbose) if (hwloc_plugins_verbose)
fprintf(stderr, "Plugin name `%s' doesn't match its type DISCOVERY\n", basename); fprintf(stderr, "hwloc: Plugin name `%s' doesn't match its type DISCOVERY\n", basename);
goto out_with_handle; goto out_with_handle;
} }
} else if (HWLOC_COMPONENT_TYPE_XML == component->type) { } else if (HWLOC_COMPONENT_TYPE_XML == component->type) {
if (strncmp(basename, "hwloc_xml_", 10)) { if (strncmp(basename, "hwloc_xml_", 10)) {
if (hwloc_plugins_verbose) if (hwloc_plugins_verbose)
fprintf(stderr, "Plugin name `%s' doesn't match its type XML\n", basename); fprintf(stderr, "hwloc: Plugin name `%s' doesn't match its type XML\n", basename);
goto out_with_handle; goto out_with_handle;
} }
} else { } else {
if (hwloc_plugins_verbose) if (hwloc_plugins_verbose)
fprintf(stderr, "Plugin name `%s' has invalid type %u\n", fprintf(stderr, "hwloc: Plugin name `%s' has invalid type %u\n",
basename, (unsigned) component->type); basename, (unsigned) component->type);
goto out_with_handle; goto out_with_handle;
} }
@@ -277,7 +276,7 @@ hwloc__dlforeach_cb(const char *filename, void *_data __hwloc_attribute_unused)
desc->handle = handle; desc->handle = handle;
desc->next = NULL; desc->next = NULL;
if (hwloc_plugins_verbose) if (hwloc_plugins_verbose)
fprintf(stderr, "Plugin descriptor `%s' ready\n", basename); fprintf(stderr, "hwloc: Plugin descriptor `%s' ready\n", basename);
/* append to the list */ /* append to the list */
prevdesc = &hwloc_plugins; prevdesc = &hwloc_plugins;
@@ -285,7 +284,7 @@ hwloc__dlforeach_cb(const char *filename, void *_data __hwloc_attribute_unused)
prevdesc = &((*prevdesc)->next); prevdesc = &((*prevdesc)->next);
*prevdesc = desc; *prevdesc = desc;
if (hwloc_plugins_verbose) if (hwloc_plugins_verbose)
fprintf(stderr, "Plugin descriptor `%s' queued\n", basename); fprintf(stderr, "hwloc: Plugin descriptor `%s' queued\n", basename);
return 0; return 0;
out_with_handle: out_with_handle:
@@ -300,7 +299,7 @@ hwloc_plugins_exit(void)
struct hwloc__plugin_desc *desc, *next; struct hwloc__plugin_desc *desc, *next;
if (hwloc_plugins_verbose) if (hwloc_plugins_verbose)
fprintf(stderr, "Closing all plugins\n"); fprintf(stderr, "hwloc: Closing all plugins\n");
desc = hwloc_plugins; desc = hwloc_plugins;
while (desc) { while (desc) {
@@ -340,7 +339,7 @@ hwloc_plugins_init(void)
hwloc_plugins = NULL; hwloc_plugins = NULL;
if (hwloc_plugins_verbose) if (hwloc_plugins_verbose)
fprintf(stderr, "Starting plugin dlforeach in %s\n", path); fprintf(stderr, "hwloc: Starting plugin dlforeach in %s\n", path);
err = hwloc_dlforeachfile(path, hwloc__dlforeach_cb, NULL); err = hwloc_dlforeachfile(path, hwloc__dlforeach_cb, NULL);
if (err) if (err)
goto out_with_init; goto out_with_init;
@@ -364,14 +363,14 @@ hwloc_disc_component_register(struct hwloc_disc_component *component,
/* check that the component name is valid */ /* check that the component name is valid */
if (!strcmp(component->name, HWLOC_COMPONENT_STOP_NAME)) { if (!strcmp(component->name, HWLOC_COMPONENT_STOP_NAME)) {
if (hwloc_components_verbose) if (hwloc_components_verbose)
fprintf(stderr, "Cannot register discovery component with reserved name `" HWLOC_COMPONENT_STOP_NAME "'\n"); fprintf(stderr, "hwloc: Cannot register discovery component with reserved name `" HWLOC_COMPONENT_STOP_NAME "'\n");
return -1; return -1;
} }
if (strchr(component->name, HWLOC_COMPONENT_EXCLUDE_CHAR) if (strchr(component->name, HWLOC_COMPONENT_EXCLUDE_CHAR)
|| strchr(component->name, HWLOC_COMPONENT_PHASESEP_CHAR) || strchr(component->name, HWLOC_COMPONENT_PHASESEP_CHAR)
|| strcspn(component->name, HWLOC_COMPONENT_SEPS) != strlen(component->name)) { || strcspn(component->name, HWLOC_COMPONENT_SEPS) != strlen(component->name)) {
if (hwloc_components_verbose) if (hwloc_components_verbose)
fprintf(stderr, "Cannot register discovery component with name `%s' containing reserved characters `%c" HWLOC_COMPONENT_SEPS "'\n", fprintf(stderr, "hwloc: Cannot register discovery component with name `%s' containing reserved characters `%c" HWLOC_COMPONENT_SEPS "'\n",
component->name, HWLOC_COMPONENT_EXCLUDE_CHAR); component->name, HWLOC_COMPONENT_EXCLUDE_CHAR);
return -1; return -1;
} }
@@ -386,8 +385,9 @@ hwloc_disc_component_register(struct hwloc_disc_component *component,
|HWLOC_DISC_PHASE_MISC |HWLOC_DISC_PHASE_MISC
|HWLOC_DISC_PHASE_ANNOTATE |HWLOC_DISC_PHASE_ANNOTATE
|HWLOC_DISC_PHASE_TWEAK))) { |HWLOC_DISC_PHASE_TWEAK))) {
fprintf(stderr, "Cannot register discovery component `%s' with invalid phases 0x%x\n", if (HWLOC_SHOW_CRITICAL_ERRORS())
component->name, component->phases); fprintf(stderr, "hwloc: Cannot register discovery component `%s' with invalid phases 0x%x\n",
component->name, component->phases);
return -1; return -1;
} }
@@ -398,13 +398,13 @@ hwloc_disc_component_register(struct hwloc_disc_component *component,
if ((*prev)->priority < component->priority) { if ((*prev)->priority < component->priority) {
/* drop the existing component */ /* drop the existing component */
if (hwloc_components_verbose) if (hwloc_components_verbose)
fprintf(stderr, "Dropping previously registered discovery component `%s', priority %u lower than new one %u\n", fprintf(stderr, "hwloc: Dropping previously registered discovery component `%s', priority %u lower than new one %u\n",
(*prev)->name, (*prev)->priority, component->priority); (*prev)->name, (*prev)->priority, component->priority);
*prev = (*prev)->next; *prev = (*prev)->next;
} else { } else {
/* drop the new one */ /* drop the new one */
if (hwloc_components_verbose) if (hwloc_components_verbose)
fprintf(stderr, "Ignoring new discovery component `%s', priority %u lower than previously registered one %u\n", fprintf(stderr, "hwloc: Ignoring new discovery component `%s', priority %u lower than previously registered one %u\n",
component->name, component->priority, (*prev)->priority); component->name, component->priority, (*prev)->priority);
return -1; return -1;
} }
@@ -412,7 +412,7 @@ hwloc_disc_component_register(struct hwloc_disc_component *component,
prev = &((*prev)->next); prev = &((*prev)->next);
} }
if (hwloc_components_verbose) if (hwloc_components_verbose)
fprintf(stderr, "Registered discovery component `%s' phases 0x%x with priority %u (%s%s)\n", fprintf(stderr, "hwloc: Registered discovery component `%s' phases 0x%x with priority %u (%s%s)\n",
component->name, component->phases, component->priority, component->name, component->phases, component->priority,
filename ? "from plugin " : "statically build", filename ? filename : ""); filename ? "from plugin " : "statically build", filename ? filename : "");
@@ -475,15 +475,16 @@ hwloc_components_init(void)
/* hwloc_static_components is created by configure in static-components.h */ /* hwloc_static_components is created by configure in static-components.h */
for(i=0; NULL != hwloc_static_components[i]; i++) { for(i=0; NULL != hwloc_static_components[i]; i++) {
if (hwloc_static_components[i]->flags) { if (hwloc_static_components[i]->flags) {
fprintf(stderr, "Ignoring static component with invalid flags %lx\n", if (HWLOC_SHOW_CRITICAL_ERRORS())
hwloc_static_components[i]->flags); fprintf(stderr, "hwloc: Ignoring static component with invalid flags %lx\n",
hwloc_static_components[i]->flags);
continue; continue;
} }
/* initialize the component */ /* initialize the component */
if (hwloc_static_components[i]->init && hwloc_static_components[i]->init(0) < 0) { if (hwloc_static_components[i]->init && hwloc_static_components[i]->init(0) < 0) {
if (hwloc_components_verbose) if (hwloc_components_verbose)
fprintf(stderr, "Ignoring static component, failed to initialize\n"); fprintf(stderr, "hwloc: Ignoring static component, failed to initialize\n");
continue; continue;
} }
/* queue ->finalize() callback if any */ /* queue ->finalize() callback if any */
@@ -503,15 +504,16 @@ hwloc_components_init(void)
#ifdef HWLOC_HAVE_PLUGINS #ifdef HWLOC_HAVE_PLUGINS
for(desc = hwloc_plugins; NULL != desc; desc = desc->next) { for(desc = hwloc_plugins; NULL != desc; desc = desc->next) {
if (desc->component->flags) { if (desc->component->flags) {
fprintf(stderr, "Ignoring plugin `%s' component with invalid flags %lx\n", if (HWLOC_SHOW_CRITICAL_ERRORS())
desc->name, desc->component->flags); fprintf(stderr, "hwloc: Ignoring plugin `%s' component with invalid flags %lx\n",
desc->name, desc->component->flags);
continue; continue;
} }
/* initialize the component */ /* initialize the component */
if (desc->component->init && desc->component->init(0) < 0) { if (desc->component->init && desc->component->init(0) < 0) {
if (hwloc_components_verbose) if (hwloc_components_verbose)
fprintf(stderr, "Ignoring plugin `%s', failed to initialize\n", desc->name); fprintf(stderr, "hwloc: Ignoring plugin `%s', failed to initialize\n", desc->name);
continue; continue;
} }
/* queue ->finalize() callback if any */ /* queue ->finalize() callback if any */
@@ -608,7 +610,7 @@ hwloc_disc_component_blacklist_one(struct hwloc_topology *topology,
/* replace linuxpci and linuxio with linux (with IO phases) /* replace linuxpci and linuxio with linux (with IO phases)
* for backward compatibility with pre-v2.0 and v2.0 respectively */ * for backward compatibility with pre-v2.0 and v2.0 respectively */
if (hwloc_components_verbose) if (hwloc_components_verbose)
fprintf(stderr, "Replacing deprecated component `%s' with `linux' IO phases in blacklisting\n", name); fprintf(stderr, "hwloc: Replacing deprecated component `%s' with `linux' IO phases in blacklisting\n", name);
comp = hwloc_disc_component_find("linux", NULL); comp = hwloc_disc_component_find("linux", NULL);
phases = HWLOC_DISC_PHASE_PCI | HWLOC_DISC_PHASE_IO | HWLOC_DISC_PHASE_MISC | HWLOC_DISC_PHASE_ANNOTATE; phases = HWLOC_DISC_PHASE_PCI | HWLOC_DISC_PHASE_IO | HWLOC_DISC_PHASE_MISC | HWLOC_DISC_PHASE_ANNOTATE;
@@ -624,7 +626,7 @@ hwloc_disc_component_blacklist_one(struct hwloc_topology *topology,
} }
if (hwloc_components_verbose) if (hwloc_components_verbose)
fprintf(stderr, "Blacklisting component `%s` phases 0x%x\n", comp->name, phases); fprintf(stderr, "hwloc: Blacklisting component `%s` phases 0x%x\n", comp->name, phases);
for(i=0; i<topology->nr_blacklisted_components; i++) { for(i=0; i<topology->nr_blacklisted_components; i++) {
if (topology->blacklisted_components[i].component == comp) { if (topology->blacklisted_components[i].component == comp) {
@@ -727,7 +729,7 @@ hwloc_disc_component_try_enable(struct hwloc_topology *topology,
if (hwloc_components_verbose) if (hwloc_components_verbose)
/* do not warn if envvar_forced since system-wide HWLOC_COMPONENTS must be silently ignored after set_xml() etc. /* do not warn if envvar_forced since system-wide HWLOC_COMPONENTS must be silently ignored after set_xml() etc.
*/ */
fprintf(stderr, "Excluding discovery component `%s' phases 0x%x, conflicts with excludes 0x%x\n", fprintf(stderr, "hwloc: Excluding discovery component `%s' phases 0x%x, conflicts with excludes 0x%x\n",
comp->name, comp->phases, topology->backend_excluded_phases); comp->name, comp->phases, topology->backend_excluded_phases);
return -1; return -1;
} }
@@ -735,8 +737,8 @@ hwloc_disc_component_try_enable(struct hwloc_topology *topology,
backend = comp->instantiate(topology, comp, topology->backend_excluded_phases | blacklisted_phases, backend = comp->instantiate(topology, comp, topology->backend_excluded_phases | blacklisted_phases,
NULL, NULL, NULL); NULL, NULL, NULL);
if (!backend) { if (!backend) {
if (hwloc_components_verbose || envvar_forced) if (hwloc_components_verbose || (envvar_forced && HWLOC_SHOW_CRITICAL_ERRORS()))
fprintf(stderr, "Failed to instantiate discovery component `%s'\n", comp->name); fprintf(stderr, "hwloc: Failed to instantiate discovery component `%s'\n", comp->name);
return -1; return -1;
} }
@@ -817,7 +819,7 @@ hwloc_disc_components_enable_others(struct hwloc_topology *topology)
name = curenv; name = curenv;
if (!strcmp(name, "linuxpci") || !strcmp(name, "linuxio")) { if (!strcmp(name, "linuxpci") || !strcmp(name, "linuxio")) {
if (hwloc_components_verbose) if (hwloc_components_verbose)
fprintf(stderr, "Replacing deprecated component `%s' with `linux' in envvar forcing\n", name); fprintf(stderr, "hwloc: Replacing deprecated component `%s' with `linux' in envvar forcing\n", name);
name = "linux"; name = "linux";
} }
@@ -832,7 +834,8 @@ hwloc_disc_components_enable_others(struct hwloc_topology *topology)
if (comp->phases & ~blacklisted_phases) if (comp->phases & ~blacklisted_phases)
hwloc_disc_component_try_enable(topology, comp, 1 /* envvar forced */, blacklisted_phases); hwloc_disc_component_try_enable(topology, comp, 1 /* envvar forced */, blacklisted_phases);
} else { } else {
fprintf(stderr, "Cannot find discovery component `%s'\n", name); if (HWLOC_SHOW_CRITICAL_ERRORS())
fprintf(stderr, "hwloc: Cannot find discovery component `%s'\n", name);
} }
/* restore chars (the second loop below needs env to be unmodified) */ /* restore chars (the second loop below needs env to be unmodified) */
@@ -864,7 +867,7 @@ hwloc_disc_components_enable_others(struct hwloc_topology *topology)
if (!(comp->phases & ~blacklisted_phases)) { if (!(comp->phases & ~blacklisted_phases)) {
if (hwloc_components_verbose) if (hwloc_components_verbose)
fprintf(stderr, "Excluding blacklisted discovery component `%s' phases 0x%x\n", fprintf(stderr, "hwloc: Excluding blacklisted discovery component `%s' phases 0x%x\n",
comp->name, comp->phases); comp->name, comp->phases);
goto nextcomp; goto nextcomp;
} }
@@ -879,7 +882,7 @@ nextcomp:
/* print a summary */ /* print a summary */
int first = 1; int first = 1;
backend = topology->backends; backend = topology->backends;
fprintf(stderr, "Final list of enabled discovery components: "); fprintf(stderr, "hwloc: Final list of enabled discovery components: ");
while (backend != NULL) { while (backend != NULL) {
fprintf(stderr, "%s%s(0x%x)", first ? "" : ",", backend->component->name, backend->phases); fprintf(stderr, "%s%s(0x%x)", first ? "" : ",", backend->component->name, backend->phases);
backend = backend->next; backend = backend->next;
@@ -935,7 +938,7 @@ hwloc_backend_alloc(struct hwloc_topology *topology,
/* filter-out component phases that are excluded */ /* filter-out component phases that are excluded */
backend->phases = component->phases & ~topology->backend_excluded_phases; backend->phases = component->phases & ~topology->backend_excluded_phases;
if (backend->phases != component->phases && hwloc_components_verbose) if (backend->phases != component->phases && hwloc_components_verbose)
fprintf(stderr, "Trying discovery component `%s' with phases 0x%x instead of 0x%x\n", fprintf(stderr, "hwloc: Trying discovery component `%s' with phases 0x%x instead of 0x%x\n",
component->name, backend->phases, component->phases); component->name, backend->phases, component->phases);
backend->flags = 0; backend->flags = 0;
backend->discover = NULL; backend->discover = NULL;
@@ -963,8 +966,9 @@ hwloc_backend_enable(struct hwloc_backend *backend)
/* check backend flags */ /* check backend flags */
if (backend->flags) { if (backend->flags) {
fprintf(stderr, "Cannot enable discovery component `%s' phases 0x%x with unknown flags %lx\n", if (HWLOC_SHOW_CRITICAL_ERRORS())
backend->component->name, backend->component->phases, backend->flags); fprintf(stderr, "hwloc: Cannot enable discovery component `%s' phases 0x%x with unknown flags %lx\n",
backend->component->name, backend->component->phases, backend->flags);
return -1; return -1;
} }
@@ -973,7 +977,7 @@ hwloc_backend_enable(struct hwloc_backend *backend)
while (NULL != *pprev) { while (NULL != *pprev) {
if ((*pprev)->component == backend->component) { if ((*pprev)->component == backend->component) {
if (hwloc_components_verbose) if (hwloc_components_verbose)
fprintf(stderr, "Cannot enable discovery component `%s' phases 0x%x twice\n", fprintf(stderr, "hwloc: Cannot enable discovery component `%s' phases 0x%x twice\n",
backend->component->name, backend->component->phases); backend->component->name, backend->component->phases);
hwloc_backend_disable(backend); hwloc_backend_disable(backend);
errno = EBUSY; errno = EBUSY;
@@ -983,7 +987,7 @@ hwloc_backend_enable(struct hwloc_backend *backend)
} }
if (hwloc_components_verbose) if (hwloc_components_verbose)
fprintf(stderr, "Enabling discovery component `%s' with phases 0x%x (among 0x%x)\n", fprintf(stderr, "hwloc: Enabling discovery component `%s' with phases 0x%x (among 0x%x)\n",
backend->component->name, backend->phases, backend->component->phases); backend->component->name, backend->phases, backend->component->phases);
/* enqueue at the end */ /* enqueue at the end */
@@ -1067,7 +1071,7 @@ hwloc_backends_disable_all(struct hwloc_topology *topology)
while (NULL != (backend = topology->backends)) { while (NULL != (backend = topology->backends)) {
struct hwloc_backend *next = backend->next; struct hwloc_backend *next = backend->next;
if (hwloc_components_verbose) if (hwloc_components_verbose)
fprintf(stderr, "Disabling discovery component `%s'\n", fprintf(stderr, "hwloc: Disabling discovery component `%s'\n",
backend->component->name); backend->component->name);
hwloc_backend_disable(backend); hwloc_backend_disable(backend);
topology->backends = next; topology->backends = next;

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright © 2020 Inria. All rights reserved. * Copyright © 2020-2024 Inria. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
@@ -42,11 +42,15 @@ hwloc_internal_cpukinds_dup(hwloc_topology_t new, hwloc_topology_t old)
struct hwloc_internal_cpukind_s *kinds; struct hwloc_internal_cpukind_s *kinds;
unsigned i; unsigned i;
if (!old->nr_cpukinds)
return 0;
kinds = hwloc_tma_malloc(tma, old->nr_cpukinds * sizeof(*kinds)); kinds = hwloc_tma_malloc(tma, old->nr_cpukinds * sizeof(*kinds));
if (!kinds) if (!kinds)
return -1; return -1;
new->cpukinds = kinds; new->cpukinds = kinds;
new->nr_cpukinds = old->nr_cpukinds; new->nr_cpukinds = old->nr_cpukinds;
new->nr_cpukinds_allocated = old->nr_cpukinds;
memcpy(kinds, old->cpukinds, old->nr_cpukinds * sizeof(*kinds)); memcpy(kinds, old->cpukinds, old->nr_cpukinds * sizeof(*kinds));
for(i=0;i<old->nr_cpukinds; i++) { for(i=0;i<old->nr_cpukinds; i++) {
@@ -270,7 +274,7 @@ hwloc__cpukinds_check_duplicate_rankings(struct hwloc_topology *topology)
unsigned i,j; unsigned i,j;
for(i=0; i<topology->nr_cpukinds; i++) for(i=0; i<topology->nr_cpukinds; i++)
for(j=i+1; j<topology->nr_cpukinds; j++) for(j=i+1; j<topology->nr_cpukinds; j++)
if (topology->cpukinds[i].forced_efficiency == topology->cpukinds[j].forced_efficiency) if (topology->cpukinds[i].ranking_value == topology->cpukinds[j].ranking_value)
/* if any duplicate, fail */ /* if any duplicate, fail */
return -1; return -1;
return 0; return 0;
@@ -343,7 +347,8 @@ enum hwloc_cpukinds_ranking {
HWLOC_CPUKINDS_RANKING_DEFAULT, /* forced + frequency on ARM, forced + coretype_frequency otherwise */ HWLOC_CPUKINDS_RANKING_DEFAULT, /* forced + frequency on ARM, forced + coretype_frequency otherwise */
HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY, /* default without forced */ HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY, /* default without forced */
HWLOC_CPUKINDS_RANKING_FORCED_EFFICIENCY, HWLOC_CPUKINDS_RANKING_FORCED_EFFICIENCY,
HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY, HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY, /* either coretype or frequency or both */
HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY_STRICT, /* both coretype and frequency are required */
HWLOC_CPUKINDS_RANKING_CORETYPE, HWLOC_CPUKINDS_RANKING_CORETYPE,
HWLOC_CPUKINDS_RANKING_FREQUENCY, HWLOC_CPUKINDS_RANKING_FREQUENCY,
HWLOC_CPUKINDS_RANKING_FREQUENCY_MAX, HWLOC_CPUKINDS_RANKING_FREQUENCY_MAX,
@@ -358,9 +363,9 @@ hwloc__cpukinds_try_rank_by_info(struct hwloc_topology *topology,
{ {
unsigned i; unsigned i;
if (HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY == heuristics) { if (HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY_STRICT == heuristics) {
hwloc_debug("Trying to rank cpukinds by coretype+frequency...\n"); hwloc_debug("Trying to rank cpukinds by coretype+frequency_strict...\n");
/* we need intel_core_type + (base or max freq) for all kinds */ /* we need intel_core_type AND (base or max freq) for all kinds */
if (!summary->have_intel_core_type if (!summary->have_intel_core_type
|| (!summary->have_max_freq && !summary->have_base_freq)) || (!summary->have_max_freq && !summary->have_base_freq))
return -1; return -1;
@@ -373,6 +378,21 @@ hwloc__cpukinds_try_rank_by_info(struct hwloc_topology *topology,
kind->ranking_value = (summary->summaries[i].intel_core_type << 20) + summary->summaries[i].max_freq; kind->ranking_value = (summary->summaries[i].intel_core_type << 20) + summary->summaries[i].max_freq;
} }
} else if (HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY == heuristics) {
hwloc_debug("Trying to rank cpukinds by coretype+frequency...\n");
/* we need intel_core_type OR (base or max freq) for all kinds */
if (!summary->have_intel_core_type
&& (!summary->have_max_freq && !summary->have_base_freq))
return -1;
/* rank first by coretype (Core>>Atom) then by frequency, base if available, max otherwise */
for(i=0; i<topology->nr_cpukinds; i++) {
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i];
if (summary->have_base_freq)
kind->ranking_value = (summary->summaries[i].intel_core_type << 20) + summary->summaries[i].base_freq;
else
kind->ranking_value = (summary->summaries[i].intel_core_type << 20) + summary->summaries[i].max_freq;
}
} else if (HWLOC_CPUKINDS_RANKING_CORETYPE == heuristics) { } else if (HWLOC_CPUKINDS_RANKING_CORETYPE == heuristics) {
hwloc_debug("Trying to rank cpukinds by coretype...\n"); hwloc_debug("Trying to rank cpukinds by coretype...\n");
/* we need intel_core_type */ /* we need intel_core_type */
@@ -429,7 +449,9 @@ static int hwloc__cpukinds_compare_ranking_values(const void *_a, const void *_b
{ {
const struct hwloc_internal_cpukind_s *a = _a; const struct hwloc_internal_cpukind_s *a = _a;
const struct hwloc_internal_cpukind_s *b = _b; const struct hwloc_internal_cpukind_s *b = _b;
return a->ranking_value - b->ranking_value; uint64_t arv = a->ranking_value;
uint64_t brv = b->ranking_value;
return arv < brv ? -1 : arv > brv ? 1 : 0;
} }
/* this function requires ranking values to be unique */ /* this function requires ranking values to be unique */
@@ -469,6 +491,8 @@ hwloc_internal_cpukinds_rank(struct hwloc_topology *topology)
heuristics = HWLOC_CPUKINDS_RANKING_NONE; heuristics = HWLOC_CPUKINDS_RANKING_NONE;
else if (!strcmp(env, "coretype+frequency")) else if (!strcmp(env, "coretype+frequency"))
heuristics = HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY; heuristics = HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY;
else if (!strcmp(env, "coretype+frequency_strict"))
heuristics = HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY_STRICT;
else if (!strcmp(env, "coretype")) else if (!strcmp(env, "coretype"))
heuristics = HWLOC_CPUKINDS_RANKING_CORETYPE; heuristics = HWLOC_CPUKINDS_RANKING_CORETYPE;
else if (!strcmp(env, "frequency")) else if (!strcmp(env, "frequency"))
@@ -481,16 +505,14 @@ hwloc_internal_cpukinds_rank(struct hwloc_topology *topology)
heuristics = HWLOC_CPUKINDS_RANKING_FORCED_EFFICIENCY; heuristics = HWLOC_CPUKINDS_RANKING_FORCED_EFFICIENCY;
else if (!strcmp(env, "no_forced_efficiency")) else if (!strcmp(env, "no_forced_efficiency"))
heuristics = HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY; heuristics = HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY;
else if (!hwloc_hide_errors()) else if (HWLOC_SHOW_CRITICAL_ERRORS())
fprintf(stderr, "Failed to recognize HWLOC_CPUKINDS_RANKING value %s\n", env); fprintf(stderr, "hwloc: Failed to recognize HWLOC_CPUKINDS_RANKING value %s\n", env);
} }
if (heuristics == HWLOC_CPUKINDS_RANKING_DEFAULT if (heuristics == HWLOC_CPUKINDS_RANKING_DEFAULT
|| heuristics == HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY) { || heuristics == HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY) {
/* default is forced_efficiency first */ /* default is forced_efficiency first */
struct hwloc_cpukinds_info_summary summary; struct hwloc_cpukinds_info_summary summary;
enum hwloc_cpukinds_ranking subheuristics;
const char *arch;
if (heuristics == HWLOC_CPUKINDS_RANKING_DEFAULT) if (heuristics == HWLOC_CPUKINDS_RANKING_DEFAULT)
hwloc_debug("Using default ranking strategy...\n"); hwloc_debug("Using default ranking strategy...\n");
@@ -508,16 +530,7 @@ hwloc_internal_cpukinds_rank(struct hwloc_topology *topology)
goto failed; goto failed;
hwloc__cpukinds_summarize_info(topology, &summary); hwloc__cpukinds_summarize_info(topology, &summary);
arch = hwloc_obj_get_info_by_name(topology->levels[0][0], "Architecture"); err = hwloc__cpukinds_try_rank_by_info(topology, HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY, &summary);
/* TODO: rather coretype_frequency only on x86/Intel? */
if (arch && (!strncmp(arch, "arm", 3) || !strncmp(arch, "aarch", 5)))
/* then frequency on ARM */
subheuristics = HWLOC_CPUKINDS_RANKING_FREQUENCY;
else
/* or coretype+frequency otherwise */
subheuristics = HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY;
err = hwloc__cpukinds_try_rank_by_info(topology, subheuristics, &summary);
free(summary.summaries); free(summary.summaries);
if (!err) if (!err)
goto ready; goto ready;

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright © 2013-2020 Inria. All rights reserved. * Copyright © 2013-2023 Inria. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
@@ -218,7 +218,7 @@ hwloc_diff_trees(hwloc_topology_t topo1, hwloc_obj_t obj1,
struct hwloc_info_s *info1 = &obj1->infos[i], *info2 = &obj2->infos[i]; struct hwloc_info_s *info1 = &obj1->infos[i], *info2 = &obj2->infos[i];
if (strcmp(info1->name, info2->name)) if (strcmp(info1->name, info2->name))
goto out_too_complex; goto out_too_complex;
if (strcmp(obj1->infos[i].value, obj2->infos[i].value)) { if (strcmp(info1->value, info2->value)) {
err = hwloc_append_diff_obj_attr_string(obj1, err = hwloc_append_diff_obj_attr_string(obj1,
HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO, HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO,
info1->name, info1->name,
@@ -411,6 +411,30 @@ int hwloc_topology_diff_build(hwloc_topology_t topo1,
} }
} }
if (!err) {
/* cpukinds */
if (topo1->nr_cpukinds != topo2->nr_cpukinds)
goto roottoocomplex;
for(i=0; i<topo1->nr_cpukinds; i++) {
struct hwloc_internal_cpukind_s *ic1 = &topo1->cpukinds[i];
struct hwloc_internal_cpukind_s *ic2 = &topo2->cpukinds[i];
unsigned j;
if (!hwloc_bitmap_isequal(ic1->cpuset, ic2->cpuset)
|| ic1->efficiency != ic2->efficiency
|| ic1->forced_efficiency != ic2->forced_efficiency
|| ic1->ranking_value != ic2->ranking_value
|| ic1->nr_infos != ic2->nr_infos)
goto roottoocomplex;
for(j=0; j<ic1->nr_infos; j++) {
struct hwloc_info_s *info1 = &ic1->infos[j], *info2 = &ic2->infos[j];
if (strcmp(info1->name, info2->name)
|| strcmp(info1->value, info2->value)) {
goto roottoocomplex;
}
}
}
}
return err; return err;
roottoocomplex: roottoocomplex:

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright © 2010-2020 Inria. All rights reserved. * Copyright © 2010-2025 Inria. All rights reserved.
* Copyright © 2011-2012 Université Bordeaux * Copyright © 2011-2012 Université Bordeaux
* Copyright © 2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
@@ -17,6 +17,37 @@
static struct hwloc_internal_distances_s * static struct hwloc_internal_distances_s *
hwloc__internal_distances_from_public(hwloc_topology_t topology, struct hwloc_distances_s *distances); hwloc__internal_distances_from_public(hwloc_topology_t topology, struct hwloc_distances_s *distances);
static void
hwloc__groups_by_distances(struct hwloc_topology *topology, unsigned nbobjs, struct hwloc_obj **objs, uint64_t *values, unsigned long kind, unsigned nbaccuracies, float *accuracies, int needcheck);
static void
hwloc_internal_distances_restrict(hwloc_obj_t *objs,
uint64_t *indexes,
hwloc_obj_type_t *different_types,
uint64_t *values,
unsigned nbobjs, unsigned disappeared);
static void
hwloc_internal_distances_print_matrix(struct hwloc_internal_distances_s *dist)
{
unsigned nbobjs = dist->nbobjs;
hwloc_obj_t *objs = dist->objs;
hwloc_uint64_t *values = dist->values;
int gp = !HWLOC_DIST_TYPE_USE_OS_INDEX(dist->unique_type);
unsigned i, j;
fprintf(stderr, "%s", gp ? "gp_index" : "os_index");
for(j=0; j<nbobjs; j++)
fprintf(stderr, " % 5d", (int)(gp ? objs[j]->gp_index : objs[j]->os_index));
fprintf(stderr, "\n");
for(i=0; i<nbobjs; i++) {
fprintf(stderr, " % 5d", (int)(gp ? objs[i]->gp_index : objs[i]->os_index));
for(j=0; j<nbobjs; j++)
fprintf(stderr, " % 5lld", (long long) values[i*nbobjs + j]);
fprintf(stderr, "\n");
}
}
/****************************************************** /******************************************************
* Global init, prepare, destroy, dup * Global init, prepare, destroy, dup
*/ */
@@ -244,27 +275,33 @@ int hwloc_distances_release_remove(hwloc_topology_t topology,
return 0; return 0;
} }
/****************************************************** /*********************************************************
* Add distances to the topology * Backend functions for adding distances to the topology
*/ */
/* cancel a distances handle. only needed internally for now */
static void static void
hwloc__groups_by_distances(struct hwloc_topology *topology, unsigned nbobjs, struct hwloc_obj **objs, uint64_t *values, unsigned long kind, unsigned nbaccuracies, float *accuracies, int needcheck); hwloc_backend_distances_add__cancel(struct hwloc_internal_distances_s *dist)
{
/* everything is set to NULL in hwloc_backend_distances_add_create() */
free(dist->name);
free(dist->indexes);
free(dist->objs);
free(dist->different_types);
free(dist->values);
free(dist);
}
/* insert a distance matrix in the topology. /* prepare a distances handle for later commit in the topology.
* the caller gives us the distances and objs pointers, we'll free them later. * we duplicate the caller's name.
*/ */
static int hwloc_backend_distances_add_handle_t
hwloc_internal_distances__add(hwloc_topology_t topology, const char *name, hwloc_backend_distances_add_create(hwloc_topology_t topology,
hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, const char *name, unsigned long kind, unsigned long flags)
unsigned nbobjs, hwloc_obj_t *objs, uint64_t *indexes, uint64_t *values,
unsigned long kind, unsigned iflags)
{ {
struct hwloc_internal_distances_s *dist; struct hwloc_internal_distances_s *dist;
if (different_types) { if (flags) {
kind |= HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES; /* the user isn't forced to give it */
} else if (kind & HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES) {
errno = EINVAL; errno = EINVAL;
goto err; goto err;
} }
@@ -273,110 +310,54 @@ hwloc_internal_distances__add(hwloc_topology_t topology, const char *name,
if (!dist) if (!dist)
goto err; goto err;
if (name) if (name) {
dist->name = strdup(name); /* ignore failure */ dist->name = strdup(name); /* ignore failure */
if (!dist->name)
dist->unique_type = unique_type;
dist->different_types = different_types;
dist->nbobjs = nbobjs;
dist->kind = kind;
dist->iflags = iflags;
assert(!!(iflags & HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID) == !!objs);
if (!objs) {
assert(indexes);
/* we only have indexes, we'll refresh objs from there */
dist->indexes = indexes;
dist->objs = calloc(nbobjs, sizeof(hwloc_obj_t));
if (!dist->objs)
goto err_with_dist; goto err_with_dist;
} else {
unsigned i;
assert(!indexes);
/* we only have objs, generate the indexes arrays so that we can refresh objs later */
dist->objs = objs;
dist->indexes = malloc(nbobjs * sizeof(*dist->indexes));
if (!dist->indexes)
goto err_with_dist;
if (HWLOC_DIST_TYPE_USE_OS_INDEX(dist->unique_type)) {
for(i=0; i<nbobjs; i++)
dist->indexes[i] = objs[i]->os_index;
} else {
for(i=0; i<nbobjs; i++)
dist->indexes[i] = objs[i]->gp_index;
}
} }
dist->values = values; dist->kind = kind;
dist->iflags = HWLOC_INTERNAL_DIST_FLAG_NOT_COMMITTED;
dist->unique_type = HWLOC_OBJ_TYPE_NONE;
dist->different_types = NULL;
dist->nbobjs = 0;
dist->indexes = NULL;
dist->objs = NULL;
dist->values = NULL;
dist->id = topology->next_dist_id++; dist->id = topology->next_dist_id++;
return dist;
if (topology->last_dist)
topology->last_dist->next = dist;
else
topology->first_dist = dist;
dist->prev = topology->last_dist;
dist->next = NULL;
topology->last_dist = dist;
return 0;
err_with_dist: err_with_dist:
if (name) hwloc_backend_distances_add__cancel(dist);
free(dist->name);
free(dist);
err: err:
free(different_types); return NULL;
free(objs);
free(indexes);
free(values);
return -1;
} }
int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, const char *name, /* attach objects and values to a distances handle.
hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, unsigned nbobjs, uint64_t *indexes, uint64_t *values, * on success, objs and values arrays are attached and will be freed with the distances.
unsigned long kind, unsigned long flags) * on failure, the handle is freed.
*/
int
hwloc_backend_distances_add_values(hwloc_topology_t topology __hwloc_attribute_unused,
hwloc_backend_distances_add_handle_t handle,
unsigned nbobjs, hwloc_obj_t *objs,
hwloc_uint64_t *values,
unsigned long flags)
{ {
unsigned iflags = 0; /* objs not valid */ struct hwloc_internal_distances_s *dist = handle;
hwloc_obj_type_t unique_type, *different_types = NULL;
if (nbobjs < 2) { hwloc_uint64_t *indexes = NULL;
errno = EINVAL;
goto err;
}
/* cannot group without objects,
* and we don't group from XML anyway since the hwloc that generated the XML should have grouped already.
*/
if (flags & HWLOC_DISTANCES_ADD_FLAG_GROUP) {
errno = EINVAL;
goto err;
}
return hwloc_internal_distances__add(topology, name, unique_type, different_types, nbobjs, NULL, indexes, values, kind, iflags);
err:
free(indexes);
free(values);
free(different_types);
return -1;
}
static void
hwloc_internal_distances_restrict(hwloc_obj_t *objs,
uint64_t *indexes,
uint64_t *values,
unsigned nbobjs, unsigned disappeared);
int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name,
unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values,
unsigned long kind, unsigned long flags)
{
hwloc_obj_type_t unique_type, *different_types;
unsigned i, disappeared = 0; unsigned i, disappeared = 0;
unsigned iflags = HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID;
if (nbobjs < 2) { if (dist->nbobjs || !(dist->iflags & HWLOC_INTERNAL_DIST_FLAG_NOT_COMMITTED)) {
/* target distances is already set */
errno = EINVAL;
goto err;
}
if (flags || nbobjs < 2 || !objs || !values) {
errno = EINVAL; errno = EINVAL;
goto err; goto err;
} }
@@ -389,15 +370,18 @@ int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name,
/* some objects are NULL */ /* some objects are NULL */
if (disappeared == nbobjs) { if (disappeared == nbobjs) {
/* nothing left, drop the matrix */ /* nothing left, drop the matrix */
free(objs); errno = ENOENT;
free(values); goto err;
return 0;
} }
/* restrict the matrix */ /* restrict the matrix */
hwloc_internal_distances_restrict(objs, NULL, values, nbobjs, disappeared); hwloc_internal_distances_restrict(objs, NULL, NULL, values, nbobjs, disappeared);
nbobjs -= disappeared; nbobjs -= disappeared;
} }
indexes = malloc(nbobjs * sizeof(*indexes));
if (!indexes)
goto err;
unique_type = objs[0]->type; unique_type = objs[0]->type;
for(i=1; i<nbobjs; i++) for(i=1; i<nbobjs; i++)
if (objs[i]->type != unique_type) { if (objs[i]->type != unique_type) {
@@ -408,16 +392,108 @@ int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name,
/* heterogeneous types */ /* heterogeneous types */
different_types = malloc(nbobjs * sizeof(*different_types)); different_types = malloc(nbobjs * sizeof(*different_types));
if (!different_types) if (!different_types)
goto err; goto err_with_indexes;
for(i=0; i<nbobjs; i++) for(i=0; i<nbobjs; i++)
different_types[i] = objs[i]->type; different_types[i] = objs[i]->type;
} else {
/* homogeneous types */
different_types = NULL;
} }
if (topology->grouping && (flags & HWLOC_DISTANCES_ADD_FLAG_GROUP) && !different_types) { dist->nbobjs = nbobjs;
dist->objs = objs;
dist->iflags |= HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID;
dist->indexes = indexes;
dist->unique_type = unique_type;
dist->different_types = different_types;
dist->values = values;
if (different_types)
dist->kind |= HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES;
if (HWLOC_DIST_TYPE_USE_OS_INDEX(dist->unique_type)) {
for(i=0; i<nbobjs; i++)
dist->indexes[i] = objs[i]->os_index;
} else {
for(i=0; i<nbobjs; i++)
dist->indexes[i] = objs[i]->gp_index;
}
return 0;
err_with_indexes:
free(indexes);
err:
hwloc_backend_distances_add__cancel(dist);
return -1;
}
/* attach objects and values to a distance handle.
* on success, objs and values arrays are attached and will be freed with the distances.
* on failure, the handle is freed.
*/
static int
hwloc_backend_distances_add_values_by_index(hwloc_topology_t topology __hwloc_attribute_unused,
hwloc_backend_distances_add_handle_t handle,
unsigned nbobjs, hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, hwloc_uint64_t *indexes,
hwloc_uint64_t *values)
{
struct hwloc_internal_distances_s *dist = handle;
hwloc_obj_t *objs;
if (dist->nbobjs || !(dist->iflags & HWLOC_INTERNAL_DIST_FLAG_NOT_COMMITTED)) {
/* target distances is already set */
errno = EINVAL;
goto err;
}
if (nbobjs < 2 || !indexes || !values || (unique_type == HWLOC_OBJ_TYPE_NONE && !different_types)) {
errno = EINVAL;
goto err;
}
objs = malloc(nbobjs * sizeof(*objs));
if (!objs)
goto err;
dist->nbobjs = nbobjs;
dist->objs = objs;
dist->indexes = indexes;
dist->unique_type = unique_type;
dist->different_types = different_types;
dist->values = values;
if (different_types)
dist->kind |= HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES;
return 0;
err:
hwloc_backend_distances_add__cancel(dist);
return -1;
}
/* commit a distances handle.
* on failure, the handle is freed with its objects and values arrays.
*/
int
hwloc_backend_distances_add_commit(hwloc_topology_t topology,
hwloc_backend_distances_add_handle_t handle,
unsigned long flags)
{
struct hwloc_internal_distances_s *dist = handle;
if (!dist->nbobjs || !(dist->iflags & HWLOC_INTERNAL_DIST_FLAG_NOT_COMMITTED)) {
/* target distances not ready for commit */
errno = EINVAL;
goto err;
}
if ((flags & HWLOC_DISTANCES_ADD_FLAG_GROUP) && !dist->objs) {
/* cannot group without objects,
* and we don't group from XML anyway since the hwloc that generated the XML should have grouped already.
*/
errno = EINVAL;
goto err;
}
if (topology->grouping && (flags & HWLOC_DISTANCES_ADD_FLAG_GROUP) && !dist->different_types) {
float full_accuracy = 0.f; float full_accuracy = 0.f;
float *accuracies; float *accuracies;
unsigned nbaccuracies; unsigned nbaccuracies;
@@ -431,26 +507,94 @@ int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name,
} }
if (topology->grouping_verbose) { if (topology->grouping_verbose) {
unsigned j;
int gp = !HWLOC_DIST_TYPE_USE_OS_INDEX(unique_type);
fprintf(stderr, "Trying to group objects using distance matrix:\n"); fprintf(stderr, "Trying to group objects using distance matrix:\n");
fprintf(stderr, "%s", gp ? "gp_index" : "os_index"); hwloc_internal_distances_print_matrix(dist);
for(j=0; j<nbobjs; j++)
fprintf(stderr, " % 5d", (int)(gp ? objs[j]->gp_index : objs[j]->os_index));
fprintf(stderr, "\n");
for(i=0; i<nbobjs; i++) {
fprintf(stderr, " % 5d", (int)(gp ? objs[i]->gp_index : objs[i]->os_index));
for(j=0; j<nbobjs; j++)
fprintf(stderr, " % 5lld", (long long) values[i*nbobjs + j]);
fprintf(stderr, "\n");
}
} }
hwloc__groups_by_distances(topology, nbobjs, objs, values, hwloc__groups_by_distances(topology, dist->nbobjs, dist->objs, dist->values,
kind, nbaccuracies, accuracies, 1 /* check the first matrice */); dist->kind, nbaccuracies, accuracies, 1 /* check the first matrix */);
} }
return hwloc_internal_distances__add(topology, name, unique_type, different_types, nbobjs, objs, NULL, values, kind, iflags); if (topology->last_dist)
topology->last_dist->next = dist;
else
topology->first_dist = dist;
dist->prev = topology->last_dist;
dist->next = NULL;
topology->last_dist = dist;
dist->iflags &= ~HWLOC_INTERNAL_DIST_FLAG_NOT_COMMITTED;
return 0;
err:
hwloc_backend_distances_add__cancel(dist);
return -1;
}
/* all-in-one backend function not exported to plugins, only used by XML for now */
int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, const char *name,
hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, unsigned nbobjs, uint64_t *indexes, uint64_t *values,
unsigned long kind, unsigned long flags)
{
hwloc_backend_distances_add_handle_t handle;
int err;
handle = hwloc_backend_distances_add_create(topology, name, kind, 0);
if (!handle)
goto err;
err = hwloc_backend_distances_add_values_by_index(topology, handle,
nbobjs, unique_type, different_types, indexes,
values);
if (err < 0)
goto err;
/* arrays are now attached to the handle */
indexes = NULL;
different_types = NULL;
values = NULL;
err = hwloc_backend_distances_add_commit(topology, handle, flags);
if (err < 0)
goto err;
return 0;
err:
free(indexes);
free(different_types);
free(values);
return -1;
}
/* all-in-one backend function not exported to plugins, used by OS backends */
int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name,
unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values,
unsigned long kind, unsigned long flags)
{
hwloc_backend_distances_add_handle_t handle;
int err;
handle = hwloc_backend_distances_add_create(topology, name, kind, 0);
if (!handle)
goto err;
err = hwloc_backend_distances_add_values(topology, handle,
nbobjs, objs,
values,
0);
if (err < 0)
goto err;
/* arrays are now attached to the handle */
objs = NULL;
values = NULL;
err = hwloc_backend_distances_add_commit(topology, handle, flags);
if (err < 0)
goto err;
return 0;
err: err:
free(objs); free(objs);
@@ -458,44 +602,54 @@ int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name,
return -1; return -1;
} }
/********************************
* User API for adding distances
*/
#define HWLOC_DISTANCES_KIND_FROM_ALL (HWLOC_DISTANCES_KIND_FROM_OS|HWLOC_DISTANCES_KIND_FROM_USER) #define HWLOC_DISTANCES_KIND_FROM_ALL (HWLOC_DISTANCES_KIND_FROM_OS|HWLOC_DISTANCES_KIND_FROM_USER)
#define HWLOC_DISTANCES_KIND_MEANS_ALL (HWLOC_DISTANCES_KIND_MEANS_LATENCY|HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH) #define HWLOC_DISTANCES_KIND_MEANS_ALL (HWLOC_DISTANCES_KIND_MEANS_LATENCY|HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH)
#define HWLOC_DISTANCES_KIND_ALL (HWLOC_DISTANCES_KIND_FROM_ALL|HWLOC_DISTANCES_KIND_MEANS_ALL) #define HWLOC_DISTANCES_KIND_ALL (HWLOC_DISTANCES_KIND_FROM_ALL|HWLOC_DISTANCES_KIND_MEANS_ALL|HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES)
#define HWLOC_DISTANCES_ADD_FLAG_ALL (HWLOC_DISTANCES_ADD_FLAG_GROUP|HWLOC_DISTANCES_ADD_FLAG_GROUP_INACCURATE) #define HWLOC_DISTANCES_ADD_FLAG_ALL (HWLOC_DISTANCES_ADD_FLAG_GROUP|HWLOC_DISTANCES_ADD_FLAG_GROUP_INACCURATE)
/* The actual function exported to the user void * hwloc_distances_add_create(hwloc_topology_t topology,
*/ const char *name, unsigned long kind,
int hwloc_distances_add(hwloc_topology_t topology, unsigned long flags)
unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *values, {
unsigned long kind, unsigned long flags) if (!topology->is_loaded) {
errno = EINVAL;
return NULL;
}
if (topology->adopted_shmem_addr) {
errno = EPERM;
return NULL;
}
if ((kind & ~HWLOC_DISTANCES_KIND_ALL)
|| hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_FROM_ALL) > 1
|| hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_MEANS_ALL) > 1) {
errno = EINVAL;
return NULL;
}
return hwloc_backend_distances_add_create(topology, name, kind, flags);
}
int hwloc_distances_add_values(hwloc_topology_t topology,
void *handle,
unsigned nbobjs, hwloc_obj_t *objs,
hwloc_uint64_t *values,
unsigned long flags)
{ {
unsigned i; unsigned i;
uint64_t *_values; uint64_t *_values;
hwloc_obj_t *_objs; hwloc_obj_t *_objs;
int err; int err;
if (nbobjs < 2 || !objs || !values || !topology->is_loaded) {
errno = EINVAL;
return -1;
}
if (topology->adopted_shmem_addr) {
errno = EPERM;
return -1;
}
if ((kind & ~HWLOC_DISTANCES_KIND_ALL)
|| hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_FROM_ALL) != 1
|| hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_MEANS_ALL) != 1
|| (flags & ~HWLOC_DISTANCES_ADD_FLAG_ALL)) {
errno = EINVAL;
return -1;
}
/* no strict need to check for duplicates, things shouldn't break */ /* no strict need to check for duplicates, things shouldn't break */
for(i=1; i<nbobjs; i++) for(i=1; i<nbobjs; i++)
if (!objs[i]) { if (!objs[i]) {
errno = EINVAL; errno = EINVAL;
return -1; goto out;
} }
/* copy the input arrays and give them to the topology */ /* copy the input arrays and give them to the topology */
@@ -506,22 +660,78 @@ int hwloc_distances_add(hwloc_topology_t topology,
memcpy(_objs, objs, nbobjs*sizeof(hwloc_obj_t)); memcpy(_objs, objs, nbobjs*sizeof(hwloc_obj_t));
memcpy(_values, values, nbobjs*nbobjs*sizeof(*_values)); memcpy(_values, values, nbobjs*nbobjs*sizeof(*_values));
err = hwloc_internal_distances_add(topology, NULL, nbobjs, _objs, _values, kind, flags);
if (err < 0)
goto out; /* _objs and _values freed in hwloc_internal_distances_add() */
/* in case we added some groups, see if we need to reconnect */ err = hwloc_backend_distances_add_values(topology, handle, nbobjs, _objs, _values, flags);
hwloc_topology_reconnect(topology, 0); if (err < 0) {
/* handle was canceled inside hwloc_backend_distances_add_values */
handle = NULL;
goto out_with_arrays;
}
return 0; return 0;
out_with_arrays: out_with_arrays:
free(_values);
free(_objs); free(_objs);
free(_values);
out: out:
if (handle)
hwloc_backend_distances_add__cancel(handle);
return -1; return -1;
} }
/* User API: commit a distances handle.
 *
 * Rejects unknown flags with EINVAL (and cancels the handle), otherwise
 * forwards to the backend commit and reconnects the topology on success.
 * Returns 0 on success, -1 on error.
 */
int
hwloc_distances_add_commit(hwloc_topology_t topology,
                           void *handle,
                           unsigned long flags)
{
  if (flags & ~HWLOC_DISTANCES_ADD_FLAG_ALL) {
    /* invalid flags: the handle was not given to the backend, drop it ourselves */
    errno = EINVAL;
    if (handle)
      hwloc_backend_distances_add__cancel(handle);
    return -1;
  }

  /* on failure, the handle was canceled inside hwloc_backend_distances_add_commit */
  if (hwloc_backend_distances_add_commit(topology, handle, flags) < 0)
    return -1;

  /* in case we added some groups, see if we need to reconnect */
  hwloc__reconnect(topology, 0);
  return 0;
}
/* Deprecated all-in-one user function.
 * Chains the create, add_values and commit steps; the first failing step
 * makes the whole call return -1. Only the commit step receives the caller flags.
 */
int hwloc_distances_add(hwloc_topology_t topology,
                        unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *values,
                        unsigned long kind, unsigned long flags)
{
  void *handle = hwloc_distances_add_create(topology, NULL, kind, 0);

  if (!handle
      || hwloc_distances_add_values(topology, handle, nbobjs, objs, values, 0) < 0
      || hwloc_distances_add_commit(topology, handle, flags) < 0)
    return -1;

  return 0;
}
/****************************************************** /******************************************************
* Refresh objects in distances * Refresh objects in distances
*/ */
@@ -529,6 +739,7 @@ int hwloc_distances_add(hwloc_topology_t topology,
static void static void
hwloc_internal_distances_restrict(hwloc_obj_t *objs, hwloc_internal_distances_restrict(hwloc_obj_t *objs,
uint64_t *indexes, uint64_t *indexes,
hwloc_obj_type_t *different_types,
uint64_t *values, uint64_t *values,
unsigned nbobjs, unsigned disappeared) unsigned nbobjs, unsigned disappeared)
{ {
@@ -550,6 +761,8 @@ hwloc_internal_distances_restrict(hwloc_obj_t *objs,
objs[newi] = objs[i]; objs[newi] = objs[i];
if (indexes) if (indexes)
indexes[newi] = indexes[i]; indexes[newi] = indexes[i];
if (different_types)
different_types[newi] = different_types[i];
newi++; newi++;
} }
} }
@@ -594,7 +807,7 @@ hwloc_internal_distances_refresh_one(hwloc_topology_t topology,
return -1; return -1;
if (disappeared) { if (disappeared) {
hwloc_internal_distances_restrict(objs, dist->indexes, dist->values, nbobjs, disappeared); hwloc_internal_distances_restrict(objs, dist->indexes, dist->different_types, dist->values, nbobjs, disappeared);
dist->nbobjs -= disappeared; dist->nbobjs -= disappeared;
} }
@@ -647,7 +860,7 @@ struct hwloc_distances_container_s {
struct hwloc_distances_s distances; struct hwloc_distances_s distances;
}; };
#define HWLOC_DISTANCES_CONTAINER_OFFSET ((char*)&((struct hwloc_distances_container_s*)NULL)->distances - (char*)NULL) #define HWLOC_DISTANCES_CONTAINER_OFFSET ((uintptr_t)(&((struct hwloc_distances_container_s*)NULL)->distances) - (uintptr_t)NULL)
#define HWLOC_DISTANCES_CONTAINER(_d) (struct hwloc_distances_container_s *) ( ((char*)_d) - HWLOC_DISTANCES_CONTAINER_OFFSET ) #define HWLOC_DISTANCES_CONTAINER(_d) (struct hwloc_distances_container_s *) ( ((char*)_d) - HWLOC_DISTANCES_CONTAINER_OFFSET )
static struct hwloc_internal_distances_s * static struct hwloc_internal_distances_s *
@@ -1087,3 +1300,196 @@ hwloc__groups_by_distances(struct hwloc_topology *topology,
out_with_groupids: out_with_groupids:
free(groupids); free(groupids);
} }
/* Remove NULL objects from a distances structure.
 *
 * Fails with EINVAL if fewer than 2 objects would remain.
 * Does nothing if there is no NULL object.
 * Otherwise compacts the arrays, updates nbobjs and refreshes the
 * HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES bit of the kind.
 */
static int
hwloc__distances_transform_remove_null(struct hwloc_distances_s *distances)
{
  hwloc_obj_t *objs = distances->objs;
  unsigned total = distances->nbobjs;
  unsigned kept = 0;
  unsigned idx;
  hwloc_obj_type_t common_type;

  /* count the objects that are still valid */
  for(idx=0; idx<total; idx++) {
    if (objs[idx])
      kept++;
  }

  if (kept < 2) {
    errno = EINVAL;
    return -1;
  }
  if (kept == total)
    /* nothing to remove */
    return 0;

  hwloc_internal_distances_restrict(objs, NULL, NULL, distances->values, total, total-kept);
  distances->nbobjs = kept;

  /* update HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES for convenience */
  common_type = objs[0]->type;
  idx = 1;
  while (idx < kept) {
    if (objs[idx]->type != common_type) {
      common_type = HWLOC_OBJ_TYPE_NONE;
      break;
    }
    idx++;
  }

  if (common_type != HWLOC_OBJ_TYPE_NONE)
    distances->kind &= ~HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES;
  else
    distances->kind |= HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES;

  return 0;
}
/* Convert a bandwidth matrix into a number of links.
 *
 * FIXME: we should look for the greatest common divisor,
 * but we just use the smallest positive value, that's enough for current use-cases.
 * We return -1 (ENOENT) when that value does not divide every entry.
 *
 * Fails with EINVAL if the distances do not describe bandwidths.
 * The diagonal is always cleared.
 */
static int
hwloc__distances_transform_links(struct hwloc_distances_s *distances)
{
  hwloc_uint64_t *values = distances->values;
  hwloc_uint64_t smallest = 0;
  unsigned nbobjs = distances->nbobjs;
  unsigned pos;

  if (!(distances->kind & HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH)) {
    errno = EINVAL;
    return -1;
  }

  /* no link from an object to itself */
  for(pos=0; pos<nbobjs; pos++)
    values[pos*nbobjs+pos] = 0;

  /* find the smallest positive value */
  for(pos=0; pos<nbobjs*nbobjs; pos++) {
    hwloc_uint64_t current = values[pos];
    if (current && (!smallest || current < smallest))
      smallest = current;
  }
  if (!smallest)
    /* only zeroes? do nothing */
    return 0;

  /* refuse to touch anything unless every value is a multiple of it */
  for(pos=0; pos<nbobjs*nbobjs; pos++) {
    if (values[pos] % smallest) {
      errno = ENOENT;
      return -1;
    }
  }

  /* ok, now divide for real */
  for(pos=0; pos<nbobjs*nbobjs; pos++)
    values[pos] /= smallest;

  return 0;
}
/* Return 1 if obj is non-NULL and its subtype is exactly "NVSwitch", 0 otherwise. */
static __hwloc_inline int is_nvswitch(hwloc_obj_t obj)
{
  if (!obj || !obj->subtype)
    return 0;
  return strcmp(obj->subtype, "NVSwitch") == 0;
}
/* Merge all NVSwitch ports of a distances structure into the first one.
 *
 * The values of every additional port are added to the row and column of
 * the first port, then zeroed, and the additional port object is set to NULL
 * so that a subsequent REMOVE_NULL transformation drops it.
 * Objects that are not switch ports are left untouched, wherever they are
 * located in the array.
 *
 * Returns 0 on success, -1 with errno set to ENOENT if there is no port at all.
 */
static int
hwloc__distances_transform_merge_switch_ports(struct hwloc_distances_s *distances)
{
  hwloc_obj_t *objs = distances->objs;
  hwloc_uint64_t *values = distances->values;
  unsigned first, i, j, nbobjs = distances->nbobjs;

  /* find the first port */
  first = (unsigned) -1;
  for(i=0; i<nbobjs; i++)
    if (is_nvswitch(objs[i])) {
      first = i;
      break;
    }
  if (first == (unsigned)-1) {
    errno = ENOENT;
    return -1;
  }

  for(j=first+1; j<nbobjs; j++) {
    unsigned k;

    if (!is_nvswitch(objs[j]))
      /* not a port: keep this object and its values as they are */
      continue;

    /* another port, merge it into the first one */
    for(k=0; k<nbobjs; k++) {
      if (k==first || k==j)
        continue;
      values[k*nbobjs+first] += values[k*nbobjs+j];
      values[k*nbobjs+j] = 0;
      values[first*nbobjs+k] += values[j*nbobjs+k];
      values[j*nbobjs+k] = 0;
    }
    values[first*nbobjs+first] += values[j*nbobjs+j];
    values[j*nbobjs+j] = 0;

    /* the caller will also call REMOVE_NULL to remove other ports.
     * Only merged ports may be dropped: clearing any other object
     * would silently lose it together with its unmerged values.
     */
    objs[j] = NULL;
  }

  return 0;
}
/* Add the bandwidth available through NVSwitch ports to the direct bandwidth
 * between every pair of distinct non-switch objects.
 * The indirect bandwidth from src to dst is min(src->switches, switches->dst).
 * Always returns 0.
 */
static int
hwloc__distances_transform_transitive_closure(struct hwloc_distances_s *distances)
{
  hwloc_obj_t *objs = distances->objs;
  hwloc_uint64_t *values = distances->values;
  unsigned nbobjs = distances->nbobjs;
  unsigned src, dst, sw;

  for(src=0; src<nbobjs; src++) {
    hwloc_uint64_t to_switch;

    /* switch ports are only intermediates here */
    if (is_nvswitch(objs[src]))
      continue;

    /* count our BW to the switch */
    to_switch = 0;
    for(sw=0; sw<nbobjs; sw++) {
      if (is_nvswitch(objs[sw]))
        to_switch += values[src*nbobjs+sw];
    }

    for(dst=0; dst<nbobjs; dst++) {
      hwloc_uint64_t from_switch, indirect;

      if (dst == src || is_nvswitch(objs[dst]))
        continue;

      /* count our BW from the switch */
      from_switch = 0;
      for(sw=0; sw<nbobjs; sw++) {
        if (is_nvswitch(objs[sw]))
          from_switch += values[sw*nbobjs+dst];
      }

      /* the slowest of both hops limits the indirect bandwidth */
      if (to_switch > from_switch)
        indirect = from_switch;
      else
        indirect = to_switch;
      values[src*nbobjs+dst] += indirect;
    }
  }

  return 0;
}
/* User API: apply a transformation to a distances structure.
 *
 * flags must be 0 and transform_attr must be NULL, otherwise EINVAL is returned.
 * MERGE_SWITCH_PORTS is followed by REMOVE_NULL when the merge succeeded.
 * Unknown transformations fail with EINVAL.
 */
int
hwloc_distances_transform(hwloc_topology_t topology __hwloc_attribute_unused,
                          struct hwloc_distances_s *distances,
                          enum hwloc_distances_transform_e transform,
                          void *transform_attr,
                          unsigned long flags)
{
  if (flags || transform_attr) {
    errno = EINVAL;
    return -1;
  }

  if (transform == HWLOC_DISTANCES_TRANSFORM_REMOVE_NULL)
    return hwloc__distances_transform_remove_null(distances);

  if (transform == HWLOC_DISTANCES_TRANSFORM_LINKS)
    return hwloc__distances_transform_links(distances);

  if (transform == HWLOC_DISTANCES_TRANSFORM_MERGE_SWITCH_PORTS) {
    int merged = hwloc__distances_transform_merge_switch_ports(distances);
    if (merged)
      return merged;
    /* merged ports were set to NULL, drop them now */
    return hwloc__distances_transform_remove_null(distances);
  }

  if (transform == HWLOC_DISTANCES_TRANSFORM_TRANSITIVE_CLOSURE)
    return hwloc__distances_transform_transitive_closure(distances);

  errno = EINVAL;
  return -1;
}

View File

@@ -1,11 +1,12 @@
/* /*
* Copyright © 2020 Inria. All rights reserved. * Copyright © 2020-2025 Inria. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
#include "private/autogen/config.h" #include "private/autogen/config.h"
#include "hwloc.h" #include "hwloc.h"
#include "private/private.h" #include "private/private.h"
#include "private/debug.h"
/***************************** /*****************************
@@ -13,13 +14,26 @@
*/ */
static __hwloc_inline static __hwloc_inline
hwloc_uint64_t hwloc__memattr_get_convenience_value(hwloc_memattr_id_t id, int hwloc__memattr_get_convenience_value(hwloc_memattr_id_t id,
hwloc_obj_t node) hwloc_obj_t node,
hwloc_uint64_t *valuep)
{ {
if (id == HWLOC_MEMATTR_ID_CAPACITY) if (id == HWLOC_MEMATTR_ID_CAPACITY) {
return node->attr->numanode.local_memory; if (node->type != HWLOC_OBJ_NUMANODE) {
else if (id == HWLOC_MEMATTR_ID_LOCALITY) errno = EINVAL;
return hwloc_bitmap_weight(node->cpuset); return -1;
}
*valuep = node->attr->numanode.local_memory;
return 0;
}
else if (id == HWLOC_MEMATTR_ID_LOCALITY) {
if (!node->cpuset) {
errno = EINVAL;
return -1;
}
*valuep = hwloc_bitmap_weight(node->cpuset);
return 0;
}
else else
assert(0); assert(0);
return 0; /* shut up the compiler */ return 0; /* shut up the compiler */
@@ -49,36 +63,51 @@ hwloc__setup_memattr(struct hwloc_internal_memattr_s *imattr,
void void
hwloc_internal_memattrs_prepare(struct hwloc_topology *topology) hwloc_internal_memattrs_prepare(struct hwloc_topology *topology)
{ {
#define NR_DEFAULT_MEMATTRS 4 topology->memattrs = malloc(HWLOC_MEMATTR_ID_MAX * sizeof(*topology->memattrs));
topology->memattrs = malloc(NR_DEFAULT_MEMATTRS * sizeof(*topology->memattrs));
if (!topology->memattrs) if (!topology->memattrs)
return; return;
assert(HWLOC_MEMATTR_ID_CAPACITY < NR_DEFAULT_MEMATTRS);
hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_CAPACITY], hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_CAPACITY],
(char *) "Capacity", (char *) "Capacity",
HWLOC_MEMATTR_FLAG_HIGHER_FIRST, HWLOC_MEMATTR_FLAG_HIGHER_FIRST,
HWLOC_IMATTR_FLAG_STATIC_NAME|HWLOC_IMATTR_FLAG_CONVENIENCE); HWLOC_IMATTR_FLAG_STATIC_NAME|HWLOC_IMATTR_FLAG_CONVENIENCE);
assert(HWLOC_MEMATTR_ID_LOCALITY < NR_DEFAULT_MEMATTRS);
hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_LOCALITY], hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_LOCALITY],
(char *) "Locality", (char *) "Locality",
HWLOC_MEMATTR_FLAG_LOWER_FIRST, HWLOC_MEMATTR_FLAG_LOWER_FIRST,
HWLOC_IMATTR_FLAG_STATIC_NAME|HWLOC_IMATTR_FLAG_CONVENIENCE); HWLOC_IMATTR_FLAG_STATIC_NAME|HWLOC_IMATTR_FLAG_CONVENIENCE);
assert(HWLOC_MEMATTR_ID_BANDWIDTH < NR_DEFAULT_MEMATTRS);
hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_BANDWIDTH], hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_BANDWIDTH],
(char *) "Bandwidth", (char *) "Bandwidth",
HWLOC_MEMATTR_FLAG_HIGHER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR, HWLOC_MEMATTR_FLAG_HIGHER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR,
HWLOC_IMATTR_FLAG_STATIC_NAME); HWLOC_IMATTR_FLAG_STATIC_NAME);
assert(HWLOC_MEMATTR_ID_LATENCY < NR_DEFAULT_MEMATTRS); hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_READ_BANDWIDTH],
(char *) "ReadBandwidth",
HWLOC_MEMATTR_FLAG_HIGHER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR,
HWLOC_IMATTR_FLAG_STATIC_NAME);
hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_WRITE_BANDWIDTH],
(char *) "WriteBandwidth",
HWLOC_MEMATTR_FLAG_HIGHER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR,
HWLOC_IMATTR_FLAG_STATIC_NAME);
hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_LATENCY], hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_LATENCY],
(char *) "Latency", (char *) "Latency",
HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR, HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR,
HWLOC_IMATTR_FLAG_STATIC_NAME); HWLOC_IMATTR_FLAG_STATIC_NAME);
topology->nr_memattrs = NR_DEFAULT_MEMATTRS; hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_READ_LATENCY],
(char *) "ReadLatency",
HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR,
HWLOC_IMATTR_FLAG_STATIC_NAME);
hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_WRITE_LATENCY],
(char *) "WriteLatency",
HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR,
HWLOC_IMATTR_FLAG_STATIC_NAME);
topology->nr_memattrs = HWLOC_MEMATTR_ID_MAX;
} }
static void static void
@@ -127,6 +156,8 @@ hwloc_internal_memattrs_dup(struct hwloc_topology *new, struct hwloc_topology *o
struct hwloc_internal_memattr_s *imattrs; struct hwloc_internal_memattr_s *imattrs;
hwloc_memattr_id_t id; hwloc_memattr_id_t id;
/* old->nr_memattrs is always > 0 thanks to default memattrs */
imattrs = hwloc_tma_malloc(tma, old->nr_memattrs * sizeof(*imattrs)); imattrs = hwloc_tma_malloc(tma, old->nr_memattrs * sizeof(*imattrs));
if (!imattrs) if (!imattrs)
return -1; return -1;
@@ -604,7 +635,7 @@ hwloc_memattr_get_targets(hwloc_topology_t topology,
if (found<max) { if (found<max) {
targets[found] = node; targets[found] = node;
if (values) if (values)
values[found] = hwloc__memattr_get_convenience_value(id, node); hwloc__memattr_get_convenience_value(id, node, &values[found]);
} }
found++; found++;
} }
@@ -730,7 +761,7 @@ hwloc_memattr_get_initiators(hwloc_topology_t topology,
struct hwloc_internal_memattr_target_s *imtg; struct hwloc_internal_memattr_target_s *imtg;
unsigned i, max; unsigned i, max;
if (flags) { if (flags || !target_node) {
errno = EINVAL; errno = EINVAL;
return -1; return -1;
} }
@@ -792,7 +823,7 @@ hwloc_memattr_get_value(hwloc_topology_t topology,
struct hwloc_internal_memattr_s *imattr; struct hwloc_internal_memattr_s *imattr;
struct hwloc_internal_memattr_target_s *imtg; struct hwloc_internal_memattr_target_s *imtg;
if (flags) { if (flags || !target_node) {
errno = EINVAL; errno = EINVAL;
return -1; return -1;
} }
@@ -805,8 +836,7 @@ hwloc_memattr_get_value(hwloc_topology_t topology,
if (imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE) { if (imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE) {
/* convenience attributes */ /* convenience attributes */
*valuep = hwloc__memattr_get_convenience_value(id, target_node); return hwloc__memattr_get_convenience_value(id, target_node, valuep);
return 0;
} }
/* normal attributes */ /* normal attributes */
@@ -918,7 +948,7 @@ hwloc_memattr_set_value(hwloc_topology_t topology,
{ {
struct hwloc_internal_location_s iloc, *ilocp; struct hwloc_internal_location_s iloc, *ilocp;
if (flags) { if (flags || !target_node) {
errno = EINVAL; errno = EINVAL;
return -1; return -1;
} }
@@ -989,10 +1019,10 @@ hwloc_memattr_get_best_target(hwloc_topology_t topology,
/* convenience attributes */ /* convenience attributes */
for(j=0; ; j++) { for(j=0; ; j++) {
hwloc_obj_t node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, j); hwloc_obj_t node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, j);
hwloc_uint64_t value; hwloc_uint64_t value = 0;
if (!node) if (!node)
break; break;
value = hwloc__memattr_get_convenience_value(id, node); hwloc__memattr_get_convenience_value(id, node, &value);
hwloc__update_best_target(&best, &best_value, &found, hwloc__update_best_target(&best, &best_value, &found,
node, value, node, value,
imattr->flags & HWLOC_MEMATTR_FLAG_HIGHER_FIRST); imattr->flags & HWLOC_MEMATTR_FLAG_HIGHER_FIRST);
@@ -1075,7 +1105,7 @@ hwloc_memattr_get_best_initiator(hwloc_topology_t topology,
int found; int found;
unsigned i; unsigned i;
if (flags) { if (flags || !target_node) {
errno = EINVAL; errno = EINVAL;
return -1; return -1;
} }
@@ -1128,6 +1158,8 @@ match_local_obj_cpuset(hwloc_obj_t node, hwloc_cpuset_t cpuset, unsigned long fl
{ {
if (flags & HWLOC_LOCAL_NUMANODE_FLAG_ALL) if (flags & HWLOC_LOCAL_NUMANODE_FLAG_ALL)
return 1; return 1;
if (flags & HWLOC_LOCAL_NUMANODE_FLAG_INTERSECT_LOCALITY)
return hwloc_bitmap_intersects(node->cpuset, cpuset);
if ((flags & HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY) if ((flags & HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY)
&& hwloc_bitmap_isincluded(cpuset, node->cpuset)) && hwloc_bitmap_isincluded(cpuset, node->cpuset))
return 1; return 1;
@@ -1150,6 +1182,7 @@ hwloc_get_local_numanode_objs(hwloc_topology_t topology,
if (flags & ~(HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY if (flags & ~(HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY
|HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY |HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY
|HWLOC_LOCAL_NUMANODE_FLAG_INTERSECT_LOCALITY
| HWLOC_LOCAL_NUMANODE_FLAG_ALL)) { | HWLOC_LOCAL_NUMANODE_FLAG_ALL)) {
errno = EINVAL; errno = EINVAL;
return -1; return -1;
@@ -1195,3 +1228,750 @@ hwloc_get_local_numanode_objs(hwloc_topology_t topology,
*nrp = i; *nrp = i;
return 0; return 0;
} }
/* qsort() callback ordering an array of objects by increasing os_index.
 *
 * The indexes are compared explicitly instead of being subtracted:
 * a difference converted to int gets the wrong sign when both
 * indexes are far apart (for instance if one of them is an all-ones
 * "unknown" value).
 */
static int compare_nodes_by_os_index(const void *_a, const void *_b)
{
  const hwloc_obj_t * a = _a, * b = _b;

  if ((*a)->os_index < (*b)->os_index)
    return -1;
  if ((*a)->os_index > (*b)->os_index)
    return 1;
  return 0;
}
/* Fill nodeset with the OS indexes of a default set of NUMA nodes.
 *
 * Nodes are considered by increasing OS index. The first one is always taken.
 * A first pass then takes the nodes that have the same subtype (or the same
 * lack of subtype) and whose cpuset does not overlap the already-taken ones.
 * If some CPUs of the root cpuset are still not covered, a second pass takes
 * non-empty, non-overlapping nodes regardless of their subtype.
 *
 * flags must be 0, otherwise errno is set to EINVAL.
 * Returns 0 on success, -1 on error (invalid flags or allocation failure).
 */
int
hwloc_topology_get_default_nodeset(hwloc_topology_t topology,
                                   hwloc_nodeset_t nodeset,
                                   unsigned long flags)
{
  hwloc_obj_t *nodes;
  hwloc_bitmap_t remainingcpuset;
  unsigned nrnodes, i;
  const char *first_subtype;

  if (flags) {
    errno = EINVAL;
    goto out;
  }

  remainingcpuset = hwloc_bitmap_dup(topology->levels[0][0]->cpuset);
  if (!remainingcpuset)
    goto out;

  /* work on a private copy of the node array sorted by OS index */
  nrnodes = topology->slevels[HWLOC_SLEVEL_NUMANODE].nbobjs;
  nodes = malloc(nrnodes * sizeof(*nodes));
  if (!nodes)
    goto out_with_remainingcpuset;
  memcpy(nodes, topology->slevels[HWLOC_SLEVEL_NUMANODE].objs, nrnodes * sizeof(*nodes));
  qsort(nodes, nrnodes, sizeof(*nodes), compare_nodes_by_os_index);

  hwloc_bitmap_zero(nodeset);

  /* always take the first node (FIXME: except if unexpected subtype?) */
  first_subtype = nodes[0]->subtype;
  hwloc_bitmap_set(nodeset, nodes[0]->os_index);
  hwloc_bitmap_andnot(remainingcpuset, remainingcpuset, nodes[0]->cpuset);

  /* use all non-intersecting nodes with same subtype */
  for(i=1; i<nrnodes; i++) {
    /* check same or no subtype */
    if (first_subtype) {
      if (!nodes[i]->subtype || strcmp(first_subtype, nodes[i]->subtype))
        continue;
    } else if (nodes[i]->subtype) {
      continue;
    }
    /* take non-overlapping nodes */
    if (hwloc_bitmap_isincluded(nodes[i]->cpuset, remainingcpuset) /* can be empty */) {
      hwloc_bitmap_set(nodeset, nodes[i]->os_index);
      hwloc_bitmap_andnot(remainingcpuset, remainingcpuset, nodes[i]->cpuset);
    }
    /* more needed? */
    if (hwloc_bitmap_iszero(remainingcpuset))
      goto done;
  }

  /* find more nodes to cover the entire topology cpuset.
   * only take what's necessary: first nodes, non-empty */
  for(i=1; i<nrnodes; i++) {
    /* already taken?
     * nodeset is indexed by OS index, not by position in the sorted array.
     */
    if (hwloc_bitmap_isset(nodeset, nodes[i]->os_index))
      continue;
    /* take non-overlapping nodes, except empty */
    if (hwloc_bitmap_isincluded(nodes[i]->cpuset, remainingcpuset)
        && !hwloc_bitmap_iszero(nodes[i]->cpuset)) {
      hwloc_bitmap_set(nodeset, nodes[i]->os_index);
      hwloc_bitmap_andnot(remainingcpuset, remainingcpuset, nodes[i]->cpuset);
    }
    /* more needed? */
    if (hwloc_bitmap_iszero(remainingcpuset))
      goto done;
  }

 done:
  free(nodes);
  hwloc_bitmap_free(remainingcpuset);
  return 0;

 out_with_remainingcpuset:
  hwloc_bitmap_free(remainingcpuset);
 out:
  return -1;
}
/**************************************
* Using memattrs to identify HBM/DRAM
*/
/* Kinds of memory tiers, one bit per kind.
 * HWLOC_MEMORY_TIER_CXL may be OR'ed with one of the other bits
 * (see the CXL-* names handled by hwloc_memory_tier_type_snprintf/sscanf()).
 */
enum hwloc_memory_tier_type_e {
/* WARNING: keep higher BW types first for compare_tiers_by_bw_and_type() when BW info is missing */
HWLOC_MEMORY_TIER_HBM = 1UL<<0,
HWLOC_MEMORY_TIER_DRAM = 1UL<<1,
HWLOC_MEMORY_TIER_GPU = 1UL<<2,
HWLOC_MEMORY_TIER_SPM = 1UL<<3, /* Specific-Purpose Memory is usually HBM, we'll use BW to confirm or force*/
HWLOC_MEMORY_TIER_NVM = 1UL<<4,
HWLOC_MEMORY_TIER_CXL = 1UL<<5
};
/* holds a single tier kind or a CXL-flagged combination of the bits above */
typedef unsigned long hwloc_memory_tier_type_t;
/* no bit set: the kind of memory is not known */
#define HWLOC_MEMORY_TIER_UNKNOWN 0UL
/* Return the static name of a memory tier type, or NULL if the type
 * (or combination of types) has no name.
 * The CXL flag alone is named like CXL combined with DRAM.
 */
static const char * hwloc_memory_tier_type_snprintf(hwloc_memory_tier_type_t type)
{
  static const struct {
    hwloc_memory_tier_type_t type;
    const char *name;
  } known[] = {
    { HWLOC_MEMORY_TIER_DRAM, "DRAM" },
    { HWLOC_MEMORY_TIER_HBM, "HBM" },
    { HWLOC_MEMORY_TIER_GPU, "GPUMemory" },
    { HWLOC_MEMORY_TIER_SPM, "SPM" },
    { HWLOC_MEMORY_TIER_NVM, "NVM" },
    { HWLOC_MEMORY_TIER_CXL, "CXL-DRAM" },
    { HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_DRAM, "CXL-DRAM" },
    { HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_HBM, "CXL-HBM" },
    { HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_GPU, "CXL-GPUMemory" },
    { HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_SPM, "CXL-SPM" },
    { HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_NVM, "CXL-NVM" }
  };
  unsigned idx;

  for(idx=0; idx<sizeof(known)/sizeof(known[0]); idx++) {
    if (known[idx].type == type)
      return known[idx].name;
  }
  return NULL;
}
/* Convert a tier name into a memory tier type.
 * The comparison ignores case. Returns 0 if the name is not recognized.
 */
static hwloc_memory_tier_type_t hwloc_memory_tier_type_sscanf(const char *name)
{
  static const struct {
    const char *name;
    hwloc_memory_tier_type_t type;
  } known[] = {
    { "DRAM", HWLOC_MEMORY_TIER_DRAM },
    { "HBM", HWLOC_MEMORY_TIER_HBM },
    { "GPUMemory", HWLOC_MEMORY_TIER_GPU },
    { "SPM", HWLOC_MEMORY_TIER_SPM },
    { "NVM", HWLOC_MEMORY_TIER_NVM },
    { "CXL-DRAM", HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_DRAM },
    { "CXL-HBM", HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_HBM },
    { "CXL-GPUMemory", HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_GPU },
    { "CXL-SPM", HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_SPM },
    { "CXL-NVM", HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_NVM }
  };
  unsigned idx;

  for(idx=0; idx<sizeof(known)/sizeof(known[0]); idx++) {
    if (!strcasecmp(name, known[idx].name))
      return known[idx].type;
  }
  return 0;
}
/* factorized tier, grouping multiple nodes */
struct hwloc_memory_tier_s {
/* OS indexes of the NUMA nodes that belong to this tier */
hwloc_nodeset_t nodeset;
/* bounds of the local bandwidth recorded for the nodes of the tier, initially 0 */
uint64_t local_bw_min, local_bw_max;
/* bounds of the local latency recorded for the nodes of the tier, initially 0 */
uint64_t local_lat_min, local_lat_max;
/* kind of memory shared by all nodes of the tier */
hwloc_memory_tier_type_t type;
};
/* early tier discovery, one entry per node */
struct hwloc_memory_node_info_s {
/* the NUMA node described by this entry */
hwloc_obj_t node;
/* bandwidth from the node's own cpuset as initiator, 0 if not found */
uint64_t local_bw;
/* latency from the node's own cpuset as initiator, 0 if not found */
uint64_t local_lat;
/* kind of memory, HWLOC_MEMORY_TIER_UNKNOWN by default */
hwloc_memory_tier_type_t type;
/* index of the tier this node was assigned to once entries are sorted */
unsigned rank;
};
/* qsort() callback for struct hwloc_memory_node_info_s:
 * increasing tier type first, then decreasing local bandwidth.
 *
 * Types are compared explicitly: returning the difference of two
 * unsigned long values as an int relies on an implementation-defined
 * narrowing conversion.
 */
static int compare_node_infos_by_type_and_bw(const void *_a, const void *_b)
{
  const struct hwloc_memory_node_info_s *a = _a, *b = _b;
  /* sort by type of node first */
  if (a->type < b->type)
    return -1;
  if (a->type > b->type)
    return 1;
  /* then by bandwidth, higher first */
  if (a->local_bw > b->local_bw)
    return -1;
  else if (a->local_bw < b->local_bw)
    return 1;
  return 0;
}
/* qsort() callback for struct hwloc_memory_tier_s:
 * decreasing (average) local bandwidth first when both tiers have bandwidth
 * information, then increasing tier type.
 *
 * Types are compared explicitly: returning the difference of two
 * unsigned long values as an int relies on an implementation-defined
 * narrowing conversion.
 */
static int compare_tiers_by_bw_and_type(const void *_a, const void *_b)
{
  const struct hwloc_memory_tier_s *a = _a, *b = _b;
  /* sort by (average) BW first */
  if (a->local_bw_min && b->local_bw_min) {
    if (a->local_bw_min + a->local_bw_max > b->local_bw_min + b->local_bw_max)
      return -1;
    else if (a->local_bw_min + a->local_bw_max < b->local_bw_min + b->local_bw_max)
      return 1;
  }
  /* then by tier type */
  if (a->type < b->type)
    return -1;
  if (a->type > b->type)
    return 1;
  return 0;
}
/* Group the NUMA nodes of the topology into memory tiers.
 *
 * Each node gets a tier type (from its subtype and its "DAXType" and
 * "CXLDevice" info attributes) and its local bandwidth and latency (from the
 * Bandwidth and Latency memory attributes, using the node cpuset as initiator).
 * Nodes are then sorted by type and decreasing bandwidth, and a new tier is
 * started whenever the type changes or when the bandwidth or latency ratio
 * between consecutive nodes exceeds a threshold (0.1 by default, see the
 * HWLOC_MEMTIERS_BANDWIDTH_THRESHOLD and HWLOC_MEMTIERS_LATENCY_THRESHOLD
 * environment variables).
 *
 * On success, returns an allocated array of tiers and stores its size
 * in *nr_tiers_p. The array and the nodeset of each tier belong to the caller.
 * Returns NULL on allocation failure.
 */
static struct hwloc_memory_tier_s *
hwloc__group_memory_tiers(hwloc_topology_t topology,
                          unsigned *nr_tiers_p)
{
  struct hwloc_internal_memattr_s *imattr_bw, *imattr_lat;
  struct hwloc_memory_node_info_s *nodeinfos;
  struct hwloc_memory_tier_s *tiers;
  unsigned nr_tiers;
  float bw_threshold = 0.1;
  float lat_threshold = 0.1;
  const char *env;
  unsigned i, j, n;

  n = hwloc_get_nbobjs_by_depth(topology, HWLOC_TYPE_DEPTH_NUMANODE);
  assert(n);

  env = getenv("HWLOC_MEMTIERS_BANDWIDTH_THRESHOLD");
  if (env)
    bw_threshold = atof(env);

  env = getenv("HWLOC_MEMTIERS_LATENCY_THRESHOLD");
  if (env)
    lat_threshold = atof(env);

  imattr_bw = &topology->memattrs[HWLOC_MEMATTR_ID_BANDWIDTH];
  imattr_lat = &topology->memattrs[HWLOC_MEMATTR_ID_LATENCY];

  if (!(imattr_bw->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
    hwloc__imattr_refresh(topology, imattr_bw);
  if (!(imattr_lat->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
    hwloc__imattr_refresh(topology, imattr_lat);

  nodeinfos = malloc(n * sizeof(*nodeinfos));
  if (!nodeinfos)
    return NULL;

  for(i=0; i<n; i++) {
    hwloc_obj_t node;
    const char *daxtype;
    struct hwloc_internal_location_s iloc;
    struct hwloc_internal_memattr_target_s *imtg;

    node = hwloc_get_obj_by_depth(topology, HWLOC_TYPE_DEPTH_NUMANODE, i);
    assert(node);
    nodeinfos[i].node = node;

    /* defaults to unknown */
    nodeinfos[i].type = HWLOC_MEMORY_TIER_UNKNOWN;
    nodeinfos[i].local_bw = 0;
    nodeinfos[i].local_lat = 0;

    daxtype = hwloc_obj_get_info_by_name(node, "DAXType");
    /* mark NVM, SPM and GPU nodes */
    if (node->subtype && !strcmp(node->subtype, "GPUMemory"))
      nodeinfos[i].type = HWLOC_MEMORY_TIER_GPU;
    else if (daxtype && !strcmp(daxtype, "NVM"))
      nodeinfos[i].type = HWLOC_MEMORY_TIER_NVM;
    else if (daxtype && !strcmp(daxtype, "SPM"))
      nodeinfos[i].type = HWLOC_MEMORY_TIER_SPM;
    /* add CXL flag */
    if (hwloc_obj_get_info_by_name(node, "CXLDevice") != NULL) {
      /* CXL is always SPM for now. HBM and DRAM not possible here yet.
       * Hence remove all but NVM first.
       */
      nodeinfos[i].type &= HWLOC_MEMORY_TIER_NVM;
      nodeinfos[i].type |= HWLOC_MEMORY_TIER_CXL;
    }

    /* get local bandwidth */
    imtg = NULL;
    for(j=0; j<imattr_bw->nr_targets; j++)
      if (imattr_bw->targets[j].obj == node) {
        imtg = &imattr_bw->targets[j];
        break;
      }
    if (imtg && !hwloc_bitmap_iszero(node->cpuset)) {
      struct hwloc_internal_memattr_initiator_s *imi;
      iloc.type = HWLOC_LOCATION_TYPE_CPUSET;
      iloc.location.cpuset = node->cpuset;
      imi = hwloc__memattr_target_get_initiator(imtg, &iloc, 0);
      if (imi)
        nodeinfos[i].local_bw = imi->value;
    }
    /* get local latency */
    imtg = NULL;
    for(j=0; j<imattr_lat->nr_targets; j++)
      if (imattr_lat->targets[j].obj == node) {
        imtg = &imattr_lat->targets[j];
        break;
      }
    if (imtg && !hwloc_bitmap_iszero(node->cpuset)) {
      struct hwloc_internal_memattr_initiator_s *imi;
      iloc.type = HWLOC_LOCATION_TYPE_CPUSET;
      iloc.location.cpuset = node->cpuset;
      imi = hwloc__memattr_target_get_initiator(imtg, &iloc, 0);
      if (imi)
        nodeinfos[i].local_lat = imi->value;
    }
  }

  /* Sort nodes by tier type and bandwidth.
   *
   * We could also use the existing subtype but it's not clear it'd be better.
   * For NVIDIA GPU, "GPUMemory" is set in the Linux backend, and used above to set tier type anyway.
   * For KNL, the Linux backend sets subtypes and memattrs, sorting by memattrs already works fine.
   * Existing subtypes could have been imported from XML, usually mostly OK except maybe SPM (fallback for I don't know)?
   * An envvar (or HWLOC_MEMTIERS_REFRESH special value?) could be passed to ignore existing subtypes,
   * but "GPUMemory" wouldn't be available anymore, we'd have to use something else like "PCIBusId",
   * but that one might not always be specific to GPU-backed NUMA nodes?
   */
  hwloc_debug("Sorting memory node infos...\n");
  qsort(nodeinfos, n, sizeof(*nodeinfos), compare_node_infos_by_type_and_bw);
#ifdef HWLOC_DEBUG
  for(i=0; i<n; i++)
    hwloc_debug("  node info %u = node L#%u P#%u with info type %lx and local BW %llu lat %llu\n",
                i,
                nodeinfos[i].node->logical_index, nodeinfos[i].node->os_index,
                nodeinfos[i].type,
                (unsigned long long) nodeinfos[i].local_bw,
                (unsigned long long) nodeinfos[i].local_lat);
#endif
  /* now we have UNKNOWN nodes (sorted by BW only), then known ones */

  /* iterate among them and add a rank value.
   * start from rank 0 and switch to next rank when the type changes or when the BW or latency difference is > threshold */
  hwloc_debug("Starting memory tier #0 and iterating over nodes...\n");
  nodeinfos[0].rank = 0;
  for(i=1; i<n; i++) {
    /* reuse the same rank by default */
    nodeinfos[i].rank = nodeinfos[i-1].rank;
    /* comparing type */
    if (nodeinfos[i].type != nodeinfos[i-1].type) {
      /* increment before printing so that the message reports the new tier */
      nodeinfos[i].rank++;
      hwloc_debug("  Switching to memory tier #%u starting with node L#%u P#%u because of type\n",
                  nodeinfos[i].rank, nodeinfos[i].node->logical_index, nodeinfos[i].node->os_index);
      continue;
    }
    /* comparing bandwidth */
    if (nodeinfos[i].local_bw && nodeinfos[i-1].local_bw) {
      float bw_ratio = (float)nodeinfos[i].local_bw/(float)nodeinfos[i-1].local_bw;
      if (bw_ratio < 1.)
        bw_ratio = 1./bw_ratio;
      if (bw_ratio > 1.0 + bw_threshold) {
        nodeinfos[i].rank++;
        hwloc_debug("  Switching to memory tier #%u starting with node L#%u P#%u because of bandwidth\n",
                    nodeinfos[i].rank, nodeinfos[i].node->logical_index, nodeinfos[i].node->os_index);
        continue;
      }
    }
    /* comparing latency */
    if (nodeinfos[i].local_lat && nodeinfos[i-1].local_lat) {
      float lat_ratio = (float)nodeinfos[i].local_lat/(float)nodeinfos[i-1].local_lat;
      if (lat_ratio < 1.)
        lat_ratio = 1./lat_ratio;
      if (lat_ratio > 1.0 + lat_threshold) {
        /* increment before printing so that the message reports the new tier */
        nodeinfos[i].rank++;
        hwloc_debug("  Switching to memory tier #%u starting with node L#%u P#%u because of latency\n",
                    nodeinfos[i].rank, nodeinfos[i].node->logical_index, nodeinfos[i].node->os_index);
        continue;
      }
    }
  }
  /* FIXME: if there are cpuset-intersecting nodes in same tier, split again? */
  hwloc_debug("  Found %u tiers total\n", nodeinfos[n-1].rank + 1);

  /* now group nodeinfos into factorized tiers */
  nr_tiers = nodeinfos[n-1].rank + 1;
  tiers = calloc(nr_tiers, sizeof(*tiers));
  if (!tiers)
    goto out_with_nodeinfos;
  for(i=0; i<nr_tiers; i++) {
    tiers[i].nodeset = hwloc_bitmap_alloc();
    if (!tiers[i].nodeset)
      goto out_with_tiers;
    tiers[i].local_bw_min = tiers[i].local_bw_max = 0;
    tiers[i].local_lat_min = tiers[i].local_lat_max = 0;
    tiers[i].type = HWLOC_MEMORY_TIER_UNKNOWN;
  }
  for(i=0; i<n; i++) {
    unsigned rank = nodeinfos[i].rank;
    assert(rank < nr_tiers);
    hwloc_bitmap_set(tiers[rank].nodeset, nodeinfos[i].node->os_index);
    assert(tiers[rank].type == HWLOC_MEMORY_TIER_UNKNOWN
           || tiers[rank].type == nodeinfos[i].type);
    tiers[rank].type = nodeinfos[i].type;
    /* Compare bandwidths to update min/max.
     * Nodes are sorted by decreasing bandwidth inside a type, so taking the
     * first node as the min and the last one as the max would swap the bounds.
     * Unknown values (0) are ignored so that they cannot reset a known bound.
     */
    if (nodeinfos[i].local_bw) {
      if (!tiers[rank].local_bw_min || nodeinfos[i].local_bw < tiers[rank].local_bw_min)
        tiers[rank].local_bw_min = nodeinfos[i].local_bw;
      if (nodeinfos[i].local_bw > tiers[rank].local_bw_max)
        tiers[rank].local_bw_max = nodeinfos[i].local_bw;
    }
    /* compare latencies to update min/max, ignoring unknown values (0) as well */
    if (nodeinfos[i].local_lat) {
      if (!tiers[rank].local_lat_min || nodeinfos[i].local_lat < tiers[rank].local_lat_min)
        tiers[rank].local_lat_min = nodeinfos[i].local_lat;
      if (nodeinfos[i].local_lat > tiers[rank].local_lat_max)
        tiers[rank].local_lat_max = nodeinfos[i].local_lat;
    }
  }

  free(nodeinfos);
  *nr_tiers_p = nr_tiers;
  return tiers;

 out_with_tiers:
  /* tiers was zero-allocated, entries that were not reached hold a NULL nodeset */
  for(i=0; i<nr_tiers; i++)
    hwloc_bitmap_free(tiers[i].nodeset);
  free(tiers);
 out_with_nodeinfos:
  free(nodeinfos);
  return NULL;
}
/* Optional assumptions that may be combined as a bitmask
 * when guessing the types of memory tiers.
 */
enum hwloc_guess_memtiers_flag {
  /* assume that the tier containing node P#0 is DRAM */
  HWLOC_GUESS_MEMTIERS_FLAG_NODE0_IS_DRAM = (1 << 0),
  /* assume that SPM tiers are HBM */
  HWLOC_GUESS_MEMTIERS_FLAG_SPM_IS_HBM    = (1 << 1)
};
/* Try to classify two memory tiers as DRAM and HBM using their local bandwidths.
 *
 * The tier with the lower minimal bandwidth is the DRAM candidate, the other
 * one is the HBM candidate. The guess is accepted only if the HBM candidate's
 * minimal bandwidth is more than twice the DRAM candidate's maximal bandwidth
 * and, when HWLOC_GUESS_MEMTIERS_FLAG_NODE0_IS_DRAM is set in flags, if bit 0
 * of the HBM candidate's nodeset is not set.
 *
 * Returns 0 after setting both tier types, or -1 (types untouched) when
 * bandwidth information is missing or the guess is rejected.
 */
static int
hwloc__guess_dram_hbm_tiers(struct hwloc_memory_tier_s *tier1,
                            struct hwloc_memory_tier_s *tier2,
                            unsigned long flags)
{
  struct hwloc_memory_tier_s *slow = tier1;
  struct hwloc_memory_tier_s *fast = tier2;

  /* both tiers need bandwidth info for the comparison to make sense */
  if (!tier1->local_bw_min || !tier2->local_bw_min) {
    hwloc_debug(" Missing BW info\n");
    return -1;
  }

  /* make "slow" point to the tier with the lower minimal bandwidth */
  if (slow->local_bw_min > fast->local_bw_min) {
    slow = tier2;
    fast = tier1;
  }

  hwloc_debug(" tier1 BW %llu-%llu vs tier2 BW %llu-%llu\n",
              (unsigned long long) slow->local_bw_min,
              (unsigned long long) slow->local_bw_max,
              (unsigned long long) fast->local_bw_min,
              (unsigned long long) fast->local_bw_max);

  /* the fast tier must be strictly more than 2x faster, otherwise we cannot guess HBM */
  if (fast->local_bw_min <= slow->local_bw_max * 2) {
    hwloc_debug(" BW difference isn't >2x\n");
    return -1;
  }

  /* when node0 is assumed to be DRAM, it must not end up in the HBM candidate */
  if ((flags & HWLOC_GUESS_MEMTIERS_FLAG_NODE0_IS_DRAM)
      && hwloc_bitmap_isset(fast->nodeset, 0)) {
    hwloc_debug(" node0 shouldn't have HBM BW\n");
    return -1;
  }

  /* slow tier is DRAM, fast tier is HBM */
  slow->type = HWLOC_MEMORY_TIER_DRAM;
  fast->type = HWLOC_MEMORY_TIER_HBM;
  hwloc_debug(" Success\n");
  return 0;
}
/* Try to assign DRAM/HBM types to tiers whose type is still unknown.
 *
 * The HWLOC_MEMTIERS_GUESS environment variable tunes the behavior:
 *  - "none" returns immediately without guessing anything;
 *  - "all" enables every optional assumption flag;
 *  - a value containing "spm_is_hbm" forces SPM tiers to be marked as HBM;
 *  - a value containing "node0_is_dram" assumes the tier containing node 0 is DRAM.
 *
 * tiers[] types are updated in place. Always returns 0.
 */
static int
hwloc__guess_memory_tiers_types(hwloc_topology_t topology __hwloc_attribute_unused,
                                unsigned nr_tiers,
                                struct hwloc_memory_tier_s *tiers)
{
  unsigned long flags;
  const char *env;
  unsigned nr_unknown, nr_spm;
  struct hwloc_memory_tier_s *unknown_tier[2], *spm_tier;
  unsigned i;

  flags = 0;
  env = getenv("HWLOC_MEMTIERS_GUESS");
  if (env) {
    if (!strcmp(env, "none"))
      return 0;
    /* by default, we don't guess anything unsure */
    if (!strcmp(env, "all"))
      /* enable all typical cases */
      flags = ~0UL;
    if (strstr(env, "spm_is_hbm")) {
      hwloc_debug("Assuming SPM-tier is HBM, ignore bandwidth\n");
      flags |= HWLOC_GUESS_MEMTIERS_FLAG_SPM_IS_HBM;
    }
    if (strstr(env, "node0_is_dram")) {
      hwloc_debug("Assuming node0 is DRAM\n");
      flags |= HWLOC_GUESS_MEMTIERS_FLAG_NODE0_IS_DRAM;
    }
  }

  if (nr_tiers == 1)
    /* Likely DRAM only, but could also be HBM-only in non-SPM mode.
     * We cannot be sure, but it doesn't matter since there's a single tier.
     */
    return 0;

  /* count unknown and SPM tiers, remembering the first two unknown ones and the last SPM one */
  nr_unknown = nr_spm = 0;
  unknown_tier[0] = unknown_tier[1] = spm_tier = NULL;
  for(i=0; i<nr_tiers; i++) {
    switch (tiers[i].type) {
    case HWLOC_MEMORY_TIER_UNKNOWN:
      if (nr_unknown < 2)
        unknown_tier[nr_unknown] = &tiers[i];
      nr_unknown++;
      break;
    case HWLOC_MEMORY_TIER_SPM:
      spm_tier = &tiers[i];
      nr_spm++;
      break;
    case HWLOC_MEMORY_TIER_DRAM:
    case HWLOC_MEMORY_TIER_HBM:
      /* not possible: DRAM and HBM are only assigned by the guessing below */
      abort();
    default:
      /* ignore other already-typed tiers (NVM, ...) */
      break;
    }
  }
  hwloc_debug("Found %u unknown memory tiers and %u SPM\n",
              nr_unknown, nr_spm);

  /* Try to guess DRAM + HBM common cases.
   * Other things we'd like to detect:
   * single unknown => DRAM or HBM? HBM won't be SPM on HBM-only CPUs
   * unknown + CXL DRAM => DRAM or HBM?
   */
  if (nr_unknown == 2 && !nr_spm) {
    /* 2 unknown, could be DRAM + non-SPM HBM */
    hwloc_debug(" Trying to guess 2 unknown tiers using BW\n");
    hwloc__guess_dram_hbm_tiers(unknown_tier[0], unknown_tier[1], flags);
  } else if (nr_unknown == 1 && nr_spm == 1) {
    /* 1 unknown + 1 SPM, could be DRAM + SPM HBM */
    hwloc_debug(" Trying to guess 1 unknown + 1 SPM tiers using BW\n");
    hwloc__guess_dram_hbm_tiers(unknown_tier[0], spm_tier, flags);
  }

  if (flags & HWLOC_GUESS_MEMTIERS_FLAG_SPM_IS_HBM) {
    /* force mark SPM as HBM */
    for(i=0; i<nr_tiers; i++) {
      if (tiers[i].type == HWLOC_MEMORY_TIER_SPM) {
        /* terminate the message so that it does not run into the next debug line */
        hwloc_debug("Forcing SPM tier to HBM\n");
        tiers[i].type = HWLOC_MEMORY_TIER_HBM;
      }
    }
  }

  if (flags & HWLOC_GUESS_MEMTIERS_FLAG_NODE0_IS_DRAM) {
    /* force mark node0's tier as DRAM if we couldn't guess it */
    for(i=0; i<nr_tiers; i++) {
      if (hwloc_bitmap_isset(tiers[i].nodeset, 0)
          && tiers[i].type == HWLOC_MEMORY_TIER_UNKNOWN) {
        /* terminate the message so that it does not run into the next debug line */
        hwloc_debug("Forcing node0 tier to DRAM\n");
        tiers[i].type = HWLOC_MEMORY_TIER_DRAM;
        break; /* only the first matching tier is changed */
      }
    }
  }

  return 0;
}
/* Build memory tiers from a user-provided description.
 *
 * parses something like 0xf=HBM;0x0f=DRAM;0x00f=CXL-DRAM
 * i.e. a ';'-separated list of <nodeset>=<type> entries.
 *
 * On success, returns a newly allocated array of tiers (each with its own
 * nodeset bitmap, bandwidth/latency ranges zeroed) and stores its length
 * in *nr_tiers_p. The caller owns the array and the nodesets.
 * On failure (allocation error, missing '=', empty nodeset), prints a message
 * on stderr, releases everything and returns NULL without touching *nr_tiers_p.
 */
static struct hwloc_memory_tier_s *
hwloc__force_memory_tiers(hwloc_topology_t topology __hwloc_attribute_unused,
                          unsigned *nr_tiers_p,
                          const char *_env)
{
  struct hwloc_memory_tier_s *tiers = NULL;
  unsigned nr_tiers, i;
  hwloc_bitmap_t nodeset = NULL;
  char *env;
  const char *tmp;

  /* work on a private copy since separators are overwritten with '\0' while parsing */
  env = strdup(_env);
  if (!env) {
    fprintf(stderr, "[hwloc/memtiers] failed to duplicate HWLOC_MEMTIERS envvar\n");
    goto out;
  }

  /* first pass: count entries (number of ';' plus one) to size the array */
  tmp = env;
  nr_tiers = 1;
  while (1) {
    tmp = strchr(tmp, ';');
    if (!tmp)
      break;
    tmp++;
    nr_tiers++;
  }

  /* scratch bitmap reused for parsing each entry */
  nodeset = hwloc_bitmap_alloc();
  if (!nodeset) {
    fprintf(stderr, "[hwloc/memtiers] failed to allocated forced tiers' nodeset\n");
    goto out_with_envvar;
  }

  tiers = calloc(nr_tiers, sizeof(*tiers));
  if (!tiers) {
    fprintf(stderr, "[hwloc/memtiers] failed to allocated forced tiers\n");
    goto out_with_nodeset;
  }

  /* second pass: nr_tiers now counts the entries that were successfully filled,
   * which is what the error path relies on when freeing nodesets.
   */
  nr_tiers = 0;
  tmp = env;
  while (1) {
    char *end;
    char *equal;
    hwloc_memory_tier_type_t type;

    /* isolate the current entry */
    end = strchr(tmp, ';');
    if (end)
      *end = '\0';

    equal = strchr(tmp, '=');
    if (!equal) {
      fprintf(stderr, "[hwloc/memtiers] missing `=' before end of forced tier description at `%s'\n", tmp);
      goto out_with_tiers;
    }
    *equal = '\0';

    hwloc_bitmap_sscanf(nodeset, tmp);
    if (hwloc_bitmap_iszero(nodeset)) {
      fprintf(stderr, "[hwloc/memtiers] empty forced tier nodeset `%s', aborting\n", tmp);
      goto out_with_tiers;
    }

    /* an unrecognized type is not fatal, the tier is kept with that type value */
    type = hwloc_memory_tier_type_sscanf(equal+1);
    if (!type)
      hwloc_debug("failed to recognize forced tier type `%s'\n", equal+1);

    tiers[nr_tiers].nodeset = hwloc_bitmap_dup(nodeset);
    if (!tiers[nr_tiers].nodeset) {
      /* never return a tier without nodeset, users of the tiers dereference it */
      fprintf(stderr, "[hwloc/memtiers] failed to duplicate forced tier nodeset\n");
      goto out_with_tiers;
    }
    tiers[nr_tiers].type = type;
    tiers[nr_tiers].local_bw_min = tiers[nr_tiers].local_bw_max = 0;
    tiers[nr_tiers].local_lat_min = tiers[nr_tiers].local_lat_max = 0;
    nr_tiers++;

    if (!end)
      break;
    tmp = end+1;
  }

  free(env);
  hwloc_bitmap_free(nodeset);

  hwloc_debug("Forcing %u memory tiers\n", nr_tiers);
#ifdef HWLOC_DEBUG
  for(i=0; i<nr_tiers; i++) {
    char *s;
    hwloc_bitmap_asprintf(&s, tiers[i].nodeset);
    hwloc_debug(" tier #%u type %lx nodeset %s\n", i, tiers[i].type, s);
    free(s);
  }
#endif

  *nr_tiers_p = nr_tiers;
  return tiers;

 out_with_tiers:
  /* only the first nr_tiers entries own a nodeset */
  for(i=0; i<nr_tiers; i++)
    hwloc_bitmap_free(tiers[i].nodeset);
  free(tiers);
 out_with_nodeset:
  hwloc_bitmap_free(nodeset);
 out_with_envvar:
  free(env);
 out:
  return NULL;
}
/* Annotate NUMA nodes of the topology according to the given memory tiers.
 *
 * For each NUMA node, the first tier whose nodeset contains the node's
 * os_index is used:
 *  - the node subtype is set to the tier type name when the node has no
 *    subtype yet, or unconditionally if force is non-zero; tiers whose type
 *    has no name leave the subtype untouched;
 *  - if there are several tiers, a "MemoryTier" info with the tier index is
 *    added to the node.
 * If there are several tiers, a "MemoryTiersNr" info is also added to the root object.
 */
static void
hwloc__apply_memory_tiers_subtypes(hwloc_topology_t topology,
                                   unsigned nr_tiers,
                                   struct hwloc_memory_tier_s *tiers,
                                   int force)
{
  hwloc_obj_t node = NULL;

  hwloc_debug("Marking node tiers\n");
  while ((node = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, node)) != NULL) {
    unsigned j;
    for(j=0; j<nr_tiers; j++) {
      const char *subtype;

      if (!hwloc_bitmap_isset(tiers[j].nodeset, node->os_index))
        continue;

      subtype = hwloc_memory_tier_type_snprintf(tiers[j].type);
      if (!node->subtype || force) { /* don't overwrite the existing subtype unless forced */
        if (subtype) { /* don't set a subtype for unknown tiers */
          char *newsubtype;
          /* node->subtype may be NULL here, never pass NULL for a %s conversion */
          hwloc_debug(" marking node L#%u P#%u as %s (was %s)\n", node->logical_index, node->os_index, subtype,
                      node->subtype ? node->subtype : "(null)");
          /* only drop the previous subtype once the new one is allocated */
          newsubtype = strdup(subtype);
          if (newsubtype) {
            free(node->subtype);
            node->subtype = newsubtype;
          }
        }
      } else {
        /* subtype is NULL for tiers without a type name, never pass NULL for a %s conversion */
        hwloc_debug(" node L#%u P#%u already marked as %s, not setting %s\n",
                    node->logical_index, node->os_index, node->subtype,
                    subtype ? subtype : "(null)");
      }

      if (nr_tiers > 1) {
        /* the tier index is only meaningful when there are several tiers */
        char tmp[20];
        snprintf(tmp, sizeof(tmp), "%u", j);
        hwloc__add_info_nodup(&node->infos, &node->infos_count, "MemoryTier", tmp, 1);
      }
      break; /* each node is in a single tier */
    }
  }

  if (nr_tiers > 1) {
    hwloc_obj_t root = hwloc_get_root_obj(topology);
    char tmp[20];
    snprintf(tmp, sizeof(tmp), "%u", nr_tiers);
    hwloc__add_info_nodup(&root->infos, &root->infos_count, "MemoryTiersNr", tmp, 1);
  }
}
/* Compute memory tiers and annotate the NUMA nodes of the topology with them.
 *
 * If the HWLOC_MEMTIERS environment variable is set:
 *  - "none" disables the whole feature;
 *  - otherwise it is parsed as a forced tier description, and subtypes are
 *    then applied as if force_subtype was non-zero.
 * If the variable is unset, or if its parsing failed, tiers are built by
 * grouping nodes, guessing their types, and sorting them.
 *
 * Always returns 0, even when no tiers could be built.
 */
int
hwloc_internal_memattrs_guess_memory_tiers(hwloc_topology_t topology, int force_subtype)
{
  struct hwloc_memory_tier_s *tiers = NULL;
  unsigned nr_tiers;
  unsigned t;
  const char *forced_env = getenv("HWLOC_MEMTIERS");

  if (forced_env) {
    if (!strcmp(forced_env, "none"))
      return 0;
    tiers = hwloc__force_memory_tiers(topology, &nr_tiers, forced_env);
    if (tiers) {
      assert(nr_tiers > 0);
      /* user-provided tiers override existing subtypes */
      force_subtype = 1;
    }
  }

  if (!tiers) {
    /* nothing was forced (or forcing failed), discover tiers ourselves */
    tiers = hwloc__group_memory_tiers(topology, &nr_tiers);
    if (!tiers)
      return 0;

    hwloc__guess_memory_tiers_types(topology, nr_tiers, tiers);

    /* sort tiers by BW first, then by type */
    hwloc_debug("Sorting memory tiers...\n");
    qsort(tiers, nr_tiers, sizeof(*tiers), compare_tiers_by_bw_and_type);
  }

#ifdef HWLOC_DEBUG
  for(t=0; t<nr_tiers; t++) {
    char *s;
    hwloc_bitmap_asprintf(&s, tiers[t].nodeset);
    hwloc_debug(" tier %u = nodes %s with type %lx and local BW %llu-%llu lat %llu-%llu\n",
                t,
                s, tiers[t].type,
                (unsigned long long) tiers[t].local_bw_min,
                (unsigned long long) tiers[t].local_bw_max,
                (unsigned long long) tiers[t].local_lat_min,
                (unsigned long long) tiers[t].local_lat_max);
    free(s);
  }
#endif

  hwloc__apply_memory_tiers_subtypes(topology, nr_tiers, tiers, force_subtype);

  /* the tiers were only needed to annotate the topology, release them */
  for(t=0; t<nr_tiers; t++)
    hwloc_bitmap_free(tiers[t].nodeset);
  free(tiers);
  return 0;
}

View File

@@ -1,5 +1,5 @@
/* /*
* Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2024 Inria. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
*/ */
@@ -119,6 +119,13 @@ hwloc_pci_discovery_init(struct hwloc_topology *topology)
topology->pci_forced_locality = NULL; topology->pci_forced_locality = NULL;
topology->first_pci_locality = topology->last_pci_locality = NULL; topology->first_pci_locality = topology->last_pci_locality = NULL;
#define HWLOC_PCI_LOCALITY_QUIRK_CRAY_EX235A (1ULL<<0)
#define HWLOC_PCI_LOCALITY_QUIRK_FAKE (1ULL<<62)
topology->pci_locality_quirks = (uint64_t) -1;
/* -1 is unknown, 0 is disabled, >0 is bitmask of enabled quirks.
* bit 63 should remain unused so that -1 is unaccessible as a bitmask.
*/
} }
void void
@@ -146,8 +153,9 @@ hwloc_pci_discovery_prepare(struct hwloc_topology *topology)
} }
free(buffer); free(buffer);
} else { } else {
fprintf(stderr, "Ignoring HWLOC_PCI_LOCALITY file `%s' too large (%lu bytes)\n", if (HWLOC_SHOW_CRITICAL_ERRORS())
env, (unsigned long) st.st_size); fprintf(stderr, "hwloc/pci: Ignoring HWLOC_PCI_LOCALITY file `%s' too large (%lu bytes)\n",
env, (unsigned long) st.st_size);
} }
} }
close(fd); close(fd);
@@ -206,8 +214,11 @@ hwloc_pci_traverse_print_cb(void * cbdata __hwloc_attribute_unused,
else else
hwloc_debug("%s Bridge [%04x:%04x]", busid, hwloc_debug("%s Bridge [%04x:%04x]", busid,
pcidev->attr->pcidev.vendor_id, pcidev->attr->pcidev.device_id); pcidev->attr->pcidev.vendor_id, pcidev->attr->pcidev.device_id);
hwloc_debug(" to %04x:[%02x:%02x]\n", if (pcidev->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI)
pcidev->attr->bridge.downstream.pci.domain, pcidev->attr->bridge.downstream.pci.secondary_bus, pcidev->attr->bridge.downstream.pci.subordinate_bus); hwloc_debug(" to %04x:[%02x:%02x]\n",
pcidev->attr->bridge.downstream.pci.domain, pcidev->attr->bridge.downstream.pci.secondary_bus, pcidev->attr->bridge.downstream.pci.subordinate_bus);
else
assert(0);
} else } else
hwloc_debug("%s Device [%04x:%04x (%04x:%04x) rev=%02x class=%04x]\n", busid, hwloc_debug("%s Device [%04x:%04x (%04x:%04x) rev=%02x class=%04x]\n", busid,
pcidev->attr->pcidev.vendor_id, pcidev->attr->pcidev.device_id, pcidev->attr->pcidev.vendor_id, pcidev->attr->pcidev.device_id,
@@ -251,11 +262,11 @@ hwloc_pci_compare_busids(struct hwloc_obj *a, struct hwloc_obj *b)
if (a->attr->pcidev.domain > b->attr->pcidev.domain) if (a->attr->pcidev.domain > b->attr->pcidev.domain)
return HWLOC_PCI_BUSID_HIGHER; return HWLOC_PCI_BUSID_HIGHER;
if (a->type == HWLOC_OBJ_BRIDGE if (a->type == HWLOC_OBJ_BRIDGE && a->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI
&& b->attr->pcidev.bus >= a->attr->bridge.downstream.pci.secondary_bus && b->attr->pcidev.bus >= a->attr->bridge.downstream.pci.secondary_bus
&& b->attr->pcidev.bus <= a->attr->bridge.downstream.pci.subordinate_bus) && b->attr->pcidev.bus <= a->attr->bridge.downstream.pci.subordinate_bus)
return HWLOC_PCI_BUSID_SUPERSET; return HWLOC_PCI_BUSID_SUPERSET;
if (b->type == HWLOC_OBJ_BRIDGE if (b->type == HWLOC_OBJ_BRIDGE && b->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI
&& a->attr->pcidev.bus >= b->attr->bridge.downstream.pci.secondary_bus && a->attr->pcidev.bus >= b->attr->bridge.downstream.pci.secondary_bus
&& a->attr->pcidev.bus <= b->attr->bridge.downstream.pci.subordinate_bus) && a->attr->pcidev.bus <= b->attr->bridge.downstream.pci.subordinate_bus)
return HWLOC_PCI_BUSID_INCLUDED; return HWLOC_PCI_BUSID_INCLUDED;
@@ -302,7 +313,7 @@ hwloc_pci_add_object(struct hwloc_obj *parent, struct hwloc_obj **parent_io_firs
new->next_sibling = *curp; new->next_sibling = *curp;
*curp = new; *curp = new;
new->parent = parent; new->parent = parent;
if (new->type == HWLOC_OBJ_BRIDGE) { if (new->type == HWLOC_OBJ_BRIDGE && new->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI) {
/* look at remaining siblings and move some below new */ /* look at remaining siblings and move some below new */
childp = &new->io_first_child; childp = &new->io_first_child;
curp = &new->next_sibling; curp = &new->next_sibling;
@@ -329,7 +340,7 @@ hwloc_pci_add_object(struct hwloc_obj *parent, struct hwloc_obj **parent_io_firs
} }
case HWLOC_PCI_BUSID_EQUAL: { case HWLOC_PCI_BUSID_EQUAL: {
static int reported = 0; static int reported = 0;
if (!reported && !hwloc_hide_errors()) { if (!reported && HWLOC_SHOW_CRITICAL_ERRORS()) {
fprintf(stderr, "*********************************************************\n"); fprintf(stderr, "*********************************************************\n");
fprintf(stderr, "* hwloc %s received invalid PCI information.\n", HWLOC_VERSION); fprintf(stderr, "* hwloc %s received invalid PCI information.\n", HWLOC_VERSION);
fprintf(stderr, "*\n"); fprintf(stderr, "*\n");
@@ -411,7 +422,7 @@ hwloc_pcidisc_add_hostbridges(struct hwloc_topology *topology,
dstnextp = &child->next_sibling; dstnextp = &child->next_sibling;
/* compute hostbridge secondary/subordinate buses */ /* compute hostbridge secondary/subordinate buses */
if (child->type == HWLOC_OBJ_BRIDGE if (child->type == HWLOC_OBJ_BRIDGE && child->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI
&& child->attr->bridge.downstream.pci.subordinate_bus > current_subordinate) && child->attr->bridge.downstream.pci.subordinate_bus > current_subordinate)
current_subordinate = child->attr->bridge.downstream.pci.subordinate_bus; current_subordinate = child->attr->bridge.downstream.pci.subordinate_bus;
@@ -438,13 +449,90 @@ hwloc_pcidisc_add_hostbridges(struct hwloc_topology *topology,
return new; return new;
} }
static struct hwloc_obj * /* return 1 if a quirk was applied */
hwloc_pci_fixup_busid_parent(struct hwloc_topology *topology __hwloc_attribute_unused, static int
struct hwloc_pcidev_attr_s *busid __hwloc_attribute_unused, hwloc__pci_find_busid_parent_quirk(struct hwloc_topology *topology,
struct hwloc_obj *parent __hwloc_attribute_unused) struct hwloc_pcidev_attr_s *busid,
hwloc_cpuset_t cpuset)
{ {
/* no quirk for now */ if (topology->pci_locality_quirks == (uint64_t)-1 /* unknown */) {
return parent; const char *dmi_board_name, *env;
/* first invokation, detect which quirks are needed */
topology->pci_locality_quirks = 0; /* no quirk yet */
dmi_board_name = hwloc_obj_get_info_by_name(hwloc_get_root_obj(topology), "DMIBoardName");
if (dmi_board_name && !strcmp(dmi_board_name, "HPE CRAY EX235A")) {
hwloc_debug("enabling for PCI locality quirk for HPE Cray EX235A\n");
topology->pci_locality_quirks |= HWLOC_PCI_LOCALITY_QUIRK_CRAY_EX235A;
}
env = getenv("HWLOC_PCI_LOCALITY_QUIRK_FAKE");
if (env && atoi(env)) {
hwloc_debug("enabling for PCI locality fake quirk (attaching everything to last PU)\n");
topology->pci_locality_quirks |= HWLOC_PCI_LOCALITY_QUIRK_FAKE;
}
}
if (topology->pci_locality_quirks & HWLOC_PCI_LOCALITY_QUIRK_FAKE) {
unsigned last = hwloc_bitmap_last(hwloc_topology_get_topology_cpuset(topology));
hwloc_bitmap_set(cpuset, last);
return 1;
}
if (topology->pci_locality_quirks & HWLOC_PCI_LOCALITY_QUIRK_CRAY_EX235A) {
/* AMD Trento has xGMI ports connected to individual CCDs (8 cores + L3)
* instead of NUMA nodes (pairs of CCDs within Trento) as is usual in AMD EPYC CPUs.
* This is not described by the ACPI tables, hence we need to manually hardwire
* the xGMI locality for the (currently single) server that currently uses that CPU.
* It's not clear if ACPI tables can/will ever be fixed (would require one initiator
* proximity domain per CCD), or if Linux can/will work around the issue.
*/
if (busid->domain == 0) {
if (busid->bus >= 0xd0 && busid->bus <= 0xd1) {
hwloc_bitmap_set_range(cpuset, 0, 7);
hwloc_bitmap_set_range(cpuset, 64, 71);
return 1;
}
if (busid->bus >= 0xd4 && busid->bus <= 0xd6) {
hwloc_bitmap_set_range(cpuset, 8, 15);
hwloc_bitmap_set_range(cpuset, 72, 79);
return 1;
}
if (busid->bus >= 0xc8 && busid->bus <= 0xc9) {
hwloc_bitmap_set_range(cpuset, 16, 23);
hwloc_bitmap_set_range(cpuset, 80, 87);
return 1;
}
if (busid->bus >= 0xcc && busid->bus <= 0xce) {
hwloc_bitmap_set_range(cpuset, 24, 31);
hwloc_bitmap_set_range(cpuset, 88, 95);
return 1;
}
if (busid->bus >= 0xd8 && busid->bus <= 0xd9) {
hwloc_bitmap_set_range(cpuset, 32, 39);
hwloc_bitmap_set_range(cpuset, 96, 103);
return 1;
}
if (busid->bus >= 0xdc && busid->bus <= 0xde) {
hwloc_bitmap_set_range(cpuset, 40, 47);
hwloc_bitmap_set_range(cpuset, 104, 111);
return 1;
}
if (busid->bus >= 0xc0 && busid->bus <= 0xc1) {
hwloc_bitmap_set_range(cpuset, 48, 55);
hwloc_bitmap_set_range(cpuset, 112, 119);
return 1;
}
if (busid->bus >= 0xc4 && busid->bus <= 0xc6) {
hwloc_bitmap_set_range(cpuset, 56, 63);
hwloc_bitmap_set_range(cpuset, 120, 127);
return 1;
}
}
}
return 0;
} }
static struct hwloc_obj * static struct hwloc_obj *
@@ -453,7 +541,7 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide
hwloc_bitmap_t cpuset = hwloc_bitmap_alloc(); hwloc_bitmap_t cpuset = hwloc_bitmap_alloc();
hwloc_obj_t parent; hwloc_obj_t parent;
int forced = 0; int forced = 0;
int noquirks = 0; int noquirks = 0, got_quirked = 0;
unsigned i; unsigned i;
int err; int err;
@@ -486,7 +574,8 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide
if (env) { if (env) {
static int reported = 0; static int reported = 0;
if (!topology->pci_has_forced_locality && !reported) { if (!topology->pci_has_forced_locality && !reported) {
fprintf(stderr, "Environment variable %s is deprecated, please use HWLOC_PCI_LOCALITY instead.\n", env); if (HWLOC_SHOW_ALL_ERRORS())
fprintf(stderr, "hwloc/pci: Environment variable %s is deprecated, please use HWLOC_PCI_LOCALITY instead.\n", env);
reported = 1; reported = 1;
} }
if (*env) { if (*env) {
@@ -500,7 +589,13 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide
} }
} }
if (!forced) { if (!forced && !noquirks && topology->pci_locality_quirks /* either quirks are unknown yet, or some are enabled */) {
err = hwloc__pci_find_busid_parent_quirk(topology, busid, cpuset);
if (err > 0)
got_quirked = 1;
}
if (!forced && !got_quirked) {
/* get the cpuset by asking the backend that provides the relevant hook, if any. */ /* get the cpuset by asking the backend that provides the relevant hook, if any. */
struct hwloc_backend *backend = topology->get_pci_busid_cpuset_backend; struct hwloc_backend *backend = topology->get_pci_busid_cpuset_backend;
if (backend) if (backend)
@@ -515,11 +610,7 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide
hwloc_debug_bitmap(" will attach PCI bus to cpuset %s\n", cpuset); hwloc_debug_bitmap(" will attach PCI bus to cpuset %s\n", cpuset);
parent = hwloc_find_insert_io_parent_by_complete_cpuset(topology, cpuset); parent = hwloc_find_insert_io_parent_by_complete_cpuset(topology, cpuset);
if (parent) { if (!parent) {
if (!noquirks)
/* We found a valid parent. Check that the OS didn't report invalid locality */
parent = hwloc_pci_fixup_busid_parent(topology, busid, parent);
} else {
/* Fallback to root */ /* Fallback to root */
parent = hwloc_get_root_obj(topology); parent = hwloc_get_root_obj(topology);
} }
@@ -565,7 +656,7 @@ hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *tre
assert(pciobj->type == HWLOC_OBJ_PCI_DEVICE assert(pciobj->type == HWLOC_OBJ_PCI_DEVICE
|| (pciobj->type == HWLOC_OBJ_BRIDGE && pciobj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI)); || (pciobj->type == HWLOC_OBJ_BRIDGE && pciobj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI));
if (obj->type == HWLOC_OBJ_BRIDGE) { if (obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI) {
domain = obj->attr->bridge.downstream.pci.domain; domain = obj->attr->bridge.downstream.pci.domain;
bus_min = obj->attr->bridge.downstream.pci.secondary_bus; bus_min = obj->attr->bridge.downstream.pci.secondary_bus;
bus_max = obj->attr->bridge.downstream.pci.subordinate_bus; bus_max = obj->attr->bridge.downstream.pci.subordinate_bus;
@@ -795,26 +886,12 @@ hwloc_pcidisc_find_linkspeed(const unsigned char *config,
unsigned offset, float *linkspeed) unsigned offset, float *linkspeed)
{ {
unsigned linksta, speed, width; unsigned linksta, speed, width;
float lanespeed;
memcpy(&linksta, &config[offset + HWLOC_PCI_EXP_LNKSTA], 4); memcpy(&linksta, &config[offset + HWLOC_PCI_EXP_LNKSTA], 4);
speed = linksta & HWLOC_PCI_EXP_LNKSTA_SPEED; /* PCIe generation */ speed = linksta & HWLOC_PCI_EXP_LNKSTA_SPEED; /* PCIe generation */
width = (linksta & HWLOC_PCI_EXP_LNKSTA_WIDTH) >> 4; /* how many lanes */ width = (linksta & HWLOC_PCI_EXP_LNKSTA_WIDTH) >> 4; /* how many lanes */
/* PCIe Gen1 = 2.5GT/s signal-rate per lane with 8/10 encoding = 0.25GB/s data-rate per lane
* PCIe Gen2 = 5 GT/s signal-rate per lane with 8/10 encoding = 0.5 GB/s data-rate per lane
* PCIe Gen3 = 8 GT/s signal-rate per lane with 128/130 encoding = 1 GB/s data-rate per lane
* PCIe Gen4 = 16 GT/s signal-rate per lane with 128/130 encoding = 2 GB/s data-rate per lane
* PCIe Gen5 = 32 GT/s signal-rate per lane with 128/130 encoding = 4 GB/s data-rate per lane
*/
/* lanespeed in Gbit/s */ *linkspeed = hwloc__pci_link_speed(speed, width);
if (speed <= 2)
lanespeed = 2.5f * speed * 0.8f;
else
lanespeed = 8.0f * (1<<(speed-3)) * 128/130; /* assume Gen6 will be 64 GT/s and so on */
/* linkspeed in GB/s */
*linkspeed = lanespeed * width / 8;
return 0; return 0;
} }
@@ -938,6 +1015,7 @@ hwloc_pci_class_string(unsigned short class_id)
switch (class_id) { switch (class_id) {
case 0x0500: return "RAM"; case 0x0500: return "RAM";
case 0x0501: return "Flash"; case 0x0501: return "Flash";
case 0x0502: return "CXLMem";
} }
return "Memory"; return "Memory";
case 0x06: case 0x06:

View File

@@ -23,6 +23,7 @@ struct hwloc_shmem_header {
uint32_t header_length; /* where the actual topology starts in the file/mapping */ uint32_t header_length; /* where the actual topology starts in the file/mapping */
uint64_t mmap_address; /* virtual address to pass to mmap */ uint64_t mmap_address; /* virtual address to pass to mmap */
uint64_t mmap_length; /* length to pass to mmap (includes the header) */ uint64_t mmap_length; /* length to pass to mmap (includes the header) */
/* we will pad the end to a multiple of pointer size so that the topology is well aligned */
}; };
#define HWLOC_SHMEM_MALLOC_ALIGN 8UL #define HWLOC_SHMEM_MALLOC_ALIGN 8UL
@@ -85,6 +86,7 @@ hwloc_shmem_topology_write(hwloc_topology_t topology,
hwloc_topology_t new; hwloc_topology_t new;
struct hwloc_tma tma; struct hwloc_tma tma;
struct hwloc_shmem_header header; struct hwloc_shmem_header header;
uint32_t header_length = (sizeof(header) + sizeof(void*) - 1) & ~(sizeof(void*) - 1); /* pad to a multiple of pointer size */
void *mmap_res; void *mmap_res;
int err; int err;
@@ -100,7 +102,7 @@ hwloc_shmem_topology_write(hwloc_topology_t topology,
hwloc_internal_memattrs_refresh(topology); hwloc_internal_memattrs_refresh(topology);
header.header_version = HWLOC_SHMEM_HEADER_VERSION; header.header_version = HWLOC_SHMEM_HEADER_VERSION;
header.header_length = sizeof(header); header.header_length = header_length;
header.mmap_address = (uintptr_t) mmap_address; header.mmap_address = (uintptr_t) mmap_address;
header.mmap_length = length; header.mmap_length = length;
@@ -127,7 +129,7 @@ hwloc_shmem_topology_write(hwloc_topology_t topology,
tma.malloc = tma_shmem_malloc; tma.malloc = tma_shmem_malloc;
tma.dontfree = 1; tma.dontfree = 1;
tma.data = (char *)mmap_res + sizeof(header); tma.data = (char *)mmap_res + header_length;
err = hwloc__topology_dup(&new, topology, &tma); err = hwloc__topology_dup(&new, topology, &tma);
if (err < 0) if (err < 0)
return err; return err;
@@ -154,6 +156,7 @@ hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp,
{ {
hwloc_topology_t new, old; hwloc_topology_t new, old;
struct hwloc_shmem_header header; struct hwloc_shmem_header header;
uint32_t header_length = (sizeof(header) + sizeof(void*) - 1) & ~(sizeof(void*) - 1); /* pad to a multiple of pointer size */
void *mmap_res; void *mmap_res;
int err; int err;
@@ -171,7 +174,7 @@ hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp,
return -1; return -1;
if (header.header_version != HWLOC_SHMEM_HEADER_VERSION if (header.header_version != HWLOC_SHMEM_HEADER_VERSION
|| header.header_length != sizeof(header) || header.header_length != header_length
|| header.mmap_address != (uintptr_t) mmap_address || header.mmap_address != (uintptr_t) mmap_address
|| header.mmap_length != length) { || header.mmap_length != length) {
errno = EINVAL; errno = EINVAL;
@@ -186,7 +189,7 @@ hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp,
goto out_with_mmap; goto out_with_mmap;
} }
old = (hwloc_topology_t)((char*)mmap_address + sizeof(header)); old = (hwloc_topology_t)((char*)mmap_address + header_length);
if (hwloc_topology_abi_check(old) < 0) { if (hwloc_topology_abi_check(old) < 0) {
errno = EINVAL; errno = EINVAL;
goto out_with_mmap; goto out_with_mmap;

View File

@@ -1,4 +1,9 @@
#include <private/internal-components.h> HWLOC_DECLSPEC extern const struct hwloc_component hwloc_noos_component;
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_xml_component;
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_synthetic_component;
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_xml_nolibxml_component;
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_windows_component;
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_x86_component;
static const struct hwloc_component * hwloc_static_components[] = { static const struct hwloc_component * hwloc_static_components[] = {
&hwloc_noos_component, &hwloc_noos_component,
&hwloc_xml_component, &hwloc_xml_component,

View File

@@ -1,6 +1,6 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2023 Inria. All rights reserved.
* Copyright © 2009-2010 Université Bordeaux * Copyright © 2009-2010 Université Bordeaux
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
@@ -23,6 +23,7 @@ struct hwloc_synthetic_attr_s {
unsigned depth; /* For caches/groups */ unsigned depth; /* For caches/groups */
hwloc_obj_cache_type_t cachetype; /* For caches */ hwloc_obj_cache_type_t cachetype; /* For caches */
hwloc_uint64_t memorysize; /* For caches/memory */ hwloc_uint64_t memorysize; /* For caches/memory */
hwloc_uint64_t memorysidecachesize; /* Single level of memory-side-cache in-front of a NUMA node */
}; };
struct hwloc_synthetic_indexes_s { struct hwloc_synthetic_indexes_s {
@@ -323,17 +324,29 @@ hwloc_synthetic_parse_memory_attr(const char *attr, const char **endp)
hwloc_uint64_t size; hwloc_uint64_t size;
size = strtoull(attr, (char **) &endptr, 0); size = strtoull(attr, (char **) &endptr, 0);
if (!hwloc_strncasecmp(endptr, "TB", 2)) { if (!hwloc_strncasecmp(endptr, "TB", 2)) {
size *= 1000ULL*1000ULL*1000ULL*1000ULL;
endptr += 2;
} else if (!hwloc_strncasecmp(endptr, "TiB", 3)) {
size <<= 40; size <<= 40;
endptr += 2; endptr += 3;
} else if (!hwloc_strncasecmp(endptr, "GB", 2)) { } else if (!hwloc_strncasecmp(endptr, "GB", 2)) {
size *= 1000ULL*1000ULL*1000ULL;
endptr += 2;
} else if (!hwloc_strncasecmp(endptr, "GiB", 3)) {
size <<= 30; size <<= 30;
endptr += 2; endptr += 3;
} else if (!hwloc_strncasecmp(endptr, "MB", 2)) { } else if (!hwloc_strncasecmp(endptr, "MB", 2)) {
size *= 1000ULL*1000ULL;
endptr += 2;
} else if (!hwloc_strncasecmp(endptr, "MiB", 3)) {
size <<= 20; size <<= 20;
endptr += 2; endptr += 3;
} else if (!hwloc_strncasecmp(endptr, "kB", 2)) { } else if (!hwloc_strncasecmp(endptr, "kB", 2)) {
size <<= 10; size *= 1000ULL;
endptr += 2; endptr += 2;
} else if (!hwloc_strncasecmp(endptr, "kiB", 3)) {
size <<= 10;
endptr += 3;
} }
*endp = endptr; *endp = endptr;
return size; return size;
@@ -368,6 +381,9 @@ hwloc_synthetic_parse_attrs(const char *attrs, const char **next_posp,
} else if (!iscache && !strncmp("memory=", attrs, 7)) { } else if (!iscache && !strncmp("memory=", attrs, 7)) {
memorysize = hwloc_synthetic_parse_memory_attr(attrs+7, &attrs); memorysize = hwloc_synthetic_parse_memory_attr(attrs+7, &attrs);
} else if (!strncmp("memorysidecachesize=", attrs, 20)) {
sattr->memorysidecachesize = hwloc_synthetic_parse_memory_attr(attrs+20, &attrs);
} else if (!strncmp("indexes=", attrs, 8)) { } else if (!strncmp("indexes=", attrs, 8)) {
index_string = attrs+8; index_string = attrs+8;
attrs += 8; attrs += 8;
@@ -375,10 +391,9 @@ hwloc_synthetic_parse_attrs(const char *attrs, const char **next_posp,
attrs += index_string_length; attrs += index_string_length;
} else { } else {
if (verbose) size_t length = strcspn(attrs, " )");
fprintf(stderr, "Unknown attribute at '%s'\n", attrs); fprintf(stderr, "hwloc/synthetic: Ignoring unknown attribute at '%s'\n", attrs);
errno = EINVAL; attrs += length;
return -1;
} }
if (' ' == *attrs) if (' ' == *attrs)
@@ -404,6 +419,32 @@ hwloc_synthetic_parse_attrs(const char *attrs, const char **next_posp,
return 0; return 0;
} }
static void
hwloc_synthetic_set_default_attrs(struct hwloc_synthetic_attr_s *sattr,
int *type_count)
{
hwloc_obj_type_t type = sattr->type;
if (type == HWLOC_OBJ_GROUP) {
if (sattr->depth == (unsigned)-1)
sattr->depth = type_count[HWLOC_OBJ_GROUP]--;
} else if (hwloc__obj_type_is_cache(type)) {
if (!sattr->memorysize) {
if (1 == sattr->depth)
/* 32KiB in L1 */
sattr->memorysize = 32*1024;
else
/* *4 at each level, starting from 1MiB for L2, unified */
sattr->memorysize = 256ULL*1024 << (2*sattr->depth);
}
} else if (type == HWLOC_OBJ_NUMANODE && !sattr->memorysize) {
/* 1GiB in memory nodes. */
sattr->memorysize = 1024*1024*1024;
}
}
/* frees level until arity = 0 */ /* frees level until arity = 0 */
static void static void
hwloc_synthetic_free_levels(struct hwloc_synthetic_backend_data_s *data) hwloc_synthetic_free_levels(struct hwloc_synthetic_backend_data_s *data)
@@ -453,6 +494,7 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
data->level[0].indexes.string = NULL; data->level[0].indexes.string = NULL;
data->level[0].indexes.array = NULL; data->level[0].indexes.array = NULL;
data->level[0].attr.memorysize = 0; data->level[0].attr.memorysize = 0;
data->level[0].attr.memorysidecachesize = 0;
data->level[0].attached = NULL; data->level[0].attached = NULL;
type_count[HWLOC_OBJ_MACHINE] = 1; type_count[HWLOC_OBJ_MACHINE] = 1;
if (*description == '(') { if (*description == '(') {
@@ -502,6 +544,7 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
if (attached) { if (attached) {
attached->attr.type = type; attached->attr.type = type;
attached->attr.memorysize = 0; attached->attr.memorysize = 0;
attached->attr.memorysidecachesize = 0;
/* attached->attr.depth and .cachetype unused */ /* attached->attr.depth and .cachetype unused */
attached->next = NULL; attached->next = NULL;
pprev = &data->level[count-1].attached; pprev = &data->level[count-1].attached;
@@ -589,7 +632,7 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
} }
if (!item) { if (!item) {
if (verbose) if (verbose)
fprintf(stderr,"Synthetic string with disallow 0 number of objects at '%s'\n", pos); fprintf(stderr,"Synthetic string with disallowed 0 number of objects at '%s'\n", pos);
errno = EINVAL; errno = EINVAL;
goto error; goto error;
} }
@@ -599,6 +642,7 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
data->level[count].indexes.string = NULL; data->level[count].indexes.string = NULL;
data->level[count].indexes.array = NULL; data->level[count].indexes.array = NULL;
data->level[count].attr.memorysize = 0; data->level[count].attr.memorysize = 0;
data->level[count].attr.memorysidecachesize = 0;
if (*next_pos == '(') { if (*next_pos == '(') {
err = hwloc_synthetic_parse_attrs(next_pos+1, &next_pos, &data->level[count].attr, &data->level[count].indexes, verbose); err = hwloc_synthetic_parse_attrs(next_pos+1, &next_pos, &data->level[count].attr, &data->level[count].indexes, verbose);
if (err < 0) if (err < 0)
@@ -784,6 +828,7 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
data->level[1].indexes.string = NULL; data->level[1].indexes.string = NULL;
data->level[1].indexes.array = NULL; data->level[1].indexes.array = NULL;
data->level[1].attr.memorysize = 0; data->level[1].attr.memorysize = 0;
data->level[1].attr.memorysidecachesize = 0;
data->level[1].totalwidth = data->level[0].totalwidth; data->level[1].totalwidth = data->level[0].totalwidth;
/* update arity to insert a single NUMA node per parent */ /* update arity to insert a single NUMA node per parent */
data->level[1].arity = data->level[0].arity; data->level[1].arity = data->level[0].arity;
@@ -791,30 +836,14 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
count++; count++;
} }
/* set default attributes that depend on the depth/hierarchy of levels */
for (i=0; i<count; i++) { for (i=0; i<count; i++) {
struct hwloc_synthetic_attached_s *attached;
struct hwloc_synthetic_level_data_s *curlevel = &data->level[i]; struct hwloc_synthetic_level_data_s *curlevel = &data->level[i];
hwloc_obj_type_t type = curlevel->attr.type; hwloc_synthetic_set_default_attrs(&curlevel->attr, type_count);
for(attached = curlevel->attached; attached != NULL; attached = attached->next)
if (type == HWLOC_OBJ_GROUP) { hwloc_synthetic_set_default_attrs(&attached->attr, type_count);
if (curlevel->attr.depth == (unsigned)-1) hwloc_synthetic_process_indexes(data, &curlevel->indexes, curlevel->totalwidth, verbose);
curlevel->attr.depth = type_count[HWLOC_OBJ_GROUP]--;
} else if (hwloc__obj_type_is_cache(type)) {
if (!curlevel->attr.memorysize) {
if (1 == curlevel->attr.depth)
/* 32Kb in L1 */
curlevel->attr.memorysize = 32*1024;
else
/* *4 at each level, starting from 1MB for L2, unified */
curlevel->attr.memorysize = 256ULL*1024 << (2*curlevel->attr.depth);
}
} else if (type == HWLOC_OBJ_NUMANODE && !curlevel->attr.memorysize) {
/* 1GB in memory nodes. */
curlevel->attr.memorysize = 1024*1024*1024;
}
hwloc_synthetic_process_indexes(data, &data->level[i].indexes, data->level[i].totalwidth, verbose);
} }
hwloc_synthetic_process_indexes(data, &data->numa_attached_indexes, data->numa_attached_nr, verbose); hwloc_synthetic_process_indexes(data, &data->numa_attached_indexes, data->numa_attached_nr, verbose);
@@ -847,6 +876,12 @@ hwloc_synthetic_set_attr(struct hwloc_synthetic_attr_s *sattr,
obj->attr->numanode.page_types[0].size = 4096; obj->attr->numanode.page_types[0].size = 4096;
obj->attr->numanode.page_types[0].count = sattr->memorysize / 4096; obj->attr->numanode.page_types[0].count = sattr->memorysize / 4096;
break; break;
case HWLOC_OBJ_MEMCACHE:
obj->attr->cache.depth = 1;
obj->attr->cache.linesize = 64;
obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
obj->attr->cache.size = sattr->memorysidecachesize;
break;
case HWLOC_OBJ_PACKAGE: case HWLOC_OBJ_PACKAGE:
case HWLOC_OBJ_DIE: case HWLOC_OBJ_DIE:
break; break;
@@ -914,6 +949,14 @@ hwloc_synthetic_insert_attached(struct hwloc_topology *topology,
hwloc__insert_object_by_cpuset(topology, NULL, child, "synthetic:attached"); hwloc__insert_object_by_cpuset(topology, NULL, child, "synthetic:attached");
if (attached->attr.memorysidecachesize) {
hwloc_obj_t mscachechild = hwloc_alloc_setup_object(topology, HWLOC_OBJ_MEMCACHE, HWLOC_UNKNOWN_INDEX);
mscachechild->cpuset = hwloc_bitmap_dup(set);
mscachechild->nodeset = hwloc_bitmap_dup(child->nodeset);
hwloc_synthetic_set_attr(&attached->attr, mscachechild);
hwloc__insert_object_by_cpuset(topology, NULL, mscachechild, "synthetic:attached:mscache");
}
hwloc_synthetic_insert_attached(topology, data, attached->next, set); hwloc_synthetic_insert_attached(topology, data, attached->next, set);
} }
@@ -965,6 +1008,14 @@ hwloc__look_synthetic(struct hwloc_topology *topology,
hwloc_synthetic_set_attr(&curlevel->attr, obj); hwloc_synthetic_set_attr(&curlevel->attr, obj);
hwloc__insert_object_by_cpuset(topology, NULL, obj, "synthetic"); hwloc__insert_object_by_cpuset(topology, NULL, obj, "synthetic");
if (type == HWLOC_OBJ_NUMANODE && curlevel->attr.memorysidecachesize) {
hwloc_obj_t mscachechild = hwloc_alloc_setup_object(topology, HWLOC_OBJ_MEMCACHE, HWLOC_UNKNOWN_INDEX);
mscachechild->cpuset = hwloc_bitmap_dup(set);
mscachechild->nodeset = hwloc_bitmap_dup(obj->nodeset);
hwloc_synthetic_set_attr(&curlevel->attr, mscachechild);
hwloc__insert_object_by_cpuset(topology, NULL, mscachechild, "synthetic:mscache");
}
} }
hwloc_synthetic_insert_attached(topology, data, curlevel->attached, set); hwloc_synthetic_insert_attached(topology, data, curlevel->attached, set);
@@ -1205,6 +1256,7 @@ hwloc__export_synthetic_indexes(hwloc_obj_t *level, unsigned total,
static int static int
hwloc__export_synthetic_obj_attr(struct hwloc_topology * topology, hwloc__export_synthetic_obj_attr(struct hwloc_topology * topology,
unsigned long flags,
hwloc_obj_t obj, hwloc_obj_t obj,
char *buffer, size_t buflen) char *buffer, size_t buflen)
{ {
@@ -1212,6 +1264,7 @@ hwloc__export_synthetic_obj_attr(struct hwloc_topology * topology,
const char * prefix = "("; const char * prefix = "(";
char cachesize[64] = ""; char cachesize[64] = "";
char memsize[64] = ""; char memsize[64] = "";
char memorysidecachesize[64] = "";
int needindexes = 0; int needindexes = 0;
if (hwloc__obj_type_is_cache(obj->type) && obj->attr->cache.size) { if (hwloc__obj_type_is_cache(obj->type) && obj->attr->cache.size) {
@@ -1224,6 +1277,19 @@ hwloc__export_synthetic_obj_attr(struct hwloc_topology * topology,
prefix, (unsigned long long) obj->attr->numanode.local_memory); prefix, (unsigned long long) obj->attr->numanode.local_memory);
prefix = separator; prefix = separator;
} }
if (obj->type == HWLOC_OBJ_NUMANODE && !(flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1)) {
hwloc_obj_t memorysidecache = obj->parent;
hwloc_uint64_t size = 0;
while (memorysidecache && memorysidecache->type == HWLOC_OBJ_MEMCACHE) {
size += memorysidecache->attr->cache.size;
memorysidecache = memorysidecache->parent;
}
if (size) {
snprintf(memorysidecachesize, sizeof(memorysidecachesize), "%smemorysidecachesize=%llu",
prefix, (unsigned long long) size);
prefix = separator;
}
}
if (!obj->logical_index /* only display indexes once per level (not for non-first NUMA children, etc.) */ if (!obj->logical_index /* only display indexes once per level (not for non-first NUMA children, etc.) */
&& (obj->type == HWLOC_OBJ_PU || obj->type == HWLOC_OBJ_NUMANODE)) { && (obj->type == HWLOC_OBJ_PU || obj->type == HWLOC_OBJ_NUMANODE)) {
hwloc_obj_t cur = obj; hwloc_obj_t cur = obj;
@@ -1235,12 +1301,12 @@ hwloc__export_synthetic_obj_attr(struct hwloc_topology * topology,
cur = cur->next_cousin; cur = cur->next_cousin;
} }
} }
if (*cachesize || *memsize || needindexes) { if (*cachesize || *memsize || *memorysidecachesize || needindexes) {
ssize_t tmplen = buflen; ssize_t tmplen = buflen;
char *tmp = buffer; char *tmp = buffer;
int res, ret = 0; int res, ret = 0;
res = hwloc_snprintf(tmp, tmplen, "%s%s%s", cachesize, memsize, needindexes ? "" : ")"); res = hwloc_snprintf(tmp, tmplen, "%s%s%s%s", cachesize, memsize, memorysidecachesize, needindexes ? "" : ")");
if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0) if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0)
return -1; return -1;
@@ -1314,7 +1380,7 @@ hwloc__export_synthetic_obj(struct hwloc_topology * topology, unsigned long flag
if (!(flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS)) { if (!(flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS)) {
/* obj attributes */ /* obj attributes */
res = hwloc__export_synthetic_obj_attr(topology, obj, tmp, tmplen); res = hwloc__export_synthetic_obj_attr(topology, flags, obj, tmp, tmplen);
if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0) if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0)
return -1; return -1;
} }
@@ -1339,7 +1405,7 @@ hwloc__export_synthetic_memory_children(struct hwloc_topology * topology, unsign
if (flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1) { if (flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1) {
/* v1: export a single NUMA child */ /* v1: export a single NUMA child */
if (parent->memory_arity > 1 || mchild->type != HWLOC_OBJ_NUMANODE) { if (parent->memory_arity > 1) {
/* not supported */ /* not supported */
if (verbose) if (verbose)
fprintf(stderr, "Cannot export to synthetic v1 if multiple memory children are attached to the same location.\n"); fprintf(stderr, "Cannot export to synthetic v1 if multiple memory children are attached to the same location.\n");
@@ -1350,6 +1416,9 @@ hwloc__export_synthetic_memory_children(struct hwloc_topology * topology, unsign
if (needprefix) if (needprefix)
hwloc__export_synthetic_add_char(&ret, &tmp, &tmplen, ' '); hwloc__export_synthetic_add_char(&ret, &tmp, &tmplen, ' ');
/* ignore memcaches and export the NUMA node */
while (mchild->type != HWLOC_OBJ_NUMANODE)
mchild = mchild->memory_first_child;
res = hwloc__export_synthetic_obj(topology, flags, mchild, 1, tmp, tmplen); res = hwloc__export_synthetic_obj(topology, flags, mchild, 1, tmp, tmplen);
if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0) if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0)
return -1; return -1;
@@ -1357,16 +1426,25 @@ hwloc__export_synthetic_memory_children(struct hwloc_topology * topology, unsign
} }
while (mchild) { while (mchild) {
/* FIXME: really recurse to export memcaches and numanode, /* The core doesn't support shared memcache for now (because ACPI and Linux don't).
* So, for each mchild here, recurse only in the first children at each level.
*
* FIXME: whenever supported by the core, really recurse to export memcaches and numanode,
* but it requires clever parsing of [ memcache [numa] [numa] ] during import, * but it requires clever parsing of [ memcache [numa] [numa] ] during import,
* better attaching of things to describe the hierarchy. * better attaching of things to describe the hierarchy.
*/ */
hwloc_obj_t numanode = mchild; hwloc_obj_t numanode = mchild;
/* only export the first NUMA node leaf of each memory child /* Only export the first NUMA node leaf of each memory child.
* FIXME: This assumes mscache aren't shared between nodes, that's true in current platforms * Memcaches are ignored here, they will be summed and exported as a single attribute
* of the NUMA node in hwloc__export_synthetic_obj().
*/ */
while (numanode && numanode->type != HWLOC_OBJ_NUMANODE) { while (numanode && numanode->type != HWLOC_OBJ_NUMANODE) {
assert(numanode->arity == 1); if (verbose && numanode->memory_arity > 1) {
static int warned = 0;
if (!warned)
fprintf(stderr, "Ignoring non-first memory children at non-first level of memory hierarchy.\n");
warned = 1;
}
numanode = numanode->memory_first_child; numanode = numanode->memory_first_child;
} }
assert(numanode); /* there's always a numanode at the bottom of the memory tree */ assert(numanode); /* there's always a numanode at the bottom of the memory tree */
@@ -1499,17 +1577,21 @@ hwloc_topology_export_synthetic(struct hwloc_topology * topology,
if (flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1) { if (flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1) {
/* v1 requires all NUMA at the same level */ /* v1 requires all NUMA at the same level */
hwloc_obj_t node; hwloc_obj_t node, parent;
signed pdepth; signed pdepth;
node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 0); node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 0);
assert(node); assert(node);
assert(hwloc__obj_type_is_normal(node->parent->type)); /* only depth-1 memory children for now */ parent = node->parent;
pdepth = node->parent->depth; while (!hwloc__obj_type_is_normal(parent->type))
parent = parent->parent;
pdepth = parent->depth;
while ((node = node->next_cousin) != NULL) { while ((node = node->next_cousin) != NULL) {
assert(hwloc__obj_type_is_normal(node->parent->type)); /* only depth-1 memory children for now */ parent = node->parent;
if (node->parent->depth != pdepth) { while (!hwloc__obj_type_is_normal(parent->type))
parent = parent->parent;
if (parent->depth != pdepth) {
if (verbose) if (verbose)
fprintf(stderr, "Cannot export to synthetic v1 if memory is attached to parents at different depths.\n"); fprintf(stderr, "Cannot export to synthetic v1 if memory is attached to parents at different depths.\n");
errno = EINVAL; errno = EINVAL;
@@ -1522,7 +1604,7 @@ hwloc_topology_export_synthetic(struct hwloc_topology * topology,
if (!(flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS)) { if (!(flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS)) {
/* obj attributes */ /* obj attributes */
res = hwloc__export_synthetic_obj_attr(topology, obj, tmp, tmplen); res = hwloc__export_synthetic_obj_attr(topology, flags, obj, tmp, tmplen);
if (res > 0) if (res > 0)
needprefix = 1; needprefix = 1;
if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0) if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0)

View File

@@ -1,6 +1,6 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2025 Inria. All rights reserved.
* Copyright © 2009-2012, 2020 Université Bordeaux * Copyright © 2009-2012, 2020 Université Bordeaux
* Copyright © 2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
@@ -11,7 +11,9 @@
#include "private/autogen/config.h" #include "private/autogen/config.h"
#include "hwloc.h" #include "hwloc.h"
#include "hwloc/windows.h"
#include "private/private.h" #include "private/private.h"
#include "private/windows.h" /* must be before windows.h */
#include "private/debug.h" #include "private/debug.h"
#include <windows.h> #include <windows.h>
@@ -54,6 +56,9 @@ typedef enum _LOGICAL_PROCESSOR_RELATIONSHIP {
RelationCache, RelationCache,
RelationProcessorPackage, RelationProcessorPackage,
RelationGroup, RelationGroup,
RelationProcessorDie,
RelationNumaNodeEx, /* only used to *request* extended numa info only, but included in RelationAll, never returned on output */
RelationProcessorModule,
RelationAll = 0xffff RelationAll = 0xffff
} LOGICAL_PROCESSOR_RELATIONSHIP; } LOGICAL_PROCESSOR_RELATIONSHIP;
#else /* HAVE_LOGICAL_PROCESSOR_RELATIONSHIP */ #else /* HAVE_LOGICAL_PROCESSOR_RELATIONSHIP */
@@ -62,28 +67,13 @@ typedef enum _LOGICAL_PROCESSOR_RELATIONSHIP {
# define RelationGroup 4 # define RelationGroup 4
# define RelationAll 0xffff # define RelationAll 0xffff
# endif /* HAVE_RELATIONPROCESSORPACKAGE */ # endif /* HAVE_RELATIONPROCESSORPACKAGE */
# ifndef HAVE_RELATIONPROCESSORDIE
# define RelationProcessorDie 5
# define RelationNumaNodeEx 6
# define RelationProcessorModule 7
# endif
#endif /* HAVE_LOGICAL_PROCESSOR_RELATIONSHIP */ #endif /* HAVE_LOGICAL_PROCESSOR_RELATIONSHIP */
#ifndef HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION
typedef struct _SYSTEM_LOGICAL_PROCESSOR_INFORMATION {
ULONG_PTR ProcessorMask;
LOGICAL_PROCESSOR_RELATIONSHIP Relationship;
_ANONYMOUS_UNION
union {
struct {
BYTE flags;
} ProcessorCore;
struct {
DWORD NodeNumber;
} NumaNode;
CACHE_DESCRIPTOR Cache;
ULONGLONG Reserved[2];
} DUMMYUNIONNAME;
} SYSTEM_LOGICAL_PROCESSOR_INFORMATION, *PSYSTEM_LOGICAL_PROCESSOR_INFORMATION;
#endif
/* Extended interface, for group support */
#ifndef HAVE_GROUP_AFFINITY #ifndef HAVE_GROUP_AFFINITY
typedef struct _GROUP_AFFINITY { typedef struct _GROUP_AFFINITY {
KAFFINITY Mask; KAFFINITY Mask;
@@ -92,35 +82,40 @@ typedef struct _GROUP_AFFINITY {
} GROUP_AFFINITY, *PGROUP_AFFINITY; } GROUP_AFFINITY, *PGROUP_AFFINITY;
#endif #endif
#ifndef HAVE_PROCESSOR_RELATIONSHIP /* always use our own structure because the EfficiencyClass field didn't exist before Win10 */
typedef struct HWLOC_PROCESSOR_RELATIONSHIP { typedef struct HWLOC_PROCESSOR_RELATIONSHIP {
BYTE Flags; BYTE Flags;
BYTE EfficiencyClass; /* for RelationProcessorCore, higher means greater performance but less efficiency, only available in Win10+ */ BYTE EfficiencyClass; /* for RelationProcessorCore, higher means greater performance but less efficiency */
BYTE Reserved[20]; BYTE Reserved[20];
WORD GroupCount; WORD GroupCount;
GROUP_AFFINITY GroupMask[ANYSIZE_ARRAY]; GROUP_AFFINITY GroupMask[ANYSIZE_ARRAY];
} PROCESSOR_RELATIONSHIP, *PPROCESSOR_RELATIONSHIP; } HWLOC_PROCESSOR_RELATIONSHIP;
#endif
#ifndef HAVE_NUMA_NODE_RELATIONSHIP /* always use our own structure because the GroupCount and GroupMasks fields didn't exist in some Win10 */
typedef struct _NUMA_NODE_RELATIONSHIP { typedef struct HWLOC_NUMA_NODE_RELATIONSHIP {
DWORD NodeNumber; DWORD NodeNumber;
BYTE Reserved[20]; BYTE Reserved[18];
GROUP_AFFINITY GroupMask; WORD GroupCount;
} NUMA_NODE_RELATIONSHIP, *PNUMA_NODE_RELATIONSHIP; _ANONYMOUS_UNION
#endif union {
GROUP_AFFINITY GroupMask;
GROUP_AFFINITY GroupMasks[ANYSIZE_ARRAY];
} DUMMYUNIONNAME;
} HWLOC_NUMA_NODE_RELATIONSHIP;
#ifndef HAVE_CACHE_RELATIONSHIP typedef struct HWLOC_CACHE_RELATIONSHIP {
typedef struct _CACHE_RELATIONSHIP {
BYTE Level; BYTE Level;
BYTE Associativity; BYTE Associativity;
WORD LineSize; WORD LineSize;
DWORD CacheSize; DWORD CacheSize;
PROCESSOR_CACHE_TYPE Type; PROCESSOR_CACHE_TYPE Type;
BYTE Reserved[20]; BYTE Reserved[18];
GROUP_AFFINITY GroupMask; WORD GroupCount;
} CACHE_RELATIONSHIP, *PCACHE_RELATIONSHIP; union {
#endif GROUP_AFFINITY GroupMask;
GROUP_AFFINITY GroupMasks[ANYSIZE_ARRAY];
} DUMMYUNIONNAME;
} HWLOC_CACHE_RELATIONSHIP;
#ifndef HAVE_PROCESSOR_GROUP_INFO #ifndef HAVE_PROCESSOR_GROUP_INFO
typedef struct _PROCESSOR_GROUP_INFO { typedef struct _PROCESSOR_GROUP_INFO {
@@ -140,20 +135,19 @@ typedef struct _GROUP_RELATIONSHIP {
} GROUP_RELATIONSHIP, *PGROUP_RELATIONSHIP; } GROUP_RELATIONSHIP, *PGROUP_RELATIONSHIP;
#endif #endif
#ifndef HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX /* always use our own structure because we need our own HWLOC_PROCESSOR/CACHE/NUMA_NODE_RELATIONSHIP */
typedef struct _SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX { typedef struct HWLOC_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX {
LOGICAL_PROCESSOR_RELATIONSHIP Relationship; LOGICAL_PROCESSOR_RELATIONSHIP Relationship;
DWORD Size; DWORD Size;
_ANONYMOUS_UNION _ANONYMOUS_UNION
union { union {
PROCESSOR_RELATIONSHIP Processor; HWLOC_PROCESSOR_RELATIONSHIP Processor;
NUMA_NODE_RELATIONSHIP NumaNode; HWLOC_NUMA_NODE_RELATIONSHIP NumaNode;
CACHE_RELATIONSHIP Cache; HWLOC_CACHE_RELATIONSHIP Cache;
GROUP_RELATIONSHIP Group; GROUP_RELATIONSHIP Group;
/* Odd: no member to tell the cpu mask of the package... */ /* Odd: no member to tell the cpu mask of the package... */
} DUMMYUNIONNAME; } DUMMYUNIONNAME;
} SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, *PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX; } HWLOC_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX;
#endif
#ifndef HAVE_PSAPI_WORKING_SET_EX_BLOCK #ifndef HAVE_PSAPI_WORKING_SET_EX_BLOCK
typedef union _PSAPI_WORKING_SET_EX_BLOCK { typedef union _PSAPI_WORKING_SET_EX_BLOCK {
@@ -190,9 +184,6 @@ typedef struct _PROCESSOR_NUMBER {
typedef WORD (WINAPI *PFN_GETACTIVEPROCESSORGROUPCOUNT)(void); typedef WORD (WINAPI *PFN_GETACTIVEPROCESSORGROUPCOUNT)(void);
static PFN_GETACTIVEPROCESSORGROUPCOUNT GetActiveProcessorGroupCountProc; static PFN_GETACTIVEPROCESSORGROUPCOUNT GetActiveProcessorGroupCountProc;
static unsigned long nr_processor_groups = 1;
static unsigned long max_numanode_index = 0;
typedef WORD (WINAPI *PFN_GETACTIVEPROCESSORCOUNT)(WORD); typedef WORD (WINAPI *PFN_GETACTIVEPROCESSORCOUNT)(WORD);
static PFN_GETACTIVEPROCESSORCOUNT GetActiveProcessorCountProc; static PFN_GETACTIVEPROCESSORCOUNT GetActiveProcessorCountProc;
@@ -202,10 +193,7 @@ static PFN_GETCURRENTPROCESSORNUMBER GetCurrentProcessorNumberProc;
typedef VOID (WINAPI *PFN_GETCURRENTPROCESSORNUMBEREX)(PPROCESSOR_NUMBER); typedef VOID (WINAPI *PFN_GETCURRENTPROCESSORNUMBEREX)(PPROCESSOR_NUMBER);
static PFN_GETCURRENTPROCESSORNUMBEREX GetCurrentProcessorNumberExProc; static PFN_GETCURRENTPROCESSORNUMBEREX GetCurrentProcessorNumberExProc;
typedef BOOL (WINAPI *PFN_GETLOGICALPROCESSORINFORMATION)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION Buffer, PDWORD ReturnLength); typedef BOOL (WINAPI *PFN_GETLOGICALPROCESSORINFORMATIONEX)(LOGICAL_PROCESSOR_RELATIONSHIP relationship, HWLOC_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *Buffer, PDWORD ReturnLength);
static PFN_GETLOGICALPROCESSORINFORMATION GetLogicalProcessorInformationProc;
typedef BOOL (WINAPI *PFN_GETLOGICALPROCESSORINFORMATIONEX)(LOGICAL_PROCESSOR_RELATIONSHIP relationship, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Buffer, PDWORD ReturnLength);
static PFN_GETLOGICALPROCESSORINFORMATIONEX GetLogicalProcessorInformationExProc; static PFN_GETLOGICALPROCESSORINFORMATIONEX GetLogicalProcessorInformationExProc;
typedef BOOL (WINAPI *PFN_SETTHREADGROUPAFFINITY)(HANDLE hThread, const GROUP_AFFINITY *GroupAffinity, PGROUP_AFFINITY PreviousGroupAffinity); typedef BOOL (WINAPI *PFN_SETTHREADGROUPAFFINITY)(HANDLE hThread, const GROUP_AFFINITY *GroupAffinity, PGROUP_AFFINITY PreviousGroupAffinity);
@@ -240,14 +228,12 @@ static void hwloc_win_get_function_ptrs(void)
#pragma GCC diagnostic ignored "-Wcast-function-type" #pragma GCC diagnostic ignored "-Wcast-function-type"
#endif #endif
kernel32 = LoadLibrary("kernel32.dll"); kernel32 = LoadLibrary(TEXT("kernel32.dll"));
if (kernel32) { if (kernel32) {
GetActiveProcessorGroupCountProc = GetActiveProcessorGroupCountProc =
(PFN_GETACTIVEPROCESSORGROUPCOUNT) GetProcAddress(kernel32, "GetActiveProcessorGroupCount"); (PFN_GETACTIVEPROCESSORGROUPCOUNT) GetProcAddress(kernel32, "GetActiveProcessorGroupCount");
GetActiveProcessorCountProc = GetActiveProcessorCountProc =
(PFN_GETACTIVEPROCESSORCOUNT) GetProcAddress(kernel32, "GetActiveProcessorCount"); (PFN_GETACTIVEPROCESSORCOUNT) GetProcAddress(kernel32, "GetActiveProcessorCount");
GetLogicalProcessorInformationProc =
(PFN_GETLOGICALPROCESSORINFORMATION) GetProcAddress(kernel32, "GetLogicalProcessorInformation");
GetCurrentProcessorNumberProc = GetCurrentProcessorNumberProc =
(PFN_GETCURRENTPROCESSORNUMBER) GetProcAddress(kernel32, "GetCurrentProcessorNumber"); (PFN_GETCURRENTPROCESSORNUMBER) GetProcAddress(kernel32, "GetCurrentProcessorNumber");
GetCurrentProcessorNumberExProc = GetCurrentProcessorNumberExProc =
@@ -270,16 +256,13 @@ static void hwloc_win_get_function_ptrs(void)
(PFN_VIRTUALFREEEX) GetProcAddress(kernel32, "VirtualFreeEx"); (PFN_VIRTUALFREEEX) GetProcAddress(kernel32, "VirtualFreeEx");
} }
if (GetActiveProcessorGroupCountProc)
nr_processor_groups = GetActiveProcessorGroupCountProc();
if (!QueryWorkingSetExProc) { if (!QueryWorkingSetExProc) {
HMODULE psapi = LoadLibrary("psapi.dll"); HMODULE psapi = LoadLibrary(TEXT("psapi.dll"));
if (psapi) if (psapi)
QueryWorkingSetExProc = (PFN_QUERYWORKINGSETEX) GetProcAddress(psapi, "QueryWorkingSetEx"); QueryWorkingSetExProc = (PFN_QUERYWORKINGSETEX) GetProcAddress(psapi, "QueryWorkingSetEx");
} }
ntdll = GetModuleHandle("ntdll"); ntdll = GetModuleHandle(TEXT("ntdll"));
RtlGetVersionProc = (PFN_RTLGETVERSION) GetProcAddress(ntdll, "RtlGetVersion"); RtlGetVersionProc = (PFN_RTLGETVERSION) GetProcAddress(ntdll, "RtlGetVersion");
#if HWLOC_HAVE_GCC_W_CAST_FUNCTION_TYPE #if HWLOC_HAVE_GCC_W_CAST_FUNCTION_TYPE
@@ -363,6 +346,173 @@ static int hwloc_bitmap_to_single_ULONG_PTR(hwloc_const_bitmap_t set, unsigned *
return 0; return 0;
} }
/**********************
* Processor Groups
*/
/* NOTE(review): presumably the largest NUMA node index discovered so far;
 * it is not updated in this section — confirm against the discovery code.
 */
static unsigned long max_numanode_index = 0;
/* Number of Windows processor groups. Stays at 1 unless
 * hwloc_win_get_processor_groups() succeeds; reset to 1 on failure and
 * by hwloc_win_free_processor_groups().
 */
static unsigned long nr_processor_groups = 1;
/* Array with one cpuset slot per processor group, allocated by
 * hwloc_win_get_processor_groups(). NULL when the groups were not
 * (successfully) queried or after they were freed.
 */
static hwloc_cpuset_t * processor_group_cpusets = NULL;
static void
hwloc_win_get_processor_groups(void)
{
HWLOC_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *procInfoTotal, *tmpprocInfoTotal, *procInfo;
DWORD length;
unsigned i;
hwloc_debug("querying windows processor groups\n");
if (!GetLogicalProcessorInformationExProc)
goto error;
nr_processor_groups = GetActiveProcessorGroupCountProc();
if (!nr_processor_groups)
goto error;
hwloc_debug("found %lu windows processor groups\n", nr_processor_groups);
if (nr_processor_groups > 1 && SIZEOF_VOID_P == 4) {
if (HWLOC_SHOW_CRITICAL_ERRORS())
fprintf(stderr, "hwloc/windows: multiple processor groups found on 32bits Windows, topology may be invalid/incomplete.\n");
}
length = 0;
procInfoTotal = NULL;
while (1) {
if (GetLogicalProcessorInformationExProc(RelationGroup, procInfoTotal, &length))
break;
if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
goto error;
tmpprocInfoTotal = realloc(procInfoTotal, length);
if (!tmpprocInfoTotal)
goto error_with_procinfo;
procInfoTotal = tmpprocInfoTotal;
}
processor_group_cpusets = calloc(nr_processor_groups, sizeof(*processor_group_cpusets));
if (!processor_group_cpusets)
goto error_with_procinfo;
for (procInfo = procInfoTotal;
(void*) procInfo < (void*) ((uintptr_t) procInfoTotal + length);
procInfo = (void*) ((uintptr_t) procInfo + procInfo->Size)) {
unsigned id;
assert(procInfo->Relationship == RelationGroup);
hwloc_debug("Found %u active windows processor groups\n",
(unsigned) procInfo->Group.ActiveGroupCount);
for (id = 0; id < procInfo->Group.ActiveGroupCount; id++) {
KAFFINITY mask;
hwloc_bitmap_t set;
set = hwloc_bitmap_alloc();
if (!set)
goto error_with_cpusets;
mask = procInfo->Group.GroupInfo[id].ActiveProcessorMask;
hwloc_debug("group %u with %u cpus mask 0x%llx\n", id,
(unsigned) procInfo->Group.GroupInfo[id].ActiveProcessorCount, (unsigned long long) mask);
/* KAFFINITY is ULONG_PTR */
hwloc_bitmap_set_ith_ULONG_PTR(set, id, mask);
/* FIXME: what if running 32bits on a 64bits windows with 64-processor groups?
* ULONG_PTR is 32bits, so half the group is invisible?
* maybe scale id to id*8/sizeof(ULONG_PTR) so that groups are 64-PU aligned?
*/
hwloc_debug_2args_bitmap("group %u %d bitmap %s\n", id, procInfo->Group.GroupInfo[id].ActiveProcessorCount, set);
processor_group_cpusets[id] = set;
}
}
free(procInfoTotal);
return;
error_with_cpusets:
for(i=0; i<nr_processor_groups; i++) {
if (processor_group_cpusets[i])
hwloc_bitmap_free(processor_group_cpusets[i]);
}
free(processor_group_cpusets);
processor_group_cpusets = NULL;
error_with_procinfo:
free(procInfoTotal);
error:
/* on error set nr to 1 and keep cpusets NULL. We'll use the topology cpuset whenever needed */
nr_processor_groups = 1;
}
/* Release the cached processor-group cpusets and return to the
 * "single group, no cpusets" state (nr_processor_groups == 1,
 * processor_group_cpusets == NULL).
 *
 * Safe to call when the array is NULL, which is the state left by a
 * failed hwloc_win_get_processor_groups() or by a previous call here.
 */
static void
hwloc_win_free_processor_groups(void)
{
  if (processor_group_cpusets) {
    unsigned i;
    for(i=0; i<nr_processor_groups; i++) {
      /* slots may be empty if the group lookup filled fewer entries */
      if (processor_group_cpusets[i])
        hwloc_bitmap_free(processor_group_cpusets[i]);
    }
    free(processor_group_cpusets);
    processor_group_cpusets = NULL;
  }
  nr_processor_groups = 1;
}
/* Return the number of Windows processor groups recorded in
 * nr_processor_groups.
 *
 * topology: must be loaded and describe this system.
 * flags:    must be 0.
 *
 * Returns -1 with errno set to EINVAL if either requirement is not met.
 */
int
hwloc_windows_get_nr_processor_groups(hwloc_topology_t topology, unsigned long flags)
{
  const int usable = topology->is_loaded && topology->is_thissystem;

  if (!usable || flags != 0) {
    errno = EINVAL;
    return -1;
  }

  return (int) nr_processor_groups;
}
/* Copy the cpuset of processor group pg_index into cpuset.
 *
 * topology: must be loaded and describe this system.
 * pg_index: group index, must be below nr_processor_groups.
 * cpuset:   destination bitmap, must not be NULL.
 * flags:    must be 0.
 *
 * Returns 0 on success. Returns -1 with errno set to EINVAL on invalid
 * arguments, or to ENOENT when pg_index is out of range or no cpuset is
 * recorded for that group.
 */
int
hwloc_windows_get_processor_group_cpuset(hwloc_topology_t topology, unsigned pg_index, hwloc_cpuset_t cpuset, unsigned long flags)
{
  hwloc_cpuset_t group_set;

  if (!topology->is_loaded || !topology->is_thissystem || !cpuset || flags) {
    errno = EINVAL;
    return -1;
  }

  if (pg_index >= nr_processor_groups) {
    errno = ENOENT;
    return -1;
  }

  if (processor_group_cpusets) {
    group_set = processor_group_cpusets[pg_index];
    if (!group_set) {
      errno = ENOENT;
      return -1;
    }
  } else {
    /* we found no processor groups, return the entire topology as a single one */
    assert(nr_processor_groups == 1);
    group_set = topology->levels[0][0]->cpuset;
  }

  hwloc_bitmap_copy(cpuset, group_set);
  return 0;
}
/************************************************************** /**************************************************************
* hwloc PU numbering with respect to Windows processor groups * hwloc PU numbering with respect to Windows processor groups
* *
@@ -845,9 +995,15 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
OSVERSIONINFOEX osvi; OSVERSIONINFOEX osvi;
char versionstr[20]; char versionstr[20];
char hostname[122] = ""; char hostname[122] = "";
unsigned hostname_size = sizeof(hostname); #if !defined(__CYGWIN__)
DWORD hostname_size = sizeof(hostname);
#else
size_t hostname_size = sizeof(hostname);
#endif
int has_efficiencyclass = 0; int has_efficiencyclass = 0;
struct hwloc_win_efficiency_classes eclasses; struct hwloc_win_efficiency_classes eclasses;
char *env = getenv("HWLOC_WINDOWS_PROCESSOR_GROUP_OBJS");
int keep_pgroup_objs = (env && atoi(env));
assert(dstatus->phase == HWLOC_DISC_PHASE_CPU); assert(dstatus->phase == HWLOC_DISC_PHASE_CPU);
@@ -878,137 +1034,8 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
GetSystemInfo(&SystemInfo); GetSystemInfo(&SystemInfo);
if (!GetLogicalProcessorInformationExProc && GetLogicalProcessorInformationProc) {
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION procInfo, tmpprocInfo;
unsigned id;
unsigned i;
struct hwloc_obj *obj;
hwloc_obj_type_t type;
length = 0;
procInfo = NULL;
while (1) {
if (GetLogicalProcessorInformationProc(procInfo, &length))
break;
if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
return -1;
tmpprocInfo = realloc(procInfo, length);
if (!tmpprocInfo) {
free(procInfo);
goto out;
}
procInfo = tmpprocInfo;
}
assert(!length || procInfo);
for (i = 0; i < length / sizeof(*procInfo); i++) {
/* Ignore unknown caches */
if (procInfo->Relationship == RelationCache
&& procInfo->Cache.Type != CacheUnified
&& procInfo->Cache.Type != CacheData
&& procInfo->Cache.Type != CacheInstruction)
continue;
id = HWLOC_UNKNOWN_INDEX;
switch (procInfo[i].Relationship) {
case RelationNumaNode:
type = HWLOC_OBJ_NUMANODE;
id = procInfo[i].NumaNode.NodeNumber;
gotnuma++;
if (id > max_numanode_index)
max_numanode_index = id;
break;
case RelationProcessorPackage:
type = HWLOC_OBJ_PACKAGE;
break;
case RelationCache:
type = (procInfo[i].Cache.Type == CacheInstruction ? HWLOC_OBJ_L1ICACHE : HWLOC_OBJ_L1CACHE) + procInfo[i].Cache.Level - 1;
break;
case RelationProcessorCore:
type = HWLOC_OBJ_CORE;
break;
case RelationGroup:
default:
type = HWLOC_OBJ_GROUP;
break;
}
if (!hwloc_filter_check_keep_object_type(topology, type))
continue;
obj = hwloc_alloc_setup_object(topology, type, id);
obj->cpuset = hwloc_bitmap_alloc();
hwloc_debug("%s#%u mask %llx\n", hwloc_obj_type_string(type), id, (unsigned long long) procInfo[i].ProcessorMask);
/* ProcessorMask is a ULONG_PTR */
hwloc_bitmap_set_ith_ULONG_PTR(obj->cpuset, 0, procInfo[i].ProcessorMask);
hwloc_debug_2args_bitmap("%s#%u bitmap %s\n", hwloc_obj_type_string(type), id, obj->cpuset);
switch (type) {
case HWLOC_OBJ_NUMANODE:
{
ULONGLONG avail;
obj->nodeset = hwloc_bitmap_alloc();
hwloc_bitmap_set(obj->nodeset, id);
if ((GetNumaAvailableMemoryNodeExProc && GetNumaAvailableMemoryNodeExProc(id, &avail))
|| (GetNumaAvailableMemoryNodeProc && GetNumaAvailableMemoryNodeProc(id, &avail))) {
obj->attr->numanode.local_memory = avail;
gotnumamemory++;
}
obj->attr->numanode.page_types_len = 2;
obj->attr->numanode.page_types = malloc(2 * sizeof(*obj->attr->numanode.page_types));
memset(obj->attr->numanode.page_types, 0, 2 * sizeof(*obj->attr->numanode.page_types));
obj->attr->numanode.page_types_len = 1;
obj->attr->numanode.page_types[0].size = SystemInfo.dwPageSize;
#if HAVE_DECL__SC_LARGE_PAGESIZE
obj->attr->numanode.page_types_len++;
obj->attr->numanode.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
#endif
break;
}
case HWLOC_OBJ_L1CACHE:
case HWLOC_OBJ_L2CACHE:
case HWLOC_OBJ_L3CACHE:
case HWLOC_OBJ_L4CACHE:
case HWLOC_OBJ_L5CACHE:
case HWLOC_OBJ_L1ICACHE:
case HWLOC_OBJ_L2ICACHE:
case HWLOC_OBJ_L3ICACHE:
obj->attr->cache.size = procInfo[i].Cache.Size;
obj->attr->cache.associativity = procInfo[i].Cache.Associativity == CACHE_FULLY_ASSOCIATIVE ? -1 : procInfo[i].Cache.Associativity ;
obj->attr->cache.linesize = procInfo[i].Cache.LineSize;
obj->attr->cache.depth = procInfo[i].Cache.Level;
switch (procInfo->Cache.Type) {
case CacheUnified:
obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
break;
case CacheData:
obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
break;
case CacheInstruction:
obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
break;
default:
hwloc_free_unlinked_object(obj);
continue;
}
break;
case HWLOC_OBJ_GROUP:
obj->attr->group.kind = procInfo[i].Relationship == RelationGroup ? HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP : HWLOC_GROUP_KIND_WINDOWS_RELATIONSHIP_UNKNOWN;
break;
default:
break;
}
hwloc__insert_object_by_cpuset(topology, NULL, obj, "windows:GetLogicalProcessorInformation");
}
free(procInfo);
}
if (GetLogicalProcessorInformationExProc) { if (GetLogicalProcessorInformationExProc) {
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX procInfoTotal, tmpprocInfoTotal, procInfo; HWLOC_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *procInfoTotal, *tmpprocInfoTotal, *procInfo;
unsigned id; unsigned id;
struct hwloc_obj *obj; struct hwloc_obj *obj;
hwloc_obj_type_t type; hwloc_obj_type_t type;
@@ -1036,19 +1063,32 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
unsigned efficiency_class = 0; unsigned efficiency_class = 0;
GROUP_AFFINITY *GroupMask; GROUP_AFFINITY *GroupMask;
/* Ignore unknown caches */ if (procInfo->Relationship == RelationCache) {
if (procInfo->Relationship == RelationCache if (!topology->want_some_cpu_caches)
&& procInfo->Cache.Type != CacheUnified /* TODO: check if RelationAll&~RelationCache works? */
&& procInfo->Cache.Type != CacheData continue;
&& procInfo->Cache.Type != CacheInstruction) if (procInfo->Cache.Type != CacheUnified
continue; && procInfo->Cache.Type != CacheData
&& procInfo->Cache.Type != CacheInstruction)
/* Ignore unknown caches */
continue;
}
id = HWLOC_UNKNOWN_INDEX; id = HWLOC_UNKNOWN_INDEX;
switch (procInfo->Relationship) { switch (procInfo->Relationship) {
case RelationNumaNodeEx: /* only used on input anyway */
case RelationNumaNode: case RelationNumaNode:
type = HWLOC_OBJ_NUMANODE; type = HWLOC_OBJ_NUMANODE;
num = 1; /* Starting with Windows 11 and Server 2022, the GroupCount field is valid and >=1
GroupMask = &procInfo->NumaNode.GroupMask; * and we may read GroupMasks[]. Older releases have GroupCount==0 and we must read GroupMask.
*/
if (procInfo->NumaNode.GroupCount) {
num = procInfo->NumaNode.GroupCount;
GroupMask = procInfo->NumaNode.GroupMasks;
} else {
num = 1;
GroupMask = &procInfo->NumaNode.GroupMask;
}
id = procInfo->NumaNode.NodeNumber; id = procInfo->NumaNode.NodeNumber;
gotnuma++; gotnuma++;
if (id > max_numanode_index) if (id > max_numanode_index)
@@ -1056,23 +1096,35 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
break; break;
case RelationProcessorPackage: case RelationProcessorPackage:
type = HWLOC_OBJ_PACKAGE; type = HWLOC_OBJ_PACKAGE;
num = procInfo->Processor.GroupCount;
GroupMask = procInfo->Processor.GroupMask;
break;
case RelationProcessorDie:
type = HWLOC_OBJ_DIE;
num = procInfo->Processor.GroupCount; num = procInfo->Processor.GroupCount;
GroupMask = procInfo->Processor.GroupMask; GroupMask = procInfo->Processor.GroupMask;
break; break;
case RelationProcessorModule:
type = HWLOC_OBJ_GROUP;
num = procInfo->Processor.GroupCount;
GroupMask = procInfo->Processor.GroupMask;
break;
case RelationCache: case RelationCache:
type = (procInfo->Cache.Type == CacheInstruction ? HWLOC_OBJ_L1ICACHE : HWLOC_OBJ_L1CACHE) + procInfo->Cache.Level - 1; type = (procInfo->Cache.Type == CacheInstruction ? HWLOC_OBJ_L1ICACHE : HWLOC_OBJ_L1CACHE) + procInfo->Cache.Level - 1;
num = 1; /* GroupCount added approximately with NumaNode.GroupCount above */
GroupMask = &procInfo->Cache.GroupMask; if (procInfo->Cache.GroupCount) {
num = procInfo->Cache.GroupCount;
GroupMask = procInfo->Cache.GroupMasks;
} else {
num = 1;
GroupMask = &procInfo->Cache.GroupMask;
}
break; break;
case RelationProcessorCore: case RelationProcessorCore:
type = HWLOC_OBJ_CORE; type = HWLOC_OBJ_CORE;
num = procInfo->Processor.GroupCount; num = procInfo->Processor.GroupCount;
GroupMask = procInfo->Processor.GroupMask; GroupMask = procInfo->Processor.GroupMask;
if (has_efficiencyclass) efficiency_class = procInfo->Processor.EfficiencyClass;
/* the EfficiencyClass field didn't exist before Windows10 and recent MSVC headers,
* so just access it manually instead of trying to detect it.
*/
efficiency_class = * ((&procInfo->Processor.Flags) + 1);
break; break;
case RelationGroup: case RelationGroup:
/* So strange an interface... */ /* So strange an interface... */
@@ -1097,11 +1149,12 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
groups_pu_set = hwloc_bitmap_alloc(); groups_pu_set = hwloc_bitmap_alloc();
hwloc_bitmap_or(groups_pu_set, groups_pu_set, set); hwloc_bitmap_or(groups_pu_set, groups_pu_set, set);
if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) { /* Ignore processor groups unless requested and filtered-in */
if (keep_pgroup_objs && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) {
obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, id); obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, id);
obj->cpuset = set; obj->cpuset = set;
obj->attr->group.kind = HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP; obj->attr->group.kind = HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP;
hwloc__insert_object_by_cpuset(topology, NULL, obj, "windows:GetLogicalProcessorInformation:ProcessorGroup"); hwloc__insert_object_by_cpuset(topology, NULL, obj, "windows:GetLogicalProcessorInformationEx:ProcessorGroup");
} else } else
hwloc_bitmap_free(set); hwloc_bitmap_free(set);
} }
@@ -1177,6 +1230,19 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
continue; continue;
} }
break; break;
case HWLOC_OBJ_GROUP:
switch (procInfo->Relationship) {
case RelationGroup:
obj->attr->group.kind = HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP;
break;
case RelationProcessorModule:
obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_MODULE;
obj->subtype = strdup("Module");
break;
default:
obj->attr->group.kind = HWLOC_GROUP_KIND_WINDOWS_RELATIONSHIP_UNKNOWN;
}
break;
default: default:
break; break;
} }
@@ -1328,11 +1394,13 @@ hwloc_set_windows_hooks(struct hwloc_binding_hooks *hooks,
static int hwloc_windows_component_init(unsigned long flags __hwloc_attribute_unused) static int hwloc_windows_component_init(unsigned long flags __hwloc_attribute_unused)
{ {
hwloc_win_get_function_ptrs(); hwloc_win_get_function_ptrs();
hwloc_win_get_processor_groups();
return 0; return 0;
} }
static void hwloc_windows_component_finalize(unsigned long flags __hwloc_attribute_unused) static void hwloc_windows_component_finalize(unsigned long flags __hwloc_attribute_unused)
{ {
hwloc_win_free_processor_groups();
} }
static struct hwloc_backend * static struct hwloc_backend *

View File

@@ -1,17 +1,21 @@
/* /*
* Copyright © 2010-2020 Inria. All rights reserved. * Copyright © 2010-2025 Inria. All rights reserved.
* Copyright © 2010-2013 Université Bordeaux * Copyright © 2010-2013 Université Bordeaux
* Copyright © 2010-2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2010-2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
* *
* *
* This backend is only used when the operating system does not export * This backend is mostly used when the operating system does not export
* the necessary hardware topology information to user-space applications. * the necessary hardware topology information to user-space applications.
* Currently, only the FreeBSD backend relies on this x86 backend. * Currently, FreeBSD and NetBSD only add PUs and then fallback to this
* backend for CPU/Cache discovery.
* *
* Other backends such as Linux have their own way to retrieve various * Other backends such as Linux have their own way to retrieve various
* pieces of hardware topology information from the operating system * pieces of hardware topology information from the operating system
* on various architectures, without having to use this x86-specific code. * on various architectures, without having to use this x86-specific code.
* But this backend is still used after them to annotate some objects with
* additional details (CPU info in Package, Inclusiveness in Caches).
* It may also be enabled manually to work-around bugs in native OS discovery.
*/ */
#include "private/autogen/config.h" #include "private/autogen/config.h"
@@ -35,6 +39,12 @@ struct hwloc_x86_backend_data_s {
int apicid_unique; int apicid_unique;
char *src_cpuiddump_path; char *src_cpuiddump_path;
int is_knl; int is_knl;
int is_hybrid;
int found_die_ids;
int found_complex_ids;
int found_unit_ids;
int found_module_ids;
int found_tile_ids;
}; };
/************************************ /************************************
@@ -77,7 +87,7 @@ cpuiddump_read(const char *dirpath, unsigned idx)
cpuiddump = malloc(sizeof(*cpuiddump)); cpuiddump = malloc(sizeof(*cpuiddump));
if (!cpuiddump) { if (!cpuiddump) {
fprintf(stderr, "Failed to allocate cpuiddump for PU #%u, ignoring cpuiddump.\n", idx); fprintf(stderr, "hwloc/x86: Failed to allocate cpuiddump for PU #%u, ignoring cpuiddump.\n", idx);
goto out; goto out;
} }
@@ -88,7 +98,7 @@ cpuiddump_read(const char *dirpath, unsigned idx)
snprintf(filename, filenamelen, "%s/pu%u", dirpath, idx); snprintf(filename, filenamelen, "%s/pu%u", dirpath, idx);
file = fopen(filename, "r"); file = fopen(filename, "r");
if (!file) { if (!file) {
fprintf(stderr, "Could not read dumped cpuid file %s, ignoring cpuiddump.\n", filename); fprintf(stderr, "hwloc/x86: Could not read dumped cpuid file %s, ignoring cpuiddump.\n", filename);
goto out_with_filename; goto out_with_filename;
} }
@@ -97,7 +107,7 @@ cpuiddump_read(const char *dirpath, unsigned idx)
nr++; nr++;
cpuiddump->entries = malloc(nr * sizeof(struct cpuiddump_entry)); cpuiddump->entries = malloc(nr * sizeof(struct cpuiddump_entry));
if (!cpuiddump->entries) { if (!cpuiddump->entries) {
fprintf(stderr, "Failed to allocate %u cpuiddump entries for PU #%u, ignoring cpuiddump.\n", nr, idx); fprintf(stderr, "hwloc/x86: Failed to allocate %u cpuiddump entries for PU #%u, ignoring cpuiddump.\n", nr, idx);
goto out_with_file; goto out_with_file;
} }
@@ -153,7 +163,7 @@ cpuiddump_find_by_input(unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *e
return; return;
} }
fprintf(stderr, "Couldn't find %x,%x,%x,%x in dumped cpuid, returning 0s.\n", fprintf(stderr, "hwloc/x86: Couldn't find %x,%x,%x,%x in dumped cpuid, returning 0s.\n",
*eax, *ebx, *ecx, *edx); *eax, *ebx, *ecx, *edx);
*eax = 0; *eax = 0;
*ebx = 0; *ebx = 0;
@@ -207,7 +217,8 @@ struct procinfo {
#define TILE 4 #define TILE 4
#define MODULE 5 #define MODULE 5
#define DIE 6 #define DIE 6
#define HWLOC_X86_PROCINFO_ID_NR 7 #define COMPLEX 7
#define HWLOC_X86_PROCINFO_ID_NR 8
unsigned ids[HWLOC_X86_PROCINFO_ID_NR]; unsigned ids[HWLOC_X86_PROCINFO_ID_NR];
unsigned *otherids; unsigned *otherids;
unsigned levels; unsigned levels;
@@ -311,7 +322,7 @@ static void read_amd_caches_topoext(struct procinfo *infos, struct cpuiddump *sr
/* the code below doesn't want any other cache yet */ /* the code below doesn't want any other cache yet */
assert(!infos->numcaches); assert(!infos->numcaches);
for (cachenum = 0; ; cachenum++) { for (cachenum = 0; cachenum<16 /* guard */; cachenum++) {
eax = 0x8000001d; eax = 0x8000001d;
ecx = cachenum; ecx = cachenum;
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
@@ -322,7 +333,7 @@ static void read_amd_caches_topoext(struct procinfo *infos, struct cpuiddump *sr
cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache)); cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache));
if (cache) { if (cache) {
for (cachenum = 0; ; cachenum++) { for (cachenum = 0; cachenum<16 /* guard */; cachenum++) {
unsigned long linesize, linepart, ways, sets; unsigned long linesize, linepart, ways, sets;
eax = 0x8000001d; eax = 0x8000001d;
ecx = cachenum; ecx = cachenum;
@@ -375,7 +386,7 @@ static void read_intel_caches(struct hwloc_x86_backend_data_s *data, struct proc
unsigned cachenum; unsigned cachenum;
struct cacheinfo *cache; struct cacheinfo *cache;
for (cachenum = 0; ; cachenum++) { for (cachenum = 0; cachenum<16 /* guard */; cachenum++) {
eax = 0x04; eax = 0x04;
ecx = cachenum; ecx = cachenum;
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
@@ -397,7 +408,7 @@ static void read_intel_caches(struct hwloc_x86_backend_data_s *data, struct proc
infos->cache = tmpcaches; infos->cache = tmpcaches;
cache = &infos->cache[oldnumcaches]; cache = &infos->cache[oldnumcaches];
for (cachenum = 0; ; cachenum++) { for (cachenum = 0; cachenum<16 /* guard */; cachenum++) {
unsigned long linesize, linepart, ways, sets; unsigned long linesize, linepart, ways, sets;
eax = 0x04; eax = 0x04;
ecx = cachenum; ecx = cachenum;
@@ -477,7 +488,7 @@ static void read_amd_cores_legacy(struct procinfo *infos, struct cpuiddump *src_
} }
/* AMD unit/node from CPUID 0x8000001e leaf (topoext) */ /* AMD unit/node from CPUID 0x8000001e leaf (topoext) */
static void read_amd_cores_topoext(struct procinfo *infos, unsigned long flags, struct cpuiddump *src_cpuiddump) static void read_amd_cores_topoext(struct hwloc_x86_backend_data_s *data, struct procinfo *infos, unsigned long flags __hwloc_attribute_unused, struct cpuiddump *src_cpuiddump)
{ {
unsigned apic_id, nodes_per_proc = 0; unsigned apic_id, nodes_per_proc = 0;
unsigned eax, ebx, ecx, edx; unsigned eax, ebx, ecx, edx;
@@ -486,7 +497,6 @@ static void read_amd_cores_topoext(struct procinfo *infos, unsigned long flags,
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
infos->apicid = apic_id = eax; infos->apicid = apic_id = eax;
if (flags & HWLOC_X86_DISC_FLAG_TOPOEXT_NUMANODES) {
if (infos->cpufamilynumber == 0x16) { if (infos->cpufamilynumber == 0x16) {
/* ecx is reserved */ /* ecx is reserved */
infos->ids[NODE] = 0; infos->ids[NODE] = 0;
@@ -497,15 +507,16 @@ static void read_amd_cores_topoext(struct procinfo *infos, unsigned long flags,
nodes_per_proc = ((ecx >> 8) & 7) + 1; nodes_per_proc = ((ecx >> 8) & 7) + 1;
} }
if ((infos->cpufamilynumber == 0x15 && nodes_per_proc > 2) if ((infos->cpufamilynumber == 0x15 && nodes_per_proc > 2)
|| ((infos->cpufamilynumber == 0x17 || infos->cpufamilynumber == 0x18) && nodes_per_proc > 4)) { || ((infos->cpufamilynumber == 0x17 || infos->cpufamilynumber == 0x18) && nodes_per_proc > 4)
|| (infos->cpufamilynumber == 0x19 && nodes_per_proc > 1)) {
hwloc_debug("warning: undefined nodes_per_proc value %u, assuming it means %u\n", nodes_per_proc, nodes_per_proc); hwloc_debug("warning: undefined nodes_per_proc value %u, assuming it means %u\n", nodes_per_proc, nodes_per_proc);
} }
}
if (infos->cpufamilynumber <= 0x16) { /* topoext appeared in 0x15 and compute-units were only used in 0x15 and 0x16 */ if (infos->cpufamilynumber <= 0x16) { /* topoext appeared in 0x15 and compute-units were only used in 0x15 and 0x16 */
unsigned cores_per_unit; unsigned cores_per_unit;
/* coreid was obtained from read_amd_cores_legacy() earlier */ /* coreid was obtained from read_amd_cores_legacy() earlier */
infos->ids[UNIT] = ebx & 0xff; infos->ids[UNIT] = ebx & 0xff;
data->found_unit_ids = 1;
cores_per_unit = ((ebx >> 8) & 0xff) + 1; cores_per_unit = ((ebx >> 8) & 0xff) + 1;
hwloc_debug("topoext %08x, %u nodes, node %u, %u cores in unit %u\n", apic_id, nodes_per_proc, infos->ids[NODE], cores_per_unit, infos->ids[UNIT]); hwloc_debug("topoext %08x, %u nodes, node %u, %u cores in unit %u\n", apic_id, nodes_per_proc, infos->ids[NODE], cores_per_unit, infos->ids[UNIT]);
/* coreid and unitid are package-wide (core 0-15 and unit 0-7 on 16-core 2-NUMAnode processor). /* coreid and unitid are package-wide (core 0-15 and unit 0-7 on 16-core 2-NUMAnode processor).
@@ -520,19 +531,29 @@ static void read_amd_cores_topoext(struct procinfo *infos, unsigned long flags,
} }
} }
/* Intel core/thread or even die/module/tile from CPUID 0x0b or 0x1f leaves (v1 and v2 extended topology enumeration) */ /* Intel core/thread or even die/module/tile from CPUID 0x0b or 0x1f leaves (v1 and v2 extended topology enumeration)
static void read_intel_cores_exttopoenum(struct procinfo *infos, unsigned leaf, struct cpuiddump *src_cpuiddump) * or AMD core/thread or even complex/ccd from CPUID 0x0b or 0x80000026 (extended CPU topology)
*/
static void read_extended_topo(struct hwloc_x86_backend_data_s *data, struct procinfo *infos, unsigned leaf, enum cpuid_type cpuid_type __hwloc_attribute_unused, struct cpuiddump *src_cpuiddump)
{ {
unsigned level, apic_nextshift, apic_number, apic_type, apic_id = 0, apic_shift = 0, id; unsigned level, apic_nextshift, apic_type, apic_id = 0, apic_shift = 0, id;
unsigned threadid __hwloc_attribute_unused = 0; /* shut-up compiler */ unsigned threadid __hwloc_attribute_unused = 0; /* shut-up compiler */
unsigned eax, ebx, ecx = 0, edx; unsigned eax, ebx, ecx = 0, edx;
int apic_packageshift = 0; int apic_packageshift = 0;
for (level = 0; ; level++) { for (level = 0; level<32 /* guard */; level++) {
ecx = level; ecx = level;
eax = leaf; eax = leaf;
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
if (!eax && !ebx) /* Intel specifies that the 0x0b/0x1f loop should stop when we get "invalid domain" (0 in ecx[8:15])
* (if so, we also get 0 in eax/ebx for invalid subleaves). Zhaoxin implements this too.
* However AMD rather says that the 0x80000026/0x0b loop should stop when we get "no thread at this level" (0 in ebx[0:15]).
*
* Linux kernel <= 6.8 used "invalid domain" for both Intel and AMD (in detect_extended_topology())
* but x86 discovery revamp in 6.9 now properly checks both Intel and AMD conditions (in topo_subleaf()).
* So let's assume we are allowed to break-out once one of the Intel+AMD conditions is met.
*/
if (!(ebx & 0xffff) || !(ecx & 0xff00))
break; break;
apic_packageshift = eax & 0x1f; apic_packageshift = eax & 0x1f;
} }
@@ -541,47 +562,68 @@ static void read_intel_cores_exttopoenum(struct procinfo *infos, unsigned leaf,
infos->otherids = malloc(level * sizeof(*infos->otherids)); infos->otherids = malloc(level * sizeof(*infos->otherids));
if (infos->otherids) { if (infos->otherids) {
infos->levels = level; infos->levels = level;
for (level = 0; ; level++) { for (level = 0; level<32 /* guard */; level++) {
ecx = level; ecx = level;
eax = leaf; eax = leaf;
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
if (!eax && !ebx) if (!(ebx & 0xffff) || !(ecx & 0xff00))
break; break;
apic_nextshift = eax & 0x1f; apic_nextshift = eax & 0x1f;
apic_number = ebx & 0xffff;
apic_type = (ecx & 0xff00) >> 8; apic_type = (ecx & 0xff00) >> 8;
apic_id = edx; apic_id = edx;
id = (apic_id >> apic_shift) & ((1 << (apic_packageshift - apic_shift)) - 1); id = (apic_id >> apic_shift) & ((1 << (apic_packageshift - apic_shift)) - 1);
hwloc_debug("x2APIC %08x %u: nextshift %u num %2u type %u id %2u\n", apic_id, level, apic_nextshift, apic_number, apic_type, id); hwloc_debug("x2APIC %08x %u: nextshift %u nextnumber %2u type %u id %2u\n",
apic_id,
level,
apic_nextshift,
ebx & 0xffff /* number of threads in next level */,
apic_type,
id);
infos->apicid = apic_id; infos->apicid = apic_id;
infos->otherids[level] = UINT_MAX; infos->otherids[level] = UINT_MAX;
switch (apic_type) { switch (apic_type) {
case 1: case 1:
threadid = id; threadid = id;
/* apic_number is the actual number of threads per core */ break;
break; case 2:
case 2: infos->ids[CORE] = id;
infos->ids[CORE] = id; break;
/* apic_number is the actual number of threads per die */ case 3:
break; if (leaf == 0x80000026) {
case 3: data->found_complex_ids = 1;
infos->ids[MODULE] = id; infos->ids[COMPLEX] = id;
/* apic_number is the actual number of threads per tile */ } else {
break; data->found_module_ids = 1;
case 4: infos->ids[MODULE] = id;
infos->ids[TILE] = id; }
/* apic_number is the actual number of threads per die */ break;
break; case 4:
case 5: if (leaf == 0x80000026) {
infos->ids[DIE] = id; data->found_die_ids = 1;
/* apic_number is the actual number of threads per package */ infos->ids[DIE] = id;
break; } else {
default: data->found_tile_ids = 1;
hwloc_debug("x2APIC %u: unknown type %u\n", level, apic_type); infos->ids[TILE] = id;
infos->otherids[level] = apic_id >> apic_shift; }
break; break;
} case 5:
apic_shift = apic_nextshift; if (leaf == 0x80000026) {
goto unknown_type;
} else {
data->found_die_ids = 1;
infos->ids[DIE] = id;
}
break;
case 6:
/* TODO: "DieGrp" on Intel */
/* fallthrough */
default:
unknown_type:
hwloc_debug("x2APIC %u: unknown type %u\n", level, apic_type);
infos->otherids[level] = apic_id >> apic_shift;
break;
}
apic_shift = apic_nextshift;
} }
infos->apicid = apic_id; infos->apicid = apic_id;
infos->ids[PKG] = apic_id >> apic_shift; infos->ids[PKG] = apic_id >> apic_shift;
@@ -610,10 +652,19 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
eax = 0x01; eax = 0x01;
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
infos->apicid = ebx >> 24; infos->apicid = ebx >> 24;
if (edx & (1 << 28)) if (edx & (1 << 28)) {
legacy_max_log_proc = 1 << hwloc_flsl(((ebx >> 16) & 0xff) - 1); unsigned ebx_16_23 = (ebx >> 16) & 0xff;
else if (ebx_16_23) {
legacy_max_log_proc = 1 << hwloc_flsl(ebx_16_23 - 1);
} else {
hwloc_debug("HTT bit set in CPUID 0x01.edx, but legacy_max_proc = 0 in ebx, assuming legacy_max_log_proc = 1\n");
legacy_max_log_proc = 1;
}
} else {
hwloc_debug("HTT bit not set in CPUID 0x01.edx, assuming legacy_max_log_proc = 1\n");
legacy_max_log_proc = 1; legacy_max_log_proc = 1;
}
hwloc_debug("APIC ID 0x%02x legacy_max_log_proc %u\n", infos->apicid, legacy_max_log_proc); hwloc_debug("APIC ID 0x%02x legacy_max_log_proc %u\n", infos->apicid, legacy_max_log_proc);
infos->ids[PKG] = infos->apicid / legacy_max_log_proc; infos->ids[PKG] = infos->apicid / legacy_max_log_proc;
legacy_log_proc_id = infos->apicid % legacy_max_log_proc; legacy_log_proc_id = infos->apicid % legacy_max_log_proc;
@@ -676,22 +727,34 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
unsigned max_nbcores; unsigned max_nbcores;
unsigned max_nbthreads; unsigned max_nbthreads;
unsigned threadid __hwloc_attribute_unused; unsigned threadid __hwloc_attribute_unused;
hwloc_debug("Trying to get core/thread IDs from 0x04...\n");
max_nbcores = ((eax >> 26) & 0x3f) + 1; max_nbcores = ((eax >> 26) & 0x3f) + 1;
max_nbthreads = legacy_max_log_proc / max_nbcores; hwloc_debug("found %u cores max\n", max_nbcores);
hwloc_debug("thus %u threads\n", max_nbthreads); /* some VMs (e.g. issue#525) don't report valid information, check things before dividing by 0. */
threadid = legacy_log_proc_id % max_nbthreads; if (!max_nbcores) {
infos->ids[CORE] = legacy_log_proc_id / max_nbthreads; hwloc_debug("cannot detect core/thread IDs from 0x04 without a valid max of cores\n");
hwloc_debug("this is thread %u of core %u\n", threadid, infos->ids[CORE]); } else {
max_nbthreads = legacy_max_log_proc / max_nbcores;
hwloc_debug("found %u threads max\n", max_nbthreads);
if (!max_nbthreads) {
hwloc_debug("cannot detect core/thread IDs from 0x04 without a valid max of threads\n");
} else {
threadid = legacy_log_proc_id % max_nbthreads;
infos->ids[CORE] = legacy_log_proc_id / max_nbthreads;
hwloc_debug("this is thread %u of core %u\n", threadid, infos->ids[CORE]);
}
}
} }
} }
if (highest_cpuid >= 0x1a && has_hybrid(features)) { if (highest_cpuid >= 0x1a && has_hybrid(features)) {
/* Get hybrid cpu information from cpuid 0x1a */ /* Get hybrid cpu information from cpuid 0x1a on Intel */
eax = 0x1a; eax = 0x1a;
ecx = 0; ecx = 0;
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
infos->hybridcoretype = eax >> 24; infos->hybridcoretype = eax >> 24;
infos->hybridnativemodel = eax & 0xffffff; infos->hybridnativemodel = eax & 0xffffff;
data->is_hybrid = 1;
} }
/********************************************************************************* /*********************************************************************************
@@ -713,23 +776,30 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
* *
* Only needed when x2apic supported if NUMA nodes are needed. * Only needed when x2apic supported if NUMA nodes are needed.
*/ */
read_amd_cores_topoext(infos, flags, src_cpuiddump); read_amd_cores_topoext(data, infos, flags, src_cpuiddump);
} }
if ((cpuid_type == intel) && highest_cpuid >= 0x1f) { if ((cpuid_type == amd) && highest_ext_cpuid >= 0x80000026) {
/* Get socket/die/complex/core/thread information from cpuid 0x80000026
* (AMD Extended CPU Topology)
*/
read_extended_topo(data, infos, 0x80000026, cpuid_type, src_cpuiddump);
} else if ((cpuid_type == intel || cpuid_type == zhaoxin) && highest_cpuid >= 0x1f) {
/* Get package/die/module/tile/core/thread information from cpuid 0x1f /* Get package/die/module/tile/core/thread information from cpuid 0x1f
* (Intel v2 Extended Topology Enumeration) * (Intel v2 Extended Topology Enumeration)
*/ */
read_intel_cores_exttopoenum(infos, 0x1f, src_cpuiddump); read_extended_topo(data, infos, 0x1f, cpuid_type, src_cpuiddump);
} else if ((cpuid_type == intel || cpuid_type == amd || cpuid_type == zhaoxin) } else if ((cpuid_type == intel || cpuid_type == amd || cpuid_type == zhaoxin)
&& highest_cpuid >= 0x0b && has_x2apic(features)) { && highest_cpuid >= 0x0b && has_x2apic(features)) {
/* Get package/core/thread information from cpuid 0x0b /* Get package/core/thread information from cpuid 0x0b
* (Intel v1 Extended Topology Enumeration) * (Intel v1 Extended Topology Enumeration)
*/ */
read_intel_cores_exttopoenum(infos, 0x0b, src_cpuiddump); read_extended_topo(data, infos, 0x0b, cpuid_type, src_cpuiddump);
} }
if (backend->topology->want_some_cpu_caches) {
/************************************** /**************************************
* Get caches from CPU-specific leaves * Get caches from CPU-specific leaves
*/ */
@@ -772,13 +842,19 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
} else if (cpuid_type == amd) { } else if (cpuid_type == amd) {
/* AMD quirks */ /* AMD quirks */
if (infos->cpufamilynumber == 0x17 if (infos->cpufamilynumber >= 0x17 && cache->level == 3) {
&& cache->level == 3 && cache->nbthreads_sharing == 6) { /* AMD family 0x19 always shares L3 between 16 APIC ids (8 HT cores).
/* AMD family 0x17 always shares L3 between 8 APIC ids, * while Family 0x17 shares between 8 APIC ids (4 HT cores).
* even when only 6 APIC ids are enabled and reported in nbthreads_sharing * But many models have less APIC ids enabled and reported in nbthreads_sharing.
* (on 24-core CPUs). * It means we must round-up nbthreads_sharing to the nearest power of 2
* before computing cacheid.
*/ */
cache->cacheid = infos->apicid / 8; unsigned nbapics_sharing = cache->nbthreads_sharing;
if (nbapics_sharing & (nbapics_sharing-1))
/* not a power of two, round-up */
nbapics_sharing = 1U<<(1+hwloc_ffsl(nbapics_sharing));
cache->cacheid = infos->apicid / nbapics_sharing;
} else if (infos->cpufamilynumber== 0x10 && infos->cpumodelnumber == 0x9 } else if (infos->cpufamilynumber== 0x10 && infos->cpumodelnumber == 0x9
&& cache->level == 3 && cache->level == 3
@@ -804,7 +880,7 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
} else if (infos->cpufamilynumber == 0x15 } else if (infos->cpufamilynumber == 0x15
&& (infos->cpumodelnumber == 0x1 /* Bulldozer */ || infos->cpumodelnumber == 0x2 /* Piledriver */) && (infos->cpumodelnumber == 0x1 /* Bulldozer */ || infos->cpumodelnumber == 0x2 /* Piledriver */)
&& cache->level == 3 && cache->nbthreads_sharing == 6) { && cache->level == 3 && cache->nbthreads_sharing == 6) {
/* AMD Bulldozer and Piledriver 12-core processors have same APIC ids as Magny-Cours below, /* AMD Bulldozer and Piledriver 12-core processors have same APIC ids as Magny-Cours above,
* but we can't merge the checks because the original nbthreads_sharing must be exactly 6 here. * but we can't merge the checks because the original nbthreads_sharing must be exactly 6 here.
*/ */
cache->cacheid = (infos->apicid % legacy_max_log_proc) / cache->nbthreads_sharing /* cacheid within the package */ cache->cacheid = (infos->apicid % legacy_max_log_proc) / cache->nbthreads_sharing /* cacheid within the package */
@@ -821,6 +897,7 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
} }
} }
} }
}
if (hwloc_bitmap_isset(data->apicid_set, infos->apicid)) if (hwloc_bitmap_isset(data->apicid_set, infos->apicid))
data->apicid_unique = 0; data->apicid_unique = 0;
@@ -908,6 +985,16 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
int gotnuma = 0; int gotnuma = 0;
int fulldiscovery = (flags & HWLOC_X86_DISC_FLAG_FULL); int fulldiscovery = (flags & HWLOC_X86_DISC_FLAG_FULL);
#ifdef HWLOC_DEBUG
hwloc_debug("\nSummary of x86 CPUID topology:\n");
for(i=0; i<nbprocs; i++) {
hwloc_debug("PU %u present=%u apicid=%u on PKG %d CORE %d DIE %d NODE %d\n",
i, infos[i].present, infos[i].apicid,
infos[i].ids[PKG], infos[i].ids[CORE], infos[i].ids[DIE], infos[i].ids[NODE]);
}
hwloc_debug("\n");
#endif
for (i = 0; i < nbprocs; i++) for (i = 0; i < nbprocs; i++)
if (infos[i].present) { if (infos[i].present) {
hwloc_bitmap_set(complete_cpuset, i); hwloc_bitmap_set(complete_cpuset, i);
@@ -1012,21 +1099,34 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) { if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) {
if (fulldiscovery) { if (fulldiscovery) {
/* Look for AMD Compute units inside packages */ if (data->found_unit_ids) {
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); /* Look for AMD Complex inside packages */
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset, hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
UNIT, "Compute Unit", hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT, 0); COMPLEX, "Complex",
/* Look for Intel Modules inside packages */ HWLOC_GROUP_KIND_AMD_COMPLEX, 0);
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); }
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset, if (data->found_unit_ids) {
MODULE, "Module", /* Look for AMD Compute units inside packages */
HWLOC_GROUP_KIND_INTEL_MODULE, 0); hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
/* Look for Intel Tiles inside packages */ hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset); UNIT, "Compute Unit",
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset, HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT, 0);
TILE, "Tile", }
HWLOC_GROUP_KIND_INTEL_TILE, 0); if (data->found_module_ids) {
/* Look for Intel Modules inside packages */
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
MODULE, "Module",
HWLOC_GROUP_KIND_INTEL_MODULE, 0);
}
if (data->found_tile_ids) {
/* Look for Intel Tiles inside packages */
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
TILE, "Tile",
HWLOC_GROUP_KIND_INTEL_TILE, 0);
}
/* Look for unknown objects */ /* Look for unknown objects */
if (infos[one].otherids) { if (infos[one].otherids) {
@@ -1060,7 +1160,8 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
} }
} }
if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_DIE)) { if (data->found_die_ids
&& hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_DIE)) {
/* Look for Intel Dies inside packages */ /* Look for Intel Dies inside packages */
if (fulldiscovery) { if (fulldiscovery) {
hwloc_bitmap_t die_cpuset; hwloc_bitmap_t die_cpuset;
@@ -1218,6 +1319,18 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
} }
} }
cache = hwloc_alloc_setup_object(topology, otype, HWLOC_UNKNOWN_INDEX); cache = hwloc_alloc_setup_object(topology, otype, HWLOC_UNKNOWN_INDEX);
/* We don't specify the os_index of caches because we want to be
* 100% sure they are identical to what the Linux kernel reports
* (so that things like resctrl work).
* However, vendor/model-specific quirks in the x86 code above
* make this difficult.
*
* Caveat: if the x86 backend is used on Linux to avoid kernel bugs,
* IDs won't be available to resctrl users. But resctrl heavily
* relies on the kernel x86 discovery being non-buggy anyway.
*
* TODO: make this optional? or only disable it on Linux?
*/
cache->attr->cache.depth = level; cache->attr->cache.depth = level;
cache->attr->cache.size = infos[i].cache[l].size; cache->attr->cache.size = infos[i].cache[l].size;
cache->attr->cache.linesize = infos[i].cache[l].linesize; cache->attr->cache.linesize = infos[i].cache[l].linesize;
@@ -1247,7 +1360,8 @@ static int
look_procs(struct hwloc_backend *backend, struct procinfo *infos, unsigned long flags, look_procs(struct hwloc_backend *backend, struct procinfo *infos, unsigned long flags,
unsigned highest_cpuid, unsigned highest_ext_cpuid, unsigned *features, enum cpuid_type cpuid_type, unsigned highest_cpuid, unsigned highest_ext_cpuid, unsigned *features, enum cpuid_type cpuid_type,
int (*get_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags), int (*get_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags),
int (*set_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags)) int (*set_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags),
hwloc_bitmap_t restrict_set)
{ {
struct hwloc_x86_backend_data_s *data = backend->private_data; struct hwloc_x86_backend_data_s *data = backend->private_data;
struct hwloc_topology *topology = backend->topology; struct hwloc_topology *topology = backend->topology;
@@ -1267,6 +1381,12 @@ look_procs(struct hwloc_backend *backend, struct procinfo *infos, unsigned long
for (i = 0; i < nbprocs; i++) { for (i = 0; i < nbprocs; i++) {
struct cpuiddump *src_cpuiddump = NULL; struct cpuiddump *src_cpuiddump = NULL;
if (restrict_set && !hwloc_bitmap_isset(restrict_set, i)) {
/* skip this CPU outside of the binding mask */
continue;
}
if (data->src_cpuiddump_path) { if (data->src_cpuiddump_path) {
src_cpuiddump = cpuiddump_read(data->src_cpuiddump_path, i); src_cpuiddump = cpuiddump_read(data->src_cpuiddump_path, i);
if (!src_cpuiddump) if (!src_cpuiddump)
@@ -1296,40 +1416,45 @@ look_procs(struct hwloc_backend *backend, struct procinfo *infos, unsigned long
if (data->apicid_unique) { if (data->apicid_unique) {
summarize(backend, infos, flags); summarize(backend, infos, flags);
if (has_hybrid(features)) { if (data->is_hybrid
&& !(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS)) {
/* use hybrid info for cpukinds */ /* use hybrid info for cpukinds */
hwloc_bitmap_t atomset = hwloc_bitmap_alloc(); if (cpuid_type == intel) {
hwloc_bitmap_t coreset = hwloc_bitmap_alloc(); /* Hybrid Intel */
for(i=0; i<nbprocs; i++) { hwloc_bitmap_t atomset = hwloc_bitmap_alloc();
if (infos[i].hybridcoretype == 0x20) hwloc_bitmap_t coreset = hwloc_bitmap_alloc();
hwloc_bitmap_set(atomset, i); for(i=0; i<nbprocs; i++) {
else if (infos[i].hybridcoretype == 0x40) if (infos[i].hybridcoretype == 0x20)
hwloc_bitmap_set(coreset, i); hwloc_bitmap_set(atomset, i);
} else if (infos[i].hybridcoretype == 0x40)
/* register IntelAtom set if any */ hwloc_bitmap_set(coreset, i);
if (!hwloc_bitmap_iszero(atomset)) { }
struct hwloc_info_s infoattr; /* register IntelAtom set if any */
infoattr.name = (char *) "CoreType"; if (!hwloc_bitmap_iszero(atomset)) {
infoattr.value = (char *) "IntelAtom"; struct hwloc_info_s infoattr;
hwloc_internal_cpukinds_register(topology, atomset, HWLOC_CPUKIND_EFFICIENCY_UNKNOWN, &infoattr, 1, 0); infoattr.name = (char *) "CoreType";
/* the cpuset is given to the callee */ infoattr.value = (char *) "IntelAtom";
} else { hwloc_internal_cpukinds_register(topology, atomset, HWLOC_CPUKIND_EFFICIENCY_UNKNOWN, &infoattr, 1, 0);
hwloc_bitmap_free(atomset); /* the cpuset is given to the callee */
} } else {
/* register IntelCore set if any */ hwloc_bitmap_free(atomset);
if (!hwloc_bitmap_iszero(coreset)) { }
struct hwloc_info_s infoattr; /* register IntelCore set if any */
infoattr.name = (char *) "CoreType"; if (!hwloc_bitmap_iszero(coreset)) {
infoattr.value = (char *) "IntelCore"; struct hwloc_info_s infoattr;
hwloc_internal_cpukinds_register(topology, coreset, HWLOC_CPUKIND_EFFICIENCY_UNKNOWN, &infoattr, 1, 0); infoattr.name = (char *) "CoreType";
/* the cpuset is given to the callee */ infoattr.value = (char *) "IntelCore";
} else { hwloc_internal_cpukinds_register(topology, coreset, HWLOC_CPUKIND_EFFICIENCY_UNKNOWN, &infoattr, 1, 0);
hwloc_bitmap_free(coreset); /* the cpuset is given to the callee */
} else {
hwloc_bitmap_free(coreset);
}
} }
} }
} else {
hwloc_debug("x86 APIC IDs aren't unique, x86 discovery ignored.\n");
/* do nothing and return success, so that the caller does nothing either */
} }
/* if !data->apicid_unique, do nothing and return success, so that the caller does nothing either */
return 0; return 0;
} }
@@ -1400,12 +1525,21 @@ static
int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags) int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags)
{ {
struct hwloc_x86_backend_data_s *data = backend->private_data; struct hwloc_x86_backend_data_s *data = backend->private_data;
struct hwloc_topology *topology = backend->topology;
unsigned nbprocs = data->nbprocs; unsigned nbprocs = data->nbprocs;
unsigned eax, ebx, ecx = 0, edx; unsigned eax, ebx, ecx = 0, edx;
unsigned i; unsigned i;
unsigned highest_cpuid; unsigned highest_cpuid;
unsigned highest_ext_cpuid; unsigned highest_ext_cpuid;
/* This stores cpuid features with the same indexing as Linux */ /* This stores cpuid features with the same indexing as Linux:
* [0] = 0x1 edx
* [1] = 0x80000001 edx
* [4] = 0x1 ecx
* [6] = 0x80000001 ecx
* [9] = 0x7/0 ebx
* [16] = 0x7/0 ecx
* [18] = 0x7/0 edx
*/
unsigned features[19] = { 0 }; unsigned features[19] = { 0 };
struct procinfo *infos = NULL; struct procinfo *infos = NULL;
enum cpuid_type cpuid_type = unknown; enum cpuid_type cpuid_type = unknown;
@@ -1415,9 +1549,21 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags)
struct hwloc_topology_membind_support memsupport __hwloc_attribute_unused; struct hwloc_topology_membind_support memsupport __hwloc_attribute_unused;
int (*get_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags) = NULL; int (*get_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags) = NULL;
int (*set_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags) = NULL; int (*set_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags) = NULL;
hwloc_bitmap_t restrict_set = NULL;
struct cpuiddump *src_cpuiddump = NULL; struct cpuiddump *src_cpuiddump = NULL;
int ret = -1; int ret = -1;
/* check if binding works */
memset(&hooks, 0, sizeof(hooks));
support.membind = &memsupport;
/* We could just copy the main hooks (except in some corner cases),
* but the current overhead is negligible, so just always reget them.
*/
hwloc_set_native_binding_hooks(&hooks, &support);
/* in theory, those are only needed if !data->src_cpuiddump_path || HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_BINDING
* but that's the vast majority of cases anyway, and the overhead is very small.
*/
if (data->src_cpuiddump_path) { if (data->src_cpuiddump_path) {
/* Just read cpuid from the dump (implies !topology->is_thissystem by default) */ /* Just read cpuid from the dump (implies !topology->is_thissystem by default) */
src_cpuiddump = cpuiddump_read(data->src_cpuiddump_path, 0); src_cpuiddump = cpuiddump_read(data->src_cpuiddump_path, 0);
@@ -1430,13 +1576,6 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags)
* we may still force use this backend when debugging with !thissystem. * we may still force use this backend when debugging with !thissystem.
*/ */
/* check if binding works */
memset(&hooks, 0, sizeof(hooks));
support.membind = &memsupport;
/* We could just copy the main hooks (except in some corner cases),
* but the current overhead is negligible, so just always reget them.
*/
hwloc_set_native_binding_hooks(&hooks, &support);
if (hooks.get_thisthread_cpubind && hooks.set_thisthread_cpubind) { if (hooks.get_thisthread_cpubind && hooks.set_thisthread_cpubind) {
get_cpubind = hooks.get_thisthread_cpubind; get_cpubind = hooks.get_thisthread_cpubind;
set_cpubind = hooks.set_thisthread_cpubind; set_cpubind = hooks.set_thisthread_cpubind;
@@ -1456,6 +1595,20 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags)
} }
} }
if (topology->flags & HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING) {
restrict_set = hwloc_bitmap_alloc();
if (!restrict_set)
goto out;
if (hooks.get_thisproc_cpubind)
hooks.get_thisproc_cpubind(topology, restrict_set, 0);
else if (hooks.get_thisthread_cpubind)
hooks.get_thisthread_cpubind(topology, restrict_set, 0);
if (hwloc_bitmap_iszero(restrict_set)) {
hwloc_bitmap_free(restrict_set);
restrict_set = NULL;
}
}
if (!src_cpuiddump && !hwloc_have_x86_cpuid()) if (!src_cpuiddump && !hwloc_have_x86_cpuid())
goto out; goto out;
@@ -1506,6 +1659,7 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags)
ecx = 0; ecx = 0;
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump); cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
features[9] = ebx; features[9] = ebx;
features[16] = ecx;
features[18] = edx; features[18] = edx;
} }
@@ -1520,7 +1674,7 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags)
ret = look_procs(backend, infos, flags, ret = look_procs(backend, infos, flags,
highest_cpuid, highest_ext_cpuid, features, cpuid_type, highest_cpuid, highest_ext_cpuid, features, cpuid_type,
get_cpubind, set_cpubind); get_cpubind, set_cpubind, restrict_set);
if (!ret) if (!ret)
/* success, we're done */ /* success, we're done */
goto out_with_os_state; goto out_with_os_state;
@@ -1545,6 +1699,7 @@ out_with_infos:
} }
out: out:
hwloc_bitmap_free(restrict_set);
if (src_cpuiddump) if (src_cpuiddump)
cpuiddump_free(src_cpuiddump); cpuiddump_free(src_cpuiddump);
return ret; return ret;
@@ -1561,6 +1716,11 @@ hwloc_x86_discover(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
assert(dstatus->phase == HWLOC_DISC_PHASE_CPU); assert(dstatus->phase == HWLOC_DISC_PHASE_CPU);
if (topology->flags & HWLOC_TOPOLOGY_FLAG_DONT_CHANGE_BINDING) {
/* TODO: Things would work if there's a single PU, no need to rebind */
return 0;
}
if (getenv("HWLOC_X86_TOPOEXT_NUMANODES")) { if (getenv("HWLOC_X86_TOPOEXT_NUMANODES")) {
flags |= HWLOC_X86_DISC_FLAG_TOPOEXT_NUMANODES; flags |= HWLOC_X86_DISC_FLAG_TOPOEXT_NUMANODES;
} }
@@ -1587,7 +1747,8 @@ hwloc_x86_discover(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
} }
if (topology->levels[0][0]->cpuset) { if (topology->levels[0][0]->cpuset) {
/* somebody else discovered things */ /* somebody else discovered things, reconnect levels so that we can look at them */
hwloc__reconnect(topology, 0);
if (topology->nb_levels == 2 && topology->level_nbobjects[1] == data->nbprocs) { if (topology->nb_levels == 2 && topology->level_nbobjects[1] == data->nbprocs) {
/* only PUs were discovered, as much as we would, complete the topology with everything else */ /* only PUs were discovered, as much as we would, complete the topology with everything else */
alreadypus = 1; alreadypus = 1;
@@ -1595,7 +1756,6 @@ hwloc_x86_discover(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
} }
/* several object types were added, we can't easily complete, just do partial discovery */ /* several object types were added, we can't easily complete, just do partial discovery */
hwloc_topology_reconnect(topology, 0);
ret = hwloc_look_x86(backend, flags); ret = hwloc_look_x86(backend, flags);
if (ret) if (ret)
hwloc_obj_add_info(topology->levels[0][0], "Backend", "x86"); hwloc_obj_add_info(topology->levels[0][0], "Backend", "x86");
@@ -1651,17 +1811,17 @@ hwloc_x86_check_cpuiddump_input(const char *src_cpuiddump_path, hwloc_bitmap_t s
sprintf(path, "%s/hwloc-cpuid-info", src_cpuiddump_path); sprintf(path, "%s/hwloc-cpuid-info", src_cpuiddump_path);
file = fopen(path, "r"); file = fopen(path, "r");
if (!file) { if (!file) {
fprintf(stderr, "Couldn't open dumped cpuid summary %s\n", path); fprintf(stderr, "hwloc/x86: Couldn't open dumped cpuid summary %s\n", path);
goto out_with_path; goto out_with_path;
} }
if (!fgets(line, sizeof(line), file)) { if (!fgets(line, sizeof(line), file)) {
fprintf(stderr, "Found read dumped cpuid summary in %s\n", path); fprintf(stderr, "hwloc/x86: Found read dumped cpuid summary in %s\n", path);
fclose(file); fclose(file);
goto out_with_path; goto out_with_path;
} }
fclose(file); fclose(file);
if (strcmp(line, "Architecture: x86\n")) { if (strncmp(line, "Architecture: x86", 17)) {
fprintf(stderr, "Found non-x86 dumped cpuid summary in %s: %s\n", path, line); fprintf(stderr, "hwloc/x86: Found non-x86 dumped cpuid summary in %s: %s\n", path, line);
goto out_with_path; goto out_with_path;
} }
free(path); free(path);
@@ -1673,19 +1833,19 @@ hwloc_x86_check_cpuiddump_input(const char *src_cpuiddump_path, hwloc_bitmap_t s
if (!*end) if (!*end)
hwloc_bitmap_set(set, idx); hwloc_bitmap_set(set, idx);
else else
fprintf(stderr, "Ignoring invalid dirent `%s' in dumped cpuid directory `%s'\n", fprintf(stderr, "hwloc/x86: Ignoring invalid dirent `%s' in dumped cpuid directory `%s'\n",
dirent->d_name, src_cpuiddump_path); dirent->d_name, src_cpuiddump_path);
} }
} }
closedir(dir); closedir(dir);
if (hwloc_bitmap_iszero(set)) { if (hwloc_bitmap_iszero(set)) {
fprintf(stderr, "Did not find any valid pu%%u entry in dumped cpuid directory `%s'\n", fprintf(stderr, "hwloc/x86: Did not find any valid pu%%u entry in dumped cpuid directory `%s'\n",
src_cpuiddump_path); src_cpuiddump_path);
return -1; return -1;
} else if (hwloc_bitmap_last(set) != hwloc_bitmap_weight(set) - 1) { } else if (hwloc_bitmap_last(set) != hwloc_bitmap_weight(set) - 1) {
/* The x86 backends enforces contigous set of PUs starting at 0 so far */ /* The x86 backends enforces contigous set of PUs starting at 0 so far */
fprintf(stderr, "Found non-contigous pu%%u range in dumped cpuid directory `%s'\n", fprintf(stderr, "hwloc/x86: Found non-contigous pu%%u range in dumped cpuid directory `%s'\n",
src_cpuiddump_path); src_cpuiddump_path);
return -1; return -1;
} }
@@ -1737,9 +1897,15 @@ hwloc_x86_component_instantiate(struct hwloc_topology *topology,
/* default values */ /* default values */
data->is_knl = 0; data->is_knl = 0;
data->is_hybrid = 0;
data->apicid_set = hwloc_bitmap_alloc(); data->apicid_set = hwloc_bitmap_alloc();
data->apicid_unique = 1; data->apicid_unique = 1;
data->src_cpuiddump_path = NULL; data->src_cpuiddump_path = NULL;
data->found_die_ids = 0;
data->found_complex_ids = 0;
data->found_unit_ids = 0;
data->found_module_ids = 0;
data->found_tile_ids = 0;
src_cpuiddump_path = getenv("HWLOC_CPUID_PATH"); src_cpuiddump_path = getenv("HWLOC_CPUID_PATH");
if (src_cpuiddump_path) { if (src_cpuiddump_path) {
@@ -1750,7 +1916,7 @@ hwloc_x86_component_instantiate(struct hwloc_topology *topology,
assert(!hwloc_bitmap_iszero(set)); /* enforced by hwloc_x86_check_cpuiddump_input() */ assert(!hwloc_bitmap_iszero(set)); /* enforced by hwloc_x86_check_cpuiddump_input() */
data->nbprocs = hwloc_bitmap_weight(set); data->nbprocs = hwloc_bitmap_weight(set);
} else { } else {
fprintf(stderr, "Ignoring dumped cpuid directory.\n"); fprintf(stderr, "hwloc/x86: Ignoring dumped cpuid directory.\n");
} }
hwloc_bitmap_free(set); hwloc_bitmap_free(set);
} }

View File

@@ -1,6 +1,6 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2024 Inria. All rights reserved.
* Copyright © 2009-2011 Université Bordeaux * Copyright © 2009-2011 Université Bordeaux
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved. * Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
@@ -41,7 +41,7 @@ typedef struct hwloc__nolibxml_import_state_data_s {
static char * static char *
hwloc__nolibxml_import_ignore_spaces(char *buffer) hwloc__nolibxml_import_ignore_spaces(char *buffer)
{ {
return buffer + strspn(buffer, " \t\n"); return buffer + strspn(buffer, " \t\n\r");
} }
static int static int
@@ -411,12 +411,12 @@ hwloc_nolibxml_backend_init(struct hwloc_xml_backend_data_s *bdata,
bdata->data = nbdata; bdata->data = nbdata;
if (xmlbuffer) { if (xmlbuffer) {
nbdata->buffer = malloc(xmlbuflen+1); nbdata->buffer = malloc(xmlbuflen);
if (!nbdata->buffer) if (!nbdata->buffer)
goto out_with_nbdata; goto out_with_nbdata;
nbdata->buflen = xmlbuflen+1; nbdata->buflen = xmlbuflen;
memcpy(nbdata->buffer, xmlbuffer, xmlbuflen); memcpy(nbdata->buffer, xmlbuffer, xmlbuflen);
nbdata->buffer[xmlbuflen] = '\0'; nbdata->buffer[xmlbuflen-1] = '\0'; /* make sure it's there as requested in the API */
} else { } else {
int err = hwloc_nolibxml_read_file(xmlpath, &nbdata->buffer, &nbdata->buflen); int err = hwloc_nolibxml_read_file(xmlpath, &nbdata->buffer, &nbdata->buflen);
@@ -453,8 +453,9 @@ hwloc_nolibxml_import_diff(struct hwloc__xml_import_state_s *state,
buffer = malloc(xmlbuflen); buffer = malloc(xmlbuflen);
if (!buffer) if (!buffer)
goto out; goto out;
memcpy(buffer, xmlbuffer, xmlbuflen);
buflen = xmlbuflen; buflen = xmlbuflen;
memcpy(buffer, xmlbuffer, xmlbuflen);
buffer[xmlbuflen-1] = '\0'; /* make sure it's there as requested in the API */
} else { } else {
ret = hwloc_nolibxml_read_file(xmlpath, &buffer, &buflen); ret = hwloc_nolibxml_read_file(xmlpath, &buffer, &buflen);

View File

@@ -1,6 +1,6 @@
/* /*
* Copyright © 2009 CNRS * Copyright © 2009 CNRS
* Copyright © 2009-2020 Inria. All rights reserved. * Copyright © 2009-2025 Inria. All rights reserved.
* Copyright © 2009-2011, 2020 Université Bordeaux * Copyright © 2009-2011, 2020 Université Bordeaux
* Copyright © 2009-2018 Cisco Systems, Inc. All rights reserved. * Copyright © 2009-2018 Cisco Systems, Inc. All rights reserved.
* See COPYING in top-level directory. * See COPYING in top-level directory.
@@ -123,6 +123,17 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology,
fprintf(stderr, "%s: unexpected zero gp_index, topology may be invalid\n", state->global->msgprefix); fprintf(stderr, "%s: unexpected zero gp_index, topology may be invalid\n", state->global->msgprefix);
if (obj->gp_index >= topology->next_gp_index) if (obj->gp_index >= topology->next_gp_index)
topology->next_gp_index = obj->gp_index + 1; topology->next_gp_index = obj->gp_index + 1;
} else if (!strcmp(name, "id")) { /* forward compat */
if (!strncmp(value, "obj", 3)) {
obj->gp_index = strtoull(value+3, NULL, 10);
if (!obj->gp_index && hwloc__xml_verbose())
fprintf(stderr, "%s: unexpected zero id, topology may be invalid\n", state->global->msgprefix);
if (obj->gp_index >= topology->next_gp_index)
topology->next_gp_index = obj->gp_index + 1;
} else {
if (hwloc__xml_verbose())
fprintf(stderr, "%s: unexpected id `%s' not-starting with `obj', ignoring\n", state->global->msgprefix, value);
}
} else if (!strcmp(name, "cpuset")) { } else if (!strcmp(name, "cpuset")) {
if (!obj->cpuset) if (!obj->cpuset)
obj->cpuset = hwloc_bitmap_alloc(); obj->cpuset = hwloc_bitmap_alloc();
@@ -192,8 +203,9 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology,
|| lvalue == HWLOC_OBJ_CACHE_INSTRUCTION) || lvalue == HWLOC_OBJ_CACHE_INSTRUCTION)
obj->attr->cache.type = (hwloc_obj_cache_type_t) lvalue; obj->attr->cache.type = (hwloc_obj_cache_type_t) lvalue;
else else
fprintf(stderr, "%s: ignoring invalid cache_type attribute %lu\n", if (hwloc__xml_verbose())
state->global->msgprefix, lvalue); fprintf(stderr, "%s: ignoring invalid cache_type attribute %lu\n",
state->global->msgprefix, lvalue);
} else if (hwloc__xml_verbose()) } else if (hwloc__xml_verbose())
fprintf(stderr, "%s: ignoring cache_type attribute for non-cache object type\n", fprintf(stderr, "%s: ignoring cache_type attribute for non-cache object type\n",
state->global->msgprefix); state->global->msgprefix);
@@ -242,7 +254,7 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology,
else if (!strcmp(name, "dont_merge")) { else if (!strcmp(name, "dont_merge")) {
unsigned long lvalue = strtoul(value, NULL, 10); unsigned long lvalue = strtoul(value, NULL, 10);
if (obj->type == HWLOC_OBJ_GROUP) if (obj->type == HWLOC_OBJ_GROUP)
obj->attr->group.dont_merge = lvalue; obj->attr->group.dont_merge = (unsigned char) lvalue;
else if (hwloc__xml_verbose()) else if (hwloc__xml_verbose())
fprintf(stderr, "%s: ignoring dont_merge attribute for non-group object type\n", fprintf(stderr, "%s: ignoring dont_merge attribute for non-group object type\n",
state->global->msgprefix); state->global->msgprefix);
@@ -262,8 +274,8 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology,
#ifndef HWLOC_HAVE_32BITS_PCI_DOMAIN #ifndef HWLOC_HAVE_32BITS_PCI_DOMAIN
} else if (domain > 0xffff) { } else if (domain > 0xffff) {
static int warned = 0; static int warned = 0;
if (!warned && !hwloc_hide_errors()) if (!warned && HWLOC_SHOW_ALL_ERRORS())
fprintf(stderr, "Ignoring PCI device with non-16bit domain.\nPass --enable-32bits-pci-domain to configure to support such devices\n(warning: it would break the library ABI, don't enable unless really needed).\n"); fprintf(stderr, "hwloc/xml: Ignoring PCI device with non-16bit domain.\nPass --enable-32bits-pci-domain to configure to support such devices\n(warning: it would break the library ABI, don't enable unless really needed).\n");
warned = 1; warned = 1;
*ignore = 1; *ignore = 1;
#endif #endif
@@ -337,6 +349,7 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology,
} else { } else {
obj->attr->bridge.upstream_type = (hwloc_obj_bridge_type_t) upstream_type; obj->attr->bridge.upstream_type = (hwloc_obj_bridge_type_t) upstream_type;
obj->attr->bridge.downstream_type = (hwloc_obj_bridge_type_t) downstream_type; obj->attr->bridge.downstream_type = (hwloc_obj_bridge_type_t) downstream_type;
/* FIXME verify that upstream/downstream type is valid */
}; };
break; break;
} }
@@ -361,12 +374,13 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology,
#ifndef HWLOC_HAVE_32BITS_PCI_DOMAIN #ifndef HWLOC_HAVE_32BITS_PCI_DOMAIN
} else if (domain > 0xffff) { } else if (domain > 0xffff) {
static int warned = 0; static int warned = 0;
if (!warned && !hwloc_hide_errors()) if (!warned && HWLOC_SHOW_ALL_ERRORS())
fprintf(stderr, "Ignoring bridge to PCI with non-16bit domain.\nPass --enable-32bits-pci-domain to configure to support such devices\n(warning: it would break the library ABI, don't enable unless really needed).\n"); fprintf(stderr, "hwloc/xml: Ignoring bridge to PCI with non-16bit domain.\nPass --enable-32bits-pci-domain to configure to support such devices\n(warning: it would break the library ABI, don't enable unless really needed).\n");
warned = 1; warned = 1;
*ignore = 1; *ignore = 1;
#endif #endif
} else { } else {
/* FIXME verify that downstream type vs pci info are valid */
obj->attr->bridge.downstream.pci.domain = domain; obj->attr->bridge.downstream.pci.domain = domain;
obj->attr->bridge.downstream.pci.secondary_bus = secbus; obj->attr->bridge.downstream.pci.secondary_bus = secbus;
obj->attr->bridge.downstream.pci.subordinate_bus = subbus; obj->attr->bridge.downstream.pci.subordinate_bus = subbus;
@@ -401,6 +415,20 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology,
} }
} }
else if (!strcmp(name, "numanode_type")) {
switch (obj->type) {
case HWLOC_OBJ_NUMANODE: {
/* ignored for now, here for possible forward compat */
break;
}
default:
if (hwloc__xml_verbose())
fprintf(stderr, "%s: ignoring numanode_type attribute for non-NUMA object\n",
state->global->msgprefix);
break;
}
}
else if (data->version_major < 2) { else if (data->version_major < 2) {
/************************ /************************
* deprecated from 1.x * deprecated from 1.x
@@ -548,7 +576,13 @@ hwloc__xml_import_pagetype(hwloc_topology_t topology __hwloc_attribute_unused, s
char *attrname, *attrvalue; char *attrname, *attrvalue;
if (state->global->next_attr(state, &attrname, &attrvalue) < 0) if (state->global->next_attr(state, &attrname, &attrvalue) < 0)
break; break;
if (!strcmp(attrname, "size")) if (!strcmp(attrname, "info")) {
char *infoname, *infovalue;
int ret = hwloc___xml_import_info(&infoname, &infovalue, state);
if (ret < 0)
return -1;
/* ignored */
} else if (!strcmp(attrname, "size"))
size = strtoull(attrvalue, NULL, 10); size = strtoull(attrvalue, NULL, 10);
else if (!strcmp(attrname, "count")) else if (!strcmp(attrname, "count"))
count = strtoull(attrvalue, NULL, 10); count = strtoull(attrvalue, NULL, 10);
@@ -852,14 +886,23 @@ hwloc__xml_import_object(hwloc_topology_t topology,
/* deal with possible future type */ /* deal with possible future type */
obj->type = HWLOC_OBJ_GROUP; obj->type = HWLOC_OBJ_GROUP;
obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_MODULE; obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_MODULE;
} else if (!strcasecmp(attrvalue, "MemCache")) { } else if (!strcasecmp(attrvalue, "Cluster")) {
/* deal with possible future type */
obj->type = HWLOC_OBJ_GROUP;
obj->attr->group.kind = HWLOC_GROUP_KIND_LINUX_CLUSTER;
}
#if 0
/* reenable if there's ever a future type that should be ignored without being an error */
else if (!strcasecmp(attrvalue, "MemCache")) {
/* ignore possible future type */ /* ignore possible future type */
obj->type = _HWLOC_OBJ_FUTURE; obj->type = _HWLOC_OBJ_FUTURE;
ignored = 1; ignored = 1;
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "%s: %s object not-supported, will be ignored\n", fprintf(stderr, "%s: %s object not-supported, will be ignored\n",
state->global->msgprefix, attrvalue); state->global->msgprefix, attrvalue);
} else { }
#endif
else {
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "%s: unrecognized object type string %s\n", fprintf(stderr, "%s: unrecognized object type string %s\n",
state->global->msgprefix, attrvalue); state->global->msgprefix, attrvalue);
@@ -934,22 +977,22 @@ hwloc__xml_import_object(hwloc_topology_t topology,
if (hwloc__obj_type_is_normal(obj->type)) { if (hwloc__obj_type_is_normal(obj->type)) {
if (!hwloc__obj_type_is_normal(parent->type)) { if (!hwloc__obj_type_is_normal(parent->type)) {
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "normal object %s cannot be child of non-normal parent %s\n", fprintf(stderr, "%s: normal object %s cannot be child of non-normal parent %s\n",
hwloc_obj_type_string(obj->type), hwloc_obj_type_string(parent->type)); state->global->msgprefix, hwloc_obj_type_string(obj->type), hwloc_obj_type_string(parent->type));
goto error_with_object; goto error_with_object;
} }
} else if (hwloc__obj_type_is_memory(obj->type)) { } else if (hwloc__obj_type_is_memory(obj->type)) {
if (hwloc__obj_type_is_io(parent->type) || HWLOC_OBJ_MISC == parent->type) { if (hwloc__obj_type_is_io(parent->type) || HWLOC_OBJ_MISC == parent->type) {
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "Memory object %s cannot be child of non-normal-or-memory parent %s\n", fprintf(stderr, "%s: Memory object %s cannot be child of non-normal-or-memory parent %s\n",
hwloc_obj_type_string(obj->type), hwloc_obj_type_string(parent->type)); state->global->msgprefix, hwloc_obj_type_string(obj->type), hwloc_obj_type_string(parent->type));
goto error_with_object; goto error_with_object;
} }
} else if (hwloc__obj_type_is_io(obj->type)) { } else if (hwloc__obj_type_is_io(obj->type)) {
if (hwloc__obj_type_is_memory(parent->type) || HWLOC_OBJ_MISC == parent->type) { if (hwloc__obj_type_is_memory(parent->type) || HWLOC_OBJ_MISC == parent->type) {
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "I/O object %s cannot be child of non-normal-or-I/O parent %s\n", fprintf(stderr, "%s: I/O object %s cannot be child of non-normal-or-I/O parent %s\n",
hwloc_obj_type_string(obj->type), hwloc_obj_type_string(parent->type)); state->global->msgprefix, hwloc_obj_type_string(obj->type), hwloc_obj_type_string(parent->type));
goto error_with_object; goto error_with_object;
} }
} }
@@ -1146,6 +1189,48 @@ hwloc__xml_import_object(hwloc_topology_t topology,
data->last_numanode = obj; data->last_numanode = obj;
} }
/* 3.0 forward compatibility */
if (data->version_major >= 3 && obj->type == HWLOC_OBJ_OS_DEVICE) {
/* osdev.type changed into a bitmask in 3.0 */
if (obj->attr->osdev.type & 3 /* STORAGE|MEMORY for BLOCK */) {
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_BLOCK;
} else if (obj->attr->osdev.type & 8 /* COPROC for COPROC and rsmi/nvml GPUs */) {
if (obj->subtype && (!strcmp(obj->subtype, "RSMI") || !strcmp(obj->subtype, "NVML")))
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_GPU;
else
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_COPROC;
} else if (obj->attr->osdev.type & 4 /* GPU for non-COPROC GPUs */) {
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_GPU;
} else if (obj->attr->osdev.type & 32 /* OFED */) {
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_OPENFABRICS;
} else if (obj->attr->osdev.type & 16 /* NET for NET and BXI v2-fake-OFED */) {
if (obj->subtype && !strcmp(obj->subtype, "BXI"))
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_OPENFABRICS;
else
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_NETWORK;
} else if (obj->attr->osdev.type & 64 /* DMA */) {
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_DMA;
} else { /* none or unknown */
obj->attr->osdev.type = (hwloc_obj_osdev_type_t) -1;
}
/* Backend info only in root */
if (obj->subtype && !hwloc_obj_get_info_by_name(obj, "Backend")) {
if (!strcmp(obj->subtype, "CUDA")) {
hwloc_obj_add_info(obj, "Backend", "CUDA");
} else if (!strcmp(obj->subtype, "NVML")) {
hwloc_obj_add_info(obj, "Backend", "NVML");
} else if (!strcmp(obj->subtype, "OpenCL")) {
hwloc_obj_add_info(obj, "Backend", "OpenCL");
} else if (!strcmp(obj->subtype, "RSMI")) {
hwloc_obj_add_info(obj, "Backend", "RSMI");
} else if (!strcmp(obj->subtype, "LevelZero")) {
hwloc_obj_add_info(obj, "Backend", "LevelZero");
} else if (!strcmp(obj->subtype, "Display")) {
hwloc_obj_add_info(obj, "Backend", "GL");
}
}
}
if (!hwloc_filter_check_keep_object(topology, obj)) { if (!hwloc_filter_check_keep_object(topology, obj)) {
/* Ignore this object instead of inserting it. /* Ignore this object instead of inserting it.
* *
@@ -1232,7 +1317,7 @@ hwloc__xml_import_object(hwloc_topology_t topology,
/* next should be before cur */ /* next should be before cur */
if (!childrengotignored) { if (!childrengotignored) {
static int reported = 0; static int reported = 0;
if (!reported && !hwloc_hide_errors()) { if (!reported && HWLOC_SHOW_CRITICAL_ERRORS()) {
hwloc__xml_import_report_outoforder(topology, next, cur); hwloc__xml_import_report_outoforder(topology, next, cur);
reported = 1; reported = 1;
} }
@@ -1282,7 +1367,7 @@ hwloc__xml_v2import_support(hwloc_topology_t topology,
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_support) == 4*sizeof(void*)); HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_support) == 4*sizeof(void*));
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_discovery_support) == 6); HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_discovery_support) == 6);
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_cpubind_support) == 11); HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_cpubind_support) == 11);
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_membind_support) == 15); HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_membind_support) == 16);
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_misc_support) == 1); HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_misc_support) == 1);
#endif #endif
@@ -1316,6 +1401,7 @@ hwloc__xml_v2import_support(hwloc_topology_t topology,
else DO(membind,firsttouch_membind); else DO(membind,firsttouch_membind);
else DO(membind,bind_membind); else DO(membind,bind_membind);
else DO(membind,interleave_membind); else DO(membind,interleave_membind);
else DO(membind,weighted_interleave_membind);
else DO(membind,nexttouch_membind); else DO(membind,nexttouch_membind);
else DO(membind,migrate_membind); else DO(membind,migrate_membind);
else DO(membind,get_area_memlocation); else DO(membind,get_area_memlocation);
@@ -1374,6 +1460,10 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology,
} }
else if (!strcmp(attrname, "kind")) { else if (!strcmp(attrname, "kind")) {
kind = strtoul(attrvalue, NULL, 10); kind = strtoul(attrvalue, NULL, 10);
/* forward compat with "HOPS" kind in v3 */
if (kind & (1UL<<5))
/* hops becomes latency */
kind = (kind & ~(1UL<<5)) | HWLOC_DISTANCES_KIND_MEANS_LATENCY;
} }
else if (!strcmp(attrname, "name")) { else if (!strcmp(attrname, "name")) {
name = attrvalue; name = attrvalue;
@@ -1419,7 +1509,14 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology,
if (ret <= 0) if (ret <= 0)
break; break;
if (!strcmp(tag, "indexes")) if (!strcmp(tag, "info")) {
char *infoname, *infovalue;
ret = hwloc___xml_import_info(&infoname, &infovalue, state);
if (ret < 0)
goto out_with_arrays;
/* ignored */
continue;
} else if (!strcmp(tag, "indexes"))
is_index = 1; is_index = 1;
else if (!strcmp(tag, "u64values")) else if (!strcmp(tag, "u64values"))
is_u64values = 1; is_u64values = 1;
@@ -1462,6 +1559,9 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology,
unsigned long long u; unsigned long long u;
if (heterotypes) { if (heterotypes) {
hwloc_obj_type_t t = HWLOC_OBJ_TYPE_NONE; hwloc_obj_type_t t = HWLOC_OBJ_TYPE_NONE;
if (!*tmp)
/* reached the end of this indexes attribute */
break;
if (hwloc_type_sscanf(tmp, &t, NULL, 0) < 0) { if (hwloc_type_sscanf(tmp, &t, NULL, 0) < 0) {
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "%s: %s with unrecognized heterogeneous type %s\n", fprintf(stderr, "%s: %s with unrecognized heterogeneous type %s\n",
@@ -1562,7 +1662,10 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology,
} }
} }
hwloc_internal_distances_add_by_index(topology, name, unique_type, different_types, nbobjs, indexes, u64values, kind, 0); if (topology->flags & HWLOC_TOPOLOGY_FLAG_NO_DISTANCES)
goto out_ignore;
hwloc_internal_distances_add_by_index(topology, name, unique_type, different_types, nbobjs, indexes, u64values, kind, 0 /* assume grouping was applied when this matrix was discovered before exporting to XML */);
/* prevent freeing below */ /* prevent freeing below */
indexes = NULL; indexes = NULL;
@@ -1716,7 +1819,8 @@ hwloc__xml_import_memattr(hwloc_topology_t topology,
} }
} }
if (name && flags != (unsigned long) -1) { if (name && flags != (unsigned long) -1
&& !(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_MEMATTRS)) {
hwloc_memattr_id_t _id; hwloc_memattr_id_t _id;
ret = hwloc_memattr_get_by_name(topology, name, &_id); ret = hwloc_memattr_get_by_name(topology, name, &_id);
@@ -1745,6 +1849,10 @@ hwloc__xml_import_memattr(hwloc_topology_t topology,
if (!strcmp(tag, "memattr_value")) { if (!strcmp(tag, "memattr_value")) {
ret = hwloc__xml_import_memattr_value(topology, id, flags, &childstate); ret = hwloc__xml_import_memattr_value(topology, id, flags, &childstate);
} else if (!strcmp(tag, "info")) {
char *infoname, *infovalue;
ret = hwloc___xml_import_info(&infoname, &infovalue, &childstate);
/* ignored */
} else { } else {
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "%s: memattr with unrecognized child %s\n", fprintf(stderr, "%s: memattr with unrecognized child %s\n",
@@ -1827,7 +1935,13 @@ hwloc__xml_import_cpukind(hwloc_topology_t topology,
goto error; goto error;
} }
hwloc_internal_cpukinds_register(topology, cpuset, forced_efficiency, infos, nr_infos, HWLOC_CPUKINDS_REGISTER_FLAG_OVERWRITE_FORCED_EFFICIENCY); if (topology->flags & HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS) {
hwloc__free_infos(infos, nr_infos);
hwloc_bitmap_free(cpuset);
} else {
hwloc_internal_cpukinds_register(topology, cpuset, forced_efficiency, infos, nr_infos, HWLOC_CPUKINDS_REGISTER_FLAG_OVERWRITE_FORCED_EFFICIENCY);
hwloc__free_infos(infos, nr_infos);
}
return state->global->close_tag(state); return state->global->close_tag(state);
@@ -2067,9 +2181,10 @@ hwloc_look_xml(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus)
if (ret < 0) if (ret < 0)
goto failed; goto failed;
if (data->version_major > 2) { if (data->version_major > 3
|| (data->version_major == 3 && data->version_minor > 0)) {
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "%s: cannot import XML version %u.%u > 2\n", fprintf(stderr, "%s: cannot import XML version %u.%u > 3.0\n",
data->msgprefix, data->version_major, data->version_minor); data->msgprefix, data->version_major, data->version_minor);
goto err; goto err;
} }
@@ -2117,6 +2232,13 @@ hwloc_look_xml(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus)
ret = hwloc__xml_import_cpukind(topology, &childstate); ret = hwloc__xml_import_cpukind(topology, &childstate);
if (ret < 0) if (ret < 0)
goto failed; goto failed;
} else if (!strcmp(tag, "info")) {
char *infoname, *infovalue;
ret = hwloc___xml_import_info(&infoname, &infovalue, &childstate);
if (ret < 0)
goto failed;
/* move 3.x topology info back to the root object */
hwloc_obj_add_info(topology->levels[0][0], infoname, infovalue);
} else { } else {
if (hwloc__xml_verbose()) if (hwloc__xml_verbose())
fprintf(stderr, "%s: ignoring unknown tag `%s' after root object.\n", fprintf(stderr, "%s: ignoring unknown tag `%s' after root object.\n",
@@ -2162,7 +2284,8 @@ done:
* but it would require to have those objects in the original XML order (like the first_numanode cousin-list). * but it would require to have those objects in the original XML order (like the first_numanode cousin-list).
* because the topology order can be different if some parents are ignored during load. * because the topology order can be different if some parents are ignored during load.
*/ */
if (nbobjs == data->nbnumanodes) { if (nbobjs == data->nbnumanodes
&& !(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_DISTANCES)) {
hwloc_obj_t *objs = malloc(nbobjs*sizeof(hwloc_obj_t)); hwloc_obj_t *objs = malloc(nbobjs*sizeof(hwloc_obj_t));
uint64_t *values = malloc(nbobjs*nbobjs*sizeof(*values)); uint64_t *values = malloc(nbobjs*nbobjs*sizeof(*values));
assert(data->nbnumanodes > 0); /* v1dist->nbobjs is >0 after import */ assert(data->nbnumanodes > 0); /* v1dist->nbobjs is >0 after import */
@@ -2644,7 +2767,8 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo
logical_to_v2array = malloc(nbobjs * sizeof(*logical_to_v2array)); logical_to_v2array = malloc(nbobjs * sizeof(*logical_to_v2array));
if (!logical_to_v2array) { if (!logical_to_v2array) {
fprintf(stderr, "xml/export/v1: failed to allocated logical_to_v2array\n"); if (HWLOC_SHOW_ALL_ERRORS())
fprintf(stderr, "hwloc/xml/export/v1: failed to allocated logical_to_v2array\n");
continue; continue;
} }
@@ -2818,6 +2942,7 @@ hwloc__xml_v1export_object_with_memory(hwloc__xml_export_state_t parentstate, hw
/* child has sibling, we must add a Group around those memory children */ /* child has sibling, we must add a Group around those memory children */
hwloc_obj_t group = parentstate->global->v1_memory_group; hwloc_obj_t group = parentstate->global->v1_memory_group;
parentstate->new_child(parentstate, &gstate, "object"); parentstate->new_child(parentstate, &gstate, "object");
group->parent = obj->parent;
group->cpuset = obj->cpuset; group->cpuset = obj->cpuset;
group->complete_cpuset = obj->complete_cpuset; group->complete_cpuset = obj->complete_cpuset;
group->nodeset = obj->nodeset; group->nodeset = obj->nodeset;
@@ -2990,7 +3115,7 @@ hwloc__xml_v2export_support(hwloc__xml_export_state_t parentstate, hwloc_topolog
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_support) == 4*sizeof(void*)); HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_support) == 4*sizeof(void*));
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_discovery_support) == 6); HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_discovery_support) == 6);
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_cpubind_support) == 11); HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_cpubind_support) == 11);
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_membind_support) == 15); HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_membind_support) == 16);
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_misc_support) == 1); HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_misc_support) == 1);
#endif #endif
@@ -3035,6 +3160,7 @@ hwloc__xml_v2export_support(hwloc__xml_export_state_t parentstate, hwloc_topolog
DO(membind,firsttouch_membind); DO(membind,firsttouch_membind);
DO(membind,bind_membind); DO(membind,bind_membind);
DO(membind,interleave_membind); DO(membind,interleave_membind);
DO(membind,weighted_interleave_membind);
DO(membind,nexttouch_membind); DO(membind,nexttouch_membind);
DO(membind,migrate_membind); DO(membind,migrate_membind);
DO(membind,get_area_memlocation); DO(membind,get_area_memlocation);
@@ -3116,9 +3242,11 @@ hwloc__xml_export_memattrs(hwloc__xml_export_state_t state, hwloc_topology_t top
continue; continue;
imattr = &topology->memattrs[id]; imattr = &topology->memattrs[id];
if ((id == HWLOC_MEMATTR_ID_LATENCY || id == HWLOC_MEMATTR_ID_BANDWIDTH) if (id < HWLOC_MEMATTR_ID_MAX && !imattr->nr_targets)
&& !imattr->nr_targets) /* no need to export standard attributes without any target,
/* no need to export target-less attributes for initial attributes, no release support attributes without those definitions */ * their definition is now standardized,
* the old hwloc importing this XML may recreate these attributes just like it would for a non-imported topology.
*/
continue; continue;
state->new_child(state, &mstate, "memattr"); state->new_child(state, &mstate, "memattr");

Some files were not shown because too many files have changed in this diff Show More