mirror of
https://github.com/xmrig/xmrig.git
synced 2025-12-07 07:55:04 -05:00
Compare commits
610 Commits
v6.12.2
...
faa3d55123
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
faa3d55123 | ||
|
|
9e7cf69ac3 | ||
|
|
57a4998ae2 | ||
|
|
34b4448a81 | ||
|
|
650d794fb1 | ||
|
|
064a61988a | ||
|
|
2ab7f85ccd | ||
|
|
e4c30eb0dd | ||
|
|
d4e57d9427 | ||
|
|
9a71190ca1 | ||
|
|
a7dcbb143e | ||
|
|
a6a0f80b12 | ||
|
|
682834b87d | ||
|
|
184d6100dc | ||
|
|
0c52d789a9 | ||
|
|
e33334f11a | ||
|
|
6184224a66 | ||
|
|
f499155032 | ||
|
|
a32b688dcf | ||
|
|
35b334d58a | ||
|
|
33623492fe | ||
|
|
77009bd0d1 | ||
|
|
46572dcb3d | ||
|
|
0d9af3347d | ||
|
|
d24e13e605 | ||
|
|
36fdfa2694 | ||
|
|
6cfc02d24f | ||
|
|
16ecb8f085 | ||
|
|
0229c65232 | ||
|
|
4a13a8a75c | ||
|
|
cd2fd9d7a6 | ||
|
|
064cd3ef20 | ||
|
|
e8bbd134f9 | ||
|
|
cf86a1e05c | ||
|
|
f9e990d0f0 | ||
|
|
200f23bba7 | ||
|
|
4234b20e21 | ||
|
|
c5d8b8265b | ||
|
|
77c14c8362 | ||
|
|
8b03750806 | ||
|
|
40949f2767 | ||
|
|
56c447e02a | ||
|
|
21c206f05d | ||
|
|
ee65b3d159 | ||
|
|
1f75d198d8 | ||
|
|
5cf2422766 | ||
|
|
a32f9b5b04 | ||
|
|
8a4792f638 | ||
|
|
e32731b60b | ||
|
|
e1ae367084 | ||
|
|
bc1c8358c4 | ||
|
|
e0af8f0c6b | ||
|
|
29f9c8cf4c | ||
|
|
26f4936f6f | ||
|
|
a411ee3565 | ||
|
|
01bd0d48a1 | ||
|
|
20d555668b | ||
|
|
56baec762f | ||
|
|
17a52fb418 | ||
|
|
7e4caa8929 | ||
|
|
ef14d55aa5 | ||
|
|
5776fdcc20 | ||
|
|
fe0f69031b | ||
|
|
e682f89298 | ||
|
|
544c393f78 | ||
|
|
9da6ea07bd | ||
|
|
62bcd6e5dc | ||
|
|
c5f98fc5c7 | ||
|
|
ecb3ec0317 | ||
|
|
3dfeed475f | ||
|
|
98c775703e | ||
|
|
8da49f2650 | ||
|
|
4570187459 | ||
|
|
748365d6e3 | ||
|
|
dd7e0e520d | ||
|
|
ef6fb728b5 | ||
|
|
92ffcd34d6 | ||
|
|
b108845627 | ||
|
|
046b2a17d3 | ||
|
|
5342f25fbf | ||
|
|
5f6bcfe949 | ||
|
|
ecef382326 | ||
|
|
86f5db19d2 | ||
|
|
b4a47d6ed0 | ||
|
|
f5095247e8 | ||
|
|
2bb07fe633 | ||
|
|
a7be8cb80c | ||
|
|
2ce16df423 | ||
|
|
5eaa6c152e | ||
|
|
6972f727c1 | ||
|
|
7897f10c48 | ||
|
|
da2fb331b3 | ||
|
|
57f3e9c3da | ||
|
|
1efe7e9562 | ||
|
|
caae7c64f0 | ||
|
|
9fbdcc0ef0 | ||
|
|
c7c26d97fe | ||
|
|
1f7e635b04 | ||
|
|
1c5786e3c5 | ||
|
|
44eb4f0038 | ||
|
|
4ab9329dda | ||
|
|
0c2ee013a7 | ||
|
|
3347537635 | ||
|
|
7a85257ad4 | ||
|
|
850b43c079 | ||
|
|
b8e4eaac87 | ||
|
|
b9dd5e3eae | ||
|
|
032c28d50a | ||
|
|
f6c50b5393 | ||
|
|
e65e283aac | ||
|
|
5552e1f864 | ||
|
|
3beccae136 | ||
|
|
ef9bf2aa8c | ||
|
|
42f645fa3b | ||
|
|
1fb5be6c1d | ||
|
|
08c43b7e58 | ||
|
|
7b016fd9ce | ||
|
|
688d4f5ee1 | ||
|
|
64913e3163 | ||
|
|
48fa095e3e | ||
|
|
c9b9ef51ee | ||
|
|
dd782c7001 | ||
|
|
b49197f808 | ||
|
|
f9c4c57216 | ||
|
|
a5b8b85967 | ||
|
|
a5aa2c9042 | ||
|
|
fa35a32eee | ||
|
|
7b6ce59821 | ||
|
|
33315ba2ef | ||
|
|
2c9c40d623 | ||
|
|
daa6328418 | ||
|
|
8afd4d5f2f | ||
|
|
77e2f3a028 | ||
|
|
206295c6cb | ||
|
|
07e1e77c4f | ||
|
|
50a98a4bb1 | ||
|
|
c50369d65d | ||
|
|
592b0c9c76 | ||
|
|
89eab0eff2 | ||
|
|
8084ff37a5 | ||
|
|
7cf3db7750 | ||
|
|
4bda6e054d | ||
|
|
64a0ed413b | ||
|
|
0b59b7eb43 | ||
|
|
ae6b10b5a4 | ||
|
|
705a7eac0c | ||
|
|
10bfffe033 | ||
|
|
4131aa4754 | ||
|
|
fee51b20fa | ||
|
|
5e66efabcf | ||
|
|
08901a9a4b | ||
|
|
a19f590ee6 | ||
|
|
2fa754825d | ||
|
|
f3446c0a94 | ||
|
|
71209d4cd7 | ||
|
|
0a3313cb76 | ||
|
|
e855723cd9 | ||
|
|
6e294bd046 | ||
|
|
dfe70d9ea7 | ||
|
|
2ecf10cdcb | ||
|
|
b55ca8e547 | ||
|
|
12577df7ba | ||
|
|
64f5bb467a | ||
|
|
5717e72367 | ||
|
|
e7de104d88 | ||
|
|
3b5e04b1b7 | ||
|
|
2e77faa80c | ||
|
|
6e63a246bf | ||
|
|
09abc81255 | ||
|
|
fc698f7bcf | ||
|
|
cb2f8fd453 | ||
|
|
59c6c42ceb | ||
|
|
6c10cc5a4b | ||
|
|
d5a8f8a5ae | ||
|
|
d94d052e6c | ||
|
|
ae2b7e3348 | ||
|
|
7d7f30701f | ||
|
|
e80fc25789 | ||
|
|
ff53be5f3b | ||
|
|
6981e68ae3 | ||
|
|
c7e541d84f | ||
|
|
a2ae17b4c4 | ||
|
|
554b60966b | ||
|
|
0378aa8df4 | ||
|
|
6dbd46a891 | ||
|
|
055db83142 | ||
|
|
cdd5dff337 | ||
|
|
bc5fe8f456 | ||
|
|
0bc87345c4 | ||
|
|
f17d31e61a | ||
|
|
e6bf4c0077 | ||
|
|
ff79b8fce4 | ||
|
|
af87369e4f | ||
|
|
65fc16d5ac | ||
|
|
826e23b4c4 | ||
|
|
548fbb9f71 | ||
|
|
02d45834e1 | ||
|
|
1252a4710e | ||
|
|
5891f1f06b | ||
|
|
5dcbab7e3a | ||
|
|
7b51e23aa0 | ||
|
|
7f7fc363e1 | ||
|
|
c4e1363148 | ||
|
|
a2e9b3456d | ||
|
|
4790318685 | ||
|
|
038c4fbe34 | ||
|
|
d65d34ef36 | ||
|
|
af6647f377 | ||
|
|
8f9adc02c0 | ||
|
|
5e0079f012 | ||
|
|
dc5e341778 | ||
|
|
0f81ab4c67 | ||
|
|
62a3a98e7d | ||
|
|
d31b3b7c76 | ||
|
|
e352109431 | ||
|
|
88b0385bfe | ||
|
|
9508332258 | ||
|
|
bc5c1f7e65 | ||
|
|
22118330e3 | ||
|
|
240f2450af | ||
|
|
6e856ca39c | ||
|
|
6047786f43 | ||
|
|
7b8ba9ac09 | ||
|
|
02259fec05 | ||
|
|
51728b2d55 | ||
|
|
ebe818a5fb | ||
|
|
790a71b030 | ||
|
|
c62622b114 | ||
|
|
fc643e2936 | ||
|
|
12b9b62ef7 | ||
|
|
667f636c62 | ||
|
|
81e87a6931 | ||
|
|
540b223eab | ||
|
|
75474be060 | ||
|
|
49f34e59a6 | ||
|
|
223add4e22 | ||
|
|
435fc86120 | ||
|
|
c0143b90ce | ||
|
|
c3cdffe86d | ||
|
|
8a4da33bea | ||
|
|
1c7a339527 | ||
|
|
490acd6e55 | ||
|
|
6ecf57959b | ||
|
|
e2c58126e9 | ||
|
|
0ed4b35cd3 | ||
|
|
afe2aa4402 | ||
|
|
3f7533a645 | ||
|
|
6ef0409086 | ||
|
|
64b0d9562e | ||
|
|
770b71c69a | ||
|
|
44642643f8 | ||
|
|
273bb84df8 | ||
|
|
4d0b8c9daf | ||
|
|
7d4d48e83b | ||
|
|
2ea37cdf37 | ||
|
|
a02afe6d4f | ||
|
|
6e86dddc65 | ||
|
|
0171faffe7 | ||
|
|
25decd1b7f | ||
|
|
354b9ddb34 | ||
|
|
3ad6ab56a5 | ||
|
|
1aa0e37b54 | ||
|
|
807c64ddb1 | ||
|
|
5bf90704a6 | ||
|
|
912d1e362b | ||
|
|
eeb459506c | ||
|
|
f4ec0287c4 | ||
|
|
483d6ada3d | ||
|
|
28e81bd7c0 | ||
|
|
54e75bc7c4 | ||
|
|
c388113a30 | ||
|
|
36afeec225 | ||
|
|
4b5e56416d | ||
|
|
0d314d0469 | ||
|
|
7fc45dfb2d | ||
|
|
2ba40edee0 | ||
|
|
bc4dd11761 | ||
|
|
7b52a41459 | ||
|
|
b5de214ff9 | ||
|
|
8bd3b393ef | ||
|
|
9223c2f027 | ||
|
|
6346d36d1b | ||
|
|
93c07e1d34 | ||
|
|
0ba3000982 | ||
|
|
f0e7de8c71 | ||
|
|
1c4eb6c5fe | ||
|
|
63e21dfe63 | ||
|
|
b2d9dab2e3 | ||
|
|
4c57b60e59 | ||
|
|
e6c81d7166 | ||
|
|
94840c70d8 | ||
|
|
e1478bfa94 | ||
|
|
6df6e15267 | ||
|
|
834ea44507 | ||
|
|
73dc0ffb7e | ||
|
|
e57641d6b1 | ||
|
|
b324e34444 | ||
|
|
7e49fc828d | ||
|
|
fdfbb60840 | ||
|
|
ee51dec499 | ||
|
|
575742078c | ||
|
|
6bab67bced | ||
|
|
db9069897d | ||
|
|
30641b1bdf | ||
|
|
45061f40d8 | ||
|
|
9f70752090 | ||
|
|
22d6a7525e | ||
|
|
c0bce256e1 | ||
|
|
09a7219651 | ||
|
|
97869f3347 | ||
|
|
1bbbff7d17 | ||
|
|
97683e5719 | ||
|
|
059d5d8421 | ||
|
|
285719cde4 | ||
|
|
c877ba8145 | ||
|
|
6793981066 | ||
|
|
1ae9a4e428 | ||
|
|
0e57053c5a | ||
|
|
232d2d6dc5 | ||
|
|
a3cb74f29b | ||
|
|
56753d7c4a | ||
|
|
f7b9e3ca67 | ||
|
|
56c95703a5 | ||
|
|
eadf272425 | ||
|
|
cb227a0a79 | ||
|
|
4c171bea1e | ||
|
|
e55a854314 | ||
|
|
5bdfafd719 | ||
|
|
15a2091837 | ||
|
|
48bd09f730 | ||
|
|
21fb970949 | ||
|
|
23c12fc351 | ||
|
|
71d193676a | ||
|
|
baef34ba8c | ||
|
|
95a739d821 | ||
|
|
7b9135aadc | ||
|
|
e6f694ca9e | ||
|
|
afd79e7537 | ||
|
|
a2728af4f7 | ||
|
|
65dbded9c4 | ||
|
|
f25e65b5ac | ||
|
|
bbb19ea2f9 | ||
|
|
1c5b332add | ||
|
|
87fd0ea94a | ||
|
|
4a42dca2cb | ||
|
|
b674fafa0f | ||
|
|
b5da73389f | ||
|
|
bf5e38545c | ||
|
|
f7543ada60 | ||
|
|
95e1705fc8 | ||
|
|
2d0b07afbc | ||
|
|
b33ccf0e0b | ||
|
|
4f5f9bdffb | ||
|
|
4d3e3daa6a | ||
|
|
802029e5f5 | ||
|
|
14117e9658 | ||
|
|
7ccb1d65f0 | ||
|
|
15de3cc16c | ||
|
|
124daa4afd | ||
|
|
5de1609b7d | ||
|
|
644f4cc017 | ||
|
|
41a3f97060 | ||
|
|
452080cfbd | ||
|
|
4f103b6b45 | ||
|
|
39609c9183 | ||
|
|
2adb7b2b74 | ||
|
|
3673137df6 | ||
|
|
faa7095865 | ||
|
|
e0701f9dad | ||
|
|
14aacf8636 | ||
|
|
c764441337 | ||
|
|
05fae12a63 | ||
|
|
8059ce67f9 | ||
|
|
10111fd7f9 | ||
|
|
2d25bec2df | ||
|
|
cab244d468 | ||
|
|
4001488888 | ||
|
|
9bec1521b8 | ||
|
|
7bde3ed5f7 | ||
|
|
2e738509bb | ||
|
|
f5447088cb | ||
|
|
7f2f50a8d9 | ||
|
|
5747ccfafc | ||
|
|
93081eb1f6 | ||
|
|
4bf65c8669 | ||
|
|
1a6fc3a665 | ||
|
|
8dede14ac8 | ||
|
|
20687a397e | ||
|
|
454f97fa0f | ||
|
|
8149fc7dcb | ||
|
|
a39ab89236 | ||
|
|
5b8501fb57 | ||
|
|
039be2ab75 | ||
|
|
718c7e0fc1 | ||
|
|
ef7951b91d | ||
|
|
214b1f021b | ||
|
|
81b18c0741 | ||
|
|
8e83f72456 | ||
|
|
c2ae625032 | ||
|
|
60566dc84c | ||
|
|
4ea8fe694d | ||
|
|
669d1ab008 | ||
|
|
e87d5111a2 | ||
|
|
56158779de | ||
|
|
efb322df66 | ||
|
|
e673d541c1 | ||
|
|
a98db529fb | ||
|
|
1a9eaaad8f | ||
|
|
be5fbca9b6 | ||
|
|
2feb264375 | ||
|
|
00990f2649 | ||
|
|
d78713be48 | ||
|
|
77367abe13 | ||
|
|
cd046f6fd0 | ||
|
|
63b7ec2887 | ||
|
|
a1e8f1c3e5 | ||
|
|
6db480a1ab | ||
|
|
a7acd9de6d | ||
|
|
a64f4d1870 | ||
|
|
9bfe59b630 | ||
|
|
1a4bf16521 | ||
|
|
a4d5d0a75a | ||
|
|
c40f1f9f66 | ||
|
|
15e5052dd0 | ||
|
|
f9f7963453 | ||
|
|
02240eff8c | ||
|
|
d64c963e5e | ||
|
|
c6292ce9ee | ||
|
|
cd652e2644 | ||
|
|
6f5ef0fe0f | ||
|
|
01fa968763 | ||
|
|
8e6f3ad99e | ||
|
|
b1f2479ec1 | ||
|
|
ecceba8ecd | ||
|
|
cb5f4a9c17 | ||
|
|
3a8ebfdcb6 | ||
|
|
0dcafeb571 | ||
|
|
a1d7ee4c6b | ||
|
|
03e70ba2ed | ||
|
|
19ef8c5d65 | ||
|
|
63baa9e263 | ||
|
|
1248bd5859 | ||
|
|
5c951ddb8a | ||
|
|
4ab0ad928d | ||
|
|
e67eb47796 | ||
|
|
a6656a8c49 | ||
|
|
a903d0a5bd | ||
|
|
ceaebfd877 | ||
|
|
5156ff11a8 | ||
|
|
e0143a92a8 | ||
|
|
f682d9a2e9 | ||
|
|
3bece0ff40 | ||
|
|
e6c456a970 | ||
|
|
923d1d712f | ||
|
|
ae8459bd35 | ||
|
|
3a7be07c62 | ||
|
|
e1cc0000c6 | ||
|
|
1210e8e95c | ||
|
|
a45fbd9cae | ||
|
|
f6d45f7990 | ||
|
|
b9464f993b | ||
|
|
f8f73b0cd7 | ||
|
|
df6ab2edd8 | ||
|
|
8bf7600154 | ||
|
|
a30501956f | ||
|
|
c287a40a20 | ||
|
|
04f50c24e2 | ||
|
|
7627b23212 | ||
|
|
e90e7febfb | ||
|
|
733b85a132 | ||
|
|
35ba786e63 | ||
|
|
446810a837 | ||
|
|
c6a68c3e51 | ||
|
|
ca8bef3ade | ||
|
|
d735caa334 | ||
|
|
eb54cc0e0f | ||
|
|
84c67c37cd | ||
|
|
b44f38a362 | ||
|
|
8ed4088d0a | ||
|
|
cdcea2a4f9 | ||
|
|
f0d80326ec | ||
|
|
cb8fc26cbe | ||
|
|
5ec5b5ed00 | ||
|
|
67e29c1af1 | ||
|
|
4bd94a79a4 | ||
|
|
80e597d951 | ||
|
|
2e269f5b8c | ||
|
|
57b8e35903 | ||
|
|
53be5765e6 | ||
|
|
68741c925b | ||
|
|
9ce207e667 | ||
|
|
07e0966517 | ||
|
|
a9d4c2a923 | ||
|
|
dc02e1feaa | ||
|
|
7daff331dc | ||
|
|
058a2fb0f4 | ||
|
|
4fff3b946e | ||
|
|
f7aa5e781b | ||
|
|
298c5cccfa | ||
|
|
2985571620 | ||
|
|
279d29cd7f | ||
|
|
387320ad6d | ||
|
|
76cd83edb2 | ||
|
|
7f4d667351 | ||
|
|
8027716264 | ||
|
|
a459dd7741 | ||
|
|
ef6011ac12 | ||
|
|
6d66051d92 | ||
|
|
b2cc2ef0d7 | ||
|
|
9805320517 | ||
|
|
582d17bb84 | ||
|
|
9e5f5b35a6 | ||
|
|
9a9c69ff50 | ||
|
|
5c1f3f395c | ||
|
|
23cefffe43 | ||
|
|
d048d5a639 | ||
|
|
9a6f773dea | ||
|
|
cd7c7902a9 | ||
|
|
fd3dad920d | ||
|
|
3dc192f63e | ||
|
|
123c7ab140 | ||
|
|
838996a0fc | ||
|
|
6e4fea34a4 | ||
|
|
b52c289931 | ||
|
|
4dbb5b89da | ||
|
|
84d0212e79 | ||
|
|
35acb3f00b | ||
|
|
7f2771b466 | ||
|
|
5fdf5516ff | ||
|
|
234de96784 | ||
|
|
df4532d9a1 | ||
|
|
c27f535768 | ||
|
|
c7ac314110 | ||
|
|
3215403815 | ||
|
|
bea2a6cf5b | ||
|
|
a28f411339 | ||
|
|
460d9c75c5 | ||
|
|
d1033abbe5 | ||
|
|
9eac9dd30a | ||
|
|
8d7b6adf98 | ||
|
|
230ff87634 | ||
|
|
19adf2630a | ||
|
|
3de4b16117 | ||
|
|
602e3a7587 | ||
|
|
4f6ffb67c1 | ||
|
|
a0194ddd18 | ||
|
|
30f7e876a2 | ||
|
|
5958490c23 | ||
|
|
f92ad4423d | ||
|
|
e0749a82c2 | ||
|
|
440aa003af | ||
|
|
9580f5395f | ||
|
|
e9ae4deb91 | ||
|
|
aee0762424 | ||
|
|
e6332eff2b | ||
|
|
d0a632f557 | ||
|
|
f4cdc527b0 | ||
|
|
661dc515ab | ||
|
|
6d9bafe068 | ||
|
|
202c8aaee8 | ||
|
|
410084384e | ||
|
|
43e98c509a | ||
|
|
08d79ddcdc | ||
|
|
0fdf063760 | ||
|
|
929205536c | ||
|
|
d24581c963 | ||
|
|
2eb2e90631 | ||
|
|
0842e6b9d2 | ||
|
|
93805cd167 | ||
|
|
755fe28bc3 | ||
|
|
59d780169f | ||
|
|
a30ede04f3 | ||
|
|
3f2dfa4279 | ||
|
|
7177b42903 | ||
|
|
21638c2f58 | ||
|
|
02b2b87bb6 | ||
|
|
c8a9dba8fd | ||
|
|
9a77d39a3f | ||
|
|
28a1d0fe1e | ||
|
|
0243789c04 | ||
|
|
45dd58f808 | ||
|
|
1b4abe1e98 | ||
|
|
9f778742a6 | ||
|
|
015f8aeed4 | ||
|
|
9e6311a7e0 | ||
|
|
0af9d2e75b | ||
|
|
6e2a84a46c | ||
|
|
6bb8913066 | ||
|
|
cf104ebdc5 | ||
|
|
ecba750442 | ||
|
|
3967badc55 | ||
|
|
3f3f9b0661 | ||
|
|
e3fc78a66c | ||
|
|
e6d833c227 | ||
|
|
ebe299902c | ||
|
|
bc63b63a2a | ||
|
|
e739e7d704 | ||
|
|
1bae083587 | ||
|
|
88959bd703 | ||
|
|
93e689d601 | ||
|
|
a136790bee | ||
|
|
29f2dd4b9e | ||
|
|
3003c067d3 | ||
|
|
89bc6418b1 | ||
|
|
8458b4ee39 | ||
|
|
7bfb801ce2 | ||
|
|
4567499905 | ||
|
|
9b63955b09 | ||
|
|
0414511de0 | ||
|
|
b61dad128c |
3
.github/ISSUE_TEMPLATE/bug_report.md
vendored
3
.github/ISSUE_TEMPLATE/bug_report.md
vendored
@@ -17,6 +17,9 @@ Steps to reproduce the behavior.
|
||||
A clear and concise description of what you expected to happen.
|
||||
|
||||
**Required data**
|
||||
- XMRig version
|
||||
- Either the exact link to a release you downloaded from https://github.com/xmrig/xmrig/releases
|
||||
- Or the exact command lines that you used to build XMRig
|
||||
- Miner log as text or screenshot
|
||||
- Config file or command line (without wallets)
|
||||
- OS: [e.g. Windows]
|
||||
|
||||
198
CHANGELOG.md
198
CHANGELOG.md
@@ -1,3 +1,201 @@
|
||||
# v6.22.3
|
||||
- [#3605](https://github.com/xmrig/xmrig/pull/3605) CUDA backend: added missing RandomX dataset update.
|
||||
- [#3646](https://github.com/xmrig/xmrig/pull/3646) Optimized auto-config for AMD CPUs with less than 2 MB L3 cache per thread.
|
||||
- [#3652](https://github.com/xmrig/xmrig/pull/3652) Fixed possible crash when submitting RandomX benchmark.
|
||||
- [#3662](https://github.com/xmrig/xmrig/pull/3662) Fixed OpenCL kernel compilation error on some platforms.
|
||||
|
||||
# v6.22.2
|
||||
- [#3569](https://github.com/xmrig/xmrig/pull/3569) Fixed corrupted API output in some rare conditions.
|
||||
- [#3571](https://github.com/xmrig/xmrig/pull/3571) Fixed number of threads on the new Intel Core Ultra CPUs.
|
||||
|
||||
# v6.22.1
|
||||
- [#3531](https://github.com/xmrig/xmrig/pull/3531) Always reset nonce on RandomX dataset change.
|
||||
- [#3534](https://github.com/xmrig/xmrig/pull/3534) Fixed threads auto-config on Zen5.
|
||||
- [#3535](https://github.com/xmrig/xmrig/pull/3535) RandomX: tweaks for Zen5.
|
||||
- [#3539](https://github.com/xmrig/xmrig/pull/3539) Added Zen5 to `randomx_boost.sh`.
|
||||
- [#3540](https://github.com/xmrig/xmrig/pull/3540) Detect AMD engineering samples in `randomx_boost.sh`.
|
||||
|
||||
# v6.22.0
|
||||
- [#2411](https://github.com/xmrig/xmrig/pull/2411) Added support for [Yada](https://yadacoin.io/) (`rx/yada` algorithm).
|
||||
- [#3492](https://github.com/xmrig/xmrig/pull/3492) Fixed `--background` option on Unix systems.
|
||||
- [#3518](https://github.com/xmrig/xmrig/pull/3518) Possible fix for corrupted API output in rare cases.
|
||||
- [#3522](https://github.com/xmrig/xmrig/pull/3522) Removed `rx/keva` algorithm.
|
||||
- [#3525](https://github.com/xmrig/xmrig/pull/3525) Added Zen5 detection.
|
||||
- [#3528](https://github.com/xmrig/xmrig/pull/3528) Added `rx/yada` OpenCL support.
|
||||
|
||||
# v6.21.3
|
||||
- [#3462](https://github.com/xmrig/xmrig/pull/3462) RandomX: correct memcpy size for JIT initialization.
|
||||
|
||||
# v6.21.2
|
||||
- The dependencies of all prebuilt releases have been updated. Support for old Ubuntu releases has been dropped.
|
||||
- [#2800](https://github.com/xmrig/xmrig/issues/2800) Fixed donation with GhostRider algorithm for builds without KawPow algorithm.
|
||||
- [#3436](https://github.com/xmrig/xmrig/pull/3436) Fixed: the file log writer was not thread-safe.
|
||||
- [#3450](https://github.com/xmrig/xmrig/pull/3450) Fixed RandomX crash when compiled with fortify_source.
|
||||
|
||||
# v6.21.1
|
||||
- [#3391](https://github.com/xmrig/xmrig/pull/3391) Added support for Townforge (a Monero fork using RandomX).
|
||||
- [#3399](https://github.com/xmrig/xmrig/pull/3399) Fixed Zephyr mining (OpenCL).
|
||||
- [#3420](https://github.com/xmrig/xmrig/pull/3420) Fixed segfault in HTTP API rebind.
|
||||
|
||||
# v6.21.0
|
||||
- [#3302](https://github.com/xmrig/xmrig/pull/3302) [#3312](https://github.com/xmrig/xmrig/pull/3312) Enabled keepalive for Windows (>= Vista).
|
||||
- [#3320](https://github.com/xmrig/xmrig/pull/3320) Added "built for OS/architecture/bits" to "ABOUT".
|
||||
- [#3339](https://github.com/xmrig/xmrig/pull/3339) Added SNI option for TLS connections.
|
||||
- [#3342](https://github.com/xmrig/xmrig/pull/3342) Update `cn_main_loop.asm`.
|
||||
- [#3346](https://github.com/xmrig/xmrig/pull/3346) ARM64 JIT: don't use `x18` register.
|
||||
- [#3348](https://github.com/xmrig/xmrig/pull/3348) Update to latest `sse2neon.h`.
|
||||
- [#3356](https://github.com/xmrig/xmrig/pull/3356) Updated pricing record size for **Zephyr** solo mining.
|
||||
- [#3358](https://github.com/xmrig/xmrig/pull/3358) **Zephyr** solo mining: handle multiple outputs.
|
||||
|
||||
# v6.20.0
|
||||
- Added new ARM CPU names.
|
||||
- [#2394](https://github.com/xmrig/xmrig/pull/2394) Added new CMake options `ARM_V8` and `ARM_V7`.
|
||||
- [#2830](https://github.com/xmrig/xmrig/pull/2830) Added API rebind polling.
|
||||
- [#2927](https://github.com/xmrig/xmrig/pull/2927) Fixed compatibility with hwloc 1.11.x.
|
||||
- [#3060](https://github.com/xmrig/xmrig/pull/3060) Added x86 to `README.md`.
|
||||
- [#3236](https://github.com/xmrig/xmrig/pull/3236) Fixed: receive CUDA loader error on Linux too.
|
||||
- [#3290](https://github.com/xmrig/xmrig/pull/3290) Added [Zephyr](https://www.zephyrprotocol.com/) coin support for solo mining.
|
||||
|
||||
# v6.19.3
|
||||
- [#3245](https://github.com/xmrig/xmrig/issues/3245) Improved algorithm negotiation for donation rounds by sending extra information about current mining job.
|
||||
- [#3254](https://github.com/xmrig/xmrig/pull/3254) Tweaked auto-tuning for Intel CPUs.
|
||||
- [#3271](https://github.com/xmrig/xmrig/pull/3271) RandomX: optimized program generation.
|
||||
- [#3273](https://github.com/xmrig/xmrig/pull/3273) RandomX: fixed undefined behavior.
|
||||
- [#3275](https://github.com/xmrig/xmrig/pull/3275) RandomX: fixed `jccErratum` list.
|
||||
- [#3280](https://github.com/xmrig/xmrig/pull/3280) Updated example scripts.
|
||||
|
||||
# v6.19.2
|
||||
- [#3230](https://github.com/xmrig/xmrig/pull/3230) Fixed parsing of `TX_EXTRA_MERGE_MINING_TAG`.
|
||||
- [#3232](https://github.com/xmrig/xmrig/pull/3232) Added new `X-Hash-Difficulty` HTTP header.
|
||||
- [#3240](https://github.com/xmrig/xmrig/pull/3240) Improved .cmd files when run by shortcuts on another drive.
|
||||
- [#3241](https://github.com/xmrig/xmrig/pull/3241) Added view tag calculation (fixes Wownero solo mining issue).
|
||||
|
||||
# v6.19.1
|
||||
- Resolved deprecated methods warnings with OpenSSL 3.0.
|
||||
- [#3213](https://github.com/xmrig/xmrig/pull/3213) Fixed build with 32-bit clang 15.
|
||||
- [#3218](https://github.com/xmrig/xmrig/pull/3218) Fixed: `--randomx-wrmsr=-1` worked only on Intel.
|
||||
- [#3228](https://github.com/xmrig/xmrig/pull/3228) Fixed build with gcc 13.
|
||||
|
||||
# v6.19.0
|
||||
- [#3144](https://github.com/xmrig/xmrig/pull/3144) Update to latest `sse2neon.h`.
|
||||
- [#3161](https://github.com/xmrig/xmrig/pull/3161) MSVC build: enabled parallel compilation.
|
||||
- [#3163](https://github.com/xmrig/xmrig/pull/3163) Improved Zen 3 MSR mod.
|
||||
- [#3176](https://github.com/xmrig/xmrig/pull/3176) Update cmake required version to 3.1.
|
||||
- [#3182](https://github.com/xmrig/xmrig/pull/3182) DragonflyBSD compilation fixes.
|
||||
- [#3196](https://github.com/xmrig/xmrig/pull/3196) Show IP address for failed connections.
|
||||
- [#3185](https://github.com/xmrig/xmrig/issues/3185) Fixed macOS DMI reader.
|
||||
- [#3198](https://github.com/xmrig/xmrig/pull/3198) Fixed broken RandomX light mode mining.
|
||||
- [#3202](https://github.com/xmrig/xmrig/pull/3202) Solo mining: added job timeout (default is 15 seconds).
|
||||
|
||||
# v6.18.1
|
||||
- [#3129](https://github.com/xmrig/xmrig/pull/3129) Fix: protectRX flushed CPU cache only on MacOS/iOS.
|
||||
- [#3126](https://github.com/xmrig/xmrig/pull/3126) Don't reset when pool sends the same job blob.
|
||||
- [#3120](https://github.com/xmrig/xmrig/pull/3120) RandomX: optimized `CFROUND` elimination.
|
||||
- [#3109](https://github.com/xmrig/xmrig/pull/3109) RandomX: added Blake2 AVX2 version.
|
||||
- [#3082](https://github.com/xmrig/xmrig/pull/3082) Fixed GCC 12 warnings.
|
||||
- [#3075](https://github.com/xmrig/xmrig/pull/3075) Recognize `armv7ve` as valid ARMv7 target.
|
||||
- [#3132](https://github.com/xmrig/xmrig/pull/3132) RandomX: added MSR mod for Zen 4.
|
||||
- [#3134](https://github.com/xmrig/xmrig/pull/3134) Added Zen4 to `randomx_boost.sh`.
|
||||
|
||||
# v6.18.0
|
||||
- [#3067](https://github.com/xmrig/xmrig/pull/3067) Monero v15 network upgrade support and more housekeeping.
|
||||
- Removed deprecated AstroBWTv1 and v2.
|
||||
- Fixed debug GhostRider build.
|
||||
- Monero v15 network upgrade support.
|
||||
- Fixed ZMQ debug log.
|
||||
- Improved daemon ZMQ mining stability.
|
||||
- [#3054](https://github.com/xmrig/xmrig/pull/3054) Fixes for 32-bit ARM.
|
||||
- [#3042](https://github.com/xmrig/xmrig/pull/3042) Fixed being unable to resume from `pause-on-battery`.
|
||||
- [#3031](https://github.com/xmrig/xmrig/pull/3031) Fixed `--cpu-priority` not working sometimes.
|
||||
- [#3020](https://github.com/xmrig/xmrig/pull/3020) Removed old AstroBWT algorithm.
|
||||
|
||||
# v6.17.0
|
||||
- [#2954](https://github.com/xmrig/xmrig/pull/2954) **Dero HE fork support (`astrobwt/v2` algorithm).**
|
||||
- [#2961](https://github.com/xmrig/xmrig/pull/2961) Dero HE (`astrobwt/v2`) CUDA config generator.
|
||||
- [#2969](https://github.com/xmrig/xmrig/pull/2969) Dero HE (`astrobwt/v2`) OpenCL support.
|
||||
- Fixed displayed DMI memory information for empty slots.
|
||||
- [#2932](https://github.com/xmrig/xmrig/pull/2932) Fixed GhostRider with hwloc disabled.
|
||||
|
||||
# v6.16.4
|
||||
- [#2904](https://github.com/xmrig/xmrig/pull/2904) Fixed unaligned memory accesses.
|
||||
- [#2908](https://github.com/xmrig/xmrig/pull/2908) Added MSVC/2022 to `version.h`.
|
||||
- [#2910](https://github.com/xmrig/xmrig/issues/2910) Fixed donation for GhostRider/RTM.
|
||||
|
||||
# v6.16.3
|
||||
- [#2778](https://github.com/xmrig/xmrig/pull/2778) Fixed `READY threads X/X` display after algorithm switching.
|
||||
- [#2782](https://github.com/xmrig/xmrig/pull/2782) Updated GhostRider documentation.
|
||||
- [#2815](https://github.com/xmrig/xmrig/pull/2815) Fixed `cn-heavy` in 32-bit builds.
|
||||
- [#2827](https://github.com/xmrig/xmrig/pull/2827) GhostRider: set correct priority for helper threads.
|
||||
- [#2837](https://github.com/xmrig/xmrig/pull/2837) RandomX: don't restart mining threads when the seed changes.
|
||||
- [#2848](https://github.com/xmrig/xmrig/pull/2848) GhostRider: added support for `client.reconnect` method.
|
||||
- [#2856](https://github.com/xmrig/xmrig/pull/2856) Fix for short responses from some Raptoreum pools.
|
||||
- [#2873](https://github.com/xmrig/xmrig/pull/2873) Fixed GhostRider benchmark on single-core systems.
|
||||
- [#2882](https://github.com/xmrig/xmrig/pull/2882) Fixed ARMv7 compilation.
|
||||
- [#2893](https://github.com/xmrig/xmrig/pull/2893) KawPow OpenCL: use separate UV loop for building programs.
|
||||
|
||||
# v6.16.2
|
||||
- [#2751](https://github.com/xmrig/xmrig/pull/2751) Fixed crash on CPUs supporting VAES and running GCC-compiled xmrig.
|
||||
- [#2761](https://github.com/xmrig/xmrig/pull/2761) Fixed broken auto-tuning in GCC Windows build.
|
||||
- [#2771](https://github.com/xmrig/xmrig/issues/2771) Fixed environment variables support for GhostRider and KawPow.
|
||||
- [#2769](https://github.com/xmrig/xmrig/pull/2769) Performance fixes:
|
||||
- Fixed several performance bottlenecks introduced in v6.16.1.
|
||||
- Fixed overall GCC-compiled build performance; it's now the same speed as the MSVC build.
|
||||
- **Linux builds are up to 10% faster now compared to v6.16.0 GCC build.**
|
||||
- **Windows builds are up to 5% faster now compared to v6.16.0 MSVC build.**
|
||||
|
||||
# v6.16.1
|
||||
- [#2729](https://github.com/xmrig/xmrig/pull/2729) GhostRider fixes:
|
||||
- Added average hashrate display.
|
||||
- Fixed the number of threads shown at startup.
|
||||
- Fixed `--threads` or `-t` command line option (but using `--cpu-max-threads-hint` is recommended).
|
||||
- [#2738](https://github.com/xmrig/xmrig/pull/2738) GhostRider fixes:
|
||||
- Fixed "difficulty is not a number" error when diff is high on some pools.
|
||||
- Fixed GhostRider compilation when `WITH_KAWPOW=OFF`.
|
||||
- [#2740](https://github.com/xmrig/xmrig/pull/2740) Added VAES support for Cryptonight variants **+4% speedup on Zen3**.
|
||||
- VAES instructions are available on Intel Ice Lake/AMD Zen3 and newer CPUs.
|
||||
- +4% speedup on Ryzen 5 5600X.
|
||||
|
||||
# v6.16.0
|
||||
- [#2712](https://github.com/xmrig/xmrig/pull/2712) **GhostRider algorithm (Raptoreum) support**: read the [RELEASE NOTES](src/crypto/ghostrider/README.md) for quick start guide and performance comparisons.
|
||||
- [#2682](https://github.com/xmrig/xmrig/pull/2682) Fixed: use cn-heavy optimization only for Vermeer CPUs.
|
||||
- [#2684](https://github.com/xmrig/xmrig/pull/2684) MSR mod: fix for error 183.
|
||||
|
||||
# v6.15.3
|
||||
- [#2614](https://github.com/xmrig/xmrig/pull/2614) OpenCL fixes for non-AMD platforms.
|
||||
- [#2623](https://github.com/xmrig/xmrig/pull/2623) Fixed compiling without kawpow.
|
||||
- [#2636](https://github.com/xmrig/xmrig/pull/2636) [#2639](https://github.com/xmrig/xmrig/pull/2639) AstroBWT speedup (up to +35%).
|
||||
- [#2646](https://github.com/xmrig/xmrig/pull/2646) Fixed MSVC compilation error.
|
||||
|
||||
# v6.15.2
|
||||
- [#2606](https://github.com/xmrig/xmrig/pull/2606) Fixed: AstroBWT auto-config ignored `max-threads-hint`.
|
||||
- Fixed possible crash on Windows (regression in v6.15.1).
|
||||
|
||||
# v6.15.1
|
||||
- [#2586](https://github.com/xmrig/xmrig/pull/2586) Fixed Windows 7 compatibility.
|
||||
- [#2594](https://github.com/xmrig/xmrig/pull/2594) Added Windows taskbar icon colors.
|
||||
|
||||
# v6.15.0
|
||||
- [#2548](https://github.com/xmrig/xmrig/pull/2548) Added automatic coin detection for daemon mining.
|
||||
- [#2563](https://github.com/xmrig/xmrig/pull/2563) Added new algorithm RandomX Graft (`rx/graft`).
|
||||
- [#2565](https://github.com/xmrig/xmrig/pull/2565) AstroBWT: added AVX2 Salsa20 implementation.
|
||||
- Added support for new CUDA plugin API (previous API still supported).
|
||||
|
||||
# v6.14.1
|
||||
- [#2532](https://github.com/xmrig/xmrig/pull/2532) Refactoring: stable (persistent) algorithms IDs.
|
||||
- [#2537](https://github.com/xmrig/xmrig/pull/2537) Fixed Termux build.
|
||||
|
||||
# v6.14.0
|
||||
- [#2484](https://github.com/xmrig/xmrig/pull/2484) Added ZeroMQ support for solo mining.
|
||||
- [#2476](https://github.com/xmrig/xmrig/issues/2476) Fixed crash in DMI memory reader.
|
||||
- [#2492](https://github.com/xmrig/xmrig/issues/2492) Added missing `--huge-pages-jit` command line option.
|
||||
- [#2512](https://github.com/xmrig/xmrig/pull/2512) Added display of the number of transactions in the pool job.
|
||||
|
||||
# v6.13.1
|
||||
- [#2468](https://github.com/xmrig/xmrig/pull/2468) Fixed regression in previous version: don't send miner signature during regular mining.
|
||||
|
||||
# v6.13.0
|
||||
- [#2445](https://github.com/xmrig/xmrig/pull/2445) Added support for solo mining with miner signatures for the upcoming Wownero fork.
|
||||
|
||||
# v6.12.2
|
||||
- [#2280](https://github.com/xmrig/xmrig/issues/2280) GPU backends are now disabled in benchmark mode.
|
||||
- [#2322](https://github.com/xmrig/xmrig/pull/2322) Improved MSR compatibility with recent Linux kernels and updated `randomx_boost.sh`.
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
cmake_minimum_required(VERSION 2.8.12)
|
||||
cmake_minimum_required(VERSION 3.10)
|
||||
project(xmrig)
|
||||
|
||||
option(WITH_HWLOC "Enable hwloc support" ON)
|
||||
@@ -8,8 +8,8 @@ option(WITH_CN_PICO "Enable CryptoNight-Pico algorithm" ON)
|
||||
option(WITH_CN_FEMTO "Enable CryptoNight-UPX2 algorithm" ON)
|
||||
option(WITH_RANDOMX "Enable RandomX algorithms family" ON)
|
||||
option(WITH_ARGON2 "Enable Argon2 algorithms family" ON)
|
||||
option(WITH_ASTROBWT "Enable AstroBWT algorithms family" ON)
|
||||
option(WITH_KAWPOW "Enable KawPow algorithms family" ON)
|
||||
option(WITH_GHOSTRIDER "Enable GhostRider algorithm" ON)
|
||||
option(WITH_HTTP "Enable HTTP protocol support (client/server)" ON)
|
||||
option(WITH_DEBUG_LOG "Enable debug log output" OFF)
|
||||
option(WITH_TLS "Enable OpenSSL support" ON)
|
||||
@@ -18,6 +18,8 @@ option(WITH_MSR "Enable MSR mod & 1st-gen Ryzen fix" ON)
|
||||
option(WITH_ENV_VARS "Enable environment variables support in config file" ON)
|
||||
option(WITH_EMBEDDED_CONFIG "Enable internal embedded JSON config" OFF)
|
||||
option(WITH_OPENCL "Enable OpenCL backend" ON)
|
||||
set(WITH_OPENCL_VERSION 200 CACHE STRING "Target OpenCL version")
|
||||
set_property(CACHE WITH_OPENCL_VERSION PROPERTY STRINGS 120 200 210 220)
|
||||
option(WITH_CUDA "Enable CUDA backend" ON)
|
||||
option(WITH_NVML "Enable NVML (NVIDIA Management Library) support (only if CUDA backend enabled)" ON)
|
||||
option(WITH_ADL "Enable ADL (AMD Display Library) or sysfs support (only if OpenCL backend enabled)" ON)
|
||||
@@ -25,12 +27,15 @@ option(WITH_STRICT_CACHE "Enable strict checks for OpenCL cache" ON)
|
||||
option(WITH_INTERLEAVE_DEBUG_LOG "Enable debug log for threads interleave" OFF)
|
||||
option(WITH_PROFILING "Enable profiling for developers" OFF)
|
||||
option(WITH_SSE4_1 "Enable SSE 4.1 for Blake2" ON)
|
||||
option(WITH_AVX2 "Enable AVX2 for Blake2" ON)
|
||||
option(WITH_VAES "Enable VAES instructions for Cryptonight" ON)
|
||||
option(WITH_BENCHMARK "Enable builtin RandomX benchmark and stress test" ON)
|
||||
option(WITH_SECURE_JIT "Enable secure access to JIT memory" OFF)
|
||||
option(WITH_DMI "Enable DMI/SMBIOS reader" ON)
|
||||
|
||||
option(BUILD_STATIC "Build static binary" OFF)
|
||||
option(ARM_TARGET "Force use specific ARM target 8 or 7" 0)
|
||||
option(ARM_V8 "Force ARMv8 (64 bit) architecture, use with caution if automatic detection fails, but you sure it may work" OFF)
|
||||
option(ARM_V7 "Force ARMv7 (32 bit) architecture, use with caution if automatic detection fails, but you sure it may work" OFF)
|
||||
option(HWLOC_DEBUG "Enable hwloc debug helpers and log" OFF)
|
||||
|
||||
|
||||
@@ -56,6 +61,7 @@ set(HEADERS
|
||||
src/core/config/usage.h
|
||||
src/core/Controller.h
|
||||
src/core/Miner.h
|
||||
src/core/Taskbar.h
|
||||
src/net/interfaces/IJobResultListener.h
|
||||
src/net/JobResult.h
|
||||
src/net/JobResults.h
|
||||
@@ -104,6 +110,7 @@ set(SOURCES
|
||||
src/core/config/ConfigTransform.cpp
|
||||
src/core/Controller.cpp
|
||||
src/core/Miner.cpp
|
||||
src/core/Taskbar.cpp
|
||||
src/net/JobResults.cpp
|
||||
src/net/Network.cpp
|
||||
src/net/strategies/DonateStrategy.cpp
|
||||
@@ -124,6 +131,19 @@ set(SOURCES_CRYPTO
|
||||
src/crypto/common/VirtualMemory.cpp
|
||||
)
|
||||
|
||||
if (CMAKE_C_COMPILER_ID MATCHES GNU)
|
||||
set_source_files_properties(src/crypto/cn/CnHash.cpp PROPERTIES COMPILE_FLAGS "-Ofast -fno-tree-vectorize")
|
||||
endif()
|
||||
|
||||
if (WITH_VAES)
|
||||
add_definitions(-DXMRIG_VAES)
|
||||
set(HEADERS_CRYPTO "${HEADERS_CRYPTO}" src/crypto/cn/CryptoNight_x86_vaes.h)
|
||||
set(SOURCES_CRYPTO "${SOURCES_CRYPTO}" src/crypto/cn/CryptoNight_x86_vaes.cpp)
|
||||
if (CMAKE_C_COMPILER_ID MATCHES GNU OR CMAKE_C_COMPILER_ID MATCHES Clang)
|
||||
set_source_files_properties(src/crypto/cn/CryptoNight_x86_vaes.cpp PROPERTIES COMPILE_FLAGS "-Ofast -fno-tree-vectorize -mavx2 -mvaes")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (WITH_HWLOC)
|
||||
list(APPEND HEADERS_CRYPTO
|
||||
src/crypto/common/NUMAMemoryPool.h
|
||||
@@ -142,7 +162,7 @@ if (XMRIG_OS_WIN)
|
||||
src/crypto/common/VirtualMemory_win.cpp
|
||||
)
|
||||
|
||||
set(EXTRA_LIBS ws2_32 psapi iphlpapi userenv)
|
||||
set(EXTRA_LIBS ws2_32 psapi iphlpapi userenv dbghelp)
|
||||
elseif (XMRIG_OS_APPLE)
|
||||
list(APPEND SOURCES_OS
|
||||
src/App_unix.cpp
|
||||
@@ -180,8 +200,8 @@ find_package(UV REQUIRED)
|
||||
include(cmake/flags.cmake)
|
||||
include(cmake/randomx.cmake)
|
||||
include(cmake/argon2.cmake)
|
||||
include(cmake/astrobwt.cmake)
|
||||
include(cmake/kawpow.cmake)
|
||||
include(cmake/ghostrider.cmake)
|
||||
include(cmake/OpenSSL.cmake)
|
||||
include(cmake/asm.cmake)
|
||||
|
||||
@@ -217,16 +237,20 @@ if (WITH_DEBUG_LOG)
|
||||
endif()
|
||||
|
||||
add_executable(${CMAKE_PROJECT_NAME} ${HEADERS} ${SOURCES} ${SOURCES_OS} ${HEADERS_CRYPTO} ${SOURCES_CRYPTO} ${SOURCES_SYSLOG} ${TLS_SOURCES} ${XMRIG_ASM_SOURCES})
|
||||
target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${EXTRA_LIBS} ${CPUID_LIB} ${ARGON2_LIBRARY} ${ETHASH_LIBRARY})
|
||||
target_link_libraries(${CMAKE_PROJECT_NAME} ${XMRIG_ASM_LIBRARY} ${OPENSSL_LIBRARIES} ${UV_LIBRARIES} ${EXTRA_LIBS} ${CPUID_LIB} ${ARGON2_LIBRARY} ${ETHASH_LIBRARY} ${GHOSTRIDER_LIBRARY})
|
||||
|
||||
if (WIN32)
|
||||
add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/bin/WinRing0/WinRing0x64.sys" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>)
|
||||
if (NOT ARM_TARGET)
|
||||
add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/bin/WinRing0/WinRing0x64.sys" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>)
|
||||
endif()
|
||||
|
||||
add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/scripts/benchmark_1M.cmd" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>)
|
||||
add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/scripts/benchmark_10M.cmd" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>)
|
||||
add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/scripts/pool_mine_example.cmd" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>)
|
||||
add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/scripts/solo_mine_example.cmd" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>)
|
||||
add_custom_command(TARGET ${CMAKE_PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_COMMAND} -E copy_if_different "${CMAKE_SOURCE_DIR}/scripts/rtm_ghostrider_example.cmd" $<TARGET_FILE_DIR:${CMAKE_PROJECT_NAME}>)
|
||||
endif()
|
||||
|
||||
if (CMAKE_CXX_COMPILER_ID MATCHES Clang AND CMAKE_BUILD_TYPE STREQUAL Release AND NOT CMAKE_GENERATOR STREQUAL Xcode)
|
||||
add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_STRIP} ${CMAKE_PROJECT_NAME})
|
||||
add_custom_command(TARGET ${PROJECT_NAME} POST_BUILD COMMAND ${CMAKE_STRIP} "$<TARGET_FILE:${CMAKE_PROJECT_NAME}>")
|
||||
endif()
|
||||
|
||||
@@ -7,10 +7,10 @@
|
||||
[](https://github.com/xmrig/xmrig/stargazers)
|
||||
[](https://github.com/xmrig/xmrig/network)
|
||||
|
||||
XMRig is a high performance, open source, cross platform RandomX, KawPow, CryptoNight and AstroBWT unified CPU/GPU miner and [RandomX benchmark](https://xmrig.com/benchmark). Official binaries are available for Windows, Linux, macOS and FreeBSD.
|
||||
XMRig is a high performance, open source, cross platform RandomX, KawPow, CryptoNight and [GhostRider](https://github.com/xmrig/xmrig/tree/master/src/crypto/ghostrider#readme) unified CPU/GPU miner and [RandomX benchmark](https://xmrig.com/benchmark). Official binaries are available for Windows, Linux, macOS and FreeBSD.
|
||||
|
||||
## Mining backends
|
||||
- **CPU** (x64/ARMv8)
|
||||
- **CPU** (x86/x64/ARMv7/ARMv8)
|
||||
- **OpenCL** for AMD GPUs.
|
||||
- **CUDA** for NVIDIA GPUs via external [CUDA plugin](https://github.com/xmrig/xmrig-cuda).
|
||||
|
||||
|
||||
@@ -1,45 +0,0 @@
|
||||
if (WITH_ASTROBWT)
|
||||
add_definitions(/DXMRIG_ALGO_ASTROBWT)
|
||||
|
||||
list(APPEND HEADERS_CRYPTO
|
||||
src/crypto/astrobwt/AstroBWT.h
|
||||
)
|
||||
|
||||
list(APPEND SOURCES_CRYPTO
|
||||
src/crypto/astrobwt/AstroBWT.cpp
|
||||
)
|
||||
|
||||
if (XMRIG_ARM)
|
||||
list(APPEND HEADERS_CRYPTO
|
||||
src/crypto/astrobwt/salsa20_ref/ecrypt-config.h
|
||||
src/crypto/astrobwt/salsa20_ref/ecrypt-machine.h
|
||||
src/crypto/astrobwt/salsa20_ref/ecrypt-portable.h
|
||||
src/crypto/astrobwt/salsa20_ref/ecrypt-sync.h
|
||||
)
|
||||
|
||||
list(APPEND SOURCES_CRYPTO
|
||||
src/crypto/astrobwt/salsa20_ref/salsa20.c
|
||||
)
|
||||
else()
|
||||
if (CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
add_definitions(/DASTROBWT_AVX2)
|
||||
if (CMAKE_C_COMPILER_ID MATCHES MSVC)
|
||||
enable_language(ASM_MASM)
|
||||
list(APPEND SOURCES_CRYPTO src/crypto/astrobwt/sha3_256_avx2.asm)
|
||||
else()
|
||||
enable_language(ASM)
|
||||
list(APPEND SOURCES_CRYPTO src/crypto/astrobwt/sha3_256_avx2.S)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
list(APPEND HEADERS_CRYPTO
|
||||
src/crypto/astrobwt/Salsa20.hpp
|
||||
)
|
||||
|
||||
list(APPEND SOURCES_CRYPTO
|
||||
src/crypto/astrobwt/Salsa20.cpp
|
||||
)
|
||||
endif()
|
||||
else()
|
||||
remove_definitions(/DXMRIG_ALGO_ASTROBWT)
|
||||
endif()
|
||||
@@ -1,47 +1,72 @@
|
||||
if (CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
set(XMRIG_64_BIT ON)
|
||||
add_definitions(-DXMRIG_64_BIT)
|
||||
else()
|
||||
set(XMRIG_64_BIT OFF)
|
||||
endif()
|
||||
|
||||
if (NOT CMAKE_SYSTEM_PROCESSOR)
|
||||
message(WARNING "CMAKE_SYSTEM_PROCESSOR not defined")
|
||||
endif()
|
||||
|
||||
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64)$" AND CMAKE_SIZEOF_VOID_P EQUAL 8)
|
||||
add_definitions(/DRAPIDJSON_SSE2)
|
||||
include(CheckCXXCompilerFlag)
|
||||
|
||||
if (CMAKE_CXX_COMPILER_ID MATCHES MSVC)
|
||||
set(VAES_SUPPORTED ON)
|
||||
else()
|
||||
CHECK_CXX_COMPILER_FLAG("-mavx2 -mvaes" VAES_SUPPORTED)
|
||||
endif()
|
||||
|
||||
if (NOT VAES_SUPPORTED)
|
||||
set(WITH_VAES OFF)
|
||||
endif()
|
||||
|
||||
if (XMRIG_64_BIT AND CMAKE_SYSTEM_PROCESSOR MATCHES "^(x86_64|AMD64)$")
|
||||
add_definitions(-DRAPIDJSON_SSE2)
|
||||
else()
|
||||
set(WITH_SSE4_1 OFF)
|
||||
set(WITH_AVX2 OFF)
|
||||
set(WITH_VAES OFF)
|
||||
endif()
|
||||
|
||||
add_definitions(-DRAPIDJSON_WRITE_DEFAULT_FLAGS=6) # rapidjson::kWriteNanAndInfFlag | rapidjson::kWriteNanAndInfNullFlag
|
||||
|
||||
if (ARM_V8)
|
||||
set(ARM_TARGET 8)
|
||||
elseif (ARM_V7)
|
||||
set(ARM_TARGET 7)
|
||||
endif()
|
||||
|
||||
if (NOT ARM_TARGET)
|
||||
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|armv8-a)$")
|
||||
if (CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64|ARM64|armv8-a)$")
|
||||
set(ARM_TARGET 8)
|
||||
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv7|armv7f|armv7s|armv7k|armv7-a|armv7l)$")
|
||||
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "^(armv7|armv7f|armv7s|armv7k|armv7-a|armv7l|armv7ve)$")
|
||||
set(ARM_TARGET 7)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (ARM_TARGET AND ARM_TARGET GREATER 6)
|
||||
set(XMRIG_ARM ON)
|
||||
add_definitions(/DXMRIG_ARM)
|
||||
set(XMRIG_ARM ON)
|
||||
add_definitions(-DXMRIG_ARM=${ARM_TARGET})
|
||||
|
||||
message(STATUS "Use ARM_TARGET=${ARM_TARGET} (${CMAKE_SYSTEM_PROCESSOR})")
|
||||
|
||||
include(CheckCXXCompilerFlag)
|
||||
|
||||
if (ARM_TARGET EQUAL 8)
|
||||
set(XMRIG_ARMv8 ON)
|
||||
add_definitions(/DXMRIG_ARMv8)
|
||||
|
||||
CHECK_CXX_COMPILER_FLAG(-march=armv8-a+crypto XMRIG_ARM_CRYPTO)
|
||||
|
||||
if (XMRIG_ARM_CRYPTO)
|
||||
add_definitions(/DXMRIG_ARM_CRYPTO)
|
||||
add_definitions(-DXMRIG_ARM_CRYPTO)
|
||||
set(ARM8_CXX_FLAGS "-march=armv8-a+crypto")
|
||||
else()
|
||||
set(ARM8_CXX_FLAGS "-march=armv8-a")
|
||||
endif()
|
||||
elseif (ARM_TARGET EQUAL 7)
|
||||
set(XMRIG_ARMv7 ON)
|
||||
add_definitions(/DXMRIG_ARMv7)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (WITH_SSE4_1)
|
||||
add_definitions(/DXMRIG_FEATURE_SSE4_1)
|
||||
add_definitions(-DXMRIG_FEATURE_SSE4_1)
|
||||
endif()
|
||||
|
||||
if (WITH_AVX2)
|
||||
add_definitions(-DXMRIG_FEATURE_AVX2)
|
||||
endif()
|
||||
|
||||
@@ -10,7 +10,7 @@ if ("${CMAKE_BUILD_TYPE}" STREQUAL "")
|
||||
endif()
|
||||
|
||||
if (CMAKE_BUILD_TYPE STREQUAL "Release")
|
||||
add_definitions(/DNDEBUG)
|
||||
add_definitions(-DNDEBUG)
|
||||
endif()
|
||||
|
||||
include(CheckSymbolExists)
|
||||
@@ -22,17 +22,17 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU)
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -fexceptions -fno-rtti -Wno-strict-aliasing -Wno-class-memaccess")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast -s")
|
||||
|
||||
if (XMRIG_ARMv8)
|
||||
if (ARM_TARGET EQUAL 8)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARM8_CXX_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARM8_CXX_FLAGS} -flax-vector-conversions")
|
||||
elseif (XMRIG_ARMv7)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -flax-vector-conversions")
|
||||
elseif (ARM_TARGET EQUAL 7)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -march=armv7-a -mfpu=neon")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -march=armv7-a -mfpu=neon -flax-vector-conversions")
|
||||
else()
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -maes")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -maes")
|
||||
|
||||
add_definitions(/DHAVE_ROTR)
|
||||
add_definitions(-DHAVE_ROTR)
|
||||
endif()
|
||||
|
||||
if (WIN32)
|
||||
@@ -49,41 +49,29 @@ if (CMAKE_CXX_COMPILER_ID MATCHES GNU)
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static")
|
||||
endif()
|
||||
|
||||
add_definitions(/D_GNU_SOURCE)
|
||||
|
||||
if (${CMAKE_VERSION} VERSION_LESS "3.1.0")
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -std=c99")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11")
|
||||
endif()
|
||||
|
||||
#set(CMAKE_C_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -gdwarf-2")
|
||||
|
||||
add_definitions(/DHAVE_BUILTIN_CLEAR_CACHE)
|
||||
add_definitions(-D_GNU_SOURCE -DHAVE_BUILTIN_CLEAR_CACHE)
|
||||
|
||||
elseif (CMAKE_CXX_COMPILER_ID MATCHES MSVC)
|
||||
set(CMAKE_C_FLAGS_RELEASE "/MT /O2 /Oi /DNDEBUG /GL")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "/MT /O2 /Oi /DNDEBUG /GL")
|
||||
set(CMAKE_C_FLAGS_RELEASE "/MP /MT /O2 /Oi /DNDEBUG /GL")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "/MP /MT /O2 /Oi /DNDEBUG /GL")
|
||||
|
||||
set(CMAKE_C_FLAGS_RELWITHDEBINFO "/Ob1 /Zi /DRELWITHDEBINFO")
|
||||
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/Ob1 /Zi /DRELWITHDEBINFO")
|
||||
set(CMAKE_C_FLAGS_RELWITHDEBINFO "/MP /Ob1 /Zi /DRELWITHDEBINFO")
|
||||
set(CMAKE_CXX_FLAGS_RELWITHDEBINFO "/MP /Ob1 /Zi /DRELWITHDEBINFO")
|
||||
|
||||
add_definitions(/D_CRT_SECURE_NO_WARNINGS)
|
||||
add_definitions(/D_CRT_NONSTDC_NO_WARNINGS)
|
||||
add_definitions(/DNOMINMAX)
|
||||
add_definitions(/DHAVE_ROTR)
|
||||
add_definitions(-D_CRT_SECURE_NO_WARNINGS -D_CRT_NONSTDC_NO_WARNINGS -DNOMINMAX -DHAVE_ROTR)
|
||||
|
||||
elseif (CMAKE_CXX_COMPILER_ID MATCHES Clang)
|
||||
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -Wall")
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Ofast -funroll-loops -fmerge-all-constants")
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -funroll-loops -fmerge-all-constants")
|
||||
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -fexceptions -fno-rtti -Wno-missing-braces")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Ofast -funroll-loops -fmerge-all-constants")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall -fexceptions -fno-rtti")
|
||||
set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -funroll-loops -fmerge-all-constants")
|
||||
|
||||
if (XMRIG_ARMv8)
|
||||
if (ARM_TARGET EQUAL 8)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${ARM8_CXX_FLAGS}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${ARM8_CXX_FLAGS}")
|
||||
elseif (XMRIG_ARMv7)
|
||||
elseif (ARM_TARGET EQUAL 7)
|
||||
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}")
|
||||
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -mfpu=neon -march=${CMAKE_SYSTEM_PROCESSOR}")
|
||||
else()
|
||||
@@ -92,19 +80,18 @@ elseif (CMAKE_CXX_COMPILER_ID MATCHES Clang)
|
||||
|
||||
check_symbol_exists("_rotr" "x86intrin.h" HAVE_ROTR)
|
||||
if (HAVE_ROTR)
|
||||
add_definitions(/DHAVE_ROTR)
|
||||
add_definitions(-DHAVE_ROTR)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (BUILD_STATIC)
|
||||
if (BUILD_STATIC OR WIN32)
|
||||
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static")
|
||||
endif()
|
||||
|
||||
endif()
|
||||
|
||||
if (NOT WIN32)
|
||||
check_symbol_exists("__builtin___clear_cache" "stdlib.h" HAVE_BUILTIN_CLEAR_CACHE)
|
||||
if (HAVE_BUILTIN_CLEAR_CACHE)
|
||||
add_definitions(/DHAVE_BUILTIN_CLEAR_CACHE)
|
||||
add_definitions(-DHAVE_BUILTIN_CLEAR_CACHE)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
8
cmake/ghostrider.cmake
Normal file
8
cmake/ghostrider.cmake
Normal file
@@ -0,0 +1,8 @@
|
||||
if (WITH_GHOSTRIDER)
|
||||
add_definitions(/DXMRIG_ALGO_GHOSTRIDER)
|
||||
add_subdirectory(src/crypto/ghostrider)
|
||||
set(GHOSTRIDER_LIBRARY ghostrider)
|
||||
else()
|
||||
remove_definitions(/DXMRIG_ALGO_GHOSTRIDER)
|
||||
set(GHOSTRIDER_LIBRARY "")
|
||||
endif()
|
||||
@@ -15,39 +15,37 @@ else()
|
||||
set(XMRIG_OS_ANDROID ON)
|
||||
elseif(CMAKE_SYSTEM_NAME MATCHES "Linux")
|
||||
set(XMRIG_OS_LINUX ON)
|
||||
elseif(CMAKE_SYSTEM_NAME STREQUAL FreeBSD)
|
||||
elseif(CMAKE_SYSTEM_NAME STREQUAL FreeBSD OR CMAKE_SYSTEM_NAME STREQUAL DragonFly)
|
||||
set(XMRIG_OS_FREEBSD ON)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
if (XMRIG_OS_WIN)
|
||||
add_definitions(/DWIN32)
|
||||
add_definitions(/DXMRIG_OS_WIN)
|
||||
add_definitions(-DWIN32 -DXMRIG_OS_WIN)
|
||||
elseif(XMRIG_OS_APPLE)
|
||||
add_definitions(/DXMRIG_OS_APPLE)
|
||||
add_definitions(-DXMRIG_OS_APPLE)
|
||||
|
||||
if (XMRIG_OS_IOS)
|
||||
add_definitions(/DXMRIG_OS_IOS)
|
||||
add_definitions(-DXMRIG_OS_IOS)
|
||||
else()
|
||||
add_definitions(/DXMRIG_OS_MACOS)
|
||||
add_definitions(-DXMRIG_OS_MACOS)
|
||||
endif()
|
||||
|
||||
if (XMRIG_ARM)
|
||||
set(WITH_SECURE_JIT ON)
|
||||
endif()
|
||||
elseif(XMRIG_OS_UNIX)
|
||||
add_definitions(/DXMRIG_OS_UNIX)
|
||||
add_definitions(-DXMRIG_OS_UNIX)
|
||||
|
||||
if (XMRIG_OS_ANDROID)
|
||||
add_definitions(/DXMRIG_OS_ANDROID)
|
||||
add_definitions(-DXMRIG_OS_ANDROID)
|
||||
elseif (XMRIG_OS_LINUX)
|
||||
add_definitions(/DXMRIG_OS_LINUX)
|
||||
add_definitions(-DXMRIG_OS_LINUX)
|
||||
elseif (XMRIG_OS_FREEBSD)
|
||||
add_definitions(/DXMRIG_OS_FREEBSD)
|
||||
add_definitions(-DXMRIG_OS_FREEBSD)
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (WITH_SECURE_JIT)
|
||||
add_definitions(/DXMRIG_SECURE_JIT)
|
||||
add_definitions(-DXMRIG_SECURE_JIT)
|
||||
endif()
|
||||
|
||||
@@ -76,7 +76,15 @@ if (WITH_RANDOMX)
|
||||
list(APPEND SOURCES_CRYPTO src/crypto/randomx/blake2/blake2b_sse41.c)
|
||||
|
||||
if (CMAKE_C_COMPILER_ID MATCHES GNU OR CMAKE_C_COMPILER_ID MATCHES Clang)
|
||||
set_source_files_properties(src/crypto/randomx/blake2/blake2b_sse41.c PROPERTIES COMPILE_FLAGS -msse4.1)
|
||||
set_source_files_properties(src/crypto/randomx/blake2/blake2b_sse41.c PROPERTIES COMPILE_FLAGS "-Ofast -msse4.1")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
if (WITH_AVX2)
|
||||
list(APPEND SOURCES_CRYPTO src/crypto/randomx/blake2/avx2/blake2b_avx2.c)
|
||||
|
||||
if (CMAKE_C_COMPILER_ID MATCHES GNU OR CMAKE_C_COMPILER_ID MATCHES Clang)
|
||||
set_source_files_properties(src/crypto/randomx/blake2/avx2/blake2b_avx2.c PROPERTIES COMPILE_FLAGS "-Ofast -mavx2")
|
||||
endif()
|
||||
endif()
|
||||
|
||||
|
||||
@@ -13,7 +13,6 @@ Option `coin` useful for pools without [algorithm negotiation](https://xmrig.com
|
||||
| Name | Memory | Version | Description | Notes |
|
||||
|------|--------|---------|-------------|-------|
|
||||
| `kawpow` | - | 6.0.0+ | KawPow (Ravencoin) | GPU only |
|
||||
| `rx/keva` | 1 MB | 5.9.0+ | RandomKEVA (RandomX variant for Keva). | |
|
||||
| `astrobwt` | 20 MB | 5.8.0+ | AstroBWT (Dero). | |
|
||||
| `cn-pico/tlo` | 256 KB | 5.5.0+ | CryptoNight-Pico (Talleo). | |
|
||||
| `rx/sfx` | 2 MB | 5.4.0+ | RandomSFX (RandomX variant for Safex). | |
|
||||
|
||||
@@ -256,7 +256,7 @@
|
||||
|
||||
# v2.8.0
|
||||
- **[#753](https://github.com/xmrig/xmrig/issues/753) Added new algorithm [CryptoNight variant 2](https://github.com/xmrig/xmrig/issues/753) for Monero fork, thanks [@SChernykh](https://github.com/SChernykh).**
|
||||
- Added global and per thread option `"asm"` and and command line equivalent.
|
||||
- Added global and per thread option `"asm"` and command line equivalent.
|
||||
- **[#758](https://github.com/xmrig/xmrig/issues/758) Added SSL/TLS support for secure connections to pools.**
|
||||
- Added per pool options `"tls"` and `"tls-fingerprint"` and command line equivalents.
|
||||
- [#767](https://github.com/xmrig/xmrig/issues/767) Added config autosave feature, same with GPU miners.
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
@echo off
|
||||
cd %~dp0
|
||||
cd /d "%~dp0"
|
||||
xmrig.exe --bench=10M --submit
|
||||
pause
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
@echo off
|
||||
cd %~dp0
|
||||
cd /d "%~dp0"
|
||||
xmrig.exe --bench=1M --submit
|
||||
pause
|
||||
|
||||
@@ -1,6 +1,10 @@
|
||||
#!/bin/bash -e
|
||||
#!/bin/sh -e
|
||||
|
||||
HWLOC_VERSION="2.4.1"
|
||||
HWLOC_VERSION_MAJOR="2"
|
||||
HWLOC_VERSION_MINOR="12"
|
||||
HWLOC_VERSION_PATCH="1"
|
||||
|
||||
HWLOC_VERSION="${HWLOC_VERSION_MAJOR}.${HWLOC_VERSION_MINOR}.${HWLOC_VERSION_PATCH}"
|
||||
|
||||
mkdir -p deps
|
||||
mkdir -p deps/include
|
||||
@@ -8,7 +12,7 @@ mkdir -p deps/lib
|
||||
|
||||
mkdir -p build && cd build
|
||||
|
||||
wget https://download.open-mpi.org/release/hwloc/v2.4/hwloc-${HWLOC_VERSION}.tar.gz -O hwloc-${HWLOC_VERSION}.tar.gz
|
||||
wget https://download.open-mpi.org/release/hwloc/v${HWLOC_VERSION_MAJOR}.${HWLOC_VERSION_MINOR}/hwloc-${HWLOC_VERSION}.tar.gz -O hwloc-${HWLOC_VERSION}.tar.gz
|
||||
tar -xzf hwloc-${HWLOC_VERSION}.tar.gz
|
||||
|
||||
cd hwloc-${HWLOC_VERSION}
|
||||
@@ -16,4 +20,4 @@ cd hwloc-${HWLOC_VERSION}
|
||||
make -j$(nproc || sysctl -n hw.ncpu || sysctl -n hw.logicalcpu)
|
||||
cp -fr include ../../deps
|
||||
cp hwloc/.libs/libhwloc.a ../../deps/lib
|
||||
cd ..
|
||||
cd ..
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash -e
|
||||
#!/bin/sh -e
|
||||
|
||||
HWLOC_VERSION="1.11.13"
|
||||
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/bin/bash -e
|
||||
#!/bin/sh -e
|
||||
|
||||
LIBRESSL_VERSION="3.0.2"
|
||||
LIBRESSL_VERSION="3.5.2"
|
||||
|
||||
mkdir -p deps
|
||||
mkdir -p deps/include
|
||||
@@ -17,4 +17,4 @@ make -j$(nproc || sysctl -n hw.ncpu || sysctl -n hw.logicalcpu)
|
||||
cp -fr include ../../deps
|
||||
cp crypto/.libs/libcrypto.a ../../deps/lib
|
||||
cp ssl/.libs/libssl.a ../../deps/lib
|
||||
cd ..
|
||||
cd ..
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/bin/bash -e
|
||||
#!/bin/sh -e
|
||||
|
||||
OPENSSL_VERSION="1.1.1k"
|
||||
OPENSSL_VERSION="1.1.1u"
|
||||
|
||||
mkdir -p deps
|
||||
mkdir -p deps/include
|
||||
@@ -8,7 +8,7 @@ mkdir -p deps/lib
|
||||
|
||||
mkdir -p build && cd build
|
||||
|
||||
wget https://www.openssl.org/source/openssl-${OPENSSL_VERSION}.tar.gz -O openssl-${OPENSSL_VERSION}.tar.gz
|
||||
wget https://openssl.org/source/old/1.1.1/openssl-${OPENSSL_VERSION}.tar.gz -O openssl-${OPENSSL_VERSION}.tar.gz
|
||||
tar -xzf openssl-${OPENSSL_VERSION}.tar.gz
|
||||
|
||||
cd openssl-${OPENSSL_VERSION}
|
||||
|
||||
20
scripts/build.openssl3.sh
Executable file
20
scripts/build.openssl3.sh
Executable file
@@ -0,0 +1,20 @@
|
||||
#!/bin/sh -e
|
||||
|
||||
OPENSSL_VERSION="3.0.16"
|
||||
|
||||
mkdir -p deps
|
||||
mkdir -p deps/include
|
||||
mkdir -p deps/lib
|
||||
|
||||
mkdir -p build && cd build
|
||||
|
||||
wget https://github.com/openssl/openssl/releases/download/openssl-${OPENSSL_VERSION}/openssl-${OPENSSL_VERSION}.tar.gz -O openssl-${OPENSSL_VERSION}.tar.gz
|
||||
tar -xzf openssl-${OPENSSL_VERSION}.tar.gz
|
||||
|
||||
cd openssl-${OPENSSL_VERSION}
|
||||
./config -no-shared -no-asm -no-zlib -no-comp -no-dgram -no-filenames -no-cms
|
||||
make -j$(nproc || sysctl -n hw.ncpu || sysctl -n hw.logicalcpu)
|
||||
cp -fr include ../../deps
|
||||
cp libcrypto.a ../../deps/lib
|
||||
cp libssl.a ../../deps/lib
|
||||
cd ..
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/bin/bash -e
|
||||
#!/bin/sh -e
|
||||
|
||||
UV_VERSION="1.41.0"
|
||||
UV_VERSION="1.51.0"
|
||||
|
||||
mkdir -p deps
|
||||
mkdir -p deps/include
|
||||
@@ -8,13 +8,13 @@ mkdir -p deps/lib
|
||||
|
||||
mkdir -p build && cd build
|
||||
|
||||
wget https://github.com/libuv/libuv/archive/v${UV_VERSION}.tar.gz -O v${UV_VERSION}.tar.gz
|
||||
wget https://dist.libuv.org/dist/v${UV_VERSION}/libuv-v${UV_VERSION}.tar.gz -O v${UV_VERSION}.tar.gz
|
||||
tar -xzf v${UV_VERSION}.tar.gz
|
||||
|
||||
cd libuv-${UV_VERSION}
|
||||
cd libuv-v${UV_VERSION}
|
||||
sh autogen.sh
|
||||
./configure --disable-shared
|
||||
make -j$(nproc || sysctl -n hw.ncpu || sysctl -n hw.logicalcpu)
|
||||
cp -fr include ../../deps
|
||||
cp .libs/libuv.a ../../deps/lib
|
||||
cd ..
|
||||
cd ..
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
#!/bin/bash -e
|
||||
#!/bin/sh -e
|
||||
|
||||
./build.uv.sh
|
||||
./build.hwloc.sh
|
||||
./build.openssl.sh
|
||||
./build.openssl3.sh
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash -e
|
||||
#!/bin/sh -e
|
||||
|
||||
# https://xmrig.com/docs/miner/hugepages#onegb-huge-pages
|
||||
|
||||
|
||||
@@ -6,7 +6,6 @@ const fs = require('fs');
|
||||
const path = require('path');
|
||||
const { text2h, text2h_bundle, addIncludes } = require('./js/opencl');
|
||||
const { opencl_minify } = require('./js/opencl_minify');
|
||||
const cwd = process.cwd();
|
||||
|
||||
|
||||
function cn()
|
||||
@@ -50,7 +49,7 @@ function rx()
|
||||
'randomx_constants_monero.h',
|
||||
'randomx_constants_wow.h',
|
||||
'randomx_constants_arqma.h',
|
||||
'randomx_constants_keva.h',
|
||||
'randomx_constants_graft.h',
|
||||
'aes.cl',
|
||||
'blake2b.cl',
|
||||
'randomx_vm.cl',
|
||||
@@ -66,15 +65,6 @@ function rx()
|
||||
}
|
||||
|
||||
|
||||
function astrobwt()
|
||||
{
|
||||
const astrobwt = opencl_minify(addIncludes('astrobwt.cl', [ 'BWT.cl', 'salsa20.cl', 'sha3.cl' ]));
|
||||
|
||||
// fs.writeFileSync('astrobwt_gen.cl', astrobwt);
|
||||
fs.writeFileSync('astrobwt_cl.h', text2h(astrobwt, 'xmrig', 'astrobwt_cl'));
|
||||
}
|
||||
|
||||
|
||||
function kawpow()
|
||||
{
|
||||
const kawpow = opencl_minify(addIncludes('kawpow.cl', [ 'defs.h' ]));
|
||||
@@ -85,23 +75,24 @@ function kawpow()
|
||||
fs.writeFileSync('kawpow_dag_cl.h', text2h(kawpow_dag, 'xmrig', 'kawpow_dag_cl'));
|
||||
}
|
||||
|
||||
for (let i = 0; i < 2; i++) {
|
||||
if (fs.existsSync('src/backend/opencl/cl/OclSource.h')) {
|
||||
break;
|
||||
}
|
||||
|
||||
process.chdir(path.resolve('src/backend/opencl/cl/cn'));
|
||||
process.chdir('..');
|
||||
}
|
||||
|
||||
process.chdir(path.resolve('src/backend/opencl/cl'));
|
||||
|
||||
const cwd = process.cwd();
|
||||
|
||||
process.chdir(path.resolve(cwd, 'cn'));
|
||||
cn();
|
||||
cn_r();
|
||||
|
||||
process.chdir(cwd);
|
||||
process.chdir(path.resolve('src/backend/opencl/cl/rx'));
|
||||
|
||||
process.chdir(path.resolve(cwd, 'rx'));
|
||||
rx();
|
||||
|
||||
process.chdir(cwd);
|
||||
process.chdir(path.resolve('src/backend/opencl/cl/astrobwt'));
|
||||
|
||||
astrobwt();
|
||||
|
||||
process.chdir(cwd);
|
||||
process.chdir(path.resolve('src/backend/opencl/cl/kawpow'));
|
||||
|
||||
process.chdir(path.resolve(cwd, 'kawpow'));
|
||||
kawpow();
|
||||
|
||||
@@ -15,6 +15,6 @@
|
||||
:: Choose pools outside of top 5 to help Monero network be more decentralized!
|
||||
:: Smaller pools also often have smaller fees/payout limits.
|
||||
|
||||
cd %~dp0
|
||||
xmrig.exe -o pool.hashvault.pro:3333 -u 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD -p x
|
||||
cd /d "%~dp0"
|
||||
xmrig.exe -o xmrpool.eu:3333 -u 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD -p x
|
||||
pause
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
#!/bin/bash
|
||||
#!/bin/sh -e
|
||||
|
||||
MSR_FILE=/sys/module/msr/parameters/allow_writes
|
||||
|
||||
@@ -8,16 +8,34 @@ else
|
||||
modprobe msr allow_writes=on
|
||||
fi
|
||||
|
||||
if cat /proc/cpuinfo | grep -E 'AMD Ryzen|AMD EPYC' > /dev/null;
|
||||
if grep -E 'AMD Ryzen|AMD EPYC|AuthenticAMD' /proc/cpuinfo > /dev/null;
|
||||
then
|
||||
if cat /proc/cpuinfo | grep "cpu family[[:space:]]:[[:space:]]25" > /dev/null;
|
||||
if grep "cpu family[[:space:]]\{1,\}:[[:space:]]25" /proc/cpuinfo > /dev/null;
|
||||
then
|
||||
echo "Detected Zen3 CPU"
|
||||
wrmsr -a 0xc0011020 0x4480000000000
|
||||
wrmsr -a 0xc0011021 0x1c000200000040
|
||||
wrmsr -a 0xc0011022 0xc000000401500000
|
||||
wrmsr -a 0xc001102b 0x2000cc14
|
||||
echo "MSR register values for Zen3 applied"
|
||||
if grep "model[[:space:]]\{1,\}:[[:space:]]97" /proc/cpuinfo > /dev/null;
|
||||
then
|
||||
echo "Detected Zen4 CPU"
|
||||
wrmsr -a 0xc0011020 0x4400000000000
|
||||
wrmsr -a 0xc0011021 0x4000000000040
|
||||
wrmsr -a 0xc0011022 0x8680000401570000
|
||||
wrmsr -a 0xc001102b 0x2040cc10
|
||||
echo "MSR register values for Zen4 applied"
|
||||
else
|
||||
echo "Detected Zen3 CPU"
|
||||
wrmsr -a 0xc0011020 0x4480000000000
|
||||
wrmsr -a 0xc0011021 0x1c000200000040
|
||||
wrmsr -a 0xc0011022 0xc000000401570000
|
||||
wrmsr -a 0xc001102b 0x2000cc10
|
||||
echo "MSR register values for Zen3 applied"
|
||||
fi
|
||||
elif grep "cpu family[[:space:]]\{1,\}:[[:space:]]26" /proc/cpuinfo > /dev/null;
|
||||
then
|
||||
echo "Detected Zen5 CPU"
|
||||
wrmsr -a 0xc0011020 0x4400000000000
|
||||
wrmsr -a 0xc0011021 0x4000000000040
|
||||
wrmsr -a 0xc0011022 0x8680000401570000
|
||||
wrmsr -a 0xc001102b 0x2040cc10
|
||||
echo "MSR register values for Zen5 applied"
|
||||
else
|
||||
echo "Detected Zen1/Zen2 CPU"
|
||||
wrmsr -a 0xc0011020 0
|
||||
@@ -26,7 +44,7 @@ if cat /proc/cpuinfo | grep -E 'AMD Ryzen|AMD EPYC' > /dev/null;
|
||||
wrmsr -a 0xc001102b 0x2000cc16
|
||||
echo "MSR register values for Zen1/Zen2 applied"
|
||||
fi
|
||||
elif cat /proc/cpuinfo | grep "Intel" > /dev/null;
|
||||
elif grep "Intel" /proc/cpuinfo > /dev/null;
|
||||
then
|
||||
echo "Detected Intel CPU"
|
||||
wrmsr -a 0x1a4 0xf
|
||||
|
||||
23
scripts/rtm_ghostrider_example.cmd
Normal file
23
scripts/rtm_ghostrider_example.cmd
Normal file
@@ -0,0 +1,23 @@
|
||||
:: Example batch file for mining Raptoreum at a pool
|
||||
::
|
||||
:: Format:
|
||||
:: xmrig.exe -a gr -o <pool address>:<pool port> -u <pool username/wallet> -p <pool password>
|
||||
::
|
||||
:: Fields:
|
||||
:: pool address The host name of the pool stratum or its IP address, for example raptoreumemporium.com
|
||||
:: pool port The port of the pool's stratum to connect to, for example 3333. Check your pool's getting started page.
|
||||
:: pool username/wallet For most pools, this is the wallet address you want to mine to. Some pools require a username
|
||||
:: pool password For most pools this can be just 'x'. For pools using usernames, you may need to provide a password as configured on the pool.
|
||||
::
|
||||
:: List of Raptoreum mining pools:
|
||||
:: https://miningpoolstats.stream/raptoreum
|
||||
::
|
||||
:: Choose pools outside of top 5 to help Raptoreum network be more decentralized!
|
||||
:: Smaller pools also often have smaller fees/payout limits.
|
||||
|
||||
cd /d "%~dp0"
|
||||
:: Use this command line to connect to non-SSL port
|
||||
xmrig.exe -a gr -o raptoreumemporium.com:3008 -u WALLET_ADDRESS -p x
|
||||
:: Or use this command line to connect to an SSL port
|
||||
:: xmrig.exe -a gr -o rtm.suprnova.cc:4273 --tls -u WALLET_ADDRESS -p x
|
||||
pause
|
||||
@@ -11,6 +11,6 @@
|
||||
:: Mining solo is the best way to help Monero network be more decentralized!
|
||||
:: But you will only get a payout when you find a block which can take more than a year for a single low-end PC.
|
||||
|
||||
cd %~dp0
|
||||
xmrig.exe -o node.xmr.to:18081 -a rx/0 -u 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD --daemon
|
||||
cd /d "%~dp0"
|
||||
xmrig.exe -o YOUR_NODE_IP:18081 -a rx/0 -u 48edfHu7V9Z84YzzMa6fUueoELZ9ZRXq9VetWzYGzKt52XU5xvqgzYnDK9URnRoJMk1j8nLwEVsaSWJ4fhdUyZijBGUicoD --daemon
|
||||
pause
|
||||
|
||||
4
src/3rdparty/CL/cl_dx9_media_sharing.h
vendored
4
src/3rdparty/CL/cl_dx9_media_sharing.h
vendored
@@ -44,7 +44,7 @@ extern "C" {
|
||||
|
||||
typedef cl_uint cl_dx9_media_adapter_type_khr;
|
||||
typedef cl_uint cl_dx9_media_adapter_set_khr;
|
||||
|
||||
|
||||
#if defined(_WIN32)
|
||||
#include <d3d9.h>
|
||||
typedef struct _cl_dx9_surface_info_khr
|
||||
@@ -105,7 +105,7 @@ typedef CL_API_ENTRY cl_mem (CL_API_CALL *clCreateFromDX9MediaSurfaceKHR_fn)(
|
||||
cl_mem_flags flags,
|
||||
cl_dx9_media_adapter_type_khr adapter_type,
|
||||
void * surface_info,
|
||||
cl_uint plane,
|
||||
cl_uint plane,
|
||||
cl_int * errcode_ret) CL_API_SUFFIX__VERSION_1_2;
|
||||
|
||||
typedef CL_API_ENTRY cl_int (CL_API_CALL *clEnqueueAcquireDX9MediaSurfacesKHR_fn)(
|
||||
|
||||
2
src/3rdparty/CL/cl_gl_ext.h
vendored
2
src/3rdparty/CL/cl_gl_ext.h
vendored
@@ -35,7 +35,7 @@ extern "C" {
|
||||
|
||||
#include <CL/cl_gl.h>
|
||||
|
||||
/*
|
||||
/*
|
||||
* cl_khr_gl_event extension
|
||||
*/
|
||||
#define CL_COMMAND_GL_FENCE_SYNC_OBJECT_KHR 0x200D
|
||||
|
||||
12
src/3rdparty/adl/adl_defines.h
vendored
12
src/3rdparty/adl/adl_defines.h
vendored
@@ -1471,7 +1471,7 @@ typedef enum _ADLProfilePropertyType
|
||||
#define ADL_HDR_FREESYNC_HDR 0x0004 ///< FreeSync HDR supported
|
||||
/// @}
|
||||
|
||||
/// \defgroup define_FreesyncFlags ADLDDCInfo2 Freesync HDR flags
|
||||
/// \defgroup define_FreesyncFlags ADLDDCInfo2 Freesync HDR flags
|
||||
/// @{
|
||||
/// defines for iFreesyncFlags in ADLDDCInfo2
|
||||
#define ADL_HDR_FREESYNC_BACKLIGHT_SUPPORT 0x0001 ///< Global backlight control supported
|
||||
@@ -1738,7 +1738,7 @@ enum ADLODNDPMMaskType
|
||||
ADL_ODN_DPM_MASK = 1 << 2,
|
||||
};
|
||||
|
||||
//ODN features Bits for ADLODNCapabilitiesX2
|
||||
//ODN features Bits for ADLODNCapabilitiesX2
|
||||
enum ADLODNFeatureControl
|
||||
{
|
||||
ADL_ODN_SCLK_DPM = 1 << 0,
|
||||
@@ -1764,7 +1764,7 @@ enum ADLODNFeatureControl
|
||||
|
||||
//If any new feature is added, PPLIB only needs to add ext feature ID and Item ID(Seeting ID). These IDs should match the drive defined in CWDDEPM.h
|
||||
enum ADLODNExtFeatureControl
|
||||
{
|
||||
{
|
||||
ADL_ODN_EXT_FEATURE_MEMORY_TIMING_TUNE = 1 << 0,
|
||||
ADL_ODN_EXT_FEATURE_FAN_ZERO_RPM_CONTROL = 1 << 1,
|
||||
ADL_ODN_EXT_FEATURE_AUTO_UV_ENGINE = 1 << 2, //Auto under voltage
|
||||
@@ -1794,7 +1794,7 @@ enum ADLODNExtSettingId
|
||||
ADL_ODN_PARAMETER_FAN_CURVE_SPEED_5,
|
||||
ADL_ODN_POWERGAUGE,
|
||||
ODN_COUNT
|
||||
|
||||
|
||||
} ;
|
||||
|
||||
//OD8 Capability features bits
|
||||
@@ -1811,7 +1811,7 @@ enum ADLOD8FeatureControl
|
||||
ADL_OD8_MEMORY_TIMING_TUNE = 1 << 8,
|
||||
ADL_OD8_FAN_ZERO_RPM_CONTROL = 1 << 9 ,
|
||||
ADL_OD8_AUTO_UV_ENGINE = 1 << 10, //Auto under voltage
|
||||
ADL_OD8_AUTO_OC_ENGINE = 1 << 11, //Auto overclock engine
|
||||
ADL_OD8_AUTO_OC_ENGINE = 1 << 11, //Auto overclock engine
|
||||
ADL_OD8_AUTO_OC_MEMORY = 1 << 12, //Auto overclock memory
|
||||
ADL_OD8_FAN_CURVE = 1 << 13, //Fan curve
|
||||
ADL_OD8_WS_AUTO_FAN_ACOUSTIC_LIMIT = 1 << 14, //Workstation Manual Fan controller
|
||||
@@ -1888,7 +1888,7 @@ typedef enum _ADLSensorType
|
||||
PMLOG_TEMPERATURE_VRSOC = 24,
|
||||
PMLOG_TEMPERATURE_VRMVDD0 = 25,
|
||||
PMLOG_TEMPERATURE_VRMVDD1 = 26,
|
||||
PMLOG_TEMPERATURE_HOTSPOT = 27,
|
||||
PMLOG_TEMPERATURE_HOTSPOT = 27,
|
||||
PMLOG_TEMPERATURE_GFX = 28,
|
||||
PMLOG_TEMPERATURE_SOC = 29,
|
||||
PMLOG_GFX_POWER = 30,
|
||||
|
||||
2
src/3rdparty/adl/adl_sdk.h
vendored
2
src/3rdparty/adl/adl_sdk.h
vendored
@@ -37,7 +37,7 @@
|
||||
#define __stdcall
|
||||
#endif /* (LINUX) */
|
||||
|
||||
/// Memory Allocation Call back
|
||||
/// Memory Allocation Call back
|
||||
typedef void* ( __stdcall *ADL_MAIN_MALLOC_CALLBACK )( int );
|
||||
|
||||
|
||||
|
||||
2
src/3rdparty/adl/adl_structures.h
vendored
2
src/3rdparty/adl/adl_structures.h
vendored
@@ -1753,7 +1753,7 @@ typedef struct ADLPXConfigCaps
|
||||
///\brief Enum containing PX or HG type
|
||||
///
|
||||
/// This enum is used to get PX or HG type
|
||||
///
|
||||
///
|
||||
/// \nosubgrouping
|
||||
//////////////////////////////////////////////////////////////////////////////////////////
|
||||
enum ADLPxType
|
||||
|
||||
2
src/3rdparty/argon2/CMakeLists.txt
vendored
2
src/3rdparty/argon2/CMakeLists.txt
vendored
@@ -1,4 +1,4 @@
|
||||
cmake_minimum_required(VERSION 2.8.12)
|
||||
cmake_minimum_required(VERSION 3.10)
|
||||
|
||||
project(argon2 C)
|
||||
set(CMAKE_C_STANDARD 99)
|
||||
|
||||
25
src/3rdparty/epee/LICENSE.txt
vendored
Normal file
25
src/3rdparty/epee/LICENSE.txt
vendored
Normal file
@@ -0,0 +1,25 @@
|
||||
Copyright (c) 2006-2013, Andrey N. Sabelnikov, www.sabelnikov.net
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
* Neither the name of the Andrey N. Sabelnikov nor the
|
||||
names of its contributors may be used to endorse or promote products
|
||||
derived from this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
|
||||
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
|
||||
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
|
||||
DISCLAIMED. IN NO EVENT SHALL Andrey N. Sabelnikov BE LIABLE FOR ANY
|
||||
DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
|
||||
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
|
||||
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
|
||||
ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
1
src/3rdparty/epee/README.md
vendored
Normal file
1
src/3rdparty/epee/README.md
vendored
Normal file
@@ -0,0 +1 @@
|
||||
epee - is a small library of helpers, wrappers, tools and so on, used to make my life easier.
|
||||
176
src/3rdparty/epee/span.h
vendored
Normal file
176
src/3rdparty/epee/span.h
vendored
Normal file
@@ -0,0 +1,176 @@
|
||||
// Copyright (c) 2017-2020, The Monero Project
|
||||
//
|
||||
// All rights reserved.
|
||||
//
|
||||
// Redistribution and use in source and binary forms, with or without modification, are
|
||||
// permitted provided that the following conditions are met:
|
||||
//
|
||||
// 1. Redistributions of source code must retain the above copyright notice, this list of
|
||||
// conditions and the following disclaimer.
|
||||
//
|
||||
// 2. Redistributions in binary form must reproduce the above copyright notice, this list
|
||||
// of conditions and the following disclaimer in the documentation and/or other
|
||||
// materials provided with the distribution.
|
||||
//
|
||||
// 3. Neither the name of the copyright holder nor the names of its contributors may be
|
||||
// used to endorse or promote products derived from this software without specific
|
||||
// prior written permission.
|
||||
//
|
||||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY
|
||||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
|
||||
// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
|
||||
// THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
|
||||
// STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF
|
||||
// THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cstdint>
|
||||
#include <memory>
|
||||
#include <string>
|
||||
#include <type_traits>
|
||||
|
||||
namespace epee
|
||||
{
|
||||
/*!
|
||||
\brief Non-owning sequence of data. Does not deep copy
|
||||
|
||||
Inspired by `gsl::span` and/or `boost::iterator_range`. This class is
|
||||
intended to be used as a parameter type for functions that need to take a
|
||||
writable or read-only sequence of data. Most common cases are `span<char>`
|
||||
and `span<std::uint8_t>`. Using as a class member is only recommended if
|
||||
clearly documented as not doing a deep-copy. C-arrays are easily convertible
|
||||
to this type.
|
||||
|
||||
\note Conversion from C string literal to `span<const char>` will include
|
||||
the NULL-terminator.
|
||||
\note Never allows derived-to-base pointer conversion; an array of derived
|
||||
types is not an array of base types.
|
||||
*/
|
||||
template<typename T>
|
||||
class span
|
||||
{
|
||||
template<typename U>
|
||||
static constexpr bool safe_conversion() noexcept
|
||||
{
|
||||
// Allow exact matches or `T*` -> `const T*`.
|
||||
using with_const = typename std::add_const<U>::type;
|
||||
return std::is_same<T, U>() ||
|
||||
(std::is_const<T>() && std::is_same<T, with_const>());
|
||||
}
|
||||
|
||||
public:
|
||||
using value_type = T;
|
||||
using size_type = std::size_t;
|
||||
using difference_type = std::ptrdiff_t;
|
||||
using pointer = T*;
|
||||
using const_pointer = const T*;
|
||||
using reference = T&;
|
||||
using const_reference = const T&;
|
||||
using iterator = pointer;
|
||||
using const_iterator = const_pointer;
|
||||
|
||||
constexpr span() noexcept : ptr(nullptr), len(0) {}
|
||||
constexpr span(std::nullptr_t) noexcept : span() {}
|
||||
|
||||
//! Prevent derived-to-base conversions; invalid in this context.
|
||||
template<typename U, typename = typename std::enable_if<safe_conversion<U>()>::type>
|
||||
constexpr span(U* const src_ptr, const std::size_t count) noexcept
|
||||
: ptr(src_ptr), len(count) {}
|
||||
|
||||
//! Conversion from C-array. Prevents common bugs with sizeof + arrays.
|
||||
template<std::size_t N>
|
||||
constexpr span(T (&src)[N]) noexcept : span(src, N) {}
|
||||
|
||||
constexpr span(const span&) noexcept = default;
|
||||
span& operator=(const span&) noexcept = default;
|
||||
|
||||
/*! Try to remove `amount` elements from beginning of span.
|
||||
\return Number of elements removed. */
|
||||
std::size_t remove_prefix(std::size_t amount) noexcept
|
||||
{
|
||||
amount = std::min(len, amount);
|
||||
ptr += amount;
|
||||
len -= amount;
|
||||
return amount;
|
||||
}
|
||||
|
||||
constexpr iterator begin() const noexcept { return ptr; }
|
||||
constexpr const_iterator cbegin() const noexcept { return ptr; }
|
||||
|
||||
constexpr iterator end() const noexcept { return begin() + size(); }
|
||||
constexpr const_iterator cend() const noexcept { return cbegin() + size(); }
|
||||
|
||||
constexpr bool empty() const noexcept { return size() == 0; }
|
||||
constexpr pointer data() const noexcept { return ptr; }
|
||||
constexpr std::size_t size() const noexcept { return len; }
|
||||
constexpr std::size_t size_bytes() const noexcept { return size() * sizeof(value_type); }
|
||||
|
||||
T &operator[](size_t idx) noexcept { return ptr[idx]; }
|
||||
const T &operator[](size_t idx) const noexcept { return ptr[idx]; }
|
||||
|
||||
private:
|
||||
T* ptr;
|
||||
std::size_t len;
|
||||
};
|
||||
|
||||
//! \return `span<const T::value_type>` from a STL compatible `src`.
|
||||
template<typename T>
|
||||
constexpr span<const typename T::value_type> to_span(const T& src)
|
||||
{
|
||||
// compiler provides diagnostic if size() is not size_t.
|
||||
return {src.data(), src.size()};
|
||||
}
|
||||
|
||||
//! \return `span<T::value_type>` from a STL compatible `src`.
|
||||
template<typename T>
|
||||
constexpr span<typename T::value_type> to_mut_span(T& src)
|
||||
{
|
||||
// compiler provides diagnostic if size() is not size_t.
|
||||
return {src.data(), src.size()};
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
constexpr bool has_padding() noexcept
|
||||
{
|
||||
return !std::is_standard_layout<T>() || alignof(T) != 1;
|
||||
}
|
||||
|
||||
//! \return Cast data from `src` as `span<const std::uint8_t>`.
|
||||
template<typename T>
|
||||
span<const std::uint8_t> to_byte_span(const span<const T> src) noexcept
|
||||
{
|
||||
static_assert(!has_padding<T>(), "source type may have padding");
|
||||
return {reinterpret_cast<const std::uint8_t*>(src.data()), src.size_bytes()};
|
||||
}
|
||||
|
||||
//! \return `span<const std::uint8_t>` which represents the bytes at `&src`.
|
||||
template<typename T>
|
||||
span<const std::uint8_t> as_byte_span(const T& src) noexcept
|
||||
{
|
||||
static_assert(!std::is_empty<T>(), "empty types will not work -> sizeof == 1");
|
||||
static_assert(!has_padding<T>(), "source type may have padding");
|
||||
return {reinterpret_cast<const std::uint8_t*>(std::addressof(src)), sizeof(T)};
|
||||
}
|
||||
|
||||
//! \return `span<std::uint8_t>` which represents the bytes at `&src`.
|
||||
template<typename T>
|
||||
span<std::uint8_t> as_mut_byte_span(T& src) noexcept
|
||||
{
|
||||
static_assert(!std::is_empty<T>(), "empty types will not work -> sizeof == 1");
|
||||
static_assert(!has_padding<T>(), "source type may have padding");
|
||||
return {reinterpret_cast<std::uint8_t*>(std::addressof(src)), sizeof(T)};
|
||||
}
|
||||
|
||||
//! make a span from a std::string
|
||||
template<typename T>
|
||||
span<const T> strspan(const std::string &s) noexcept
|
||||
{
|
||||
static_assert(std::is_same<T, char>() || std::is_same<T, unsigned char>() || std::is_same<T, int8_t>() || std::is_same<T, uint8_t>(), "Unexpected type");
|
||||
return {reinterpret_cast<const T*>(s.data()), s.size()};
|
||||
}
|
||||
}
|
||||
8
src/3rdparty/fmt/README.rst
vendored
8
src/3rdparty/fmt/README.rst
vendored
@@ -81,7 +81,7 @@ Examples
|
||||
.. code:: c++
|
||||
|
||||
#include <fmt/core.h>
|
||||
|
||||
|
||||
int main() {
|
||||
fmt::print("Hello, world!\n");
|
||||
}
|
||||
@@ -293,11 +293,11 @@ Projects using this library
|
||||
An open-source library for mathematical programming
|
||||
|
||||
* `Aseprite <https://github.com/aseprite/aseprite>`_:
|
||||
Animated sprite editor & pixel art tool
|
||||
Animated sprite editor & pixel art tool
|
||||
|
||||
* `AvioBook <https://www.aviobook.aero/en>`_: A comprehensive aircraft
|
||||
operations suite
|
||||
|
||||
|
||||
* `Celestia <https://celestia.space/>`_: Real-time 3D visualization of space
|
||||
|
||||
* `Ceph <https://ceph.com/>`_: A scalable distributed storage system
|
||||
@@ -351,7 +351,7 @@ Projects using this library
|
||||
|
||||
* `quasardb <https://www.quasardb.net/>`_: A distributed, high-performance,
|
||||
associative database
|
||||
|
||||
|
||||
* `Quill <https://github.com/odygrd/quill>`_: Asynchronous low-latency logging library
|
||||
|
||||
* `QKW <https://github.com/ravijanjam/qkw>`_: Generalizing aliasing to simplify
|
||||
|
||||
10
src/3rdparty/getopt/getopt.h
vendored
10
src/3rdparty/getopt/getopt.h
vendored
@@ -3,9 +3,9 @@
|
||||
* DISCLAIMER
|
||||
* This file is part of the mingw-w64 runtime package.
|
||||
*
|
||||
* The mingw-w64 runtime package and its code is distributed in the hope that it
|
||||
* will be useful but WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESSED OR
|
||||
* IMPLIED ARE HEREBY DISCLAIMED. This includes but is not limited to
|
||||
* The mingw-w64 runtime package and its code is distributed in the hope that it
|
||||
* will be useful but WITHOUT ANY WARRANTY. ALL WARRANTIES, EXPRESSED OR
|
||||
* IMPLIED ARE HEREBY DISCLAIMED. This includes but is not limited to
|
||||
* warranties of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
|
||||
*/
|
||||
/*
|
||||
@@ -109,11 +109,7 @@ char *optarg; /* argument associated with option */
|
||||
extern char __declspec(dllimport) *__progname;
|
||||
#endif
|
||||
|
||||
#ifdef __CYGWIN__
|
||||
static char EMSG[] = "";
|
||||
#else
|
||||
#define EMSG ""
|
||||
#endif
|
||||
|
||||
static int getopt_internal(int, char * const *, const char *,
|
||||
const struct option *, int *, int);
|
||||
|
||||
2
src/3rdparty/hwloc/CMakeLists.txt
vendored
2
src/3rdparty/hwloc/CMakeLists.txt
vendored
@@ -1,4 +1,4 @@
|
||||
cmake_minimum_required (VERSION 2.8.12)
|
||||
cmake_minimum_required(VERSION 3.10)
|
||||
project (hwloc C)
|
||||
|
||||
include_directories(include)
|
||||
|
||||
368
src/3rdparty/hwloc/NEWS
vendored
368
src/3rdparty/hwloc/NEWS
vendored
@@ -1,5 +1,5 @@
|
||||
Copyright © 2009 CNRS
|
||||
Copyright © 2009-2020 Inria. All rights reserved.
|
||||
Copyright © 2009-2024 Inria. All rights reserved.
|
||||
Copyright © 2009-2013 Université Bordeaux
|
||||
Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
Copyright © 2020 Hewlett Packard Enterprise. All rights reserved.
|
||||
@@ -17,6 +17,372 @@ bug fixes (and other actions) for each version of hwloc since version
|
||||
0.9.
|
||||
|
||||
|
||||
Version 2.11.2
|
||||
--------------
|
||||
* Add missing CPU info attrs on aarch64 on Linux.
|
||||
* Use ACPI CPPC on Linux to get better information about cpukinds,
|
||||
at least on AMD CPUs.
|
||||
* Fix crash when manipulating cpukinds after topology
|
||||
duplication, thanks to Hadrien Grasland for the report.
|
||||
* Fix missing input target checks in memattr functions,
|
||||
thanks to Hadrien Grasland for the report.
|
||||
* Fix a memory leak when ignoring NUMA distances on FreeBSD.
|
||||
* Fix build failure on old Linux distributions without accessat().
|
||||
* Fix non-Windows importing of XML topologies and CPUID dumps exported
|
||||
on Windows.
|
||||
* hwloc-calc --cpuset-output-format systemd-dbus-api now allows
|
||||
to generate AllowedCPUs information for systemd slices.
|
||||
See the hwloc-calc manpage for examples. Thanks to Pierre Neyron.
|
||||
* Some fixes in manpage EXAMPLES and split them into subsections.
|
||||
|
||||
|
||||
Version 2.11.1
|
||||
--------------
|
||||
* Fix bash completions, thanks Tavis Rudd.
|
||||
|
||||
|
||||
Version 2.11.0
|
||||
--------------
|
||||
* API
|
||||
+ Add HWLOC_MEMBIND_WEIGHTED_INTERLEAVE memory binding policy on
|
||||
Linux 6.9+. Thanks to Honggyu Kim for the patch.
|
||||
- weighted_interleave_membind is added to membind support bits.
|
||||
- The "weighted" policy is added to the hwloc-bind tool.
|
||||
+ Add hwloc_obj_set_subtype(). Thanks to Hadrien Grasland for the report.
|
||||
* GPU support
|
||||
+ Don't hide the GPU NUMA node on NVIDIA Grace Hopper.
|
||||
+ Get Intel GPU OpenCL device locality.
|
||||
+ Add bandwidths between subdevices in the LevelZero XeLinkBandwidth
|
||||
matrix.
|
||||
+ Fix PCI Gen4+ link speed of NVIDIA GPU obtained from NVML,
|
||||
thanks to Akram Sbaih for the report.
|
||||
* Windows support
|
||||
+ Fix Windows support when UNICODE is enabled, several hwloc features
|
||||
were missing, thanks to Martin for the report.
|
||||
+ Fix the enabling of CUDA in Windows CMake build,
|
||||
Thanks to Moritz Kreutzer for the patch.
|
||||
+ Fix CUDA/OpenCL test source path in Windows CMake.
|
||||
* Tools
|
||||
+ Option --best-memattr may now return multiple nodes. Additional
|
||||
configuration flags may be given to tweak its behavior.
|
||||
+ hwloc-info has a new --get-attr option to get a single attribute.
|
||||
+ hwloc-info now supports "levels", "support" and "topology"
|
||||
special keywords for backward compatibility for hwloc 3.0.
|
||||
+ The --taskset command-line option is superseded by the new
|
||||
--cpuset-output-format which also allows to export as list.
|
||||
+ hwloc-calc may now import bitmasks described as a list of bits
|
||||
with the new "--cpuset-input-format list".
|
||||
* Misc
|
||||
+ The MemoryTiersNr info attribute in the root object now says how many
|
||||
memory tiers were built. Thanks to Antoine Morvan for the report.
|
||||
+ Fix the management of infinite cpusets in the bitmap printf/sscanf
|
||||
API as well as in command-line tools.
|
||||
+ Add section "Compiling software on top of hwloc's C API" in the
|
||||
documentation with examples for GNU Make and CMake,
|
||||
thanks to Florent Pruvost for the help.
|
||||
|
||||
|
||||
Version 2.10.0
|
||||
--------------
|
||||
* Heterogeneous Memory core improvements
|
||||
+ Better heuristics to identify the subtype of memory such as HBM,
|
||||
DRAM, NVM, CXL-DRAM, etc.
|
||||
+ Build memory tiers, i.e. sets of NUMA nodes with the same subtype
|
||||
and similar performance.
|
||||
- NUMA node tier ranks are exposed in the new MemoryTier info
|
||||
attribute (starts from 0 for highest bandwidth tier).
|
||||
+ See the new Heterogeneous Memory section in the documentation.
|
||||
* API
|
||||
+ Add hwloc_topology_free_group_object() to discard a Group created
|
||||
by hwloc_topology_alloc_group_object().
|
||||
* Linux backend
|
||||
+ Fix cpukinds on NVIDIA Grace to report identical cores even if they
|
||||
actually have very small frequency differences.
|
||||
Thanks to John C. Linford for the report.
|
||||
+ Add CXLDevice attributes to CXL DAX objects and NUMA nodes to show
|
||||
which PCI device implements which window.
|
||||
+ Ignore buggy memory-side caches and memory attributes when fake NUMA
|
||||
emulation is enabled on the Linux kernel command-line.
|
||||
+ Add more info attributes in MemoryModule Misc objects,
|
||||
thanks to Zubiao Xiong for the patch.
|
||||
+ Get CPUModel and CPUFamily info attributes on LoongArch platforms.
|
||||
* x86 backend
|
||||
+ Add support for new AMD CPUID leaf 0x80000026 for better detection
|
||||
of Core Complex and Die on Zen4 processors.
|
||||
+ Improve Zhaoxin CPU topology detection.
|
||||
* Tools
|
||||
+ Input locations and many command-line options (e.g. hwloc-calc -I -N -H,
|
||||
lstopo --only) now accept filters such as "NUMA[HBM]" so that only
|
||||
objects of that type and subtype are considered.
|
||||
- NUMA[tier=1] is also accepted for selecting NUMA nodes depending
|
||||
on their MemoryTier info attribute.
|
||||
+ Add --object-output to hwloc-calc to report the type as a prefix to
|
||||
object indexes, e.g. Core:2 instead of 2 in the output of -I.
|
||||
+ hwloc-info --ancestor and --descendants now accept kinds of objects
|
||||
instead of single types.
|
||||
- The new --first option only shows the first matching object.
|
||||
+ Add --children-of-pid to hwloc-ps to show a hierarchy of processes.
|
||||
Thanks to Antoine Morvan for the suggestion.
|
||||
+ Add --misc-from to lstopo to add Misc objects described in a file.
|
||||
- To be combined with the new hwloc-ps --lstopo-misc for a customizable
|
||||
lstopo --top replacement.
|
||||
* Misc
|
||||
+ lstopo may now configure the layout of memory objects placed above,
|
||||
for instance with --children-order memory:above:vert.
|
||||
+ Fix XML import from memory or stdin when using libxml2 2.12.
|
||||
+ Fix installation failures when configuring with --target,
|
||||
thanks to Clement Foyer for the patch.
|
||||
+ Fix support for 128bit pointer architectures.
|
||||
+ Remove Netloc.
|
||||
|
||||
|
||||
Version 2.9.3
|
||||
-------------
|
||||
* Handle Linux glibc allocation errors in binding routines (CVE-2022-47022).
|
||||
* Fix hwloc-calc when searching objects on heterogeneous memory platforms,
|
||||
thanks to Antoine Morvan for the report.
|
||||
* Fix hwloc_get_next_child() when there are some memory-side caches.
|
||||
* Don't crash if the topology is empty because Linux cgroups are wrong.
|
||||
* Improve some hwloc-bind warnings in case of command-line parsing errors.
|
||||
* Many documentation improvements all over the place, including:
|
||||
+ hwloc_topology_restrict() and hwloc_topology_insert_group() may reorder
|
||||
children, causing the logical indexes of objects to change.
|
||||
|
||||
|
||||
Version 2.9.2
|
||||
-------------
|
||||
* Don't forget L3i when defining filters for multiple levels of caches
|
||||
with hwloc_topology_set_cache/icache_types_filter().
|
||||
* Fix object total_memory after hwloc_topology_insert_group_object().
|
||||
* Fix the (non-yet) exporting in synthetic description for complex memory
|
||||
hierarchies with memory-side caches, etc.
|
||||
* Fix some default size attributes when building synthetic topologies.
|
||||
* Fix size units in hwloc-annotate.
|
||||
* Improve bitmap reallocation error management in many functions.
|
||||
* Documentation improvements:
|
||||
+ Better document return values of functions.
|
||||
+ Add "Error reporting" section (in hwloc.h and in the doxygen doc).
|
||||
+ Add FAQ entry "What may I disable to make hwloc faster?"
|
||||
+ Improve FAQ entries "Why is lstopo slow?" and
|
||||
"I only need ..., why should I use hwloc?"
|
||||
+ Clarify how to deal with cpukinds in hwloc-calc and hwloc-bind
|
||||
manpages.
|
||||
|
||||
|
||||
Version 2.9.1
|
||||
-------------
|
||||
* Don't forget to apply object type filters to "perflevel" caches detected
|
||||
on recent Mac OS X releases, thanks to Michel Lesoinne for the report.
|
||||
* Fix a failed assertion in hwloc_topology_restrict() when some NUMA nodes
|
||||
are removed because of HWLOC_RESTRICT_FLAG_REMOVE_CPULESS but no PUs are.
|
||||
Thanks to Mark Grondona for reporting the issue.
|
||||
* Mark HPE Cray Slingshot NICs with subtype "Slingshot".
|
||||
|
||||
|
||||
Version 2.9.0
|
||||
-------------
|
||||
* Backends
|
||||
+ Expose the memory size of CXL memory devices (Type 3) on Linux.
|
||||
+ The LevelZero backend now reports the "XeLinkBandwidth" distance
|
||||
matrix between L0 devices (and subdevices) when available.
|
||||
+ Add support for CUDA compute capability up to 9.0.
|
||||
* Tools
|
||||
+ lstopo now switches to console mode when its output is redirected.
|
||||
Graphical window mode may be forced back with --of window.
|
||||
+ hwloc-calc now accepts "numa" in -H, and I/O subtypes such as "gpu"
|
||||
in -I and -N.
|
||||
|
||||
|
||||
Version 2.8.0
|
||||
-------------
|
||||
* API
|
||||
+ Add HWLOC_TOPOLOGY_FLAG_NO_DISTANCES, _NO_MEMATTRS and _NO_CPUKINDS
|
||||
to reduce the overhead when unneeded.
|
||||
+ Add separate Read/Write Bandwidth/Latency memory attributes and
|
||||
implement them on Linux.
|
||||
* Backends
|
||||
+ NUMA nodes may now have a subtype such as DRAM, HBM, SPM, or NVM
|
||||
on heterogeneous memory platforms on Linux.
|
||||
- Add DAXType and DAXParent attributes on Linux to tell where a
|
||||
DAX device or its corresponding NUMA node come from (SPM for
|
||||
Specific-Purpose or NVM for Non-Volatile Memory).
|
||||
+ Detect heterogeneous caches in hybrid CPUs on MacOS X,
|
||||
thanks to Paul Bone for the help.
|
||||
+ Max frequencies are not ignored in Linux cpukinds anymore (they were
|
||||
ignored in hwloc 2.7.0), but they may be slightly adjusted to avoid
|
||||
reporting hybrid CPUs because of Intel Turbo Boost Max 3.0.
|
||||
- See the documentation of environment variable HWLOC_CPUKINDS_MAXFREQ.
|
||||
+ Hardwire the PCI locality of HPE Cray EX235a nodes.
|
||||
* Tools
|
||||
+ lstopo and other tools may now load Linux and x86 cpuid topology files
|
||||
from a tarball.
|
||||
+ lstopo may now replace the P# and L# index prefixes with custom strings
|
||||
thanks to --os-index-prefix and --logical-index-prefix options.
|
||||
* Misc
|
||||
+ Add --disable-readme to avoid regenerating the top-level hwloc README
|
||||
file from the documentation.
|
||||
|
||||
|
||||
Version 2.7.2
|
||||
-------------
|
||||
* Fix a crash when LevelZero devices have multiple subdevices,
|
||||
e.g. on PonteVecchio GPUs, thanks to Jonathan Peyton.
|
||||
* Fix a leak when importing cpukinds from XML,
|
||||
thanks to Hui Zhou.
|
||||
|
||||
|
||||
Version 2.7.1
|
||||
-------------
|
||||
* Workaround crashes when virtual machines report incoherent x86 CPUID
|
||||
information about numbers of cores and threads.
|
||||
Thanks to Peter Bense for the report.
|
||||
* Use setenv() instead of putenv() when trying to force enable oneAPI L0
|
||||
support, to avoid issues with applications that touch the environment,
|
||||
thanks to Josh Hursey for the patch.
|
||||
* Add some warnings at the end of configure when GPU libraries are
|
||||
missing on the system or their path is missing in the environment.
|
||||
|
||||
|
||||
Version 2.7.0
|
||||
-------------
|
||||
* Backends
|
||||
+ Add support for NUMA nodes and caches with more than 64 PUs across
|
||||
multiple processor groups on Windows 11 and Windows Server 2022.
|
||||
+ Group objects are not created for Windows processor groups anymore,
|
||||
except if HWLOC_WINDOWS_PROCESSOR_GROUP_OBJS=1 in the environment.
|
||||
+ Expose "Cluster" group objects on Linux kernel 5.16+ for CPUs
|
||||
that share some internal cache or bus. This can be equivalent
|
||||
to the L2 Cache level on some platforms (e.g. x86) or a specific
|
||||
level between L2 and L3 on others (e.g. ARM Kunpeng 920).
|
||||
Thanks to Jonathan Cameron for the help.
|
||||
- HWLOC_DONT_MERGE_CLUSTER_GROUPS=1 may be set in the environment
|
||||
to prevent these groups from being merged with identical caches, etc.
|
||||
+ Improve the oneAPI LevelZero backend:
|
||||
- Expose subdevices such as "ze0.1" inside root OS devices ("ze0")
|
||||
when the hardware contains multiple subdevices.
|
||||
- Add many new attributes to describe device type, and the
|
||||
numbers of slices, subslices, execution units and threads.
|
||||
- Expose the memory information as LevelZeroHBM/DDR/MemorySize infos.
|
||||
+ Ignore the max frequencies of cores in Linux cpukinds when the
|
||||
base frequencies are available (to avoid exposing hybrid CPUs
|
||||
when Intel Turbo Boost Max 3.0 gives slightly different max
|
||||
frequencies to CPU cores).
|
||||
- May be reverted by setting HWLOC_CPUKINDS_MAXFREQ=1 in the environment.
|
||||
* Tools
|
||||
+ Add --grey and --palette options to switch lstopo to greyscale or
|
||||
white-background-only graphics, or to tune individual colors.
|
||||
* Build
|
||||
+ Windows CMake builds now support non-MSVC compilers, detect several
|
||||
features at build time, can build/run tests, etc.
|
||||
Thanks to Michael Hirsch and Alexander Neumann.
|
||||
|
||||
|
||||
Version 2.6.0
|
||||
-------------
|
||||
* Backends
|
||||
+ Expose two cpukinds for energy-efficient cores (icestorm) and
|
||||
high-performance cores (firestorm) on Apple M1 on Mac OS X.
|
||||
+ Use sysfs CPU "capacity" to rank hybrid cores by efficiency
|
||||
on Linux when available (mostly on recent ARM platforms for now).
|
||||
+ Improve HWLOC_MEMBIND_BIND (without the STRICT flag) on Linux kernel
|
||||
>= 5.15: If more than one node is given, the kernel may now use all
|
||||
of them instead of only the first one before falling back to others.
|
||||
+ Expose cache os_index when available on Linux, it may be needed
|
||||
when using resctrl to configure cache partitioning, memory bandwidth
|
||||
monitoring, etc.
|
||||
+ Add a "XGMIHops" distances matrix in the RSMI backend for AMD GPUs
|
||||
interconnected through XGMI links.
|
||||
+ Expose AMD GPU memory information (VRAM and GTT) in the RSMI backend.
|
||||
+ Add OS devices such as "bxi0" for Atos/Bull BXI HCAs on Linux.
|
||||
* Tools
|
||||
+ lstopo has a better placement algorithm with respect to I/O
|
||||
objects, see --children-order in the manpage for details.
|
||||
+ hwloc-annotate may now change object subtypes and cache or memory
|
||||
sizes.
|
||||
* Build
|
||||
+ Allow to specify the ROCm installation for building the RSMI backend:
|
||||
- Use a custom installation path if specified with --with-rocm=<dir>.
|
||||
- Use /opt/rocm-<version> if specified with --with-rocm-version=<version>
|
||||
or the ROCM_VERSION environment variable.
|
||||
- Try /opt/rocm if it exists.
|
||||
- See "How do I enable ROCm SMI and select which version to use?"
|
||||
in the FAQ for details.
|
||||
+ Add a CMakeLists for Windows under contrib/windows-cmake/ .
|
||||
* Documentation
|
||||
+ Add FAQ entry "How do I create a custom heterogeneous and
|
||||
asymmetric topology?"
|
||||
|
||||
|
||||
Version 2.5.0
|
||||
-------------
|
||||
* API
|
||||
+ Add hwloc/windows.h to query Windows processor groups.
|
||||
+ Add hwloc_get_obj_with_same_locality() to convert between objects
|
||||
with same locality, for instance NUMA nodes and Packages,
|
||||
or OS devices within a PCI device.
|
||||
+ Add hwloc_distances_transform() to modify distances structures.
|
||||
- hwloc-annotate and lstopo have new distances-transform options.
|
||||
+ hwloc_distances_add() is replaced with _add_create() followed by
|
||||
_add_values() and _add_commit(). See hwloc/distances.h for details.
|
||||
+ Add topology flags to mitigate binding modifications during
|
||||
hwloc discovery, especially on Windows:
|
||||
- HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING and _MEMBINDING
|
||||
restrict discovery to PUs and NUMA nodes inside the binding.
|
||||
- HWLOC_TOPOLOGY_FLAG_DONT_CHANGE_BINDING prevents from ever
|
||||
changing the binding during discovery.
|
||||
* Backends
|
||||
+ Add a levelzero backend for oneAPI L0 devices, exposed as OS devices
|
||||
of subtype "LevelZero" and name such as "ze0".
|
||||
- Add hwloc/levelzero.h for interoperability, converting
|
||||
between L0 API devices and hwloc cpusets or OS devices.
|
||||
+ Expose NEC Vector Engine cards on Linux as OS devices of subtype
|
||||
"VectorEngine" and name "ve0", etc.
|
||||
Thanks to Anara Kozhokanova, Tim Cramer and Erich Focht for the help.
|
||||
+ Add a NVLinkBandwidth distances structure between NVIDIA GPUs
|
||||
(and POWER processor or NVSwitches) in the NVML backend,
|
||||
and a XGMIBandwidth distances structure between AMD GPUs
|
||||
in the RSMI backends.
|
||||
- See "Topology Attributes: Distances, Memory Attributes and CPU Kinds"
|
||||
in the documentation for details about these new distances.
|
||||
+ Add support for NUMA node 0 being offline in Linux, thanks to Jirka Hladky.
|
||||
* Build
|
||||
+ Add --with-cuda-version=<version> or look at the CUDA_VERSION
|
||||
environment variable to find the appropriate CUDA pkg-config files.
|
||||
Thanks to Stephen Herbein for the suggestion.
|
||||
- Also add --with-cuda=<dir> to specify the CUDA installation path
|
||||
manually (and its NVML and OpenCL components).
|
||||
Thanks to Andrea Bocci for the suggestion.
|
||||
- See "How do I enable CUDA and select which CUDA version to use?"
|
||||
in the FAQ for details.
|
||||
* Tools
|
||||
+ lstopo now has a --windows-processor-groups option on Windows.
|
||||
+ hwloc-ps now has a --short-name option to avoid long/truncated
|
||||
command path.
|
||||
+ hwloc-ps now has a --single-ancestor option to return a single
|
||||
(possibly too large) object where a process is bound.
|
||||
+ hwloc-ps --pid-cmd may now query environment variables,
|
||||
including MPI-specific variables to find out process ranks.
|
||||
|
||||
|
||||
Version 2.4.1
|
||||
-------------
|
||||
* Fix AMD OpenCL device locality when PCI bus or device number >= 128.
|
||||
Thanks to Edgar Leon for reporting the issue.
|
||||
+ Applications using any of the following inline functions must
|
||||
be recompiled to get the fix: hwloc_opencl_get_device_pci_busid()
|
||||
hwloc_opencl_get_device_cpuset(), hwloc_opencl_get_device_osdev().
|
||||
* Fix the ranking of cpukinds on non-Windows systems,
|
||||
thanks to Ivan Kochin for the report.
|
||||
* Fix the insertion of custom Groups after loading the topology,
|
||||
thanks to Scott Hicks.
|
||||
* Add support for CPU0 being offline in Linux, thanks to Garrett Clay.
|
||||
* Fix missing x86 Package and Core objects on FreeBSD/NetBSD.
|
||||
Thanks to Thibault Payet and Yuri Victorovich for the report.
|
||||
* Fix the import of very large distances with heterogeneous object types.
|
||||
* Fix a memory leak in the Linux backend,
|
||||
thanks to Perceval Anichini.
|
||||
|
||||
|
||||
Version 2.4.0
|
||||
-------------
|
||||
* API
|
||||
|
||||
492
src/3rdparty/hwloc/README
vendored
492
src/3rdparty/hwloc/README
vendored
@@ -1,4 +1,8 @@
|
||||
Introduction
|
||||
This is a truncated and poorly-formatted version of the documentation main page.
|
||||
See https://www.open-mpi.org/projects/hwloc/doc/ for more.
|
||||
|
||||
|
||||
hwloc Overview
|
||||
|
||||
The Hardware Locality (hwloc) software project aims at easing the process of
|
||||
discovering hardware resources in parallel architectures. It offers
|
||||
@@ -8,66 +12,450 @@ high-performance computing (HPC) applications, but is also applicable to any
|
||||
project seeking to exploit code and/or data locality on modern computing
|
||||
platforms.
|
||||
|
||||
hwloc is actually made of two subprojects distributed together:
|
||||
hwloc provides command line tools and a C API to obtain the hierarchical map of
|
||||
key computing elements within a node, such as: NUMA memory nodes, shared
|
||||
caches, processor packages, dies and cores, processing units (logical
|
||||
processors or "threads") and even I/O devices. hwloc also gathers various
|
||||
attributes such as cache and memory information, and is portable across a
|
||||
variety of different operating systems and platforms.
|
||||
|
||||
* The original hwloc project for describing the internals of computing nodes.
|
||||
It is described in details starting at section Hardware Locality (hwloc)
|
||||
Introduction.
|
||||
* The network-oriented companion called netloc (Network Locality), described
|
||||
in details starting with section Network Locality (netloc).
|
||||
hwloc primarily aims at helping high-performance computing (HPC) applications,
|
||||
but is also applicable to any project seeking to exploit code and/or data
|
||||
locality on modern computing platforms.
|
||||
|
||||
See also the Related pages tab above for links to other sections.
|
||||
hwloc supports the following operating systems:
|
||||
|
||||
Netloc may be disabled, but the original hwloc cannot. Both hwloc and netloc
|
||||
APIs are documented after these sections.
|
||||
* Linux (with knowledge of cgroups and cpusets, memory targets/initiators,
|
||||
etc.) on all supported hardware, including Intel Xeon Phi, ScaleMP vSMP,
|
||||
and NumaScale NumaConnect.
|
||||
* Solaris (with support for processor sets and logical domains)
|
||||
* AIX
|
||||
* Darwin / OS X
|
||||
* FreeBSD and its variants (such as kFreeBSD/GNU)
|
||||
* NetBSD
|
||||
* HP-UX
|
||||
* Microsoft Windows
|
||||
* IBM BlueGene/Q Compute Node Kernel (CNK)
|
||||
|
||||
Installation
|
||||
Since it uses standard Operating System information, hwloc's support is mostly
|
||||
independent from the processor type (x86, powerpc, ...) and just relies on the
|
||||
Operating System support. The main exception is BSD operating systems (NetBSD,
|
||||
FreeBSD, etc.) because they do not provide topology information, hence
|
||||
hwloc uses an x86-only CPUID-based backend (which can be used for other OSes
|
||||
too, see the Components and plugins section).
|
||||
|
||||
hwloc (https://www.open-mpi.org/projects/hwloc/) is available under the BSD
|
||||
license. It is hosted as a sub-project of the overall Open MPI project (https:/
|
||||
/www.open-mpi.org/). Note that hwloc does not require any functionality from
|
||||
Open MPI -- it is a wholly separate (and much smaller!) project and code base.
|
||||
It just happens to be hosted as part of the overall Open MPI project.
|
||||
To check whether hwloc works on a particular machine, just try to build it and
|
||||
run lstopo or lstopo-no-graphics. If some things do not look right (e.g. bogus
|
||||
or missing cache information), see Questions and Bugs.
|
||||
|
||||
Basic Installation
|
||||
hwloc only reports the number of processors on unsupported operating systems;
|
||||
no topology information is available.
|
||||
|
||||
Installation is the fairly common GNU-based process:
|
||||
For development and debugging purposes, hwloc also offers the ability to work
|
||||
on "fake" topologies:
|
||||
|
||||
shell$ ./configure --prefix=...
|
||||
shell$ make
|
||||
shell$ make install
|
||||
* Symmetrical tree of resources generated from a list of level arities, see
|
||||
Synthetic topologies.
|
||||
* Remote machine simulation through the gathering of topology as XML files,
|
||||
see Importing and exporting topologies from/to XML files.
|
||||
|
||||
hwloc- and netloc-specific configure options and requirements are documented in
|
||||
sections hwloc Installation and Netloc Installation respectively.
|
||||
hwloc can display the topology in a human-readable format, either in graphical
|
||||
mode (X11), or by exporting in one of several different formats, including:
|
||||
plain text, LaTeX tikzpicture, PDF, PNG, and FIG (see Command-line Examples
|
||||
below). Note that some of the export formats require additional support
|
||||
libraries.
|
||||
|
||||
Also note that if you install supplemental libraries in non-standard locations,
|
||||
hwloc's configure script may not be able to find them without some help. You
|
||||
may need to specify additional CPPFLAGS, LDFLAGS, or PKG_CONFIG_PATH values on
|
||||
the configure command line.
|
||||
hwloc offers a programming interface for manipulating topologies and objects.
|
||||
It also brings a powerful CPU bitmap API that is used to describe topology
|
||||
objects location on physical/logical processors. See the Programming Interface
|
||||
below. It may also be used to bind applications onto certain cores or memory
|
||||
nodes. Several utility programs are also provided to ease command-line
|
||||
manipulation of topology objects, binding of processes, and so on.
|
||||
|
||||
For example, if libpciaccess was installed into /opt/pciaccess, hwloc's
|
||||
configure script may not find it by default. Try adding PKG_CONFIG_PATH to the
|
||||
./configure command line, like this:
|
||||
Bindings for several other languages are available from the project website.
|
||||
|
||||
./configure PKG_CONFIG_PATH=/opt/pciaccess/lib/pkgconfig ...
|
||||
Command-line Examples
|
||||
|
||||
Running the "lstopo" tool is a good way to check as a graphical output whether
|
||||
hwloc properly detected the architecture of your node. Netloc command-line
|
||||
tools can be used to display the network topology interconnecting your nodes.
|
||||
On a 4-package 2-core machine with hyper-threading, the lstopo tool may show
|
||||
the following graphical output:
|
||||
|
||||
Installing from a Git clone
|
||||
[dudley]
|
||||
|
||||
Additionally, the code can be directly cloned from Git:
|
||||
Here's the equivalent output in textual form:
|
||||
|
||||
shell$ git clone https://github.com/open-mpi/hwloc.git
|
||||
shell$ cd hwloc
|
||||
shell$ ./autogen.sh
|
||||
Machine
|
||||
NUMANode L#0 (P#0)
|
||||
Package L#0 + L3 L#0 (4096KB)
|
||||
L2 L#0 (1024KB) + L1 L#0 (16KB) + Core L#0
|
||||
PU L#0 (P#0)
|
||||
PU L#1 (P#8)
|
||||
L2 L#1 (1024KB) + L1 L#1 (16KB) + Core L#1
|
||||
PU L#2 (P#4)
|
||||
PU L#3 (P#12)
|
||||
Package L#1 + L3 L#1 (4096KB)
|
||||
L2 L#2 (1024KB) + L1 L#2 (16KB) + Core L#2
|
||||
PU L#4 (P#1)
|
||||
PU L#5 (P#9)
|
||||
L2 L#3 (1024KB) + L1 L#3 (16KB) + Core L#3
|
||||
PU L#6 (P#5)
|
||||
PU L#7 (P#13)
|
||||
Package L#2 + L3 L#2 (4096KB)
|
||||
L2 L#4 (1024KB) + L1 L#4 (16KB) + Core L#4
|
||||
PU L#8 (P#2)
|
||||
PU L#9 (P#10)
|
||||
L2 L#5 (1024KB) + L1 L#5 (16KB) + Core L#5
|
||||
PU L#10 (P#6)
|
||||
PU L#11 (P#14)
|
||||
Package L#3 + L3 L#3 (4096KB)
|
||||
L2 L#6 (1024KB) + L1 L#6 (16KB) + Core L#6
|
||||
PU L#12 (P#3)
|
||||
PU L#13 (P#11)
|
||||
L2 L#7 (1024KB) + L1 L#7 (16KB) + Core L#7
|
||||
PU L#14 (P#7)
|
||||
PU L#15 (P#15)
|
||||
|
||||
Note that GNU Autoconf >=2.63, Automake >=1.11 and Libtool >=2.2.6 are required
|
||||
when building from a Git clone.
|
||||
Note that there is also an equivalent output in XML that is meant for exporting
|
||||
/importing topologies but it is hardly readable to human-beings (see Importing
|
||||
and exporting topologies from/to XML files for details).
|
||||
|
||||
Nightly development snapshots are available on the web site, they can be
|
||||
configured and built without any need for Git or GNU Autotools.
|
||||
On a 4-package 2-core Opteron NUMA machine (with two cores disallowed by
|
||||
the administrator), the lstopo tool may show the following graphical output
|
||||
(with --disallowed for displaying disallowed objects):
|
||||
|
||||
[hagrid]
|
||||
|
||||
Here's the equivalent output in textual form:
|
||||
|
||||
Machine (32GB total)
|
||||
Package L#0
|
||||
NUMANode L#0 (P#0 8190MB)
|
||||
L2 L#0 (1024KB) + L1 L#0 (64KB) + Core L#0 + PU L#0 (P#0)
|
||||
L2 L#1 (1024KB) + L1 L#1 (64KB) + Core L#1 + PU L#1 (P#1)
|
||||
Package L#1
|
||||
NUMANode L#1 (P#1 8192MB)
|
||||
L2 L#2 (1024KB) + L1 L#2 (64KB) + Core L#2 + PU L#2 (P#2)
|
||||
L2 L#3 (1024KB) + L1 L#3 (64KB) + Core L#3 + PU L#3 (P#3)
|
||||
Package L#2
|
||||
NUMANode L#2 (P#2 8192MB)
|
||||
L2 L#4 (1024KB) + L1 L#4 (64KB) + Core L#4 + PU L#4 (P#4)
|
||||
L2 L#5 (1024KB) + L1 L#5 (64KB) + Core L#5 + PU L#5 (P#5)
|
||||
Package L#3
|
||||
NUMANode L#3 (P#3 8192MB)
|
||||
L2 L#6 (1024KB) + L1 L#6 (64KB) + Core L#6 + PU L#6 (P#6)
|
||||
L2 L#7 (1024KB) + L1 L#7 (64KB) + Core L#7 + PU L#7 (P#7)
|
||||
|
||||
On a 2-package quad-core Xeon (pre-Nehalem, with 2 dual-core dies into each
|
||||
package):
|
||||
|
||||
[emmett]
|
||||
|
||||
Here's the same output in textual form:
|
||||
|
||||
Machine (total 16GB)
|
||||
NUMANode L#0 (P#0 16GB)
|
||||
Package L#0
|
||||
L2 L#0 (4096KB)
|
||||
L1 L#0 (32KB) + Core L#0 + PU L#0 (P#0)
|
||||
L1 L#1 (32KB) + Core L#1 + PU L#1 (P#4)
|
||||
L2 L#1 (4096KB)
|
||||
L1 L#2 (32KB) + Core L#2 + PU L#2 (P#2)
|
||||
L1 L#3 (32KB) + Core L#3 + PU L#3 (P#6)
|
||||
Package L#1
|
||||
L2 L#2 (4096KB)
|
||||
L1 L#4 (32KB) + Core L#4 + PU L#4 (P#1)
|
||||
L1 L#5 (32KB) + Core L#5 + PU L#5 (P#5)
|
||||
L2 L#3 (4096KB)
|
||||
L1 L#6 (32KB) + Core L#6 + PU L#6 (P#3)
|
||||
L1 L#7 (32KB) + Core L#7 + PU L#7 (P#7)
|
||||
|
||||
Programming Interface
|
||||
|
||||
The basic interface is available in hwloc.h. Some higher-level functions are
|
||||
available in hwloc/helper.h to reduce the need to manually manipulate objects
|
||||
and follow links between them. Documentation for all these is provided later in
|
||||
this document. Developers may also want to look at hwloc/inlines.h which
|
||||
contains the actual inline code of some hwloc.h routines, and at this document,
|
||||
which provides good higher-level topology traversal examples.
|
||||
|
||||
To precisely define the vocabulary used by hwloc, a Terms and Definitions
|
||||
section is available and should probably be read first.
|
||||
|
||||
Each hwloc object contains a cpuset describing the list of processing units
|
||||
that it contains. These bitmaps may be used for CPU binding and Memory binding.
|
||||
hwloc offers an extensive bitmap manipulation interface in hwloc/bitmap.h.
|
||||
|
||||
Moreover, hwloc also comes with additional helpers for interoperability with
|
||||
several commonly used environments. See the Interoperability With Other
|
||||
Software section for details.
|
||||
|
||||
The complete API documentation is available in a full set of HTML pages, man
|
||||
pages, and self-contained PDF files (formatted for both US letter and A4
|
||||
formats) in the source tarball in doc/doxygen-doc/.
|
||||
|
||||
NOTE: If you are building the documentation from a Git clone, you will need to
|
||||
have Doxygen and pdflatex installed -- the documentation will be built during
|
||||
the normal "make" process. The documentation is installed during "make install"
|
||||
to $prefix/share/doc/hwloc/ and your system's default man page tree (under
|
||||
$prefix, of course).
|
||||
|
||||
Portability
|
||||
|
||||
Operating Systems have varying support for CPU and memory binding, e.g. while
|
||||
some Operating Systems provide interfaces for all kinds of CPU and memory
|
||||
bindings, some others provide only interfaces for a limited number of kinds of
|
||||
CPU and memory binding, and some do not provide any binding interface at all.
|
||||
Hwloc's binding functions would then simply return the ENOSYS error (Function
|
||||
not implemented), meaning that the underlying Operating System does not provide
|
||||
any interface for them. CPU binding and Memory binding provide more information
|
||||
on which hwloc binding functions should be preferred because interfaces for
|
||||
them are usually available on the supported Operating Systems.
|
||||
|
||||
Similarly, the ability of reporting topology information varies from one
|
||||
platform to another. As shown in Command-line Examples, hwloc can obtain
|
||||
information on a wide variety of hardware topologies. However, some platforms
|
||||
and/or operating system versions will only report a subset of this information.
|
||||
For example, on a PPC64-based system with 8 cores (each with 2 hardware
|
||||
threads) running a default 2.6.18-based kernel from RHEL 5.4, hwloc is only
|
||||
able to glean information about NUMA nodes and processor units (PUs). No
|
||||
information about caches, packages, or cores is available.
|
||||
|
||||
Here's the graphical output from lstopo on this platform when Simultaneous
|
||||
Multi-Threading (SMT) is enabled:
|
||||
|
||||
[ppc64-with]
|
||||
|
||||
And here's the graphical output from lstopo on this platform when SMT is
|
||||
disabled:
|
||||
|
||||
[ppc64-with]
|
||||
|
||||
Notice that hwloc only sees half the PUs when SMT is disabled. PU L#6, for
|
||||
example, seems to change location from NUMA node #0 to #1. In reality, no PUs
|
||||
"moved" -- they were simply re-numbered when hwloc only saw half as many (see
|
||||
also Logical index in Indexes and Sets). Hence, PU L#6 in the SMT-disabled
|
||||
picture probably corresponds to PU L#12 in the SMT-enabled picture.
|
||||
|
||||
This same "PUs have disappeared" effect can be seen on other platforms -- even
|
||||
platforms / OSs that provide much more information than the above PPC64 system.
|
||||
This is an unfortunate side-effect of how operating systems report information
|
||||
to hwloc.
|
||||
|
||||
Note that upgrading the Linux kernel on the same PPC64 system mentioned above
|
||||
to 2.6.34, hwloc is able to discover all the topology information. The
|
||||
following picture shows the entire topology layout when SMT is enabled:
|
||||
|
||||
[ppc64-full]
|
||||
|
||||
Developers using the hwloc API or XML output for portable applications should
|
||||
therefore be extremely careful to not make any assumptions about the structure
|
||||
of data that is returned. For example, per the above reported PPC topology, it
|
||||
is not safe to assume that PUs will always be descendants of cores.
|
||||
|
||||
Additionally, future hardware may insert new topology elements that are not
|
||||
available in this version of hwloc. Long-lived applications that are meant to
|
||||
span multiple different hardware platforms should also be careful about making
|
||||
structure assumptions. For example, a new element may someday exist between a
|
||||
core and a PU.
|
||||
|
||||
API Example
|
||||
|
||||
The following small C example (available in the source tree as ``doc/examples/
|
||||
hwloc-hello.c'') prints the topology of the machine and performs some thread
|
||||
and memory binding. More examples are available in the doc/examples/ directory
|
||||
of the source tree.
|
||||
|
||||
/* Example hwloc API program.
|
||||
*
|
||||
* See other examples under doc/examples/ in the source tree
|
||||
* for more details.
|
||||
*
|
||||
* Copyright (c) 2009-2016 Inria. All rights reserved.
|
||||
* Copyright (c) 2009-2011 Université Bordeaux
|
||||
* Copyright (c) 2009-2010 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*
|
||||
* hwloc-hello.c
|
||||
*/
|
||||
#include "hwloc.h"
|
||||
#include <errno.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
static void print_children(hwloc_topology_t topology, hwloc_obj_t obj,
|
||||
int depth)
|
||||
{
|
||||
char type[32], attr[1024];
|
||||
unsigned i;
|
||||
hwloc_obj_type_snprintf(type, sizeof(type), obj, 0);
|
||||
printf("%*s%s", 2*depth, "", type);
|
||||
if (obj->os_index != (unsigned) -1)
|
||||
printf("#%u", obj->os_index);
|
||||
hwloc_obj_attr_snprintf(attr, sizeof(attr), obj, " ", 0);
|
||||
if (*attr)
|
||||
printf("(%s)", attr);
|
||||
printf("\n");
|
||||
for (i = 0; i < obj->arity; i++) {
|
||||
print_children(topology, obj->children[i], depth + 1);
|
||||
}
|
||||
}
|
||||
int main(void)
|
||||
{
|
||||
int depth;
|
||||
unsigned i, n;
|
||||
unsigned long size;
|
||||
int levels;
|
||||
char string[128];
|
||||
int topodepth;
|
||||
void *m;
|
||||
hwloc_topology_t topology;
|
||||
hwloc_cpuset_t cpuset;
|
||||
hwloc_obj_t obj;
|
||||
/* Allocate and initialize topology object. */
|
||||
hwloc_topology_init(&topology);
|
||||
/* ... Optionally, put detection configuration here to ignore
|
||||
some objects types, define a synthetic topology, etc....
|
||||
The default is to detect all the objects of the machine that
|
||||
the caller is allowed to access. See Configure Topology
|
||||
Detection. */
|
||||
/* Perform the topology detection. */
|
||||
hwloc_topology_load(topology);
|
||||
/* Optionally, get some additional topology information
|
||||
in case we need the topology depth later. */
|
||||
topodepth = hwloc_topology_get_depth(topology);
|
||||
/*****************************************************************
|
||||
* First example:
|
||||
* Walk the topology with an array style, from level 0 (always
|
||||
* the system level) to the lowest level (always the proc level).
|
||||
*****************************************************************/
|
||||
for (depth = 0; depth < topodepth; depth++) {
|
||||
printf("*** Objects at level %d\n", depth);
|
||||
for (i = 0; i < hwloc_get_nbobjs_by_depth(topology, depth);
|
||||
i++) {
|
||||
hwloc_obj_type_snprintf(string, sizeof(string),
|
||||
hwloc_get_obj_by_depth(topology, depth, i), 0);
|
||||
printf("Index %u: %s\n", i, string);
|
||||
}
|
||||
}
|
||||
/*****************************************************************
|
||||
* Second example:
|
||||
* Walk the topology with a tree style.
|
||||
*****************************************************************/
|
||||
printf("*** Printing overall tree\n");
|
||||
print_children(topology, hwloc_get_root_obj(topology), 0);
|
||||
/*****************************************************************
|
||||
* Third example:
|
||||
* Print the number of packages.
|
||||
*****************************************************************/
|
||||
depth = hwloc_get_type_depth(topology, HWLOC_OBJ_PACKAGE);
|
||||
if (depth == HWLOC_TYPE_DEPTH_UNKNOWN) {
|
||||
printf("*** The number of packages is unknown\n");
|
||||
} else {
|
||||
printf("*** %u package(s)\n",
|
||||
hwloc_get_nbobjs_by_depth(topology, depth));
|
||||
}
|
||||
/*****************************************************************
|
||||
* Fourth example:
|
||||
* Compute the amount of cache that the first logical processor
|
||||
* has above it.
|
||||
*****************************************************************/
|
||||
levels = 0;
|
||||
size = 0;
|
||||
for (obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_PU, 0);
|
||||
obj;
|
||||
obj = obj->parent)
|
||||
if (hwloc_obj_type_is_cache(obj->type)) {
|
||||
levels++;
|
||||
size += obj->attr->cache.size;
|
||||
}
|
||||
printf("*** Logical processor 0 has %d caches totaling %luKB\n",
|
||||
levels, size / 1024);
|
||||
/*****************************************************************
|
||||
* Fifth example:
|
||||
* Bind to only one thread of the last core of the machine.
|
||||
*
|
||||
* First find out where cores are, or else smaller sets of CPUs if
|
||||
* the OS doesn't have the notion of a "core".
|
||||
*****************************************************************/
|
||||
depth = hwloc_get_type_or_below_depth(topology, HWLOC_OBJ_CORE);
|
||||
/* Get last core. */
|
||||
obj = hwloc_get_obj_by_depth(topology, depth,
|
||||
hwloc_get_nbobjs_by_depth(topology, depth) - 1);
|
||||
if (obj) {
|
||||
/* Get a copy of its cpuset that we may modify. */
|
||||
cpuset = hwloc_bitmap_dup(obj->cpuset);
|
||||
/* Get only one logical processor (in case the core is
|
||||
SMT/hyper-threaded). */
|
||||
hwloc_bitmap_singlify(cpuset);
|
||||
/* And try to bind ourself there. */
|
||||
if (hwloc_set_cpubind(topology, cpuset, 0)) {
|
||||
char *str;
|
||||
int error = errno;
|
||||
hwloc_bitmap_asprintf(&str, obj->cpuset);
|
||||
printf("Couldn't bind to cpuset %s: %s\n", str, strerror(error));
|
||||
free(str);
|
||||
}
|
||||
/* Free our cpuset copy */
|
||||
hwloc_bitmap_free(cpuset);
|
||||
}
|
||||
/*****************************************************************
|
||||
* Sixth example:
|
||||
* Allocate some memory on the last NUMA node, bind some existing
|
||||
* memory to the last NUMA node.
|
||||
*****************************************************************/
|
||||
/* Get last node. There's always at least one. */
|
||||
n = hwloc_get_nbobjs_by_type(topology, HWLOC_OBJ_NUMANODE);
|
||||
obj = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, n - 1);
|
||||
size = 1024*1024;
|
||||
m = hwloc_alloc_membind(topology, size, obj->nodeset,
|
||||
HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET);
|
||||
hwloc_free(topology, m, size);
|
||||
m = malloc(size);
|
||||
hwloc_set_area_membind(topology, m, size, obj->nodeset,
|
||||
HWLOC_MEMBIND_BIND, HWLOC_MEMBIND_BYNODESET);
|
||||
free(m);
|
||||
/* Destroy topology object. */
|
||||
hwloc_topology_destroy(topology);
|
||||
return 0;
|
||||
}
|
||||
|
||||
hwloc provides a pkg-config executable to obtain relevant compiler and linker
|
||||
flags. See Compiling software on top of hwloc's C API for details on building
|
||||
programs on top of hwloc's API using GNU Make or CMake.
|
||||
|
||||
On a machine with 2 processor packages -- each package of which has two processing
|
||||
cores -- the output from running hwloc-hello could be something like the
|
||||
following:
|
||||
|
||||
shell$ ./hwloc-hello
|
||||
*** Objects at level 0
|
||||
Index 0: Machine
|
||||
*** Objects at level 1
|
||||
Index 0: Package#0
|
||||
Index 1: Package#1
|
||||
*** Objects at level 2
|
||||
Index 0: Core#0
|
||||
Index 1: Core#1
|
||||
Index 2: Core#3
|
||||
Index 3: Core#2
|
||||
*** Objects at level 3
|
||||
Index 0: PU#0
|
||||
Index 1: PU#1
|
||||
Index 2: PU#2
|
||||
Index 3: PU#3
|
||||
*** Printing overall tree
|
||||
Machine
|
||||
Package#0
|
||||
Core#0
|
||||
PU#0
|
||||
Core#1
|
||||
PU#1
|
||||
Package#1
|
||||
Core#3
|
||||
PU#2
|
||||
Core#2
|
||||
PU#3
|
||||
*** 2 package(s)
|
||||
*** Logical processor 0 has 0 caches totaling 0KB
|
||||
shell$
|
||||
|
||||
Questions and Bugs
|
||||
|
||||
@@ -78,8 +466,22 @@ debug and report issues.
|
||||
Questions may be sent to the users or developers mailing lists (https://
|
||||
www.open-mpi.org/community/lists/hwloc.php).
|
||||
|
||||
There is also a #hwloc IRC channel on Freenode (irc.freenode.net).
|
||||
There is also a #hwloc IRC channel on Libera Chat (irc.libera.chat).
|
||||
|
||||
History / Credits
|
||||
|
||||
hwloc is the evolution and merger of the libtopology project and the Portable
|
||||
Linux Processor Affinity (PLPA) (https://www.open-mpi.org/projects/plpa/)
|
||||
project. Because of functional and ideological overlap, these two code bases
|
||||
and ideas were merged and released under the name "hwloc" as an Open MPI
|
||||
sub-project.
|
||||
|
||||
libtopology was initially developed by the Inria Runtime Team-Project. PLPA was
|
||||
initially developed by the Open MPI development team as a sub-project. Both are
|
||||
now deprecated in favor of hwloc, which is distributed as an Open MPI
|
||||
sub-project.
|
||||
|
||||
|
||||
|
||||
See https://www.open-mpi.org/projects/hwloc/doc/ for more hwloc documentation.
|
||||
See https://www.open-mpi.org/projects/hwloc/doc/ for more hwloc documentation,
|
||||
actual links to related pages, images, etc.
|
||||
|
||||
9
src/3rdparty/hwloc/VERSION
vendored
9
src/3rdparty/hwloc/VERSION
vendored
@@ -8,8 +8,8 @@
|
||||
# Please update HWLOC_VERSION* in contrib/windows/hwloc_config.h too.
|
||||
|
||||
major=2
|
||||
minor=4
|
||||
release=0
|
||||
minor=11
|
||||
release=2
|
||||
|
||||
# greek is used for alpha or beta release tags. If it is non-empty,
|
||||
# it will be appended to the version number. It does not have to be
|
||||
@@ -22,7 +22,7 @@ greek=
|
||||
|
||||
# The date when this release was created
|
||||
|
||||
date="Nov 26, 2020"
|
||||
date="Sep 26, 2024"
|
||||
|
||||
# If snapshot=1, then use the value from snapshot_version as the
|
||||
# entire hwloc version (i.e., ignore major, minor, release, and
|
||||
@@ -41,7 +41,6 @@ snapshot_version=${major}.${minor}.${release}${greek}-git
|
||||
# 2. Version numbers are described in the Libtool current:revision:age
|
||||
# format.
|
||||
|
||||
libhwloc_so_version=19:0:4
|
||||
libnetloc_so_version=0:0:0
|
||||
libhwloc_so_version=23:1:8
|
||||
|
||||
# Please also update the <TargetName> lines in contrib/windows/libhwloc.vcxproj
|
||||
|
||||
639
src/3rdparty/hwloc/include/hwloc.h
vendored
639
src/3rdparty/hwloc/include/hwloc.h
vendored
File diff suppressed because it is too large
Load Diff
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009-2024 Inria. All rights reserved.
|
||||
* Copyright © 2009-2012 Université Bordeaux
|
||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
@@ -11,10 +11,10 @@
|
||||
#ifndef HWLOC_CONFIG_H
|
||||
#define HWLOC_CONFIG_H
|
||||
|
||||
#define HWLOC_VERSION "2.4.1"
|
||||
#define HWLOC_VERSION "2.11.2"
|
||||
#define HWLOC_VERSION_MAJOR 2
|
||||
#define HWLOC_VERSION_MINOR 4
|
||||
#define HWLOC_VERSION_RELEASE 1
|
||||
#define HWLOC_VERSION_MINOR 11
|
||||
#define HWLOC_VERSION_RELEASE 2
|
||||
#define HWLOC_VERSION_GREEK ""
|
||||
|
||||
#define __hwloc_restrict
|
||||
|
||||
63
src/3rdparty/hwloc/include/hwloc/bitmap.h
vendored
63
src/3rdparty/hwloc/include/hwloc/bitmap.h
vendored
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009-2023 Inria. All rights reserved.
|
||||
* Copyright © 2009-2012 Université Bordeaux
|
||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
@@ -50,9 +50,10 @@ extern "C" {
|
||||
* hwloc_bitmap_free(set);
|
||||
* \endcode
|
||||
*
|
||||
* \note Most functions below return an int that may be negative in case of
|
||||
* error. The usual error case would be an internal failure to realloc/extend
|
||||
* \note Most functions below return 0 on success and -1 on error.
|
||||
* The usual error case would be an internal failure to realloc/extend
|
||||
* the storage of the bitmap (\p errno would be set to \c ENOMEM).
|
||||
* See also \ref hwlocality_api_error_reporting.
|
||||
*
|
||||
* \note Several examples of using the bitmap API are available under the
|
||||
* doc/examples/ directory in the source tree.
|
||||
@@ -83,7 +84,13 @@ typedef const struct hwloc_bitmap_s * hwloc_const_bitmap_t;
|
||||
*/
|
||||
HWLOC_DECLSPEC hwloc_bitmap_t hwloc_bitmap_alloc(void) __hwloc_attribute_malloc;
|
||||
|
||||
/** \brief Allocate a new full bitmap. */
|
||||
/** \brief Allocate a new full bitmap.
|
||||
*
|
||||
* \returns A valid bitmap or \c NULL.
|
||||
*
|
||||
* The bitmap should be freed by a corresponding call to
|
||||
* hwloc_bitmap_free().
|
||||
*/
|
||||
HWLOC_DECLSPEC hwloc_bitmap_t hwloc_bitmap_alloc_full(void) __hwloc_attribute_malloc;
|
||||
|
||||
/** \brief Free bitmap \p bitmap.
|
||||
@@ -112,18 +119,20 @@ HWLOC_DECLSPEC int hwloc_bitmap_copy(hwloc_bitmap_t dst, hwloc_const_bitmap_t sr
|
||||
*
|
||||
* If \p buflen is 0, \p buf may safely be \c NULL.
|
||||
*
|
||||
* \return the number of character that were actually written if not truncating,
|
||||
* \return the number of characters that were actually written if not truncating,
|
||||
* or that would have been written (not including the ending \\0).
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap);
|
||||
|
||||
/** \brief Stringify a bitmap into a newly allocated string.
|
||||
*
|
||||
* \return -1 on error.
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);
|
||||
|
||||
/** \brief Parse a bitmap string and store it in bitmap \p bitmap.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string);
|
||||
|
||||
@@ -137,18 +146,20 @@ HWLOC_DECLSPEC int hwloc_bitmap_sscanf(hwloc_bitmap_t bitmap, const char * __hwl
|
||||
*
|
||||
* If \p buflen is 0, \p buf may safely be \c NULL.
|
||||
*
|
||||
* \return the number of character that were actually written if not truncating,
|
||||
* \return the number of characters that were actually written if not truncating,
|
||||
* or that would have been written (not including the ending \\0).
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_list_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap);
|
||||
|
||||
/** \brief Stringify a bitmap into a newly allocated list string.
|
||||
*
|
||||
* \return -1 on error.
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_list_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);
|
||||
|
||||
/** \brief Parse a list string and store it in bitmap \p bitmap.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_list_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string);
|
||||
|
||||
@@ -161,18 +172,20 @@ HWLOC_DECLSPEC int hwloc_bitmap_list_sscanf(hwloc_bitmap_t bitmap, const char *
|
||||
*
|
||||
* If \p buflen is 0, \p buf may safely be \c NULL.
|
||||
*
|
||||
* \return the number of character that were actually written if not truncating,
|
||||
* \return the number of characters that were actually written if not truncating,
|
||||
* or that would have been written (not including the ending \\0).
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, hwloc_const_bitmap_t bitmap);
|
||||
|
||||
/** \brief Stringify a bitmap into a newly allocated taskset-specific string.
|
||||
*
|
||||
* \return -1 on error.
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_taskset_asprintf(char ** strp, hwloc_const_bitmap_t bitmap);
|
||||
|
||||
/** \brief Parse a taskset-specific bitmap string and store it in bitmap \p bitmap.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_taskset_sscanf(hwloc_bitmap_t bitmap, const char * __hwloc_restrict string);
|
||||
|
||||
@@ -279,6 +292,7 @@ HWLOC_DECLSPEC int hwloc_bitmap_to_ulongs(hwloc_const_bitmap_t bitmap, unsigned
|
||||
* When called on the output of hwloc_topology_get_topology_cpuset(),
|
||||
* the returned number is large enough for all cpusets of the topology.
|
||||
*
|
||||
* \return the number of unsigned longs required.
|
||||
* \return -1 if \p bitmap is infinite.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_nr_ulongs(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
|
||||
@@ -305,21 +319,23 @@ HWLOC_DECLSPEC int hwloc_bitmap_isfull(hwloc_const_bitmap_t bitmap) __hwloc_attr
|
||||
|
||||
/** \brief Compute the first index (least significant bit) in bitmap \p bitmap
|
||||
*
|
||||
* \return -1 if no index is set in \p bitmap.
|
||||
* \return the first index set in \p bitmap.
|
||||
* \return -1 if \p bitmap is empty.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_first(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
|
||||
|
||||
/** \brief Compute the next index in bitmap \p bitmap which is after index \p prev
|
||||
*
|
||||
* If \p prev is -1, the first index is returned.
|
||||
*
|
||||
* \return the first index set in \p bitmap if \p prev is \c -1.
|
||||
* \return the next index set in \p bitmap if \p prev is not \c -1.
|
||||
* \return -1 if no index with higher index is set in \p bitmap.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_next(hwloc_const_bitmap_t bitmap, int prev) __hwloc_attribute_pure;
|
||||
|
||||
/** \brief Compute the last index (most significant bit) in bitmap \p bitmap
|
||||
*
|
||||
* \return -1 if no index is set in \p bitmap, or if \p bitmap is infinitely set.
|
||||
* \return the last index set in \p bitmap.
|
||||
* \return -1 if \p bitmap is empty, or if \p bitmap is infinitely set.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_last(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
|
||||
|
||||
@@ -327,28 +343,29 @@ HWLOC_DECLSPEC int hwloc_bitmap_last(hwloc_const_bitmap_t bitmap) __hwloc_attrib
|
||||
* indexes that are in the bitmap).
|
||||
*
|
||||
* \return the number of indexes that are in the bitmap.
|
||||
*
|
||||
* \return -1 if \p bitmap is infinitely set.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_weight(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
|
||||
|
||||
/** \brief Compute the first unset index (least significant bit) in bitmap \p bitmap
|
||||
*
|
||||
* \return -1 if no index is unset in \p bitmap.
|
||||
* \return the first unset index in \p bitmap.
|
||||
* \return -1 if \p bitmap is full.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_first_unset(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
|
||||
|
||||
/** \brief Compute the next unset index in bitmap \p bitmap which is after index \p prev
|
||||
*
|
||||
* If \p prev is -1, the first unset index is returned.
|
||||
*
|
||||
* \return the first index unset in \p bitmap if \p prev is \c -1.
|
||||
* \return the next index unset in \p bitmap if \p prev is not \c -1.
|
||||
* \return -1 if no index with higher index is unset in \p bitmap.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_next_unset(hwloc_const_bitmap_t bitmap, int prev) __hwloc_attribute_pure;
|
||||
|
||||
/** \brief Compute the last unset index (most significant bit) in bitmap \p bitmap
|
||||
*
|
||||
* \return -1 if no index is unset in \p bitmap, or if \p bitmap is infinitely set.
|
||||
* \return the last index unset in \p bitmap.
|
||||
* \return -1 if \p bitmap is full, or if \p bitmap is not infinitely set.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_last_unset(hwloc_const_bitmap_t bitmap) __hwloc_attribute_pure;
|
||||
|
||||
@@ -357,11 +374,11 @@ HWLOC_DECLSPEC int hwloc_bitmap_last_unset(hwloc_const_bitmap_t bitmap) __hwloc_
|
||||
* The loop must start with hwloc_bitmap_foreach_begin() and end
|
||||
* with hwloc_bitmap_foreach_end() followed by a terminating ';'.
|
||||
*
|
||||
* \p index is the loop variable; it should be an unsigned int. The
|
||||
* first iteration will set \p index to the lowest index in the bitmap.
|
||||
* \p id is the loop variable; it should be an unsigned int. The
|
||||
* first iteration will set \p id to the lowest index in the bitmap.
|
||||
* Successive iterations will iterate through, in order, all remaining
|
||||
* indexes set in the bitmap. To be specific: each iteration will return a
|
||||
* value for \p index such that hwloc_bitmap_isset(bitmap, index) is true.
|
||||
* value for \p id such that hwloc_bitmap_isset(bitmap, id) is true.
|
||||
*
|
||||
* The assert prevents the loop from being infinite if the bitmap is infinitely set.
|
||||
*
|
||||
@@ -428,6 +445,8 @@ HWLOC_DECLSPEC int hwloc_bitmap_not (hwloc_bitmap_t res, hwloc_const_bitmap_t bi
|
||||
/** \brief Test whether bitmaps \p bitmap1 and \p bitmap2 intersect.
|
||||
*
|
||||
* \return 1 if bitmaps intersect, 0 otherwise.
|
||||
*
|
||||
* \note The empty bitmap does not intersect any other bitmap.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_bitmap_intersects (hwloc_const_bitmap_t bitmap1, hwloc_const_bitmap_t bitmap2) __hwloc_attribute_pure;
|
||||
|
||||
|
||||
25
src/3rdparty/hwloc/include/hwloc/cpukinds.h
vendored
25
src/3rdparty/hwloc/include/hwloc/cpukinds.h
vendored
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright © 2020 Inria. All rights reserved.
|
||||
* Copyright © 2020-2021 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
@@ -42,18 +42,23 @@ extern "C" {
|
||||
* (for instance the "CoreType" and "FrequencyMaxMHz",
|
||||
* see \ref topoattrs_cpukinds).
|
||||
*
|
||||
* A higher efficiency value means intrinsic greater performance
|
||||
* A higher efficiency value means greater intrinsic performance
|
||||
* (and possibly less performance/power efficiency).
|
||||
* Kinds with lower efficiency are ranked first:
|
||||
* Kinds with lower efficiency values are ranked first:
|
||||
* Passing 0 as \p kind_index to hwloc_cpukinds_get_info() will
|
||||
* return information about the less efficient CPU kind.
|
||||
* return information about the CPU kind with lower performance
|
||||
* but higher energy-efficiency.
|
||||
* Higher \p kind_index values would rather return information
|
||||
* about power-hungry high-performance cores.
|
||||
*
|
||||
* When available, efficiency values are gathered from the operating
|
||||
* system (when \p cpukind_efficiency is set in the
|
||||
* struct hwloc_topology_discovery_support array, only on Windows 10 for now).
|
||||
* Otherwise hwloc tries to compute efficiencies
|
||||
* by comparing CPU kinds using frequencies (on ARM),
|
||||
* or core types and frequencies (on other architectures).
|
||||
* When available, efficiency values are gathered from the operating system.
|
||||
* If so, \p cpukind_efficiency is set in the struct hwloc_topology_discovery_support array.
|
||||
* This is currently available on Windows 10, Mac OS X (Darwin),
|
||||
* and on some Linux platforms where core "capacity" is exposed in sysfs.
|
||||
*
|
||||
* If the operating system does not expose core efficiencies natively,
|
||||
* hwloc tries to compute efficiencies by comparing CPU kinds using
|
||||
* frequencies (on ARM), or core types and frequencies (on other architectures).
|
||||
* The environment variable HWLOC_CPUKINDS_RANKING may be used
|
||||
* to change these heuristics, see \ref envvar.
|
||||
*
|
||||
|
||||
22
src/3rdparty/hwloc/include/hwloc/cuda.h
vendored
22
src/3rdparty/hwloc/include/hwloc/cuda.h
vendored
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright © 2010-2020 Inria. All rights reserved.
|
||||
* Copyright © 2010-2023 Inria. All rights reserved.
|
||||
* Copyright © 2010-2011 Université Bordeaux
|
||||
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
@@ -42,6 +42,9 @@ extern "C" {
|
||||
/** \brief Return the domain, bus and device IDs of the CUDA device \p cudevice.
|
||||
*
|
||||
* Device \p cudevice must match the local machine.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 on error, for instance if device information could not be found.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_cuda_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
@@ -75,7 +78,7 @@ hwloc_cuda_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused
|
||||
/** \brief Get the CPU set of processors that are physically
|
||||
* close to device \p cudevice.
|
||||
*
|
||||
* Return the CPU set describing the locality of the CUDA device \p cudevice.
|
||||
* Store in \p set the CPU-set describing the locality of the CUDA device \p cudevice.
|
||||
*
|
||||
* Topology \p topology and device \p cudevice must match the local machine.
|
||||
* I/O devices detection and the CUDA component are not needed in the topology.
|
||||
@@ -87,6 +90,9 @@ hwloc_cuda_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused
|
||||
*
|
||||
* This function is currently only implemented in a meaningful way for
|
||||
* Linux; other systems will simply get a full cpuset.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 on error, for instance if device information could not be found.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_cuda_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
@@ -120,8 +126,8 @@ hwloc_cuda_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
/** \brief Get the hwloc PCI device object corresponding to the
|
||||
* CUDA device \p cudevice.
|
||||
*
|
||||
* Return the PCI device object describing the CUDA device \p cudevice.
|
||||
* Return NULL if there is none.
|
||||
* \return The hwloc PCI device object describing the CUDA device \p cudevice.
|
||||
* \return \c NULL if none could be found.
|
||||
*
|
||||
* Topology \p topology and device \p cudevice must match the local machine.
|
||||
* I/O devices detection must be enabled in topology \p topology.
|
||||
@@ -140,8 +146,8 @@ hwloc_cuda_get_device_pcidev(hwloc_topology_t topology, CUdevice cudevice)
|
||||
|
||||
/** \brief Get the hwloc OS device object corresponding to CUDA device \p cudevice.
|
||||
*
|
||||
* Return the hwloc OS device object that describes the given
|
||||
* CUDA device \p cudevice. Return NULL if there is none.
|
||||
* \return The hwloc OS device object that describes the given CUDA device \p cudevice.
|
||||
* \return \c NULL if none could be found.
|
||||
*
|
||||
* Topology \p topology and device \p cudevice must match the local machine.
|
||||
* I/O devices detection and the CUDA component must be enabled in the topology.
|
||||
@@ -183,8 +189,8 @@ hwloc_cuda_get_device_osdev(hwloc_topology_t topology, CUdevice cudevice)
|
||||
/** \brief Get the hwloc OS device object corresponding to the
|
||||
* CUDA device whose index is \p idx.
|
||||
*
|
||||
* Return the OS device object describing the CUDA device whose
|
||||
* index is \p idx. Return NULL if there is none.
|
||||
* \return The hwloc OS device object describing the CUDA device whose index is \p idx.
|
||||
* \return \c NULL if none could be found.
|
||||
*
|
||||
* The topology \p topology does not necessarily have to match the current
|
||||
* machine. For instance the topology may be an XML import of a remote host.
|
||||
|
||||
18
src/3rdparty/hwloc/include/hwloc/cudart.h
vendored
18
src/3rdparty/hwloc/include/hwloc/cudart.h
vendored
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright © 2010-2020 Inria. All rights reserved.
|
||||
* Copyright © 2010-2023 Inria. All rights reserved.
|
||||
* Copyright © 2010-2011 Université Bordeaux
|
||||
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
@@ -43,6 +43,9 @@ extern "C" {
|
||||
/** \brief Return the domain, bus and device IDs of the CUDA device whose index is \p idx.
|
||||
*
|
||||
* Device index \p idx must match the local machine.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 on error, for instance if device information could not be found.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_cudart_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
@@ -72,7 +75,7 @@ hwloc_cudart_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unus
|
||||
/** \brief Get the CPU set of processors that are physically
|
||||
* close to device \p idx.
|
||||
*
|
||||
* Return the CPU set describing the locality of the CUDA device
|
||||
* Store in \p set the CPU-set describing the locality of the CUDA device
|
||||
* whose index is \p idx.
|
||||
*
|
||||
* Topology \p topology and device \p idx must match the local machine.
|
||||
@@ -84,6 +87,9 @@ hwloc_cudart_get_device_pci_ids(hwloc_topology_t topology __hwloc_attribute_unus
|
||||
*
|
||||
* This function is currently only implemented in a meaningful way for
|
||||
* Linux; other systems will simply get a full cpuset.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 on error, for instance if device information could not be found.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_cudart_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
@@ -117,8 +123,8 @@ hwloc_cudart_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unuse
|
||||
/** \brief Get the hwloc PCI device object corresponding to the
|
||||
* CUDA device whose index is \p idx.
|
||||
*
|
||||
* Return the PCI device object describing the CUDA device whose
|
||||
* index is \p idx. Return NULL if there is none.
|
||||
* \return The hwloc PCI device object describing the CUDA device whose index is \p idx.
|
||||
* \return \c NULL if none could be found.
|
||||
*
|
||||
* Topology \p topology and device \p idx must match the local machine.
|
||||
* I/O devices detection must be enabled in topology \p topology.
|
||||
@@ -138,8 +144,8 @@ hwloc_cudart_get_device_pcidev(hwloc_topology_t topology, int idx)
|
||||
/** \brief Get the hwloc OS device object corresponding to the
|
||||
* CUDA device whose index is \p idx.
|
||||
*
|
||||
* Return the OS device object describing the CUDA device whose
|
||||
* index is \p idx. Return NULL if there is none.
|
||||
* \return The hwloc OS device object describing the CUDA device whose index is \p idx.
|
||||
* \return \c NULL if none could be found.
|
||||
*
|
||||
* The topology \p topology does not necessarily have to match the current
|
||||
* machine. For instance the topology may be an XML import of a remote host.
|
||||
|
||||
13
src/3rdparty/hwloc/include/hwloc/deprecated.h
vendored
13
src/3rdparty/hwloc/include/hwloc/deprecated.h
vendored
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2018 Inria. All rights reserved.
|
||||
* Copyright © 2009-2022 Inria. All rights reserved.
|
||||
* Copyright © 2009-2012 Université Bordeaux
|
||||
* Copyright © 2009-2010 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
@@ -30,6 +30,15 @@ extern "C" {
|
||||
/* backward compat with v1.10 before Node->NUMANode clarification */
|
||||
#define HWLOC_OBJ_NODE HWLOC_OBJ_NUMANODE
|
||||
|
||||
/** \brief Add a distances structure.
|
||||
*
|
||||
* Superseded by hwloc_distances_add_create()+hwloc_distances_add_values()+hwloc_distances_add_commit()
|
||||
* in v2.5.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_distances_add(hwloc_topology_t topology,
|
||||
unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *values,
|
||||
unsigned long kind, unsigned long flags) __hwloc_attribute_deprecated;
|
||||
|
||||
/** \brief Insert a misc object by parent.
|
||||
*
|
||||
* Identical to hwloc_topology_insert_misc_object().
|
||||
@@ -46,7 +55,7 @@ hwloc_topology_insert_misc_object_by_parent(hwloc_topology_t topology, hwloc_obj
|
||||
*
|
||||
* If \p size is 0, \p string may safely be \c NULL.
|
||||
*
|
||||
* \return the number of character that were actually written if not truncating,
|
||||
* \return the number of characters that were actually written if not truncating,
|
||||
* or that would have been written (not including the ending \\0).
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
|
||||
17
src/3rdparty/hwloc/include/hwloc/diff.h
vendored
17
src/3rdparty/hwloc/include/hwloc/diff.h
vendored
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright © 2013-2020 Inria. All rights reserved.
|
||||
* Copyright © 2013-2023 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
@@ -222,6 +222,8 @@ enum hwloc_topology_diff_apply_flags_e {
|
||||
HWLOC_DECLSPEC int hwloc_topology_diff_apply(hwloc_topology_t topology, hwloc_topology_diff_t diff, unsigned long flags);
|
||||
|
||||
/** \brief Destroy a list of topology differences.
|
||||
*
|
||||
* \return 0.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_topology_diff_destroy(hwloc_topology_diff_t diff);
|
||||
|
||||
@@ -233,6 +235,8 @@ HWLOC_DECLSPEC int hwloc_topology_diff_destroy(hwloc_topology_diff_t diff);
|
||||
* This identifier is usually the name of the other XML file
|
||||
* that contains the reference topology.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*
|
||||
* \note the pointer returned in refname should later be freed
|
||||
* by the caller.
|
||||
*/
|
||||
@@ -246,10 +250,17 @@ HWLOC_DECLSPEC int hwloc_topology_diff_load_xml(const char *xmlpath, hwloc_topol
|
||||
* This identifier is usually the name of the other XML file
|
||||
* that contains the reference topology.
|
||||
* This attribute is given back when reading the diff from XML.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_topology_diff_export_xml(hwloc_topology_diff_t diff, const char *refname, const char *xmlpath);
|
||||
|
||||
/** \brief Load a list of topology differences from an XML buffer.
|
||||
*
|
||||
* Build a list of differences from the XML memory buffer given
|
||||
* at \p xmlbuffer and of length \p buflen (including an ending \0).
|
||||
* This buffer may have been filled earlier with
|
||||
* hwloc_topology_diff_export_xmlbuffer().
|
||||
*
|
||||
* If not \c NULL, \p refname will be filled with the identifier
|
||||
* string of the reference topology for the difference file,
|
||||
@@ -257,6 +268,8 @@ HWLOC_DECLSPEC int hwloc_topology_diff_export_xml(hwloc_topology_diff_t diff, co
|
||||
* This identifier is usually the name of the other XML file
|
||||
* that contains the reference topology.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*
|
||||
* \note the pointer returned in refname should later be freed
|
||||
* by the caller.
|
||||
*/
|
||||
@@ -274,6 +287,8 @@ HWLOC_DECLSPEC int hwloc_topology_diff_load_xmlbuffer(const char *xmlbuffer, int
|
||||
* The returned buffer ends with a \0 that is included in the returned
|
||||
* length.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*
|
||||
* \note The XML buffer should later be freed with hwloc_free_xmlbuffer().
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_topology_diff_export_xmlbuffer(hwloc_topology_diff_t diff, const char *refname, char **xmlbuffer, int *buflen);
|
||||
|
||||
244
src/3rdparty/hwloc/include/hwloc/distances.h
vendored
244
src/3rdparty/hwloc/include/hwloc/distances.h
vendored
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright © 2010-2020 Inria. All rights reserved.
|
||||
* Copyright © 2010-2024 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
@@ -28,16 +28,27 @@ extern "C" {
|
||||
|
||||
/** \brief Matrix of distances between a set of objects.
|
||||
*
|
||||
* This matrix often contains latencies between NUMA nodes
|
||||
* The most common matrix contains latencies between NUMA nodes
|
||||
* (as reported in the System Locality Distance Information Table (SLIT)
|
||||
* in the ACPI specification), which may or may not be physically accurate.
|
||||
* It corresponds to the latency for accessing the memory of one node
|
||||
* from a core in another node.
|
||||
* The corresponding kind is ::HWLOC_DISTANCES_KIND_FROM_OS | ::HWLOC_DISTANCES_KIND_FROM_USER.
|
||||
* The corresponding kind is ::HWLOC_DISTANCES_KIND_MEANS_LATENCY | ::HWLOC_DISTANCES_KIND_FROM_USER.
|
||||
* The name of this distances structure is "NUMALatency".
|
||||
*
|
||||
* The matrix may also contain bandwidths between random sets of objects,
|
||||
* possibly provided by the user, as specified in the \p kind attribute.
|
||||
* Other common distance structures include "XGMIBandwidth", "XGMIHops",
|
||||
* "XeLinkBandwidth" and "NVLinkBandwidth".
|
||||
*
|
||||
* Pointers \p objs and \p values should not be replaced, reallocated, freed, etc.
|
||||
* However callers are allowed to modify \p kind as well as the contents
|
||||
* of \p objs and \p values arrays.
|
||||
* For instance, if there is a single NUMA node per Package,
|
||||
* hwloc_get_obj_with_same_locality() may be used to convert between them
|
||||
* and replace NUMA nodes in the \p objs array with the corresponding Packages.
|
||||
* See also hwloc_distances_transform() for applying some transformations
|
||||
* to the structure.
|
||||
*/
|
||||
struct hwloc_distances_s {
|
||||
unsigned nbobjs; /**< \brief Number of objects described by the distance matrix. */
|
||||
@@ -59,11 +70,10 @@ struct hwloc_distances_s {
|
||||
* The \p kind attribute of struct hwloc_distances_s is an OR'ed set
|
||||
* of kinds.
|
||||
*
|
||||
* A kind of format HWLOC_DISTANCES_KIND_FROM_* specifies where the
|
||||
* distance information comes from, if known.
|
||||
*
|
||||
* A kind of format HWLOC_DISTANCES_KIND_MEANS_* specifies whether
|
||||
* values are latencies or bandwidths, if applicable.
|
||||
* Each distance matrix may have only one kind among HWLOC_DISTANCES_KIND_FROM_*
|
||||
* specifying where distance information comes from,
|
||||
* and one kind among HWLOC_DISTANCES_KIND_MEANS_* specifying
|
||||
* whether values are latencies or bandwidths.
|
||||
*/
|
||||
enum hwloc_distances_kind_e {
|
||||
/** \brief These distances were obtained from the operating system or hardware.
|
||||
@@ -91,6 +101,8 @@ enum hwloc_distances_kind_e {
|
||||
HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH = (1UL<<3),
|
||||
|
||||
/** \brief This distances structure covers objects of different types.
|
||||
* This may apply to the "NVLinkBandwidth" structure in presence
|
||||
* of a NVSwitch or POWER processor NVLink port.
|
||||
* \hideinitializer
|
||||
*/
|
||||
HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES = (1UL<<4)
|
||||
@@ -118,6 +130,8 @@ enum hwloc_distances_kind_e {
|
||||
*
|
||||
* Each distance matrix returned in the \p distances array should be released
|
||||
* by the caller using hwloc_distances_release().
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_distances_get(hwloc_topology_t topology,
|
||||
@@ -127,6 +141,8 @@ hwloc_distances_get(hwloc_topology_t topology,
|
||||
/** \brief Retrieve distance matrices for objects at a specific depth in the topology.
|
||||
*
|
||||
* Identical to hwloc_distances_get() with the additional \p depth filter.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_distances_get_by_depth(hwloc_topology_t topology, int depth,
|
||||
@@ -136,6 +152,8 @@ hwloc_distances_get_by_depth(hwloc_topology_t topology, int depth,
|
||||
/** \brief Retrieve distance matrices for objects of a specific type.
|
||||
*
|
||||
* Identical to hwloc_distances_get() with the additional \p type filter.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type,
|
||||
@@ -147,6 +165,10 @@ hwloc_distances_get_by_type(hwloc_topology_t topology, hwloc_obj_type_t type,
|
||||
* Usually only one distances structure may match a given name.
|
||||
*
|
||||
* The name of the most common structure is "NUMALatency".
|
||||
* Others include "XGMIBandwidth", "XGMIHops", "XeLinkBandwidth",
|
||||
* and "NVLinkBandwidth".
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name,
|
||||
@@ -156,7 +178,12 @@ hwloc_distances_get_by_name(hwloc_topology_t topology, const char *name,
|
||||
/** \brief Get a description of what a distances structure contains.
|
||||
*
|
||||
* For instance "NUMALatency" for hardware-provided NUMA distances (ACPI SLIT),
|
||||
* or NULL if unknown.
|
||||
* or \c NULL if unknown.
|
||||
*
|
||||
* \return the constant string with the name of the distance structure.
|
||||
*
|
||||
* \note The returned name should not be freed by the caller,
|
||||
* it belongs to the hwloc library.
|
||||
*/
|
||||
HWLOC_DECLSPEC const char *
|
||||
hwloc_distances_get_name(hwloc_topology_t topology, struct hwloc_distances_s *distances);
|
||||
@@ -168,6 +195,87 @@ hwloc_distances_get_name(hwloc_topology_t topology, struct hwloc_distances_s *di
|
||||
HWLOC_DECLSPEC void
|
||||
hwloc_distances_release(hwloc_topology_t topology, struct hwloc_distances_s *distances);
|
||||
|
||||
/** \brief Transformations of distances structures. */
|
||||
enum hwloc_distances_transform_e {
|
||||
/** \brief Remove \c NULL objects from the distances structure.
|
||||
*
|
||||
* Every object that was replaced with \c NULL in the \p objs array
|
||||
* is removed and the \p values array is updated accordingly.
|
||||
*
|
||||
* At least \c 2 objects must remain, otherwise hwloc_distances_transform()
|
||||
* will return \c -1 with \p errno set to \c EINVAL.
|
||||
*
|
||||
* \p kind will be updated with or without ::HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES
|
||||
* according to the remaining objects.
|
||||
*
|
||||
* \hideinitializer
|
||||
*/
|
||||
HWLOC_DISTANCES_TRANSFORM_REMOVE_NULL = 0,
|
||||
|
||||
/** \brief Replace bandwidth values with a number of links.
|
||||
*
|
||||
* Usually all values will be either \c 0 (no link) or \c 1 (one link).
|
||||
* However some matrices could get larger values if some pairs of
|
||||
* peers are connected by different numbers of links.
|
||||
*
|
||||
* Values on the diagonal are set to \c 0.
|
||||
*
|
||||
* This transformation only applies to bandwidth matrices.
|
||||
*
|
||||
* \hideinitializer
|
||||
*/
|
||||
HWLOC_DISTANCES_TRANSFORM_LINKS = 1,
|
||||
|
||||
/** \brief Merge switches with multiple ports into a single object.
|
||||
* This currently only applies to NVSwitches where GPUs seem connected to different
|
||||
* separate switch ports in the NVLinkBandwidth matrix. This transformation will
|
||||
* replace all of them with the same port connected to all GPUs.
|
||||
* Other ports are removed by applying ::HWLOC_DISTANCES_TRANSFORM_REMOVE_NULL internally.
|
||||
* \hideinitializer
|
||||
*/
|
||||
HWLOC_DISTANCES_TRANSFORM_MERGE_SWITCH_PORTS = 2,
|
||||
|
||||
/** \brief Apply a transitive closure to the matrix to connect objects across switches.
|
||||
* This currently only applies to GPUs and NVSwitches in the NVLinkBandwidth matrix.
|
||||
* All pairs of GPUs will be reported as directly connected.
|
||||
* \hideinitializer
|
||||
*/
|
||||
HWLOC_DISTANCES_TRANSFORM_TRANSITIVE_CLOSURE = 3
|
||||
};
|
||||
|
||||
/** \brief Apply a transformation to a distances structure.
|
||||
*
|
||||
* Modify a distances structure that was previously obtained with
|
||||
* hwloc_distances_get() or one of its variants.
|
||||
*
|
||||
* This modifies the local copy of the distances structures but does
|
||||
* not modify the distances information stored inside the topology
|
||||
* (retrieved by another call to hwloc_distances_get() or exported to XML).
|
||||
* To do so, one should add a new distances structure with same
|
||||
* name, kind, objects and values (see \ref hwlocality_distances_add)
|
||||
* and then remove this old one with hwloc_distances_release_remove().
|
||||
*
|
||||
* \p transform must be one of the transformations listed
|
||||
* in ::hwloc_distances_transform_e.
|
||||
*
|
||||
* These transformations may modify the contents of the \p objs or \p values arrays.
|
||||
*
|
||||
* \p transform_attr must be \c NULL for now.
|
||||
*
|
||||
* \p flags must be \c 0 for now.
|
||||
*
|
||||
* \return 0 on success, -1 on error for instance if flags are invalid.
|
||||
*
|
||||
* \note Objects in distances array \p objs may be directly modified
|
||||
* in place without using hwloc_distances_transform().
|
||||
* One may use hwloc_get_obj_with_same_locality() to easily convert
|
||||
* between similar objects of different types.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_distances_transform(hwloc_topology_t topology, struct hwloc_distances_s *distances,
|
||||
enum hwloc_distances_transform_e transform,
|
||||
void *transform_attr,
|
||||
unsigned long flags);
|
||||
|
||||
/** @} */
|
||||
|
||||
|
||||
@@ -178,6 +286,7 @@ hwloc_distances_release(hwloc_topology_t topology, struct hwloc_distances_s *dis
|
||||
|
||||
/** \brief Find the index of an object in a distances structure.
|
||||
*
|
||||
* \return the index of the object in the distances structure if any.
|
||||
* \return -1 if object \p obj is not involved in structure \p distances.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
@@ -195,6 +304,7 @@ hwloc_distances_obj_index(struct hwloc_distances_s *distances, hwloc_obj_t obj)
|
||||
* The distance from \p obj1 to \p obj2 is stored in the value pointed by
|
||||
* \p value1to2 and reciprocally.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 if object \p obj1 or \p obj2 is not involved in structure \p distances.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
@@ -215,13 +325,87 @@ hwloc_distances_obj_pair_values(struct hwloc_distances_s *distances,
|
||||
|
||||
|
||||
|
||||
/** \defgroup hwlocality_distances_add Add or remove distances between objects
|
||||
/** \defgroup hwlocality_distances_add Add distances between objects
|
||||
*
|
||||
* The usual way to add distances is:
|
||||
* \code
|
||||
* hwloc_distances_add_handle_t handle;
|
||||
* int err = -1;
|
||||
* handle = hwloc_distances_add_create(topology, "name", kind, 0);
|
||||
* if (handle) {
|
||||
* err = hwloc_distances_add_values(topology, handle, nbobjs, objs, values, 0);
|
||||
* if (!err)
|
||||
* err = hwloc_distances_add_commit(topology, handle, flags);
|
||||
* }
|
||||
* \endcode
|
||||
* If \p err is \c 0 at the end, then addition was successful.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/** \brief Handle to a new distances structure during its addition to the topology. */
|
||||
typedef void * hwloc_distances_add_handle_t;
|
||||
|
||||
/** \brief Create a new empty distances structure.
|
||||
*
|
||||
* Create an empty distances structure
|
||||
* to be filled with hwloc_distances_add_values()
|
||||
* and then committed with hwloc_distances_add_commit().
|
||||
*
|
||||
* Parameter \p name is optional, it may be \c NULL.
|
||||
* Otherwise, it will be copied internally and may later be freed by the caller.
|
||||
*
|
||||
* \p kind specifies the kind of distance as a OR'ed set of ::hwloc_distances_kind_e.
|
||||
* Only one kind of meaning and one kind of provenance may be given if appropriate
|
||||
* (e.g. ::HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH and ::HWLOC_DISTANCES_KIND_FROM_USER).
|
||||
* Kind ::HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES will be automatically set
|
||||
* according to objects having different types in hwloc_distances_add_values().
|
||||
*
|
||||
* \p flags must be \c 0 for now.
|
||||
*
|
||||
* \return A hwloc_distances_add_handle_t that should then be passed
|
||||
* to hwloc_distances_add_values() and hwloc_distances_add_commit().
|
||||
*
|
||||
* \return \c NULL on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC hwloc_distances_add_handle_t
|
||||
hwloc_distances_add_create(hwloc_topology_t topology,
|
||||
const char *name, unsigned long kind,
|
||||
unsigned long flags);
|
||||
|
||||
/** \brief Specify the objects and values in a new empty distances structure.
|
||||
*
|
||||
* Specify the objects and values for a new distances structure
|
||||
* that was returned as a handle by hwloc_distances_add_create().
|
||||
* The structure must then be committed with hwloc_distances_add_commit().
|
||||
*
|
||||
* The number of objects is \p nbobjs and the array of objects is \p objs.
|
||||
* Distance values are stored as a one-dimension array in \p values.
|
||||
* The distance from object i to object j is in slot i*nbobjs+j.
|
||||
*
|
||||
* \p nbobjs must be at least 2.
|
||||
*
|
||||
* Arrays \p objs and \p values will be copied internally,
|
||||
* they may later be freed by the caller.
|
||||
*
|
||||
* On error, the temporary distances structure and its content are destroyed.
|
||||
*
|
||||
* \p flags must be \c 0 for now.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_distances_add_values(hwloc_topology_t topology,
|
||||
hwloc_distances_add_handle_t handle,
|
||||
unsigned nbobjs, hwloc_obj_t *objs,
|
||||
hwloc_uint64_t *values,
|
||||
unsigned long flags);
|
||||
|
||||
/** \brief Flags for adding a new distances structure to a topology. */
|
||||
enum hwloc_distances_add_flag_e {
|
||||
/** \brief Try to group objects based on the newly provided distance information.
|
||||
* Grouping is only performed when the distances structure contains latencies,
|
||||
* and when all objects are of the same type.
|
||||
* \hideinitializer
|
||||
*/
|
||||
HWLOC_DISTANCES_ADD_FLAG_GROUP = (1UL<<0),
|
||||
@@ -233,23 +417,33 @@ enum hwloc_distances_add_flag_e {
|
||||
HWLOC_DISTANCES_ADD_FLAG_GROUP_INACCURATE = (1UL<<1)
|
||||
};
|
||||
|
||||
/** \brief Provide a new distance matrix.
|
||||
/** \brief Commit a new distances structure.
|
||||
*
|
||||
* Provide the matrix of distances between a set of objects given by \p nbobjs
|
||||
* and the \p objs array. \p nbobjs must be at least 2.
|
||||
* The distances are stored as a one-dimension array in \p values.
|
||||
* The distance from object i to object j is in slot i*nbobjs+j.
|
||||
* This function finalizes the distances structure and inserts it in the topology.
|
||||
*
|
||||
* \p kind specifies the kind of distance as a OR'ed set of ::hwloc_distances_kind_e.
|
||||
* Kind ::HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES will be automatically added
|
||||
* if objects of different types are given.
|
||||
* Parameter \p handle was previously returned by hwloc_distances_add_create().
|
||||
* Then objects and values were specified with hwloc_distances_add_values().
|
||||
*
|
||||
* \p flags configures the behavior of the function using an optional OR'ed set of
|
||||
* ::hwloc_distances_add_flag_e.
|
||||
* It may be used to request the grouping of existing objects based on distances.
|
||||
*
|
||||
* On error, the temporary distances structure and its content are destroyed.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_distances_add_commit(hwloc_topology_t topology,
|
||||
hwloc_distances_add_handle_t handle,
|
||||
unsigned long flags);
|
||||
|
||||
/** @} */
|
||||
|
||||
|
||||
|
||||
/** \defgroup hwlocality_distances_remove Remove distances between objects
|
||||
* @{
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_distances_add(hwloc_topology_t topology,
|
||||
unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *values,
|
||||
unsigned long kind, unsigned long flags);
|
||||
|
||||
/** \brief Remove all distance matrices from a topology.
|
||||
*
|
||||
@@ -258,18 +452,24 @@ HWLOC_DECLSPEC int hwloc_distances_add(hwloc_topology_t topology,
|
||||
*
|
||||
* If these distances were used to group objects, these additional
|
||||
* Group objects are not removed from the topology.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_distances_remove(hwloc_topology_t topology);
|
||||
|
||||
/** \brief Remove distance matrices for objects at a specific depth in the topology.
|
||||
*
|
||||
* Identical to hwloc_distances_remove() but only applies to one level of the topology.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_distances_remove_by_depth(hwloc_topology_t topology, int depth);
|
||||
|
||||
/** \brief Remove distance matrices for objects of a specific type in the topology.
|
||||
*
|
||||
* Identical to hwloc_distances_remove() but only applies to one level of the topology.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_distances_remove_by_type(hwloc_topology_t topology, hwloc_obj_type_t type)
|
||||
@@ -283,6 +483,8 @@ hwloc_distances_remove_by_type(hwloc_topology_t topology, hwloc_obj_type_t type)
|
||||
/** \brief Release and remove the given distance matrix from the topology.
|
||||
*
|
||||
* This function includes a call to hwloc_distances_release().
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_distances_release_remove(hwloc_topology_t topology, struct hwloc_distances_s *distances);
|
||||
|
||||
|
||||
16
src/3rdparty/hwloc/include/hwloc/export.h
vendored
16
src/3rdparty/hwloc/include/hwloc/export.h
vendored
@@ -55,7 +55,7 @@ enum hwloc_topology_export_xml_flags_e {
|
||||
*
|
||||
* \p flags is a OR'ed set of ::hwloc_topology_export_xml_flags_e.
|
||||
*
|
||||
* \return -1 if a failure occured.
|
||||
* \return 0 on success, or -1 on error.
|
||||
*
|
||||
* \note See also hwloc_topology_set_userdata_export_callback()
|
||||
* for exporting application-specific object userdata.
|
||||
@@ -91,7 +91,7 @@ HWLOC_DECLSPEC int hwloc_topology_export_xml(hwloc_topology_t topology, const ch
|
||||
*
|
||||
* \p flags is a OR'ed set of ::hwloc_topology_export_xml_flags_e.
|
||||
*
|
||||
* \return -1 if a failure occured.
|
||||
* \return 0 on success, or -1 on error.
|
||||
*
|
||||
* \note See also hwloc_topology_set_userdata_export_callback()
|
||||
* for exporting application-specific object userdata.
|
||||
@@ -145,13 +145,15 @@ HWLOC_DECLSPEC void hwloc_topology_set_userdata_export_callback(hwloc_topology_t
|
||||
* that were given to the export callback.
|
||||
*
|
||||
* Only printable characters may be exported to XML string attributes.
|
||||
* If a non-printable character is passed in \p name or \p buffer,
|
||||
* the function returns -1 with errno set to EINVAL.
|
||||
*
|
||||
* If exporting binary data, the application should first encode into
|
||||
* printable characters only (or use hwloc_export_obj_userdata_base64()).
|
||||
* It should also take care of portability issues if the export may
|
||||
* be reimported on a different architecture.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 with errno set to \c EINVAL if a non-printable character is
|
||||
* passed in \p name or \p buffer.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_export_obj_userdata(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length);
|
||||
|
||||
@@ -165,8 +167,14 @@ HWLOC_DECLSPEC int hwloc_export_obj_userdata(void *reserved, hwloc_topology_t to
|
||||
* This function may only be called from within the export() callback passed
|
||||
* to hwloc_topology_set_userdata_export_callback().
|
||||
*
|
||||
* The name must be made of printable characters for export to XML string attributes.
|
||||
*
|
||||
* The function does not take care of portability issues if the export
|
||||
* may be reimported on a different architecture.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 with errno set to \c EINVAL if a non-printable character is
|
||||
* passed in \p name.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_export_obj_userdata_base64(void *reserved, hwloc_topology_t topology, hwloc_obj_t obj, const char *name, const void *buffer, size_t length);
|
||||
|
||||
|
||||
16
src/3rdparty/hwloc/include/hwloc/gl.h
vendored
16
src/3rdparty/hwloc/include/hwloc/gl.h
vendored
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright © 2012 Blue Brain Project, EPFL. All rights reserved.
|
||||
* Copyright © 2012-2013 Inria. All rights reserved.
|
||||
* Copyright © 2012-2023 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
@@ -39,9 +39,9 @@ extern "C" {
|
||||
/** \brief Get the hwloc OS device object corresponding to the
|
||||
* OpenGL display given by port and device index.
|
||||
*
|
||||
* Return the OS device object describing the OpenGL display
|
||||
* \return The hwloc OS device object describing the OpenGL display
|
||||
* whose port (server) is \p port and device (screen) is \p device.
|
||||
* Return NULL if there is none.
|
||||
* \return \c NULL if none could be found.
|
||||
*
|
||||
* The topology \p topology does not necessarily have to match the current
|
||||
* machine. For instance the topology may be an XML import of a remote host.
|
||||
@@ -70,9 +70,9 @@ hwloc_gl_get_display_osdev_by_port_device(hwloc_topology_t topology,
|
||||
/** \brief Get the hwloc OS device object corresponding to the
|
||||
* OpenGL display given by name.
|
||||
*
|
||||
* Return the OS device object describing the OpenGL display
|
||||
* \return The hwloc OS device object describing the OpenGL display
|
||||
* whose name is \p name, built as ":port.device" such as ":0.0" .
|
||||
* Return NULL if there is none.
|
||||
* \return \c NULL if none could be found.
|
||||
*
|
||||
* The topology \p topology does not necessarily have to match the current
|
||||
* machine. For instance the topology may be an XML import of a remote host.
|
||||
@@ -99,9 +99,11 @@ hwloc_gl_get_display_osdev_by_name(hwloc_topology_t topology,
|
||||
/** \brief Get the OpenGL display port and device corresponding
|
||||
* to the given hwloc OS object.
|
||||
*
|
||||
* Return the OpenGL display port (server) in \p port and device (screen)
|
||||
* Retrieves the OpenGL display port (server) in \p port and device (screen)
|
||||
* in \p screen that correspond to the given hwloc OS device object.
|
||||
* Return \c -1 if there is none.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 if none could be found.
|
||||
*
|
||||
* The topology \p topology does not necessarily have to match the current
|
||||
* machine. For instance the topology may be an XML import of a remote host.
|
||||
|
||||
13
src/3rdparty/hwloc/include/hwloc/glibc-sched.h
vendored
13
src/3rdparty/hwloc/include/hwloc/glibc-sched.h
vendored
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009-2023 Inria. All rights reserved.
|
||||
* Copyright © 2009-2011 Université Bordeaux
|
||||
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
@@ -52,6 +52,8 @@ extern "C" {
|
||||
* that takes a cpu_set_t as input parameter.
|
||||
*
|
||||
* \p schedsetsize should be sizeof(cpu_set_t) unless \p schedset was dynamically allocated with CPU_ALLOC
|
||||
*
|
||||
* \return 0.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_cpuset_to_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_const_cpuset_t hwlocset,
|
||||
@@ -80,6 +82,9 @@ hwloc_cpuset_to_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribute
|
||||
* that takes a cpu_set_t as input parameter.
|
||||
*
|
||||
* \p schedsetsize should be sizeof(cpu_set_t) unless \p schedset was dynamically allocated with CPU_ALLOC
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 with errno set to \c ENOMEM if some internal reallocation failed.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_cpuset_from_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_cpuset_t hwlocset,
|
||||
@@ -95,7 +100,8 @@ hwloc_cpuset_from_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribu
|
||||
cpu = 0;
|
||||
while (count) {
|
||||
if (CPU_ISSET_S(cpu, schedsetsize, schedset)) {
|
||||
hwloc_bitmap_set(hwlocset, cpu);
|
||||
if (hwloc_bitmap_set(hwlocset, cpu) < 0)
|
||||
return -1;
|
||||
count--;
|
||||
}
|
||||
cpu++;
|
||||
@@ -107,7 +113,8 @@ hwloc_cpuset_from_glibc_sched_affinity(hwloc_topology_t topology __hwloc_attribu
|
||||
assert(schedsetsize == sizeof(cpu_set_t));
|
||||
for(cpu=0; cpu<CPU_SETSIZE; cpu++)
|
||||
if (CPU_ISSET(cpu, schedset))
|
||||
hwloc_bitmap_set(hwlocset, cpu);
|
||||
if (hwloc_bitmap_set(hwlocset, cpu) < 0)
|
||||
return -1;
|
||||
#endif /* !CPU_ZERO_S */
|
||||
return 0;
|
||||
}
|
||||
|
||||
1095
src/3rdparty/hwloc/include/hwloc/helper.h
vendored
1095
src/3rdparty/hwloc/include/hwloc/helper.h
vendored
File diff suppressed because it is too large
Load Diff
136
src/3rdparty/hwloc/include/hwloc/intel-mic.h
vendored
136
src/3rdparty/hwloc/include/hwloc/intel-mic.h
vendored
@@ -1,136 +0,0 @@
|
||||
/*
|
||||
* Copyright © 2013-2016 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Macros to help interaction between hwloc and Intel Xeon Phi (MIC).
|
||||
*
|
||||
* Applications that use both hwloc and Intel Xeon Phi (MIC) may want to
|
||||
* include this file so as to get topology information for MIC devices.
|
||||
*/
|
||||
|
||||
#ifndef HWLOC_INTEL_MIC_H
|
||||
#define HWLOC_INTEL_MIC_H
|
||||
|
||||
#include "hwloc.h"
|
||||
#include "hwloc/autogen/config.h"
|
||||
#include "hwloc/helper.h"
|
||||
|
||||
#ifdef HWLOC_LINUX_SYS
|
||||
#include "hwloc/linux.h"
|
||||
|
||||
#include <dirent.h>
|
||||
#include <string.h>
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/** \defgroup hwlocality_intel_mic Interoperability with Intel Xeon Phi (MIC)
|
||||
*
|
||||
* This interface offers ways to retrieve topology information about
|
||||
* Intel Xeon Phi (MIC) devices.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/** \brief Get the CPU set of logical processors that are physically
|
||||
* close to MIC device whose index is \p idx.
|
||||
*
|
||||
* Return the CPU set describing the locality of the MIC device whose index is \p idx.
|
||||
*
|
||||
* Topology \p topology and device index \p idx must match the local machine.
|
||||
* I/O devices detection is not needed in the topology.
|
||||
*
|
||||
* The function only returns the locality of the device.
|
||||
* If more information about the device is needed, OS objects should
|
||||
* be used instead, see hwloc_intel_mic_get_device_osdev_by_index().
|
||||
*
|
||||
* This function is currently only implemented in a meaningful way for
|
||||
* Linux; other systems will simply get a full cpuset.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_intel_mic_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
int idx __hwloc_attribute_unused,
|
||||
hwloc_cpuset_t set)
|
||||
{
|
||||
#ifdef HWLOC_LINUX_SYS
|
||||
/* If we're on Linux, use the sysfs mechanism to get the local cpus */
|
||||
#define HWLOC_INTEL_MIC_DEVICE_SYSFS_PATH_MAX 128
|
||||
char path[HWLOC_INTEL_MIC_DEVICE_SYSFS_PATH_MAX];
|
||||
DIR *sysdir = NULL;
|
||||
struct dirent *dirent;
|
||||
unsigned pcibus, pcidev, pcifunc;
|
||||
|
||||
if (!hwloc_topology_is_thissystem(topology)) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
sprintf(path, "/sys/class/mic/mic%d", idx);
|
||||
sysdir = opendir(path);
|
||||
if (!sysdir)
|
||||
return -1;
|
||||
|
||||
while ((dirent = readdir(sysdir)) != NULL) {
|
||||
if (sscanf(dirent->d_name, "pci_%02x:%02x.%02x", &pcibus, &pcidev, &pcifunc) == 3) {
|
||||
sprintf(path, "/sys/class/mic/mic%d/pci_%02x:%02x.%02x/local_cpus", idx, pcibus, pcidev, pcifunc);
|
||||
if (hwloc_linux_read_path_as_cpumask(path, set) < 0
|
||||
|| hwloc_bitmap_iszero(set))
|
||||
hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
closedir(sysdir);
|
||||
#else
|
||||
/* Non-Linux systems simply get a full cpuset */
|
||||
hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
/** \brief Get the hwloc OS device object corresponding to the
|
||||
* MIC device for the given index.
|
||||
*
|
||||
* Return the OS device object describing the MIC device whose index is \p idx.
|
||||
* Return NULL if there is none.
|
||||
*
|
||||
* The topology \p topology does not necessarily have to match the current
|
||||
* machine. For instance the topology may be an XML import of a remote host.
|
||||
* I/O devices detection must be enabled in the topology.
|
||||
*
|
||||
* \note The corresponding PCI device object can be obtained by looking
|
||||
* at the OS device parent object.
|
||||
*/
|
||||
static __hwloc_inline hwloc_obj_t
|
||||
hwloc_intel_mic_get_device_osdev_by_index(hwloc_topology_t topology,
|
||||
unsigned idx)
|
||||
{
|
||||
hwloc_obj_t osdev = NULL;
|
||||
while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
|
||||
if (HWLOC_OBJ_OSDEV_COPROC == osdev->attr->osdev.type
|
||||
&& osdev->name
|
||||
&& !strncmp("mic", osdev->name, 3)
|
||||
&& atoi(osdev->name + 3) == (int) idx)
|
||||
return osdev;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/** @} */
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* HWLOC_INTEL_MIC_H */
|
||||
161
src/3rdparty/hwloc/include/hwloc/levelzero.h
vendored
Normal file
161
src/3rdparty/hwloc/include/hwloc/levelzero.h
vendored
Normal file
@@ -0,0 +1,161 @@
|
||||
/*
|
||||
* Copyright © 2021-2023 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Macros to help interaction between hwloc and the oneAPI Level Zero interface.
|
||||
*
|
||||
* Applications that use both hwloc and Level Zero may want to
|
||||
* include this file so as to get topology information for L0 devices.
|
||||
*/
|
||||
|
||||
#ifndef HWLOC_LEVELZERO_H
|
||||
#define HWLOC_LEVELZERO_H
|
||||
|
||||
#include "hwloc.h"
|
||||
#include "hwloc/autogen/config.h"
|
||||
#include "hwloc/helper.h"
|
||||
#ifdef HWLOC_LINUX_SYS
|
||||
#include "hwloc/linux.h"
|
||||
#endif
|
||||
|
||||
#include <level_zero/ze_api.h>
|
||||
#include <level_zero/zes_api.h>
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/** \defgroup hwlocality_levelzero Interoperability with the oneAPI Level Zero interface.
|
||||
*
|
||||
* This interface offers ways to retrieve topology information about
|
||||
* devices managed by the Level Zero API.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/** \brief Get the CPU set of logical processors that are physically
|
||||
* close to the Level Zero device \p device
|
||||
*
|
||||
* Store in \p set the CPU-set describing the locality of
|
||||
* the Level Zero device \p device.
|
||||
*
|
||||
* Topology \p topology and device \p device must match the local machine.
|
||||
* The Level Zero library must have been initialized with Sysman enabled
|
||||
* (by calling zesInit(0) if supported,
|
||||
* or by setting ZES_ENABLE_SYSMAN=1 in the environment).
|
||||
* I/O devices detection and the Level Zero component are not needed in the
|
||||
* topology.
|
||||
*
|
||||
* The function only returns the locality of the device.
|
||||
* If more information about the device is needed, OS objects should
|
||||
* be used instead, see hwloc_levelzero_get_device_osdev().
|
||||
*
|
||||
* This function is currently only implemented in a meaningful way for
|
||||
* Linux; other systems will simply get a full cpuset.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 on error, for instance if device information could not be found.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_levelzero_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
ze_device_handle_t device, hwloc_cpuset_t set)
|
||||
{
|
||||
#ifdef HWLOC_LINUX_SYS
|
||||
/* If we're on Linux, use the sysfs mechanism to get the local cpus */
|
||||
#define HWLOC_LEVELZERO_DEVICE_SYSFS_PATH_MAX 128
|
||||
char path[HWLOC_LEVELZERO_DEVICE_SYSFS_PATH_MAX];
|
||||
zes_pci_properties_t pci;
|
||||
zes_device_handle_t sdevice = device;
|
||||
ze_result_t res;
|
||||
|
||||
if (!hwloc_topology_is_thissystem(topology)) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
res = zesDevicePciGetProperties(sdevice, &pci);
|
||||
if (res != ZE_RESULT_SUCCESS) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
sprintf(path, "/sys/bus/pci/devices/%04x:%02x:%02x.%01x/local_cpus",
|
||||
pci.address.domain, pci.address.bus, pci.address.device, pci.address.function);
|
||||
if (hwloc_linux_read_path_as_cpumask(path, set) < 0
|
||||
|| hwloc_bitmap_iszero(set))
|
||||
hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
|
||||
#else
|
||||
/* Non-Linux systems simply get a full cpuset */
|
||||
hwloc_bitmap_copy(set, hwloc_topology_get_complete_cpuset(topology));
|
||||
#endif
|
||||
return 0;
|
||||
}
|
||||
|
||||
/** \brief Get the hwloc OS device object corresponding to Level Zero device
|
||||
* \p device.
|
||||
*
|
||||
* \return The hwloc OS device object that describes the given Level Zero device \p device.
|
||||
* \return \c NULL if none could be found.
|
||||
*
|
||||
* Topology \p topology and device \p dv_ind must match the local machine.
|
||||
* I/O devices detection and the Level Zero component must be enabled in the
|
||||
* topology. If not, the locality of the object may still be found using
|
||||
* hwloc_levelzero_get_device_cpuset().
|
||||
*
|
||||
* \note The corresponding hwloc PCI device may be found by looking
|
||||
* at the result parent pointer (unless PCI devices are filtered out).
|
||||
*/
|
||||
static __hwloc_inline hwloc_obj_t
|
||||
hwloc_levelzero_get_device_osdev(hwloc_topology_t topology, ze_device_handle_t device)
|
||||
{
|
||||
zes_device_handle_t sdevice = device;
|
||||
zes_pci_properties_t pci;
|
||||
ze_result_t res;
|
||||
hwloc_obj_t osdev;
|
||||
|
||||
if (!hwloc_topology_is_thissystem(topology)) {
|
||||
errno = EINVAL;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
res = zesDevicePciGetProperties(sdevice, &pci);
|
||||
if (res != ZE_RESULT_SUCCESS) {
|
||||
/* L0 was likely initialized without sysman, don't bother */
|
||||
errno = EINVAL;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
osdev = NULL;
|
||||
while ((osdev = hwloc_get_next_osdev(topology, osdev)) != NULL) {
|
||||
hwloc_obj_t pcidev = osdev->parent;
|
||||
|
||||
if (strncmp(osdev->name, "ze", 2))
|
||||
continue;
|
||||
|
||||
if (pcidev
|
||||
&& pcidev->type == HWLOC_OBJ_PCI_DEVICE
|
||||
&& pcidev->attr->pcidev.domain == pci.address.domain
|
||||
&& pcidev->attr->pcidev.bus == pci.address.bus
|
||||
&& pcidev->attr->pcidev.dev == pci.address.device
|
||||
&& pcidev->attr->pcidev.func == pci.address.function)
|
||||
return osdev;
|
||||
|
||||
/* FIXME: when we'll have serialnumber, try it in case PCI is filtered-out */
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/** @} */
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* HWLOC_LEVELZERO_H */
|
||||
34
src/3rdparty/hwloc/include/hwloc/linux-libnuma.h
vendored
34
src/3rdparty/hwloc/include/hwloc/linux-libnuma.h
vendored
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2017 Inria. All rights reserved.
|
||||
* Copyright © 2009-2023 Inria. All rights reserved.
|
||||
* Copyright © 2009-2010, 2012 Université Bordeaux
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
@@ -50,6 +50,8 @@ extern "C" {
|
||||
* This function may be used before calling set_mempolicy, mbind, migrate_pages
|
||||
* or any other function that takes an array of unsigned long and a maximal
|
||||
* node number as input parameter.
|
||||
*
|
||||
* \return 0.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_cpuset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset,
|
||||
@@ -84,6 +86,8 @@ hwloc_cpuset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_cpus
|
||||
* This function may be used before calling set_mempolicy, mbind, migrate_pages
|
||||
* or any other function that takes an array of unsigned long and a maximal
|
||||
* node number as input parameter.
|
||||
*
|
||||
* \return 0.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_nodeset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset,
|
||||
@@ -119,6 +123,9 @@ hwloc_nodeset_to_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_const_nod
|
||||
* This function may be used after calling get_mempolicy or any other function
|
||||
* that takes an array of unsigned long as output parameter (and possibly
|
||||
* a maximal node number as input parameter).
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 on error, for instance if failing an internal reallocation.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_cpuset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_cpuset_t cpuset,
|
||||
@@ -130,7 +137,8 @@ hwloc_cpuset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_cpuset_t
|
||||
while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL)
|
||||
if (node->os_index < maxnode
|
||||
&& (mask[node->os_index/sizeof(*mask)/8] & (1UL << (node->os_index % (sizeof(*mask)*8)))))
|
||||
hwloc_bitmap_or(cpuset, cpuset, node->cpuset);
|
||||
if (hwloc_bitmap_or(cpuset, cpuset, node->cpuset) < 0)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -142,6 +150,9 @@ hwloc_cpuset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_cpuset_t
|
||||
* This function may be used after calling get_mempolicy or any other function
|
||||
* that takes an array of unsigned long as output parameter (and possibly
|
||||
* a maximal node number as input parameter).
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 with errno set to \c ENOMEM if some internal reallocation failed.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_nodeset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_nodeset_t nodeset,
|
||||
@@ -153,7 +164,8 @@ hwloc_nodeset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_nodeset
|
||||
while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL)
|
||||
if (node->os_index < maxnode
|
||||
&& (mask[node->os_index/sizeof(*mask)/8] & (1UL << (node->os_index % (sizeof(*mask)*8)))))
|
||||
hwloc_bitmap_set(nodeset, node->os_index);
|
||||
if (hwloc_bitmap_set(nodeset, node->os_index) < 0)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -184,7 +196,7 @@ hwloc_nodeset_from_linux_libnuma_ulongs(hwloc_topology_t topology, hwloc_nodeset
|
||||
* This function may be used before calling many numa_ functions
|
||||
* that use a struct bitmask as an input parameter.
|
||||
*
|
||||
* \return newly allocated struct bitmask.
|
||||
* \return newly allocated struct bitmask, or \c NULL on error.
|
||||
*/
|
||||
static __hwloc_inline struct bitmask *
|
||||
hwloc_cpuset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_cpuset_t cpuset) __hwloc_attribute_malloc;
|
||||
@@ -209,7 +221,7 @@ hwloc_cpuset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_cpu
|
||||
* This function may be used before calling many numa_ functions
|
||||
* that use a struct bitmask as an input parameter.
|
||||
*
|
||||
* \return newly allocated struct bitmask.
|
||||
* \return newly allocated struct bitmask, or \c NULL on error.
|
||||
*/
|
||||
static __hwloc_inline struct bitmask *
|
||||
hwloc_nodeset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_nodeset_t nodeset) __hwloc_attribute_malloc;
|
||||
@@ -231,6 +243,9 @@ hwloc_nodeset_to_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_const_no
|
||||
*
|
||||
* This function may be used after calling many numa_ functions
|
||||
* that use a struct bitmask as an output parameter.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 with errno set to \c ENOMEM if some internal reallocation failed.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_cpuset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_cpuset_t cpuset,
|
||||
@@ -241,7 +256,8 @@ hwloc_cpuset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_cpuset_
|
||||
hwloc_bitmap_zero(cpuset);
|
||||
while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL)
|
||||
if (numa_bitmask_isbitset(bitmask, node->os_index))
|
||||
hwloc_bitmap_or(cpuset, cpuset, node->cpuset);
|
||||
if (hwloc_bitmap_or(cpuset, cpuset, node->cpuset) < 0)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -249,6 +265,9 @@ hwloc_cpuset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_cpuset_
|
||||
*
|
||||
* This function may be used after calling many numa_ functions
|
||||
* that use a struct bitmask as an output parameter.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 with errno set to \c ENOMEM if some internal reallocation failed.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_nodeset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_nodeset_t nodeset,
|
||||
@@ -259,7 +278,8 @@ hwloc_nodeset_from_linux_libnuma_bitmask(hwloc_topology_t topology, hwloc_nodese
|
||||
hwloc_bitmap_zero(nodeset);
|
||||
while ((node = hwloc_get_next_obj_by_depth(topology, depth, node)) != NULL)
|
||||
if (numa_bitmask_isbitset(bitmask, node->os_index))
|
||||
hwloc_bitmap_set(nodeset, node->os_index);
|
||||
if (hwloc_bitmap_set(nodeset, node->os_index) < 0)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
17
src/3rdparty/hwloc/include/hwloc/linux.h
vendored
17
src/3rdparty/hwloc/include/hwloc/linux.h
vendored
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2016 Inria. All rights reserved.
|
||||
* Copyright © 2009-2023 Inria. All rights reserved.
|
||||
* Copyright © 2009-2011 Université Bordeaux
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
@@ -38,22 +38,35 @@ extern "C" {
|
||||
* The behavior is exactly the same as the Linux sched_setaffinity system call,
|
||||
* but uses a hwloc cpuset.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*
|
||||
* \note This is equivalent to calling hwloc_set_proc_cpubind() with
|
||||
* ::HWLOC_CPUBIND_THREAD as flags.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_linux_set_tid_cpubind(hwloc_topology_t topology, pid_t tid, hwloc_const_cpuset_t set);
|
||||
|
||||
/** \brief Get the current binding of thread \p tid
|
||||
*
|
||||
* The CPU-set \p set (previously allocated by the caller)
|
||||
* is filled with the list of PUs which the thread
|
||||
* was last bound to.
|
||||
*
|
||||
* The behavior is exactly the same as the Linux sched_getaffinity system call,
|
||||
* but uses a hwloc cpuset.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*
|
||||
* \note This is equivalent to calling hwloc_get_proc_cpubind() with
|
||||
* ::HWLOC_CPUBIND_THREAD as flags.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_linux_get_tid_cpubind(hwloc_topology_t topology, pid_t tid, hwloc_cpuset_t set);
|
||||
|
||||
/** \brief Get the last physical CPU where thread \p tid ran.
|
||||
*
|
||||
* The CPU-set \p set (previously allocated by the caller)
|
||||
* is filled with the PU which the thread last ran on.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*
|
||||
* \note This is equivalent to calling hwloc_get_proc_last_cpu_location() with
|
||||
* ::HWLOC_CPUBIND_THREAD as flags.
|
||||
@@ -65,6 +78,8 @@ HWLOC_DECLSPEC int hwloc_linux_get_tid_last_cpu_location(hwloc_topology_t topolo
|
||||
* Might be used when reading CPU set from sysfs attributes such as topology
|
||||
* and caches for processors, or local_cpus for devices.
|
||||
*
|
||||
* \return 0 on success, -1 on error.
|
||||
*
|
||||
* \note This function ignores the HWLOC_FSROOT environment variable.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_linux_read_path_as_cpumask(const char *path, hwloc_bitmap_t set);
|
||||
|
||||
355
src/3rdparty/hwloc/include/hwloc/memattrs.h
vendored
355
src/3rdparty/hwloc/include/hwloc/memattrs.h
vendored
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright © 2019-2020 Inria. All rights reserved.
|
||||
* Copyright © 2019-2024 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
@@ -54,6 +54,12 @@ extern "C" {
|
||||
* Attribute values for these nodes, if any, may then be obtained with
|
||||
* hwloc_memattr_get_value() and manually compared with the desired criteria.
|
||||
*
|
||||
* Memory attributes are also used internally to build Memory Tiers which provide
|
||||
* an easy way to distinguish NUMA nodes of different kinds, as explained
|
||||
* in \ref heteromem.
|
||||
*
|
||||
* \sa An example is available in doc/examples/memory-attributes.c in the source tree.
|
||||
*
|
||||
* \note The API also supports specific objects as initiator,
|
||||
* but it is currently not used internally by hwloc.
|
||||
* Users may for instance use it to provide custom performance
|
||||
@@ -63,21 +69,26 @@ extern "C" {
|
||||
* @{
|
||||
*/
|
||||
|
||||
/** \brief Memory node attributes. */
|
||||
/** \brief Predefined memory attribute IDs.
|
||||
* See ::hwloc_memattr_id_t for the generic definition of IDs
|
||||
* for predefined or custom attributes.
|
||||
*/
|
||||
enum hwloc_memattr_id_e {
|
||||
/** \brief "Capacity".
|
||||
* The capacity is returned in bytes
|
||||
* (local_memory attribute in objects).
|
||||
/** \brief
|
||||
* The \"Capacity\" is returned in bytes (local_memory attribute in objects).
|
||||
*
|
||||
* Best capacity nodes are nodes with <b>higher capacity</b>.
|
||||
*
|
||||
* No initiator is involved when looking at this attribute.
|
||||
* The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST.
|
||||
*
|
||||
* Capacity values may not be modified using hwloc_memattr_set_value().
|
||||
* \hideinitializer
|
||||
*/
|
||||
HWLOC_MEMATTR_ID_CAPACITY = 0,
|
||||
|
||||
/** \brief "Locality".
|
||||
* The locality is returned as the number of PUs in that locality
|
||||
/** \brief
|
||||
* The \"Locality\" is returned as the number of PUs in that locality
|
||||
* (e.g. the weight of its cpuset).
|
||||
*
|
||||
* Best locality nodes are nodes with <b>smaller locality</b>
|
||||
@@ -87,34 +98,108 @@ enum hwloc_memattr_id_e {
|
||||
*
|
||||
* No initiator is involved when looking at this attribute.
|
||||
* The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_LOWER_FIRST.
|
||||
|
||||
* Locality values may not be modified using hwloc_memattr_set_value().
|
||||
* \hideinitializer
|
||||
*/
|
||||
HWLOC_MEMATTR_ID_LOCALITY = 1,
|
||||
|
||||
/** \brief "Bandwidth".
|
||||
* The bandwidth is returned in MiB/s, as seen from the given initiator location.
|
||||
/** \brief
|
||||
* The \"Bandwidth\" is returned in MiB/s, as seen from the given initiator location.
|
||||
*
|
||||
* Best bandwidth nodes are nodes with <b>higher bandwidth</b>.
|
||||
*
|
||||
* The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST
|
||||
* and ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR.
|
||||
*
|
||||
* This is the average bandwidth for read and write accesses. If the platform
|
||||
* provides individual read and write bandwidths but no explicit average value,
|
||||
* hwloc computes and returns the average.
|
||||
* \hideinitializer
|
||||
*/
|
||||
HWLOC_MEMATTR_ID_BANDWIDTH = 2,
|
||||
|
||||
/** \brief "Latency".
|
||||
* The latency is returned as nanoseconds, as seen from the given initiator location.
|
||||
/** \brief
|
||||
* The \"ReadBandwidth\" is returned in MiB/s, as seen from the given initiator location.
|
||||
*
|
||||
* Best bandwidth nodes are nodes with <b>higher bandwidth</b>.
|
||||
*
|
||||
* The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST
|
||||
* and ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR.
|
||||
* \hideinitializer
|
||||
*/
|
||||
HWLOC_MEMATTR_ID_READ_BANDWIDTH = 4,
|
||||
|
||||
/** \brief
|
||||
* The \"WriteBandwidth\" is returned in MiB/s, as seen from the given initiator location.
|
||||
*
|
||||
* Best bandwidth nodes are nodes with <b>higher bandwidth</b>.
|
||||
*
|
||||
* The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST
|
||||
* and ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR.
|
||||
* \hideinitializer
|
||||
*/
|
||||
HWLOC_MEMATTR_ID_WRITE_BANDWIDTH = 5,
|
||||
|
||||
/** \brief
|
||||
* The \"Latency\" is returned as nanoseconds, as seen from the given initiator location.
|
||||
*
|
||||
* Best latency nodes are nodes with <b>smaller latency</b>.
|
||||
*
|
||||
* The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_LOWER_FIRST
|
||||
* and ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR.
|
||||
*
|
||||
* This is the average latency for read and write accesses. If the platform
|
||||
* provides individual read and write latencies but no explicit average value,
|
||||
* hwloc computes and returns the average.
|
||||
* \hideinitializer
|
||||
*/
|
||||
HWLOC_MEMATTR_ID_LATENCY = 3
|
||||
HWLOC_MEMATTR_ID_LATENCY = 3,
|
||||
|
||||
/* TODO read vs write, persistence? */
|
||||
/** \brief
|
||||
* The \"ReadLatency\" is returned as nanoseconds, as seen from the given initiator location.
|
||||
*
|
||||
* Best latency nodes are nodes with <b>smaller latency</b>.
|
||||
*
|
||||
* The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_LOWER_FIRST
|
||||
* and ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR.
|
||||
* \hideinitializer
|
||||
*/
|
||||
HWLOC_MEMATTR_ID_READ_LATENCY = 6,
|
||||
|
||||
/** \brief
|
||||
* The \"WriteLatency\" is returned as nanoseconds, as seen from the given initiator location.
|
||||
*
|
||||
* Best latency nodes are nodes with <b>smaller latency</b>.
|
||||
*
|
||||
* The corresponding attribute flags are ::HWLOC_MEMATTR_FLAG_LOWER_FIRST
|
||||
* and ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR.
|
||||
* \hideinitializer
|
||||
*/
|
||||
HWLOC_MEMATTR_ID_WRITE_LATENCY = 7,
|
||||
|
||||
/* TODO persistence? */
|
||||
|
||||
HWLOC_MEMATTR_ID_MAX /**< \private
|
||||
* Sentinel value for predefined attributes.
|
||||
* Dynamically registered custom attributes start here.
|
||||
*/
|
||||
};
|
||||
|
||||
/** \brief A memory attribute identifier.
|
||||
* May be either one of ::hwloc_memattr_id_e or a new id returned by hwloc_memattr_register().
|
||||
*
|
||||
* hwloc predefines some commonly-used attributes in ::hwloc_memattr_id_e.
|
||||
* One may then dynamically register custom ones with hwloc_memattr_register(),
|
||||
* they will be assigned IDs immediately after the predefined ones.
|
||||
* See \ref hwlocality_memattrs_manage for more information about
|
||||
* existing attribute IDs.
|
||||
*/
|
||||
typedef unsigned hwloc_memattr_id_t;
|
||||
|
||||
/** \brief Return the identifier of the memory attribute with the given name.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 with errno set to \c EINVAL if no such attribute exists.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_memattr_get_by_name(hwloc_topology_t topology,
|
||||
@@ -184,6 +269,8 @@ enum hwloc_local_numanode_flag_e {
|
||||
* or the number of nodes that would have been stored if there were
|
||||
* enough room.
|
||||
*
|
||||
* \return 0 on success or -1 on error.
|
||||
*
|
||||
* \note Some of these NUMA nodes may not have any memory attribute
|
||||
* values and hence not be reported as actual targets in other functions.
|
||||
*
|
||||
@@ -211,8 +298,16 @@ hwloc_get_local_numanode_objs(hwloc_topology_t topology,
|
||||
* (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR),
|
||||
* location \p initiator is ignored and may be \c NULL.
|
||||
*
|
||||
* \p target_node cannot be \c NULL. If \p attribute is ::HWLOC_MEMATTR_ID_CAPACITY,
|
||||
* \p target_node must be a NUMA node. If it is ::HWLOC_MEMATTR_ID_LOCALITY,
|
||||
* \p target_node must have a CPU set.
|
||||
*
|
||||
* \p flags must be \c 0 for now.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 on error, for instance with errno set to \c EINVAL if flags
|
||||
* are invalid or no such attribute exists.
|
||||
*
|
||||
* \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET
|
||||
* when referring to accesses performed by CPU cores.
|
||||
* ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc,
|
||||
@@ -244,7 +339,10 @@ hwloc_memattr_get_value(hwloc_topology_t topology,
|
||||
*
|
||||
* \p flags must be \c 0 for now.
|
||||
*
|
||||
* If there are no matching targets, \c -1 is returned with \p errno set to \c ENOENT;
|
||||
* \return 0 on success.
|
||||
* \return -1 with errno set to \c ENOENT if there are no matching targets.
|
||||
* \return -1 with errno set to \c EINVAL if flags are invalid,
|
||||
* or no such attribute exists.
|
||||
*
|
||||
* \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET
|
||||
* when referring to accesses performed by CPU cores.
|
||||
@@ -260,10 +358,6 @@ hwloc_memattr_get_best_target(hwloc_topology_t topology,
|
||||
hwloc_obj_t *best_target, hwloc_uint64_t *value);
|
||||
|
||||
/** \brief Return the best initiator for the given attribute and target NUMA node.
|
||||
*
|
||||
* If the attribute does not relate to a specific initiator
|
||||
* (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR),
|
||||
* \c -1 is returned and \p errno is set to \c EINVAL.
|
||||
*
|
||||
* If \p value is non \c NULL, the corresponding value is returned there.
|
||||
*
|
||||
@@ -277,96 +371,22 @@ hwloc_memattr_get_best_target(hwloc_topology_t topology,
|
||||
* The returned initiator should not be modified or freed,
|
||||
* it belongs to the topology.
|
||||
*
|
||||
* \p target_node cannot be \c NULL.
|
||||
*
|
||||
* \p flags must be \c 0 for now.
|
||||
*
|
||||
* If there are no matching initiators, \c -1 is returned with \p errno set to \c ENOENT;
|
||||
* \return 0 on success.
|
||||
* \return -1 with errno set to \c ENOENT if there are no matching initiators.
|
||||
* \return -1 with errno set to \c EINVAL if the attribute does not relate to a specific initiator
|
||||
* (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR).
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_memattr_get_best_initiator(hwloc_topology_t topology,
|
||||
hwloc_memattr_id_t attribute,
|
||||
hwloc_obj_t target,
|
||||
hwloc_obj_t target_node,
|
||||
unsigned long flags,
|
||||
struct hwloc_location *best_initiator, hwloc_uint64_t *value);
|
||||
|
||||
/** @} */
|
||||
|
||||
|
||||
/** \defgroup hwlocality_memattrs_manage Managing memory attributes
|
||||
* @{
|
||||
*/
|
||||
|
||||
/** \brief Return the name of a memory attribute.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_memattr_get_name(hwloc_topology_t topology,
|
||||
hwloc_memattr_id_t attribute,
|
||||
const char **name);
|
||||
|
||||
/** \brief Return the flags of the given attribute.
|
||||
*
|
||||
* Flags are a OR'ed set of ::hwloc_memattr_flag_e.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_memattr_get_flags(hwloc_topology_t topology,
|
||||
hwloc_memattr_id_t attribute,
|
||||
unsigned long *flags);
|
||||
|
||||
/** \brief Memory attribute flags.
|
||||
* Given to hwloc_memattr_register() and returned by hwloc_memattr_get_flags().
|
||||
*/
|
||||
enum hwloc_memattr_flag_e {
|
||||
/** \brief The best nodes for this memory attribute are those with the higher values.
|
||||
* For instance Bandwidth.
|
||||
*/
|
||||
HWLOC_MEMATTR_FLAG_HIGHER_FIRST = (1UL<<0),
|
||||
/** \brief The best nodes for this memory attribute are those with the lower values.
|
||||
* For instance Latency.
|
||||
*/
|
||||
HWLOC_MEMATTR_FLAG_LOWER_FIRST = (1UL<<1),
|
||||
/** \brief The value returned for this memory attribute depends on the given initiator.
|
||||
* For instance Bandwidth and Latency, but not Capacity.
|
||||
*/
|
||||
HWLOC_MEMATTR_FLAG_NEED_INITIATOR = (1UL<<2)
|
||||
};
|
||||
|
||||
/** \brief Register a new memory attribute.
|
||||
*
|
||||
* Add a specific memory attribute that is not defined in ::hwloc_memattr_id_e.
|
||||
* Flags are a OR'ed set of ::hwloc_memattr_flag_e. It must contain at least
|
||||
* one of ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST or ::HWLOC_MEMATTR_FLAG_LOWER_FIRST.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_memattr_register(hwloc_topology_t topology,
|
||||
const char *name,
|
||||
unsigned long flags,
|
||||
hwloc_memattr_id_t *id);
|
||||
|
||||
/** \brief Set an attribute value for a specific target NUMA node.
|
||||
*
|
||||
* If the attribute does not relate to a specific initiator
|
||||
* (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR),
|
||||
* location \p initiator is ignored and may be \c NULL.
|
||||
*
|
||||
* The initiator will be copied into the topology,
|
||||
* the caller should free anything allocated to store the initiator,
|
||||
* for instance the cpuset.
|
||||
*
|
||||
* \p flags must be \c 0 for now.
|
||||
*
|
||||
* \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET
|
||||
* when refering to accesses performed by CPU cores.
|
||||
* ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc,
|
||||
* but users may for instance use it to provide custom information about
|
||||
* host memory accesses performed by GPUs.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_memattr_set_value(hwloc_topology_t topology,
|
||||
hwloc_memattr_id_t attribute,
|
||||
hwloc_obj_t target_node,
|
||||
struct hwloc_location *initiator,
|
||||
unsigned long flags,
|
||||
hwloc_uint64_t value);
|
||||
|
||||
/** \brief Return the target NUMA nodes that have some values for a given attribute.
|
||||
*
|
||||
* Return targets for the given attribute in the \p targets array
|
||||
@@ -397,8 +417,10 @@ hwloc_memattr_set_value(hwloc_topology_t topology,
|
||||
* NUMA nodes with hwloc_get_local_numanode_objs() and then look at their attribute
|
||||
* values.
|
||||
*
|
||||
* \return 0 on success or -1 on error.
|
||||
*
|
||||
* \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET
|
||||
* when refering to accesses performed by CPU cores.
|
||||
* when referring to accesses performed by CPU cores.
|
||||
* ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc,
|
||||
* but users may for instance use it to provide custom information about
|
||||
* host memory accesses performed by GPUs.
|
||||
@@ -408,7 +430,7 @@ hwloc_memattr_get_targets(hwloc_topology_t topology,
|
||||
hwloc_memattr_id_t attribute,
|
||||
struct hwloc_location *initiator,
|
||||
unsigned long flags,
|
||||
unsigned *nrp, hwloc_obj_t *targets, hwloc_uint64_t *values);
|
||||
unsigned *nr, hwloc_obj_t *targets, hwloc_uint64_t *values);
|
||||
|
||||
/** \brief Return the initiators that have values for a given attribute for a specific target NUMA node.
|
||||
*
|
||||
@@ -428,12 +450,16 @@ hwloc_memattr_get_targets(hwloc_topology_t topology,
|
||||
* The returned initiators should not be modified or freed,
|
||||
* they belong to the topology.
|
||||
*
|
||||
* \p target_node cannot be \c NULL.
|
||||
*
|
||||
* \p flags must be \c 0 for now.
|
||||
*
|
||||
* If the attribute does not relate to a specific initiator
|
||||
* (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR),
|
||||
* no initiator is returned.
|
||||
*
|
||||
* \return 0 on success or -1 on error.
|
||||
*
|
||||
* \note This function is meant for tools and debugging (listing internal information)
|
||||
* rather than for application queries. Applications should rather select useful
|
||||
* NUMA nodes with hwloc_get_local_numanode_objs() and then look at their attribute
|
||||
@@ -445,6 +471,131 @@ hwloc_memattr_get_initiators(hwloc_topology_t topology,
|
||||
hwloc_obj_t target_node,
|
||||
unsigned long flags,
|
||||
unsigned *nr, struct hwloc_location *initiators, hwloc_uint64_t *values);
|
||||
|
||||
/** @} */
|
||||
|
||||
|
||||
/** \defgroup hwlocality_memattrs_manage Managing memory attributes
|
||||
*
|
||||
* Memory attributes are identified by an ID (::hwloc_memattr_id_t)
|
||||
* and a name. hwloc_memattr_get_name() and hwloc_memattr_get_by_name()
|
||||
* convert between them (or return error if the attribute does not exist).
|
||||
*
|
||||
* The set of valid ::hwloc_memattr_id_t is a contiguous set starting at \c 0.
|
||||
* It first contains predefined attributes, as listed
|
||||
* in ::hwloc_memattr_id_e (from \c 0 to \c HWLOC_MEMATTR_ID_MAX-1).
|
||||
* Then custom attributes may be dynamically registered with
|
||||
* hwloc_memattr_register(). They will get the following IDs
|
||||
* (\c HWLOC_MEMATTR_ID_MAX for the first one, etc.).
|
||||
*
|
||||
* To iterate over all valid attributes
|
||||
* (either predefined or dynamically registered custom ones),
|
||||
* one may iterate over IDs starting from \c 0 until hwloc_memattr_get_name()
|
||||
* or hwloc_memattr_get_flags() returns an error.
|
||||
*
|
||||
* The values for an existing attribute or for custom dynamically registered ones
|
||||
* may be set or modified with hwloc_memattr_set_value().
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/** \brief Return the name of a memory attribute.
|
||||
*
|
||||
* The output pointer \p name cannot be \c NULL.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 with errno set to \c EINVAL if the attribute does not exist.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_memattr_get_name(hwloc_topology_t topology,
|
||||
hwloc_memattr_id_t attribute,
|
||||
const char **name);
|
||||
|
||||
/** \brief Return the flags of the given attribute.
|
||||
*
|
||||
* Flags are an OR'ed set of ::hwloc_memattr_flag_e.
|
||||
*
|
||||
* The output pointer \p flags cannot be \c NULL.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 with errno set to \c EINVAL if the attribute does not exist.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_memattr_get_flags(hwloc_topology_t topology,
|
||||
hwloc_memattr_id_t attribute,
|
||||
unsigned long *flags);
|
||||
|
||||
/** \brief Memory attribute flags.
|
||||
* Given to hwloc_memattr_register() and returned by hwloc_memattr_get_flags().
|
||||
*/
|
||||
enum hwloc_memattr_flag_e {
|
||||
/** \brief The best nodes for this memory attribute are those with the higher values.
|
||||
* For instance Bandwidth.
|
||||
*/
|
||||
HWLOC_MEMATTR_FLAG_HIGHER_FIRST = (1UL<<0),
|
||||
/** \brief The best nodes for this memory attribute are those with the lower values.
|
||||
* For instance Latency.
|
||||
*/
|
||||
HWLOC_MEMATTR_FLAG_LOWER_FIRST = (1UL<<1),
|
||||
/** \brief The value returned for this memory attribute depends on the given initiator.
|
||||
* For instance Bandwidth and Latency, but not Capacity.
|
||||
*/
|
||||
HWLOC_MEMATTR_FLAG_NEED_INITIATOR = (1UL<<2)
|
||||
};
|
||||
|
||||
/** \brief Register a new memory attribute.
|
||||
*
|
||||
* Add a new custom memory attribute.
|
||||
* Flags are an OR'ed set of ::hwloc_memattr_flag_e. It must contain one of
|
||||
* ::HWLOC_MEMATTR_FLAG_HIGHER_FIRST or ::HWLOC_MEMATTR_FLAG_LOWER_FIRST but not both.
|
||||
*
|
||||
* The new attribute \p id is immediately after the last existing attribute ID
|
||||
* (which is either the ID of the last registered attribute if any,
|
||||
* or the ID of the last predefined attribute in ::hwloc_memattr_id_e).
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 with errno set to \c EINVAL if an invalid set of flags is given.
|
||||
* \return -1 with errno set to \c EBUSY if another attribute already uses this name.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_memattr_register(hwloc_topology_t topology,
|
||||
const char *name,
|
||||
unsigned long flags,
|
||||
hwloc_memattr_id_t *id);
|
||||
|
||||
/** \brief Set an attribute value for a specific target NUMA node.
|
||||
*
|
||||
* If the attribute does not relate to a specific initiator
|
||||
* (it does not have the flag ::HWLOC_MEMATTR_FLAG_NEED_INITIATOR),
|
||||
* location \p initiator is ignored and may be \c NULL.
|
||||
*
|
||||
* The initiator will be copied into the topology,
|
||||
* the caller should free anything allocated to store the initiator,
|
||||
* for instance the cpuset.
|
||||
*
|
||||
* \p target_node cannot be \c NULL.
|
||||
*
|
||||
* \p attribute cannot be ::HWLOC_MEMATTR_ID_CAPACITY or
|
||||
* ::HWLOC_MEMATTR_ID_LOCALITY.
|
||||
*
|
||||
* \p flags must be \c 0 for now.
|
||||
*
|
||||
* \note The initiator \p initiator should be of type ::HWLOC_LOCATION_TYPE_CPUSET
|
||||
* when referring to accesses performed by CPU cores.
|
||||
* ::HWLOC_LOCATION_TYPE_OBJECT is currently unused internally by hwloc,
|
||||
* but users may for instance use it to provide custom information about
|
||||
* host memory accesses performed by GPUs.
|
||||
*
|
||||
* \return 0 on success or -1 on error.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_memattr_set_value(hwloc_topology_t topology,
|
||||
hwloc_memattr_id_t attribute,
|
||||
hwloc_obj_t target_node,
|
||||
struct hwloc_location *initiator,
|
||||
unsigned long flags,
|
||||
hwloc_uint64_t value);
|
||||
|
||||
/** @} */
|
||||
|
||||
#ifdef __cplusplus
|
||||
|
||||
15
src/3rdparty/hwloc/include/hwloc/nvml.h
vendored
15
src/3rdparty/hwloc/include/hwloc/nvml.h
vendored
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright © 2012-2020 Inria. All rights reserved.
|
||||
* Copyright © 2012-2023 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
@@ -39,7 +39,7 @@ extern "C" {
|
||||
/** \brief Get the CPU set of processors that are physically
|
||||
* close to NVML device \p device.
|
||||
*
|
||||
* Return the CPU set describing the locality of the NVML device \p device.
|
||||
* Store in \p set the CPU-set describing the locality of the NVML device \p device.
|
||||
*
|
||||
* Topology \p topology and device \p device must match the local machine.
|
||||
* I/O devices detection and the NVML component are not needed in the topology.
|
||||
@@ -51,6 +51,9 @@ extern "C" {
|
||||
*
|
||||
* This function is currently only implemented in a meaningful way for
|
||||
* Linux; other systems will simply get a full cpuset.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 on error, for instance if device information could not be found.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_nvml_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
@@ -88,8 +91,8 @@ hwloc_nvml_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
/** \brief Get the hwloc OS device object corresponding to the
|
||||
* NVML device whose index is \p idx.
|
||||
*
|
||||
* Return the OS device object describing the NVML device whose
|
||||
* index is \p idx. Returns NULL if there is none.
|
||||
* \return The hwloc OS device object describing the NVML device whose index is \p idx.
|
||||
* \return \c NULL if none could be found.
|
||||
*
|
||||
* The topology \p topology does not necessarily have to match the current
|
||||
* machine. For instance the topology may be an XML import of a remote host.
|
||||
@@ -114,8 +117,8 @@ hwloc_nvml_get_device_osdev_by_index(hwloc_topology_t topology, unsigned idx)
|
||||
|
||||
/** \brief Get the hwloc OS device object corresponding to NVML device \p device.
|
||||
*
|
||||
* Return the hwloc OS device object that describes the given
|
||||
* NVML device \p device. Return NULL if there is none.
|
||||
* \return The hwloc OS device object that describes the given NVML device \p device.
|
||||
* \return \c NULL if none could be found.
|
||||
*
|
||||
* Topology \p topology and device \p device must match the local machine.
|
||||
* I/O devices detection and the NVML component must be enabled in the topology.
|
||||
|
||||
38
src/3rdparty/hwloc/include/hwloc/opencl.h
vendored
38
src/3rdparty/hwloc/include/hwloc/opencl.h
vendored
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright © 2012-2021 Inria. All rights reserved.
|
||||
* Copyright © 2012-2023 Inria. All rights reserved.
|
||||
* Copyright © 2013, 2018 Université Bordeaux. All right reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
@@ -41,6 +41,15 @@ extern "C" {
|
||||
*/
|
||||
/* Copyright (c) 2008-2018 The Khronos Group Inc. */
|
||||
|
||||
/* needs "cl_khr_pci_bus_info" device extension, but not strictly required for clGetDeviceInfo() */
|
||||
typedef struct {
|
||||
cl_uint pci_domain;
|
||||
cl_uint pci_bus;
|
||||
cl_uint pci_device;
|
||||
cl_uint pci_function;
|
||||
} hwloc_cl_device_pci_bus_info_khr;
|
||||
#define HWLOC_CL_DEVICE_PCI_BUS_INFO_KHR 0x410F
|
||||
|
||||
/* needs "cl_amd_device_attribute_query" device extension, but not strictly required for clGetDeviceInfo() */
|
||||
#define HWLOC_CL_DEVICE_TOPOLOGY_AMD 0x4037
|
||||
typedef union {
|
||||
@@ -69,15 +78,28 @@ typedef union {
|
||||
/** \brief Return the domain, bus and device IDs of the OpenCL device \p device.
|
||||
*
|
||||
* Device \p device must match the local machine.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 on error, for instance if device information could not be found.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_opencl_get_device_pci_busid(cl_device_id device,
|
||||
unsigned *domain, unsigned *bus, unsigned *dev, unsigned *func)
|
||||
{
|
||||
hwloc_cl_device_topology_amd amdtopo;
|
||||
hwloc_cl_device_pci_bus_info_khr khrbusinfo;
|
||||
cl_uint nvbus, nvslot, nvdomain;
|
||||
cl_int clret;
|
||||
|
||||
clret = clGetDeviceInfo(device, HWLOC_CL_DEVICE_PCI_BUS_INFO_KHR, sizeof(khrbusinfo), &khrbusinfo, NULL);
|
||||
if (CL_SUCCESS == clret) {
|
||||
*domain = (unsigned) khrbusinfo.pci_domain;
|
||||
*bus = (unsigned) khrbusinfo.pci_bus;
|
||||
*dev = (unsigned) khrbusinfo.pci_device;
|
||||
*func = (unsigned) khrbusinfo.pci_function;
|
||||
return 0;
|
||||
}
|
||||
|
||||
clret = clGetDeviceInfo(device, HWLOC_CL_DEVICE_TOPOLOGY_AMD, sizeof(amdtopo), &amdtopo, NULL);
|
||||
if (CL_SUCCESS == clret
|
||||
&& HWLOC_CL_DEVICE_TOPOLOGY_TYPE_PCIE_AMD == amdtopo.raw.type) {
|
||||
@@ -113,7 +135,7 @@ hwloc_opencl_get_device_pci_busid(cl_device_id device,
|
||||
/** \brief Get the CPU set of processors that are physically
|
||||
* close to OpenCL device \p device.
|
||||
*
|
||||
* Return the CPU set describing the locality of the OpenCL device \p device.
|
||||
* Store in \p set the CPU-set describing the locality of the OpenCL device \p device.
|
||||
*
|
||||
* Topology \p topology and device \p device must match the local machine.
|
||||
* I/O devices detection and the OpenCL component are not needed in the topology.
|
||||
@@ -126,6 +148,9 @@ hwloc_opencl_get_device_pci_busid(cl_device_id device,
|
||||
* This function is currently only implemented in a meaningful way for
|
||||
* Linux with the AMD or NVIDIA OpenCL implementation; other systems will simply
|
||||
* get a full cpuset.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 on error, for instance if the device could not be found.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_opencl_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
@@ -162,10 +187,10 @@ hwloc_opencl_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unuse
|
||||
/** \brief Get the hwloc OS device object corresponding to the
|
||||
* OpenCL device for the given indexes.
|
||||
*
|
||||
* Return the OS device object describing the OpenCL device
|
||||
* \return The hwloc OS device object describing the OpenCL device
|
||||
* whose platform index is \p platform_index,
|
||||
* and whose device index within this platform is \p device_index.
|
||||
* Return NULL if there is none.
|
||||
* \return \c NULL if there is none.
|
||||
*
|
||||
* The topology \p topology does not necessarily have to match the current
|
||||
* machine. For instance the topology may be an XML import of a remote host.
|
||||
@@ -192,8 +217,9 @@ hwloc_opencl_get_device_osdev_by_index(hwloc_topology_t topology,
|
||||
|
||||
/** \brief Get the hwloc OS device object corresponding to OpenCL device \p device.
|
||||
*
|
||||
* Use OpenCL device attributes to find the corresponding hwloc OS device object.
|
||||
* Return NULL if there is none or if useful attributes are not available.
|
||||
* \return The hwloc OS device object corresponding to the given OpenCL device \p device.
|
||||
* \return \c NULL if none could be found, for instance
|
||||
* if required OpenCL attributes are not available.
|
||||
*
|
||||
* This function currently only works on AMD and NVIDIA OpenCL devices that support
|
||||
* relevant OpenCL extensions. hwloc_opencl_get_device_osdev_by_index()
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009-2023 Inria. All rights reserved.
|
||||
* Copyright © 2009-2010 Université Bordeaux
|
||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
@@ -44,7 +44,7 @@ extern "C" {
|
||||
/** \brief Get the CPU set of processors that are physically
|
||||
* close to device \p ibdev.
|
||||
*
|
||||
* Return the CPU set describing the locality of the OpenFabrics
|
||||
* Store in \p set the CPU-set describing the locality of the OpenFabrics
|
||||
* device \p ibdev (InfiniBand, etc).
|
||||
*
|
||||
* Topology \p topology and device \p ibdev must match the local machine.
|
||||
@@ -57,6 +57,9 @@ extern "C" {
|
||||
*
|
||||
* This function is currently only implemented in a meaningful way for
|
||||
* Linux; other systems will simply get a full cpuset.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 on error, for instance if device information could not be found.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_ibv_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
@@ -88,10 +91,11 @@ hwloc_ibv_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
/** \brief Get the hwloc OS device object corresponding to the OpenFabrics
|
||||
* device named \p ibname.
|
||||
*
|
||||
* Return the OS device object describing the OpenFabrics device
|
||||
* \return The hwloc OS device object describing the OpenFabrics device
|
||||
* (InfiniBand, Omni-Path, usNIC, etc) whose name is \p ibname
|
||||
* (mlx5_0, hfi1_0, usnic_0, qib0, etc).
|
||||
* Returns NULL if there is none.
|
||||
* \return \c NULL if none could be found.
|
||||
*
|
||||
* The name \p ibname is usually obtained from ibv_get_device_name().
|
||||
*
|
||||
* The topology \p topology does not necessarily have to match the current
|
||||
@@ -117,8 +121,9 @@ hwloc_ibv_get_device_osdev_by_name(hwloc_topology_t topology,
|
||||
/** \brief Get the hwloc OS device object corresponding to the OpenFabrics
|
||||
* device \p ibdev.
|
||||
*
|
||||
* Return the OS device object describing the OpenFabrics device \p ibdev
|
||||
* (InfiniBand, etc). Returns NULL if there is none.
|
||||
* \return The hwloc OS device object describing the OpenFabrics
|
||||
* device \p ibdev (InfiniBand, etc).
|
||||
* \return \c NULL if none could be found.
|
||||
*
|
||||
* Topology \p topology and device \p ibdev must match the local machine.
|
||||
* I/O devices detection must be enabled in the topology.
|
||||
|
||||
125
src/3rdparty/hwloc/include/hwloc/plugins.h
vendored
125
src/3rdparty/hwloc/include/hwloc/plugins.h
vendored
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright © 2013-2020 Inria. All rights reserved.
|
||||
* Copyright © 2013-2024 Inria. All rights reserved.
|
||||
* Copyright © 2016 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
@@ -27,6 +27,9 @@ struct hwloc_backend;
|
||||
|
||||
|
||||
/** \defgroup hwlocality_disc_components Components and Plugins: Discovery components
|
||||
*
|
||||
* \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
@@ -93,6 +96,9 @@ struct hwloc_disc_component {
|
||||
|
||||
|
||||
/** \defgroup hwlocality_disc_backends Components and Plugins: Discovery backends
|
||||
*
|
||||
* \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
@@ -158,7 +164,7 @@ struct hwloc_disc_status {
|
||||
*/
|
||||
unsigned excluded_phases;
|
||||
|
||||
/** \brief OR'ed set of hwloc_disc_status_flag_e */
|
||||
/** \brief OR'ed set of ::hwloc_disc_status_flag_e */
|
||||
unsigned long flags;
|
||||
};
|
||||
|
||||
@@ -241,6 +247,9 @@ HWLOC_DECLSPEC int hwloc_backend_enable(struct hwloc_backend *backend);
|
||||
|
||||
|
||||
/** \defgroup hwlocality_generic_components Components and Plugins: Generic components
|
||||
*
|
||||
* \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
@@ -310,12 +319,34 @@ struct hwloc_component {
|
||||
|
||||
|
||||
/** \defgroup hwlocality_components_core_funcs Components and Plugins: Core functions to be used by components
|
||||
*
|
||||
* \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/** \brief Check whether insertion errors are hidden */
|
||||
/** \brief Check whether error messages are hidden.
|
||||
*
|
||||
* Callers should print critical error messages
|
||||
* (e.g. invalid hw topo info, invalid config)
|
||||
* only if this function returns strictly less than 2.
|
||||
*
|
||||
* Callers should print non-critical error messages
|
||||
* (e.g. failure to initialize CUDA)
|
||||
* if this function returns 0.
|
||||
*
|
||||
* This function returns 1 by default (show critical only),
|
||||
* 0 in lstopo (show all),
|
||||
* or anything set in HWLOC_HIDE_ERRORS in the environment.
|
||||
*
|
||||
* Use macros HWLOC_SHOW_CRITICAL_ERRORS() and HWLOC_SHOW_ALL_ERRORS()
|
||||
* for clarity.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_hide_errors(void);
|
||||
|
||||
#define HWLOC_SHOW_CRITICAL_ERRORS() (hwloc_hide_errors() < 2)
|
||||
#define HWLOC_SHOW_ALL_ERRORS() (hwloc_hide_errors() == 0)
|
||||
|
||||
/** \brief Add an object to the topology.
|
||||
*
|
||||
* Insert new object \p obj in the topology starting under existing object \p root
|
||||
@@ -455,6 +486,9 @@ hwloc_plugin_check_namespace(const char *pluginname __hwloc_attribute_unused, co
|
||||
|
||||
|
||||
/** \defgroup hwlocality_components_filtering Components and Plugins: Filtering objects
|
||||
*
|
||||
* \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
@@ -469,9 +503,12 @@ hwloc_filter_check_pcidev_subtype_important(unsigned classid)
|
||||
return (baseclass == 0x03 /* PCI_BASE_CLASS_DISPLAY */
|
||||
|| baseclass == 0x02 /* PCI_BASE_CLASS_NETWORK */
|
||||
|| baseclass == 0x01 /* PCI_BASE_CLASS_STORAGE */
|
||||
|| baseclass == 0x00 /* Unclassified, for Atos/Bull BXI */
|
||||
|| baseclass == 0x0b /* PCI_BASE_CLASS_PROCESSOR */
|
||||
|| classid == 0x0c04 /* PCI_CLASS_SERIAL_FIBER */
|
||||
|| classid == 0x0c06 /* PCI_CLASS_SERIAL_INFINIBAND */
|
||||
|| classid == 0x0502 /* PCI_CLASS_MEMORY_CXL */
|
||||
|| baseclass == 0x06 /* PCI_BASE_CLASS_BRIDGE with non-PCI downstream. the core will drop the useless ones later */
|
||||
|| baseclass == 0x12 /* Processing Accelerators */);
|
||||
}
|
||||
|
||||
@@ -527,6 +564,9 @@ hwloc_filter_check_keep_object(hwloc_topology_t topology, hwloc_obj_t obj)
|
||||
|
||||
|
||||
/** \defgroup hwlocality_components_pcidisc Components and Plugins: helpers for PCI discovery
|
||||
*
|
||||
* \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
@@ -578,18 +618,89 @@ HWLOC_DECLSPEC int hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, st
|
||||
|
||||
|
||||
/** \defgroup hwlocality_components_pcifind Components and Plugins: finding PCI objects during other discoveries
|
||||
*
|
||||
* \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/** \brief Find the normal parent of a PCI bus ID.
|
||||
/** \brief Find the object or a parent of a PCI bus ID.
|
||||
*
|
||||
* Look at PCI affinity to find out where the given PCI bus ID should be attached.
|
||||
* When attaching a new object (typically an OS device) whose locality
|
||||
* is specified by PCI bus ID, this function returns the PCI object
|
||||
* to use as a parent for attaching.
|
||||
*
|
||||
* This function should be used to attach an I/O device under the corresponding
|
||||
* PCI object (if any), or under a normal (non-I/O) object with same locality.
|
||||
* If the exact PCI device with this bus ID exists, it is returned.
|
||||
* Otherwise (for instance if it was filtered out), the function returns
|
||||
* another object with similar locality (for instance a parent bridge,
|
||||
* or the local CPU Package).
|
||||
*/
|
||||
HWLOC_DECLSPEC struct hwloc_obj * hwloc_pci_find_parent_by_busid(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func);
|
||||
|
||||
/** \brief Find the PCI device or bridge matching a PCI bus ID exactly.
|
||||
*
|
||||
* This is useful for adding specific information about some objects
|
||||
* based on their PCI id. When it comes to attaching objects based on
|
||||
* PCI locality, hwloc_pci_find_parent_by_busid() should be preferred.
|
||||
*/
|
||||
HWLOC_DECLSPEC struct hwloc_obj * hwloc_pci_find_by_busid(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func);
|
||||
|
||||
|
||||
/** @} */
|
||||
|
||||
|
||||
|
||||
|
||||
/** \defgroup hwlocality_components_distances Components and Plugins: distances
|
||||
*
|
||||
* \note These structures and functions may change when ::HWLOC_COMPONENT_ABI is modified.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
/** \brief Handle to a new distances structure during its addition to the topology. */
|
||||
typedef void * hwloc_backend_distances_add_handle_t;
|
||||
|
||||
/** \brief Create a new empty distances structure.
|
||||
*
|
||||
* This is identical to hwloc_distances_add_create()
|
||||
* but this variant is designed for backends inserting
|
||||
* distances during topology discovery.
|
||||
*/
|
||||
HWLOC_DECLSPEC hwloc_backend_distances_add_handle_t
|
||||
hwloc_backend_distances_add_create(hwloc_topology_t topology,
|
||||
const char *name, unsigned long kind,
|
||||
unsigned long flags);
|
||||
|
||||
/** \brief Specify the objects and values in a new empty distances structure.
|
||||
*
|
||||
* This is similar to hwloc_distances_add_values()
|
||||
* but this variant is designed for backends inserting
|
||||
* distances during topology discovery.
|
||||
*
|
||||
* The only semantic difference is that \p objs and \p values
|
||||
* are not duplicated, but directly attached to the topology.
|
||||
* On success, these arrays are given to the core and should not
|
||||
* ever be freed by the caller anymore.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_backend_distances_add_values(hwloc_topology_t topology,
|
||||
hwloc_backend_distances_add_handle_t handle,
|
||||
unsigned nbobjs, hwloc_obj_t *objs,
|
||||
hwloc_uint64_t *values,
|
||||
unsigned long flags);
|
||||
|
||||
/** \brief Commit a new distances structure.
|
||||
*
|
||||
* This is similar to hwloc_distances_add_commit()
|
||||
* but this variant is designed for backends inserting
|
||||
* distances during topology discovery.
|
||||
*/
|
||||
HWLOC_DECLSPEC int
|
||||
hwloc_backend_distances_add_commit(hwloc_topology_t topology,
|
||||
hwloc_backend_distances_add_handle_t handle,
|
||||
unsigned long flags);
|
||||
|
||||
/** @} */
|
||||
|
||||
|
||||
|
||||
55
src/3rdparty/hwloc/include/hwloc/rename.h
vendored
55
src/3rdparty/hwloc/include/hwloc/rename.h
vendored
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright © 2010-2020 Inria. All rights reserved.
|
||||
* Copyright © 2010-2024 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
@@ -120,6 +120,12 @@ extern "C" {
|
||||
#define HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IS_THISSYSTEM)
|
||||
#define HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES)
|
||||
#define HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT HWLOC_NAME_CAPS(TOPOLOGY_FLAG_IMPORT_SUPPORT)
|
||||
#define HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING HWLOC_NAME_CAPS(TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING)
|
||||
#define HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_MEMBINDING HWLOC_NAME_CAPS(TOPOLOGY_FLAG_RESTRICT_TO_MEMBINDING)
|
||||
#define HWLOC_TOPOLOGY_FLAG_DONT_CHANGE_BINDING HWLOC_NAME_CAPS(TOPOLOGY_FLAG_DONT_CHANGE_BINDING)
|
||||
#define HWLOC_TOPOLOGY_FLAG_NO_DISTANCES HWLOC_NAME_CAPS(TOPOLOGY_FLAG_NO_DISTANCES)
|
||||
#define HWLOC_TOPOLOGY_FLAG_NO_MEMATTRS HWLOC_NAME_CAPS(TOPOLOGY_FLAG_NO_MEMATTRS)
|
||||
#define HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS HWLOC_NAME_CAPS(TOPOLOGY_FLAG_NO_CPUKINDS)
|
||||
|
||||
#define hwloc_topology_set_pid HWLOC_NAME(topology_set_pid)
|
||||
#define hwloc_topology_set_synthetic HWLOC_NAME(topology_set_synthetic)
|
||||
@@ -170,6 +176,7 @@ extern "C" {
|
||||
|
||||
#define hwloc_topology_insert_misc_object HWLOC_NAME(topology_insert_misc_object)
|
||||
#define hwloc_topology_alloc_group_object HWLOC_NAME(topology_alloc_group_object)
|
||||
#define hwloc_topology_free_group_object HWLOC_NAME(topology_free_group_object)
|
||||
#define hwloc_topology_insert_group_object HWLOC_NAME(topology_insert_group_object)
|
||||
#define hwloc_obj_add_other_obj_sets HWLOC_NAME(obj_add_other_obj_sets)
|
||||
#define hwloc_topology_refresh HWLOC_NAME(topology_refresh)
|
||||
@@ -203,6 +210,7 @@ extern "C" {
|
||||
|
||||
#define hwloc_obj_get_info_by_name HWLOC_NAME(obj_get_info_by_name)
|
||||
#define hwloc_obj_add_info HWLOC_NAME(obj_add_info)
|
||||
#define hwloc_obj_set_subtype HWLOC_NAME(obj_set_subtype)
|
||||
|
||||
#define HWLOC_CPUBIND_PROCESS HWLOC_NAME_CAPS(CPUBIND_PROCESS)
|
||||
#define HWLOC_CPUBIND_THREAD HWLOC_NAME_CAPS(CPUBIND_THREAD)
|
||||
@@ -225,6 +233,7 @@ extern "C" {
|
||||
#define HWLOC_MEMBIND_FIRSTTOUCH HWLOC_NAME_CAPS(MEMBIND_FIRSTTOUCH)
|
||||
#define HWLOC_MEMBIND_BIND HWLOC_NAME_CAPS(MEMBIND_BIND)
|
||||
#define HWLOC_MEMBIND_INTERLEAVE HWLOC_NAME_CAPS(MEMBIND_INTERLEAVE)
|
||||
#define HWLOC_MEMBIND_WEIGHTED_INTERLEAVE HWLOC_NAME_CAPS(MEMBIND_WEIGHTED_INTERLEAVE)
|
||||
#define HWLOC_MEMBIND_NEXTTOUCH HWLOC_NAME_CAPS(MEMBIND_NEXTTOUCH)
|
||||
#define HWLOC_MEMBIND_MIXED HWLOC_NAME_CAPS(MEMBIND_MIXED)
|
||||
|
||||
@@ -356,6 +365,7 @@ extern "C" {
|
||||
#define hwloc_get_closest_objs HWLOC_NAME(get_closest_objs)
|
||||
#define hwloc_get_obj_below_by_type HWLOC_NAME(get_obj_below_by_type)
|
||||
#define hwloc_get_obj_below_array_by_type HWLOC_NAME(get_obj_below_array_by_type)
|
||||
#define hwloc_get_obj_with_same_locality HWLOC_NAME(get_obj_with_same_locality)
|
||||
#define hwloc_distrib_flags_e HWLOC_NAME(distrib_flags_e)
|
||||
#define HWLOC_DISTRIB_FLAG_REVERSE HWLOC_NAME_CAPS(DISTRIB_FLAG_REVERSE)
|
||||
#define hwloc_distrib HWLOC_NAME(distrib)
|
||||
@@ -377,6 +387,11 @@ extern "C" {
|
||||
#define HWLOC_MEMATTR_ID_LOCALITY HWLOC_NAME_CAPS(MEMATTR_ID_LOCALITY)
|
||||
#define HWLOC_MEMATTR_ID_BANDWIDTH HWLOC_NAME_CAPS(MEMATTR_ID_BANDWIDTH)
|
||||
#define HWLOC_MEMATTR_ID_LATENCY HWLOC_NAME_CAPS(MEMATTR_ID_LATENCY)
|
||||
#define HWLOC_MEMATTR_ID_READ_BANDWIDTH HWLOC_NAME_CAPS(MEMATTR_ID_READ_BANDWIDTH)
|
||||
#define HWLOC_MEMATTR_ID_WRITE_BANDWIDTH HWLOC_NAME_CAPS(MEMATTR_ID_WRITE_BANDWIDTH)
|
||||
#define HWLOC_MEMATTR_ID_READ_LATENCY HWLOC_NAME_CAPS(MEMATTR_ID_READ_LATENCY)
|
||||
#define HWLOC_MEMATTR_ID_WRITE_LATENCY HWLOC_NAME_CAPS(MEMATTR_ID_WRITE_LATENCY)
|
||||
#define HWLOC_MEMATTR_ID_MAX HWLOC_NAME_CAPS(MEMATTR_ID_MAX)
|
||||
|
||||
#define hwloc_memattr_id_t HWLOC_NAME(memattr_id_t)
|
||||
#define hwloc_memattr_get_by_name HWLOC_NAME(memattr_get_by_name)
|
||||
@@ -454,11 +469,22 @@ extern "C" {
|
||||
#define hwloc_distances_obj_index HWLOC_NAME(distances_obj_index)
|
||||
#define hwloc_distances_obj_pair_values HWLOC_NAME(distances_pair_values)
|
||||
|
||||
#define hwloc_distances_transform_e HWLOC_NAME(distances_transform_e)
|
||||
#define HWLOC_DISTANCES_TRANSFORM_REMOVE_NULL HWLOC_NAME_CAPS(DISTANCES_TRANSFORM_REMOVE_NULL)
|
||||
#define HWLOC_DISTANCES_TRANSFORM_LINKS HWLOC_NAME_CAPS(DISTANCES_TRANSFORM_LINKS)
|
||||
#define HWLOC_DISTANCES_TRANSFORM_MERGE_SWITCH_PORTS HWLOC_NAME_CAPS(DISTANCES_TRANSFORM_MERGE_SWITCH_PORTS)
|
||||
#define HWLOC_DISTANCES_TRANSFORM_TRANSITIVE_CLOSURE HWLOC_NAME_CAPS(DISTANCES_TRANSFORM_TRANSITIVE_CLOSURE)
|
||||
#define hwloc_distances_transform HWLOC_NAME(distances_transform)
|
||||
|
||||
#define hwloc_distances_add_flag_e HWLOC_NAME(distances_add_flag_e)
|
||||
#define HWLOC_DISTANCES_ADD_FLAG_GROUP HWLOC_NAME_CAPS(DISTANCES_ADD_FLAG_GROUP)
|
||||
#define HWLOC_DISTANCES_ADD_FLAG_GROUP_INACCURATE HWLOC_NAME_CAPS(DISTANCES_ADD_FLAG_GROUP_INACCURATE)
|
||||
|
||||
#define hwloc_distances_add HWLOC_NAME(distances_add)
|
||||
#define hwloc_distances_add_handle_t HWLOC_NAME(distances_add_handle_t)
|
||||
#define hwloc_distances_add_create HWLOC_NAME(distances_add_create)
|
||||
#define hwloc_distances_add_values HWLOC_NAME(distances_add_values)
|
||||
#define hwloc_distances_add_commit HWLOC_NAME(distances_add_commit)
|
||||
|
||||
#define hwloc_distances_remove HWLOC_NAME(distances_remove)
|
||||
#define hwloc_distances_remove_by_depth HWLOC_NAME(distances_remove_by_depth)
|
||||
#define hwloc_distances_remove_by_type HWLOC_NAME(distances_remove_by_type)
|
||||
@@ -523,6 +549,11 @@ extern "C" {
|
||||
#define hwloc_linux_get_tid_last_cpu_location HWLOC_NAME(linux_get_tid_last_cpu_location)
|
||||
#define hwloc_linux_read_path_as_cpumask HWLOC_NAME(linux_read_file_cpumask)
|
||||
|
||||
/* windows.h */
|
||||
|
||||
#define hwloc_windows_get_nr_processor_groups HWLOC_NAME(windows_get_nr_processor_groups)
|
||||
#define hwloc_windows_get_processor_group_cpuset HWLOC_NAME(windows_get_processor_group_cpuset)
|
||||
|
||||
/* openfabrics-verbs.h */
|
||||
|
||||
#define hwloc_ibv_get_device_cpuset HWLOC_NAME(ibv_get_device_cpuset)
|
||||
@@ -531,6 +562,7 @@ extern "C" {
|
||||
|
||||
/* opencl.h */
|
||||
|
||||
#define hwloc_cl_device_pci_bus_info_khr HWLOC_NAME(cl_device_pci_bus_info_khr)
|
||||
#define hwloc_cl_device_topology_amd HWLOC_NAME(cl_device_topology_amd)
|
||||
#define hwloc_opencl_get_device_pci_busid HWLOC_NAME(opencl_get_device_pci_ids)
|
||||
#define hwloc_opencl_get_device_cpuset HWLOC_NAME(opencl_get_device_cpuset)
|
||||
@@ -564,6 +596,11 @@ extern "C" {
|
||||
#define hwloc_rsmi_get_device_osdev HWLOC_NAME(rsmi_get_device_osdev)
|
||||
#define hwloc_rsmi_get_device_osdev_by_index HWLOC_NAME(rsmi_get_device_osdev_by_index)
|
||||
|
||||
/* levelzero.h */
|
||||
|
||||
#define hwloc_levelzero_get_device_cpuset HWLOC_NAME(levelzero_get_device_cpuset)
|
||||
#define hwloc_levelzero_get_device_osdev HWLOC_NAME(levelzero_get_device_osdev)
|
||||
|
||||
/* gl.h */
|
||||
|
||||
#define hwloc_gl_get_display_osdev_by_port_device HWLOC_NAME(gl_get_display_osdev_by_port_device)
|
||||
@@ -620,10 +657,18 @@ extern "C" {
|
||||
#define hwloc_pcidisc_tree_insert_by_busid HWLOC_NAME(pcidisc_tree_insert_by_busid)
|
||||
#define hwloc_pcidisc_tree_attach HWLOC_NAME(pcidisc_tree_attach)
|
||||
|
||||
#define hwloc_pci_find_by_busid HWLOC_NAME(pcidisc_find_by_busid)
|
||||
#define hwloc_pci_find_parent_by_busid HWLOC_NAME(pcidisc_find_busid_parent)
|
||||
|
||||
#define hwloc_backend_distances_add_handle_t HWLOC_NAME(backend_distances_add_handle_t)
|
||||
#define hwloc_backend_distances_add_create HWLOC_NAME(backend_distances_add_create)
|
||||
#define hwloc_backend_distances_add_values HWLOC_NAME(backend_distances_add_values)
|
||||
#define hwloc_backend_distances_add_commit HWLOC_NAME(backend_distances_add_commit)
|
||||
|
||||
/* hwloc/deprecated.h */
|
||||
|
||||
#define hwloc_distances_add HWLOC_NAME(distances_add)
|
||||
|
||||
#define hwloc_topology_insert_misc_object_by_parent HWLOC_NAME(topology_insert_misc_object_by_parent)
|
||||
#define hwloc_obj_cpuset_snprintf HWLOC_NAME(obj_cpuset_snprintf)
|
||||
#define hwloc_obj_type_sscanf HWLOC_NAME(obj_type_sscanf)
|
||||
@@ -673,6 +718,8 @@ extern "C" {
|
||||
#define hwloc__obj_type_is_dcache HWLOC_NAME(_obj_type_is_dcache)
|
||||
#define hwloc__obj_type_is_icache HWLOC_NAME(_obj_type_is_icache)
|
||||
|
||||
#define hwloc__pci_link_speed HWLOC_NAME(_pci_link_speed)
|
||||
|
||||
/* private/cpuid-x86.h */
|
||||
|
||||
#define hwloc_have_x86_cpuid HWLOC_NAME(have_x86_cpuid)
|
||||
@@ -733,6 +780,7 @@ extern "C" {
|
||||
|
||||
#define hwloc_cuda_component HWLOC_NAME(cuda_component)
|
||||
#define hwloc_gl_component HWLOC_NAME(gl_component)
|
||||
#define hwloc_levelzero_component HWLOC_NAME(levelzero_component)
|
||||
#define hwloc_nvml_component HWLOC_NAME(nvml_component)
|
||||
#define hwloc_rsmi_component HWLOC_NAME(rsmi_component)
|
||||
#define hwloc_opencl_component HWLOC_NAME(opencl_component)
|
||||
@@ -772,7 +820,6 @@ extern "C" {
|
||||
#define hwloc_pci_discovery_init HWLOC_NAME(pci_discovery_init)
|
||||
#define hwloc_pci_discovery_prepare HWLOC_NAME(pci_discovery_prepare)
|
||||
#define hwloc_pci_discovery_exit HWLOC_NAME(pci_discovery_exit)
|
||||
#define hwloc_pci_find_by_busid HWLOC_NAME(pcidisc_find_by_busid)
|
||||
#define hwloc_find_insert_io_parent_by_complete_cpuset HWLOC_NAME(hwloc_find_insert_io_parent_by_complete_cpuset)
|
||||
|
||||
#define hwloc__add_info HWLOC_NAME(_add_info)
|
||||
@@ -816,7 +863,6 @@ extern "C" {
|
||||
#define hwloc_internal_distances_dup HWLOC_NAME(internal_distances_dup)
|
||||
#define hwloc_internal_distances_refresh HWLOC_NAME(internal_distances_refresh)
|
||||
#define hwloc_internal_distances_destroy HWLOC_NAME(internal_distances_destroy)
|
||||
|
||||
#define hwloc_internal_distances_add HWLOC_NAME(internal_distances_add)
|
||||
#define hwloc_internal_distances_add_by_index HWLOC_NAME(internal_distances_add_by_index)
|
||||
#define hwloc_internal_distances_invalidate_cached_objs HWLOC_NAME(hwloc_internal_distances_invalidate_cached_objs)
|
||||
@@ -830,6 +876,7 @@ extern "C" {
|
||||
#define hwloc_internal_memattrs_destroy HWLOC_NAME(internal_memattrs_destroy)
|
||||
#define hwloc_internal_memattrs_need_refresh HWLOC_NAME(internal_memattrs_need_refresh)
|
||||
#define hwloc_internal_memattrs_refresh HWLOC_NAME(internal_memattrs_refresh)
|
||||
#define hwloc_internal_memattrs_guess_memory_tiers HWLOC_NAME(internal_memattrs_guess_memory_tiers)
|
||||
|
||||
#define hwloc_internal_cpukind_s HWLOC_NAME(internal_cpukind_s)
|
||||
#define hwloc_internal_cpukinds_init HWLOC_NAME(internal_cpukinds_init)
|
||||
|
||||
17
src/3rdparty/hwloc/include/hwloc/rsmi.h
vendored
17
src/3rdparty/hwloc/include/hwloc/rsmi.h
vendored
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright © 2012-2020 Inria. All rights reserved.
|
||||
* Copyright © 2012-2023 Inria. All rights reserved.
|
||||
* Copyright (c) 2020, Advanced Micro Devices, Inc. All rights reserved.
|
||||
* Written by Advanced Micro Devices,
|
||||
* See COPYING in top-level directory.
|
||||
@@ -41,7 +41,7 @@ extern "C" {
|
||||
/** \brief Get the CPU set of logical processors that are physically
|
||||
* close to AMD GPU device whose index is \p dv_ind.
|
||||
*
|
||||
* Return the CPU set describing the locality of the AMD GPU device
|
||||
* Store in \p set the CPU-set describing the locality of the AMD GPU device
|
||||
* whose index is \p dv_ind.
|
||||
*
|
||||
* Topology \p topology and device \p dv_ind must match the local machine.
|
||||
@@ -55,6 +55,9 @@ extern "C" {
|
||||
*
|
||||
* This function is currently only implemented in a meaningful way for
|
||||
* Linux; other systems will simply get a full cpuset.
|
||||
*
|
||||
* \return 0 on success.
|
||||
* \return -1 on error, for instance if device information could not be found.
|
||||
*/
|
||||
static __hwloc_inline int
|
||||
hwloc_rsmi_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
@@ -96,8 +99,9 @@ hwloc_rsmi_get_device_cpuset(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
/** \brief Get the hwloc OS device object corresponding to the
|
||||
* AMD GPU device whose index is \p dv_ind.
|
||||
*
|
||||
* Return the OS device object describing the AMD GPU device whose
|
||||
* index is \p dv_ind. Returns NULL if there is none.
|
||||
* \return The hwloc OS device object describing the AMD GPU device whose
|
||||
* index is \p dv_ind.
|
||||
* \return \c NULL if none could be found.
|
||||
*
|
||||
* The topology \p topology does not necessarily have to match the current
|
||||
* machine. For instance the topology may be an XML import of a remote host.
|
||||
@@ -124,8 +128,9 @@ hwloc_rsmi_get_device_osdev_by_index(hwloc_topology_t topology, uint32_t dv_ind)
|
||||
/** \brief Get the hwloc OS device object corresponding to AMD GPU device,
|
||||
* whose index is \p dv_ind.
|
||||
*
|
||||
* Return the hwloc OS device object that describes the given
|
||||
* AMD GPU, whose index is \p dv_ind Return NULL if there is none.
|
||||
* \return The hwloc OS device object that describes the given
|
||||
* AMD GPU, whose index is \p dv_ind.
|
||||
* \return \c NULL if none could be found.
|
||||
*
|
||||
* Topology \p topology and device \p dv_ind must match the local machine.
|
||||
* I/O devices detection and the ROCm SMI component must be enabled in the
|
||||
|
||||
17
src/3rdparty/hwloc/include/hwloc/shmem.h
vendored
17
src/3rdparty/hwloc/include/hwloc/shmem.h
vendored
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright © 2013-2018 Inria. All rights reserved.
|
||||
* Copyright © 2013-2023 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
@@ -48,6 +48,8 @@ extern "C" {
|
||||
* This length (in bytes) must be used in hwloc_shmem_topology_write()
|
||||
* and hwloc_shmem_topology_adopt() later.
|
||||
*
|
||||
* \return the length, or -1 on error, for instance if flags are invalid.
|
||||
*
|
||||
* \note Flags \p flags are currently unused, must be 0.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_shmem_topology_get_length(hwloc_topology_t topology,
|
||||
@@ -74,9 +76,10 @@ HWLOC_DECLSPEC int hwloc_shmem_topology_get_length(hwloc_topology_t topology,
|
||||
* is not. However the caller may also allocate it manually in shared memory
|
||||
* to share it as well.
|
||||
*
|
||||
* \return -1 with errno set to EBUSY if the virtual memory mapping defined
|
||||
* \return 0 on success.
|
||||
* \return -1 with errno set to \c EBUSY if the virtual memory mapping defined
|
||||
* by \p mmap_address and \p length isn't available in the process.
|
||||
* \return -1 with errno set to EINVAL if \p fileoffset, \p mmap_address
|
||||
* \return -1 with errno set to \c EINVAL if \p fileoffset, \p mmap_address
|
||||
* or \p length aren't page-aligned.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_shmem_topology_write(hwloc_topology_t topology,
|
||||
@@ -112,14 +115,16 @@ HWLOC_DECLSPEC int hwloc_shmem_topology_write(hwloc_topology_t topology,
|
||||
*
|
||||
* \note This function takes care of calling hwloc_topology_abi_check().
|
||||
*
|
||||
* \return -1 with errno set to EBUSY if the virtual memory mapping defined
|
||||
* \return 0 on success.
|
||||
*
|
||||
* \return -1 with errno set to \c EBUSY if the virtual memory mapping defined
|
||||
* by \p mmap_address and \p length isn't available in the process.
|
||||
*
|
||||
* \return -1 with errno set to EINVAL if \p fileoffset, \p mmap_address
|
||||
* \return -1 with errno set to \c EINVAL if \p fileoffset, \p mmap_address
|
||||
* or \p length aren't page-aligned, or do not match what was given to
|
||||
* hwloc_shmem_topology_write() earlier.
|
||||
*
|
||||
* \return -1 with errno set to EINVAL if the layout of the topology structure
|
||||
* \return -1 with errno set to \c EINVAL if the layout of the topology structure
|
||||
* is different between the writer process and the adopter process.
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp,
|
||||
|
||||
76
src/3rdparty/hwloc/include/hwloc/windows.h
vendored
Normal file
76
src/3rdparty/hwloc/include/hwloc/windows.h
vendored
Normal file
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
* Copyright © 2021 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
/** \file
|
||||
* \brief Macros to help interaction between hwloc and Windows.
|
||||
*
|
||||
* Applications that use hwloc on Windows may want to include this file
|
||||
* for Windows specific hwloc features.
|
||||
*/
|
||||
|
||||
#ifndef HWLOC_WINDOWS_H
|
||||
#define HWLOC_WINDOWS_H
|
||||
|
||||
#include "hwloc.h"
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
|
||||
/** \defgroup hwlocality_windows Windows-specific helpers
|
||||
*
|
||||
* These functions query Windows processor groups.
|
||||
* These groups partition the operating system into virtual sets
|
||||
* of up to 64 neighbor PUs.
|
||||
* Threads and processes may only be bound inside a single group.
|
||||
* Although Windows processor groups may be exposed in the hwloc
|
||||
* hierarchy as hwloc Groups, they are also often merged into
|
||||
* existing hwloc objects such as NUMA nodes or Packages.
|
||||
* This API provides explicit information about Windows processor
|
||||
* groups so that applications know whether binding to a large
|
||||
* set of PUs may fail because it spans over multiple Windows
|
||||
* processor groups.
|
||||
*
|
||||
* @{
|
||||
*/
|
||||
|
||||
|
||||
/** \brief Get the number of Windows processor groups
|
||||
*
|
||||
* \p flags must be 0 for now.
|
||||
*
|
||||
* \return at least \c 1 on success.
|
||||
* \return -1 on error, for instance if the topology does not match
|
||||
* the current system (e.g. loaded from another machine through XML).
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_windows_get_nr_processor_groups(hwloc_topology_t topology, unsigned long flags);
|
||||
|
||||
/** \brief Get the CPU-set of a Windows processor group.
|
||||
*
|
||||
* Get the set of PUs included in the processor group specified
|
||||
* by \p pg_index.
|
||||
* \p pg_index must be between \c 0 and the value returned
|
||||
* by hwloc_windows_get_nr_processor_groups() minus 1.
|
||||
*
|
||||
* \p flags must be 0 for now.
|
||||
*
|
||||
* \return \c 0 on success.
|
||||
* \return \c -1 on error, for instance if \p pg_index is invalid,
|
||||
* or if the topology does not match the current system (e.g. loaded
|
||||
* from another machine through XML).
|
||||
*/
|
||||
HWLOC_DECLSPEC int hwloc_windows_get_processor_group_cpuset(hwloc_topology_t topology, unsigned pg_index, hwloc_cpuset_t cpuset, unsigned long flags);
|
||||
|
||||
/** @} */
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
|
||||
|
||||
#endif /* HWLOC_WINDOWS_H */
|
||||
@@ -17,6 +17,10 @@
|
||||
|
||||
#define HWLOC_HAVE_MSVC_CPUIDEX 1
|
||||
|
||||
/* #undef HAVE_MKSTEMP */
|
||||
|
||||
#define HWLOC_HAVE_X86_CPUID 1
|
||||
|
||||
/* Define to 1 if the system has the type `CACHE_DESCRIPTOR'. */
|
||||
#define HAVE_CACHE_DESCRIPTOR 0
|
||||
|
||||
@@ -128,8 +132,7 @@
|
||||
#define HAVE_DECL__SC_PAGE_SIZE 0
|
||||
|
||||
/* Define to 1 if you have the <dirent.h> header file. */
|
||||
/* #define HAVE_DIRENT_H 1 */
|
||||
#undef HAVE_DIRENT_H
|
||||
/* #undef HAVE_DIRENT_H */
|
||||
|
||||
/* Define to 1 if you have the <dlfcn.h> header file. */
|
||||
/* #undef HAVE_DLFCN_H */
|
||||
@@ -282,7 +285,7 @@
|
||||
#define HAVE_STRING_H 1
|
||||
|
||||
/* Define to 1 if you have the `strncasecmp' function. */
|
||||
#define HAVE_STRNCASECMP 1
|
||||
/* #undef HAVE_STRNCASECMP */
|
||||
|
||||
/* Define to '1' if sysctl is present and usable */
|
||||
/* #undef HAVE_SYSCTL */
|
||||
@@ -290,10 +293,6 @@
|
||||
/* Define to '1' if sysctlbyname is present and usable */
|
||||
/* #undef HAVE_SYSCTLBYNAME */
|
||||
|
||||
/* Define to 1 if the system has the type
|
||||
`SYSTEM_LOGICAL_PROCESSOR_INFORMATION'. */
|
||||
#define HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION 1
|
||||
|
||||
/* Define to 1 if the system has the type
|
||||
`SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX'. */
|
||||
#define HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX 1
|
||||
@@ -327,8 +326,7 @@
|
||||
/* #undef HAVE_UNAME */
|
||||
|
||||
/* Define to 1 if you have the <unistd.h> header file. */
|
||||
/* #define HAVE_UNISTD_H 1 */
|
||||
#undef HAVE_UNISTD_H
|
||||
/* #undef HAVE_UNISTD_H */
|
||||
|
||||
/* Define to 1 if you have the `uselocale' function. */
|
||||
/* #undef HAVE_USELOCALE */
|
||||
@@ -663,7 +661,7 @@
|
||||
#define hwloc_pid_t HANDLE
|
||||
|
||||
/* Define this to either strncasecmp or strncmp */
|
||||
#define hwloc_strncasecmp strncasecmp
|
||||
/* #undef hwloc_strncasecmp */
|
||||
|
||||
/* Define this to the thread ID type */
|
||||
#define hwloc_thread_t HANDLE
|
||||
|
||||
22
src/3rdparty/hwloc/include/private/cpuid-x86.h
vendored
22
src/3rdparty/hwloc/include/private/cpuid-x86.h
vendored
@@ -11,6 +11,22 @@
|
||||
#ifndef HWLOC_PRIVATE_CPUID_X86_H
|
||||
#define HWLOC_PRIVATE_CPUID_X86_H
|
||||
|
||||
/* A macro for annotating memory as initialized when building with MSAN
|
||||
* (and otherwise having no effect). See below for why this is used with
|
||||
* our custom assembly.
|
||||
*/
|
||||
#ifdef __has_feature
|
||||
#define HWLOC_HAS_FEATURE(name) __has_feature(name)
|
||||
#else
|
||||
#define HWLOC_HAS_FEATURE(name) 0
|
||||
#endif
|
||||
#if HWLOC_HAS_FEATURE(memory_sanitizer) || defined(MEMORY_SANITIZER)
|
||||
#include <sanitizer/msan_interface.h>
|
||||
#define HWLOC_ANNOTATE_MEMORY_IS_INITIALIZED(ptr, len) __msan_unpoison(ptr, len)
|
||||
#else
|
||||
#define HWLOC_ANNOTATE_MEMORY_IS_INITIALIZED(ptr, len)
|
||||
#endif
|
||||
|
||||
#if (defined HWLOC_X86_32_ARCH) && (!defined HWLOC_HAVE_MSVC_CPUIDEX)
|
||||
static __hwloc_inline int hwloc_have_x86_cpuid(void)
|
||||
{
|
||||
@@ -71,12 +87,18 @@ static __hwloc_inline void hwloc_x86_cpuid(unsigned *eax, unsigned *ebx, unsigne
|
||||
"movl %k2,%1\n\t"
|
||||
: "+a" (*eax), "=m" (*ebx), "=&r"(sav_rbx),
|
||||
"+c" (*ecx), "=&d" (*edx));
|
||||
/* MSAN does not recognize the effect of the above assembly on the memory operand
|
||||
* (`"=m"(*ebx)`). This may get improved in MSAN at some point in the future, e.g.
|
||||
* see https://github.com/llvm/llvm-project/pull/77393. */
|
||||
HWLOC_ANNOTATE_MEMORY_IS_INITIALIZED(ebx, sizeof *ebx);
|
||||
#elif defined(HWLOC_X86_32_ARCH)
|
||||
__asm__(
|
||||
"mov %%ebx,%1\n\t"
|
||||
"cpuid\n\t"
|
||||
"xchg %%ebx,%1\n\t"
|
||||
: "+a" (*eax), "=&SD" (*ebx), "+c" (*ecx), "=&d" (*edx));
|
||||
/* See above. */
|
||||
HWLOC_ANNOTATE_MEMORY_IS_INITIALIZED(ebx, sizeof *ebx);
|
||||
#else
|
||||
#error unknown architecture
|
||||
#endif
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright © 2018-2019 Inria. All rights reserved.
|
||||
* Copyright © 2018-2020 Inria. All rights reserved.
|
||||
*
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
@@ -31,6 +31,7 @@ HWLOC_DECLSPEC extern const struct hwloc_component hwloc_cuda_component;
|
||||
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_gl_component;
|
||||
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_nvml_component;
|
||||
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_rsmi_component;
|
||||
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_levelzero_component;
|
||||
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_opencl_component;
|
||||
HWLOC_DECLSPEC extern const struct hwloc_component hwloc_pci_component;
|
||||
|
||||
|
||||
35
src/3rdparty/hwloc/include/private/misc.h
vendored
35
src/3rdparty/hwloc/include/private/misc.h
vendored
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2019 Inria. All rights reserved.
|
||||
* Copyright © 2009-2024 Inria. All rights reserved.
|
||||
* Copyright © 2009-2012 Université Bordeaux
|
||||
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
@@ -504,7 +504,7 @@ hwloc__obj_type_is_icache(hwloc_obj_type_t type)
|
||||
} \
|
||||
} while(0)
|
||||
#else /* HAVE_USELOCALE */
|
||||
#if __HWLOC_HAVE_ATTRIBUTE_UNUSED
|
||||
#if HWLOC_HAVE_ATTRIBUTE_UNUSED
|
||||
#define hwloc_localeswitch_declare int __dummy_nolocale __hwloc_attribute_unused
|
||||
#define hwloc_localeswitch_init()
|
||||
#else
|
||||
@@ -573,4 +573,35 @@ typedef SSIZE_T ssize_t;
|
||||
# endif
|
||||
#endif
|
||||
|
||||
static __inline float
|
||||
hwloc__pci_link_speed(unsigned generation, unsigned lanes)
|
||||
{
|
||||
float lanespeed;
|
||||
/*
|
||||
* These are single-direction bandwidths only.
|
||||
*
|
||||
* Gen1 used NRZ with 8/10 encoding.
|
||||
* PCIe Gen1 = 2.5GT/s signal-rate per lane x 8/10 = 0.25GB/s data-rate per lane
|
||||
* PCIe Gen2 = 5 GT/s signal-rate per lane x 8/10 = 0.5 GB/s data-rate per lane
|
||||
* Gen3 switched to NRZ with 128/130 encoding.
|
||||
* PCIe Gen3 = 8 GT/s signal-rate per lane x 128/130 = 1 GB/s data-rate per lane
|
||||
* PCIe Gen4 = 16 GT/s signal-rate per lane x 128/130 = 2 GB/s data-rate per lane
|
||||
* PCIe Gen5 = 32 GT/s signal-rate per lane x 128/130 = 4 GB/s data-rate per lane
|
||||
* Gen6 switched to PAM with 242/256 FLIT (242B payload protected by 8B CRC + 6B FEC).
|
||||
* PCIe Gen6 = 64 GT/s signal-rate per lane x 242/256 = 8 GB/s data-rate per lane
|
||||
* PCIe Gen7 = 128GT/s signal-rate per lane x 242/256 = 16 GB/s data-rate per lane
|
||||
*/
|
||||
|
||||
/* lanespeed in Gbit/s */
|
||||
if (generation <= 2)
|
||||
lanespeed = 2.5f * generation * 0.8f;
|
||||
else if (generation <= 5)
|
||||
lanespeed = 8.0f * (1<<(generation-3)) * 128/130;
|
||||
else
|
||||
lanespeed = 8.0f * (1<<(generation-3)) * 242/256; /* assume Gen8 will be 256 GT/s and so on */
|
||||
|
||||
/* linkspeed in GB/s */
|
||||
return lanespeed * lanes / 8;
|
||||
}
|
||||
|
||||
#endif /* HWLOC_PRIVATE_MISC_H */
|
||||
|
||||
578
src/3rdparty/hwloc/include/private/netloc.h
vendored
578
src/3rdparty/hwloc/include/private/netloc.h
vendored
@@ -1,578 +0,0 @@
|
||||
/*
|
||||
* Copyright © 2014 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright © 2013-2014 University of Wisconsin-La Crosse.
|
||||
* All rights reserved.
|
||||
* Copyright © 2015-2017 Inria. All rights reserved.
|
||||
*
|
||||
* $COPYRIGHT$
|
||||
*
|
||||
* Additional copyrights may follow
|
||||
* See COPYING in top-level directory.
|
||||
*
|
||||
* $HEADER$
|
||||
*/
|
||||
|
||||
#ifndef _NETLOC_PRIVATE_H_
|
||||
#define _NETLOC_PRIVATE_H_
|
||||
|
||||
#include <hwloc.h>
|
||||
#include <netloc.h>
|
||||
#include <netloc/uthash.h>
|
||||
#include <netloc/utarray.h>
|
||||
#include <private/autogen/config.h>
|
||||
|
||||
#define NETLOCFILE_VERSION 1
|
||||
|
||||
#ifdef NETLOC_SCOTCH
|
||||
#include <stdint.h>
|
||||
#include <scotch.h>
|
||||
#define NETLOC_int SCOTCH_Num
|
||||
#else
|
||||
#define NETLOC_int int
|
||||
#endif
|
||||
|
||||
/*
|
||||
* "Import" a few things from hwloc
|
||||
*/
|
||||
#define __netloc_attribute_unused __hwloc_attribute_unused
|
||||
#define __netloc_attribute_malloc __hwloc_attribute_malloc
|
||||
#define __netloc_attribute_const __hwloc_attribute_const
|
||||
#define __netloc_attribute_pure __hwloc_attribute_pure
|
||||
#define __netloc_attribute_deprecated __hwloc_attribute_deprecated
|
||||
#define __netloc_attribute_may_alias __hwloc_attribute_may_alias
|
||||
#define NETLOC_DECLSPEC HWLOC_DECLSPEC
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* Types
|
||||
**********************************************************************/
|
||||
|
||||
/**
|
||||
* Definitions for Comparators
|
||||
* \sa These are the return values from the following functions:
|
||||
* netloc_network_compare, netloc_dt_edge_t_compare, netloc_dt_node_t_compare
|
||||
*/
|
||||
typedef enum {
|
||||
NETLOC_CMP_SAME = 0, /**< Compared as the Same */
|
||||
NETLOC_CMP_SIMILAR = -1, /**< Compared as Similar, but not the Same */
|
||||
NETLOC_CMP_DIFF = -2 /**< Compared as Different */
|
||||
} netloc_compare_type_t;
|
||||
|
||||
/**
|
||||
* Enumerated type for the various types of supported networks
|
||||
*/
|
||||
typedef enum {
|
||||
NETLOC_NETWORK_TYPE_ETHERNET = 1, /**< Ethernet network */
|
||||
NETLOC_NETWORK_TYPE_INFINIBAND = 2, /**< InfiniBand network */
|
||||
NETLOC_NETWORK_TYPE_INVALID = 3 /**< Invalid network */
|
||||
} netloc_network_type_t;
|
||||
|
||||
/**
|
||||
* Enumerated type for the various types of supported topologies
|
||||
*/
|
||||
typedef enum {
|
||||
NETLOC_TOPOLOGY_TYPE_INVALID = -1, /**< Invalid */
|
||||
NETLOC_TOPOLOGY_TYPE_TREE = 1, /**< Tree */
|
||||
} netloc_topology_type_t;
|
||||
|
||||
/**
|
||||
* Enumerated type for the various types of nodes
|
||||
*/
|
||||
typedef enum {
|
||||
NETLOC_NODE_TYPE_HOST = 0, /**< Host (a.k.a., network addressable endpoint - e.g., MAC Address) node */
|
||||
NETLOC_NODE_TYPE_SWITCH = 1, /**< Switch node */
|
||||
NETLOC_NODE_TYPE_INVALID = 2 /**< Invalid node */
|
||||
} netloc_node_type_t;
|
||||
|
||||
typedef enum {
|
||||
NETLOC_ARCH_TREE = 0, /* Fat tree */
|
||||
} netloc_arch_type_t;
|
||||
|
||||
|
||||
/* Pre declarations to avoid inter dependency problems */
|
||||
/** \cond IGNORE */
|
||||
struct netloc_topology_t;
|
||||
typedef struct netloc_topology_t netloc_topology_t;
|
||||
struct netloc_node_t;
|
||||
typedef struct netloc_node_t netloc_node_t;
|
||||
struct netloc_edge_t;
|
||||
typedef struct netloc_edge_t netloc_edge_t;
|
||||
struct netloc_physical_link_t;
|
||||
typedef struct netloc_physical_link_t netloc_physical_link_t;
|
||||
struct netloc_path_t;
|
||||
typedef struct netloc_path_t netloc_path_t;
|
||||
|
||||
struct netloc_arch_tree_t;
|
||||
typedef struct netloc_arch_tree_t netloc_arch_tree_t;
|
||||
struct netloc_arch_node_t;
|
||||
typedef struct netloc_arch_node_t netloc_arch_node_t;
|
||||
struct netloc_arch_node_slot_t;
|
||||
typedef struct netloc_arch_node_slot_t netloc_arch_node_slot_t;
|
||||
struct netloc_arch_t;
|
||||
typedef struct netloc_arch_t netloc_arch_t;
|
||||
/** \endcond */
|
||||
|
||||
/**
|
||||
* \struct netloc_topology_t
|
||||
* \brief Netloc Topology Context
|
||||
*
|
||||
* An opaque data structure used to reference a network topology.
|
||||
*
|
||||
* \note Must be initialized with \ref netloc_topology_construct()
|
||||
*/
|
||||
struct netloc_topology_t {
|
||||
/** Topology path */
|
||||
char *topopath;
|
||||
/** Subnet ID */
|
||||
char *subnet_id;
|
||||
|
||||
/** Node List */
|
||||
netloc_node_t *nodes; /* Hash table of nodes by physical_id */
|
||||
netloc_node_t *nodesByHostname; /* Hash table of nodes by hostname */
|
||||
|
||||
netloc_physical_link_t *physical_links; /* Hash table with physical links */
|
||||
|
||||
/** Partition List */
|
||||
UT_array *partitions;
|
||||
|
||||
/** Hwloc topology List */
|
||||
char *hwlocpath;
|
||||
UT_array *topos;
|
||||
hwloc_topology_t *hwloc_topos;
|
||||
|
||||
/** Type of the graph */
|
||||
netloc_topology_type_t type;
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Netloc Node Type
|
||||
*
|
||||
* Represents the concept of a node (a.k.a., vertex, endpoint) within a network
|
||||
* graph. This could be a server or a network switch. The \ref node_type parameter
|
||||
* will distinguish the exact type of node this represents in the graph.
|
||||
*/
|
||||
struct netloc_node_t {
|
||||
UT_hash_handle hh; /* makes this structure hashable with physical_id */
|
||||
UT_hash_handle hh2; /* makes this structure hashable with hostname */
|
||||
|
||||
/** Physical ID of the node */
|
||||
char physical_id[20];
|
||||
|
||||
/** Logical ID of the node (if any) */
|
||||
int logical_id;
|
||||
|
||||
/** Type of the node */
|
||||
netloc_node_type_t type;
|
||||
|
||||
/* Pointer to physical_links */
|
||||
UT_array *physical_links;
|
||||
|
||||
/** Description information from discovery (if any) */
|
||||
char *description;
|
||||
|
||||
/**
|
||||
* Application-given private data pointer.
|
||||
* Initialized to NULL, and not used by the netloc library.
|
||||
*/
|
||||
void * userdata;
|
||||
|
||||
/** Outgoing edges from this node */
|
||||
netloc_edge_t *edges;
|
||||
|
||||
UT_array *subnodes; /* the group of nodes for the virtual nodes */
|
||||
|
||||
netloc_path_t *paths;
|
||||
|
||||
char *hostname;
|
||||
|
||||
UT_array *partitions; /* index in the list from the topology */
|
||||
|
||||
hwloc_topology_t hwlocTopo;
|
||||
int hwlocTopoIdx;
|
||||
};
|
||||
|
||||
/**
|
||||
* \brief Netloc Edge Type
|
||||
*
|
||||
* Represents the concept of a directed edge within a network graph.
|
||||
*
|
||||
* \note We do not point to the netloc_node_t structure directly to
|
||||
* simplify the representation, and allow the information to more easily
|
||||
* be entered into the data store without circular references.
|
||||
* \todo JJH Is the note above still true?
|
||||
*/
|
||||
struct netloc_edge_t {
|
||||
UT_hash_handle hh; /* makes this structure hashable */
|
||||
|
||||
netloc_node_t *dest;
|
||||
|
||||
int id;
|
||||
|
||||
/** Pointers to the parent node */
|
||||
netloc_node_t *node;
|
||||
|
||||
/* Pointer to physical_links */
|
||||
UT_array *physical_links;
|
||||
|
||||
/** total gbits of the links */
|
||||
float total_gbits;
|
||||
|
||||
UT_array *partitions; /* index in the list from the topology */
|
||||
|
||||
UT_array *subnode_edges; /* for edges going to virtual nodes */
|
||||
|
||||
struct netloc_edge_t *other_way;
|
||||
|
||||
/**
|
||||
* Application-given private data pointer.
|
||||
* Initialized to NULL, and not used by the netloc library.
|
||||
*/
|
||||
void * userdata;
|
||||
};
|
||||
|
||||
|
||||
struct netloc_physical_link_t {
|
||||
UT_hash_handle hh; /* makes this structure hashable */
|
||||
|
||||
int id; // TODO long long
|
||||
netloc_node_t *src;
|
||||
netloc_node_t *dest;
|
||||
int ports[2];
|
||||
char *width;
|
||||
char *speed;
|
||||
|
||||
netloc_edge_t *edge;
|
||||
|
||||
int other_way_id;
|
||||
struct netloc_physical_link_t *other_way;
|
||||
|
||||
UT_array *partitions; /* index in the list from the topology */
|
||||
|
||||
/** gbits of the link from speed and width */
|
||||
float gbits;
|
||||
|
||||
/** Description information from discovery (if any) */
|
||||
char *description;
|
||||
};
|
||||
|
||||
struct netloc_path_t {
|
||||
UT_hash_handle hh; /* makes this structure hashable */
|
||||
char dest_id[20];
|
||||
UT_array *links;
|
||||
};
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* Architecture structures
|
||||
**********************************************************************/
|
||||
struct netloc_arch_tree_t {
|
||||
NETLOC_int num_levels;
|
||||
NETLOC_int *degrees;
|
||||
NETLOC_int *cost;
|
||||
};
|
||||
|
||||
struct netloc_arch_node_t {
|
||||
UT_hash_handle hh; /* makes this structure hashable */
|
||||
char *name; /* Hash key */
|
||||
netloc_node_t *node; /* Corresponding node */
|
||||
int idx_in_topo; /* idx with ghost hosts to have complete topo */
|
||||
int num_slots; /* it is not the real number of slots but the maximum slot idx */
|
||||
int *slot_idx; /* corresponding idx in slot_tree */
|
||||
int *slot_os_idx; /* corresponding os index for each leaf in tree */
|
||||
netloc_arch_tree_t *slot_tree; /* Tree built from hwloc */
|
||||
int num_current_slots; /* Number of PUs */
|
||||
NETLOC_int *current_slots; /* indices in the complete tree */
|
||||
int *slot_ranks; /* corresponding MPI rank for each leaf in tree */
|
||||
};
|
||||
|
||||
struct netloc_arch_node_slot_t {
|
||||
netloc_arch_node_t *node;
|
||||
int slot;
|
||||
};
|
||||
|
||||
struct netloc_arch_t {
|
||||
netloc_topology_t *topology;
|
||||
int has_slots; /* if slots are included in the architecture */
|
||||
netloc_arch_type_t type;
|
||||
union {
|
||||
netloc_arch_tree_t *node_tree;
|
||||
netloc_arch_tree_t *global_tree;
|
||||
} arch;
|
||||
netloc_arch_node_t *nodes_by_name;
|
||||
netloc_arch_node_slot_t *node_slot_by_idx; /* node_slot by index in complete topo */
|
||||
NETLOC_int num_current_hosts; /* if has_slots, host is a slot, else host is a node */
|
||||
NETLOC_int *current_hosts; /* indices in the complete topology */
|
||||
};
|
||||
|
||||
/**********************************************************************
|
||||
* Topology Functions
|
||||
**********************************************************************/
|
||||
/**
|
||||
* Allocate a topology handle.
|
||||
*
|
||||
* User is responsible for calling \ref netloc_detach on the topology handle.
|
||||
* The network parameter information is deep copied into the topology handle, so the
|
||||
* user may destruct the network handle after calling this function and/or reuse
|
||||
* the network handle.
|
||||
*
|
||||
* \returns NETLOC_SUCCESS on success
|
||||
* \returns NETLOC_ERROR upon an error.
|
||||
*/
|
||||
netloc_topology_t *netloc_topology_construct(char *path);
|
||||
|
||||
/**
|
||||
* Destruct a topology handle
|
||||
*
|
||||
* \param topology A valid pointer to a \ref netloc_topology_t handle created
|
||||
* from a prior call to \ref netloc_topology_construct.
|
||||
*
|
||||
* \returns NETLOC_SUCCESS on success
|
||||
* \returns NETLOC_ERROR upon an error.
|
||||
*/
|
||||
int netloc_topology_destruct(netloc_topology_t *topology);
|
||||
|
||||
int netloc_topology_find_partition_idx(netloc_topology_t *topology, char *partition_name);
|
||||
|
||||
int netloc_topology_read_hwloc(netloc_topology_t *topology, int num_nodes,
|
||||
netloc_node_t **node_list);
|
||||
|
||||
#define netloc_topology_iter_partitions(topology,partition) \
|
||||
for ((partition) = (char **)utarray_front(topology->partitions); \
|
||||
(partition) != NULL; \
|
||||
(partition) = (char **)utarray_next(topology->partitions, partition))
|
||||
|
||||
#define netloc_topology_iter_hwloctopos(topology,hwloctopo) \
|
||||
for ((hwloctopo) = (char **)utarray_front(topology->topos); \
|
||||
(hwloctopo) != NULL; \
|
||||
(hwloctopo) = (char **)utarray_next(topology->topos, hwloctopo))
|
||||
|
||||
#define netloc_topology_find_node(topology,node_id,node) \
|
||||
HASH_FIND_STR(topology->nodes, node_id, node)
|
||||
|
||||
#define netloc_topology_iter_nodes(topology,node,_tmp) \
|
||||
HASH_ITER(hh, topology->nodes, node, _tmp)
|
||||
|
||||
#define netloc_topology_num_nodes(topology) \
|
||||
HASH_COUNT(topology->nodes)
|
||||
|
||||
/*************************************************/
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for netloc_node_t
|
||||
*
|
||||
* User is responsible for calling the destructor on the handle.
|
||||
*
|
||||
* Returns
|
||||
* A newly allocated pointer to the network information.
|
||||
*/
|
||||
netloc_node_t *netloc_node_construct(void);
|
||||
|
||||
/**
|
||||
* Destructor for netloc_node_t
|
||||
*
|
||||
* \param node A valid node handle
|
||||
*
|
||||
* Returns
|
||||
* NETLOC_SUCCESS on success
|
||||
* NETLOC_ERROR on error
|
||||
*/
|
||||
int netloc_node_destruct(netloc_node_t *node);
|
||||
|
||||
char *netloc_node_pretty_print(netloc_node_t* node);
|
||||
|
||||
#define netloc_node_get_num_subnodes(node) \
|
||||
utarray_len((node)->subnodes)
|
||||
|
||||
#define netloc_node_get_subnode(node,i) \
|
||||
(*(netloc_node_t **)utarray_eltptr((node)->subnodes, (i)))
|
||||
|
||||
#define netloc_node_get_num_edges(node) \
|
||||
utarray_len((node)->edges)
|
||||
|
||||
#define netloc_node_get_edge(node,i) \
|
||||
(*(netloc_edge_t **)utarray_eltptr((node)->edges, (i)))
|
||||
|
||||
#define netloc_node_iter_edges(node,edge,_tmp) \
|
||||
HASH_ITER(hh, node->edges, edge, _tmp)
|
||||
|
||||
#define netloc_node_iter_paths(node,path,_tmp) \
|
||||
HASH_ITER(hh, node->paths, path, _tmp)
|
||||
|
||||
#define netloc_node_is_host(node) \
|
||||
(node->type == NETLOC_NODE_TYPE_HOST)
|
||||
|
||||
#define netloc_node_is_switch(node) \
|
||||
(node->type == NETLOC_NODE_TYPE_SWITCH)
|
||||
|
||||
#define netloc_node_iter_paths(node, path,_tmp) \
|
||||
HASH_ITER(hh, node->paths, path, _tmp)
|
||||
|
||||
int netloc_node_is_in_partition(netloc_node_t *node, int partition);
|
||||
|
||||
/*************************************************/
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for netloc_edge_t
|
||||
*
|
||||
* User is responsible for calling the destructor on the handle.
|
||||
*
|
||||
* Returns
|
||||
* A newly allocated pointer to the edge information.
|
||||
*/
|
||||
netloc_edge_t *netloc_edge_construct(void);
|
||||
|
||||
/**
|
||||
* Destructor for netloc_edge_t
|
||||
*
|
||||
* \param edge A valid edge handle
|
||||
*
|
||||
* Returns
|
||||
* NETLOC_SUCCESS on success
|
||||
* NETLOC_ERROR on error
|
||||
*/
|
||||
int netloc_edge_destruct(netloc_edge_t *edge);
|
||||
|
||||
char * netloc_edge_pretty_print(netloc_edge_t* edge);
|
||||
|
||||
void netloc_edge_reset_uid(void);
|
||||
|
||||
int netloc_edge_is_in_partition(netloc_edge_t *edge, int partition);
|
||||
|
||||
#define netloc_edge_get_num_links(edge) \
|
||||
utarray_len((edge)->physical_links)
|
||||
|
||||
#define netloc_edge_get_link(edge,i) \
|
||||
(*(netloc_physical_link_t **)utarray_eltptr((edge)->physical_links, (i)))
|
||||
|
||||
#define netloc_edge_get_num_subedges(edge) \
|
||||
utarray_len((edge)->subnode_edges)
|
||||
|
||||
#define netloc_edge_get_subedge(edge,i) \
|
||||
(*(netloc_edge_t **)utarray_eltptr((edge)->subnode_edges, (i)))
|
||||
|
||||
/*************************************************/
|
||||
|
||||
|
||||
/**
|
||||
* Constructor for netloc_physical_link_t
|
||||
*
|
||||
* User is responsible for calling the destructor on the handle.
|
||||
*
|
||||
* Returns
|
||||
* A newly allocated pointer to the physical link information.
|
||||
*/
|
||||
netloc_physical_link_t * netloc_physical_link_construct(void);
|
||||
|
||||
/**
|
||||
* Destructor for netloc_physical_link_t
|
||||
*
|
||||
* Returns
|
||||
* NETLOC_SUCCESS on success
|
||||
* NETLOC_ERROR on error
|
||||
*/
|
||||
int netloc_physical_link_destruct(netloc_physical_link_t *link);
|
||||
|
||||
char * netloc_link_pretty_print(netloc_physical_link_t* link);
|
||||
|
||||
/*************************************************/
|
||||
|
||||
|
||||
netloc_path_t *netloc_path_construct(void);
|
||||
int netloc_path_destruct(netloc_path_t *path);
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* Architecture functions
|
||||
**********************************************************************/
|
||||
|
||||
netloc_arch_t * netloc_arch_construct(void);
|
||||
|
||||
int netloc_arch_destruct(netloc_arch_t *arch);
|
||||
|
||||
int netloc_arch_build(netloc_arch_t *arch, int add_slots);
|
||||
|
||||
int netloc_arch_set_current_resources(netloc_arch_t *arch);
|
||||
|
||||
int netloc_arch_set_global_resources(netloc_arch_t *arch);
|
||||
|
||||
int netloc_arch_node_get_hwloc_info(netloc_arch_node_t *arch);
|
||||
|
||||
void netloc_arch_tree_complete(netloc_arch_tree_t *tree, UT_array **down_degrees_by_level,
|
||||
int num_hosts, int **parch_idx);
|
||||
|
||||
NETLOC_int netloc_arch_tree_num_leaves(netloc_arch_tree_t *tree);
|
||||
|
||||
|
||||
/**********************************************************************
|
||||
* Access functions of various elements of the topology
|
||||
**********************************************************************/
|
||||
|
||||
#define netloc_get_num_partitions(object) \
|
||||
utarray_len((object)->partitions)
|
||||
|
||||
#define netloc_get_partition(object,i) \
|
||||
(*(int *)utarray_eltptr((object)->partitions, (i)))
|
||||
|
||||
|
||||
#define netloc_path_iter_links(path,link) \
|
||||
for ((link) = (netloc_physical_link_t **)utarray_front(path->links); \
|
||||
(link) != NULL; \
|
||||
(link) = (netloc_physical_link_t **)utarray_next(path->links, link))
|
||||
|
||||
/**********************************************************************
|
||||
* Misc functions
|
||||
**********************************************************************/
|
||||
|
||||
/**
|
||||
* Decode the network type
|
||||
*
|
||||
* \param net_type A valid member of the \ref netloc_network_type_t type
|
||||
*
|
||||
* \returns NULL if the type is invalid
|
||||
* \returns A string for that \ref netloc_network_type_t type
|
||||
*/
|
||||
static inline const char * netloc_network_type_decode(netloc_network_type_t net_type) {
|
||||
if( NETLOC_NETWORK_TYPE_ETHERNET == net_type ) {
|
||||
return "ETH";
|
||||
}
|
||||
else if( NETLOC_NETWORK_TYPE_INFINIBAND == net_type ) {
|
||||
return "IB";
|
||||
}
|
||||
else {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
/**
|
||||
* Decode the node type
|
||||
*
|
||||
* \param node_type A valid member of the \ref netloc_node_type_t type
|
||||
*
|
||||
* \returns NULL if the type is invalid
|
||||
* \returns A string for that \ref netloc_node_type_t type
|
||||
*/
|
||||
static inline const char * netloc_node_type_decode(netloc_node_type_t node_type) {
|
||||
if( NETLOC_NODE_TYPE_SWITCH == node_type ) {
|
||||
return "SW";
|
||||
}
|
||||
else if( NETLOC_NODE_TYPE_HOST == node_type ) {
|
||||
return "CA";
|
||||
}
|
||||
else {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
ssize_t netloc_line_get(char **lineptr, size_t *n, FILE *stream);
|
||||
|
||||
char *netloc_line_get_next_token(char **string, char c);
|
||||
|
||||
int netloc_build_comm_mat(char *filename, int *pn, double ***pmat);
|
||||
|
||||
#define STRDUP_IF_NOT_NULL(str) (NULL == str ? NULL : strdup(str))
|
||||
#define STR_EMPTY_IF_NULL(str) (NULL == str ? "" : str)
|
||||
|
||||
|
||||
#endif // _NETLOC_PRIVATE_H_
|
||||
26
src/3rdparty/hwloc/include/private/private.h
vendored
26
src/3rdparty/hwloc/include/private/private.h
vendored
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009-2023 Inria. All rights reserved.
|
||||
* Copyright © 2009-2012, 2020 Université Bordeaux
|
||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
*
|
||||
@@ -166,6 +166,7 @@ struct hwloc_topology {
|
||||
unsigned long kind;
|
||||
|
||||
#define HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID (1U<<0) /* if the objs array is valid below */
|
||||
#define HWLOC_INTERNAL_DIST_FLAG_NOT_COMMITTED (1U<<1) /* if the distances isn't in the list yet */
|
||||
unsigned iflags;
|
||||
|
||||
/* objects are currently stored in physical_index order */
|
||||
@@ -244,6 +245,12 @@ struct hwloc_topology {
|
||||
* temporary variables during discovery
|
||||
*/
|
||||
|
||||
/* set to 1 at the beginning of load() if the filter of any cpu cache type (L1 to L3i) is not NONE,
|
||||
* may be checked by backends before querying caches
|
||||
* (when they don't know the level of caches they are querying).
|
||||
*/
|
||||
int want_some_cpu_caches;
|
||||
|
||||
/* machine-wide memory.
|
||||
* temporarily stored there by OSes that only provide this without NUMA information,
|
||||
* and actually used later by the core.
|
||||
@@ -258,6 +265,7 @@ struct hwloc_topology {
|
||||
unsigned bus_first, bus_last;
|
||||
hwloc_bitmap_t cpuset;
|
||||
} * pci_forced_locality;
|
||||
hwloc_uint64_t pci_locality_quirks;
|
||||
|
||||
/* component blacklisting */
|
||||
unsigned nr_blacklisted_components;
|
||||
@@ -304,11 +312,6 @@ extern void hwloc_pci_discovery_init(struct hwloc_topology *topology);
|
||||
extern void hwloc_pci_discovery_prepare(struct hwloc_topology *topology);
|
||||
extern void hwloc_pci_discovery_exit(struct hwloc_topology *topology);
|
||||
|
||||
/* Look for an object matching the given domain/bus/func,
|
||||
* either exactly or return the smallest container bridge
|
||||
*/
|
||||
extern struct hwloc_obj * hwloc_pci_find_by_busid(struct hwloc_topology *topology, unsigned domain, unsigned bus, unsigned dev, unsigned func);
|
||||
|
||||
/* Look for an object matching complete cpuset exactly, or insert one.
|
||||
* Return NULL on failure.
|
||||
* Return a good fallback (object above) on failure to insert.
|
||||
@@ -408,10 +411,14 @@ extern void hwloc_internal_distances_prepare(hwloc_topology_t topology);
|
||||
extern void hwloc_internal_distances_destroy(hwloc_topology_t topology);
|
||||
extern int hwloc_internal_distances_dup(hwloc_topology_t new, hwloc_topology_t old);
|
||||
extern void hwloc_internal_distances_refresh(hwloc_topology_t topology);
|
||||
extern int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values, unsigned long kind, unsigned long flags);
|
||||
extern int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, const char *name, hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, unsigned nbobjs, uint64_t *indexes, uint64_t *values, unsigned long kind, unsigned long flags);
|
||||
extern void hwloc_internal_distances_invalidate_cached_objs(hwloc_topology_t topology);
|
||||
|
||||
/* these distances_add() functions are higher-level than those in hwloc/plugins.h
|
||||
* but they may change in the future, hence they are not exported to plugins.
|
||||
*/
|
||||
extern int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, const char *name, hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, unsigned nbobjs, uint64_t *indexes, uint64_t *values, unsigned long kind, unsigned long flags);
|
||||
extern int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values, unsigned long kind, unsigned long flags);
|
||||
|
||||
extern void hwloc_internal_memattrs_init(hwloc_topology_t topology);
|
||||
extern void hwloc_internal_memattrs_prepare(hwloc_topology_t topology);
|
||||
extern void hwloc_internal_memattrs_destroy(hwloc_topology_t topology);
|
||||
@@ -419,6 +426,7 @@ extern void hwloc_internal_memattrs_need_refresh(hwloc_topology_t topology);
|
||||
extern void hwloc_internal_memattrs_refresh(hwloc_topology_t topology);
|
||||
extern int hwloc_internal_memattrs_dup(hwloc_topology_t new, hwloc_topology_t old);
|
||||
extern int hwloc_internal_memattr_set_value(hwloc_topology_t topology, hwloc_memattr_id_t id, hwloc_obj_type_t target_type, hwloc_uint64_t target_gp_index, unsigned target_os_index, struct hwloc_internal_location_s *initiator, hwloc_uint64_t value);
|
||||
extern int hwloc_internal_memattrs_guess_memory_tiers(hwloc_topology_t topology, int force_subtype);
|
||||
|
||||
extern void hwloc_internal_cpukinds_init(hwloc_topology_t topology);
|
||||
extern int hwloc_internal_cpukinds_rank(hwloc_topology_t topology);
|
||||
@@ -475,11 +483,13 @@ extern char * hwloc_progname(struct hwloc_topology *topology);
|
||||
#define HWLOC_GROUP_KIND_INTEL_DIE 104 /* no subkind */
|
||||
#define HWLOC_GROUP_KIND_S390_BOOK 110 /* subkind 0 is book, subkind 1 is drawer (group of books) */
|
||||
#define HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT 120 /* no subkind */
|
||||
#define HWLOC_GROUP_KIND_AMD_COMPLEX 121 /* no subkind */
|
||||
/* then, OS-specific groups */
|
||||
#define HWLOC_GROUP_KIND_SOLARIS_PG_HW_PERF 200 /* subkind is group width */
|
||||
#define HWLOC_GROUP_KIND_AIX_SDL_UNKNOWN 210 /* subkind is SDL level */
|
||||
#define HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP 220 /* no subkind */
|
||||
#define HWLOC_GROUP_KIND_WINDOWS_RELATIONSHIP_UNKNOWN 221 /* no subkind */
|
||||
#define HWLOC_GROUP_KIND_LINUX_CLUSTER 222 /* no subkind */
|
||||
/* distance groups */
|
||||
#define HWLOC_GROUP_KIND_DISTANCE 900 /* subkind is round of adding these groups during distance based grouping */
|
||||
/* finally, hwloc-specific groups required to insert something else, should disappear as soon as possible */
|
||||
|
||||
30
src/3rdparty/hwloc/include/private/windows.h
vendored
Normal file
30
src/3rdparty/hwloc/include/private/windows.h
vendored
Normal file
@@ -0,0 +1,30 @@
|
||||
/*
|
||||
* Copyright © 2009 Université Bordeaux
|
||||
* Copyright © 2020-2022 Inria. All rights reserved.
|
||||
*
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
#ifndef HWLOC_PRIVATE_WINDOWS_H
|
||||
#define HWLOC_PRIVATE_WINDOWS_H
|
||||
|
||||
#ifndef _ANONYMOUS_UNION
|
||||
#ifdef __GNUC__
|
||||
#define _ANONYMOUS_UNION __extension__
|
||||
#else
|
||||
#define _ANONYMOUS_UNION
|
||||
#endif /* __GNUC__ */
|
||||
#endif /* _ANONYMOUS_UNION */
|
||||
|
||||
#ifndef _ANONYMOUS_STRUCT
|
||||
#ifdef __GNUC__
|
||||
#define _ANONYMOUS_STRUCT __extension__
|
||||
#else
|
||||
#define _ANONYMOUS_STRUCT
|
||||
#endif /* __GNUC__ */
|
||||
#endif /* _ANONYMOUS_STRUCT */
|
||||
|
||||
#define DUMMYUNIONNAME
|
||||
#define DUMMYSTRUCTNAME
|
||||
|
||||
#endif /* HWLOC_PRIVATE_WINDOWS_H */
|
||||
8
src/3rdparty/hwloc/include/private/xml.h
vendored
8
src/3rdparty/hwloc/include/private/xml.h
vendored
@@ -19,13 +19,14 @@ HWLOC_DECLSPEC int hwloc__xml_verbose(void);
|
||||
typedef struct hwloc__xml_import_state_s {
|
||||
struct hwloc__xml_import_state_s *parent;
|
||||
|
||||
/* globals shared because the entire stack of states during import */
|
||||
/* globals shared between the entire stack of states during import */
|
||||
struct hwloc_xml_backend_data_s *global;
|
||||
|
||||
/* opaque data used to store backend-specific data.
|
||||
* statically allocated to allow stack-allocation by the common code without knowing actual backend needs.
|
||||
* libxml is 3 ptrs. nolibxml is 3 ptr + one int.
|
||||
*/
|
||||
char data[32];
|
||||
char data[4 * SIZEOF_VOID_P];
|
||||
} * hwloc__xml_import_state_t;
|
||||
|
||||
struct hwloc__xml_imported_v1distances_s {
|
||||
@@ -74,8 +75,9 @@ typedef struct hwloc__xml_export_state_s {
|
||||
|
||||
/* opaque data used to store backend-specific data.
|
||||
* statically allocated to allow stack-allocation by the common code without knowing actual backend needs.
|
||||
* libxml is 1 ptr. nolibxml is 1 ptr + 2 size_t + 3 ints.
|
||||
*/
|
||||
char data[40];
|
||||
char data[6 * SIZEOF_VOID_P];
|
||||
} * hwloc__xml_export_state_t;
|
||||
|
||||
HWLOC_DECLSPEC void hwloc__xml_export_topology(hwloc__xml_export_state_t parentstate, hwloc_topology_t topology, unsigned long flags);
|
||||
|
||||
3
src/3rdparty/hwloc/src/bind.c
vendored
3
src/3rdparty/hwloc/src/bind.c
vendored
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009-2024 Inria. All rights reserved.
|
||||
* Copyright © 2009-2010, 2012 Université Bordeaux
|
||||
* Copyright © 2011-2015 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
@@ -287,6 +287,7 @@ static __hwloc_inline int hwloc__check_membind_policy(hwloc_membind_policy_t pol
|
||||
|| policy == HWLOC_MEMBIND_FIRSTTOUCH
|
||||
|| policy == HWLOC_MEMBIND_BIND
|
||||
|| policy == HWLOC_MEMBIND_INTERLEAVE
|
||||
|| policy == HWLOC_MEMBIND_WEIGHTED_INTERLEAVE
|
||||
|| policy == HWLOC_MEMBIND_NEXTTOUCH)
|
||||
return 0;
|
||||
return -1;
|
||||
|
||||
59
src/3rdparty/hwloc/src/bitmap.c
vendored
59
src/3rdparty/hwloc/src/bitmap.c
vendored
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009-2024 Inria. All rights reserved.
|
||||
* Copyright © 2009-2011 Université Bordeaux
|
||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
@@ -245,6 +245,7 @@ int hwloc_bitmap_copy(struct hwloc_bitmap_s * dst, const struct hwloc_bitmap_s *
|
||||
/* Strings always use 32bit groups */
|
||||
#define HWLOC_PRIxSUBBITMAP "%08lx"
|
||||
#define HWLOC_BITMAP_SUBSTRING_SIZE 32
|
||||
#define HWLOC_BITMAP_SUBSTRING_FULL_VALUE 0xFFFFFFFFUL
|
||||
#define HWLOC_BITMAP_SUBSTRING_LENGTH (HWLOC_BITMAP_SUBSTRING_SIZE/4)
|
||||
#define HWLOC_BITMAP_STRING_PER_LONG (HWLOC_BITS_PER_LONG/HWLOC_BITMAP_SUBSTRING_SIZE)
|
||||
|
||||
@@ -261,6 +262,7 @@ int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, const stru
|
||||
const unsigned long accum_mask = ~0UL;
|
||||
#else /* HWLOC_BITS_PER_LONG != HWLOC_BITMAP_SUBSTRING_SIZE */
|
||||
const unsigned long accum_mask = ((1UL << HWLOC_BITMAP_SUBSTRING_SIZE) - 1) << (HWLOC_BITS_PER_LONG - HWLOC_BITMAP_SUBSTRING_SIZE);
|
||||
int merge_with_infinite_prefix = 0;
|
||||
#endif /* HWLOC_BITS_PER_LONG != HWLOC_BITMAP_SUBSTRING_SIZE */
|
||||
|
||||
HWLOC__BITMAP_CHECK(set);
|
||||
@@ -279,6 +281,9 @@ int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, const stru
|
||||
res = size>0 ? (int)size - 1 : 0;
|
||||
tmp += res;
|
||||
size -= res;
|
||||
#if HWLOC_BITS_PER_LONG > HWLOC_BITMAP_SUBSTRING_SIZE
|
||||
merge_with_infinite_prefix = 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
i=(int) set->ulongs_count-1;
|
||||
@@ -294,16 +299,24 @@ int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, const stru
|
||||
}
|
||||
|
||||
while (i>=0 || accumed) {
|
||||
unsigned long value;
|
||||
|
||||
/* Refill accumulator */
|
||||
if (!accumed) {
|
||||
accum = set->ulongs[i--];
|
||||
accumed = HWLOC_BITS_PER_LONG;
|
||||
}
|
||||
value = (accum & accum_mask) >> (HWLOC_BITS_PER_LONG - HWLOC_BITMAP_SUBSTRING_SIZE);
|
||||
|
||||
if (accum & accum_mask) {
|
||||
#if HWLOC_BITS_PER_LONG > HWLOC_BITMAP_SUBSTRING_SIZE
|
||||
if (merge_with_infinite_prefix && value == HWLOC_BITMAP_SUBSTRING_FULL_VALUE) {
|
||||
/* first full subbitmap merged with infinite prefix */
|
||||
res = 0;
|
||||
} else
|
||||
#endif
|
||||
if (value) {
|
||||
/* print the whole subset if not empty */
|
||||
res = hwloc_snprintf(tmp, size, needcomma ? ",0x" HWLOC_PRIxSUBBITMAP : "0x" HWLOC_PRIxSUBBITMAP,
|
||||
(accum & accum_mask) >> (HWLOC_BITS_PER_LONG - HWLOC_BITMAP_SUBSTRING_SIZE));
|
||||
res = hwloc_snprintf(tmp, size, needcomma ? ",0x" HWLOC_PRIxSUBBITMAP : "0x" HWLOC_PRIxSUBBITMAP, value);
|
||||
needcomma = 1;
|
||||
} else if (i == -1 && accumed == HWLOC_BITMAP_SUBSTRING_SIZE) {
|
||||
/* print a single 0 to mark the last subset */
|
||||
@@ -323,6 +336,7 @@ int hwloc_bitmap_snprintf(char * __hwloc_restrict buf, size_t buflen, const stru
|
||||
#else
|
||||
accum <<= HWLOC_BITMAP_SUBSTRING_SIZE;
|
||||
accumed -= HWLOC_BITMAP_SUBSTRING_SIZE;
|
||||
merge_with_infinite_prefix = 0;
|
||||
#endif
|
||||
|
||||
if (res >= size)
|
||||
@@ -362,7 +376,8 @@ int hwloc_bitmap_sscanf(struct hwloc_bitmap_s *set, const char * __hwloc_restric
|
||||
{
|
||||
const char * current = string;
|
||||
unsigned long accum = 0;
|
||||
int count=0;
|
||||
int count = 0;
|
||||
int ulongcount;
|
||||
int infinite = 0;
|
||||
|
||||
/* count how many substrings there are */
|
||||
@@ -383,9 +398,20 @@ int hwloc_bitmap_sscanf(struct hwloc_bitmap_s *set, const char * __hwloc_restric
|
||||
count--;
|
||||
}
|
||||
|
||||
if (hwloc_bitmap_reset_by_ulongs(set, (count + HWLOC_BITMAP_STRING_PER_LONG - 1) / HWLOC_BITMAP_STRING_PER_LONG) < 0)
|
||||
ulongcount = (count + HWLOC_BITMAP_STRING_PER_LONG - 1) / HWLOC_BITMAP_STRING_PER_LONG;
|
||||
if (hwloc_bitmap_reset_by_ulongs(set, ulongcount) < 0)
|
||||
return -1;
|
||||
set->infinite = 0;
|
||||
|
||||
set->infinite = 0; /* will be updated later */
|
||||
|
||||
#if HWLOC_BITS_PER_LONG != HWLOC_BITMAP_SUBSTRING_SIZE
|
||||
if (infinite && (count % HWLOC_BITMAP_STRING_PER_LONG) != 0) {
|
||||
/* accumulate substrings of the first ulong that are hidden in the infinite prefix */
|
||||
int i;
|
||||
for(i = (count % HWLOC_BITMAP_STRING_PER_LONG); i < HWLOC_BITMAP_STRING_PER_LONG; i++)
|
||||
accum |= (HWLOC_BITMAP_SUBSTRING_FULL_VALUE << (i*HWLOC_BITMAP_SUBSTRING_SIZE));
|
||||
}
|
||||
#endif
|
||||
|
||||
while (*current != '\0') {
|
||||
unsigned long val;
|
||||
@@ -544,6 +570,9 @@ int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, co
|
||||
ssize_t size = buflen;
|
||||
char *tmp = buf;
|
||||
int res, ret = 0;
|
||||
#if HWLOC_BITS_PER_LONG == 64
|
||||
int merge_with_infinite_prefix = 0;
|
||||
#endif
|
||||
int started = 0;
|
||||
int i;
|
||||
|
||||
@@ -563,6 +592,9 @@ int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, co
|
||||
res = size>0 ? (int)size - 1 : 0;
|
||||
tmp += res;
|
||||
size -= res;
|
||||
#if HWLOC_BITS_PER_LONG == 64
|
||||
merge_with_infinite_prefix = 1;
|
||||
#endif
|
||||
}
|
||||
|
||||
i=set->ulongs_count-1;
|
||||
@@ -582,7 +614,11 @@ int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, co
|
||||
if (started) {
|
||||
/* print the whole subset */
|
||||
#if HWLOC_BITS_PER_LONG == 64
|
||||
res = hwloc_snprintf(tmp, size, "%016lx", val);
|
||||
if (merge_with_infinite_prefix && (val & 0xffffffff00000000UL) == 0xffffffff00000000UL) {
|
||||
res = hwloc_snprintf(tmp, size, "%08lx", val & 0xffffffffUL);
|
||||
} else {
|
||||
res = hwloc_snprintf(tmp, size, "%016lx", val);
|
||||
}
|
||||
#else
|
||||
res = hwloc_snprintf(tmp, size, "%08lx", val);
|
||||
#endif
|
||||
@@ -599,6 +635,9 @@ int hwloc_bitmap_taskset_snprintf(char * __hwloc_restrict buf, size_t buflen, co
|
||||
res = size>0 ? (int)size - 1 : 0;
|
||||
tmp += res;
|
||||
size -= res;
|
||||
#if HWLOC_BITS_PER_LONG == 64
|
||||
merge_with_infinite_prefix = 0;
|
||||
#endif
|
||||
}
|
||||
|
||||
/* if didn't display anything, display 0x0 */
|
||||
@@ -679,6 +718,10 @@ int hwloc_bitmap_taskset_sscanf(struct hwloc_bitmap_s *set, const char * __hwloc
|
||||
goto failed;
|
||||
|
||||
set->ulongs[count-1] = val;
|
||||
if (infinite && tmpchars != HWLOC_BITS_PER_LONG/4) {
|
||||
/* infinite prefix with partial substring, fill remaining bits */
|
||||
set->ulongs[count-1] |= (~0ULL)<<(4*tmpchars);
|
||||
}
|
||||
|
||||
current += tmpchars;
|
||||
chars -= tmpchars;
|
||||
|
||||
96
src/3rdparty/hwloc/src/components.c
vendored
96
src/3rdparty/hwloc/src/components.c
vendored
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009-2022 Inria. All rights reserved.
|
||||
* Copyright © 2012 Université Bordeaux
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
@@ -94,8 +94,7 @@ static hwloc_dlhandle hwloc_dlopenext(const char *_filename)
|
||||
{
|
||||
hwloc_dlhandle handle;
|
||||
char *filename = NULL;
|
||||
(void) asprintf(&filename, "%s.so", _filename);
|
||||
if (!filename)
|
||||
if (asprintf(&filename, "%s.so", _filename) < 0)
|
||||
return NULL;
|
||||
handle = dlopen(filename, RTLD_NOW|RTLD_LOCAL);
|
||||
free(filename);
|
||||
@@ -124,7 +123,7 @@ hwloc_dlforeachfile(const char *_paths,
|
||||
*colon = '\0';
|
||||
|
||||
if (hwloc_plugins_verbose)
|
||||
fprintf(stderr, " Looking under %s\n", path);
|
||||
fprintf(stderr, "hwloc: Looking under %s\n", path);
|
||||
|
||||
dir = opendir(path);
|
||||
if (!dir)
|
||||
@@ -198,7 +197,7 @@ hwloc__dlforeach_cb(const char *filename, void *_data __hwloc_attribute_unused)
|
||||
char *componentsymbolname;
|
||||
|
||||
if (hwloc_plugins_verbose)
|
||||
fprintf(stderr, "Plugin dlforeach found `%s'\n", filename);
|
||||
fprintf(stderr, "hwloc: Plugin dlforeach found `%s'\n", filename);
|
||||
|
||||
basename = strrchr(filename, '/');
|
||||
if (!basename)
|
||||
@@ -208,7 +207,7 @@ hwloc__dlforeach_cb(const char *filename, void *_data __hwloc_attribute_unused)
|
||||
|
||||
if (hwloc_plugins_blacklist && strstr(hwloc_plugins_blacklist, basename)) {
|
||||
if (hwloc_plugins_verbose)
|
||||
fprintf(stderr, "Plugin `%s' is blacklisted in the environment\n", basename);
|
||||
fprintf(stderr, "hwloc: Plugin `%s' is blacklisted in the environment\n", basename);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -216,14 +215,14 @@ hwloc__dlforeach_cb(const char *filename, void *_data __hwloc_attribute_unused)
|
||||
handle = hwloc_dlopenext(filename);
|
||||
if (!handle) {
|
||||
if (hwloc_plugins_verbose)
|
||||
fprintf(stderr, "Failed to load plugin: %s\n", hwloc_dlerror());
|
||||
fprintf(stderr, "hwloc: Failed to load plugin: %s\n", hwloc_dlerror());
|
||||
goto out;
|
||||
}
|
||||
|
||||
componentsymbolname = malloc(strlen(basename)+10+1);
|
||||
if (!componentsymbolname) {
|
||||
if (hwloc_plugins_verbose)
|
||||
fprintf(stderr, "Failed to allocation component `%s' symbol\n",
|
||||
fprintf(stderr, "hwloc: Failed to allocation component `%s' symbol\n",
|
||||
basename);
|
||||
goto out_with_handle;
|
||||
}
|
||||
@@ -231,38 +230,38 @@ hwloc__dlforeach_cb(const char *filename, void *_data __hwloc_attribute_unused)
|
||||
component = hwloc_dlsym(handle, componentsymbolname);
|
||||
if (!component) {
|
||||
if (hwloc_plugins_verbose)
|
||||
fprintf(stderr, "Failed to find component symbol `%s'\n",
|
||||
fprintf(stderr, "hwloc: Failed to find component symbol `%s'\n",
|
||||
componentsymbolname);
|
||||
free(componentsymbolname);
|
||||
goto out_with_handle;
|
||||
}
|
||||
if (component->abi != HWLOC_COMPONENT_ABI) {
|
||||
if (hwloc_plugins_verbose)
|
||||
fprintf(stderr, "Plugin symbol ABI %u instead of %d\n",
|
||||
fprintf(stderr, "hwloc: Plugin symbol ABI %u instead of %d\n",
|
||||
component->abi, HWLOC_COMPONENT_ABI);
|
||||
free(componentsymbolname);
|
||||
goto out_with_handle;
|
||||
}
|
||||
if (hwloc_plugins_verbose)
|
||||
fprintf(stderr, "Plugin contains expected symbol `%s'\n",
|
||||
fprintf(stderr, "hwloc: Plugin contains expected symbol `%s'\n",
|
||||
componentsymbolname);
|
||||
free(componentsymbolname);
|
||||
|
||||
if (HWLOC_COMPONENT_TYPE_DISC == component->type) {
|
||||
if (strncmp(basename, "hwloc_", 6)) {
|
||||
if (hwloc_plugins_verbose)
|
||||
fprintf(stderr, "Plugin name `%s' doesn't match its type DISCOVERY\n", basename);
|
||||
fprintf(stderr, "hwloc: Plugin name `%s' doesn't match its type DISCOVERY\n", basename);
|
||||
goto out_with_handle;
|
||||
}
|
||||
} else if (HWLOC_COMPONENT_TYPE_XML == component->type) {
|
||||
if (strncmp(basename, "hwloc_xml_", 10)) {
|
||||
if (hwloc_plugins_verbose)
|
||||
fprintf(stderr, "Plugin name `%s' doesn't match its type XML\n", basename);
|
||||
fprintf(stderr, "hwloc: Plugin name `%s' doesn't match its type XML\n", basename);
|
||||
goto out_with_handle;
|
||||
}
|
||||
} else {
|
||||
if (hwloc_plugins_verbose)
|
||||
fprintf(stderr, "Plugin name `%s' has invalid type %u\n",
|
||||
fprintf(stderr, "hwloc: Plugin name `%s' has invalid type %u\n",
|
||||
basename, (unsigned) component->type);
|
||||
goto out_with_handle;
|
||||
}
|
||||
@@ -277,7 +276,7 @@ hwloc__dlforeach_cb(const char *filename, void *_data __hwloc_attribute_unused)
|
||||
desc->handle = handle;
|
||||
desc->next = NULL;
|
||||
if (hwloc_plugins_verbose)
|
||||
fprintf(stderr, "Plugin descriptor `%s' ready\n", basename);
|
||||
fprintf(stderr, "hwloc: Plugin descriptor `%s' ready\n", basename);
|
||||
|
||||
/* append to the list */
|
||||
prevdesc = &hwloc_plugins;
|
||||
@@ -285,7 +284,7 @@ hwloc__dlforeach_cb(const char *filename, void *_data __hwloc_attribute_unused)
|
||||
prevdesc = &((*prevdesc)->next);
|
||||
*prevdesc = desc;
|
||||
if (hwloc_plugins_verbose)
|
||||
fprintf(stderr, "Plugin descriptor `%s' queued\n", basename);
|
||||
fprintf(stderr, "hwloc: Plugin descriptor `%s' queued\n", basename);
|
||||
return 0;
|
||||
|
||||
out_with_handle:
|
||||
@@ -300,7 +299,7 @@ hwloc_plugins_exit(void)
|
||||
struct hwloc__plugin_desc *desc, *next;
|
||||
|
||||
if (hwloc_plugins_verbose)
|
||||
fprintf(stderr, "Closing all plugins\n");
|
||||
fprintf(stderr, "hwloc: Closing all plugins\n");
|
||||
|
||||
desc = hwloc_plugins;
|
||||
while (desc) {
|
||||
@@ -340,7 +339,7 @@ hwloc_plugins_init(void)
|
||||
hwloc_plugins = NULL;
|
||||
|
||||
if (hwloc_plugins_verbose)
|
||||
fprintf(stderr, "Starting plugin dlforeach in %s\n", path);
|
||||
fprintf(stderr, "hwloc: Starting plugin dlforeach in %s\n", path);
|
||||
err = hwloc_dlforeachfile(path, hwloc__dlforeach_cb, NULL);
|
||||
if (err)
|
||||
goto out_with_init;
|
||||
@@ -364,14 +363,14 @@ hwloc_disc_component_register(struct hwloc_disc_component *component,
|
||||
/* check that the component name is valid */
|
||||
if (!strcmp(component->name, HWLOC_COMPONENT_STOP_NAME)) {
|
||||
if (hwloc_components_verbose)
|
||||
fprintf(stderr, "Cannot register discovery component with reserved name `" HWLOC_COMPONENT_STOP_NAME "'\n");
|
||||
fprintf(stderr, "hwloc: Cannot register discovery component with reserved name `" HWLOC_COMPONENT_STOP_NAME "'\n");
|
||||
return -1;
|
||||
}
|
||||
if (strchr(component->name, HWLOC_COMPONENT_EXCLUDE_CHAR)
|
||||
|| strchr(component->name, HWLOC_COMPONENT_PHASESEP_CHAR)
|
||||
|| strcspn(component->name, HWLOC_COMPONENT_SEPS) != strlen(component->name)) {
|
||||
if (hwloc_components_verbose)
|
||||
fprintf(stderr, "Cannot register discovery component with name `%s' containing reserved characters `%c" HWLOC_COMPONENT_SEPS "'\n",
|
||||
fprintf(stderr, "hwloc: Cannot register discovery component with name `%s' containing reserved characters `%c" HWLOC_COMPONENT_SEPS "'\n",
|
||||
component->name, HWLOC_COMPONENT_EXCLUDE_CHAR);
|
||||
return -1;
|
||||
}
|
||||
@@ -386,8 +385,9 @@ hwloc_disc_component_register(struct hwloc_disc_component *component,
|
||||
|HWLOC_DISC_PHASE_MISC
|
||||
|HWLOC_DISC_PHASE_ANNOTATE
|
||||
|HWLOC_DISC_PHASE_TWEAK))) {
|
||||
fprintf(stderr, "Cannot register discovery component `%s' with invalid phases 0x%x\n",
|
||||
component->name, component->phases);
|
||||
if (HWLOC_SHOW_CRITICAL_ERRORS())
|
||||
fprintf(stderr, "hwloc: Cannot register discovery component `%s' with invalid phases 0x%x\n",
|
||||
component->name, component->phases);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -398,13 +398,13 @@ hwloc_disc_component_register(struct hwloc_disc_component *component,
|
||||
if ((*prev)->priority < component->priority) {
|
||||
/* drop the existing component */
|
||||
if (hwloc_components_verbose)
|
||||
fprintf(stderr, "Dropping previously registered discovery component `%s', priority %u lower than new one %u\n",
|
||||
fprintf(stderr, "hwloc: Dropping previously registered discovery component `%s', priority %u lower than new one %u\n",
|
||||
(*prev)->name, (*prev)->priority, component->priority);
|
||||
*prev = (*prev)->next;
|
||||
} else {
|
||||
/* drop the new one */
|
||||
if (hwloc_components_verbose)
|
||||
fprintf(stderr, "Ignoring new discovery component `%s', priority %u lower than previously registered one %u\n",
|
||||
fprintf(stderr, "hwloc: Ignoring new discovery component `%s', priority %u lower than previously registered one %u\n",
|
||||
component->name, component->priority, (*prev)->priority);
|
||||
return -1;
|
||||
}
|
||||
@@ -412,7 +412,7 @@ hwloc_disc_component_register(struct hwloc_disc_component *component,
|
||||
prev = &((*prev)->next);
|
||||
}
|
||||
if (hwloc_components_verbose)
|
||||
fprintf(stderr, "Registered discovery component `%s' phases 0x%x with priority %u (%s%s)\n",
|
||||
fprintf(stderr, "hwloc: Registered discovery component `%s' phases 0x%x with priority %u (%s%s)\n",
|
||||
component->name, component->phases, component->priority,
|
||||
filename ? "from plugin " : "statically build", filename ? filename : "");
|
||||
|
||||
@@ -475,15 +475,16 @@ hwloc_components_init(void)
|
||||
/* hwloc_static_components is created by configure in static-components.h */
|
||||
for(i=0; NULL != hwloc_static_components[i]; i++) {
|
||||
if (hwloc_static_components[i]->flags) {
|
||||
fprintf(stderr, "Ignoring static component with invalid flags %lx\n",
|
||||
hwloc_static_components[i]->flags);
|
||||
if (HWLOC_SHOW_CRITICAL_ERRORS())
|
||||
fprintf(stderr, "hwloc: Ignoring static component with invalid flags %lx\n",
|
||||
hwloc_static_components[i]->flags);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* initialize the component */
|
||||
if (hwloc_static_components[i]->init && hwloc_static_components[i]->init(0) < 0) {
|
||||
if (hwloc_components_verbose)
|
||||
fprintf(stderr, "Ignoring static component, failed to initialize\n");
|
||||
fprintf(stderr, "hwloc: Ignoring static component, failed to initialize\n");
|
||||
continue;
|
||||
}
|
||||
/* queue ->finalize() callback if any */
|
||||
@@ -503,15 +504,16 @@ hwloc_components_init(void)
|
||||
#ifdef HWLOC_HAVE_PLUGINS
|
||||
for(desc = hwloc_plugins; NULL != desc; desc = desc->next) {
|
||||
if (desc->component->flags) {
|
||||
fprintf(stderr, "Ignoring plugin `%s' component with invalid flags %lx\n",
|
||||
desc->name, desc->component->flags);
|
||||
if (HWLOC_SHOW_CRITICAL_ERRORS())
|
||||
fprintf(stderr, "hwloc: Ignoring plugin `%s' component with invalid flags %lx\n",
|
||||
desc->name, desc->component->flags);
|
||||
continue;
|
||||
}
|
||||
|
||||
/* initialize the component */
|
||||
if (desc->component->init && desc->component->init(0) < 0) {
|
||||
if (hwloc_components_verbose)
|
||||
fprintf(stderr, "Ignoring plugin `%s', failed to initialize\n", desc->name);
|
||||
fprintf(stderr, "hwloc: Ignoring plugin `%s', failed to initialize\n", desc->name);
|
||||
continue;
|
||||
}
|
||||
/* queue ->finalize() callback if any */
|
||||
@@ -608,7 +610,7 @@ hwloc_disc_component_blacklist_one(struct hwloc_topology *topology,
|
||||
/* replace linuxpci and linuxio with linux (with IO phases)
|
||||
* for backward compatibility with pre-v2.0 and v2.0 respectively */
|
||||
if (hwloc_components_verbose)
|
||||
fprintf(stderr, "Replacing deprecated component `%s' with `linux' IO phases in blacklisting\n", name);
|
||||
fprintf(stderr, "hwloc: Replacing deprecated component `%s' with `linux' IO phases in blacklisting\n", name);
|
||||
comp = hwloc_disc_component_find("linux", NULL);
|
||||
phases = HWLOC_DISC_PHASE_PCI | HWLOC_DISC_PHASE_IO | HWLOC_DISC_PHASE_MISC | HWLOC_DISC_PHASE_ANNOTATE;
|
||||
|
||||
@@ -624,7 +626,7 @@ hwloc_disc_component_blacklist_one(struct hwloc_topology *topology,
|
||||
}
|
||||
|
||||
if (hwloc_components_verbose)
|
||||
fprintf(stderr, "Blacklisting component `%s` phases 0x%x\n", comp->name, phases);
|
||||
fprintf(stderr, "hwloc: Blacklisting component `%s` phases 0x%x\n", comp->name, phases);
|
||||
|
||||
for(i=0; i<topology->nr_blacklisted_components; i++) {
|
||||
if (topology->blacklisted_components[i].component == comp) {
|
||||
@@ -727,7 +729,7 @@ hwloc_disc_component_try_enable(struct hwloc_topology *topology,
|
||||
if (hwloc_components_verbose)
|
||||
/* do not warn if envvar_forced since system-wide HWLOC_COMPONENTS must be silently ignored after set_xml() etc.
|
||||
*/
|
||||
fprintf(stderr, "Excluding discovery component `%s' phases 0x%x, conflicts with excludes 0x%x\n",
|
||||
fprintf(stderr, "hwloc: Excluding discovery component `%s' phases 0x%x, conflicts with excludes 0x%x\n",
|
||||
comp->name, comp->phases, topology->backend_excluded_phases);
|
||||
return -1;
|
||||
}
|
||||
@@ -735,8 +737,8 @@ hwloc_disc_component_try_enable(struct hwloc_topology *topology,
|
||||
backend = comp->instantiate(topology, comp, topology->backend_excluded_phases | blacklisted_phases,
|
||||
NULL, NULL, NULL);
|
||||
if (!backend) {
|
||||
if (hwloc_components_verbose || envvar_forced)
|
||||
fprintf(stderr, "Failed to instantiate discovery component `%s'\n", comp->name);
|
||||
if (hwloc_components_verbose || (envvar_forced && HWLOC_SHOW_CRITICAL_ERRORS()))
|
||||
fprintf(stderr, "hwloc: Failed to instantiate discovery component `%s'\n", comp->name);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -817,7 +819,7 @@ hwloc_disc_components_enable_others(struct hwloc_topology *topology)
|
||||
name = curenv;
|
||||
if (!strcmp(name, "linuxpci") || !strcmp(name, "linuxio")) {
|
||||
if (hwloc_components_verbose)
|
||||
fprintf(stderr, "Replacing deprecated component `%s' with `linux' in envvar forcing\n", name);
|
||||
fprintf(stderr, "hwloc: Replacing deprecated component `%s' with `linux' in envvar forcing\n", name);
|
||||
name = "linux";
|
||||
}
|
||||
|
||||
@@ -832,7 +834,8 @@ hwloc_disc_components_enable_others(struct hwloc_topology *topology)
|
||||
if (comp->phases & ~blacklisted_phases)
|
||||
hwloc_disc_component_try_enable(topology, comp, 1 /* envvar forced */, blacklisted_phases);
|
||||
} else {
|
||||
fprintf(stderr, "Cannot find discovery component `%s'\n", name);
|
||||
if (HWLOC_SHOW_CRITICAL_ERRORS())
|
||||
fprintf(stderr, "hwloc: Cannot find discovery component `%s'\n", name);
|
||||
}
|
||||
|
||||
/* restore chars (the second loop below needs env to be unmodified) */
|
||||
@@ -864,7 +867,7 @@ hwloc_disc_components_enable_others(struct hwloc_topology *topology)
|
||||
|
||||
if (!(comp->phases & ~blacklisted_phases)) {
|
||||
if (hwloc_components_verbose)
|
||||
fprintf(stderr, "Excluding blacklisted discovery component `%s' phases 0x%x\n",
|
||||
fprintf(stderr, "hwloc: Excluding blacklisted discovery component `%s' phases 0x%x\n",
|
||||
comp->name, comp->phases);
|
||||
goto nextcomp;
|
||||
}
|
||||
@@ -879,7 +882,7 @@ nextcomp:
|
||||
/* print a summary */
|
||||
int first = 1;
|
||||
backend = topology->backends;
|
||||
fprintf(stderr, "Final list of enabled discovery components: ");
|
||||
fprintf(stderr, "hwloc: Final list of enabled discovery components: ");
|
||||
while (backend != NULL) {
|
||||
fprintf(stderr, "%s%s(0x%x)", first ? "" : ",", backend->component->name, backend->phases);
|
||||
backend = backend->next;
|
||||
@@ -935,7 +938,7 @@ hwloc_backend_alloc(struct hwloc_topology *topology,
|
||||
/* filter-out component phases that are excluded */
|
||||
backend->phases = component->phases & ~topology->backend_excluded_phases;
|
||||
if (backend->phases != component->phases && hwloc_components_verbose)
|
||||
fprintf(stderr, "Trying discovery component `%s' with phases 0x%x instead of 0x%x\n",
|
||||
fprintf(stderr, "hwloc: Trying discovery component `%s' with phases 0x%x instead of 0x%x\n",
|
||||
component->name, backend->phases, component->phases);
|
||||
backend->flags = 0;
|
||||
backend->discover = NULL;
|
||||
@@ -963,8 +966,9 @@ hwloc_backend_enable(struct hwloc_backend *backend)
|
||||
|
||||
/* check backend flags */
|
||||
if (backend->flags) {
|
||||
fprintf(stderr, "Cannot enable discovery component `%s' phases 0x%x with unknown flags %lx\n",
|
||||
backend->component->name, backend->component->phases, backend->flags);
|
||||
if (HWLOC_SHOW_CRITICAL_ERRORS())
|
||||
fprintf(stderr, "hwloc: Cannot enable discovery component `%s' phases 0x%x with unknown flags %lx\n",
|
||||
backend->component->name, backend->component->phases, backend->flags);
|
||||
return -1;
|
||||
}
|
||||
|
||||
@@ -973,7 +977,7 @@ hwloc_backend_enable(struct hwloc_backend *backend)
|
||||
while (NULL != *pprev) {
|
||||
if ((*pprev)->component == backend->component) {
|
||||
if (hwloc_components_verbose)
|
||||
fprintf(stderr, "Cannot enable discovery component `%s' phases 0x%x twice\n",
|
||||
fprintf(stderr, "hwloc: Cannot enable discovery component `%s' phases 0x%x twice\n",
|
||||
backend->component->name, backend->component->phases);
|
||||
hwloc_backend_disable(backend);
|
||||
errno = EBUSY;
|
||||
@@ -983,7 +987,7 @@ hwloc_backend_enable(struct hwloc_backend *backend)
|
||||
}
|
||||
|
||||
if (hwloc_components_verbose)
|
||||
fprintf(stderr, "Enabling discovery component `%s' with phases 0x%x (among 0x%x)\n",
|
||||
fprintf(stderr, "hwloc: Enabling discovery component `%s' with phases 0x%x (among 0x%x)\n",
|
||||
backend->component->name, backend->phases, backend->component->phases);
|
||||
|
||||
/* enqueue at the end */
|
||||
@@ -1067,7 +1071,7 @@ hwloc_backends_disable_all(struct hwloc_topology *topology)
|
||||
while (NULL != (backend = topology->backends)) {
|
||||
struct hwloc_backend *next = backend->next;
|
||||
if (hwloc_components_verbose)
|
||||
fprintf(stderr, "Disabling discovery component `%s'\n",
|
||||
fprintf(stderr, "hwloc: Disabling discovery component `%s'\n",
|
||||
backend->component->name);
|
||||
hwloc_backend_disable(backend);
|
||||
topology->backends = next;
|
||||
|
||||
53
src/3rdparty/hwloc/src/cpukinds.c
vendored
53
src/3rdparty/hwloc/src/cpukinds.c
vendored
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright © 2020-2021 Inria. All rights reserved.
|
||||
* Copyright © 2020-2024 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
@@ -42,11 +42,15 @@ hwloc_internal_cpukinds_dup(hwloc_topology_t new, hwloc_topology_t old)
|
||||
struct hwloc_internal_cpukind_s *kinds;
|
||||
unsigned i;
|
||||
|
||||
if (!old->nr_cpukinds)
|
||||
return 0;
|
||||
|
||||
kinds = hwloc_tma_malloc(tma, old->nr_cpukinds * sizeof(*kinds));
|
||||
if (!kinds)
|
||||
return -1;
|
||||
new->cpukinds = kinds;
|
||||
new->nr_cpukinds = old->nr_cpukinds;
|
||||
new->nr_cpukinds_allocated = old->nr_cpukinds;
|
||||
memcpy(kinds, old->cpukinds, old->nr_cpukinds * sizeof(*kinds));
|
||||
|
||||
for(i=0;i<old->nr_cpukinds; i++) {
|
||||
@@ -343,7 +347,8 @@ enum hwloc_cpukinds_ranking {
|
||||
HWLOC_CPUKINDS_RANKING_DEFAULT, /* forced + frequency on ARM, forced + coretype_frequency otherwise */
|
||||
HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY, /* default without forced */
|
||||
HWLOC_CPUKINDS_RANKING_FORCED_EFFICIENCY,
|
||||
HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY,
|
||||
HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY, /* either coretype or frequency or both */
|
||||
HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY_STRICT, /* both coretype and frequency are required */
|
||||
HWLOC_CPUKINDS_RANKING_CORETYPE,
|
||||
HWLOC_CPUKINDS_RANKING_FREQUENCY,
|
||||
HWLOC_CPUKINDS_RANKING_FREQUENCY_MAX,
|
||||
@@ -358,9 +363,9 @@ hwloc__cpukinds_try_rank_by_info(struct hwloc_topology *topology,
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
if (HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY == heuristics) {
|
||||
hwloc_debug("Trying to rank cpukinds by coretype+frequency...\n");
|
||||
/* we need intel_core_type + (base or max freq) for all kinds */
|
||||
if (HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY_STRICT == heuristics) {
|
||||
hwloc_debug("Trying to rank cpukinds by coretype+frequency_strict...\n");
|
||||
/* we need intel_core_type AND (base or max freq) for all kinds */
|
||||
if (!summary->have_intel_core_type
|
||||
|| (!summary->have_max_freq && !summary->have_base_freq))
|
||||
return -1;
|
||||
@@ -373,6 +378,21 @@ hwloc__cpukinds_try_rank_by_info(struct hwloc_topology *topology,
|
||||
kind->ranking_value = (summary->summaries[i].intel_core_type << 20) + summary->summaries[i].max_freq;
|
||||
}
|
||||
|
||||
} else if (HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY == heuristics) {
|
||||
hwloc_debug("Trying to rank cpukinds by coretype+frequency...\n");
|
||||
/* we need intel_core_type OR (base or max freq) for all kinds */
|
||||
if (!summary->have_intel_core_type
|
||||
&& (!summary->have_max_freq && !summary->have_base_freq))
|
||||
return -1;
|
||||
/* rank first by coretype (Core>>Atom) then by frequency, base if available, max otherwise */
|
||||
for(i=0; i<topology->nr_cpukinds; i++) {
|
||||
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i];
|
||||
if (summary->have_base_freq)
|
||||
kind->ranking_value = (summary->summaries[i].intel_core_type << 20) + summary->summaries[i].base_freq;
|
||||
else
|
||||
kind->ranking_value = (summary->summaries[i].intel_core_type << 20) + summary->summaries[i].max_freq;
|
||||
}
|
||||
|
||||
} else if (HWLOC_CPUKINDS_RANKING_CORETYPE == heuristics) {
|
||||
hwloc_debug("Trying to rank cpukinds by coretype...\n");
|
||||
/* we need intel_core_type */
|
||||
@@ -429,7 +449,9 @@ static int hwloc__cpukinds_compare_ranking_values(const void *_a, const void *_b
|
||||
{
|
||||
const struct hwloc_internal_cpukind_s *a = _a;
|
||||
const struct hwloc_internal_cpukind_s *b = _b;
|
||||
return a->ranking_value - b->ranking_value;
|
||||
uint64_t arv = a->ranking_value;
|
||||
uint64_t brv = b->ranking_value;
|
||||
return arv < brv ? -1 : arv > brv ? 1 : 0;
|
||||
}
|
||||
|
||||
/* this function requires ranking values to be unique */
|
||||
@@ -469,6 +491,8 @@ hwloc_internal_cpukinds_rank(struct hwloc_topology *topology)
|
||||
heuristics = HWLOC_CPUKINDS_RANKING_NONE;
|
||||
else if (!strcmp(env, "coretype+frequency"))
|
||||
heuristics = HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY;
|
||||
else if (!strcmp(env, "coretype+frequency_strict"))
|
||||
heuristics = HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY_STRICT;
|
||||
else if (!strcmp(env, "coretype"))
|
||||
heuristics = HWLOC_CPUKINDS_RANKING_CORETYPE;
|
||||
else if (!strcmp(env, "frequency"))
|
||||
@@ -481,16 +505,14 @@ hwloc_internal_cpukinds_rank(struct hwloc_topology *topology)
|
||||
heuristics = HWLOC_CPUKINDS_RANKING_FORCED_EFFICIENCY;
|
||||
else if (!strcmp(env, "no_forced_efficiency"))
|
||||
heuristics = HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY;
|
||||
else if (!hwloc_hide_errors())
|
||||
fprintf(stderr, "Failed to recognize HWLOC_CPUKINDS_RANKING value %s\n", env);
|
||||
else if (HWLOC_SHOW_CRITICAL_ERRORS())
|
||||
fprintf(stderr, "hwloc: Failed to recognize HWLOC_CPUKINDS_RANKING value %s\n", env);
|
||||
}
|
||||
|
||||
if (heuristics == HWLOC_CPUKINDS_RANKING_DEFAULT
|
||||
|| heuristics == HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY) {
|
||||
/* default is forced_efficiency first */
|
||||
struct hwloc_cpukinds_info_summary summary;
|
||||
enum hwloc_cpukinds_ranking subheuristics;
|
||||
const char *arch;
|
||||
|
||||
if (heuristics == HWLOC_CPUKINDS_RANKING_DEFAULT)
|
||||
hwloc_debug("Using default ranking strategy...\n");
|
||||
@@ -508,16 +530,7 @@ hwloc_internal_cpukinds_rank(struct hwloc_topology *topology)
|
||||
goto failed;
|
||||
hwloc__cpukinds_summarize_info(topology, &summary);
|
||||
|
||||
arch = hwloc_obj_get_info_by_name(topology->levels[0][0], "Architecture");
|
||||
/* TODO: rather coretype_frequency only on x86/Intel? */
|
||||
if (arch && (!strncmp(arch, "arm", 3) || !strncmp(arch, "aarch", 5)))
|
||||
/* then frequency on ARM */
|
||||
subheuristics = HWLOC_CPUKINDS_RANKING_FREQUENCY;
|
||||
else
|
||||
/* or coretype+frequency otherwise */
|
||||
subheuristics = HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY;
|
||||
|
||||
err = hwloc__cpukinds_try_rank_by_info(topology, subheuristics, &summary);
|
||||
err = hwloc__cpukinds_try_rank_by_info(topology, HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY, &summary);
|
||||
free(summary.summaries);
|
||||
if (!err)
|
||||
goto ready;
|
||||
|
||||
28
src/3rdparty/hwloc/src/diff.c
vendored
28
src/3rdparty/hwloc/src/diff.c
vendored
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright © 2013-2020 Inria. All rights reserved.
|
||||
* Copyright © 2013-2023 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
@@ -218,7 +218,7 @@ hwloc_diff_trees(hwloc_topology_t topo1, hwloc_obj_t obj1,
|
||||
struct hwloc_info_s *info1 = &obj1->infos[i], *info2 = &obj2->infos[i];
|
||||
if (strcmp(info1->name, info2->name))
|
||||
goto out_too_complex;
|
||||
if (strcmp(obj1->infos[i].value, obj2->infos[i].value)) {
|
||||
if (strcmp(info1->value, info2->value)) {
|
||||
err = hwloc_append_diff_obj_attr_string(obj1,
|
||||
HWLOC_TOPOLOGY_DIFF_OBJ_ATTR_INFO,
|
||||
info1->name,
|
||||
@@ -411,6 +411,30 @@ int hwloc_topology_diff_build(hwloc_topology_t topo1,
|
||||
}
|
||||
}
|
||||
|
||||
if (!err) {
|
||||
/* cpukinds */
|
||||
if (topo1->nr_cpukinds != topo2->nr_cpukinds)
|
||||
goto roottoocomplex;
|
||||
for(i=0; i<topo1->nr_cpukinds; i++) {
|
||||
struct hwloc_internal_cpukind_s *ic1 = &topo1->cpukinds[i];
|
||||
struct hwloc_internal_cpukind_s *ic2 = &topo2->cpukinds[i];
|
||||
unsigned j;
|
||||
if (!hwloc_bitmap_isequal(ic1->cpuset, ic2->cpuset)
|
||||
|| ic1->efficiency != ic2->efficiency
|
||||
|| ic1->forced_efficiency != ic2->forced_efficiency
|
||||
|| ic1->ranking_value != ic2->ranking_value
|
||||
|| ic1->nr_infos != ic2->nr_infos)
|
||||
goto roottoocomplex;
|
||||
for(j=0; j<ic1->nr_infos; j++) {
|
||||
struct hwloc_info_s *info1 = &ic1->infos[j], *info2 = &ic2->infos[j];
|
||||
if (strcmp(info1->name, info2->name)
|
||||
|| strcmp(info1->value, info2->value)) {
|
||||
goto roottoocomplex;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return err;
|
||||
|
||||
roottoocomplex:
|
||||
|
||||
740
src/3rdparty/hwloc/src/distances.c
vendored
740
src/3rdparty/hwloc/src/distances.c
vendored
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright © 2010-2020 Inria. All rights reserved.
|
||||
* Copyright © 2010-2024 Inria. All rights reserved.
|
||||
* Copyright © 2011-2012 Université Bordeaux
|
||||
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
@@ -17,6 +17,37 @@
|
||||
static struct hwloc_internal_distances_s *
|
||||
hwloc__internal_distances_from_public(hwloc_topology_t topology, struct hwloc_distances_s *distances);
|
||||
|
||||
static void
|
||||
hwloc__groups_by_distances(struct hwloc_topology *topology, unsigned nbobjs, struct hwloc_obj **objs, uint64_t *values, unsigned long kind, unsigned nbaccuracies, float *accuracies, int needcheck);
|
||||
|
||||
static void
|
||||
hwloc_internal_distances_restrict(hwloc_obj_t *objs,
|
||||
uint64_t *indexes,
|
||||
hwloc_obj_type_t *different_types,
|
||||
uint64_t *values,
|
||||
unsigned nbobjs, unsigned disappeared);
|
||||
|
||||
static void
|
||||
hwloc_internal_distances_print_matrix(struct hwloc_internal_distances_s *dist)
|
||||
{
|
||||
unsigned nbobjs = dist->nbobjs;
|
||||
hwloc_obj_t *objs = dist->objs;
|
||||
hwloc_uint64_t *values = dist->values;
|
||||
int gp = !HWLOC_DIST_TYPE_USE_OS_INDEX(dist->unique_type);
|
||||
unsigned i, j;
|
||||
|
||||
fprintf(stderr, "%s", gp ? "gp_index" : "os_index");
|
||||
for(j=0; j<nbobjs; j++)
|
||||
fprintf(stderr, " % 5d", (int)(gp ? objs[j]->gp_index : objs[j]->os_index));
|
||||
fprintf(stderr, "\n");
|
||||
for(i=0; i<nbobjs; i++) {
|
||||
fprintf(stderr, " % 5d", (int)(gp ? objs[i]->gp_index : objs[i]->os_index));
|
||||
for(j=0; j<nbobjs; j++)
|
||||
fprintf(stderr, " % 5lld", (long long) values[i*nbobjs + j]);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
/******************************************************
|
||||
* Global init, prepare, destroy, dup
|
||||
*/
|
||||
@@ -244,27 +275,33 @@ int hwloc_distances_release_remove(hwloc_topology_t topology,
|
||||
return 0;
|
||||
}
|
||||
|
||||
/******************************************************
|
||||
* Add distances to the topology
|
||||
/*********************************************************
|
||||
* Backend functions for adding distances to the topology
|
||||
*/
|
||||
|
||||
/* cancel a distances handle. only needed internally for now */
|
||||
static void
|
||||
hwloc__groups_by_distances(struct hwloc_topology *topology, unsigned nbobjs, struct hwloc_obj **objs, uint64_t *values, unsigned long kind, unsigned nbaccuracies, float *accuracies, int needcheck);
|
||||
hwloc_backend_distances_add__cancel(struct hwloc_internal_distances_s *dist)
|
||||
{
|
||||
/* everything is set to NULL in hwloc_backend_distances_add_create() */
|
||||
free(dist->name);
|
||||
free(dist->indexes);
|
||||
free(dist->objs);
|
||||
free(dist->different_types);
|
||||
free(dist->values);
|
||||
free(dist);
|
||||
}
|
||||
|
||||
/* insert a distance matrix in the topology.
|
||||
* the caller gives us the distances and objs pointers, we'll free them later.
|
||||
/* prepare a distances handle for later commit in the topology.
|
||||
* we duplicate the caller's name.
|
||||
*/
|
||||
static int
|
||||
hwloc_internal_distances__add(hwloc_topology_t topology, const char *name,
|
||||
hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types,
|
||||
unsigned nbobjs, hwloc_obj_t *objs, uint64_t *indexes, uint64_t *values,
|
||||
unsigned long kind, unsigned iflags)
|
||||
hwloc_backend_distances_add_handle_t
|
||||
hwloc_backend_distances_add_create(hwloc_topology_t topology,
|
||||
const char *name, unsigned long kind, unsigned long flags)
|
||||
{
|
||||
struct hwloc_internal_distances_s *dist;
|
||||
|
||||
if (different_types) {
|
||||
kind |= HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES; /* the user isn't forced to give it */
|
||||
} else if (kind & HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES) {
|
||||
if (flags) {
|
||||
errno = EINVAL;
|
||||
goto err;
|
||||
}
|
||||
@@ -273,110 +310,54 @@ hwloc_internal_distances__add(hwloc_topology_t topology, const char *name,
|
||||
if (!dist)
|
||||
goto err;
|
||||
|
||||
if (name)
|
||||
if (name) {
|
||||
dist->name = strdup(name); /* ignore failure */
|
||||
|
||||
dist->unique_type = unique_type;
|
||||
dist->different_types = different_types;
|
||||
dist->nbobjs = nbobjs;
|
||||
dist->kind = kind;
|
||||
dist->iflags = iflags;
|
||||
|
||||
assert(!!(iflags & HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID) == !!objs);
|
||||
|
||||
if (!objs) {
|
||||
assert(indexes);
|
||||
/* we only have indexes, we'll refresh objs from there */
|
||||
dist->indexes = indexes;
|
||||
dist->objs = calloc(nbobjs, sizeof(hwloc_obj_t));
|
||||
if (!dist->objs)
|
||||
if (!dist->name)
|
||||
goto err_with_dist;
|
||||
|
||||
} else {
|
||||
unsigned i;
|
||||
assert(!indexes);
|
||||
/* we only have objs, generate the indexes arrays so that we can refresh objs later */
|
||||
dist->objs = objs;
|
||||
dist->indexes = malloc(nbobjs * sizeof(*dist->indexes));
|
||||
if (!dist->indexes)
|
||||
goto err_with_dist;
|
||||
if (HWLOC_DIST_TYPE_USE_OS_INDEX(dist->unique_type)) {
|
||||
for(i=0; i<nbobjs; i++)
|
||||
dist->indexes[i] = objs[i]->os_index;
|
||||
} else {
|
||||
for(i=0; i<nbobjs; i++)
|
||||
dist->indexes[i] = objs[i]->gp_index;
|
||||
}
|
||||
}
|
||||
|
||||
dist->values = values;
|
||||
dist->kind = kind;
|
||||
dist->iflags = HWLOC_INTERNAL_DIST_FLAG_NOT_COMMITTED;
|
||||
|
||||
dist->unique_type = HWLOC_OBJ_TYPE_NONE;
|
||||
dist->different_types = NULL;
|
||||
dist->nbobjs = 0;
|
||||
dist->indexes = NULL;
|
||||
dist->objs = NULL;
|
||||
dist->values = NULL;
|
||||
|
||||
dist->id = topology->next_dist_id++;
|
||||
|
||||
if (topology->last_dist)
|
||||
topology->last_dist->next = dist;
|
||||
else
|
||||
topology->first_dist = dist;
|
||||
dist->prev = topology->last_dist;
|
||||
dist->next = NULL;
|
||||
topology->last_dist = dist;
|
||||
return 0;
|
||||
return dist;
|
||||
|
||||
err_with_dist:
|
||||
if (name)
|
||||
free(dist->name);
|
||||
free(dist);
|
||||
hwloc_backend_distances_add__cancel(dist);
|
||||
err:
|
||||
free(different_types);
|
||||
free(objs);
|
||||
free(indexes);
|
||||
free(values);
|
||||
return -1;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, const char *name,
|
||||
hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, unsigned nbobjs, uint64_t *indexes, uint64_t *values,
|
||||
unsigned long kind, unsigned long flags)
|
||||
/* attach objects and values to a distances handle.
|
||||
* on success, objs and values arrays are attached and will be freed with the distances.
|
||||
* on failure, the handle is freed.
|
||||
*/
|
||||
int
|
||||
hwloc_backend_distances_add_values(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
hwloc_backend_distances_add_handle_t handle,
|
||||
unsigned nbobjs, hwloc_obj_t *objs,
|
||||
hwloc_uint64_t *values,
|
||||
unsigned long flags)
|
||||
{
|
||||
unsigned iflags = 0; /* objs not valid */
|
||||
|
||||
if (nbobjs < 2) {
|
||||
errno = EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* cannot group without objects,
|
||||
* and we don't group from XML anyway since the hwloc that generated the XML should have grouped already.
|
||||
*/
|
||||
if (flags & HWLOC_DISTANCES_ADD_FLAG_GROUP) {
|
||||
errno = EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
return hwloc_internal_distances__add(topology, name, unique_type, different_types, nbobjs, NULL, indexes, values, kind, iflags);
|
||||
|
||||
err:
|
||||
free(indexes);
|
||||
free(values);
|
||||
free(different_types);
|
||||
return -1;
|
||||
}
|
||||
|
||||
static void
|
||||
hwloc_internal_distances_restrict(hwloc_obj_t *objs,
|
||||
uint64_t *indexes,
|
||||
uint64_t *values,
|
||||
unsigned nbobjs, unsigned disappeared);
|
||||
|
||||
int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name,
|
||||
unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values,
|
||||
unsigned long kind, unsigned long flags)
|
||||
{
|
||||
hwloc_obj_type_t unique_type, *different_types;
|
||||
struct hwloc_internal_distances_s *dist = handle;
|
||||
hwloc_obj_type_t unique_type, *different_types = NULL;
|
||||
hwloc_uint64_t *indexes = NULL;
|
||||
unsigned i, disappeared = 0;
|
||||
unsigned iflags = HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID;
|
||||
|
||||
if (nbobjs < 2) {
|
||||
if (dist->nbobjs || !(dist->iflags & HWLOC_INTERNAL_DIST_FLAG_NOT_COMMITTED)) {
|
||||
/* target distances is already set */
|
||||
errno = EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (flags || nbobjs < 2 || !objs || !values) {
|
||||
errno = EINVAL;
|
||||
goto err;
|
||||
}
|
||||
@@ -389,15 +370,18 @@ int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name,
|
||||
/* some objects are NULL */
|
||||
if (disappeared == nbobjs) {
|
||||
/* nothing left, drop the matrix */
|
||||
free(objs);
|
||||
free(values);
|
||||
return 0;
|
||||
errno = ENOENT;
|
||||
goto err;
|
||||
}
|
||||
/* restrict the matrix */
|
||||
hwloc_internal_distances_restrict(objs, NULL, values, nbobjs, disappeared);
|
||||
hwloc_internal_distances_restrict(objs, NULL, NULL, values, nbobjs, disappeared);
|
||||
nbobjs -= disappeared;
|
||||
}
|
||||
|
||||
indexes = malloc(nbobjs * sizeof(*indexes));
|
||||
if (!indexes)
|
||||
goto err;
|
||||
|
||||
unique_type = objs[0]->type;
|
||||
for(i=1; i<nbobjs; i++)
|
||||
if (objs[i]->type != unique_type) {
|
||||
@@ -408,16 +392,108 @@ int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name,
|
||||
/* heterogeneous types */
|
||||
different_types = malloc(nbobjs * sizeof(*different_types));
|
||||
if (!different_types)
|
||||
goto err;
|
||||
goto err_with_indexes;
|
||||
for(i=0; i<nbobjs; i++)
|
||||
different_types[i] = objs[i]->type;
|
||||
|
||||
} else {
|
||||
/* homogeneous types */
|
||||
different_types = NULL;
|
||||
}
|
||||
|
||||
if (topology->grouping && (flags & HWLOC_DISTANCES_ADD_FLAG_GROUP) && !different_types) {
|
||||
dist->nbobjs = nbobjs;
|
||||
dist->objs = objs;
|
||||
dist->iflags |= HWLOC_INTERNAL_DIST_FLAG_OBJS_VALID;
|
||||
dist->indexes = indexes;
|
||||
dist->unique_type = unique_type;
|
||||
dist->different_types = different_types;
|
||||
dist->values = values;
|
||||
|
||||
if (different_types)
|
||||
dist->kind |= HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES;
|
||||
|
||||
if (HWLOC_DIST_TYPE_USE_OS_INDEX(dist->unique_type)) {
|
||||
for(i=0; i<nbobjs; i++)
|
||||
dist->indexes[i] = objs[i]->os_index;
|
||||
} else {
|
||||
for(i=0; i<nbobjs; i++)
|
||||
dist->indexes[i] = objs[i]->gp_index;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
err_with_indexes:
|
||||
free(indexes);
|
||||
err:
|
||||
hwloc_backend_distances_add__cancel(dist);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* attach objects and values to a distance handle.
|
||||
* on success, objs and values arrays are attached and will be freed with the distances.
|
||||
* on failure, the handle is freed.
|
||||
*/
|
||||
static int
|
||||
hwloc_backend_distances_add_values_by_index(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
hwloc_backend_distances_add_handle_t handle,
|
||||
unsigned nbobjs, hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, hwloc_uint64_t *indexes,
|
||||
hwloc_uint64_t *values)
|
||||
{
|
||||
struct hwloc_internal_distances_s *dist = handle;
|
||||
hwloc_obj_t *objs;
|
||||
|
||||
if (dist->nbobjs || !(dist->iflags & HWLOC_INTERNAL_DIST_FLAG_NOT_COMMITTED)) {
|
||||
/* target distances is already set */
|
||||
errno = EINVAL;
|
||||
goto err;
|
||||
}
|
||||
if (nbobjs < 2 || !indexes || !values || (unique_type == HWLOC_OBJ_TYPE_NONE && !different_types)) {
|
||||
errno = EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
objs = malloc(nbobjs * sizeof(*objs));
|
||||
if (!objs)
|
||||
goto err;
|
||||
|
||||
dist->nbobjs = nbobjs;
|
||||
dist->objs = objs;
|
||||
dist->indexes = indexes;
|
||||
dist->unique_type = unique_type;
|
||||
dist->different_types = different_types;
|
||||
dist->values = values;
|
||||
|
||||
if (different_types)
|
||||
dist->kind |= HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES;
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
hwloc_backend_distances_add__cancel(dist);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* commit a distances handle.
|
||||
* on failure, the handle is freed with its objects and values arrays.
|
||||
*/
|
||||
int
|
||||
hwloc_backend_distances_add_commit(hwloc_topology_t topology,
|
||||
hwloc_backend_distances_add_handle_t handle,
|
||||
unsigned long flags)
|
||||
{
|
||||
struct hwloc_internal_distances_s *dist = handle;
|
||||
|
||||
if (!dist->nbobjs || !(dist->iflags & HWLOC_INTERNAL_DIST_FLAG_NOT_COMMITTED)) {
|
||||
/* target distances not ready for commit */
|
||||
errno = EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if ((flags & HWLOC_DISTANCES_ADD_FLAG_GROUP) && !dist->objs) {
|
||||
/* cannot group without objects,
|
||||
* and we don't group from XML anyway since the hwloc that generated the XML should have grouped already.
|
||||
*/
|
||||
errno = EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (topology->grouping && (flags & HWLOC_DISTANCES_ADD_FLAG_GROUP) && !dist->different_types) {
|
||||
float full_accuracy = 0.f;
|
||||
float *accuracies;
|
||||
unsigned nbaccuracies;
|
||||
@@ -431,26 +507,94 @@ int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name,
|
||||
}
|
||||
|
||||
if (topology->grouping_verbose) {
|
||||
unsigned j;
|
||||
int gp = !HWLOC_DIST_TYPE_USE_OS_INDEX(unique_type);
|
||||
fprintf(stderr, "Trying to group objects using distance matrix:\n");
|
||||
fprintf(stderr, "%s", gp ? "gp_index" : "os_index");
|
||||
for(j=0; j<nbobjs; j++)
|
||||
fprintf(stderr, " % 5d", (int)(gp ? objs[j]->gp_index : objs[j]->os_index));
|
||||
fprintf(stderr, "\n");
|
||||
for(i=0; i<nbobjs; i++) {
|
||||
fprintf(stderr, " % 5d", (int)(gp ? objs[i]->gp_index : objs[i]->os_index));
|
||||
for(j=0; j<nbobjs; j++)
|
||||
fprintf(stderr, " % 5lld", (long long) values[i*nbobjs + j]);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
hwloc_internal_distances_print_matrix(dist);
|
||||
}
|
||||
|
||||
hwloc__groups_by_distances(topology, nbobjs, objs, values,
|
||||
kind, nbaccuracies, accuracies, 1 /* check the first matrice */);
|
||||
hwloc__groups_by_distances(topology, dist->nbobjs, dist->objs, dist->values,
|
||||
dist->kind, nbaccuracies, accuracies, 1 /* check the first matrix */);
|
||||
}
|
||||
|
||||
return hwloc_internal_distances__add(topology, name, unique_type, different_types, nbobjs, objs, NULL, values, kind, iflags);
|
||||
if (topology->last_dist)
|
||||
topology->last_dist->next = dist;
|
||||
else
|
||||
topology->first_dist = dist;
|
||||
dist->prev = topology->last_dist;
|
||||
dist->next = NULL;
|
||||
topology->last_dist = dist;
|
||||
|
||||
dist->iflags &= ~HWLOC_INTERNAL_DIST_FLAG_NOT_COMMITTED;
|
||||
return 0;
|
||||
|
||||
err:
|
||||
hwloc_backend_distances_add__cancel(dist);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* all-in-one backend function not exported to plugins, only used by XML for now */
|
||||
int hwloc_internal_distances_add_by_index(hwloc_topology_t topology, const char *name,
|
||||
hwloc_obj_type_t unique_type, hwloc_obj_type_t *different_types, unsigned nbobjs, uint64_t *indexes, uint64_t *values,
|
||||
unsigned long kind, unsigned long flags)
|
||||
{
|
||||
hwloc_backend_distances_add_handle_t handle;
|
||||
int err;
|
||||
|
||||
handle = hwloc_backend_distances_add_create(topology, name, kind, 0);
|
||||
if (!handle)
|
||||
goto err;
|
||||
|
||||
err = hwloc_backend_distances_add_values_by_index(topology, handle,
|
||||
nbobjs, unique_type, different_types, indexes,
|
||||
values);
|
||||
if (err < 0)
|
||||
goto err;
|
||||
|
||||
/* arrays are now attached to the handle */
|
||||
indexes = NULL;
|
||||
different_types = NULL;
|
||||
values = NULL;
|
||||
|
||||
err = hwloc_backend_distances_add_commit(topology, handle, flags);
|
||||
if (err < 0)
|
||||
goto err;
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
free(indexes);
|
||||
free(different_types);
|
||||
free(values);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* all-in-one backend function not exported to plugins, used by OS backends */
|
||||
int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name,
|
||||
unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values,
|
||||
unsigned long kind, unsigned long flags)
|
||||
{
|
||||
hwloc_backend_distances_add_handle_t handle;
|
||||
int err;
|
||||
|
||||
handle = hwloc_backend_distances_add_create(topology, name, kind, 0);
|
||||
if (!handle)
|
||||
goto err;
|
||||
|
||||
err = hwloc_backend_distances_add_values(topology, handle,
|
||||
nbobjs, objs,
|
||||
values,
|
||||
0);
|
||||
if (err < 0)
|
||||
goto err;
|
||||
|
||||
/* arrays are now attached to the handle */
|
||||
objs = NULL;
|
||||
values = NULL;
|
||||
|
||||
err = hwloc_backend_distances_add_commit(topology, handle, flags);
|
||||
if (err < 0)
|
||||
goto err;
|
||||
|
||||
return 0;
|
||||
|
||||
err:
|
||||
free(objs);
|
||||
@@ -458,44 +602,54 @@ int hwloc_internal_distances_add(hwloc_topology_t topology, const char *name,
|
||||
return -1;
|
||||
}
|
||||
|
||||
/********************************
|
||||
* User API for adding distances
|
||||
*/
|
||||
|
||||
#define HWLOC_DISTANCES_KIND_FROM_ALL (HWLOC_DISTANCES_KIND_FROM_OS|HWLOC_DISTANCES_KIND_FROM_USER)
|
||||
#define HWLOC_DISTANCES_KIND_MEANS_ALL (HWLOC_DISTANCES_KIND_MEANS_LATENCY|HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH)
|
||||
#define HWLOC_DISTANCES_KIND_ALL (HWLOC_DISTANCES_KIND_FROM_ALL|HWLOC_DISTANCES_KIND_MEANS_ALL)
|
||||
#define HWLOC_DISTANCES_KIND_ALL (HWLOC_DISTANCES_KIND_FROM_ALL|HWLOC_DISTANCES_KIND_MEANS_ALL|HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES)
|
||||
#define HWLOC_DISTANCES_ADD_FLAG_ALL (HWLOC_DISTANCES_ADD_FLAG_GROUP|HWLOC_DISTANCES_ADD_FLAG_GROUP_INACCURATE)
|
||||
|
||||
/* The actual function exported to the user
|
||||
*/
|
||||
int hwloc_distances_add(hwloc_topology_t topology,
|
||||
unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *values,
|
||||
unsigned long kind, unsigned long flags)
|
||||
void * hwloc_distances_add_create(hwloc_topology_t topology,
|
||||
const char *name, unsigned long kind,
|
||||
unsigned long flags)
|
||||
{
|
||||
if (!topology->is_loaded) {
|
||||
errno = EINVAL;
|
||||
return NULL;
|
||||
}
|
||||
if (topology->adopted_shmem_addr) {
|
||||
errno = EPERM;
|
||||
return NULL;
|
||||
}
|
||||
if ((kind & ~HWLOC_DISTANCES_KIND_ALL)
|
||||
|| hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_FROM_ALL) > 1
|
||||
|| hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_MEANS_ALL) > 1) {
|
||||
errno = EINVAL;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return hwloc_backend_distances_add_create(topology, name, kind, flags);
|
||||
}
|
||||
|
||||
int hwloc_distances_add_values(hwloc_topology_t topology,
|
||||
void *handle,
|
||||
unsigned nbobjs, hwloc_obj_t *objs,
|
||||
hwloc_uint64_t *values,
|
||||
unsigned long flags)
|
||||
{
|
||||
unsigned i;
|
||||
uint64_t *_values;
|
||||
hwloc_obj_t *_objs;
|
||||
int err;
|
||||
|
||||
if (nbobjs < 2 || !objs || !values || !topology->is_loaded) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
if (topology->adopted_shmem_addr) {
|
||||
errno = EPERM;
|
||||
return -1;
|
||||
}
|
||||
if ((kind & ~HWLOC_DISTANCES_KIND_ALL)
|
||||
|| hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_FROM_ALL) != 1
|
||||
|| hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_MEANS_ALL) != 1
|
||||
|| (flags & ~HWLOC_DISTANCES_ADD_FLAG_ALL)) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* no strict need to check for duplicates, things shouldn't break */
|
||||
|
||||
for(i=1; i<nbobjs; i++)
|
||||
if (!objs[i]) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* copy the input arrays and give them to the topology */
|
||||
@@ -506,22 +660,78 @@ int hwloc_distances_add(hwloc_topology_t topology,
|
||||
|
||||
memcpy(_objs, objs, nbobjs*sizeof(hwloc_obj_t));
|
||||
memcpy(_values, values, nbobjs*nbobjs*sizeof(*_values));
|
||||
err = hwloc_internal_distances_add(topology, NULL, nbobjs, _objs, _values, kind, flags);
|
||||
if (err < 0)
|
||||
goto out; /* _objs and _values freed in hwloc_internal_distances_add() */
|
||||
|
||||
err = hwloc_backend_distances_add_values(topology, handle, nbobjs, _objs, _values, flags);
|
||||
if (err < 0) {
|
||||
/* handle was canceled inside hwloc_backend_distances_add_values */
|
||||
handle = NULL;
|
||||
goto out_with_arrays;
|
||||
}
|
||||
|
||||
return 0;
|
||||
|
||||
out_with_arrays:
|
||||
free(_objs);
|
||||
free(_values);
|
||||
out:
|
||||
if (handle)
|
||||
hwloc_backend_distances_add__cancel(handle);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int
|
||||
hwloc_distances_add_commit(hwloc_topology_t topology,
|
||||
void *handle,
|
||||
unsigned long flags)
|
||||
{
|
||||
int err;
|
||||
|
||||
if (flags & ~HWLOC_DISTANCES_ADD_FLAG_ALL) {
|
||||
errno = EINVAL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
err = hwloc_backend_distances_add_commit(topology, handle, flags);
|
||||
if (err < 0) {
|
||||
/* handle was canceled inside hwloc_backend_distances_add_commit */
|
||||
handle = NULL;
|
||||
goto out;
|
||||
}
|
||||
|
||||
/* in case we added some groups, see if we need to reconnect */
|
||||
hwloc_topology_reconnect(topology, 0);
|
||||
|
||||
return 0;
|
||||
|
||||
out_with_arrays:
|
||||
free(_values);
|
||||
free(_objs);
|
||||
out:
|
||||
if (handle)
|
||||
hwloc_backend_distances_add__cancel(handle);
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* deprecated all-in-one user function */
|
||||
int hwloc_distances_add(hwloc_topology_t topology,
|
||||
unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *values,
|
||||
unsigned long kind, unsigned long flags)
|
||||
{
|
||||
void *handle;
|
||||
int err;
|
||||
|
||||
handle = hwloc_distances_add_create(topology, NULL, kind, 0);
|
||||
if (!handle)
|
||||
return -1;
|
||||
|
||||
err = hwloc_distances_add_values(topology, handle, nbobjs, objs, values, 0);
|
||||
if (err < 0)
|
||||
return -1;
|
||||
|
||||
err = hwloc_distances_add_commit(topology, handle, flags);
|
||||
if (err < 0)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/******************************************************
|
||||
* Refresh objects in distances
|
||||
*/
|
||||
@@ -529,6 +739,7 @@ int hwloc_distances_add(hwloc_topology_t topology,
|
||||
static void
|
||||
hwloc_internal_distances_restrict(hwloc_obj_t *objs,
|
||||
uint64_t *indexes,
|
||||
hwloc_obj_type_t *different_types,
|
||||
uint64_t *values,
|
||||
unsigned nbobjs, unsigned disappeared)
|
||||
{
|
||||
@@ -550,6 +761,8 @@ hwloc_internal_distances_restrict(hwloc_obj_t *objs,
|
||||
objs[newi] = objs[i];
|
||||
if (indexes)
|
||||
indexes[newi] = indexes[i];
|
||||
if (different_types)
|
||||
different_types[newi] = different_types[i];
|
||||
newi++;
|
||||
}
|
||||
}
|
||||
@@ -594,7 +807,7 @@ hwloc_internal_distances_refresh_one(hwloc_topology_t topology,
|
||||
return -1;
|
||||
|
||||
if (disappeared) {
|
||||
hwloc_internal_distances_restrict(objs, dist->indexes, dist->values, nbobjs, disappeared);
|
||||
hwloc_internal_distances_restrict(objs, dist->indexes, dist->different_types, dist->values, nbobjs, disappeared);
|
||||
dist->nbobjs -= disappeared;
|
||||
}
|
||||
|
||||
@@ -647,7 +860,7 @@ struct hwloc_distances_container_s {
|
||||
struct hwloc_distances_s distances;
|
||||
};
|
||||
|
||||
#define HWLOC_DISTANCES_CONTAINER_OFFSET ((char*)&((struct hwloc_distances_container_s*)NULL)->distances - (char*)NULL)
|
||||
#define HWLOC_DISTANCES_CONTAINER_OFFSET ((uintptr_t)(&((struct hwloc_distances_container_s*)NULL)->distances) - (uintptr_t)NULL)
|
||||
#define HWLOC_DISTANCES_CONTAINER(_d) (struct hwloc_distances_container_s *) ( ((char*)_d) - HWLOC_DISTANCES_CONTAINER_OFFSET )
|
||||
|
||||
static struct hwloc_internal_distances_s *
|
||||
@@ -1087,3 +1300,210 @@ hwloc__groups_by_distances(struct hwloc_topology *topology,
|
||||
out_with_groupids:
|
||||
free(groupids);
|
||||
}
|
||||
|
||||
static int
|
||||
hwloc__distances_transform_remove_null(struct hwloc_distances_s *distances)
|
||||
{
|
||||
hwloc_uint64_t *values = distances->values;
|
||||
hwloc_obj_t *objs = distances->objs;
|
||||
unsigned i, nb, nbobjs = distances->nbobjs;
|
||||
hwloc_obj_type_t unique_type;
|
||||
|
||||
for(i=0, nb=0; i<nbobjs; i++)
|
||||
if (objs[i])
|
||||
nb++;
|
||||
|
||||
if (nb < 2) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (nb == nbobjs)
|
||||
return 0;
|
||||
|
||||
hwloc_internal_distances_restrict(objs, NULL, NULL, values, nbobjs, nbobjs-nb);
|
||||
distances->nbobjs = nb;
|
||||
|
||||
/* update HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES for convenience */
|
||||
unique_type = objs[0]->type;
|
||||
for(i=1; i<nb; i++)
|
||||
if (objs[i]->type != unique_type) {
|
||||
unique_type = HWLOC_OBJ_TYPE_NONE;
|
||||
break;
|
||||
}
|
||||
if (unique_type == HWLOC_OBJ_TYPE_NONE)
|
||||
distances->kind |= HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES;
|
||||
else
|
||||
distances->kind &= ~HWLOC_DISTANCES_KIND_HETEROGENEOUS_TYPES;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
hwloc__distances_transform_links(struct hwloc_distances_s *distances)
|
||||
{
|
||||
/* FIXME: we should look for the greatest common denominator
|
||||
* but we just use the smallest positive value, that's enough for current use-cases.
|
||||
* We'll return -1 in other cases.
|
||||
*/
|
||||
hwloc_uint64_t divider, *values = distances->values;
|
||||
unsigned i, nbobjs = distances->nbobjs;
|
||||
|
||||
if (!(distances->kind & HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH)) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
for(i=0; i<nbobjs; i++)
|
||||
values[i*nbobjs+i] = 0;
|
||||
|
||||
/* find the smallest positive value */
|
||||
divider = 0;
|
||||
for(i=0; i<nbobjs*nbobjs; i++)
|
||||
if (values[i] && (!divider || values[i] < divider))
|
||||
divider = values[i];
|
||||
|
||||
if (!divider)
|
||||
/* only zeroes? do nothing */
|
||||
return 0;
|
||||
|
||||
/* check it divides all values */
|
||||
for(i=0; i<nbobjs*nbobjs; i++)
|
||||
if (values[i]%divider) {
|
||||
errno = ENOENT;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* ok, now divide for real */
|
||||
for(i=0; i<nbobjs*nbobjs; i++)
|
||||
values[i] /= divider;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static __hwloc_inline int is_nvswitch(hwloc_obj_t obj)
|
||||
{
|
||||
return obj && obj->subtype && !strcmp(obj->subtype, "NVSwitch");
|
||||
}
|
||||
|
||||
static int
|
||||
hwloc__distances_transform_merge_switch_ports(hwloc_topology_t topology,
|
||||
struct hwloc_distances_s *distances)
|
||||
{
|
||||
struct hwloc_internal_distances_s *dist = hwloc__internal_distances_from_public(topology, distances);
|
||||
hwloc_obj_t *objs = distances->objs;
|
||||
hwloc_uint64_t *values = distances->values;
|
||||
unsigned first, i, j, nbobjs = distances->nbobjs;
|
||||
|
||||
if (strcmp(dist->name, "NVLinkBandwidth")) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* find the first port */
|
||||
first = (unsigned) -1;
|
||||
for(i=0; i<nbobjs; i++)
|
||||
if (is_nvswitch(objs[i])) {
|
||||
first = i;
|
||||
break;
|
||||
}
|
||||
if (first == (unsigned)-1) {
|
||||
errno = ENOENT;
|
||||
return -1;
|
||||
}
|
||||
|
||||
for(j=i+1; j<nbobjs; j++) {
|
||||
if (is_nvswitch(objs[j])) {
|
||||
/* another port, merge it */
|
||||
unsigned k;
|
||||
for(k=0; k<nbobjs; k++) {
|
||||
if (k==i || k==j)
|
||||
continue;
|
||||
values[k*nbobjs+i] += values[k*nbobjs+j];
|
||||
values[k*nbobjs+j] = 0;
|
||||
values[i*nbobjs+k] += values[j*nbobjs+k];
|
||||
values[j*nbobjs+k] = 0;
|
||||
}
|
||||
values[i*nbobjs+i] += values[j*nbobjs+j];
|
||||
values[j*nbobjs+j] = 0;
|
||||
}
|
||||
/* the caller will also call REMOVE_NULL to remove other ports */
|
||||
objs[j] = NULL;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
hwloc__distances_transform_transitive_closure(hwloc_topology_t topology,
|
||||
struct hwloc_distances_s *distances)
|
||||
{
|
||||
struct hwloc_internal_distances_s *dist = hwloc__internal_distances_from_public(topology, distances);
|
||||
hwloc_obj_t *objs = distances->objs;
|
||||
hwloc_uint64_t *values = distances->values;
|
||||
unsigned nbobjs = distances->nbobjs;
|
||||
unsigned i, j, k;
|
||||
|
||||
if (strcmp(dist->name, "NVLinkBandwidth")) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
for(i=0; i<nbobjs; i++) {
|
||||
hwloc_uint64_t bw_i2sw = 0;
|
||||
if (is_nvswitch(objs[i]))
|
||||
continue;
|
||||
/* count our BW to the switch */
|
||||
for(k=0; k<nbobjs; k++)
|
||||
if (is_nvswitch(objs[k]))
|
||||
bw_i2sw += values[i*nbobjs+k];
|
||||
|
||||
for(j=0; j<nbobjs; j++) {
|
||||
hwloc_uint64_t bw_sw2j = 0;
|
||||
if (i == j || is_nvswitch(objs[j]))
|
||||
continue;
|
||||
/* count our BW from the switch */
|
||||
for(k=0; k<nbobjs; k++)
|
||||
if (is_nvswitch(objs[k]))
|
||||
bw_sw2j += values[k*nbobjs+j];
|
||||
|
||||
/* bandwidth from i to j is now min(i2sw,sw2j) */
|
||||
values[i*nbobjs+j] = bw_i2sw > bw_sw2j ? bw_sw2j : bw_i2sw;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
hwloc_distances_transform(hwloc_topology_t topology,
|
||||
struct hwloc_distances_s *distances,
|
||||
enum hwloc_distances_transform_e transform,
|
||||
void *transform_attr,
|
||||
unsigned long flags)
|
||||
{
|
||||
if (flags || transform_attr) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
switch (transform) {
|
||||
case HWLOC_DISTANCES_TRANSFORM_REMOVE_NULL:
|
||||
return hwloc__distances_transform_remove_null(distances);
|
||||
case HWLOC_DISTANCES_TRANSFORM_LINKS:
|
||||
return hwloc__distances_transform_links(distances);
|
||||
case HWLOC_DISTANCES_TRANSFORM_MERGE_SWITCH_PORTS:
|
||||
{
|
||||
int err;
|
||||
err = hwloc__distances_transform_merge_switch_ports(topology, distances);
|
||||
if (!err)
|
||||
err = hwloc__distances_transform_remove_null(distances);
|
||||
return err;
|
||||
}
|
||||
case HWLOC_DISTANCES_TRANSFORM_TRANSITIVE_CLOSURE:
|
||||
return hwloc__distances_transform_transitive_closure(topology, distances);
|
||||
default:
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
731
src/3rdparty/hwloc/src/memattrs.c
vendored
731
src/3rdparty/hwloc/src/memattrs.c
vendored
@@ -1,11 +1,12 @@
|
||||
/*
|
||||
* Copyright © 2020 Inria. All rights reserved.
|
||||
* Copyright © 2020-2024 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
#include "private/autogen/config.h"
|
||||
#include "hwloc.h"
|
||||
#include "private/private.h"
|
||||
#include "private/debug.h"
|
||||
|
||||
|
||||
/*****************************
|
||||
@@ -13,13 +14,26 @@
|
||||
*/
|
||||
|
||||
static __hwloc_inline
|
||||
hwloc_uint64_t hwloc__memattr_get_convenience_value(hwloc_memattr_id_t id,
|
||||
hwloc_obj_t node)
|
||||
int hwloc__memattr_get_convenience_value(hwloc_memattr_id_t id,
|
||||
hwloc_obj_t node,
|
||||
hwloc_uint64_t *valuep)
|
||||
{
|
||||
if (id == HWLOC_MEMATTR_ID_CAPACITY)
|
||||
return node->attr->numanode.local_memory;
|
||||
else if (id == HWLOC_MEMATTR_ID_LOCALITY)
|
||||
return hwloc_bitmap_weight(node->cpuset);
|
||||
if (id == HWLOC_MEMATTR_ID_CAPACITY) {
|
||||
if (node->type != HWLOC_OBJ_NUMANODE) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
*valuep = node->attr->numanode.local_memory;
|
||||
return 0;
|
||||
}
|
||||
else if (id == HWLOC_MEMATTR_ID_LOCALITY) {
|
||||
if (!node->cpuset) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
*valuep = hwloc_bitmap_weight(node->cpuset);
|
||||
return 0;
|
||||
}
|
||||
else
|
||||
assert(0);
|
||||
return 0; /* shut up the compiler */
|
||||
@@ -49,36 +63,51 @@ hwloc__setup_memattr(struct hwloc_internal_memattr_s *imattr,
|
||||
void
|
||||
hwloc_internal_memattrs_prepare(struct hwloc_topology *topology)
|
||||
{
|
||||
#define NR_DEFAULT_MEMATTRS 4
|
||||
topology->memattrs = malloc(NR_DEFAULT_MEMATTRS * sizeof(*topology->memattrs));
|
||||
topology->memattrs = malloc(HWLOC_MEMATTR_ID_MAX * sizeof(*topology->memattrs));
|
||||
if (!topology->memattrs)
|
||||
return;
|
||||
|
||||
assert(HWLOC_MEMATTR_ID_CAPACITY < NR_DEFAULT_MEMATTRS);
|
||||
hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_CAPACITY],
|
||||
(char *) "Capacity",
|
||||
HWLOC_MEMATTR_FLAG_HIGHER_FIRST,
|
||||
HWLOC_IMATTR_FLAG_STATIC_NAME|HWLOC_IMATTR_FLAG_CONVENIENCE);
|
||||
|
||||
assert(HWLOC_MEMATTR_ID_LOCALITY < NR_DEFAULT_MEMATTRS);
|
||||
hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_LOCALITY],
|
||||
(char *) "Locality",
|
||||
HWLOC_MEMATTR_FLAG_LOWER_FIRST,
|
||||
HWLOC_IMATTR_FLAG_STATIC_NAME|HWLOC_IMATTR_FLAG_CONVENIENCE);
|
||||
|
||||
assert(HWLOC_MEMATTR_ID_BANDWIDTH < NR_DEFAULT_MEMATTRS);
|
||||
hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_BANDWIDTH],
|
||||
(char *) "Bandwidth",
|
||||
HWLOC_MEMATTR_FLAG_HIGHER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR,
|
||||
HWLOC_IMATTR_FLAG_STATIC_NAME);
|
||||
|
||||
assert(HWLOC_MEMATTR_ID_LATENCY < NR_DEFAULT_MEMATTRS);
|
||||
hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_READ_BANDWIDTH],
|
||||
(char *) "ReadBandwidth",
|
||||
HWLOC_MEMATTR_FLAG_HIGHER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR,
|
||||
HWLOC_IMATTR_FLAG_STATIC_NAME);
|
||||
|
||||
hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_WRITE_BANDWIDTH],
|
||||
(char *) "WriteBandwidth",
|
||||
HWLOC_MEMATTR_FLAG_HIGHER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR,
|
||||
HWLOC_IMATTR_FLAG_STATIC_NAME);
|
||||
|
||||
hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_LATENCY],
|
||||
(char *) "Latency",
|
||||
HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR,
|
||||
HWLOC_IMATTR_FLAG_STATIC_NAME);
|
||||
|
||||
topology->nr_memattrs = NR_DEFAULT_MEMATTRS;
|
||||
hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_READ_LATENCY],
|
||||
(char *) "ReadLatency",
|
||||
HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR,
|
||||
HWLOC_IMATTR_FLAG_STATIC_NAME);
|
||||
|
||||
hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_WRITE_LATENCY],
|
||||
(char *) "WriteLatency",
|
||||
HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR,
|
||||
HWLOC_IMATTR_FLAG_STATIC_NAME);
|
||||
|
||||
topology->nr_memattrs = HWLOC_MEMATTR_ID_MAX;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -127,6 +156,8 @@ hwloc_internal_memattrs_dup(struct hwloc_topology *new, struct hwloc_topology *o
|
||||
struct hwloc_internal_memattr_s *imattrs;
|
||||
hwloc_memattr_id_t id;
|
||||
|
||||
/* old->nr_memattrs is always > 0 thanks to default memattrs */
|
||||
|
||||
imattrs = hwloc_tma_malloc(tma, old->nr_memattrs * sizeof(*imattrs));
|
||||
if (!imattrs)
|
||||
return -1;
|
||||
@@ -604,7 +635,7 @@ hwloc_memattr_get_targets(hwloc_topology_t topology,
|
||||
if (found<max) {
|
||||
targets[found] = node;
|
||||
if (values)
|
||||
values[found] = hwloc__memattr_get_convenience_value(id, node);
|
||||
hwloc__memattr_get_convenience_value(id, node, &values[found]);
|
||||
}
|
||||
found++;
|
||||
}
|
||||
@@ -730,7 +761,7 @@ hwloc_memattr_get_initiators(hwloc_topology_t topology,
|
||||
struct hwloc_internal_memattr_target_s *imtg;
|
||||
unsigned i, max;
|
||||
|
||||
if (flags) {
|
||||
if (flags || !target_node) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
@@ -792,7 +823,7 @@ hwloc_memattr_get_value(hwloc_topology_t topology,
|
||||
struct hwloc_internal_memattr_s *imattr;
|
||||
struct hwloc_internal_memattr_target_s *imtg;
|
||||
|
||||
if (flags) {
|
||||
if (flags || !target_node) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
@@ -805,8 +836,7 @@ hwloc_memattr_get_value(hwloc_topology_t topology,
|
||||
|
||||
if (imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE) {
|
||||
/* convenience attributes */
|
||||
*valuep = hwloc__memattr_get_convenience_value(id, target_node);
|
||||
return 0;
|
||||
return hwloc__memattr_get_convenience_value(id, target_node, valuep);
|
||||
}
|
||||
|
||||
/* normal attributes */
|
||||
@@ -918,7 +948,7 @@ hwloc_memattr_set_value(hwloc_topology_t topology,
|
||||
{
|
||||
struct hwloc_internal_location_s iloc, *ilocp;
|
||||
|
||||
if (flags) {
|
||||
if (flags || !target_node) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
@@ -989,10 +1019,10 @@ hwloc_memattr_get_best_target(hwloc_topology_t topology,
|
||||
/* convenience attributes */
|
||||
for(j=0; ; j++) {
|
||||
hwloc_obj_t node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, j);
|
||||
hwloc_uint64_t value;
|
||||
hwloc_uint64_t value = 0;
|
||||
if (!node)
|
||||
break;
|
||||
value = hwloc__memattr_get_convenience_value(id, node);
|
||||
hwloc__memattr_get_convenience_value(id, node, &value);
|
||||
hwloc__update_best_target(&best, &best_value, &found,
|
||||
node, value,
|
||||
imattr->flags & HWLOC_MEMATTR_FLAG_HIGHER_FIRST);
|
||||
@@ -1075,7 +1105,7 @@ hwloc_memattr_get_best_initiator(hwloc_topology_t topology,
|
||||
int found;
|
||||
unsigned i;
|
||||
|
||||
if (flags) {
|
||||
if (flags || !target_node) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
@@ -1195,3 +1225,658 @@ hwloc_get_local_numanode_objs(hwloc_topology_t topology,
|
||||
*nrp = i;
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
/**************************************
|
||||
* Using memattrs to identify HBM/DRAM
|
||||
*/
|
||||
|
||||
/* Kinds of memory a tier may be made of, one bit per kind so that
 * they can be OR'ed together (the CXL bit is combined with another kind).
 *
 * WARNING: keep higher BW types first for compare_tiers_by_bw_and_type() when BW info is missing
 */
enum hwloc_memory_tier_type_e {
  HWLOC_MEMORY_TIER_HBM  = 0x01UL,
  HWLOC_MEMORY_TIER_DRAM = 0x02UL,
  HWLOC_MEMORY_TIER_GPU  = 0x04UL,
  HWLOC_MEMORY_TIER_SPM  = 0x08UL, /* Specific-Purpose Memory is usually HBM, we'll use BW to confirm or force */
  HWLOC_MEMORY_TIER_NVM  = 0x10UL,
  HWLOC_MEMORY_TIER_CXL  = 0x20UL
};

/* type used to store a (possibly OR'ed) combination of the above bits */
typedef unsigned long hwloc_memory_tier_type_t;

/* no bit set at all: the kind of memory is not known */
#define HWLOC_MEMORY_TIER_UNKNOWN 0UL
|
||||
|
||||
/* Convert a tier type into its constant name.
 *
 * Returns a pointer to a static string, or NULL when the type has no name
 * (unknown type or unsupported combination of bits).
 * The CXL bit alone is reported as "CXL-DRAM".
 */
static const char * hwloc_memory_tier_type_snprintf(hwloc_memory_tier_type_t type)
{
  static const struct {
    hwloc_memory_tier_type_t type;
    const char *name;
  } known[] = {
    { HWLOC_MEMORY_TIER_DRAM, "DRAM" },
    { HWLOC_MEMORY_TIER_HBM, "HBM" },
    { HWLOC_MEMORY_TIER_GPU, "GPUMemory" },
    { HWLOC_MEMORY_TIER_SPM, "SPM" },
    { HWLOC_MEMORY_TIER_NVM, "NVM" },
    { HWLOC_MEMORY_TIER_CXL, "CXL-DRAM" },
    { HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_DRAM, "CXL-DRAM" },
    { HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_HBM, "CXL-HBM" },
    { HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_GPU, "CXL-GPUMemory" },
    { HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_SPM, "CXL-SPM" },
    { HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_NVM, "CXL-NVM" }
  };
  unsigned k;

  for(k=0; k<sizeof(known)/sizeof(known[0]); k++)
    if (known[k].type == type)
      return known[k].name;

  return NULL;
}
|
||||
|
||||
static hwloc_memory_tier_type_t hwloc_memory_tier_type_sscanf(const char *name)
|
||||
{
|
||||
if (!strcasecmp(name, "DRAM"))
|
||||
return HWLOC_MEMORY_TIER_DRAM;
|
||||
if (!strcasecmp(name, "HBM"))
|
||||
return HWLOC_MEMORY_TIER_HBM;
|
||||
if (!strcasecmp(name, "GPUMemory"))
|
||||
return HWLOC_MEMORY_TIER_GPU;
|
||||
if (!strcasecmp(name, "SPM"))
|
||||
return HWLOC_MEMORY_TIER_SPM;
|
||||
if (!strcasecmp(name, "NVM"))
|
||||
return HWLOC_MEMORY_TIER_NVM;
|
||||
if (!strcasecmp(name, "CXL-DRAM"))
|
||||
return HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_DRAM;
|
||||
if (!strcasecmp(name, "CXL-HBM"))
|
||||
return HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_HBM;
|
||||
if (!strcasecmp(name, "CXL-GPUMemory"))
|
||||
return HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_GPU;
|
||||
if (!strcasecmp(name, "CXL-SPM"))
|
||||
return HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_SPM;
|
||||
if (!strcasecmp(name, "CXL-NVM"))
|
||||
return HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_NVM;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* factorized tier, grouping multiple nodes */
|
||||
struct hwloc_memory_tier_s {
|
||||
hwloc_nodeset_t nodeset;
|
||||
uint64_t local_bw_min, local_bw_max;
|
||||
uint64_t local_lat_min, local_lat_max;
|
||||
hwloc_memory_tier_type_t type;
|
||||
};
|
||||
|
||||
/* early tier discovery, one entry per node */
|
||||
struct hwloc_memory_node_info_s {
|
||||
hwloc_obj_t node;
|
||||
uint64_t local_bw;
|
||||
uint64_t local_lat;
|
||||
hwloc_memory_tier_type_t type;
|
||||
unsigned rank;
|
||||
};
|
||||
|
||||
static int compare_node_infos_by_type_and_bw(const void *_a, const void *_b)
|
||||
{
|
||||
const struct hwloc_memory_node_info_s *a = _a, *b = _b;
|
||||
/* sort by type of node first */
|
||||
if (a->type != b->type)
|
||||
return a->type - b->type;
|
||||
/* then by bandwidth */
|
||||
if (a->local_bw > b->local_bw)
|
||||
return -1;
|
||||
else if (a->local_bw < b->local_bw)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int compare_tiers_by_bw_and_type(const void *_a, const void *_b)
|
||||
{
|
||||
const struct hwloc_memory_tier_s *a = _a, *b = _b;
|
||||
/* sort by (average) BW first */
|
||||
if (a->local_bw_min && b->local_bw_min) {
|
||||
if (a->local_bw_min + a->local_bw_max > b->local_bw_min + b->local_bw_max)
|
||||
return -1;
|
||||
else if (a->local_bw_min + a->local_bw_max < b->local_bw_min + b->local_bw_max)
|
||||
return 1;
|
||||
}
|
||||
/* then by tier type */
|
||||
if (a->type != b->type)
|
||||
return a->type - b->type;
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct hwloc_memory_tier_s *
|
||||
hwloc__group_memory_tiers(hwloc_topology_t topology,
|
||||
unsigned *nr_tiers_p)
|
||||
{
|
||||
struct hwloc_internal_memattr_s *imattr_bw, *imattr_lat;
|
||||
struct hwloc_memory_node_info_s *nodeinfos;
|
||||
struct hwloc_memory_tier_s *tiers;
|
||||
unsigned nr_tiers;
|
||||
float bw_threshold = 0.1;
|
||||
float lat_threshold = 0.1;
|
||||
const char *env;
|
||||
unsigned i, j, n;
|
||||
|
||||
n = hwloc_get_nbobjs_by_depth(topology, HWLOC_TYPE_DEPTH_NUMANODE);
|
||||
assert(n);
|
||||
|
||||
env = getenv("HWLOC_MEMTIERS_BANDWIDTH_THRESHOLD");
|
||||
if (env)
|
||||
bw_threshold = atof(env);
|
||||
|
||||
env = getenv("HWLOC_MEMTIERS_LATENCY_THRESHOLD");
|
||||
if (env)
|
||||
lat_threshold = atof(env);
|
||||
|
||||
imattr_bw = &topology->memattrs[HWLOC_MEMATTR_ID_BANDWIDTH];
|
||||
imattr_lat = &topology->memattrs[HWLOC_MEMATTR_ID_LATENCY];
|
||||
|
||||
if (!(imattr_bw->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
|
||||
hwloc__imattr_refresh(topology, imattr_bw);
|
||||
if (!(imattr_lat->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
|
||||
hwloc__imattr_refresh(topology, imattr_lat);
|
||||
|
||||
nodeinfos = malloc(n * sizeof(*nodeinfos));
|
||||
if (!nodeinfos)
|
||||
return NULL;
|
||||
|
||||
for(i=0; i<n; i++) {
|
||||
hwloc_obj_t node;
|
||||
const char *daxtype;
|
||||
struct hwloc_internal_location_s iloc;
|
||||
struct hwloc_internal_memattr_target_s *imtg;
|
||||
|
||||
node = hwloc_get_obj_by_depth(topology, HWLOC_TYPE_DEPTH_NUMANODE, i);
|
||||
assert(node);
|
||||
nodeinfos[i].node = node;
|
||||
|
||||
/* defaults to unknown */
|
||||
nodeinfos[i].type = HWLOC_MEMORY_TIER_UNKNOWN;
|
||||
nodeinfos[i].local_bw = 0;
|
||||
nodeinfos[i].local_lat = 0;
|
||||
|
||||
daxtype = hwloc_obj_get_info_by_name(node, "DAXType");
|
||||
/* mark NVM, SPM and GPU nodes */
|
||||
if (node->subtype && !strcmp(node->subtype, "GPUMemory"))
|
||||
nodeinfos[i].type = HWLOC_MEMORY_TIER_GPU;
|
||||
else if (daxtype && !strcmp(daxtype, "NVM"))
|
||||
nodeinfos[i].type = HWLOC_MEMORY_TIER_NVM;
|
||||
else if (daxtype && !strcmp(daxtype, "SPM"))
|
||||
nodeinfos[i].type = HWLOC_MEMORY_TIER_SPM;
|
||||
/* add CXL flag */
|
||||
if (hwloc_obj_get_info_by_name(node, "CXLDevice") != NULL) {
|
||||
/* CXL is always SPM for now. HBM and DRAM not possible here yet.
|
||||
* Hence remove all but NVM first.
|
||||
*/
|
||||
nodeinfos[i].type &= HWLOC_MEMORY_TIER_NVM;
|
||||
nodeinfos[i].type |= HWLOC_MEMORY_TIER_CXL;
|
||||
}
|
||||
|
||||
/* get local bandwidth */
|
||||
imtg = NULL;
|
||||
for(j=0; j<imattr_bw->nr_targets; j++)
|
||||
if (imattr_bw->targets[j].obj == node) {
|
||||
imtg = &imattr_bw->targets[j];
|
||||
break;
|
||||
}
|
||||
if (imtg && !hwloc_bitmap_iszero(node->cpuset)) {
|
||||
struct hwloc_internal_memattr_initiator_s *imi;
|
||||
iloc.type = HWLOC_LOCATION_TYPE_CPUSET;
|
||||
iloc.location.cpuset = node->cpuset;
|
||||
imi = hwloc__memattr_target_get_initiator(imtg, &iloc, 0);
|
||||
if (imi)
|
||||
nodeinfos[i].local_bw = imi->value;
|
||||
}
|
||||
/* get local latency */
|
||||
imtg = NULL;
|
||||
for(j=0; j<imattr_lat->nr_targets; j++)
|
||||
if (imattr_lat->targets[j].obj == node) {
|
||||
imtg = &imattr_lat->targets[j];
|
||||
break;
|
||||
}
|
||||
if (imtg && !hwloc_bitmap_iszero(node->cpuset)) {
|
||||
struct hwloc_internal_memattr_initiator_s *imi;
|
||||
iloc.type = HWLOC_LOCATION_TYPE_CPUSET;
|
||||
iloc.location.cpuset = node->cpuset;
|
||||
imi = hwloc__memattr_target_get_initiator(imtg, &iloc, 0);
|
||||
if (imi)
|
||||
nodeinfos[i].local_lat = imi->value;
|
||||
}
|
||||
}
|
||||
|
||||
/* Sort nodes.
|
||||
* We could also sort by the existing subtype.
|
||||
* KNL is the only case where subtypes are set in backends, but we set memattrs as well there.
|
||||
* Also HWLOC_MEMTIERS_REFRESH would be a special value to ignore existing subtypes.
|
||||
*/
|
||||
hwloc_debug("Sorting memory node infos...\n");
|
||||
qsort(nodeinfos, n, sizeof(*nodeinfos), compare_node_infos_by_type_and_bw);
|
||||
#ifdef HWLOC_DEBUG
|
||||
for(i=0; i<n; i++)
|
||||
hwloc_debug(" node info %u = node L#%u P#%u with info type %lx and local BW %llu lat %llu\n",
|
||||
i,
|
||||
nodeinfos[i].node->logical_index, nodeinfos[i].node->os_index,
|
||||
nodeinfos[i].type,
|
||||
(unsigned long long) nodeinfos[i].local_bw,
|
||||
(unsigned long long) nodeinfos[i].local_lat);
|
||||
#endif
|
||||
/* now we have UNKNOWN nodes (sorted by BW only), then known ones */
|
||||
|
||||
/* iterate among them and add a rank value.
|
||||
* start from rank 0 and switch to next rank when the type changes or when the BW or latendy difference is > threshold */
|
||||
hwloc_debug("Starting memory tier #0 and iterating over nodes...\n");
|
||||
nodeinfos[0].rank = 0;
|
||||
for(i=1; i<n; i++) {
|
||||
/* reuse the same rank by default */
|
||||
nodeinfos[i].rank = nodeinfos[i-1].rank;
|
||||
/* comparing type */
|
||||
if (nodeinfos[i].type != nodeinfos[i-1].type) {
|
||||
hwloc_debug(" Switching to memory tier #%u starting with node L#%u P#%u because of type\n",
|
||||
nodeinfos[i].rank, nodeinfos[i].node->logical_index, nodeinfos[i].node->os_index);
|
||||
nodeinfos[i].rank++;
|
||||
continue;
|
||||
}
|
||||
/* comparing bandwidth */
|
||||
if (nodeinfos[i].local_bw && nodeinfos[i-1].local_bw) {
|
||||
float bw_ratio = (float)nodeinfos[i].local_bw/(float)nodeinfos[i-1].local_bw;
|
||||
if (bw_ratio < 1.)
|
||||
bw_ratio = 1./bw_ratio;
|
||||
if (bw_ratio > 1.0 + bw_threshold) {
|
||||
nodeinfos[i].rank++;
|
||||
hwloc_debug(" Switching to memory tier #%u starting with node L#%u P#%u because of bandwidth\n",
|
||||
nodeinfos[i].rank, nodeinfos[i].node->logical_index, nodeinfos[i].node->os_index);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
/* comparing latency */
|
||||
if (nodeinfos[i].local_lat && nodeinfos[i-1].local_lat) {
|
||||
float lat_ratio = (float)nodeinfos[i].local_lat/(float)nodeinfos[i-1].local_lat;
|
||||
if (lat_ratio < 1.)
|
||||
lat_ratio = 1./lat_ratio;
|
||||
if (lat_ratio > 1.0 + lat_threshold) {
|
||||
hwloc_debug(" Switching to memory tier #%u starting with node L#%u P#%u because of latency\n",
|
||||
nodeinfos[i].rank, nodeinfos[i].node->logical_index, nodeinfos[i].node->os_index);
|
||||
nodeinfos[i].rank++;
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
/* FIXME: if there are cpuset-intersecting nodes in same tier, split again? */
|
||||
hwloc_debug(" Found %u tiers total\n", nodeinfos[n-1].rank + 1);
|
||||
|
||||
/* now group nodeinfos into factorized tiers */
|
||||
nr_tiers = nodeinfos[n-1].rank + 1;
|
||||
tiers = calloc(nr_tiers, sizeof(*tiers));
|
||||
if (!tiers)
|
||||
goto out_with_nodeinfos;
|
||||
for(i=0; i<nr_tiers; i++) {
|
||||
tiers[i].nodeset = hwloc_bitmap_alloc();
|
||||
if (!tiers[i].nodeset)
|
||||
goto out_with_tiers;
|
||||
tiers[i].local_bw_min = tiers[i].local_bw_max = 0;
|
||||
tiers[i].local_lat_min = tiers[i].local_lat_max = 0;
|
||||
tiers[i].type = HWLOC_MEMORY_TIER_UNKNOWN;
|
||||
}
|
||||
for(i=0; i<n; i++) {
|
||||
unsigned rank = nodeinfos[i].rank;
|
||||
assert(rank < nr_tiers);
|
||||
hwloc_bitmap_set(tiers[rank].nodeset, nodeinfos[i].node->os_index);
|
||||
assert(tiers[rank].type == HWLOC_MEMORY_TIER_UNKNOWN
|
||||
|| tiers[rank].type == nodeinfos[i].type);
|
||||
tiers[rank].type = nodeinfos[i].type;
|
||||
/* nodeinfos are sorted in BW order, no need to compare */
|
||||
if (!tiers[rank].local_bw_min)
|
||||
tiers[rank].local_bw_min = nodeinfos[i].local_bw;
|
||||
tiers[rank].local_bw_max = nodeinfos[i].local_bw;
|
||||
/* compare latencies to update min/max */
|
||||
if (!tiers[rank].local_lat_min || nodeinfos[i].local_lat < tiers[rank].local_lat_min)
|
||||
tiers[rank].local_lat_min = nodeinfos[i].local_lat;
|
||||
if (!tiers[rank].local_lat_max || nodeinfos[i].local_lat > tiers[rank].local_lat_max)
|
||||
tiers[rank].local_lat_max = nodeinfos[i].local_lat;
|
||||
}
|
||||
|
||||
free(nodeinfos);
|
||||
*nr_tiers_p = nr_tiers;
|
||||
return tiers;
|
||||
|
||||
out_with_tiers:
|
||||
for(i=0; i<nr_tiers; i++)
|
||||
hwloc_bitmap_free(tiers[i].nodeset);
|
||||
free(tiers);
|
||||
out_with_nodeinfos:
|
||||
free(nodeinfos);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* Bits enabling optional assumptions when guessing tier types */
enum hwloc_guess_memtiers_flag {
  /* assume that the tier containing node 0 is DRAM */
  HWLOC_GUESS_MEMTIERS_FLAG_NODE0_IS_DRAM = 0x1,
  /* assume that SPM tiers are HBM */
  HWLOC_GUESS_MEMTIERS_FLAG_SPM_IS_HBM = 0x2
};
|
||||
|
||||
static int
|
||||
hwloc__guess_dram_hbm_tiers(struct hwloc_memory_tier_s *tier1,
|
||||
struct hwloc_memory_tier_s *tier2,
|
||||
unsigned long flags)
|
||||
{
|
||||
struct hwloc_memory_tier_s *tmp;
|
||||
|
||||
if (!tier1->local_bw_min || !tier2->local_bw_min) {
|
||||
hwloc_debug(" Missing BW info\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* reorder tiers by BW */
|
||||
if (tier1->local_bw_min > tier2->local_bw_min) {
|
||||
tmp = tier1; tier1 = tier2; tier2 = tmp;
|
||||
}
|
||||
/* tier1 < tier2 */
|
||||
|
||||
hwloc_debug(" tier1 BW %llu-%llu vs tier2 BW %llu-%llu\n",
|
||||
(unsigned long long) tier1->local_bw_min,
|
||||
(unsigned long long) tier1->local_bw_max,
|
||||
(unsigned long long) tier2->local_bw_min,
|
||||
(unsigned long long) tier2->local_bw_max);
|
||||
if (tier2->local_bw_min <= tier1->local_bw_max * 2) {
|
||||
/* tier2 BW isn't 2x tier1, we cannot guess HBM */
|
||||
hwloc_debug(" BW difference isn't >2x\n");
|
||||
return -1;
|
||||
}
|
||||
/* tier2 BW is >2x tier1 */
|
||||
|
||||
if ((flags & HWLOC_GUESS_MEMTIERS_FLAG_NODE0_IS_DRAM)
|
||||
&& hwloc_bitmap_isset(tier2->nodeset, 0)) {
|
||||
/* node0 is not DRAM, and we assume that's not possible */
|
||||
hwloc_debug(" node0 shouldn't have HBM BW\n");
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* assume tier1 == DRAM and tier2 == HBM */
|
||||
tier1->type = HWLOC_MEMORY_TIER_DRAM;
|
||||
tier2->type = HWLOC_MEMORY_TIER_HBM;
|
||||
hwloc_debug(" Success\n");
|
||||
return 0;
|
||||
}
|
||||
|
||||
static int
|
||||
hwloc__guess_memory_tiers_types(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
unsigned nr_tiers,
|
||||
struct hwloc_memory_tier_s *tiers)
|
||||
{
|
||||
unsigned long flags;
|
||||
const char *env;
|
||||
unsigned nr_unknown, nr_spm;
|
||||
struct hwloc_memory_tier_s *unknown_tier[2], *spm_tier;
|
||||
unsigned i;
|
||||
|
||||
flags = 0;
|
||||
env = getenv("HWLOC_MEMTIERS_GUESS");
|
||||
if (env) {
|
||||
if (!strcmp(env, "none"))
|
||||
return 0;
|
||||
/* by default, we don't guess anything unsure */
|
||||
if (!strcmp(env, "all"))
|
||||
/* enable all typical cases */
|
||||
flags = ~0UL;
|
||||
if (strstr(env, "spm_is_hbm")) {
|
||||
hwloc_debug("Assuming SPM-tier is HBM, ignore bandwidth\n");
|
||||
flags |= HWLOC_GUESS_MEMTIERS_FLAG_SPM_IS_HBM;
|
||||
}
|
||||
if (strstr(env, "node0_is_dram")) {
|
||||
hwloc_debug("Assuming node0 is DRAM\n");
|
||||
flags |= HWLOC_GUESS_MEMTIERS_FLAG_NODE0_IS_DRAM;
|
||||
}
|
||||
}
|
||||
|
||||
if (nr_tiers == 1)
|
||||
/* Likely DRAM only, but could also be HBM-only in non-SPM mode.
|
||||
* We cannot be sure, but it doesn't matter since there's a single tier.
|
||||
*/
|
||||
return 0;
|
||||
|
||||
nr_unknown = nr_spm = 0;
|
||||
unknown_tier[0] = unknown_tier[1] = spm_tier = NULL;
|
||||
for(i=0; i<nr_tiers; i++) {
|
||||
switch (tiers[i].type) {
|
||||
case HWLOC_MEMORY_TIER_UNKNOWN:
|
||||
if (nr_unknown < 2)
|
||||
unknown_tier[nr_unknown] = &tiers[i];
|
||||
nr_unknown++;
|
||||
break;
|
||||
case HWLOC_MEMORY_TIER_SPM:
|
||||
spm_tier = &tiers[i];
|
||||
nr_spm++;
|
||||
break;
|
||||
case HWLOC_MEMORY_TIER_DRAM:
|
||||
case HWLOC_MEMORY_TIER_HBM:
|
||||
/* not possible */
|
||||
abort();
|
||||
default:
|
||||
/* ignore HBM, NVM, ... */
|
||||
break;
|
||||
}
|
||||
}
|
||||
hwloc_debug("Found %u unknown memory tiers and %u SPM\n",
|
||||
nr_unknown, nr_spm);
|
||||
|
||||
/* Try to guess DRAM + HBM common cases.
|
||||
* Other things we'd like to detect:
|
||||
* single unknown => DRAM or HBM? HBM won't be SPM on HBM-only CPUs
|
||||
* unknown + CXL DRAM => DRAM or HBM?
|
||||
*/
|
||||
if (nr_unknown == 2 && !nr_spm) {
|
||||
/* 2 unknown, could be DRAM + non-SPM HBM */
|
||||
hwloc_debug(" Trying to guess 2 unknown tiers using BW\n");
|
||||
hwloc__guess_dram_hbm_tiers(unknown_tier[0], unknown_tier[1], flags);
|
||||
} else if (nr_unknown == 1 && nr_spm == 1) {
|
||||
/* 1 unknown + 1 SPM, could be DRAM + SPM HBM */
|
||||
hwloc_debug(" Trying to guess 1 unknown + 1 SPM tiers using BW\n");
|
||||
hwloc__guess_dram_hbm_tiers(unknown_tier[0], spm_tier, flags);
|
||||
}
|
||||
|
||||
if (flags & HWLOC_GUESS_MEMTIERS_FLAG_SPM_IS_HBM) {
|
||||
/* force mark SPM as HBM */
|
||||
for(i=0; i<nr_tiers; i++)
|
||||
if (tiers[i].type == HWLOC_MEMORY_TIER_SPM) {
|
||||
hwloc_debug("Forcing SPM tier to HBM");
|
||||
tiers[i].type = HWLOC_MEMORY_TIER_HBM;
|
||||
}
|
||||
}
|
||||
|
||||
if (flags & HWLOC_GUESS_MEMTIERS_FLAG_NODE0_IS_DRAM) {
|
||||
/* force mark node0's tier as DRAM if we couldn't guess it */
|
||||
for(i=0; i<nr_tiers; i++)
|
||||
if (hwloc_bitmap_isset(tiers[i].nodeset, 0)
|
||||
&& tiers[i].type == HWLOC_MEMORY_TIER_UNKNOWN) {
|
||||
hwloc_debug("Forcing node0 tier to DRAM");
|
||||
tiers[i].type = HWLOC_MEMORY_TIER_DRAM;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* parses something like 0xf=HBM;0x0f=DRAM;0x00f=CXL-DRAM */
|
||||
static struct hwloc_memory_tier_s *
|
||||
hwloc__force_memory_tiers(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
unsigned *nr_tiers_p,
|
||||
const char *_env)
|
||||
{
|
||||
struct hwloc_memory_tier_s *tiers = NULL;
|
||||
unsigned nr_tiers, i;
|
||||
hwloc_bitmap_t nodeset = NULL;
|
||||
char *env;
|
||||
const char *tmp;
|
||||
|
||||
env = strdup(_env);
|
||||
if (!env) {
|
||||
fprintf(stderr, "[hwloc/memtiers] failed to duplicate HWLOC_MEMTIERS envvar\n");
|
||||
goto out;
|
||||
}
|
||||
|
||||
tmp = env;
|
||||
nr_tiers = 1;
|
||||
while (1) {
|
||||
tmp = strchr(tmp, ';');
|
||||
if (!tmp)
|
||||
break;
|
||||
tmp++;
|
||||
nr_tiers++;
|
||||
}
|
||||
|
||||
nodeset = hwloc_bitmap_alloc();
|
||||
if (!nodeset) {
|
||||
fprintf(stderr, "[hwloc/memtiers] failed to allocated forced tiers' nodeset\n");
|
||||
goto out_with_envvar;
|
||||
}
|
||||
|
||||
tiers = calloc(nr_tiers, sizeof(*tiers));
|
||||
if (!tiers) {
|
||||
fprintf(stderr, "[hwloc/memtiers] failed to allocated forced tiers\n");
|
||||
goto out_with_nodeset;
|
||||
}
|
||||
nr_tiers = 0;
|
||||
|
||||
tmp = env;
|
||||
while (1) {
|
||||
char *end;
|
||||
char *equal;
|
||||
hwloc_memory_tier_type_t type;
|
||||
|
||||
end = strchr(tmp, ';');
|
||||
if (end)
|
||||
*end = '\0';
|
||||
|
||||
equal = strchr(tmp, '=');
|
||||
if (!equal) {
|
||||
fprintf(stderr, "[hwloc/memtiers] missing `=' before end of forced tier description at `%s'\n", tmp);
|
||||
goto out_with_tiers;
|
||||
}
|
||||
*equal = '\0';
|
||||
|
||||
hwloc_bitmap_sscanf(nodeset, tmp);
|
||||
if (hwloc_bitmap_iszero(nodeset)) {
|
||||
fprintf(stderr, "[hwloc/memtiers] empty forced tier nodeset `%s', aborting\n", tmp);
|
||||
goto out_with_tiers;
|
||||
}
|
||||
type = hwloc_memory_tier_type_sscanf(equal+1);
|
||||
if (!type)
|
||||
hwloc_debug("failed to recognize forced tier type `%s'\n", equal+1);
|
||||
tiers[nr_tiers].nodeset = hwloc_bitmap_dup(nodeset);
|
||||
tiers[nr_tiers].type = type;
|
||||
tiers[nr_tiers].local_bw_min = tiers[nr_tiers].local_bw_max = 0;
|
||||
tiers[nr_tiers].local_lat_min = tiers[nr_tiers].local_lat_max = 0;
|
||||
nr_tiers++;
|
||||
if (!end)
|
||||
break;
|
||||
tmp = end+1;
|
||||
}
|
||||
|
||||
free(env);
|
||||
hwloc_bitmap_free(nodeset);
|
||||
hwloc_debug("Forcing %u memory tiers\n", nr_tiers);
|
||||
#ifdef HWLOC_DEBUG
|
||||
for(i=0; i<nr_tiers; i++) {
|
||||
char *s;
|
||||
hwloc_bitmap_asprintf(&s, tiers[i].nodeset);
|
||||
hwloc_debug(" tier #%u type %lx nodeset %s\n", i, tiers[i].type, s);
|
||||
free(s);
|
||||
}
|
||||
#endif
|
||||
*nr_tiers_p = nr_tiers;
|
||||
return tiers;
|
||||
|
||||
out_with_tiers:
|
||||
for(i=0; i<nr_tiers; i++)
|
||||
hwloc_bitmap_free(tiers[i].nodeset);
|
||||
free(tiers);
|
||||
out_with_nodeset:
|
||||
hwloc_bitmap_free(nodeset);
|
||||
out_with_envvar:
|
||||
free(env);
|
||||
out:
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
hwloc__apply_memory_tiers_subtypes(hwloc_topology_t topology,
|
||||
unsigned nr_tiers,
|
||||
struct hwloc_memory_tier_s *tiers,
|
||||
int force)
|
||||
{
|
||||
hwloc_obj_t node = NULL;
|
||||
hwloc_debug("Marking node tiers\n");
|
||||
while ((node = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, node)) != NULL) {
|
||||
unsigned j;
|
||||
for(j=0; j<nr_tiers; j++) {
|
||||
if (hwloc_bitmap_isset(tiers[j].nodeset, node->os_index)) {
|
||||
const char *subtype = hwloc_memory_tier_type_snprintf(tiers[j].type);
|
||||
if (!node->subtype || force) { /* don't overwrite the existing subtype unless forced */
|
||||
if (subtype) { /* don't set a subtype for unknown tiers */
|
||||
hwloc_debug(" marking node L#%u P#%u as %s (was %s)\n", node->logical_index, node->os_index, subtype, node->subtype);
|
||||
free(node->subtype);
|
||||
node->subtype = strdup(subtype);
|
||||
}
|
||||
} else
|
||||
hwloc_debug(" node L#%u P#%u already marked as %s, not setting %s\n",
|
||||
node->logical_index, node->os_index, node->subtype, subtype);
|
||||
if (nr_tiers > 1) {
|
||||
char tmp[20];
|
||||
snprintf(tmp, sizeof(tmp), "%u", j);
|
||||
hwloc__add_info_nodup(&node->infos, &node->infos_count, "MemoryTier", tmp, 1);
|
||||
}
|
||||
break; /* each node is in a single tier */
|
||||
}
|
||||
}
|
||||
}
|
||||
if (nr_tiers > 1) {
|
||||
hwloc_obj_t root = hwloc_get_root_obj(topology);
|
||||
char tmp[20];
|
||||
snprintf(tmp, sizeof(tmp), "%u", nr_tiers);
|
||||
hwloc__add_info_nodup(&root->infos, &root->infos_count, "MemoryTiersNr", tmp, 1);
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
hwloc_internal_memattrs_guess_memory_tiers(hwloc_topology_t topology, int force_subtype)
|
||||
{
|
||||
struct hwloc_memory_tier_s *tiers;
|
||||
unsigned nr_tiers;
|
||||
unsigned i;
|
||||
const char *env;
|
||||
|
||||
env = getenv("HWLOC_MEMTIERS");
|
||||
if (env) {
|
||||
if (!strcmp(env, "none"))
|
||||
goto out;
|
||||
tiers = hwloc__force_memory_tiers(topology, &nr_tiers, env);
|
||||
if (tiers) {
|
||||
assert(nr_tiers > 0);
|
||||
force_subtype = 1;
|
||||
goto ready;
|
||||
}
|
||||
}
|
||||
|
||||
tiers = hwloc__group_memory_tiers(topology, &nr_tiers);
|
||||
if (!tiers)
|
||||
goto out;
|
||||
|
||||
hwloc__guess_memory_tiers_types(topology, nr_tiers, tiers);
|
||||
|
||||
/* sort tiers by BW first, then by type */
|
||||
hwloc_debug("Sorting memory tiers...\n");
|
||||
qsort(tiers, nr_tiers, sizeof(*tiers), compare_tiers_by_bw_and_type);
|
||||
|
||||
ready:
|
||||
#ifdef HWLOC_DEBUG
|
||||
for(i=0; i<nr_tiers; i++) {
|
||||
char *s;
|
||||
hwloc_bitmap_asprintf(&s, tiers[i].nodeset);
|
||||
hwloc_debug(" tier %u = nodes %s with type %lx and local BW %llu-%llu lat %llu-%llu\n",
|
||||
i,
|
||||
s, tiers[i].type,
|
||||
(unsigned long long) tiers[i].local_bw_min,
|
||||
(unsigned long long) tiers[i].local_bw_max,
|
||||
(unsigned long long) tiers[i].local_lat_min,
|
||||
(unsigned long long) tiers[i].local_lat_max);
|
||||
free(s);
|
||||
}
|
||||
#endif
|
||||
|
||||
hwloc__apply_memory_tiers_subtypes(topology, nr_tiers, tiers, force_subtype);
|
||||
|
||||
for(i=0; i<nr_tiers; i++)
|
||||
hwloc_bitmap_free(tiers[i].nodeset);
|
||||
free(tiers);
|
||||
out:
|
||||
return 0;
|
||||
}
|
||||
|
||||
158
src/3rdparty/hwloc/src/pci-common.c
vendored
158
src/3rdparty/hwloc/src/pci-common.c
vendored
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009-2024 Inria. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
@@ -119,6 +119,13 @@ hwloc_pci_discovery_init(struct hwloc_topology *topology)
|
||||
topology->pci_forced_locality = NULL;
|
||||
|
||||
topology->first_pci_locality = topology->last_pci_locality = NULL;
|
||||
|
||||
#define HWLOC_PCI_LOCALITY_QUIRK_CRAY_EX235A (1ULL<<0)
|
||||
#define HWLOC_PCI_LOCALITY_QUIRK_FAKE (1ULL<<62)
|
||||
topology->pci_locality_quirks = (uint64_t) -1;
|
||||
/* -1 is unknown, 0 is disabled, >0 is bitmask of enabled quirks.
|
||||
* bit 63 should remain unused so that -1 is unaccessible as a bitmask.
|
||||
*/
|
||||
}
|
||||
|
||||
void
|
||||
@@ -146,8 +153,9 @@ hwloc_pci_discovery_prepare(struct hwloc_topology *topology)
|
||||
}
|
||||
free(buffer);
|
||||
} else {
|
||||
fprintf(stderr, "Ignoring HWLOC_PCI_LOCALITY file `%s' too large (%lu bytes)\n",
|
||||
env, (unsigned long) st.st_size);
|
||||
if (HWLOC_SHOW_CRITICAL_ERRORS())
|
||||
fprintf(stderr, "hwloc/pci: Ignoring HWLOC_PCI_LOCALITY file `%s' too large (%lu bytes)\n",
|
||||
env, (unsigned long) st.st_size);
|
||||
}
|
||||
}
|
||||
close(fd);
|
||||
@@ -206,8 +214,11 @@ hwloc_pci_traverse_print_cb(void * cbdata __hwloc_attribute_unused,
|
||||
else
|
||||
hwloc_debug("%s Bridge [%04x:%04x]", busid,
|
||||
pcidev->attr->pcidev.vendor_id, pcidev->attr->pcidev.device_id);
|
||||
hwloc_debug(" to %04x:[%02x:%02x]\n",
|
||||
pcidev->attr->bridge.downstream.pci.domain, pcidev->attr->bridge.downstream.pci.secondary_bus, pcidev->attr->bridge.downstream.pci.subordinate_bus);
|
||||
if (pcidev->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI)
|
||||
hwloc_debug(" to %04x:[%02x:%02x]\n",
|
||||
pcidev->attr->bridge.downstream.pci.domain, pcidev->attr->bridge.downstream.pci.secondary_bus, pcidev->attr->bridge.downstream.pci.subordinate_bus);
|
||||
else
|
||||
assert(0);
|
||||
} else
|
||||
hwloc_debug("%s Device [%04x:%04x (%04x:%04x) rev=%02x class=%04x]\n", busid,
|
||||
pcidev->attr->pcidev.vendor_id, pcidev->attr->pcidev.device_id,
|
||||
@@ -251,11 +262,11 @@ hwloc_pci_compare_busids(struct hwloc_obj *a, struct hwloc_obj *b)
|
||||
if (a->attr->pcidev.domain > b->attr->pcidev.domain)
|
||||
return HWLOC_PCI_BUSID_HIGHER;
|
||||
|
||||
if (a->type == HWLOC_OBJ_BRIDGE
|
||||
if (a->type == HWLOC_OBJ_BRIDGE && a->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI
|
||||
&& b->attr->pcidev.bus >= a->attr->bridge.downstream.pci.secondary_bus
|
||||
&& b->attr->pcidev.bus <= a->attr->bridge.downstream.pci.subordinate_bus)
|
||||
return HWLOC_PCI_BUSID_SUPERSET;
|
||||
if (b->type == HWLOC_OBJ_BRIDGE
|
||||
if (b->type == HWLOC_OBJ_BRIDGE && b->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI
|
||||
&& a->attr->pcidev.bus >= b->attr->bridge.downstream.pci.secondary_bus
|
||||
&& a->attr->pcidev.bus <= b->attr->bridge.downstream.pci.subordinate_bus)
|
||||
return HWLOC_PCI_BUSID_INCLUDED;
|
||||
@@ -302,7 +313,7 @@ hwloc_pci_add_object(struct hwloc_obj *parent, struct hwloc_obj **parent_io_firs
|
||||
new->next_sibling = *curp;
|
||||
*curp = new;
|
||||
new->parent = parent;
|
||||
if (new->type == HWLOC_OBJ_BRIDGE) {
|
||||
if (new->type == HWLOC_OBJ_BRIDGE && new->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI) {
|
||||
/* look at remaining siblings and move some below new */
|
||||
childp = &new->io_first_child;
|
||||
curp = &new->next_sibling;
|
||||
@@ -329,7 +340,7 @@ hwloc_pci_add_object(struct hwloc_obj *parent, struct hwloc_obj **parent_io_firs
|
||||
}
|
||||
case HWLOC_PCI_BUSID_EQUAL: {
|
||||
static int reported = 0;
|
||||
if (!reported && !hwloc_hide_errors()) {
|
||||
if (!reported && HWLOC_SHOW_CRITICAL_ERRORS()) {
|
||||
fprintf(stderr, "*********************************************************\n");
|
||||
fprintf(stderr, "* hwloc %s received invalid PCI information.\n", HWLOC_VERSION);
|
||||
fprintf(stderr, "*\n");
|
||||
@@ -411,7 +422,7 @@ hwloc_pcidisc_add_hostbridges(struct hwloc_topology *topology,
|
||||
dstnextp = &child->next_sibling;
|
||||
|
||||
/* compute hostbridge secondary/subordinate buses */
|
||||
if (child->type == HWLOC_OBJ_BRIDGE
|
||||
if (child->type == HWLOC_OBJ_BRIDGE && child->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI
|
||||
&& child->attr->bridge.downstream.pci.subordinate_bus > current_subordinate)
|
||||
current_subordinate = child->attr->bridge.downstream.pci.subordinate_bus;
|
||||
|
||||
@@ -438,13 +449,90 @@ hwloc_pcidisc_add_hostbridges(struct hwloc_topology *topology,
|
||||
return new;
|
||||
}
|
||||
|
||||
static struct hwloc_obj *
|
||||
hwloc_pci_fixup_busid_parent(struct hwloc_topology *topology __hwloc_attribute_unused,
|
||||
struct hwloc_pcidev_attr_s *busid __hwloc_attribute_unused,
|
||||
struct hwloc_obj *parent __hwloc_attribute_unused)
|
||||
/* return 1 if a quirk was applied */
|
||||
static int
|
||||
hwloc__pci_find_busid_parent_quirk(struct hwloc_topology *topology,
|
||||
struct hwloc_pcidev_attr_s *busid,
|
||||
hwloc_cpuset_t cpuset)
|
||||
{
|
||||
/* no quirk for now */
|
||||
return parent;
|
||||
if (topology->pci_locality_quirks == (uint64_t)-1 /* unknown */) {
|
||||
const char *dmi_board_name, *env;
|
||||
|
||||
/* first invokation, detect which quirks are needed */
|
||||
topology->pci_locality_quirks = 0; /* no quirk yet */
|
||||
|
||||
dmi_board_name = hwloc_obj_get_info_by_name(hwloc_get_root_obj(topology), "DMIBoardName");
|
||||
if (dmi_board_name && !strcmp(dmi_board_name, "HPE CRAY EX235A")) {
|
||||
hwloc_debug("enabling for PCI locality quirk for HPE Cray EX235A\n");
|
||||
topology->pci_locality_quirks |= HWLOC_PCI_LOCALITY_QUIRK_CRAY_EX235A;
|
||||
}
|
||||
|
||||
env = getenv("HWLOC_PCI_LOCALITY_QUIRK_FAKE");
|
||||
if (env && atoi(env)) {
|
||||
hwloc_debug("enabling for PCI locality fake quirk (attaching everything to last PU)\n");
|
||||
topology->pci_locality_quirks |= HWLOC_PCI_LOCALITY_QUIRK_FAKE;
|
||||
}
|
||||
}
|
||||
|
||||
if (topology->pci_locality_quirks & HWLOC_PCI_LOCALITY_QUIRK_FAKE) {
|
||||
unsigned last = hwloc_bitmap_last(hwloc_topology_get_topology_cpuset(topology));
|
||||
hwloc_bitmap_set(cpuset, last);
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (topology->pci_locality_quirks & HWLOC_PCI_LOCALITY_QUIRK_CRAY_EX235A) {
|
||||
/* AMD Trento has xGMI ports connected to individual CCDs (8 cores + L3)
|
||||
* instead of NUMA nodes (pairs of CCDs within Trento) as is usual in AMD EPYC CPUs.
|
||||
* This is not described by the ACPI tables, hence we need to manually hardwire
|
||||
* the xGMI locality for the (currently single) server that currently uses that CPU.
|
||||
* It's not clear if ACPI tables can/will ever be fixed (would require one initiator
|
||||
* proximity domain per CCD), or if Linux can/will work around the issue.
|
||||
*/
|
||||
if (busid->domain == 0) {
|
||||
if (busid->bus >= 0xd0 && busid->bus <= 0xd1) {
|
||||
hwloc_bitmap_set_range(cpuset, 0, 7);
|
||||
hwloc_bitmap_set_range(cpuset, 64, 71);
|
||||
return 1;
|
||||
}
|
||||
if (busid->bus >= 0xd4 && busid->bus <= 0xd6) {
|
||||
hwloc_bitmap_set_range(cpuset, 8, 15);
|
||||
hwloc_bitmap_set_range(cpuset, 72, 79);
|
||||
return 1;
|
||||
}
|
||||
if (busid->bus >= 0xc8 && busid->bus <= 0xc9) {
|
||||
hwloc_bitmap_set_range(cpuset, 16, 23);
|
||||
hwloc_bitmap_set_range(cpuset, 80, 87);
|
||||
return 1;
|
||||
}
|
||||
if (busid->bus >= 0xcc && busid->bus <= 0xce) {
|
||||
hwloc_bitmap_set_range(cpuset, 24, 31);
|
||||
hwloc_bitmap_set_range(cpuset, 88, 95);
|
||||
return 1;
|
||||
}
|
||||
if (busid->bus >= 0xd8 && busid->bus <= 0xd9) {
|
||||
hwloc_bitmap_set_range(cpuset, 32, 39);
|
||||
hwloc_bitmap_set_range(cpuset, 96, 103);
|
||||
return 1;
|
||||
}
|
||||
if (busid->bus >= 0xdc && busid->bus <= 0xde) {
|
||||
hwloc_bitmap_set_range(cpuset, 40, 47);
|
||||
hwloc_bitmap_set_range(cpuset, 104, 111);
|
||||
return 1;
|
||||
}
|
||||
if (busid->bus >= 0xc0 && busid->bus <= 0xc1) {
|
||||
hwloc_bitmap_set_range(cpuset, 48, 55);
|
||||
hwloc_bitmap_set_range(cpuset, 112, 119);
|
||||
return 1;
|
||||
}
|
||||
if (busid->bus >= 0xc4 && busid->bus <= 0xc6) {
|
||||
hwloc_bitmap_set_range(cpuset, 56, 63);
|
||||
hwloc_bitmap_set_range(cpuset, 120, 127);
|
||||
return 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static struct hwloc_obj *
|
||||
@@ -453,7 +541,7 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide
|
||||
hwloc_bitmap_t cpuset = hwloc_bitmap_alloc();
|
||||
hwloc_obj_t parent;
|
||||
int forced = 0;
|
||||
int noquirks = 0;
|
||||
int noquirks = 0, got_quirked = 0;
|
||||
unsigned i;
|
||||
int err;
|
||||
|
||||
@@ -486,7 +574,8 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide
|
||||
if (env) {
|
||||
static int reported = 0;
|
||||
if (!topology->pci_has_forced_locality && !reported) {
|
||||
fprintf(stderr, "Environment variable %s is deprecated, please use HWLOC_PCI_LOCALITY instead.\n", env);
|
||||
if (HWLOC_SHOW_ALL_ERRORS())
|
||||
fprintf(stderr, "hwloc/pci: Environment variable %s is deprecated, please use HWLOC_PCI_LOCALITY instead.\n", env);
|
||||
reported = 1;
|
||||
}
|
||||
if (*env) {
|
||||
@@ -500,7 +589,13 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide
|
||||
}
|
||||
}
|
||||
|
||||
if (!forced) {
|
||||
if (!forced && !noquirks && topology->pci_locality_quirks /* either quirks are unknown yet, or some are enabled */) {
|
||||
err = hwloc__pci_find_busid_parent_quirk(topology, busid, cpuset);
|
||||
if (err > 0)
|
||||
got_quirked = 1;
|
||||
}
|
||||
|
||||
if (!forced && !got_quirked) {
|
||||
/* get the cpuset by asking the backend that provides the relevant hook, if any. */
|
||||
struct hwloc_backend *backend = topology->get_pci_busid_cpuset_backend;
|
||||
if (backend)
|
||||
@@ -515,11 +610,7 @@ hwloc__pci_find_busid_parent(struct hwloc_topology *topology, struct hwloc_pcide
|
||||
hwloc_debug_bitmap(" will attach PCI bus to cpuset %s\n", cpuset);
|
||||
|
||||
parent = hwloc_find_insert_io_parent_by_complete_cpuset(topology, cpuset);
|
||||
if (parent) {
|
||||
if (!noquirks)
|
||||
/* We found a valid parent. Check that the OS didn't report invalid locality */
|
||||
parent = hwloc_pci_fixup_busid_parent(topology, busid, parent);
|
||||
} else {
|
||||
if (!parent) {
|
||||
/* Fallback to root */
|
||||
parent = hwloc_get_root_obj(topology);
|
||||
}
|
||||
@@ -565,7 +656,7 @@ hwloc_pcidisc_tree_attach(struct hwloc_topology *topology, struct hwloc_obj *tre
|
||||
assert(pciobj->type == HWLOC_OBJ_PCI_DEVICE
|
||||
|| (pciobj->type == HWLOC_OBJ_BRIDGE && pciobj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI));
|
||||
|
||||
if (obj->type == HWLOC_OBJ_BRIDGE) {
|
||||
if (obj->type == HWLOC_OBJ_BRIDGE && obj->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI) {
|
||||
domain = obj->attr->bridge.downstream.pci.domain;
|
||||
bus_min = obj->attr->bridge.downstream.pci.secondary_bus;
|
||||
bus_max = obj->attr->bridge.downstream.pci.subordinate_bus;
|
||||
@@ -795,26 +886,12 @@ hwloc_pcidisc_find_linkspeed(const unsigned char *config,
|
||||
unsigned offset, float *linkspeed)
|
||||
{
|
||||
unsigned linksta, speed, width;
|
||||
float lanespeed;
|
||||
|
||||
memcpy(&linksta, &config[offset + HWLOC_PCI_EXP_LNKSTA], 4);
|
||||
speed = linksta & HWLOC_PCI_EXP_LNKSTA_SPEED; /* PCIe generation */
|
||||
width = (linksta & HWLOC_PCI_EXP_LNKSTA_WIDTH) >> 4; /* how many lanes */
|
||||
/* PCIe Gen1 = 2.5GT/s signal-rate per lane with 8/10 encoding = 0.25GB/s data-rate per lane
|
||||
* PCIe Gen2 = 5 GT/s signal-rate per lane with 8/10 encoding = 0.5 GB/s data-rate per lane
|
||||
* PCIe Gen3 = 8 GT/s signal-rate per lane with 128/130 encoding = 1 GB/s data-rate per lane
|
||||
* PCIe Gen4 = 16 GT/s signal-rate per lane with 128/130 encoding = 2 GB/s data-rate per lane
|
||||
* PCIe Gen5 = 32 GT/s signal-rate per lane with 128/130 encoding = 4 GB/s data-rate per lane
|
||||
*/
|
||||
|
||||
/* lanespeed in Gbit/s */
|
||||
if (speed <= 2)
|
||||
lanespeed = 2.5f * speed * 0.8f;
|
||||
else
|
||||
lanespeed = 8.0f * (1<<(speed-3)) * 128/130; /* assume Gen6 will be 64 GT/s and so on */
|
||||
|
||||
/* linkspeed in GB/s */
|
||||
*linkspeed = lanespeed * width / 8;
|
||||
*linkspeed = hwloc__pci_link_speed(speed, width);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -938,6 +1015,7 @@ hwloc_pci_class_string(unsigned short class_id)
|
||||
switch (class_id) {
|
||||
case 0x0500: return "RAM";
|
||||
case 0x0501: return "Flash";
|
||||
case 0x0502: return "CXLMem";
|
||||
}
|
||||
return "Memory";
|
||||
case 0x06:
|
||||
|
||||
11
src/3rdparty/hwloc/src/shmem.c
vendored
11
src/3rdparty/hwloc/src/shmem.c
vendored
@@ -23,6 +23,7 @@ struct hwloc_shmem_header {
|
||||
uint32_t header_length; /* where the actual topology starts in the file/mapping */
|
||||
uint64_t mmap_address; /* virtual address to pass to mmap */
|
||||
uint64_t mmap_length; /* length to pass to mmap (includes the header) */
|
||||
/* we will pad the end to a multiple of pointer size so that the topology is well aligned */
|
||||
};
|
||||
|
||||
#define HWLOC_SHMEM_MALLOC_ALIGN 8UL
|
||||
@@ -85,6 +86,7 @@ hwloc_shmem_topology_write(hwloc_topology_t topology,
|
||||
hwloc_topology_t new;
|
||||
struct hwloc_tma tma;
|
||||
struct hwloc_shmem_header header;
|
||||
uint32_t header_length = (sizeof(header) + sizeof(void*) - 1) & ~(sizeof(void*) - 1); /* pad to a multiple of pointer size */
|
||||
void *mmap_res;
|
||||
int err;
|
||||
|
||||
@@ -100,7 +102,7 @@ hwloc_shmem_topology_write(hwloc_topology_t topology,
|
||||
hwloc_internal_memattrs_refresh(topology);
|
||||
|
||||
header.header_version = HWLOC_SHMEM_HEADER_VERSION;
|
||||
header.header_length = sizeof(header);
|
||||
header.header_length = header_length;
|
||||
header.mmap_address = (uintptr_t) mmap_address;
|
||||
header.mmap_length = length;
|
||||
|
||||
@@ -127,7 +129,7 @@ hwloc_shmem_topology_write(hwloc_topology_t topology,
|
||||
|
||||
tma.malloc = tma_shmem_malloc;
|
||||
tma.dontfree = 1;
|
||||
tma.data = (char *)mmap_res + sizeof(header);
|
||||
tma.data = (char *)mmap_res + header_length;
|
||||
err = hwloc__topology_dup(&new, topology, &tma);
|
||||
if (err < 0)
|
||||
return err;
|
||||
@@ -154,6 +156,7 @@ hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp,
|
||||
{
|
||||
hwloc_topology_t new, old;
|
||||
struct hwloc_shmem_header header;
|
||||
uint32_t header_length = (sizeof(header) + sizeof(void*) - 1) & ~(sizeof(void*) - 1); /* pad to a multiple of pointer size */
|
||||
void *mmap_res;
|
||||
int err;
|
||||
|
||||
@@ -171,7 +174,7 @@ hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp,
|
||||
return -1;
|
||||
|
||||
if (header.header_version != HWLOC_SHMEM_HEADER_VERSION
|
||||
|| header.header_length != sizeof(header)
|
||||
|| header.header_length != header_length
|
||||
|| header.mmap_address != (uintptr_t) mmap_address
|
||||
|| header.mmap_length != length) {
|
||||
errno = EINVAL;
|
||||
@@ -186,7 +189,7 @@ hwloc_shmem_topology_adopt(hwloc_topology_t *topologyp,
|
||||
goto out_with_mmap;
|
||||
}
|
||||
|
||||
old = (hwloc_topology_t)((char*)mmap_address + sizeof(header));
|
||||
old = (hwloc_topology_t)((char*)mmap_address + header_length);
|
||||
if (hwloc_topology_abi_check(old) < 0) {
|
||||
errno = EINVAL;
|
||||
goto out_with_mmap;
|
||||
|
||||
174
src/3rdparty/hwloc/src/topology-synthetic.c
vendored
174
src/3rdparty/hwloc/src/topology-synthetic.c
vendored
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009-2023 Inria. All rights reserved.
|
||||
* Copyright © 2009-2010 Université Bordeaux
|
||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
@@ -23,6 +23,7 @@ struct hwloc_synthetic_attr_s {
|
||||
unsigned depth; /* For caches/groups */
|
||||
hwloc_obj_cache_type_t cachetype; /* For caches */
|
||||
hwloc_uint64_t memorysize; /* For caches/memory */
|
||||
hwloc_uint64_t memorysidecachesize; /* Single level of memory-side-cache in-front of a NUMA node */
|
||||
};
|
||||
|
||||
struct hwloc_synthetic_indexes_s {
|
||||
@@ -323,17 +324,29 @@ hwloc_synthetic_parse_memory_attr(const char *attr, const char **endp)
|
||||
hwloc_uint64_t size;
|
||||
size = strtoull(attr, (char **) &endptr, 0);
|
||||
if (!hwloc_strncasecmp(endptr, "TB", 2)) {
|
||||
size *= 1000ULL*1000ULL*1000ULL*1000ULL;
|
||||
endptr += 2;
|
||||
} else if (!hwloc_strncasecmp(endptr, "TiB", 3)) {
|
||||
size <<= 40;
|
||||
endptr += 2;
|
||||
endptr += 3;
|
||||
} else if (!hwloc_strncasecmp(endptr, "GB", 2)) {
|
||||
size *= 1000ULL*1000ULL*1000ULL;
|
||||
endptr += 2;
|
||||
} else if (!hwloc_strncasecmp(endptr, "GiB", 3)) {
|
||||
size <<= 30;
|
||||
endptr += 2;
|
||||
endptr += 3;
|
||||
} else if (!hwloc_strncasecmp(endptr, "MB", 2)) {
|
||||
size *= 1000ULL*1000ULL;
|
||||
endptr += 2;
|
||||
} else if (!hwloc_strncasecmp(endptr, "MiB", 3)) {
|
||||
size <<= 20;
|
||||
endptr += 2;
|
||||
endptr += 3;
|
||||
} else if (!hwloc_strncasecmp(endptr, "kB", 2)) {
|
||||
size <<= 10;
|
||||
size *= 1000ULL;
|
||||
endptr += 2;
|
||||
} else if (!hwloc_strncasecmp(endptr, "kiB", 3)) {
|
||||
size <<= 10;
|
||||
endptr += 3;
|
||||
}
|
||||
*endp = endptr;
|
||||
return size;
|
||||
@@ -368,6 +381,9 @@ hwloc_synthetic_parse_attrs(const char *attrs, const char **next_posp,
|
||||
} else if (!iscache && !strncmp("memory=", attrs, 7)) {
|
||||
memorysize = hwloc_synthetic_parse_memory_attr(attrs+7, &attrs);
|
||||
|
||||
} else if (!strncmp("memorysidecachesize=", attrs, 20)) {
|
||||
sattr->memorysidecachesize = hwloc_synthetic_parse_memory_attr(attrs+20, &attrs);
|
||||
|
||||
} else if (!strncmp("indexes=", attrs, 8)) {
|
||||
index_string = attrs+8;
|
||||
attrs += 8;
|
||||
@@ -375,10 +391,9 @@ hwloc_synthetic_parse_attrs(const char *attrs, const char **next_posp,
|
||||
attrs += index_string_length;
|
||||
|
||||
} else {
|
||||
if (verbose)
|
||||
fprintf(stderr, "Unknown attribute at '%s'\n", attrs);
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
size_t length = strcspn(attrs, " )");
|
||||
fprintf(stderr, "hwloc/synthetic: Ignoring unknown attribute at '%s'\n", attrs);
|
||||
attrs += length;
|
||||
}
|
||||
|
||||
if (' ' == *attrs)
|
||||
@@ -404,6 +419,32 @@ hwloc_synthetic_parse_attrs(const char *attrs, const char **next_posp,
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
hwloc_synthetic_set_default_attrs(struct hwloc_synthetic_attr_s *sattr,
|
||||
int *type_count)
|
||||
{
|
||||
hwloc_obj_type_t type = sattr->type;
|
||||
|
||||
if (type == HWLOC_OBJ_GROUP) {
|
||||
if (sattr->depth == (unsigned)-1)
|
||||
sattr->depth = type_count[HWLOC_OBJ_GROUP]--;
|
||||
|
||||
} else if (hwloc__obj_type_is_cache(type)) {
|
||||
if (!sattr->memorysize) {
|
||||
if (1 == sattr->depth)
|
||||
/* 32KiB in L1 */
|
||||
sattr->memorysize = 32*1024;
|
||||
else
|
||||
/* *4 at each level, starting from 1MiB for L2, unified */
|
||||
sattr->memorysize = 256ULL*1024 << (2*sattr->depth);
|
||||
}
|
||||
|
||||
} else if (type == HWLOC_OBJ_NUMANODE && !sattr->memorysize) {
|
||||
/* 1GiB in memory nodes. */
|
||||
sattr->memorysize = 1024*1024*1024;
|
||||
}
|
||||
}
|
||||
|
||||
/* frees level until arity = 0 */
|
||||
static void
|
||||
hwloc_synthetic_free_levels(struct hwloc_synthetic_backend_data_s *data)
|
||||
@@ -453,6 +494,7 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
|
||||
data->level[0].indexes.string = NULL;
|
||||
data->level[0].indexes.array = NULL;
|
||||
data->level[0].attr.memorysize = 0;
|
||||
data->level[0].attr.memorysidecachesize = 0;
|
||||
data->level[0].attached = NULL;
|
||||
type_count[HWLOC_OBJ_MACHINE] = 1;
|
||||
if (*description == '(') {
|
||||
@@ -502,6 +544,7 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
|
||||
if (attached) {
|
||||
attached->attr.type = type;
|
||||
attached->attr.memorysize = 0;
|
||||
attached->attr.memorysidecachesize = 0;
|
||||
/* attached->attr.depth and .cachetype unused */
|
||||
attached->next = NULL;
|
||||
pprev = &data->level[count-1].attached;
|
||||
@@ -589,7 +632,7 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
|
||||
}
|
||||
if (!item) {
|
||||
if (verbose)
|
||||
fprintf(stderr,"Synthetic string with disallow 0 number of objects at '%s'\n", pos);
|
||||
fprintf(stderr,"Synthetic string with disallowed 0 number of objects at '%s'\n", pos);
|
||||
errno = EINVAL;
|
||||
goto error;
|
||||
}
|
||||
@@ -599,6 +642,7 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
|
||||
data->level[count].indexes.string = NULL;
|
||||
data->level[count].indexes.array = NULL;
|
||||
data->level[count].attr.memorysize = 0;
|
||||
data->level[count].attr.memorysidecachesize = 0;
|
||||
if (*next_pos == '(') {
|
||||
err = hwloc_synthetic_parse_attrs(next_pos+1, &next_pos, &data->level[count].attr, &data->level[count].indexes, verbose);
|
||||
if (err < 0)
|
||||
@@ -784,6 +828,7 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
|
||||
data->level[1].indexes.string = NULL;
|
||||
data->level[1].indexes.array = NULL;
|
||||
data->level[1].attr.memorysize = 0;
|
||||
data->level[1].attr.memorysidecachesize = 0;
|
||||
data->level[1].totalwidth = data->level[0].totalwidth;
|
||||
/* update arity to insert a single NUMA node per parent */
|
||||
data->level[1].arity = data->level[0].arity;
|
||||
@@ -791,30 +836,14 @@ hwloc_backend_synthetic_init(struct hwloc_synthetic_backend_data_s *data,
|
||||
count++;
|
||||
}
|
||||
|
||||
/* set default attributes that depend on the depth/hierarchy of levels */
|
||||
for (i=0; i<count; i++) {
|
||||
struct hwloc_synthetic_attached_s *attached;
|
||||
struct hwloc_synthetic_level_data_s *curlevel = &data->level[i];
|
||||
hwloc_obj_type_t type = curlevel->attr.type;
|
||||
|
||||
if (type == HWLOC_OBJ_GROUP) {
|
||||
if (curlevel->attr.depth == (unsigned)-1)
|
||||
curlevel->attr.depth = type_count[HWLOC_OBJ_GROUP]--;
|
||||
|
||||
} else if (hwloc__obj_type_is_cache(type)) {
|
||||
if (!curlevel->attr.memorysize) {
|
||||
if (1 == curlevel->attr.depth)
|
||||
/* 32Kb in L1 */
|
||||
curlevel->attr.memorysize = 32*1024;
|
||||
else
|
||||
/* *4 at each level, starting from 1MB for L2, unified */
|
||||
curlevel->attr.memorysize = 256ULL*1024 << (2*curlevel->attr.depth);
|
||||
}
|
||||
|
||||
} else if (type == HWLOC_OBJ_NUMANODE && !curlevel->attr.memorysize) {
|
||||
/* 1GB in memory nodes. */
|
||||
curlevel->attr.memorysize = 1024*1024*1024;
|
||||
}
|
||||
|
||||
hwloc_synthetic_process_indexes(data, &data->level[i].indexes, data->level[i].totalwidth, verbose);
|
||||
hwloc_synthetic_set_default_attrs(&curlevel->attr, type_count);
|
||||
for(attached = curlevel->attached; attached != NULL; attached = attached->next)
|
||||
hwloc_synthetic_set_default_attrs(&attached->attr, type_count);
|
||||
hwloc_synthetic_process_indexes(data, &curlevel->indexes, curlevel->totalwidth, verbose);
|
||||
}
|
||||
|
||||
hwloc_synthetic_process_indexes(data, &data->numa_attached_indexes, data->numa_attached_nr, verbose);
|
||||
@@ -847,6 +876,12 @@ hwloc_synthetic_set_attr(struct hwloc_synthetic_attr_s *sattr,
|
||||
obj->attr->numanode.page_types[0].size = 4096;
|
||||
obj->attr->numanode.page_types[0].count = sattr->memorysize / 4096;
|
||||
break;
|
||||
case HWLOC_OBJ_MEMCACHE:
|
||||
obj->attr->cache.depth = 1;
|
||||
obj->attr->cache.linesize = 64;
|
||||
obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
|
||||
obj->attr->cache.size = sattr->memorysidecachesize;
|
||||
break;
|
||||
case HWLOC_OBJ_PACKAGE:
|
||||
case HWLOC_OBJ_DIE:
|
||||
break;
|
||||
@@ -914,6 +949,14 @@ hwloc_synthetic_insert_attached(struct hwloc_topology *topology,
|
||||
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, child, "synthetic:attached");
|
||||
|
||||
if (attached->attr.memorysidecachesize) {
|
||||
hwloc_obj_t mscachechild = hwloc_alloc_setup_object(topology, HWLOC_OBJ_MEMCACHE, HWLOC_UNKNOWN_INDEX);
|
||||
mscachechild->cpuset = hwloc_bitmap_dup(set);
|
||||
mscachechild->nodeset = hwloc_bitmap_dup(child->nodeset);
|
||||
hwloc_synthetic_set_attr(&attached->attr, mscachechild);
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, mscachechild, "synthetic:attached:mscache");
|
||||
}
|
||||
|
||||
hwloc_synthetic_insert_attached(topology, data, attached->next, set);
|
||||
}
|
||||
|
||||
@@ -965,6 +1008,14 @@ hwloc__look_synthetic(struct hwloc_topology *topology,
|
||||
hwloc_synthetic_set_attr(&curlevel->attr, obj);
|
||||
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, obj, "synthetic");
|
||||
|
||||
if (type == HWLOC_OBJ_NUMANODE && curlevel->attr.memorysidecachesize) {
|
||||
hwloc_obj_t mscachechild = hwloc_alloc_setup_object(topology, HWLOC_OBJ_MEMCACHE, HWLOC_UNKNOWN_INDEX);
|
||||
mscachechild->cpuset = hwloc_bitmap_dup(set);
|
||||
mscachechild->nodeset = hwloc_bitmap_dup(obj->nodeset);
|
||||
hwloc_synthetic_set_attr(&curlevel->attr, mscachechild);
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, mscachechild, "synthetic:mscache");
|
||||
}
|
||||
}
|
||||
|
||||
hwloc_synthetic_insert_attached(topology, data, curlevel->attached, set);
|
||||
@@ -1205,6 +1256,7 @@ hwloc__export_synthetic_indexes(hwloc_obj_t *level, unsigned total,
|
||||
|
||||
static int
|
||||
hwloc__export_synthetic_obj_attr(struct hwloc_topology * topology,
|
||||
unsigned long flags,
|
||||
hwloc_obj_t obj,
|
||||
char *buffer, size_t buflen)
|
||||
{
|
||||
@@ -1212,6 +1264,7 @@ hwloc__export_synthetic_obj_attr(struct hwloc_topology * topology,
|
||||
const char * prefix = "(";
|
||||
char cachesize[64] = "";
|
||||
char memsize[64] = "";
|
||||
char memorysidecachesize[64] = "";
|
||||
int needindexes = 0;
|
||||
|
||||
if (hwloc__obj_type_is_cache(obj->type) && obj->attr->cache.size) {
|
||||
@@ -1224,6 +1277,19 @@ hwloc__export_synthetic_obj_attr(struct hwloc_topology * topology,
|
||||
prefix, (unsigned long long) obj->attr->numanode.local_memory);
|
||||
prefix = separator;
|
||||
}
|
||||
if (obj->type == HWLOC_OBJ_NUMANODE && !(flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1)) {
|
||||
hwloc_obj_t memorysidecache = obj->parent;
|
||||
hwloc_uint64_t size = 0;
|
||||
while (memorysidecache && memorysidecache->type == HWLOC_OBJ_MEMCACHE) {
|
||||
size += memorysidecache->attr->cache.size;
|
||||
memorysidecache = memorysidecache->parent;
|
||||
}
|
||||
if (size) {
|
||||
snprintf(memorysidecachesize, sizeof(memorysidecachesize), "%smemorysidecachesize=%llu",
|
||||
prefix, (unsigned long long) size);
|
||||
prefix = separator;
|
||||
}
|
||||
}
|
||||
if (!obj->logical_index /* only display indexes once per level (not for non-first NUMA children, etc.) */
|
||||
&& (obj->type == HWLOC_OBJ_PU || obj->type == HWLOC_OBJ_NUMANODE)) {
|
||||
hwloc_obj_t cur = obj;
|
||||
@@ -1235,12 +1301,12 @@ hwloc__export_synthetic_obj_attr(struct hwloc_topology * topology,
|
||||
cur = cur->next_cousin;
|
||||
}
|
||||
}
|
||||
if (*cachesize || *memsize || needindexes) {
|
||||
if (*cachesize || *memsize || *memorysidecachesize || needindexes) {
|
||||
ssize_t tmplen = buflen;
|
||||
char *tmp = buffer;
|
||||
int res, ret = 0;
|
||||
|
||||
res = hwloc_snprintf(tmp, tmplen, "%s%s%s", cachesize, memsize, needindexes ? "" : ")");
|
||||
res = hwloc_snprintf(tmp, tmplen, "%s%s%s%s", cachesize, memsize, memorysidecachesize, needindexes ? "" : ")");
|
||||
if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0)
|
||||
return -1;
|
||||
|
||||
@@ -1314,7 +1380,7 @@ hwloc__export_synthetic_obj(struct hwloc_topology * topology, unsigned long flag
|
||||
|
||||
if (!(flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS)) {
|
||||
/* obj attributes */
|
||||
res = hwloc__export_synthetic_obj_attr(topology, obj, tmp, tmplen);
|
||||
res = hwloc__export_synthetic_obj_attr(topology, flags, obj, tmp, tmplen);
|
||||
if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0)
|
||||
return -1;
|
||||
}
|
||||
@@ -1339,7 +1405,7 @@ hwloc__export_synthetic_memory_children(struct hwloc_topology * topology, unsign
|
||||
|
||||
if (flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1) {
|
||||
/* v1: export a single NUMA child */
|
||||
if (parent->memory_arity > 1 || mchild->type != HWLOC_OBJ_NUMANODE) {
|
||||
if (parent->memory_arity > 1) {
|
||||
/* not supported */
|
||||
if (verbose)
|
||||
fprintf(stderr, "Cannot export to synthetic v1 if multiple memory children are attached to the same location.\n");
|
||||
@@ -1350,6 +1416,9 @@ hwloc__export_synthetic_memory_children(struct hwloc_topology * topology, unsign
|
||||
if (needprefix)
|
||||
hwloc__export_synthetic_add_char(&ret, &tmp, &tmplen, ' ');
|
||||
|
||||
/* ignore memcaches and export the NUMA node */
|
||||
while (mchild->type != HWLOC_OBJ_NUMANODE)
|
||||
mchild = mchild->memory_first_child;
|
||||
res = hwloc__export_synthetic_obj(topology, flags, mchild, 1, tmp, tmplen);
|
||||
if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0)
|
||||
return -1;
|
||||
@@ -1357,16 +1426,25 @@ hwloc__export_synthetic_memory_children(struct hwloc_topology * topology, unsign
|
||||
}
|
||||
|
||||
while (mchild) {
|
||||
/* FIXME: really recurse to export memcaches and numanode,
|
||||
/* The core doesn't support shared memcache for now (because ACPI and Linux don't).
|
||||
* So, for each mchild here, recurse only in the first children at each level.
|
||||
*
|
||||
* FIXME: whenever supported by the core, really recurse to export memcaches and numanode,
|
||||
* but it requires clever parsing of [ memcache [numa] [numa] ] during import,
|
||||
* better attaching of things to describe the hierarchy.
|
||||
*/
|
||||
hwloc_obj_t numanode = mchild;
|
||||
/* only export the first NUMA node leaf of each memory child
|
||||
* FIXME: This assumes mscache aren't shared between nodes, that's true in current platforms
|
||||
/* Only export the first NUMA node leaf of each memory child.
|
||||
* Memcaches are ignored here, they will be summed and exported as a single attribute
|
||||
* of the NUMA node in hwloc__export_synthetic_obj().
|
||||
*/
|
||||
while (numanode && numanode->type != HWLOC_OBJ_NUMANODE) {
|
||||
assert(numanode->arity == 1);
|
||||
if (verbose && numanode->memory_arity > 1) {
|
||||
static int warned = 0;
|
||||
if (!warned)
|
||||
fprintf(stderr, "Ignoring non-first memory children at non-first level of memory hierarchy.\n");
|
||||
warned = 1;
|
||||
}
|
||||
numanode = numanode->memory_first_child;
|
||||
}
|
||||
assert(numanode); /* there's always a numanode at the bottom of the memory tree */
|
||||
@@ -1499,17 +1577,21 @@ hwloc_topology_export_synthetic(struct hwloc_topology * topology,
|
||||
|
||||
if (flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_V1) {
|
||||
/* v1 requires all NUMA at the same level */
|
||||
hwloc_obj_t node;
|
||||
hwloc_obj_t node, parent;
|
||||
signed pdepth;
|
||||
|
||||
node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 0);
|
||||
assert(node);
|
||||
assert(hwloc__obj_type_is_normal(node->parent->type)); /* only depth-1 memory children for now */
|
||||
pdepth = node->parent->depth;
|
||||
parent = node->parent;
|
||||
while (!hwloc__obj_type_is_normal(parent->type))
|
||||
parent = parent->parent;
|
||||
pdepth = parent->depth;
|
||||
|
||||
while ((node = node->next_cousin) != NULL) {
|
||||
assert(hwloc__obj_type_is_normal(node->parent->type)); /* only depth-1 memory children for now */
|
||||
if (node->parent->depth != pdepth) {
|
||||
parent = node->parent;
|
||||
while (!hwloc__obj_type_is_normal(parent->type))
|
||||
parent = parent->parent;
|
||||
if (parent->depth != pdepth) {
|
||||
if (verbose)
|
||||
fprintf(stderr, "Cannot export to synthetic v1 if memory is attached to parents at different depths.\n");
|
||||
errno = EINVAL;
|
||||
@@ -1522,7 +1604,7 @@ hwloc_topology_export_synthetic(struct hwloc_topology * topology,
|
||||
|
||||
if (!(flags & HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS)) {
|
||||
/* obj attributes */
|
||||
res = hwloc__export_synthetic_obj_attr(topology, obj, tmp, tmplen);
|
||||
res = hwloc__export_synthetic_obj_attr(topology, flags, obj, tmp, tmplen);
|
||||
if (res > 0)
|
||||
needprefix = 1;
|
||||
if (hwloc__export_synthetic_update_status(&ret, &tmp, &tmplen, res) < 0)
|
||||
|
||||
450
src/3rdparty/hwloc/src/topology-windows.c
vendored
450
src/3rdparty/hwloc/src/topology-windows.c
vendored
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009-2024 Inria. All rights reserved.
|
||||
* Copyright © 2009-2012, 2020 Université Bordeaux
|
||||
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
@@ -11,7 +11,9 @@
|
||||
|
||||
#include "private/autogen/config.h"
|
||||
#include "hwloc.h"
|
||||
#include "hwloc/windows.h"
|
||||
#include "private/private.h"
|
||||
#include "private/windows.h" /* must be before windows.h */
|
||||
#include "private/debug.h"
|
||||
|
||||
#include <windows.h>
|
||||
@@ -64,26 +66,6 @@ typedef enum _LOGICAL_PROCESSOR_RELATIONSHIP {
|
||||
# endif /* HAVE_RELATIONPROCESSORPACKAGE */
|
||||
#endif /* HAVE_LOGICAL_PROCESSOR_RELATIONSHIP */
|
||||
|
||||
#ifndef HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION
|
||||
typedef struct _SYSTEM_LOGICAL_PROCESSOR_INFORMATION {
|
||||
ULONG_PTR ProcessorMask;
|
||||
LOGICAL_PROCESSOR_RELATIONSHIP Relationship;
|
||||
_ANONYMOUS_UNION
|
||||
union {
|
||||
struct {
|
||||
BYTE flags;
|
||||
} ProcessorCore;
|
||||
struct {
|
||||
DWORD NodeNumber;
|
||||
} NumaNode;
|
||||
CACHE_DESCRIPTOR Cache;
|
||||
ULONGLONG Reserved[2];
|
||||
} DUMMYUNIONNAME;
|
||||
} SYSTEM_LOGICAL_PROCESSOR_INFORMATION, *PSYSTEM_LOGICAL_PROCESSOR_INFORMATION;
|
||||
#endif
|
||||
|
||||
/* Extended interface, for group support */
|
||||
|
||||
#ifndef HAVE_GROUP_AFFINITY
|
||||
typedef struct _GROUP_AFFINITY {
|
||||
KAFFINITY Mask;
|
||||
@@ -92,35 +74,40 @@ typedef struct _GROUP_AFFINITY {
|
||||
} GROUP_AFFINITY, *PGROUP_AFFINITY;
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_PROCESSOR_RELATIONSHIP
|
||||
/* always use our own structure because the EfficiencyClass field didn't exist before Win10 */
|
||||
typedef struct HWLOC_PROCESSOR_RELATIONSHIP {
|
||||
BYTE Flags;
|
||||
BYTE EfficiencyClass; /* for RelationProcessorCore, higher means greater performance but less efficiency, only available in Win10+ */
|
||||
BYTE EfficiencyClass; /* for RelationProcessorCore, higher means greater performance but less efficiency */
|
||||
BYTE Reserved[20];
|
||||
WORD GroupCount;
|
||||
GROUP_AFFINITY GroupMask[ANYSIZE_ARRAY];
|
||||
} PROCESSOR_RELATIONSHIP, *PPROCESSOR_RELATIONSHIP;
|
||||
#endif
|
||||
} HWLOC_PROCESSOR_RELATIONSHIP;
|
||||
|
||||
#ifndef HAVE_NUMA_NODE_RELATIONSHIP
|
||||
typedef struct _NUMA_NODE_RELATIONSHIP {
|
||||
/* always use our own structure because the GroupCount and GroupMasks fields didn't exist in some Win10 */
|
||||
typedef struct HWLOC_NUMA_NODE_RELATIONSHIP {
|
||||
DWORD NodeNumber;
|
||||
BYTE Reserved[20];
|
||||
GROUP_AFFINITY GroupMask;
|
||||
} NUMA_NODE_RELATIONSHIP, *PNUMA_NODE_RELATIONSHIP;
|
||||
#endif
|
||||
BYTE Reserved[18];
|
||||
WORD GroupCount;
|
||||
_ANONYMOUS_UNION
|
||||
union {
|
||||
GROUP_AFFINITY GroupMask;
|
||||
GROUP_AFFINITY GroupMasks[ANYSIZE_ARRAY];
|
||||
} DUMMYUNIONNAME;
|
||||
} HWLOC_NUMA_NODE_RELATIONSHIP;
|
||||
|
||||
#ifndef HAVE_CACHE_RELATIONSHIP
|
||||
typedef struct _CACHE_RELATIONSHIP {
|
||||
typedef struct HWLOC_CACHE_RELATIONSHIP {
|
||||
BYTE Level;
|
||||
BYTE Associativity;
|
||||
WORD LineSize;
|
||||
DWORD CacheSize;
|
||||
PROCESSOR_CACHE_TYPE Type;
|
||||
BYTE Reserved[20];
|
||||
GROUP_AFFINITY GroupMask;
|
||||
} CACHE_RELATIONSHIP, *PCACHE_RELATIONSHIP;
|
||||
#endif
|
||||
BYTE Reserved[18];
|
||||
WORD GroupCount;
|
||||
union {
|
||||
GROUP_AFFINITY GroupMask;
|
||||
GROUP_AFFINITY GroupMasks[ANYSIZE_ARRAY];
|
||||
} DUMMYUNIONNAME;
|
||||
} HWLOC_CACHE_RELATIONSHIP;
|
||||
|
||||
#ifndef HAVE_PROCESSOR_GROUP_INFO
|
||||
typedef struct _PROCESSOR_GROUP_INFO {
|
||||
@@ -140,20 +127,19 @@ typedef struct _GROUP_RELATIONSHIP {
|
||||
} GROUP_RELATIONSHIP, *PGROUP_RELATIONSHIP;
|
||||
#endif
|
||||
|
||||
#ifndef HAVE_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX
|
||||
typedef struct _SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX {
|
||||
/* always use our own structure because we need our own HWLOC_PROCESSOR/CACHE/NUMA_NODE_RELATIONSHIP */
|
||||
typedef struct HWLOC_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX {
|
||||
LOGICAL_PROCESSOR_RELATIONSHIP Relationship;
|
||||
DWORD Size;
|
||||
_ANONYMOUS_UNION
|
||||
union {
|
||||
PROCESSOR_RELATIONSHIP Processor;
|
||||
NUMA_NODE_RELATIONSHIP NumaNode;
|
||||
CACHE_RELATIONSHIP Cache;
|
||||
HWLOC_PROCESSOR_RELATIONSHIP Processor;
|
||||
HWLOC_NUMA_NODE_RELATIONSHIP NumaNode;
|
||||
HWLOC_CACHE_RELATIONSHIP Cache;
|
||||
GROUP_RELATIONSHIP Group;
|
||||
/* Odd: no member to tell the cpu mask of the package... */
|
||||
} DUMMYUNIONNAME;
|
||||
} SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX, *PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX;
|
||||
#endif
|
||||
} HWLOC_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX;
|
||||
|
||||
#ifndef HAVE_PSAPI_WORKING_SET_EX_BLOCK
|
||||
typedef union _PSAPI_WORKING_SET_EX_BLOCK {
|
||||
@@ -190,9 +176,6 @@ typedef struct _PROCESSOR_NUMBER {
|
||||
typedef WORD (WINAPI *PFN_GETACTIVEPROCESSORGROUPCOUNT)(void);
|
||||
static PFN_GETACTIVEPROCESSORGROUPCOUNT GetActiveProcessorGroupCountProc;
|
||||
|
||||
static unsigned long nr_processor_groups = 1;
|
||||
static unsigned long max_numanode_index = 0;
|
||||
|
||||
typedef WORD (WINAPI *PFN_GETACTIVEPROCESSORCOUNT)(WORD);
|
||||
static PFN_GETACTIVEPROCESSORCOUNT GetActiveProcessorCountProc;
|
||||
|
||||
@@ -202,10 +185,7 @@ static PFN_GETCURRENTPROCESSORNUMBER GetCurrentProcessorNumberProc;
|
||||
typedef VOID (WINAPI *PFN_GETCURRENTPROCESSORNUMBEREX)(PPROCESSOR_NUMBER);
|
||||
static PFN_GETCURRENTPROCESSORNUMBEREX GetCurrentProcessorNumberExProc;
|
||||
|
||||
typedef BOOL (WINAPI *PFN_GETLOGICALPROCESSORINFORMATION)(PSYSTEM_LOGICAL_PROCESSOR_INFORMATION Buffer, PDWORD ReturnLength);
|
||||
static PFN_GETLOGICALPROCESSORINFORMATION GetLogicalProcessorInformationProc;
|
||||
|
||||
typedef BOOL (WINAPI *PFN_GETLOGICALPROCESSORINFORMATIONEX)(LOGICAL_PROCESSOR_RELATIONSHIP relationship, PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX Buffer, PDWORD ReturnLength);
|
||||
typedef BOOL (WINAPI *PFN_GETLOGICALPROCESSORINFORMATIONEX)(LOGICAL_PROCESSOR_RELATIONSHIP relationship, HWLOC_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *Buffer, PDWORD ReturnLength);
|
||||
static PFN_GETLOGICALPROCESSORINFORMATIONEX GetLogicalProcessorInformationExProc;
|
||||
|
||||
typedef BOOL (WINAPI *PFN_SETTHREADGROUPAFFINITY)(HANDLE hThread, const GROUP_AFFINITY *GroupAffinity, PGROUP_AFFINITY PreviousGroupAffinity);
|
||||
@@ -240,14 +220,12 @@ static void hwloc_win_get_function_ptrs(void)
|
||||
#pragma GCC diagnostic ignored "-Wcast-function-type"
|
||||
#endif
|
||||
|
||||
kernel32 = LoadLibrary("kernel32.dll");
|
||||
kernel32 = LoadLibrary(TEXT("kernel32.dll"));
|
||||
if (kernel32) {
|
||||
GetActiveProcessorGroupCountProc =
|
||||
(PFN_GETACTIVEPROCESSORGROUPCOUNT) GetProcAddress(kernel32, "GetActiveProcessorGroupCount");
|
||||
GetActiveProcessorCountProc =
|
||||
(PFN_GETACTIVEPROCESSORCOUNT) GetProcAddress(kernel32, "GetActiveProcessorCount");
|
||||
GetLogicalProcessorInformationProc =
|
||||
(PFN_GETLOGICALPROCESSORINFORMATION) GetProcAddress(kernel32, "GetLogicalProcessorInformation");
|
||||
GetCurrentProcessorNumberProc =
|
||||
(PFN_GETCURRENTPROCESSORNUMBER) GetProcAddress(kernel32, "GetCurrentProcessorNumber");
|
||||
GetCurrentProcessorNumberExProc =
|
||||
@@ -270,16 +248,13 @@ static void hwloc_win_get_function_ptrs(void)
|
||||
(PFN_VIRTUALFREEEX) GetProcAddress(kernel32, "VirtualFreeEx");
|
||||
}
|
||||
|
||||
if (GetActiveProcessorGroupCountProc)
|
||||
nr_processor_groups = GetActiveProcessorGroupCountProc();
|
||||
|
||||
if (!QueryWorkingSetExProc) {
|
||||
HMODULE psapi = LoadLibrary("psapi.dll");
|
||||
HMODULE psapi = LoadLibrary(TEXT("psapi.dll"));
|
||||
if (psapi)
|
||||
QueryWorkingSetExProc = (PFN_QUERYWORKINGSETEX) GetProcAddress(psapi, "QueryWorkingSetEx");
|
||||
}
|
||||
|
||||
ntdll = GetModuleHandle("ntdll");
|
||||
ntdll = GetModuleHandle(TEXT("ntdll"));
|
||||
RtlGetVersionProc = (PFN_RTLGETVERSION) GetProcAddress(ntdll, "RtlGetVersion");
|
||||
|
||||
#if HWLOC_HAVE_GCC_W_CAST_FUNCTION_TYPE
|
||||
@@ -363,6 +338,173 @@ static int hwloc_bitmap_to_single_ULONG_PTR(hwloc_const_bitmap_t set, unsigned *
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**********************
|
||||
* Processor Groups
|
||||
*/
|
||||
|
||||
static unsigned long max_numanode_index = 0;
|
||||
|
||||
static unsigned long nr_processor_groups = 1;
|
||||
static hwloc_cpuset_t * processor_group_cpusets = NULL;
|
||||
|
||||
static void
|
||||
hwloc_win_get_processor_groups(void)
|
||||
{
|
||||
HWLOC_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *procInfoTotal, *tmpprocInfoTotal, *procInfo;
|
||||
DWORD length;
|
||||
unsigned i;
|
||||
|
||||
hwloc_debug("querying windows processor groups\n");
|
||||
|
||||
if (!GetLogicalProcessorInformationExProc)
|
||||
goto error;
|
||||
|
||||
nr_processor_groups = GetActiveProcessorGroupCountProc();
|
||||
if (!nr_processor_groups)
|
||||
goto error;
|
||||
|
||||
hwloc_debug("found %lu windows processor groups\n", nr_processor_groups);
|
||||
|
||||
if (nr_processor_groups > 1 && SIZEOF_VOID_P == 4) {
|
||||
if (HWLOC_SHOW_ALL_ERRORS())
|
||||
fprintf(stderr, "hwloc/windows: multiple processor groups found on 32bits Windows, topology may be invalid/incomplete.\n");
|
||||
}
|
||||
|
||||
length = 0;
|
||||
procInfoTotal = NULL;
|
||||
|
||||
while (1) {
|
||||
if (GetLogicalProcessorInformationExProc(RelationGroup, procInfoTotal, &length))
|
||||
break;
|
||||
if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
|
||||
goto error;
|
||||
tmpprocInfoTotal = realloc(procInfoTotal, length);
|
||||
if (!tmpprocInfoTotal)
|
||||
goto error_with_procinfo;
|
||||
procInfoTotal = tmpprocInfoTotal;
|
||||
}
|
||||
|
||||
processor_group_cpusets = calloc(nr_processor_groups, sizeof(*processor_group_cpusets));
|
||||
if (!processor_group_cpusets)
|
||||
goto error_with_procinfo;
|
||||
|
||||
for (procInfo = procInfoTotal;
|
||||
(void*) procInfo < (void*) ((uintptr_t) procInfoTotal + length);
|
||||
procInfo = (void*) ((uintptr_t) procInfo + procInfo->Size)) {
|
||||
unsigned id;
|
||||
|
||||
assert(procInfo->Relationship == RelationGroup);
|
||||
|
||||
hwloc_debug("Found %u active windows processor groups\n",
|
||||
(unsigned) procInfo->Group.ActiveGroupCount);
|
||||
for (id = 0; id < procInfo->Group.ActiveGroupCount; id++) {
|
||||
KAFFINITY mask;
|
||||
hwloc_bitmap_t set;
|
||||
|
||||
set = hwloc_bitmap_alloc();
|
||||
if (!set)
|
||||
goto error_with_cpusets;
|
||||
|
||||
mask = procInfo->Group.GroupInfo[id].ActiveProcessorMask;
|
||||
hwloc_debug("group %u with %u cpus mask 0x%llx\n", id,
|
||||
(unsigned) procInfo->Group.GroupInfo[id].ActiveProcessorCount, (unsigned long long) mask);
|
||||
/* KAFFINITY is ULONG_PTR */
|
||||
hwloc_bitmap_set_ith_ULONG_PTR(set, id, mask);
|
||||
/* FIXME: what if running 32bits on a 64bits windows with 64-processor groups?
|
||||
* ULONG_PTR is 32bits, so half the group is invisible?
|
||||
* maybe scale id to id*8/sizeof(ULONG_PTR) so that groups are 64-PU aligned?
|
||||
*/
|
||||
hwloc_debug_2args_bitmap("group %u %d bitmap %s\n", id, procInfo->Group.GroupInfo[id].ActiveProcessorCount, set);
|
||||
processor_group_cpusets[id] = set;
|
||||
}
|
||||
}
|
||||
|
||||
free(procInfoTotal);
|
||||
return;
|
||||
|
||||
error_with_cpusets:
|
||||
for(i=0; i<nr_processor_groups; i++) {
|
||||
if (processor_group_cpusets[i])
|
||||
hwloc_bitmap_free(processor_group_cpusets[i]);
|
||||
}
|
||||
free(processor_group_cpusets);
|
||||
processor_group_cpusets = NULL;
|
||||
error_with_procinfo:
|
||||
free(procInfoTotal);
|
||||
error:
|
||||
/* on error set nr to 1 and keep cpusets NULL. We'll use the topology cpuset whenever needed */
|
||||
nr_processor_groups = 1;
|
||||
}
|
||||
|
||||
static void
|
||||
hwloc_win_free_processor_groups(void)
|
||||
{
|
||||
unsigned i;
|
||||
for(i=0; i<nr_processor_groups; i++) {
|
||||
if (processor_group_cpusets[i])
|
||||
hwloc_bitmap_free(processor_group_cpusets[i]);
|
||||
}
|
||||
free(processor_group_cpusets);
|
||||
processor_group_cpusets = NULL;
|
||||
nr_processor_groups = 1;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
hwloc_windows_get_nr_processor_groups(hwloc_topology_t topology, unsigned long flags)
|
||||
{
|
||||
if (!topology->is_loaded || !topology->is_thissystem) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (flags) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
return nr_processor_groups;
|
||||
}
|
||||
|
||||
int
|
||||
hwloc_windows_get_processor_group_cpuset(hwloc_topology_t topology, unsigned pg_index, hwloc_cpuset_t cpuset, unsigned long flags)
|
||||
{
|
||||
if (!topology->is_loaded || !topology->is_thissystem) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!cpuset) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (flags) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (pg_index >= nr_processor_groups) {
|
||||
errno = ENOENT;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (!processor_group_cpusets) {
|
||||
assert(nr_processor_groups == 1);
|
||||
/* we found no processor groups, return the entire topology as a single one */
|
||||
hwloc_bitmap_copy(cpuset, topology->levels[0][0]->cpuset);
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (!processor_group_cpusets[pg_index]) {
|
||||
errno = ENOENT;
|
||||
return -1;
|
||||
}
|
||||
|
||||
hwloc_bitmap_copy(cpuset, processor_group_cpusets[pg_index]);
|
||||
return 0;
|
||||
}
|
||||
|
||||
/**************************************************************
|
||||
* hwloc PU numbering with respect to Windows processor groups
|
||||
*
|
||||
@@ -845,9 +987,15 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
|
||||
OSVERSIONINFOEX osvi;
|
||||
char versionstr[20];
|
||||
char hostname[122] = "";
|
||||
unsigned hostname_size = sizeof(hostname);
|
||||
#if !defined(__CYGWIN__)
|
||||
DWORD hostname_size = sizeof(hostname);
|
||||
#else
|
||||
size_t hostname_size = sizeof(hostname);
|
||||
#endif
|
||||
int has_efficiencyclass = 0;
|
||||
struct hwloc_win_efficiency_classes eclasses;
|
||||
char *env = getenv("HWLOC_WINDOWS_PROCESSOR_GROUP_OBJS");
|
||||
int keep_pgroup_objs = (env && atoi(env));
|
||||
|
||||
assert(dstatus->phase == HWLOC_DISC_PHASE_CPU);
|
||||
|
||||
@@ -878,137 +1026,8 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
|
||||
|
||||
GetSystemInfo(&SystemInfo);
|
||||
|
||||
if (!GetLogicalProcessorInformationExProc && GetLogicalProcessorInformationProc) {
|
||||
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION procInfo, tmpprocInfo;
|
||||
unsigned id;
|
||||
unsigned i;
|
||||
struct hwloc_obj *obj;
|
||||
hwloc_obj_type_t type;
|
||||
|
||||
length = 0;
|
||||
procInfo = NULL;
|
||||
|
||||
while (1) {
|
||||
if (GetLogicalProcessorInformationProc(procInfo, &length))
|
||||
break;
|
||||
if (GetLastError() != ERROR_INSUFFICIENT_BUFFER)
|
||||
return -1;
|
||||
tmpprocInfo = realloc(procInfo, length);
|
||||
if (!tmpprocInfo) {
|
||||
free(procInfo);
|
||||
goto out;
|
||||
}
|
||||
procInfo = tmpprocInfo;
|
||||
}
|
||||
|
||||
assert(!length || procInfo);
|
||||
|
||||
for (i = 0; i < length / sizeof(*procInfo); i++) {
|
||||
|
||||
/* Ignore unknown caches */
|
||||
if (procInfo->Relationship == RelationCache
|
||||
&& procInfo->Cache.Type != CacheUnified
|
||||
&& procInfo->Cache.Type != CacheData
|
||||
&& procInfo->Cache.Type != CacheInstruction)
|
||||
continue;
|
||||
|
||||
id = HWLOC_UNKNOWN_INDEX;
|
||||
switch (procInfo[i].Relationship) {
|
||||
case RelationNumaNode:
|
||||
type = HWLOC_OBJ_NUMANODE;
|
||||
id = procInfo[i].NumaNode.NodeNumber;
|
||||
gotnuma++;
|
||||
if (id > max_numanode_index)
|
||||
max_numanode_index = id;
|
||||
break;
|
||||
case RelationProcessorPackage:
|
||||
type = HWLOC_OBJ_PACKAGE;
|
||||
break;
|
||||
case RelationCache:
|
||||
type = (procInfo[i].Cache.Type == CacheInstruction ? HWLOC_OBJ_L1ICACHE : HWLOC_OBJ_L1CACHE) + procInfo[i].Cache.Level - 1;
|
||||
break;
|
||||
case RelationProcessorCore:
|
||||
type = HWLOC_OBJ_CORE;
|
||||
break;
|
||||
case RelationGroup:
|
||||
default:
|
||||
type = HWLOC_OBJ_GROUP;
|
||||
break;
|
||||
}
|
||||
|
||||
if (!hwloc_filter_check_keep_object_type(topology, type))
|
||||
continue;
|
||||
|
||||
obj = hwloc_alloc_setup_object(topology, type, id);
|
||||
obj->cpuset = hwloc_bitmap_alloc();
|
||||
hwloc_debug("%s#%u mask %llx\n", hwloc_obj_type_string(type), id, (unsigned long long) procInfo[i].ProcessorMask);
|
||||
/* ProcessorMask is a ULONG_PTR */
|
||||
hwloc_bitmap_set_ith_ULONG_PTR(obj->cpuset, 0, procInfo[i].ProcessorMask);
|
||||
hwloc_debug_2args_bitmap("%s#%u bitmap %s\n", hwloc_obj_type_string(type), id, obj->cpuset);
|
||||
|
||||
switch (type) {
|
||||
case HWLOC_OBJ_NUMANODE:
|
||||
{
|
||||
ULONGLONG avail;
|
||||
obj->nodeset = hwloc_bitmap_alloc();
|
||||
hwloc_bitmap_set(obj->nodeset, id);
|
||||
if ((GetNumaAvailableMemoryNodeExProc && GetNumaAvailableMemoryNodeExProc(id, &avail))
|
||||
|| (GetNumaAvailableMemoryNodeProc && GetNumaAvailableMemoryNodeProc(id, &avail))) {
|
||||
obj->attr->numanode.local_memory = avail;
|
||||
gotnumamemory++;
|
||||
}
|
||||
obj->attr->numanode.page_types_len = 2;
|
||||
obj->attr->numanode.page_types = malloc(2 * sizeof(*obj->attr->numanode.page_types));
|
||||
memset(obj->attr->numanode.page_types, 0, 2 * sizeof(*obj->attr->numanode.page_types));
|
||||
obj->attr->numanode.page_types_len = 1;
|
||||
obj->attr->numanode.page_types[0].size = SystemInfo.dwPageSize;
|
||||
#if HAVE_DECL__SC_LARGE_PAGESIZE
|
||||
obj->attr->numanode.page_types_len++;
|
||||
obj->attr->numanode.page_types[1].size = sysconf(_SC_LARGE_PAGESIZE);
|
||||
#endif
|
||||
break;
|
||||
}
|
||||
case HWLOC_OBJ_L1CACHE:
|
||||
case HWLOC_OBJ_L2CACHE:
|
||||
case HWLOC_OBJ_L3CACHE:
|
||||
case HWLOC_OBJ_L4CACHE:
|
||||
case HWLOC_OBJ_L5CACHE:
|
||||
case HWLOC_OBJ_L1ICACHE:
|
||||
case HWLOC_OBJ_L2ICACHE:
|
||||
case HWLOC_OBJ_L3ICACHE:
|
||||
obj->attr->cache.size = procInfo[i].Cache.Size;
|
||||
obj->attr->cache.associativity = procInfo[i].Cache.Associativity == CACHE_FULLY_ASSOCIATIVE ? -1 : procInfo[i].Cache.Associativity ;
|
||||
obj->attr->cache.linesize = procInfo[i].Cache.LineSize;
|
||||
obj->attr->cache.depth = procInfo[i].Cache.Level;
|
||||
switch (procInfo->Cache.Type) {
|
||||
case CacheUnified:
|
||||
obj->attr->cache.type = HWLOC_OBJ_CACHE_UNIFIED;
|
||||
break;
|
||||
case CacheData:
|
||||
obj->attr->cache.type = HWLOC_OBJ_CACHE_DATA;
|
||||
break;
|
||||
case CacheInstruction:
|
||||
obj->attr->cache.type = HWLOC_OBJ_CACHE_INSTRUCTION;
|
||||
break;
|
||||
default:
|
||||
hwloc_free_unlinked_object(obj);
|
||||
continue;
|
||||
}
|
||||
break;
|
||||
case HWLOC_OBJ_GROUP:
|
||||
obj->attr->group.kind = procInfo[i].Relationship == RelationGroup ? HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP : HWLOC_GROUP_KIND_WINDOWS_RELATIONSHIP_UNKNOWN;
|
||||
break;
|
||||
default:
|
||||
break;
|
||||
}
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, obj, "windows:GetLogicalProcessorInformation");
|
||||
}
|
||||
|
||||
free(procInfo);
|
||||
}
|
||||
|
||||
if (GetLogicalProcessorInformationExProc) {
|
||||
PSYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX procInfoTotal, tmpprocInfoTotal, procInfo;
|
||||
HWLOC_SYSTEM_LOGICAL_PROCESSOR_INFORMATION_EX *procInfoTotal, *tmpprocInfoTotal, *procInfo;
|
||||
unsigned id;
|
||||
struct hwloc_obj *obj;
|
||||
hwloc_obj_type_t type;
|
||||
@@ -1036,19 +1055,31 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
|
||||
unsigned efficiency_class = 0;
|
||||
GROUP_AFFINITY *GroupMask;
|
||||
|
||||
/* Ignore unknown caches */
|
||||
if (procInfo->Relationship == RelationCache
|
||||
&& procInfo->Cache.Type != CacheUnified
|
||||
&& procInfo->Cache.Type != CacheData
|
||||
&& procInfo->Cache.Type != CacheInstruction)
|
||||
continue;
|
||||
if (procInfo->Relationship == RelationCache) {
|
||||
if (!topology->want_some_cpu_caches)
|
||||
/* TODO: check if RelationAll&~RelationCache works? */
|
||||
continue;
|
||||
if (procInfo->Cache.Type != CacheUnified
|
||||
&& procInfo->Cache.Type != CacheData
|
||||
&& procInfo->Cache.Type != CacheInstruction)
|
||||
/* Ignore unknown caches */
|
||||
continue;
|
||||
}
|
||||
|
||||
id = HWLOC_UNKNOWN_INDEX;
|
||||
switch (procInfo->Relationship) {
|
||||
case RelationNumaNode:
|
||||
type = HWLOC_OBJ_NUMANODE;
|
||||
num = 1;
|
||||
GroupMask = &procInfo->NumaNode.GroupMask;
|
||||
/* Starting with Windows 11 and Server 2022, the GroupCount field is valid and >=1
|
||||
* and we may read GroupMasks[]. Older releases have GroupCount==0 and we must read GroupMask.
|
||||
*/
|
||||
if (procInfo->NumaNode.GroupCount) {
|
||||
num = procInfo->NumaNode.GroupCount;
|
||||
GroupMask = procInfo->NumaNode.GroupMasks;
|
||||
} else {
|
||||
num = 1;
|
||||
GroupMask = &procInfo->NumaNode.GroupMask;
|
||||
}
|
||||
id = procInfo->NumaNode.NodeNumber;
|
||||
gotnuma++;
|
||||
if (id > max_numanode_index)
|
||||
@@ -1061,18 +1092,20 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
|
||||
break;
|
||||
case RelationCache:
|
||||
type = (procInfo->Cache.Type == CacheInstruction ? HWLOC_OBJ_L1ICACHE : HWLOC_OBJ_L1CACHE) + procInfo->Cache.Level - 1;
|
||||
num = 1;
|
||||
GroupMask = &procInfo->Cache.GroupMask;
|
||||
/* GroupCount added approximately with NumaNode.GroupCount above */
|
||||
if (procInfo->Cache.GroupCount) {
|
||||
num = procInfo->Cache.GroupCount;
|
||||
GroupMask = procInfo->Cache.GroupMasks;
|
||||
} else {
|
||||
num = 1;
|
||||
GroupMask = &procInfo->Cache.GroupMask;
|
||||
}
|
||||
break;
|
||||
case RelationProcessorCore:
|
||||
type = HWLOC_OBJ_CORE;
|
||||
num = procInfo->Processor.GroupCount;
|
||||
GroupMask = procInfo->Processor.GroupMask;
|
||||
if (has_efficiencyclass)
|
||||
/* the EfficiencyClass field didn't exist before Windows10 and recent MSVC headers,
|
||||
* so just access it manually instead of trying to detect it.
|
||||
*/
|
||||
efficiency_class = * ((&procInfo->Processor.Flags) + 1);
|
||||
efficiency_class = procInfo->Processor.EfficiencyClass;
|
||||
break;
|
||||
case RelationGroup:
|
||||
/* So strange an interface... */
|
||||
@@ -1097,11 +1130,12 @@ hwloc_look_windows(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
|
||||
groups_pu_set = hwloc_bitmap_alloc();
|
||||
hwloc_bitmap_or(groups_pu_set, groups_pu_set, set);
|
||||
|
||||
if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) {
|
||||
/* Ignore processor groups unless requested and filtered-in */
|
||||
if (keep_pgroup_objs && hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) {
|
||||
obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, id);
|
||||
obj->cpuset = set;
|
||||
obj->attr->group.kind = HWLOC_GROUP_KIND_WINDOWS_PROCESSOR_GROUP;
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, obj, "windows:GetLogicalProcessorInformation:ProcessorGroup");
|
||||
hwloc__insert_object_by_cpuset(topology, NULL, obj, "windows:GetLogicalProcessorInformationEx:ProcessorGroup");
|
||||
} else
|
||||
hwloc_bitmap_free(set);
|
||||
}
|
||||
@@ -1328,11 +1362,13 @@ hwloc_set_windows_hooks(struct hwloc_binding_hooks *hooks,
|
||||
static int hwloc_windows_component_init(unsigned long flags __hwloc_attribute_unused)
|
||||
{
|
||||
hwloc_win_get_function_ptrs();
|
||||
hwloc_win_get_processor_groups();
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void hwloc_windows_component_finalize(unsigned long flags __hwloc_attribute_unused)
|
||||
{
|
||||
hwloc_win_free_processor_groups();
|
||||
}
|
||||
|
||||
static struct hwloc_backend *
|
||||
|
||||
422
src/3rdparty/hwloc/src/topology-x86.c
vendored
422
src/3rdparty/hwloc/src/topology-x86.c
vendored
@@ -1,17 +1,21 @@
|
||||
/*
|
||||
* Copyright © 2010-2021 Inria. All rights reserved.
|
||||
* Copyright © 2010-2024 Inria. All rights reserved.
|
||||
* Copyright © 2010-2013 Université Bordeaux
|
||||
* Copyright © 2010-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*
|
||||
*
|
||||
* This backend is only used when the operating system does not export
|
||||
* This backend is mostly used when the operating system does not export
|
||||
* the necessary hardware topology information to user-space applications.
|
||||
* Currently, only the FreeBSD backend relies on this x86 backend.
|
||||
* Currently, FreeBSD and NetBSD only add PUs and then fallback to this
|
||||
* backend for CPU/Cache discovery.
|
||||
*
|
||||
* Other backends such as Linux have their own way to retrieve various
|
||||
* pieces of hardware topology information from the operating system
|
||||
* on various architectures, without having to use this x86-specific code.
|
||||
* But this backend is still used after them to annotate some objects with
|
||||
* additional details (CPU info in Package, Inclusiveness in Caches).
|
||||
* It may also be enabled manually to work-around bugs in native OS discovery.
|
||||
*/
|
||||
|
||||
#include "private/autogen/config.h"
|
||||
@@ -35,6 +39,12 @@ struct hwloc_x86_backend_data_s {
|
||||
int apicid_unique;
|
||||
char *src_cpuiddump_path;
|
||||
int is_knl;
|
||||
int is_hybrid;
|
||||
int found_die_ids;
|
||||
int found_complex_ids;
|
||||
int found_unit_ids;
|
||||
int found_module_ids;
|
||||
int found_tile_ids;
|
||||
};
|
||||
|
||||
/************************************
|
||||
@@ -77,7 +87,7 @@ cpuiddump_read(const char *dirpath, unsigned idx)
|
||||
|
||||
cpuiddump = malloc(sizeof(*cpuiddump));
|
||||
if (!cpuiddump) {
|
||||
fprintf(stderr, "Failed to allocate cpuiddump for PU #%u, ignoring cpuiddump.\n", idx);
|
||||
fprintf(stderr, "hwloc/x86: Failed to allocate cpuiddump for PU #%u, ignoring cpuiddump.\n", idx);
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -88,7 +98,7 @@ cpuiddump_read(const char *dirpath, unsigned idx)
|
||||
snprintf(filename, filenamelen, "%s/pu%u", dirpath, idx);
|
||||
file = fopen(filename, "r");
|
||||
if (!file) {
|
||||
fprintf(stderr, "Could not read dumped cpuid file %s, ignoring cpuiddump.\n", filename);
|
||||
fprintf(stderr, "hwloc/x86: Could not read dumped cpuid file %s, ignoring cpuiddump.\n", filename);
|
||||
goto out_with_filename;
|
||||
}
|
||||
|
||||
@@ -97,7 +107,7 @@ cpuiddump_read(const char *dirpath, unsigned idx)
|
||||
nr++;
|
||||
cpuiddump->entries = malloc(nr * sizeof(struct cpuiddump_entry));
|
||||
if (!cpuiddump->entries) {
|
||||
fprintf(stderr, "Failed to allocate %u cpuiddump entries for PU #%u, ignoring cpuiddump.\n", nr, idx);
|
||||
fprintf(stderr, "hwloc/x86: Failed to allocate %u cpuiddump entries for PU #%u, ignoring cpuiddump.\n", nr, idx);
|
||||
goto out_with_file;
|
||||
}
|
||||
|
||||
@@ -153,7 +163,7 @@ cpuiddump_find_by_input(unsigned *eax, unsigned *ebx, unsigned *ecx, unsigned *e
|
||||
return;
|
||||
}
|
||||
|
||||
fprintf(stderr, "Couldn't find %x,%x,%x,%x in dumped cpuid, returning 0s.\n",
|
||||
fprintf(stderr, "hwloc/x86: Couldn't find %x,%x,%x,%x in dumped cpuid, returning 0s.\n",
|
||||
*eax, *ebx, *ecx, *edx);
|
||||
*eax = 0;
|
||||
*ebx = 0;
|
||||
@@ -207,7 +217,8 @@ struct procinfo {
|
||||
#define TILE 4
|
||||
#define MODULE 5
|
||||
#define DIE 6
|
||||
#define HWLOC_X86_PROCINFO_ID_NR 7
|
||||
#define COMPLEX 7
|
||||
#define HWLOC_X86_PROCINFO_ID_NR 8
|
||||
unsigned ids[HWLOC_X86_PROCINFO_ID_NR];
|
||||
unsigned *otherids;
|
||||
unsigned levels;
|
||||
@@ -311,7 +322,7 @@ static void read_amd_caches_topoext(struct procinfo *infos, struct cpuiddump *sr
|
||||
/* the code below doesn't want any other cache yet */
|
||||
assert(!infos->numcaches);
|
||||
|
||||
for (cachenum = 0; ; cachenum++) {
|
||||
for (cachenum = 0; cachenum<16 /* guard */; cachenum++) {
|
||||
eax = 0x8000001d;
|
||||
ecx = cachenum;
|
||||
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
|
||||
@@ -322,7 +333,7 @@ static void read_amd_caches_topoext(struct procinfo *infos, struct cpuiddump *sr
|
||||
|
||||
cache = infos->cache = malloc(infos->numcaches * sizeof(*infos->cache));
|
||||
if (cache) {
|
||||
for (cachenum = 0; ; cachenum++) {
|
||||
for (cachenum = 0; cachenum<16 /* guard */; cachenum++) {
|
||||
unsigned long linesize, linepart, ways, sets;
|
||||
eax = 0x8000001d;
|
||||
ecx = cachenum;
|
||||
@@ -375,7 +386,7 @@ static void read_intel_caches(struct hwloc_x86_backend_data_s *data, struct proc
|
||||
unsigned cachenum;
|
||||
struct cacheinfo *cache;
|
||||
|
||||
for (cachenum = 0; ; cachenum++) {
|
||||
for (cachenum = 0; cachenum<16 /* guard */; cachenum++) {
|
||||
eax = 0x04;
|
||||
ecx = cachenum;
|
||||
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
|
||||
@@ -397,7 +408,7 @@ static void read_intel_caches(struct hwloc_x86_backend_data_s *data, struct proc
|
||||
infos->cache = tmpcaches;
|
||||
cache = &infos->cache[oldnumcaches];
|
||||
|
||||
for (cachenum = 0; ; cachenum++) {
|
||||
for (cachenum = 0; cachenum<16 /* guard */; cachenum++) {
|
||||
unsigned long linesize, linepart, ways, sets;
|
||||
eax = 0x04;
|
||||
ecx = cachenum;
|
||||
@@ -477,7 +488,7 @@ static void read_amd_cores_legacy(struct procinfo *infos, struct cpuiddump *src_
|
||||
}
|
||||
|
||||
/* AMD unit/node from CPUID 0x8000001e leaf (topoext) */
|
||||
static void read_amd_cores_topoext(struct procinfo *infos, unsigned long flags, struct cpuiddump *src_cpuiddump)
|
||||
static void read_amd_cores_topoext(struct hwloc_x86_backend_data_s *data, struct procinfo *infos, unsigned long flags __hwloc_attribute_unused, struct cpuiddump *src_cpuiddump)
|
||||
{
|
||||
unsigned apic_id, nodes_per_proc = 0;
|
||||
unsigned eax, ebx, ecx, edx;
|
||||
@@ -486,7 +497,6 @@ static void read_amd_cores_topoext(struct procinfo *infos, unsigned long flags,
|
||||
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
|
||||
infos->apicid = apic_id = eax;
|
||||
|
||||
if (flags & HWLOC_X86_DISC_FLAG_TOPOEXT_NUMANODES) {
|
||||
if (infos->cpufamilynumber == 0x16) {
|
||||
/* ecx is reserved */
|
||||
infos->ids[NODE] = 0;
|
||||
@@ -497,15 +507,16 @@ static void read_amd_cores_topoext(struct procinfo *infos, unsigned long flags,
|
||||
nodes_per_proc = ((ecx >> 8) & 7) + 1;
|
||||
}
|
||||
if ((infos->cpufamilynumber == 0x15 && nodes_per_proc > 2)
|
||||
|| ((infos->cpufamilynumber == 0x17 || infos->cpufamilynumber == 0x18) && nodes_per_proc > 4)) {
|
||||
|| ((infos->cpufamilynumber == 0x17 || infos->cpufamilynumber == 0x18) && nodes_per_proc > 4)
|
||||
|| (infos->cpufamilynumber == 0x19 && nodes_per_proc > 1)) {
|
||||
hwloc_debug("warning: undefined nodes_per_proc value %u, assuming it means %u\n", nodes_per_proc, nodes_per_proc);
|
||||
}
|
||||
}
|
||||
|
||||
if (infos->cpufamilynumber <= 0x16) { /* topoext appeared in 0x15 and compute-units were only used in 0x15 and 0x16 */
|
||||
unsigned cores_per_unit;
|
||||
/* coreid was obtained from read_amd_cores_legacy() earlier */
|
||||
infos->ids[UNIT] = ebx & 0xff;
|
||||
data->found_unit_ids = 1;
|
||||
cores_per_unit = ((ebx >> 8) & 0xff) + 1;
|
||||
hwloc_debug("topoext %08x, %u nodes, node %u, %u cores in unit %u\n", apic_id, nodes_per_proc, infos->ids[NODE], cores_per_unit, infos->ids[UNIT]);
|
||||
/* coreid and unitid are package-wide (core 0-15 and unit 0-7 on 16-core 2-NUMAnode processor).
|
||||
@@ -520,19 +531,29 @@ static void read_amd_cores_topoext(struct procinfo *infos, unsigned long flags,
|
||||
}
|
||||
}
|
||||
|
||||
/* Intel core/thread or even die/module/tile from CPUID 0x0b or 0x1f leaves (v1 and v2 extended topology enumeration) */
|
||||
static void read_intel_cores_exttopoenum(struct procinfo *infos, unsigned leaf, struct cpuiddump *src_cpuiddump)
|
||||
/* Intel core/thread or even die/module/tile from CPUID 0x0b or 0x1f leaves (v1 and v2 extended topology enumeration)
|
||||
* or AMD core/thread or even complex/ccd from CPUID 0x0b or 0x80000026 (extended CPU topology)
|
||||
*/
|
||||
static void read_extended_topo(struct hwloc_x86_backend_data_s *data, struct procinfo *infos, unsigned leaf, enum cpuid_type cpuid_type __hwloc_attribute_unused, struct cpuiddump *src_cpuiddump)
|
||||
{
|
||||
unsigned level, apic_nextshift, apic_number, apic_type, apic_id = 0, apic_shift = 0, id;
|
||||
unsigned level, apic_nextshift, apic_type, apic_id = 0, apic_shift = 0, id;
|
||||
unsigned threadid __hwloc_attribute_unused = 0; /* shut-up compiler */
|
||||
unsigned eax, ebx, ecx = 0, edx;
|
||||
int apic_packageshift = 0;
|
||||
|
||||
for (level = 0; ; level++) {
|
||||
for (level = 0; level<32 /* guard */; level++) {
|
||||
ecx = level;
|
||||
eax = leaf;
|
||||
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
|
||||
if (!eax && !ebx)
|
||||
/* Intel specifies that the 0x0b/0x1f loop should stop when we get "invalid domain" (0 in ecx[8:15])
|
||||
* (if so, we also get 0 in eax/ebx for invalid subleaves). Zhaoxin implements this too.
|
||||
* However AMD rather says that the 0x80000026/0x0b loop should stop when we get "no thread at this level" (0 in ebx[0:15]).
|
||||
*
|
||||
* Linux kernel <= 6.8 used "invalid domain" for both Intel and AMD (in detect_extended_topology())
|
||||
* but x86 discovery revamp in 6.9 now properly checks both Intel and AMD conditions (in topo_subleaf()).
|
||||
* So let's assume we are allowed to break-out once one of the Intel+AMD conditions is met.
|
||||
*/
|
||||
if (!(ebx & 0xffff) || !(ecx & 0xff00))
|
||||
break;
|
||||
apic_packageshift = eax & 0x1f;
|
||||
}
|
||||
@@ -541,47 +562,68 @@ static void read_intel_cores_exttopoenum(struct procinfo *infos, unsigned leaf,
|
||||
infos->otherids = malloc(level * sizeof(*infos->otherids));
|
||||
if (infos->otherids) {
|
||||
infos->levels = level;
|
||||
for (level = 0; ; level++) {
|
||||
for (level = 0; level<32 /* guard */; level++) {
|
||||
ecx = level;
|
||||
eax = leaf;
|
||||
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
|
||||
if (!eax && !ebx)
|
||||
break;
|
||||
if (!(ebx & 0xffff) || !(ecx & 0xff00))
|
||||
break;
|
||||
apic_nextshift = eax & 0x1f;
|
||||
apic_number = ebx & 0xffff;
|
||||
apic_type = (ecx & 0xff00) >> 8;
|
||||
apic_id = edx;
|
||||
id = (apic_id >> apic_shift) & ((1 << (apic_packageshift - apic_shift)) - 1);
|
||||
hwloc_debug("x2APIC %08x %u: nextshift %u num %2u type %u id %2u\n", apic_id, level, apic_nextshift, apic_number, apic_type, id);
|
||||
hwloc_debug("x2APIC %08x %u: nextshift %u nextnumber %2u type %u id %2u\n",
|
||||
apic_id,
|
||||
level,
|
||||
apic_nextshift,
|
||||
ebx & 0xffff /* number of threads in next level */,
|
||||
apic_type,
|
||||
id);
|
||||
infos->apicid = apic_id;
|
||||
infos->otherids[level] = UINT_MAX;
|
||||
switch (apic_type) {
|
||||
case 1:
|
||||
threadid = id;
|
||||
/* apic_number is the actual number of threads per core */
|
||||
break;
|
||||
case 2:
|
||||
infos->ids[CORE] = id;
|
||||
/* apic_number is the actual number of threads per die */
|
||||
break;
|
||||
case 3:
|
||||
infos->ids[MODULE] = id;
|
||||
/* apic_number is the actual number of threads per tile */
|
||||
break;
|
||||
case 4:
|
||||
infos->ids[TILE] = id;
|
||||
/* apic_number is the actual number of threads per die */
|
||||
break;
|
||||
case 5:
|
||||
infos->ids[DIE] = id;
|
||||
/* apic_number is the actual number of threads per package */
|
||||
break;
|
||||
default:
|
||||
hwloc_debug("x2APIC %u: unknown type %u\n", level, apic_type);
|
||||
infos->otherids[level] = apic_id >> apic_shift;
|
||||
break;
|
||||
}
|
||||
apic_shift = apic_nextshift;
|
||||
switch (apic_type) {
|
||||
case 1:
|
||||
threadid = id;
|
||||
break;
|
||||
case 2:
|
||||
infos->ids[CORE] = id;
|
||||
break;
|
||||
case 3:
|
||||
if (leaf == 0x80000026) {
|
||||
data->found_complex_ids = 1;
|
||||
infos->ids[COMPLEX] = id;
|
||||
} else {
|
||||
data->found_module_ids = 1;
|
||||
infos->ids[MODULE] = id;
|
||||
}
|
||||
break;
|
||||
case 4:
|
||||
if (leaf == 0x80000026) {
|
||||
data->found_die_ids = 1;
|
||||
infos->ids[DIE] = id;
|
||||
} else {
|
||||
data->found_tile_ids = 1;
|
||||
infos->ids[TILE] = id;
|
||||
}
|
||||
break;
|
||||
case 5:
|
||||
if (leaf == 0x80000026) {
|
||||
goto unknown_type;
|
||||
} else {
|
||||
data->found_die_ids = 1;
|
||||
infos->ids[DIE] = id;
|
||||
}
|
||||
break;
|
||||
case 6:
|
||||
/* TODO: "DieGrp" on Intel */
|
||||
/* fallthrough */
|
||||
default:
|
||||
unknown_type:
|
||||
hwloc_debug("x2APIC %u: unknown type %u\n", level, apic_type);
|
||||
infos->otherids[level] = apic_id >> apic_shift;
|
||||
break;
|
||||
}
|
||||
apic_shift = apic_nextshift;
|
||||
}
|
||||
infos->apicid = apic_id;
|
||||
infos->ids[PKG] = apic_id >> apic_shift;
|
||||
@@ -610,10 +652,13 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
|
||||
eax = 0x01;
|
||||
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
|
||||
infos->apicid = ebx >> 24;
|
||||
if (edx & (1 << 28))
|
||||
if (edx & (1 << 28)) {
|
||||
legacy_max_log_proc = 1 << hwloc_flsl(((ebx >> 16) & 0xff) - 1);
|
||||
else
|
||||
} else {
|
||||
hwloc_debug("HTT bit not set in CPUID 0x01.edx, assuming legacy_max_log_proc = 1\n");
|
||||
legacy_max_log_proc = 1;
|
||||
}
|
||||
|
||||
hwloc_debug("APIC ID 0x%02x legacy_max_log_proc %u\n", infos->apicid, legacy_max_log_proc);
|
||||
infos->ids[PKG] = infos->apicid / legacy_max_log_proc;
|
||||
legacy_log_proc_id = infos->apicid % legacy_max_log_proc;
|
||||
@@ -676,22 +721,34 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
|
||||
unsigned max_nbcores;
|
||||
unsigned max_nbthreads;
|
||||
unsigned threadid __hwloc_attribute_unused;
|
||||
hwloc_debug("Trying to get core/thread IDs from 0x04...\n");
|
||||
max_nbcores = ((eax >> 26) & 0x3f) + 1;
|
||||
max_nbthreads = legacy_max_log_proc / max_nbcores;
|
||||
hwloc_debug("thus %u threads\n", max_nbthreads);
|
||||
threadid = legacy_log_proc_id % max_nbthreads;
|
||||
infos->ids[CORE] = legacy_log_proc_id / max_nbthreads;
|
||||
hwloc_debug("this is thread %u of core %u\n", threadid, infos->ids[CORE]);
|
||||
hwloc_debug("found %u cores max\n", max_nbcores);
|
||||
/* some VMs (e.g. issue#525) don't report valid information, check things before dividing by 0. */
|
||||
if (!max_nbcores) {
|
||||
hwloc_debug("cannot detect core/thread IDs from 0x04 without a valid max of cores\n");
|
||||
} else {
|
||||
max_nbthreads = legacy_max_log_proc / max_nbcores;
|
||||
hwloc_debug("found %u threads max\n", max_nbthreads);
|
||||
if (!max_nbthreads) {
|
||||
hwloc_debug("cannot detect core/thread IDs from 0x04 without a valid max of threads\n");
|
||||
} else {
|
||||
threadid = legacy_log_proc_id % max_nbthreads;
|
||||
infos->ids[CORE] = legacy_log_proc_id / max_nbthreads;
|
||||
hwloc_debug("this is thread %u of core %u\n", threadid, infos->ids[CORE]);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (highest_cpuid >= 0x1a && has_hybrid(features)) {
|
||||
/* Get hybrid cpu information from cpuid 0x1a */
|
||||
/* Get hybrid cpu information from cpuid 0x1a on Intel */
|
||||
eax = 0x1a;
|
||||
ecx = 0;
|
||||
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
|
||||
infos->hybridcoretype = eax >> 24;
|
||||
infos->hybridnativemodel = eax & 0xffffff;
|
||||
data->is_hybrid = 1;
|
||||
}
|
||||
|
||||
/*********************************************************************************
|
||||
@@ -713,23 +770,30 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
|
||||
*
|
||||
* Only needed when x2apic supported if NUMA nodes are needed.
|
||||
*/
|
||||
read_amd_cores_topoext(infos, flags, src_cpuiddump);
|
||||
read_amd_cores_topoext(data, infos, flags, src_cpuiddump);
|
||||
}
|
||||
|
||||
if ((cpuid_type == intel) && highest_cpuid >= 0x1f) {
|
||||
if ((cpuid_type == amd) && highest_ext_cpuid >= 0x80000026) {
|
||||
/* Get socket/die/complex/core/thread information from cpuid 0x80000026
|
||||
* (AMD Extended CPU Topology)
|
||||
*/
|
||||
read_extended_topo(data, infos, 0x80000026, cpuid_type, src_cpuiddump);
|
||||
|
||||
} else if ((cpuid_type == intel || cpuid_type == zhaoxin) && highest_cpuid >= 0x1f) {
|
||||
/* Get package/die/module/tile/core/thread information from cpuid 0x1f
|
||||
* (Intel v2 Extended Topology Enumeration)
|
||||
*/
|
||||
read_intel_cores_exttopoenum(infos, 0x1f, src_cpuiddump);
|
||||
read_extended_topo(data, infos, 0x1f, cpuid_type, src_cpuiddump);
|
||||
|
||||
} else if ((cpuid_type == intel || cpuid_type == amd || cpuid_type == zhaoxin)
|
||||
&& highest_cpuid >= 0x0b && has_x2apic(features)) {
|
||||
/* Get package/core/thread information from cpuid 0x0b
|
||||
* (Intel v1 Extended Topology Enumeration)
|
||||
*/
|
||||
read_intel_cores_exttopoenum(infos, 0x0b, src_cpuiddump);
|
||||
read_extended_topo(data, infos, 0x0b, cpuid_type, src_cpuiddump);
|
||||
}
|
||||
|
||||
if (backend->topology->want_some_cpu_caches) {
|
||||
/**************************************
|
||||
* Get caches from CPU-specific leaves
|
||||
*/
|
||||
@@ -772,13 +836,19 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
|
||||
|
||||
} else if (cpuid_type == amd) {
|
||||
/* AMD quirks */
|
||||
if (infos->cpufamilynumber == 0x17
|
||||
&& cache->level == 3 && cache->nbthreads_sharing == 6) {
|
||||
/* AMD family 0x17 always shares L3 between 8 APIC ids,
|
||||
* even when only 6 APIC ids are enabled and reported in nbthreads_sharing
|
||||
* (on 24-core CPUs).
|
||||
if (infos->cpufamilynumber >= 0x17 && cache->level == 3) {
|
||||
/* AMD family 0x19 always shares L3 between 16 APIC ids (8 HT cores).
|
||||
* while Family 0x17 shares between 8 APIC ids (4 HT cores).
|
||||
* But many models have less APIC ids enabled and reported in nbthreads_sharing.
|
||||
* It means we must round-up nbthreads_sharing to the nearest power of 2
|
||||
* before computing cacheid.
|
||||
*/
|
||||
cache->cacheid = infos->apicid / 8;
|
||||
unsigned nbapics_sharing = cache->nbthreads_sharing;
|
||||
if (nbapics_sharing & (nbapics_sharing-1))
|
||||
/* not a power of two, round-up */
|
||||
nbapics_sharing = 1U<<(1+hwloc_ffsl(nbapics_sharing));
|
||||
|
||||
cache->cacheid = infos->apicid / nbapics_sharing;
|
||||
|
||||
} else if (infos->cpufamilynumber== 0x10 && infos->cpumodelnumber == 0x9
|
||||
&& cache->level == 3
|
||||
@@ -804,7 +874,7 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
|
||||
} else if (infos->cpufamilynumber == 0x15
|
||||
&& (infos->cpumodelnumber == 0x1 /* Bulldozer */ || infos->cpumodelnumber == 0x2 /* Piledriver */)
|
||||
&& cache->level == 3 && cache->nbthreads_sharing == 6) {
|
||||
/* AMD Bulldozer and Piledriver 12-core processors have same APIC ids as Magny-Cours below,
|
||||
/* AMD Bulldozer and Piledriver 12-core processors have same APIC ids as Magny-Cours above,
|
||||
* but we can't merge the checks because the original nbthreads_sharing must be exactly 6 here.
|
||||
*/
|
||||
cache->cacheid = (infos->apicid % legacy_max_log_proc) / cache->nbthreads_sharing /* cacheid within the package */
|
||||
@@ -821,6 +891,7 @@ static void look_proc(struct hwloc_backend *backend, struct procinfo *infos, uns
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (hwloc_bitmap_isset(data->apicid_set, infos->apicid))
|
||||
data->apicid_unique = 0;
|
||||
@@ -1022,21 +1093,34 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
|
||||
|
||||
if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_GROUP)) {
|
||||
if (fulldiscovery) {
|
||||
/* Look for AMD Compute units inside packages */
|
||||
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
|
||||
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
|
||||
UNIT, "Compute Unit",
|
||||
HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT, 0);
|
||||
/* Look for Intel Modules inside packages */
|
||||
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
|
||||
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
|
||||
MODULE, "Module",
|
||||
HWLOC_GROUP_KIND_INTEL_MODULE, 0);
|
||||
/* Look for Intel Tiles inside packages */
|
||||
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
|
||||
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
|
||||
TILE, "Tile",
|
||||
HWLOC_GROUP_KIND_INTEL_TILE, 0);
|
||||
if (data->found_unit_ids) {
|
||||
/* Look for AMD Complex inside packages */
|
||||
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
|
||||
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
|
||||
COMPLEX, "Complex",
|
||||
HWLOC_GROUP_KIND_AMD_COMPLEX, 0);
|
||||
}
|
||||
if (data->found_unit_ids) {
|
||||
/* Look for AMD Compute units inside packages */
|
||||
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
|
||||
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
|
||||
UNIT, "Compute Unit",
|
||||
HWLOC_GROUP_KIND_AMD_COMPUTE_UNIT, 0);
|
||||
}
|
||||
if (data->found_module_ids) {
|
||||
/* Look for Intel Modules inside packages */
|
||||
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
|
||||
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
|
||||
MODULE, "Module",
|
||||
HWLOC_GROUP_KIND_INTEL_MODULE, 0);
|
||||
}
|
||||
if (data->found_tile_ids) {
|
||||
/* Look for Intel Tiles inside packages */
|
||||
hwloc_bitmap_copy(remaining_cpuset, complete_cpuset);
|
||||
hwloc_x86_add_groups(topology, infos, nbprocs, remaining_cpuset,
|
||||
TILE, "Tile",
|
||||
HWLOC_GROUP_KIND_INTEL_TILE, 0);
|
||||
}
|
||||
|
||||
/* Look for unknown objects */
|
||||
if (infos[one].otherids) {
|
||||
@@ -1070,7 +1154,8 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
|
||||
}
|
||||
}
|
||||
|
||||
if (hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_DIE)) {
|
||||
if (data->found_die_ids
|
||||
&& hwloc_filter_check_keep_object_type(topology, HWLOC_OBJ_DIE)) {
|
||||
/* Look for Intel Dies inside packages */
|
||||
if (fulldiscovery) {
|
||||
hwloc_bitmap_t die_cpuset;
|
||||
@@ -1228,6 +1313,18 @@ static void summarize(struct hwloc_backend *backend, struct procinfo *infos, uns
|
||||
}
|
||||
}
|
||||
cache = hwloc_alloc_setup_object(topology, otype, HWLOC_UNKNOWN_INDEX);
|
||||
/* We don't specify the os_index of caches because we want to be
|
||||
* 100% sure they are identical to what the Linux kernel reports
|
||||
* (so that things like resctrl work).
|
||||
* However, vendor/model-specific quirks in the x86 code above
|
||||
* make this difficult.
|
||||
*
|
||||
* Caveat: if the x86 backend is used on Linux to avoid kernel bugs,
|
||||
* IDs won't be available to resctrl users. But resctrl heavily
|
||||
* relies on the kernel x86 discovery being non-buggy anyway.
|
||||
*
|
||||
* TODO: make this optional? or only disable it on Linux?
|
||||
*/
|
||||
cache->attr->cache.depth = level;
|
||||
cache->attr->cache.size = infos[i].cache[l].size;
|
||||
cache->attr->cache.linesize = infos[i].cache[l].linesize;
|
||||
@@ -1257,7 +1354,8 @@ static int
|
||||
look_procs(struct hwloc_backend *backend, struct procinfo *infos, unsigned long flags,
|
||||
unsigned highest_cpuid, unsigned highest_ext_cpuid, unsigned *features, enum cpuid_type cpuid_type,
|
||||
int (*get_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags),
|
||||
int (*set_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags))
|
||||
int (*set_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags),
|
||||
hwloc_bitmap_t restrict_set)
|
||||
{
|
||||
struct hwloc_x86_backend_data_s *data = backend->private_data;
|
||||
struct hwloc_topology *topology = backend->topology;
|
||||
@@ -1277,6 +1375,12 @@ look_procs(struct hwloc_backend *backend, struct procinfo *infos, unsigned long
|
||||
|
||||
for (i = 0; i < nbprocs; i++) {
|
||||
struct cpuiddump *src_cpuiddump = NULL;
|
||||
|
||||
if (restrict_set && !hwloc_bitmap_isset(restrict_set, i)) {
|
||||
/* skip this CPU outside of the binding mask */
|
||||
continue;
|
||||
}
|
||||
|
||||
if (data->src_cpuiddump_path) {
|
||||
src_cpuiddump = cpuiddump_read(data->src_cpuiddump_path, i);
|
||||
if (!src_cpuiddump)
|
||||
@@ -1306,40 +1410,45 @@ look_procs(struct hwloc_backend *backend, struct procinfo *infos, unsigned long
|
||||
if (data->apicid_unique) {
|
||||
summarize(backend, infos, flags);
|
||||
|
||||
if (has_hybrid(features)) {
|
||||
if (data->is_hybrid
|
||||
&& !(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS)) {
|
||||
/* use hybrid info for cpukinds */
|
||||
hwloc_bitmap_t atomset = hwloc_bitmap_alloc();
|
||||
hwloc_bitmap_t coreset = hwloc_bitmap_alloc();
|
||||
for(i=0; i<nbprocs; i++) {
|
||||
if (infos[i].hybridcoretype == 0x20)
|
||||
hwloc_bitmap_set(atomset, i);
|
||||
else if (infos[i].hybridcoretype == 0x40)
|
||||
hwloc_bitmap_set(coreset, i);
|
||||
}
|
||||
/* register IntelAtom set if any */
|
||||
if (!hwloc_bitmap_iszero(atomset)) {
|
||||
struct hwloc_info_s infoattr;
|
||||
infoattr.name = (char *) "CoreType";
|
||||
infoattr.value = (char *) "IntelAtom";
|
||||
hwloc_internal_cpukinds_register(topology, atomset, HWLOC_CPUKIND_EFFICIENCY_UNKNOWN, &infoattr, 1, 0);
|
||||
/* the cpuset is given to the callee */
|
||||
} else {
|
||||
hwloc_bitmap_free(atomset);
|
||||
}
|
||||
/* register IntelCore set if any */
|
||||
if (!hwloc_bitmap_iszero(coreset)) {
|
||||
struct hwloc_info_s infoattr;
|
||||
infoattr.name = (char *) "CoreType";
|
||||
infoattr.value = (char *) "IntelCore";
|
||||
hwloc_internal_cpukinds_register(topology, coreset, HWLOC_CPUKIND_EFFICIENCY_UNKNOWN, &infoattr, 1, 0);
|
||||
/* the cpuset is given to the callee */
|
||||
} else {
|
||||
hwloc_bitmap_free(coreset);
|
||||
if (cpuid_type == intel) {
|
||||
/* Hybrid Intel */
|
||||
hwloc_bitmap_t atomset = hwloc_bitmap_alloc();
|
||||
hwloc_bitmap_t coreset = hwloc_bitmap_alloc();
|
||||
for(i=0; i<nbprocs; i++) {
|
||||
if (infos[i].hybridcoretype == 0x20)
|
||||
hwloc_bitmap_set(atomset, i);
|
||||
else if (infos[i].hybridcoretype == 0x40)
|
||||
hwloc_bitmap_set(coreset, i);
|
||||
}
|
||||
/* register IntelAtom set if any */
|
||||
if (!hwloc_bitmap_iszero(atomset)) {
|
||||
struct hwloc_info_s infoattr;
|
||||
infoattr.name = (char *) "CoreType";
|
||||
infoattr.value = (char *) "IntelAtom";
|
||||
hwloc_internal_cpukinds_register(topology, atomset, HWLOC_CPUKIND_EFFICIENCY_UNKNOWN, &infoattr, 1, 0);
|
||||
/* the cpuset is given to the callee */
|
||||
} else {
|
||||
hwloc_bitmap_free(atomset);
|
||||
}
|
||||
/* register IntelCore set if any */
|
||||
if (!hwloc_bitmap_iszero(coreset)) {
|
||||
struct hwloc_info_s infoattr;
|
||||
infoattr.name = (char *) "CoreType";
|
||||
infoattr.value = (char *) "IntelCore";
|
||||
hwloc_internal_cpukinds_register(topology, coreset, HWLOC_CPUKIND_EFFICIENCY_UNKNOWN, &infoattr, 1, 0);
|
||||
/* the cpuset is given to the callee */
|
||||
} else {
|
||||
hwloc_bitmap_free(coreset);
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
hwloc_debug("x86 APIC IDs aren't unique, x86 discovery ignored.\n");
|
||||
/* do nothing and return success, so that the caller does nothing either */
|
||||
}
|
||||
/* if !data->apicid_unique, do nothing and return success, so that the caller does nothing either */
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -1410,12 +1519,21 @@ static
|
||||
int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags)
|
||||
{
|
||||
struct hwloc_x86_backend_data_s *data = backend->private_data;
|
||||
struct hwloc_topology *topology = backend->topology;
|
||||
unsigned nbprocs = data->nbprocs;
|
||||
unsigned eax, ebx, ecx = 0, edx;
|
||||
unsigned i;
|
||||
unsigned highest_cpuid;
|
||||
unsigned highest_ext_cpuid;
|
||||
/* This stores cpuid features with the same indexing as Linux */
|
||||
/* This stores cpuid features with the same indexing as Linux:
|
||||
* [0] = 0x1 edx
|
||||
* [1] = 0x80000001 edx
|
||||
* [4] = 0x1 ecx
|
||||
* [6] = 0x80000001 ecx
|
||||
* [9] = 0x7/0 ebx
|
||||
* [16] = 0x7/0 ecx
|
||||
* [18] = 0x7/0 edx
|
||||
*/
|
||||
unsigned features[19] = { 0 };
|
||||
struct procinfo *infos = NULL;
|
||||
enum cpuid_type cpuid_type = unknown;
|
||||
@@ -1425,9 +1543,21 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags)
|
||||
struct hwloc_topology_membind_support memsupport __hwloc_attribute_unused;
|
||||
int (*get_cpubind)(hwloc_topology_t topology, hwloc_cpuset_t set, int flags) = NULL;
|
||||
int (*set_cpubind)(hwloc_topology_t topology, hwloc_const_cpuset_t set, int flags) = NULL;
|
||||
hwloc_bitmap_t restrict_set = NULL;
|
||||
struct cpuiddump *src_cpuiddump = NULL;
|
||||
int ret = -1;
|
||||
|
||||
/* check if binding works */
|
||||
memset(&hooks, 0, sizeof(hooks));
|
||||
support.membind = &memsupport;
|
||||
/* We could just copy the main hooks (except in some corner cases),
|
||||
* but the current overhead is negligible, so just always reget them.
|
||||
*/
|
||||
hwloc_set_native_binding_hooks(&hooks, &support);
|
||||
/* in theory, those are only needed if !data->src_cpuiddump_path || HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_BINDING
|
||||
* but that's the vast majority of cases anyway, and the overhead is very small.
|
||||
*/
|
||||
|
||||
if (data->src_cpuiddump_path) {
|
||||
/* Just read cpuid from the dump (implies !topology->is_thissystem by default) */
|
||||
src_cpuiddump = cpuiddump_read(data->src_cpuiddump_path, 0);
|
||||
@@ -1440,13 +1570,6 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags)
|
||||
* we may still force use this backend when debugging with !thissystem.
|
||||
*/
|
||||
|
||||
/* check if binding works */
|
||||
memset(&hooks, 0, sizeof(hooks));
|
||||
support.membind = &memsupport;
|
||||
/* We could just copy the main hooks (except in some corner cases),
|
||||
* but the current overhead is negligible, so just always reget them.
|
||||
*/
|
||||
hwloc_set_native_binding_hooks(&hooks, &support);
|
||||
if (hooks.get_thisthread_cpubind && hooks.set_thisthread_cpubind) {
|
||||
get_cpubind = hooks.get_thisthread_cpubind;
|
||||
set_cpubind = hooks.set_thisthread_cpubind;
|
||||
@@ -1466,6 +1589,20 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags)
|
||||
}
|
||||
}
|
||||
|
||||
if (topology->flags & HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING) {
|
||||
restrict_set = hwloc_bitmap_alloc();
|
||||
if (!restrict_set)
|
||||
goto out;
|
||||
if (hooks.get_thisproc_cpubind)
|
||||
hooks.get_thisproc_cpubind(topology, restrict_set, 0);
|
||||
else if (hooks.get_thisthread_cpubind)
|
||||
hooks.get_thisthread_cpubind(topology, restrict_set, 0);
|
||||
if (hwloc_bitmap_iszero(restrict_set)) {
|
||||
hwloc_bitmap_free(restrict_set);
|
||||
restrict_set = NULL;
|
||||
}
|
||||
}
|
||||
|
||||
if (!src_cpuiddump && !hwloc_have_x86_cpuid())
|
||||
goto out;
|
||||
|
||||
@@ -1516,6 +1653,7 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags)
|
||||
ecx = 0;
|
||||
cpuid_or_from_dump(&eax, &ebx, &ecx, &edx, src_cpuiddump);
|
||||
features[9] = ebx;
|
||||
features[16] = ecx;
|
||||
features[18] = edx;
|
||||
}
|
||||
|
||||
@@ -1530,7 +1668,7 @@ int hwloc_look_x86(struct hwloc_backend *backend, unsigned long flags)
|
||||
|
||||
ret = look_procs(backend, infos, flags,
|
||||
highest_cpuid, highest_ext_cpuid, features, cpuid_type,
|
||||
get_cpubind, set_cpubind);
|
||||
get_cpubind, set_cpubind, restrict_set);
|
||||
if (!ret)
|
||||
/* success, we're done */
|
||||
goto out_with_os_state;
|
||||
@@ -1555,6 +1693,7 @@ out_with_infos:
|
||||
}
|
||||
|
||||
out:
|
||||
hwloc_bitmap_free(restrict_set);
|
||||
if (src_cpuiddump)
|
||||
cpuiddump_free(src_cpuiddump);
|
||||
return ret;
|
||||
@@ -1571,6 +1710,11 @@ hwloc_x86_discover(struct hwloc_backend *backend, struct hwloc_disc_status *dsta
|
||||
|
||||
assert(dstatus->phase == HWLOC_DISC_PHASE_CPU);
|
||||
|
||||
if (topology->flags & HWLOC_TOPOLOGY_FLAG_DONT_CHANGE_BINDING) {
|
||||
/* TODO: Things would work if there's a single PU, no need to rebind */
|
||||
return 0;
|
||||
}
|
||||
|
||||
if (getenv("HWLOC_X86_TOPOEXT_NUMANODES")) {
|
||||
flags |= HWLOC_X86_DISC_FLAG_TOPOEXT_NUMANODES;
|
||||
}
|
||||
@@ -1661,17 +1805,17 @@ hwloc_x86_check_cpuiddump_input(const char *src_cpuiddump_path, hwloc_bitmap_t s
|
||||
sprintf(path, "%s/hwloc-cpuid-info", src_cpuiddump_path);
|
||||
file = fopen(path, "r");
|
||||
if (!file) {
|
||||
fprintf(stderr, "Couldn't open dumped cpuid summary %s\n", path);
|
||||
fprintf(stderr, "hwloc/x86: Couldn't open dumped cpuid summary %s\n", path);
|
||||
goto out_with_path;
|
||||
}
|
||||
if (!fgets(line, sizeof(line), file)) {
|
||||
fprintf(stderr, "Found read dumped cpuid summary in %s\n", path);
|
||||
fprintf(stderr, "hwloc/x86: Found read dumped cpuid summary in %s\n", path);
|
||||
fclose(file);
|
||||
goto out_with_path;
|
||||
}
|
||||
fclose(file);
|
||||
if (strcmp(line, "Architecture: x86\n")) {
|
||||
fprintf(stderr, "Found non-x86 dumped cpuid summary in %s: %s\n", path, line);
|
||||
if (strncmp(line, "Architecture: x86", 17)) {
|
||||
fprintf(stderr, "hwloc/x86: Found non-x86 dumped cpuid summary in %s: %s\n", path, line);
|
||||
goto out_with_path;
|
||||
}
|
||||
free(path);
|
||||
@@ -1683,19 +1827,19 @@ hwloc_x86_check_cpuiddump_input(const char *src_cpuiddump_path, hwloc_bitmap_t s
|
||||
if (!*end)
|
||||
hwloc_bitmap_set(set, idx);
|
||||
else
|
||||
fprintf(stderr, "Ignoring invalid dirent `%s' in dumped cpuid directory `%s'\n",
|
||||
fprintf(stderr, "hwloc/x86: Ignoring invalid dirent `%s' in dumped cpuid directory `%s'\n",
|
||||
dirent->d_name, src_cpuiddump_path);
|
||||
}
|
||||
}
|
||||
closedir(dir);
|
||||
|
||||
if (hwloc_bitmap_iszero(set)) {
|
||||
fprintf(stderr, "Did not find any valid pu%%u entry in dumped cpuid directory `%s'\n",
|
||||
fprintf(stderr, "hwloc/x86: Did not find any valid pu%%u entry in dumped cpuid directory `%s'\n",
|
||||
src_cpuiddump_path);
|
||||
return -1;
|
||||
} else if (hwloc_bitmap_last(set) != hwloc_bitmap_weight(set) - 1) {
|
||||
/* The x86 backends enforces contigous set of PUs starting at 0 so far */
|
||||
fprintf(stderr, "Found non-contigous pu%%u range in dumped cpuid directory `%s'\n",
|
||||
fprintf(stderr, "hwloc/x86: Found non-contigous pu%%u range in dumped cpuid directory `%s'\n",
|
||||
src_cpuiddump_path);
|
||||
return -1;
|
||||
}
|
||||
@@ -1747,9 +1891,15 @@ hwloc_x86_component_instantiate(struct hwloc_topology *topology,
|
||||
|
||||
/* default values */
|
||||
data->is_knl = 0;
|
||||
data->is_hybrid = 0;
|
||||
data->apicid_set = hwloc_bitmap_alloc();
|
||||
data->apicid_unique = 1;
|
||||
data->src_cpuiddump_path = NULL;
|
||||
data->found_die_ids = 0;
|
||||
data->found_complex_ids = 0;
|
||||
data->found_unit_ids = 0;
|
||||
data->found_module_ids = 0;
|
||||
data->found_tile_ids = 0;
|
||||
|
||||
src_cpuiddump_path = getenv("HWLOC_CPUID_PATH");
|
||||
if (src_cpuiddump_path) {
|
||||
@@ -1760,7 +1910,7 @@ hwloc_x86_component_instantiate(struct hwloc_topology *topology,
|
||||
assert(!hwloc_bitmap_iszero(set)); /* enforced by hwloc_x86_check_cpuiddump_input() */
|
||||
data->nbprocs = hwloc_bitmap_weight(set);
|
||||
} else {
|
||||
fprintf(stderr, "Ignoring dumped cpuid directory.\n");
|
||||
fprintf(stderr, "hwloc/x86: Ignoring dumped cpuid directory.\n");
|
||||
}
|
||||
hwloc_bitmap_free(set);
|
||||
}
|
||||
|
||||
13
src/3rdparty/hwloc/src/topology-xml-nolibxml.c
vendored
13
src/3rdparty/hwloc/src/topology-xml-nolibxml.c
vendored
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009-2024 Inria. All rights reserved.
|
||||
* Copyright © 2009-2011 Université Bordeaux
|
||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
@@ -41,7 +41,7 @@ typedef struct hwloc__nolibxml_import_state_data_s {
|
||||
static char *
|
||||
hwloc__nolibxml_import_ignore_spaces(char *buffer)
|
||||
{
|
||||
return buffer + strspn(buffer, " \t\n");
|
||||
return buffer + strspn(buffer, " \t\n\r");
|
||||
}
|
||||
|
||||
static int
|
||||
@@ -411,12 +411,12 @@ hwloc_nolibxml_backend_init(struct hwloc_xml_backend_data_s *bdata,
|
||||
bdata->data = nbdata;
|
||||
|
||||
if (xmlbuffer) {
|
||||
nbdata->buffer = malloc(xmlbuflen+1);
|
||||
nbdata->buffer = malloc(xmlbuflen);
|
||||
if (!nbdata->buffer)
|
||||
goto out_with_nbdata;
|
||||
nbdata->buflen = xmlbuflen+1;
|
||||
nbdata->buflen = xmlbuflen;
|
||||
memcpy(nbdata->buffer, xmlbuffer, xmlbuflen);
|
||||
nbdata->buffer[xmlbuflen] = '\0';
|
||||
nbdata->buffer[xmlbuflen-1] = '\0'; /* make sure it's there as requested in the API */
|
||||
|
||||
} else {
|
||||
int err = hwloc_nolibxml_read_file(xmlpath, &nbdata->buffer, &nbdata->buflen);
|
||||
@@ -453,8 +453,9 @@ hwloc_nolibxml_import_diff(struct hwloc__xml_import_state_s *state,
|
||||
buffer = malloc(xmlbuflen);
|
||||
if (!buffer)
|
||||
goto out;
|
||||
memcpy(buffer, xmlbuffer, xmlbuflen);
|
||||
buflen = xmlbuflen;
|
||||
memcpy(buffer, xmlbuffer, xmlbuflen);
|
||||
buffer[xmlbuflen-1] = '\0'; /* make sure it's there as requested in the API */
|
||||
|
||||
} else {
|
||||
ret = hwloc_nolibxml_read_file(xmlpath, &buffer, &buflen);
|
||||
|
||||
152
src/3rdparty/hwloc/src/topology-xml.c
vendored
152
src/3rdparty/hwloc/src/topology-xml.c
vendored
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009-2024 Inria. All rights reserved.
|
||||
* Copyright © 2009-2011, 2020 Université Bordeaux
|
||||
* Copyright © 2009-2018 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
@@ -123,6 +123,17 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology,
|
||||
fprintf(stderr, "%s: unexpected zero gp_index, topology may be invalid\n", state->global->msgprefix);
|
||||
if (obj->gp_index >= topology->next_gp_index)
|
||||
topology->next_gp_index = obj->gp_index + 1;
|
||||
} else if (!strcmp(name, "id")) { /* forward compat */
|
||||
if (!strncmp(value, "obj", 3)) {
|
||||
obj->gp_index = strtoull(value+3, NULL, 10);
|
||||
if (!obj->gp_index && hwloc__xml_verbose())
|
||||
fprintf(stderr, "%s: unexpected zero id, topology may be invalid\n", state->global->msgprefix);
|
||||
if (obj->gp_index >= topology->next_gp_index)
|
||||
topology->next_gp_index = obj->gp_index + 1;
|
||||
} else {
|
||||
if (hwloc__xml_verbose())
|
||||
fprintf(stderr, "%s: unexpected id `%s' not-starting with `obj', ignoring\n", state->global->msgprefix, value);
|
||||
}
|
||||
} else if (!strcmp(name, "cpuset")) {
|
||||
if (!obj->cpuset)
|
||||
obj->cpuset = hwloc_bitmap_alloc();
|
||||
@@ -192,8 +203,9 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology,
|
||||
|| lvalue == HWLOC_OBJ_CACHE_INSTRUCTION)
|
||||
obj->attr->cache.type = (hwloc_obj_cache_type_t) lvalue;
|
||||
else
|
||||
fprintf(stderr, "%s: ignoring invalid cache_type attribute %lu\n",
|
||||
state->global->msgprefix, lvalue);
|
||||
if (hwloc__xml_verbose())
|
||||
fprintf(stderr, "%s: ignoring invalid cache_type attribute %lu\n",
|
||||
state->global->msgprefix, lvalue);
|
||||
} else if (hwloc__xml_verbose())
|
||||
fprintf(stderr, "%s: ignoring cache_type attribute for non-cache object type\n",
|
||||
state->global->msgprefix);
|
||||
@@ -242,7 +254,7 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology,
|
||||
else if (!strcmp(name, "dont_merge")) {
|
||||
unsigned long lvalue = strtoul(value, NULL, 10);
|
||||
if (obj->type == HWLOC_OBJ_GROUP)
|
||||
obj->attr->group.dont_merge = lvalue;
|
||||
obj->attr->group.dont_merge = (unsigned char) lvalue;
|
||||
else if (hwloc__xml_verbose())
|
||||
fprintf(stderr, "%s: ignoring dont_merge attribute for non-group object type\n",
|
||||
state->global->msgprefix);
|
||||
@@ -262,8 +274,8 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology,
|
||||
#ifndef HWLOC_HAVE_32BITS_PCI_DOMAIN
|
||||
} else if (domain > 0xffff) {
|
||||
static int warned = 0;
|
||||
if (!warned && !hwloc_hide_errors())
|
||||
fprintf(stderr, "Ignoring PCI device with non-16bit domain.\nPass --enable-32bits-pci-domain to configure to support such devices\n(warning: it would break the library ABI, don't enable unless really needed).\n");
|
||||
if (!warned && HWLOC_SHOW_ALL_ERRORS())
|
||||
fprintf(stderr, "hwloc/xml: Ignoring PCI device with non-16bit domain.\nPass --enable-32bits-pci-domain to configure to support such devices\n(warning: it would break the library ABI, don't enable unless really needed).\n");
|
||||
warned = 1;
|
||||
*ignore = 1;
|
||||
#endif
|
||||
@@ -337,6 +349,7 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology,
|
||||
} else {
|
||||
obj->attr->bridge.upstream_type = (hwloc_obj_bridge_type_t) upstream_type;
|
||||
obj->attr->bridge.downstream_type = (hwloc_obj_bridge_type_t) downstream_type;
|
||||
/* FIXME verify that upstream/downstream type is valid */
|
||||
};
|
||||
break;
|
||||
}
|
||||
@@ -361,12 +374,13 @@ hwloc__xml_import_object_attr(struct hwloc_topology *topology,
|
||||
#ifndef HWLOC_HAVE_32BITS_PCI_DOMAIN
|
||||
} else if (domain > 0xffff) {
|
||||
static int warned = 0;
|
||||
if (!warned && !hwloc_hide_errors())
|
||||
fprintf(stderr, "Ignoring bridge to PCI with non-16bit domain.\nPass --enable-32bits-pci-domain to configure to support such devices\n(warning: it would break the library ABI, don't enable unless really needed).\n");
|
||||
if (!warned && HWLOC_SHOW_ALL_ERRORS())
|
||||
fprintf(stderr, "hwloc/xml: Ignoring bridge to PCI with non-16bit domain.\nPass --enable-32bits-pci-domain to configure to support such devices\n(warning: it would break the library ABI, don't enable unless really needed).\n");
|
||||
warned = 1;
|
||||
*ignore = 1;
|
||||
#endif
|
||||
} else {
|
||||
/* FIXME verify that downstream type vs pci info are valid */
|
||||
obj->attr->bridge.downstream.pci.domain = domain;
|
||||
obj->attr->bridge.downstream.pci.secondary_bus = secbus;
|
||||
obj->attr->bridge.downstream.pci.subordinate_bus = subbus;
|
||||
@@ -548,7 +562,13 @@ hwloc__xml_import_pagetype(hwloc_topology_t topology __hwloc_attribute_unused, s
|
||||
char *attrname, *attrvalue;
|
||||
if (state->global->next_attr(state, &attrname, &attrvalue) < 0)
|
||||
break;
|
||||
if (!strcmp(attrname, "size"))
|
||||
if (!strcmp(attrname, "info")) {
|
||||
char *infoname, *infovalue;
|
||||
int ret = hwloc___xml_import_info(&infoname, &infovalue, state);
|
||||
if (ret < 0)
|
||||
return -1;
|
||||
/* ignored */
|
||||
} else if (!strcmp(attrname, "size"))
|
||||
size = strtoull(attrvalue, NULL, 10);
|
||||
else if (!strcmp(attrname, "count"))
|
||||
count = strtoull(attrvalue, NULL, 10);
|
||||
@@ -852,6 +872,10 @@ hwloc__xml_import_object(hwloc_topology_t topology,
|
||||
/* deal with possible future type */
|
||||
obj->type = HWLOC_OBJ_GROUP;
|
||||
obj->attr->group.kind = HWLOC_GROUP_KIND_INTEL_MODULE;
|
||||
} else if (!strcasecmp(attrvalue, "Cluster")) {
|
||||
/* deal with possible future type */
|
||||
obj->type = HWLOC_OBJ_GROUP;
|
||||
obj->attr->group.kind = HWLOC_GROUP_KIND_LINUX_CLUSTER;
|
||||
} else if (!strcasecmp(attrvalue, "MemCache")) {
|
||||
/* ignore possible future type */
|
||||
obj->type = _HWLOC_OBJ_FUTURE;
|
||||
@@ -1146,6 +1170,48 @@ hwloc__xml_import_object(hwloc_topology_t topology,
|
||||
data->last_numanode = obj;
|
||||
}
|
||||
|
||||
/* 3.0 forward compatibility */
|
||||
if (data->version_major >= 3 && obj->type == HWLOC_OBJ_OS_DEVICE) {
|
||||
/* osdev.type changed into bitmak in 3.0 */
|
||||
if (obj->attr->osdev.type & 3 /* STORAGE|MEMORY for BLOCK */) {
|
||||
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_BLOCK;
|
||||
} else if (obj->attr->osdev.type & 8 /* COPROC for COPROC and rsmi/nvml GPUs */) {
|
||||
if (obj->subtype && (!strcmp(obj->subtype, "RSMI") || !strcmp(obj->subtype, "NVML")))
|
||||
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_GPU;
|
||||
else
|
||||
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_COPROC;
|
||||
} else if (obj->attr->osdev.type & 4 /* GPU for non-COPROC GPUs */) {
|
||||
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_GPU;
|
||||
} else if (obj->attr->osdev.type & 32 /* OFED */) {
|
||||
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_OPENFABRICS;
|
||||
} else if (obj->attr->osdev.type & 16 /* NET for NET and BXI v2-fake-OFED */) {
|
||||
if (obj->subtype && !strcmp(obj->subtype, "BXI"))
|
||||
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_OPENFABRICS;
|
||||
else
|
||||
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_NETWORK;
|
||||
} else if (obj->attr->osdev.type & 64 /* DMA */) {
|
||||
obj->attr->osdev.type = HWLOC_OBJ_OSDEV_DMA;
|
||||
} else { /* none or unknown */
|
||||
obj->attr->osdev.type = (hwloc_obj_osdev_type_t) -1;
|
||||
}
|
||||
/* Backend info only in root */
|
||||
if (obj->subtype && !hwloc_obj_get_info_by_name(obj, "Backend")) {
|
||||
if (!strcmp(obj->subtype, "CUDA")) {
|
||||
hwloc_obj_add_info(obj, "Backend", "CUDA");
|
||||
} else if (!strcmp(obj->subtype, "NVML")) {
|
||||
hwloc_obj_add_info(obj, "Backend", "NVML");
|
||||
} else if (!strcmp(obj->subtype, "OpenCL")) {
|
||||
hwloc_obj_add_info(obj, "Backend", "OpenCL");
|
||||
} else if (!strcmp(obj->subtype, "RSMI")) {
|
||||
hwloc_obj_add_info(obj, "Backend", "RSMI");
|
||||
} else if (!strcmp(obj->subtype, "LevelZero")) {
|
||||
hwloc_obj_add_info(obj, "Backend", "LevelZero");
|
||||
} else if (!strcmp(obj->subtype, "Display")) {
|
||||
hwloc_obj_add_info(obj, "Backend", "GL");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (!hwloc_filter_check_keep_object(topology, obj)) {
|
||||
/* Ignore this object instead of inserting it.
|
||||
*
|
||||
@@ -1232,7 +1298,7 @@ hwloc__xml_import_object(hwloc_topology_t topology,
|
||||
/* next should be before cur */
|
||||
if (!childrengotignored) {
|
||||
static int reported = 0;
|
||||
if (!reported && !hwloc_hide_errors()) {
|
||||
if (!reported && HWLOC_SHOW_CRITICAL_ERRORS()) {
|
||||
hwloc__xml_import_report_outoforder(topology, next, cur);
|
||||
reported = 1;
|
||||
}
|
||||
@@ -1282,7 +1348,7 @@ hwloc__xml_v2import_support(hwloc_topology_t topology,
|
||||
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_support) == 4*sizeof(void*));
|
||||
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_discovery_support) == 6);
|
||||
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_cpubind_support) == 11);
|
||||
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_membind_support) == 15);
|
||||
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_membind_support) == 16);
|
||||
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_misc_support) == 1);
|
||||
#endif
|
||||
|
||||
@@ -1316,6 +1382,7 @@ hwloc__xml_v2import_support(hwloc_topology_t topology,
|
||||
else DO(membind,firsttouch_membind);
|
||||
else DO(membind,bind_membind);
|
||||
else DO(membind,interleave_membind);
|
||||
else DO(membind,weighted_interleave_membind);
|
||||
else DO(membind,nexttouch_membind);
|
||||
else DO(membind,migrate_membind);
|
||||
else DO(membind,get_area_memlocation);
|
||||
@@ -1374,6 +1441,10 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology,
|
||||
}
|
||||
else if (!strcmp(attrname, "kind")) {
|
||||
kind = strtoul(attrvalue, NULL, 10);
|
||||
/* forward compat with "HOPS" kind in v3 */
|
||||
if (kind & (1UL<<5))
|
||||
/* hops becomes latency */
|
||||
kind = (kind & ~(1UL<<5)) | HWLOC_DISTANCES_KIND_MEANS_LATENCY;
|
||||
}
|
||||
else if (!strcmp(attrname, "name")) {
|
||||
name = attrvalue;
|
||||
@@ -1419,7 +1490,14 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology,
|
||||
if (ret <= 0)
|
||||
break;
|
||||
|
||||
if (!strcmp(tag, "indexes"))
|
||||
if (!strcmp(tag, "info")) {
|
||||
char *infoname, *infovalue;
|
||||
ret = hwloc___xml_import_info(&infoname, &infovalue, state);
|
||||
if (ret < 0)
|
||||
goto out_with_arrays;
|
||||
/* ignored */
|
||||
continue;
|
||||
} else if (!strcmp(tag, "indexes"))
|
||||
is_index = 1;
|
||||
else if (!strcmp(tag, "u64values"))
|
||||
is_u64values = 1;
|
||||
@@ -1565,7 +1643,10 @@ hwloc__xml_v2import_distances(hwloc_topology_t topology,
|
||||
}
|
||||
}
|
||||
|
||||
hwloc_internal_distances_add_by_index(topology, name, unique_type, different_types, nbobjs, indexes, u64values, kind, 0);
|
||||
if (topology->flags & HWLOC_TOPOLOGY_FLAG_NO_DISTANCES)
|
||||
goto out_ignore;
|
||||
|
||||
hwloc_internal_distances_add_by_index(topology, name, unique_type, different_types, nbobjs, indexes, u64values, kind, 0 /* assume grouping was applied when this matrix was discovered before exporting to XML */);
|
||||
|
||||
/* prevent freeing below */
|
||||
indexes = NULL;
|
||||
@@ -1719,7 +1800,8 @@ hwloc__xml_import_memattr(hwloc_topology_t topology,
|
||||
}
|
||||
}
|
||||
|
||||
if (name && flags != (unsigned long) -1) {
|
||||
if (name && flags != (unsigned long) -1
|
||||
&& !(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_MEMATTRS)) {
|
||||
hwloc_memattr_id_t _id;
|
||||
|
||||
ret = hwloc_memattr_get_by_name(topology, name, &_id);
|
||||
@@ -1748,6 +1830,10 @@ hwloc__xml_import_memattr(hwloc_topology_t topology,
|
||||
|
||||
if (!strcmp(tag, "memattr_value")) {
|
||||
ret = hwloc__xml_import_memattr_value(topology, id, flags, &childstate);
|
||||
} else if (!strcmp(tag, "info")) {
|
||||
char *infoname, *infovalue;
|
||||
ret = hwloc___xml_import_info(&infoname, &infovalue, &childstate);
|
||||
/* ignored */
|
||||
} else {
|
||||
if (hwloc__xml_verbose())
|
||||
fprintf(stderr, "%s: memattr with unrecognized child %s\n",
|
||||
@@ -1830,7 +1916,13 @@ hwloc__xml_import_cpukind(hwloc_topology_t topology,
|
||||
goto error;
|
||||
}
|
||||
|
||||
hwloc_internal_cpukinds_register(topology, cpuset, forced_efficiency, infos, nr_infos, HWLOC_CPUKINDS_REGISTER_FLAG_OVERWRITE_FORCED_EFFICIENCY);
|
||||
if (topology->flags & HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS) {
|
||||
hwloc__free_infos(infos, nr_infos);
|
||||
hwloc_bitmap_free(cpuset);
|
||||
} else {
|
||||
hwloc_internal_cpukinds_register(topology, cpuset, forced_efficiency, infos, nr_infos, HWLOC_CPUKINDS_REGISTER_FLAG_OVERWRITE_FORCED_EFFICIENCY);
|
||||
hwloc__free_infos(infos, nr_infos);
|
||||
}
|
||||
|
||||
return state->global->close_tag(state);
|
||||
|
||||
@@ -2070,9 +2162,10 @@ hwloc_look_xml(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus)
|
||||
if (ret < 0)
|
||||
goto failed;
|
||||
|
||||
if (data->version_major > 2) {
|
||||
if (data->version_major > 3
|
||||
|| (data->version_major == 3 && data->version_minor > 0)) {
|
||||
if (hwloc__xml_verbose())
|
||||
fprintf(stderr, "%s: cannot import XML version %u.%u > 2\n",
|
||||
fprintf(stderr, "%s: cannot import XML version %u.%u > 3.0\n",
|
||||
data->msgprefix, data->version_major, data->version_minor);
|
||||
goto err;
|
||||
}
|
||||
@@ -2120,6 +2213,13 @@ hwloc_look_xml(struct hwloc_backend *backend, struct hwloc_disc_status *dstatus)
|
||||
ret = hwloc__xml_import_cpukind(topology, &childstate);
|
||||
if (ret < 0)
|
||||
goto failed;
|
||||
} else if (!strcmp(tag, "info")) {
|
||||
char *infoname, *infovalue;
|
||||
ret = hwloc___xml_import_info(&infoname, &infovalue, &childstate);
|
||||
if (ret < 0)
|
||||
goto failed;
|
||||
/* move 3.x topology info back to the root object */
|
||||
hwloc_obj_add_info(topology->levels[0][0], infoname, infovalue);
|
||||
} else {
|
||||
if (hwloc__xml_verbose())
|
||||
fprintf(stderr, "%s: ignoring unknown tag `%s' after root object.\n",
|
||||
@@ -2165,7 +2265,8 @@ done:
|
||||
* but it would require to have those objects in the original XML order (like the first_numanode cousin-list).
|
||||
* because the topology order can be different if some parents are ignored during load.
|
||||
*/
|
||||
if (nbobjs == data->nbnumanodes) {
|
||||
if (nbobjs == data->nbnumanodes
|
||||
&& !(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_DISTANCES)) {
|
||||
hwloc_obj_t *objs = malloc(nbobjs*sizeof(hwloc_obj_t));
|
||||
uint64_t *values = malloc(nbobjs*nbobjs*sizeof(*values));
|
||||
assert(data->nbnumanodes > 0); /* v1dist->nbobjs is >0 after import */
|
||||
@@ -2647,7 +2748,8 @@ hwloc__xml_export_object_contents (hwloc__xml_export_state_t state, hwloc_topolo
|
||||
|
||||
logical_to_v2array = malloc(nbobjs * sizeof(*logical_to_v2array));
|
||||
if (!logical_to_v2array) {
|
||||
fprintf(stderr, "xml/export/v1: failed to allocated logical_to_v2array\n");
|
||||
if (HWLOC_SHOW_ALL_ERRORS())
|
||||
fprintf(stderr, "hwloc/xml/export/v1: failed to allocated logical_to_v2array\n");
|
||||
continue;
|
||||
}
|
||||
|
||||
@@ -2821,6 +2923,7 @@ hwloc__xml_v1export_object_with_memory(hwloc__xml_export_state_t parentstate, hw
|
||||
/* child has sibling, we must add a Group around those memory children */
|
||||
hwloc_obj_t group = parentstate->global->v1_memory_group;
|
||||
parentstate->new_child(parentstate, &gstate, "object");
|
||||
group->parent = obj->parent;
|
||||
group->cpuset = obj->cpuset;
|
||||
group->complete_cpuset = obj->complete_cpuset;
|
||||
group->nodeset = obj->nodeset;
|
||||
@@ -2993,7 +3096,7 @@ hwloc__xml_v2export_support(hwloc__xml_export_state_t parentstate, hwloc_topolog
|
||||
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_support) == 4*sizeof(void*));
|
||||
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_discovery_support) == 6);
|
||||
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_cpubind_support) == 11);
|
||||
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_membind_support) == 15);
|
||||
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_membind_support) == 16);
|
||||
HWLOC_BUILD_ASSERT(sizeof(struct hwloc_topology_misc_support) == 1);
|
||||
#endif
|
||||
|
||||
@@ -3038,6 +3141,7 @@ hwloc__xml_v2export_support(hwloc__xml_export_state_t parentstate, hwloc_topolog
|
||||
DO(membind,firsttouch_membind);
|
||||
DO(membind,bind_membind);
|
||||
DO(membind,interleave_membind);
|
||||
DO(membind,weighted_interleave_membind);
|
||||
DO(membind,nexttouch_membind);
|
||||
DO(membind,migrate_membind);
|
||||
DO(membind,get_area_memlocation);
|
||||
@@ -3119,9 +3223,11 @@ hwloc__xml_export_memattrs(hwloc__xml_export_state_t state, hwloc_topology_t top
|
||||
continue;
|
||||
|
||||
imattr = &topology->memattrs[id];
|
||||
if ((id == HWLOC_MEMATTR_ID_LATENCY || id == HWLOC_MEMATTR_ID_BANDWIDTH)
|
||||
&& !imattr->nr_targets)
|
||||
/* no need to export target-less attributes for initial attributes, no release support attributes without those definitions */
|
||||
if (id < HWLOC_MEMATTR_ID_MAX && !imattr->nr_targets)
|
||||
/* no need to export standard attributes without any target,
|
||||
* their definition is now standardized,
|
||||
* the old hwloc importing this XML may recreate these attributes just like it would for a non-imported topology.
|
||||
*/
|
||||
continue;
|
||||
|
||||
state->new_child(state, &mstate, "memattr");
|
||||
|
||||
430
src/3rdparty/hwloc/src/topology.c
vendored
430
src/3rdparty/hwloc/src/topology.c
vendored
@@ -1,8 +1,9 @@
|
||||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2021 Inria. All rights reserved.
|
||||
* Copyright © 2009-2023 Inria. All rights reserved.
|
||||
* Copyright © 2009-2012, 2020 Université Bordeaux
|
||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* Copyright © 2022 IBM Corporation. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
@@ -52,6 +53,57 @@
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
|
||||
#ifdef HWLOC_HAVE_LEVELZERO
|
||||
/*
|
||||
* Define ZES_ENABLE_SYSMAN=1 early so that the LevelZero backend gets Sysman enabled.
|
||||
*
|
||||
* Only if the levelzero was enabled in this build so that we don't enable sysman
|
||||
* for external levelzero users when hwloc doesn't need it. If somebody ever loads
|
||||
* an external levelzero plugin in a hwloc library built without levelzero (unlikely),
|
||||
* he may have to manually set ZES_ENABLE_SYSMAN=1.
|
||||
*
|
||||
* Use the constructor if supported and/or the Windows DllMain callback.
|
||||
* Do it in the main hwloc library instead of the levelzero component because
|
||||
* the latter could be loaded later as a plugin.
|
||||
*
|
||||
* L0 seems to be using getenv() to check this variable on Windows
|
||||
* (at least in the Intel Compute-Runtime of March 2021),
|
||||
* but setenv() doesn't seem to exist on Windows, hence use putenv() to set the variable.
|
||||
*
|
||||
* For the record, Get/SetEnvironmentVariable() is not exactly the same as getenv/putenv():
|
||||
* - getenv() doesn't see what was set with SetEnvironmentVariable()
|
||||
* - GetEnvironmentVariable() doesn't see putenv() in cygwin (while it does in MSVC and MinGW).
|
||||
* Hence, if L0 ever switches from getenv() to GetEnvironmentVariable(),
|
||||
* it will break in cygwin, we'll have to use both putenv() and SetEnvironmentVariable().
|
||||
* Hopefully L0 will provide a way to enable Sysman without env vars before it happens.
|
||||
*/
|
||||
#if HWLOC_HAVE_ATTRIBUTE_CONSTRUCTOR
|
||||
static void hwloc_constructor(void) __attribute__((constructor));
|
||||
static void hwloc_constructor(void)
|
||||
{
|
||||
if (!getenv("ZES_ENABLE_SYSMAN"))
|
||||
#ifdef HWLOC_WIN_SYS
|
||||
putenv("ZES_ENABLE_SYSMAN=1");
|
||||
#else
|
||||
setenv("ZES_ENABLE_SYSMAN", "1", 1);
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
#ifdef HWLOC_WIN_SYS
|
||||
BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved)
|
||||
{
|
||||
if (fdwReason == DLL_PROCESS_ATTACH) {
|
||||
if (!getenv("ZES_ENABLE_SYSMAN"))
|
||||
/* Windows does not have a setenv, so use putenv. */
|
||||
putenv((char *) "ZES_ENABLE_SYSMAN=1");
|
||||
}
|
||||
return TRUE;
|
||||
}
|
||||
#endif
|
||||
#endif /* HWLOC_HAVE_LEVELZERO */
|
||||
|
||||
|
||||
unsigned hwloc_get_api_version(void)
|
||||
{
|
||||
return HWLOC_API_VERSION;
|
||||
@@ -62,14 +114,25 @@ int hwloc_topology_abi_check(hwloc_topology_t topology)
|
||||
return topology->topology_abi != HWLOC_TOPOLOGY_ABI ? -1 : 0;
|
||||
}
|
||||
|
||||
/* callers should rather use wrappers HWLOC_SHOW_ALL_ERRORS() and HWLOC_SHOW_CRITICAL_ERRORS() for clarity */
|
||||
int hwloc_hide_errors(void)
|
||||
{
|
||||
static int hide = 0;
|
||||
static int hide = 1; /* only show critical errors by default. lstopo will show others */
|
||||
static int checked = 0;
|
||||
if (!checked) {
|
||||
const char *envvar = getenv("HWLOC_HIDE_ERRORS");
|
||||
if (envvar)
|
||||
if (envvar) {
|
||||
hide = atoi(envvar);
|
||||
#ifdef HWLOC_DEBUG
|
||||
} else {
|
||||
/* if debug is enabled and HWLOC_DEBUG_VERBOSE isn't forced to 0,
|
||||
* show all errors jus like we show all debug messages.
|
||||
*/
|
||||
envvar = getenv("HWLOC_DEBUG_VERBOSE");
|
||||
if (!envvar || atoi(envvar))
|
||||
hide = 0;
|
||||
#endif
|
||||
}
|
||||
checked = 1;
|
||||
}
|
||||
return hide;
|
||||
@@ -83,21 +146,24 @@ report_insert_error_format_obj(char *buf, size_t buflen, hwloc_obj_t obj)
|
||||
char typestr[64];
|
||||
char *cpusetstr;
|
||||
char *nodesetstr = NULL;
|
||||
char indexstr[64] = "";
|
||||
char groupstr[64] = "";
|
||||
|
||||
hwloc_obj_type_snprintf(typestr, sizeof(typestr), obj, 0);
|
||||
hwloc_bitmap_asprintf(&cpusetstr, obj->cpuset);
|
||||
if (obj->os_index != HWLOC_UNKNOWN_INDEX)
|
||||
snprintf(indexstr, sizeof(indexstr), "P#%u ", obj->os_index);
|
||||
if (obj->type == HWLOC_OBJ_GROUP)
|
||||
snprintf(groupstr, sizeof(groupstr), "groupkind %u-%u ", obj->attr->group.kind, obj->attr->group.subkind);
|
||||
if (obj->nodeset) /* may be missing during insert */
|
||||
hwloc_bitmap_asprintf(&nodesetstr, obj->nodeset);
|
||||
if (obj->os_index != HWLOC_UNKNOWN_INDEX)
|
||||
snprintf(buf, buflen, "%s (P#%u cpuset %s%s%s)",
|
||||
typestr, obj->os_index, cpusetstr,
|
||||
nodesetstr ? " nodeset " : "",
|
||||
nodesetstr ? nodesetstr : "");
|
||||
else
|
||||
snprintf(buf, buflen, "%s (cpuset %s%s%s)",
|
||||
typestr, cpusetstr,
|
||||
nodesetstr ? " nodeset " : "",
|
||||
nodesetstr ? nodesetstr : "");
|
||||
snprintf(buf, buflen, "%s (%s%s%s%s%scpuset %s%s%s)",
|
||||
typestr,
|
||||
indexstr,
|
||||
obj->subtype ? "subtype " : "", obj->subtype ? obj->subtype : "", obj->subtype ? " " : "",
|
||||
groupstr,
|
||||
cpusetstr,
|
||||
nodesetstr ? " nodeset " : "", nodesetstr ? nodesetstr : "");
|
||||
free(cpusetstr);
|
||||
free(nodesetstr);
|
||||
}
|
||||
@@ -106,7 +172,7 @@ static void report_insert_error(hwloc_obj_t new, hwloc_obj_t old, const char *ms
|
||||
{
|
||||
static int reported = 0;
|
||||
|
||||
if (reason && !reported && !hwloc_hide_errors()) {
|
||||
if (reason && !reported && HWLOC_SHOW_CRITICAL_ERRORS()) {
|
||||
char newstr[512];
|
||||
char oldstr[512];
|
||||
report_insert_error_format_obj(newstr, sizeof(newstr), new);
|
||||
@@ -115,8 +181,9 @@ static void report_insert_error(hwloc_obj_t new, hwloc_obj_t old, const char *ms
|
||||
fprintf(stderr, "****************************************************************************\n");
|
||||
fprintf(stderr, "* hwloc %s received invalid information from the operating system.\n", HWLOC_VERSION);
|
||||
fprintf(stderr, "*\n");
|
||||
fprintf(stderr, "* Failed with: %s\n", msg);
|
||||
fprintf(stderr, "* while inserting %s at %s\n", newstr, oldstr);
|
||||
fprintf(stderr, "* Failed with error: %s\n", msg);
|
||||
fprintf(stderr, "* while inserting %s\n", newstr);
|
||||
fprintf(stderr, "* at %s\n", oldstr);
|
||||
fprintf(stderr, "* coming from: %s\n", reason);
|
||||
fprintf(stderr, "*\n");
|
||||
fprintf(stderr, "* The following FAQ entry in the hwloc documentation may help:\n");
|
||||
@@ -398,6 +465,20 @@ hwloc_debug_print_objects(int indent __hwloc_attribute_unused, hwloc_obj_t obj)
|
||||
#define hwloc_debug_print_objects(indent, obj) do { /* nothing */ } while (0)
|
||||
#endif /* !HWLOC_DEBUG */
|
||||
|
||||
int hwloc_obj_set_subtype(hwloc_topology_t topology __hwloc_attribute_unused, hwloc_obj_t obj, const char *subtype)
|
||||
{
|
||||
char *new = NULL;
|
||||
if (subtype) {
|
||||
new = strdup(subtype);
|
||||
if (!new)
|
||||
return -1;
|
||||
}
|
||||
if (obj->subtype)
|
||||
free(obj->subtype);
|
||||
obj->subtype = new;
|
||||
return 0;
|
||||
}
|
||||
|
||||
void hwloc__free_infos(struct hwloc_info_s *infos, unsigned count)
|
||||
{
|
||||
unsigned i;
|
||||
@@ -616,7 +697,8 @@ unlink_and_free_object_and_children(hwloc_obj_t *pobj)
|
||||
void
|
||||
hwloc_free_object_and_children(hwloc_obj_t obj)
|
||||
{
|
||||
unlink_and_free_object_and_children(&obj);
|
||||
if (obj)
|
||||
unlink_and_free_object_and_children(&obj);
|
||||
}
|
||||
|
||||
/* Free an object, its next siblings and their children without unlinking from parent.
|
||||
@@ -1862,14 +1944,33 @@ hwloc_topology_alloc_group_object(struct hwloc_topology *topology)
|
||||
return hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, HWLOC_UNKNOWN_INDEX);
|
||||
}
|
||||
|
||||
int
|
||||
hwloc_topology_free_group_object(struct hwloc_topology *topology, hwloc_obj_t obj)
|
||||
{
|
||||
if (!topology->is_loaded) {
|
||||
/* this could actually work, see insert() below */
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
if (topology->adopted_shmem_addr) {
|
||||
errno = EPERM;
|
||||
return -1;
|
||||
}
|
||||
hwloc_free_unlinked_object(obj);
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void hwloc_propagate_symmetric_subtree(hwloc_topology_t topology, hwloc_obj_t root);
|
||||
static void propagate_total_memory(hwloc_obj_t obj);
|
||||
static void hwloc_set_group_depth(hwloc_topology_t topology);
|
||||
static void hwloc_connect_children(hwloc_obj_t parent);
|
||||
static int hwloc_connect_levels(hwloc_topology_t topology);
|
||||
static int hwloc_connect_special_levels(hwloc_topology_t topology);
|
||||
|
||||
hwloc_obj_t
|
||||
hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t obj)
|
||||
{
|
||||
hwloc_obj_t res, root;
|
||||
hwloc_obj_t res, root, child;
|
||||
int cmp;
|
||||
|
||||
if (!topology->is_loaded) {
|
||||
@@ -1879,6 +1980,7 @@ hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t
|
||||
return NULL;
|
||||
}
|
||||
if (topology->adopted_shmem_addr) {
|
||||
hwloc_free_unlinked_object(obj);
|
||||
errno = EPERM;
|
||||
return NULL;
|
||||
}
|
||||
@@ -1932,6 +2034,7 @@ hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t
|
||||
res = hwloc__insert_object_by_cpuset(topology, NULL, obj, NULL /* do not show errors on stdout */);
|
||||
} else {
|
||||
/* just merge root */
|
||||
hwloc_free_unlinked_object(obj);
|
||||
res = root;
|
||||
}
|
||||
|
||||
@@ -1958,6 +2061,13 @@ hwloc_topology_insert_group_object(struct hwloc_topology *topology, hwloc_obj_t
|
||||
if (hwloc_topology_reconnect(topology, 0) < 0)
|
||||
return NULL;
|
||||
|
||||
/* Compute group total_memory. */
|
||||
res->total_memory = 0;
|
||||
for_each_child(child, res)
|
||||
res->total_memory += child->total_memory;
|
||||
for_each_memory_child(child, res)
|
||||
res->total_memory += child->total_memory;
|
||||
|
||||
hwloc_propagate_symmetric_subtree(topology, topology->levels[0][0]);
|
||||
hwloc_set_group_depth(topology);
|
||||
|
||||
@@ -2188,11 +2298,13 @@ fixup_sets(hwloc_obj_t obj)
|
||||
int
|
||||
hwloc_obj_add_other_obj_sets(hwloc_obj_t dst, hwloc_obj_t src)
|
||||
{
|
||||
#define ADD_OTHER_OBJ_SET(_dst, _src, _set) \
|
||||
if ((_src)->_set) { \
|
||||
if (!(_dst)->_set) \
|
||||
(_dst)->_set = hwloc_bitmap_alloc(); \
|
||||
hwloc_bitmap_or((_dst)->_set, (_dst)->_set, (_src)->_set); \
|
||||
#define ADD_OTHER_OBJ_SET(_dst, _src, _set) \
|
||||
if ((_src)->_set) { \
|
||||
if (!(_dst)->_set) \
|
||||
(_dst)->_set = hwloc_bitmap_alloc(); \
|
||||
if (!(_dst)->_set \
|
||||
|| hwloc_bitmap_or((_dst)->_set, (_dst)->_set, (_src)->_set) < 0) \
|
||||
return -1; \
|
||||
}
|
||||
ADD_OTHER_OBJ_SET(dst, src, cpuset);
|
||||
ADD_OTHER_OBJ_SET(dst, src, complete_cpuset);
|
||||
@@ -2307,9 +2419,15 @@ hwloc__filter_bridges(hwloc_topology_t topology, hwloc_obj_t root, unsigned dept
|
||||
|
||||
child->attr->bridge.depth = depth;
|
||||
|
||||
if (child->type == HWLOC_OBJ_BRIDGE
|
||||
&& filter == HWLOC_TYPE_FILTER_KEEP_IMPORTANT
|
||||
&& !child->io_first_child) {
|
||||
/* remove bridges that have no child,
|
||||
* and pci-to-non-pci bridges (pcidev) that no child either.
|
||||
* keep NVSwitch since they may be used in NVLink matrices.
|
||||
*/
|
||||
if (filter == HWLOC_TYPE_FILTER_KEEP_IMPORTANT
|
||||
&& !child->io_first_child
|
||||
&& (child->type == HWLOC_OBJ_BRIDGE
|
||||
|| (child->type == HWLOC_OBJ_PCI_DEVICE && (child->attr->pcidev.class_id >> 8) == 0x06
|
||||
&& (!child->subtype || strcmp(child->subtype, "NVSwitch"))))) {
|
||||
unlink_and_free_single_object(pchild);
|
||||
topology->modified = 1;
|
||||
}
|
||||
@@ -2432,13 +2550,26 @@ hwloc_compare_levels_structure(hwloc_topology_t topology, unsigned i)
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* return > 0 if any level was removed, which means reconnect is needed */
|
||||
static void
|
||||
/* return > 0 if any level was removed.
|
||||
* performs its own reconnect internally if needed
|
||||
*/
|
||||
static int
|
||||
hwloc_filter_levels_keep_structure(hwloc_topology_t topology)
|
||||
{
|
||||
unsigned i, j;
|
||||
int res = 0;
|
||||
|
||||
if (topology->modified) {
|
||||
/* WARNING: hwloc_topology_reconnect() is duplicated partially here
|
||||
* and at the end of this function:
|
||||
* - we need normal levels before merging.
|
||||
* - and we'll need to update special levels after merging.
|
||||
*/
|
||||
hwloc_connect_children(topology->levels[0][0]);
|
||||
if (hwloc_connect_levels(topology) < 0)
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* start from the bottom since we'll remove intermediate levels */
|
||||
for(i=topology->nb_levels-1; i>0; i--) {
|
||||
int replacechild = 0, replaceparent = 0;
|
||||
@@ -2604,6 +2735,22 @@ hwloc_filter_levels_keep_structure(hwloc_topology_t topology)
|
||||
topology->type_depth[type] = HWLOC_TYPE_DEPTH_MULTIPLE;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if (res > 0 || topology-> modified) {
|
||||
/* WARNING: hwloc_topology_reconnect() is duplicated partially here
|
||||
* and at the beginning of this function.
|
||||
* If we merged some levels, some child+parent special children lisst
|
||||
* may have been merged, hence specials level might need reordering,
|
||||
* So reconnect special levels only here at the end
|
||||
* (it's not needed at the beginning of this function).
|
||||
*/
|
||||
if (hwloc_connect_special_levels(topology) < 0)
|
||||
return -1;
|
||||
topology->modified = 0;
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void
|
||||
@@ -2921,9 +3068,9 @@ hwloc_list_special_objects(hwloc_topology_t topology, hwloc_obj_t obj)
|
||||
}
|
||||
}
|
||||
|
||||
/* Build I/O levels */
|
||||
/* Build Memory, I/O and Misc levels */
|
||||
static int
|
||||
hwloc_connect_io_misc_levels(hwloc_topology_t topology)
|
||||
hwloc_connect_special_levels(hwloc_topology_t topology)
|
||||
{
|
||||
unsigned i;
|
||||
|
||||
@@ -3088,7 +3235,8 @@ hwloc_connect_levels(hwloc_topology_t topology)
|
||||
tmpnbobjs = realloc(topology->level_nbobjects,
|
||||
2 * topology->nb_levels_allocated * sizeof(*topology->level_nbobjects));
|
||||
if (!tmplevels || !tmpnbobjs) {
|
||||
fprintf(stderr, "hwloc failed to realloc level arrays to %u\n", topology->nb_levels_allocated * 2);
|
||||
if (HWLOC_SHOW_CRITICAL_ERRORS())
|
||||
fprintf(stderr, "hwloc: failed to realloc level arrays to %u\n", topology->nb_levels_allocated * 2);
|
||||
|
||||
/* if one realloc succeeded, make sure the caller will free the new buffer */
|
||||
if (tmplevels)
|
||||
@@ -3133,6 +3281,10 @@ hwloc_connect_levels(hwloc_topology_t topology)
|
||||
int
|
||||
hwloc_topology_reconnect(struct hwloc_topology *topology, unsigned long flags)
|
||||
{
|
||||
/* WARNING: when updating this function, the replicated code must
|
||||
* also be updated inside hwloc_filter_levels_keep_structure()
|
||||
*/
|
||||
|
||||
if (flags) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
@@ -3145,7 +3297,7 @@ hwloc_topology_reconnect(struct hwloc_topology *topology, unsigned long flags)
|
||||
if (hwloc_connect_levels(topology) < 0)
|
||||
return -1;
|
||||
|
||||
if (hwloc_connect_io_misc_levels(topology) < 0)
|
||||
if (hwloc_connect_special_levels(topology) < 0)
|
||||
return -1;
|
||||
|
||||
topology->modified = 0;
|
||||
@@ -3441,6 +3593,8 @@ hwloc_discover(struct hwloc_topology *topology,
|
||||
/*
|
||||
* Additional discovery
|
||||
*/
|
||||
hwloc_pci_discovery_prepare(topology);
|
||||
|
||||
if (topology->backend_phases & HWLOC_DISC_PHASE_PCI) {
|
||||
dstatus->phase = HWLOC_DISC_PHASE_PCI;
|
||||
hwloc_discover_by_phase(topology, dstatus, "PCI");
|
||||
@@ -3458,6 +3612,8 @@ hwloc_discover(struct hwloc_topology *topology,
|
||||
hwloc_discover_by_phase(topology, dstatus, "ANNOTATE");
|
||||
}
|
||||
|
||||
hwloc_pci_discovery_exit(topology); /* pci needed up to annotate */
|
||||
|
||||
if (getenv("HWLOC_DEBUG_SORT_CHILDREN"))
|
||||
hwloc_debug_sort_children(topology->levels[0][0]);
|
||||
|
||||
@@ -3470,28 +3626,28 @@ hwloc_discover(struct hwloc_topology *topology,
|
||||
hwloc_debug("%s", "\nRemoving empty objects\n");
|
||||
remove_empty(topology, &topology->levels[0][0]);
|
||||
if (!topology->levels[0][0]) {
|
||||
fprintf(stderr, "Topology became empty, aborting!\n");
|
||||
if (HWLOC_SHOW_CRITICAL_ERRORS())
|
||||
fprintf(stderr, "hwloc: Topology became empty, aborting!\n");
|
||||
return -1;
|
||||
}
|
||||
if (hwloc_bitmap_iszero(topology->levels[0][0]->cpuset)) {
|
||||
fprintf(stderr, "Topology does not contain any PU, aborting!\n");
|
||||
if (HWLOC_SHOW_CRITICAL_ERRORS())
|
||||
fprintf(stderr, "hwloc: Topology does not contain any PU, aborting!\n");
|
||||
return -1;
|
||||
}
|
||||
if (hwloc_bitmap_iszero(topology->levels[0][0]->nodeset)) {
|
||||
fprintf(stderr, "Topology does not contain any NUMA node, aborting!\n");
|
||||
if (HWLOC_SHOW_CRITICAL_ERRORS())
|
||||
fprintf(stderr, "hwloc: Topology does not contain any NUMA node, aborting!\n");
|
||||
return -1;
|
||||
}
|
||||
hwloc_debug_print_objects(0, topology->levels[0][0]);
|
||||
|
||||
/* Reconnect things after all these changes.
|
||||
* Often needed because of Groups inserted for I/Os.
|
||||
* And required for KEEP_STRUCTURE below.
|
||||
*/
|
||||
if (hwloc_topology_reconnect(topology, 0) < 0)
|
||||
return -1;
|
||||
|
||||
hwloc_debug("%s", "\nRemoving levels with HWLOC_TYPE_FILTER_KEEP_STRUCTURE\n");
|
||||
hwloc_filter_levels_keep_structure(topology);
|
||||
if (hwloc_filter_levels_keep_structure(topology) < 0)
|
||||
return -1;
|
||||
/* takes care of reconnecting children/levels internally,
|
||||
* because it needs normal levels.
|
||||
* and it's often needed below because of Groups inserted for I/Os anyway */
|
||||
hwloc_debug_print_objects(0, topology->levels[0][0]);
|
||||
|
||||
/* accumulate children memory in total_memory fields (only once parent is set) */
|
||||
@@ -3620,6 +3776,7 @@ hwloc__topology_init (struct hwloc_topology **topologyp,
|
||||
|
||||
hwloc__topology_filter_init(topology);
|
||||
|
||||
/* always initialize since we don't know flags to disable those yet */
|
||||
hwloc_internal_distances_init(topology);
|
||||
hwloc_internal_memattrs_init(topology);
|
||||
hwloc_internal_cpukinds_init(topology);
|
||||
@@ -3716,7 +3873,27 @@ hwloc_topology_set_flags (struct hwloc_topology *topology, unsigned long flags)
|
||||
return -1;
|
||||
}
|
||||
|
||||
if (flags & ~(HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED|HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM|HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES|HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT)) {
|
||||
if (flags & ~(HWLOC_TOPOLOGY_FLAG_INCLUDE_DISALLOWED
|
||||
|HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM
|
||||
|HWLOC_TOPOLOGY_FLAG_THISSYSTEM_ALLOWED_RESOURCES
|
||||
|HWLOC_TOPOLOGY_FLAG_IMPORT_SUPPORT
|
||||
|HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING
|
||||
|HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_MEMBINDING
|
||||
|HWLOC_TOPOLOGY_FLAG_DONT_CHANGE_BINDING
|
||||
|HWLOC_TOPOLOGY_FLAG_NO_DISTANCES
|
||||
|HWLOC_TOPOLOGY_FLAG_NO_MEMATTRS
|
||||
|HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS)) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
if ((flags & (HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING|HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM)) == HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING) {
|
||||
/* RESTRICT_TO_CPUBINDING requires THISSYSTEM for binding */
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
if ((flags & (HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_MEMBINDING|HWLOC_TOPOLOGY_FLAG_IS_THISSYSTEM)) == HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_MEMBINDING) {
|
||||
/* RESTRICT_TO_MEMBINDING requires THISSYSTEM for binding */
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
@@ -3812,8 +3989,12 @@ int
|
||||
hwloc_topology_set_cache_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter)
|
||||
{
|
||||
unsigned i;
|
||||
for(i=HWLOC_OBJ_L1CACHE; i<HWLOC_OBJ_L3ICACHE; i++)
|
||||
hwloc_topology_set_type_filter(topology, (hwloc_obj_type_t) i, filter);
|
||||
if (topology->is_loaded) {
|
||||
errno = EBUSY;
|
||||
return -1;
|
||||
}
|
||||
for(i=HWLOC_OBJ_L1CACHE; i<=HWLOC_OBJ_L3ICACHE; i++)
|
||||
hwloc__topology_set_type_filter(topology, (hwloc_obj_type_t) i, filter);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -3821,17 +4002,25 @@ int
|
||||
hwloc_topology_set_icache_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter)
|
||||
{
|
||||
unsigned i;
|
||||
for(i=HWLOC_OBJ_L1ICACHE; i<HWLOC_OBJ_L3ICACHE; i++)
|
||||
hwloc_topology_set_type_filter(topology, (hwloc_obj_type_t) i, filter);
|
||||
if (topology->is_loaded) {
|
||||
errno = EBUSY;
|
||||
return -1;
|
||||
}
|
||||
for(i=HWLOC_OBJ_L1ICACHE; i<=HWLOC_OBJ_L3ICACHE; i++)
|
||||
hwloc__topology_set_type_filter(topology, (hwloc_obj_type_t) i, filter);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int
|
||||
hwloc_topology_set_io_types_filter(hwloc_topology_t topology, enum hwloc_type_filter_e filter)
|
||||
{
|
||||
hwloc_topology_set_type_filter(topology, HWLOC_OBJ_BRIDGE, filter);
|
||||
hwloc_topology_set_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, filter);
|
||||
hwloc_topology_set_type_filter(topology, HWLOC_OBJ_OS_DEVICE, filter);
|
||||
if (topology->is_loaded) {
|
||||
errno = EBUSY;
|
||||
return -1;
|
||||
}
|
||||
hwloc__topology_set_type_filter(topology, HWLOC_OBJ_BRIDGE, filter);
|
||||
hwloc__topology_set_type_filter(topology, HWLOC_OBJ_PCI_DEVICE, filter);
|
||||
hwloc__topology_set_type_filter(topology, HWLOC_OBJ_OS_DEVICE, filter);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -3852,9 +4041,12 @@ hwloc_topology_clear (struct hwloc_topology *topology)
|
||||
{
|
||||
/* no need to set to NULL after free() since callers will call setup_defaults() or just destroy the rest of the topology */
|
||||
unsigned l;
|
||||
|
||||
/* always destroy cpukinds/distances/memattrs since there are always initialized during init() */
|
||||
hwloc_internal_cpukinds_destroy(topology);
|
||||
hwloc_internal_distances_destroy(topology);
|
||||
hwloc_internal_memattrs_destroy(topology);
|
||||
|
||||
hwloc_free_object_and_children(topology->levels[0][0]);
|
||||
hwloc_bitmap_free(topology->allowed_cpuset);
|
||||
hwloc_bitmap_free(topology->allowed_nodeset);
|
||||
@@ -3894,6 +4086,7 @@ hwloc_topology_load (struct hwloc_topology *topology)
|
||||
{
|
||||
struct hwloc_disc_status dstatus;
|
||||
const char *env;
|
||||
unsigned i;
|
||||
int err;
|
||||
|
||||
if (topology->is_loaded) {
|
||||
@@ -3902,8 +4095,18 @@ hwloc_topology_load (struct hwloc_topology *topology)
|
||||
}
|
||||
|
||||
/* initialize envvar-related things */
|
||||
hwloc_internal_distances_prepare(topology);
|
||||
hwloc_internal_memattrs_prepare(topology);
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_DISTANCES))
|
||||
hwloc_internal_distances_prepare(topology);
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_MEMATTRS))
|
||||
hwloc_internal_memattrs_prepare(topology);
|
||||
|
||||
/* check if any cpu cache filter is not NONE */
|
||||
topology->want_some_cpu_caches = 0;
|
||||
for(i=HWLOC_OBJ_L1CACHE; i<=HWLOC_OBJ_L3ICACHE; i++)
|
||||
if (topology->type_filter[i] != HWLOC_TYPE_FILTER_KEEP_NONE) {
|
||||
topology->want_some_cpu_caches = 1;
|
||||
break;
|
||||
}
|
||||
|
||||
if (getenv("HWLOC_XML_USERDATA_NOT_DECODED"))
|
||||
topology->userdata_not_decoded = 1;
|
||||
@@ -3970,39 +4173,70 @@ hwloc_topology_load (struct hwloc_topology *topology)
|
||||
*/
|
||||
hwloc_set_binding_hooks(topology);
|
||||
|
||||
hwloc_pci_discovery_prepare(topology);
|
||||
|
||||
/* actual topology discovery */
|
||||
err = hwloc_discover(topology, &dstatus);
|
||||
if (err < 0)
|
||||
goto out;
|
||||
|
||||
hwloc_pci_discovery_exit(topology);
|
||||
|
||||
#ifndef HWLOC_DEBUG
|
||||
if (getenv("HWLOC_DEBUG_CHECK"))
|
||||
#endif
|
||||
hwloc_topology_check(topology);
|
||||
|
||||
/* Rank cpukinds */
|
||||
hwloc_internal_cpukinds_rank(topology);
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS)) {
|
||||
/* Rank cpukinds */
|
||||
hwloc_internal_cpukinds_rank(topology);
|
||||
}
|
||||
|
||||
/* Mark distances objs arrays as invalid since we may have removed objects
|
||||
* from the topology after adding the distances (remove_empty, etc).
|
||||
* It would be hard to actually verify whether it's needed.
|
||||
*/
|
||||
hwloc_internal_distances_invalidate_cached_objs(topology);
|
||||
/* And refresh distances so that multithreaded concurrent distances_get()
|
||||
* don't refresh() concurrently (disallowed).
|
||||
*/
|
||||
hwloc_internal_distances_refresh(topology);
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_DISTANCES)) {
|
||||
/* Mark distances objs arrays as invalid since we may have removed objects
|
||||
* from the topology after adding the distances (remove_empty, etc).
|
||||
* It would be hard to actually verify whether it's needed.
|
||||
*/
|
||||
hwloc_internal_distances_invalidate_cached_objs(topology);
|
||||
/* And refresh distances so that multithreaded concurrent distances_get()
|
||||
* don't refresh() concurrently (disallowed).
|
||||
*/
|
||||
hwloc_internal_distances_refresh(topology);
|
||||
}
|
||||
|
||||
/* Same for memattrs */
|
||||
hwloc_internal_memattrs_need_refresh(topology);
|
||||
hwloc_internal_memattrs_refresh(topology);
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_MEMATTRS)) {
|
||||
int force_memtiers = (getenv("HWLOC_MEMTIERS_REFRESH") != NULL);
|
||||
/* Same for memattrs */
|
||||
hwloc_internal_memattrs_need_refresh(topology);
|
||||
hwloc_internal_memattrs_refresh(topology);
|
||||
/* update memtiers unless XML */
|
||||
if (force_memtiers || strcmp(topology->backends->component->name, "xml"))
|
||||
hwloc_internal_memattrs_guess_memory_tiers(topology, force_memtiers);
|
||||
}
|
||||
|
||||
topology->is_loaded = 1;
|
||||
|
||||
if (topology->flags & HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_CPUBINDING) {
|
||||
/* FIXME: filter directly in backends during the discovery.
|
||||
* Only x86 does it because binding may cause issues on Windows.
|
||||
*/
|
||||
hwloc_bitmap_t set = hwloc_bitmap_alloc();
|
||||
if (set) {
|
||||
err = hwloc_get_cpubind(topology, set, HWLOC_CPUBIND_STRICT);
|
||||
if (!err)
|
||||
hwloc_topology_restrict(topology, set, 0);
|
||||
hwloc_bitmap_free(set);
|
||||
}
|
||||
}
|
||||
if (topology->flags & HWLOC_TOPOLOGY_FLAG_RESTRICT_TO_MEMBINDING) {
|
||||
/* FIXME: filter directly in backends during the discovery.
|
||||
*/
|
||||
hwloc_bitmap_t set = hwloc_bitmap_alloc();
|
||||
hwloc_membind_policy_t policy;
|
||||
if (set) {
|
||||
err = hwloc_get_membind(topology, set, &policy, HWLOC_MEMBIND_STRICT | HWLOC_MEMBIND_BYNODESET);
|
||||
if (!err)
|
||||
hwloc_topology_restrict(topology, set, HWLOC_RESTRICT_FLAG_BYNODESET);
|
||||
hwloc_bitmap_free(set);
|
||||
}
|
||||
}
|
||||
|
||||
if (topology->backend_phases & HWLOC_DISC_PHASE_TWEAK) {
|
||||
dstatus.phase = HWLOC_DISC_PHASE_TWEAK;
|
||||
hwloc_discover_by_phase(topology, &dstatus, "TWEAK");
|
||||
@@ -4033,20 +4267,11 @@ restrict_object_by_cpuset(hwloc_topology_t topology, unsigned long flags, hwloc_
|
||||
hwloc_bitmap_andnot(obj->cpuset, obj->cpuset, droppedcpuset);
|
||||
hwloc_bitmap_andnot(obj->complete_cpuset, obj->complete_cpuset, droppedcpuset);
|
||||
modified = 1;
|
||||
} else {
|
||||
if ((flags & HWLOC_RESTRICT_FLAG_REMOVE_CPULESS)
|
||||
&& hwloc_bitmap_iszero(obj->complete_cpuset)) {
|
||||
/* we're empty, there's a NUMAnode below us, it'll be removed this time */
|
||||
modified = 1;
|
||||
}
|
||||
/* nodeset cannot intersect unless cpuset intersects or is empty */
|
||||
if (droppednodeset)
|
||||
assert(!hwloc_bitmap_intersects(obj->complete_nodeset, droppednodeset)
|
||||
|| hwloc_bitmap_iszero(obj->complete_cpuset));
|
||||
}
|
||||
if (droppednodeset) {
|
||||
if (droppednodeset && hwloc_bitmap_intersects(obj->complete_nodeset, droppednodeset)) {
|
||||
hwloc_bitmap_andnot(obj->nodeset, obj->nodeset, droppednodeset);
|
||||
hwloc_bitmap_andnot(obj->complete_nodeset, obj->complete_nodeset, droppednodeset);
|
||||
modified = 1;
|
||||
}
|
||||
|
||||
if (modified) {
|
||||
@@ -4099,20 +4324,11 @@ restrict_object_by_nodeset(hwloc_topology_t topology, unsigned long flags, hwloc
|
||||
hwloc_bitmap_andnot(obj->nodeset, obj->nodeset, droppednodeset);
|
||||
hwloc_bitmap_andnot(obj->complete_nodeset, obj->complete_nodeset, droppednodeset);
|
||||
modified = 1;
|
||||
} else {
|
||||
if ((flags & HWLOC_RESTRICT_FLAG_REMOVE_MEMLESS)
|
||||
&& hwloc_bitmap_iszero(obj->complete_nodeset)) {
|
||||
/* we're empty, there's a PU below us, it'll be removed this time */
|
||||
modified = 1;
|
||||
}
|
||||
/* cpuset cannot intersect unless nodeset intersects or is empty */
|
||||
if (droppedcpuset)
|
||||
assert(!hwloc_bitmap_intersects(obj->complete_cpuset, droppedcpuset)
|
||||
|| hwloc_bitmap_iszero(obj->complete_nodeset));
|
||||
}
|
||||
if (droppedcpuset) {
|
||||
if (droppedcpuset && hwloc_bitmap_intersects(obj->complete_cpuset, droppedcpuset)) {
|
||||
hwloc_bitmap_andnot(obj->cpuset, obj->cpuset, droppedcpuset);
|
||||
hwloc_bitmap_andnot(obj->complete_cpuset, obj->complete_cpuset, droppedcpuset);
|
||||
modified = 1;
|
||||
}
|
||||
|
||||
if (modified) {
|
||||
@@ -4278,17 +4494,21 @@ hwloc_topology_restrict(struct hwloc_topology *topology, hwloc_const_bitmap_t se
|
||||
hwloc_bitmap_free(droppedcpuset);
|
||||
hwloc_bitmap_free(droppednodeset);
|
||||
|
||||
if (hwloc_topology_reconnect(topology, 0) < 0)
|
||||
if (hwloc_filter_levels_keep_structure(topology) < 0) /* takes care of reconnecting internally */
|
||||
goto out;
|
||||
|
||||
/* some objects may have disappeared, we need to update distances objs arrays */
|
||||
hwloc_internal_distances_invalidate_cached_objs(topology);
|
||||
hwloc_internal_memattrs_need_refresh(topology);
|
||||
/* some objects may have disappeared and sets were modified,
|
||||
* we need to update distances, etc */
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_DISTANCES))
|
||||
hwloc_internal_distances_invalidate_cached_objs(topology);
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_MEMATTRS))
|
||||
hwloc_internal_memattrs_need_refresh(topology);
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS))
|
||||
hwloc_internal_cpukinds_restrict(topology);
|
||||
|
||||
|
||||
hwloc_filter_levels_keep_structure(topology);
|
||||
hwloc_propagate_symmetric_subtree(topology, topology->levels[0][0]);
|
||||
propagate_total_memory(topology->levels[0][0]);
|
||||
hwloc_internal_cpukinds_restrict(topology);
|
||||
|
||||
#ifndef HWLOC_DEBUG
|
||||
if (getenv("HWLOC_DEBUG_CHECK"))
|
||||
@@ -4376,9 +4596,12 @@ hwloc_topology_allow(struct hwloc_topology *topology,
|
||||
int
|
||||
hwloc_topology_refresh(struct hwloc_topology *topology)
|
||||
{
|
||||
hwloc_internal_cpukinds_rank(topology);
|
||||
hwloc_internal_distances_refresh(topology);
|
||||
hwloc_internal_memattrs_refresh(topology);
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_CPUKINDS))
|
||||
hwloc_internal_cpukinds_rank(topology);
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_DISTANCES))
|
||||
hwloc_internal_distances_refresh(topology);
|
||||
if (!(topology->flags & HWLOC_TOPOLOGY_FLAG_NO_MEMATTRS))
|
||||
hwloc_internal_memattrs_refresh(topology);
|
||||
return 0;
|
||||
}
|
||||
|
||||
@@ -4930,6 +5153,9 @@ hwloc_topology_check(struct hwloc_topology *topology)
|
||||
for(i=HWLOC_OBJ_TYPE_MIN; i<HWLOC_OBJ_TYPE_MAX; i++)
|
||||
assert(obj_type_order[obj_order_type[i]] == i);
|
||||
|
||||
if (!topology->is_loaded)
|
||||
return;
|
||||
|
||||
depth = hwloc_topology_get_depth(topology);
|
||||
|
||||
assert(!topology->modified);
|
||||
|
||||
107
src/3rdparty/hwloc/src/traversal.c
vendored
107
src/3rdparty/hwloc/src/traversal.c
vendored
@@ -1,6 +1,6 @@
|
||||
/*
|
||||
* Copyright © 2009 CNRS
|
||||
* Copyright © 2009-2020 Inria. All rights reserved.
|
||||
* Copyright © 2009-2021 Inria. All rights reserved.
|
||||
* Copyright © 2009-2010, 2020 Université Bordeaux
|
||||
* Copyright © 2009-2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
@@ -395,6 +395,8 @@ hwloc_type_sscanf(const char *string, hwloc_obj_type_t *typep,
|
||||
} else if (hwloc__type_match(string, "pcibridge", 5)) {
|
||||
type = HWLOC_OBJ_BRIDGE;
|
||||
ubtype = HWLOC_OBJ_BRIDGE_PCI;
|
||||
/* if downstream_type can ever be non-PCI, we'll have to make strings more precise,
|
||||
* or relax the hwloc_type_sscanf test */
|
||||
|
||||
} else if (hwloc__type_match(string, "pcidev", 3)) {
|
||||
type = HWLOC_OBJ_PCI_DEVICE;
|
||||
@@ -448,7 +450,9 @@ hwloc_type_sscanf(const char *string, hwloc_obj_type_t *typep,
|
||||
attrp->group.depth = depthattr;
|
||||
} else if (type == HWLOC_OBJ_BRIDGE && attrsize >= sizeof(attrp->bridge)) {
|
||||
attrp->bridge.upstream_type = ubtype;
|
||||
attrp->bridge.downstream_type = HWLOC_OBJ_BRIDGE_PCI; /* nothing else so far */
|
||||
attrp->bridge.downstream_type = HWLOC_OBJ_BRIDGE_PCI;
|
||||
/* if downstream_type can ever be non-PCI, we'll have to make strings more precise,
|
||||
* or relax the hwloc_type_sscanf test */
|
||||
} else if (type == HWLOC_OBJ_OS_DEVICE && attrsize >= sizeof(attrp->osdev)) {
|
||||
attrp->osdev.type = ostype;
|
||||
}
|
||||
@@ -531,6 +535,9 @@ hwloc_obj_type_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t
|
||||
else
|
||||
return hwloc_snprintf(string, size, "%s", hwloc_obj_type_string(type));
|
||||
case HWLOC_OBJ_BRIDGE:
|
||||
/* if downstream_type can ever be non-PCI, we'll have to make strings more precise,
|
||||
* or relax the hwloc_type_sscanf test */
|
||||
assert(obj->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI);
|
||||
return hwloc_snprintf(string, size, obj->attr->bridge.upstream_type == HWLOC_OBJ_BRIDGE_PCI ? "PCIBridge" : "HostBridge");
|
||||
case HWLOC_OBJ_PCI_DEVICE:
|
||||
return hwloc_snprintf(string, size, "PCI");
|
||||
@@ -648,8 +655,11 @@ hwloc_obj_attr_snprintf(char * __hwloc_restrict string, size_t size, hwloc_obj_t
|
||||
} else
|
||||
*up = '\0';
|
||||
/* downstream is_PCI */
|
||||
snprintf(down, sizeof(down), "buses=%04x:[%02x-%02x]",
|
||||
obj->attr->bridge.downstream.pci.domain, obj->attr->bridge.downstream.pci.secondary_bus, obj->attr->bridge.downstream.pci.subordinate_bus);
|
||||
if (obj->attr->bridge.downstream_type == HWLOC_OBJ_BRIDGE_PCI) {
|
||||
snprintf(down, sizeof(down), "buses=%04x:[%02x-%02x]",
|
||||
obj->attr->bridge.downstream.pci.domain, obj->attr->bridge.downstream.pci.secondary_bus, obj->attr->bridge.downstream.pci.subordinate_bus);
|
||||
} else
|
||||
assert(0);
|
||||
if (*up)
|
||||
res = hwloc_snprintf(string, size, "%s%s%s", up, separator, down);
|
||||
else
|
||||
@@ -736,3 +746,92 @@ int hwloc_bitmap_singlify_per_core(hwloc_topology_t topology, hwloc_bitmap_t cpu
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
hwloc_obj_t
|
||||
hwloc_get_obj_with_same_locality(hwloc_topology_t topology, hwloc_obj_t src,
|
||||
hwloc_obj_type_t type, const char *subtype, const char *nameprefix,
|
||||
unsigned long flags)
|
||||
{
|
||||
if (flags) {
|
||||
errno = EINVAL;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (hwloc_obj_type_is_normal(src->type) || hwloc_obj_type_is_memory(src->type)) {
|
||||
/* normal/memory type, look for normal/memory type with same sets */
|
||||
hwloc_obj_t obj;
|
||||
|
||||
if (!hwloc_obj_type_is_normal(type) && !hwloc_obj_type_is_memory(type)) {
|
||||
errno = EINVAL;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
obj = NULL;
|
||||
while ((obj = hwloc_get_next_obj_by_type(topology, type, obj)) != NULL) {
|
||||
if (!hwloc_bitmap_isequal(src->cpuset, obj->cpuset)
|
||||
|| !hwloc_bitmap_isequal(src->nodeset, obj->nodeset))
|
||||
continue;
|
||||
if (subtype && (!obj->subtype || strcasecmp(subtype, obj->subtype)))
|
||||
continue;
|
||||
if (nameprefix && (!obj->name || hwloc_strncasecmp(nameprefix, obj->name, strlen(nameprefix))))
|
||||
continue;
|
||||
return obj;
|
||||
}
|
||||
errno = ENOENT;
|
||||
return NULL;
|
||||
|
||||
} else if (hwloc_obj_type_is_io(src->type)) {
|
||||
/* I/O device, look for PCI/OS in same PCI */
|
||||
hwloc_obj_t pci;
|
||||
|
||||
if ((src->type != HWLOC_OBJ_OS_DEVICE && src->type != HWLOC_OBJ_PCI_DEVICE)
|
||||
|| (type != HWLOC_OBJ_OS_DEVICE && type != HWLOC_OBJ_PCI_DEVICE)) {
|
||||
errno = EINVAL;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
/* walk up to find the container */
|
||||
pci = src;
|
||||
while (pci->type == HWLOC_OBJ_OS_DEVICE)
|
||||
pci = pci->parent;
|
||||
|
||||
if (type == HWLOC_OBJ_PCI_DEVICE) {
|
||||
if (pci->type != HWLOC_OBJ_PCI_DEVICE) {
|
||||
errno = ENOENT;
|
||||
return NULL;
|
||||
}
|
||||
if (subtype && (!pci->subtype || strcasecmp(subtype, pci->subtype))) {
|
||||
errno = ENOENT;
|
||||
return NULL;
|
||||
}
|
||||
if (nameprefix && (!pci->name || hwloc_strncasecmp(nameprefix, pci->name, strlen(nameprefix)))) {
|
||||
errno = ENOENT;
|
||||
return NULL;
|
||||
}
|
||||
return pci;
|
||||
|
||||
} else {
|
||||
/* find a matching osdev child */
|
||||
assert(type == HWLOC_OBJ_OS_DEVICE);
|
||||
/* FIXME: won't work if we ever store osdevs in osdevs */
|
||||
hwloc_obj_t child;
|
||||
for(child = pci->io_first_child; child; child = child->next_sibling) {
|
||||
if (child->type != HWLOC_OBJ_OS_DEVICE)
|
||||
/* FIXME: should never occur currently */
|
||||
continue;
|
||||
if (subtype && (!child->subtype || strcasecmp(subtype, child->subtype)))
|
||||
continue;
|
||||
if (nameprefix && (!child->name || hwloc_strncasecmp(nameprefix, child->name, strlen(nameprefix))))
|
||||
continue;
|
||||
return child;
|
||||
}
|
||||
}
|
||||
errno = ENOENT;
|
||||
return NULL;
|
||||
|
||||
} else {
|
||||
/* nothing for Misc */
|
||||
errno = EINVAL;
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
2
src/3rdparty/libethash/CMakeLists.txt
vendored
2
src/3rdparty/libethash/CMakeLists.txt
vendored
@@ -1,4 +1,4 @@
|
||||
cmake_minimum_required (VERSION 2.8.12)
|
||||
cmake_minimum_required(VERSION 3.10)
|
||||
project (ethash C)
|
||||
|
||||
set(CMAKE_C_FLAGS_RELEASE "${CMAKE_C_FLAGS_RELEASE} -Os")
|
||||
|
||||
225
src/3rdparty/llhttp/api.c
vendored
225
src/3rdparty/llhttp/api.c
vendored
@@ -4,7 +4,7 @@
|
||||
|
||||
#include "llhttp.h"
|
||||
|
||||
#define CALLBACK_MAYBE(PARSER, NAME, ...) \
|
||||
#define CALLBACK_MAYBE(PARSER, NAME) \
|
||||
do { \
|
||||
const llhttp_settings_t* settings; \
|
||||
settings = (const llhttp_settings_t*) (PARSER)->settings; \
|
||||
@@ -12,7 +12,22 @@
|
||||
err = 0; \
|
||||
break; \
|
||||
} \
|
||||
err = settings->NAME(__VA_ARGS__); \
|
||||
err = settings->NAME((PARSER)); \
|
||||
} while (0)
|
||||
|
||||
#define SPAN_CALLBACK_MAYBE(PARSER, NAME, START, LEN) \
|
||||
do { \
|
||||
const llhttp_settings_t* settings; \
|
||||
settings = (const llhttp_settings_t*) (PARSER)->settings; \
|
||||
if (settings == NULL || settings->NAME == NULL) { \
|
||||
err = 0; \
|
||||
break; \
|
||||
} \
|
||||
err = settings->NAME((PARSER), (START), (LEN)); \
|
||||
if (err == -1) { \
|
||||
err = HPE_USER; \
|
||||
llhttp_set_error_reason((PARSER), "Span callback error in " #NAME); \
|
||||
} \
|
||||
} while (0)
|
||||
|
||||
void llhttp_init(llhttp_t* parser, llhttp_type_t type,
|
||||
@@ -31,21 +46,25 @@ extern int wasm_on_url(llhttp_t* p, const char* at, size_t length);
|
||||
extern int wasm_on_status(llhttp_t* p, const char* at, size_t length);
|
||||
extern int wasm_on_header_field(llhttp_t* p, const char* at, size_t length);
|
||||
extern int wasm_on_header_value(llhttp_t* p, const char* at, size_t length);
|
||||
extern int wasm_on_headers_complete(llhttp_t * p);
|
||||
extern int wasm_on_headers_complete(llhttp_t * p, int status_code,
|
||||
uint8_t upgrade, int should_keep_alive);
|
||||
extern int wasm_on_body(llhttp_t* p, const char* at, size_t length);
|
||||
extern int wasm_on_message_complete(llhttp_t * p);
|
||||
|
||||
static int wasm_on_headers_complete_wrap(llhttp_t* p) {
|
||||
return wasm_on_headers_complete(p, p->status_code, p->upgrade,
|
||||
llhttp_should_keep_alive(p));
|
||||
}
|
||||
|
||||
const llhttp_settings_t wasm_settings = {
|
||||
wasm_on_message_begin,
|
||||
wasm_on_url,
|
||||
wasm_on_status,
|
||||
wasm_on_header_field,
|
||||
wasm_on_header_value,
|
||||
wasm_on_headers_complete,
|
||||
wasm_on_body,
|
||||
wasm_on_message_complete,
|
||||
NULL,
|
||||
NULL,
|
||||
.on_message_begin = wasm_on_message_begin,
|
||||
.on_url = wasm_on_url,
|
||||
.on_status = wasm_on_status,
|
||||
.on_header_field = wasm_on_header_field,
|
||||
.on_header_value = wasm_on_header_value,
|
||||
.on_headers_complete = wasm_on_headers_complete_wrap,
|
||||
.on_body = wasm_on_body,
|
||||
.on_message_complete = wasm_on_message_complete,
|
||||
};
|
||||
|
||||
|
||||
@@ -59,6 +78,8 @@ void llhttp_free(llhttp_t* parser) {
|
||||
free(parser);
|
||||
}
|
||||
|
||||
#endif // defined(__wasm__)
|
||||
|
||||
/* Some getters required to get stuff from the parser */
|
||||
|
||||
uint8_t llhttp_get_type(llhttp_t* parser) {
|
||||
@@ -85,14 +106,12 @@ uint8_t llhttp_get_upgrade(llhttp_t* parser) {
|
||||
return parser->upgrade;
|
||||
}
|
||||
|
||||
#endif // defined(__wasm__)
|
||||
|
||||
|
||||
void llhttp_reset(llhttp_t* parser) {
|
||||
llhttp_type_t type = parser->type;
|
||||
const llhttp_settings_t* settings = parser->settings;
|
||||
void* data = parser->data;
|
||||
uint8_t lenient_flags = parser->lenient_flags;
|
||||
uint16_t lenient_flags = parser->lenient_flags;
|
||||
|
||||
llhttp__internal_init(parser);
|
||||
|
||||
@@ -123,7 +142,7 @@ llhttp_errno_t llhttp_finish(llhttp_t* parser) {
|
||||
|
||||
switch (parser->finish) {
|
||||
case HTTP_FINISH_SAFE_WITH_CB:
|
||||
CALLBACK_MAYBE(parser, on_message_complete, parser);
|
||||
CALLBACK_MAYBE(parser, on_message_complete);
|
||||
if (err != HPE_OK) return err;
|
||||
|
||||
/* FALLTHROUGH */
|
||||
@@ -199,12 +218,21 @@ const char* llhttp_errno_name(llhttp_errno_t err) {
|
||||
const char* llhttp_method_name(llhttp_method_t method) {
|
||||
#define HTTP_METHOD_GEN(NUM, NAME, STRING) case HTTP_##NAME: return #STRING;
|
||||
switch (method) {
|
||||
HTTP_METHOD_MAP(HTTP_METHOD_GEN)
|
||||
HTTP_ALL_METHOD_MAP(HTTP_METHOD_GEN)
|
||||
default: abort();
|
||||
}
|
||||
#undef HTTP_METHOD_GEN
|
||||
}
|
||||
|
||||
const char* llhttp_status_name(llhttp_status_t status) {
|
||||
#define HTTP_STATUS_GEN(NUM, NAME, STRING) case HTTP_STATUS_##NAME: return #STRING;
|
||||
switch (status) {
|
||||
HTTP_STATUS_MAP(HTTP_STATUS_GEN)
|
||||
default: abort();
|
||||
}
|
||||
#undef HTTP_STATUS_GEN
|
||||
}
|
||||
|
||||
|
||||
void llhttp_set_lenient_headers(llhttp_t* parser, int enabled) {
|
||||
if (enabled) {
|
||||
@@ -232,103 +260,236 @@ void llhttp_set_lenient_keep_alive(llhttp_t* parser, int enabled) {
|
||||
}
|
||||
}
|
||||
|
||||
void llhttp_set_lenient_transfer_encoding(llhttp_t* parser, int enabled) {
|
||||
if (enabled) {
|
||||
parser->lenient_flags |= LENIENT_TRANSFER_ENCODING;
|
||||
} else {
|
||||
parser->lenient_flags &= ~LENIENT_TRANSFER_ENCODING;
|
||||
}
|
||||
}
|
||||
|
||||
void llhttp_set_lenient_version(llhttp_t* parser, int enabled) {
|
||||
if (enabled) {
|
||||
parser->lenient_flags |= LENIENT_VERSION;
|
||||
} else {
|
||||
parser->lenient_flags &= ~LENIENT_VERSION;
|
||||
}
|
||||
}
|
||||
|
||||
void llhttp_set_lenient_data_after_close(llhttp_t* parser, int enabled) {
|
||||
if (enabled) {
|
||||
parser->lenient_flags |= LENIENT_DATA_AFTER_CLOSE;
|
||||
} else {
|
||||
parser->lenient_flags &= ~LENIENT_DATA_AFTER_CLOSE;
|
||||
}
|
||||
}
|
||||
|
||||
void llhttp_set_lenient_optional_lf_after_cr(llhttp_t* parser, int enabled) {
|
||||
if (enabled) {
|
||||
parser->lenient_flags |= LENIENT_OPTIONAL_LF_AFTER_CR;
|
||||
} else {
|
||||
parser->lenient_flags &= ~LENIENT_OPTIONAL_LF_AFTER_CR;
|
||||
}
|
||||
}
|
||||
|
||||
void llhttp_set_lenient_optional_crlf_after_chunk(llhttp_t* parser, int enabled) {
|
||||
if (enabled) {
|
||||
parser->lenient_flags |= LENIENT_OPTIONAL_CRLF_AFTER_CHUNK;
|
||||
} else {
|
||||
parser->lenient_flags &= ~LENIENT_OPTIONAL_CRLF_AFTER_CHUNK;
|
||||
}
|
||||
}
|
||||
|
||||
void llhttp_set_lenient_optional_cr_before_lf(llhttp_t* parser, int enabled) {
|
||||
if (enabled) {
|
||||
parser->lenient_flags |= LENIENT_OPTIONAL_CR_BEFORE_LF;
|
||||
} else {
|
||||
parser->lenient_flags &= ~LENIENT_OPTIONAL_CR_BEFORE_LF;
|
||||
}
|
||||
}
|
||||
|
||||
void llhttp_set_lenient_spaces_after_chunk_size(llhttp_t* parser, int enabled) {
|
||||
if (enabled) {
|
||||
parser->lenient_flags |= LENIENT_SPACES_AFTER_CHUNK_SIZE;
|
||||
} else {
|
||||
parser->lenient_flags &= ~LENIENT_SPACES_AFTER_CHUNK_SIZE;
|
||||
}
|
||||
}
|
||||
|
||||
/* Callbacks */
|
||||
|
||||
|
||||
int llhttp__on_message_begin(llhttp_t* s, const char* p, const char* endp) {
|
||||
int err;
|
||||
CALLBACK_MAYBE(s, on_message_begin, s);
|
||||
CALLBACK_MAYBE(s, on_message_begin);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int llhttp__on_protocol(llhttp_t* s, const char* p, const char* endp) {
|
||||
int err;
|
||||
SPAN_CALLBACK_MAYBE(s, on_protocol, p, endp - p);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int llhttp__on_protocol_complete(llhttp_t* s, const char* p, const char* endp) {
|
||||
int err;
|
||||
CALLBACK_MAYBE(s, on_protocol_complete);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int llhttp__on_url(llhttp_t* s, const char* p, const char* endp) {
|
||||
int err;
|
||||
CALLBACK_MAYBE(s, on_url, s, p, endp - p);
|
||||
SPAN_CALLBACK_MAYBE(s, on_url, p, endp - p);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int llhttp__on_url_complete(llhttp_t* s, const char* p, const char* endp) {
|
||||
int err;
|
||||
CALLBACK_MAYBE(s, on_url_complete, s);
|
||||
CALLBACK_MAYBE(s, on_url_complete);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int llhttp__on_status(llhttp_t* s, const char* p, const char* endp) {
|
||||
int err;
|
||||
CALLBACK_MAYBE(s, on_status, s, p, endp - p);
|
||||
SPAN_CALLBACK_MAYBE(s, on_status, p, endp - p);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int llhttp__on_status_complete(llhttp_t* s, const char* p, const char* endp) {
|
||||
int err;
|
||||
CALLBACK_MAYBE(s, on_status_complete, s);
|
||||
CALLBACK_MAYBE(s, on_status_complete);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int llhttp__on_method(llhttp_t* s, const char* p, const char* endp) {
|
||||
int err;
|
||||
SPAN_CALLBACK_MAYBE(s, on_method, p, endp - p);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int llhttp__on_method_complete(llhttp_t* s, const char* p, const char* endp) {
|
||||
int err;
|
||||
CALLBACK_MAYBE(s, on_method_complete);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int llhttp__on_version(llhttp_t* s, const char* p, const char* endp) {
|
||||
int err;
|
||||
SPAN_CALLBACK_MAYBE(s, on_version, p, endp - p);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int llhttp__on_version_complete(llhttp_t* s, const char* p, const char* endp) {
|
||||
int err;
|
||||
CALLBACK_MAYBE(s, on_version_complete);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int llhttp__on_header_field(llhttp_t* s, const char* p, const char* endp) {
|
||||
int err;
|
||||
CALLBACK_MAYBE(s, on_header_field, s, p, endp - p);
|
||||
SPAN_CALLBACK_MAYBE(s, on_header_field, p, endp - p);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int llhttp__on_header_field_complete(llhttp_t* s, const char* p, const char* endp) {
|
||||
int err;
|
||||
CALLBACK_MAYBE(s, on_header_field_complete, s);
|
||||
CALLBACK_MAYBE(s, on_header_field_complete);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int llhttp__on_header_value(llhttp_t* s, const char* p, const char* endp) {
|
||||
int err;
|
||||
CALLBACK_MAYBE(s, on_header_value, s, p, endp - p);
|
||||
SPAN_CALLBACK_MAYBE(s, on_header_value, p, endp - p);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int llhttp__on_header_value_complete(llhttp_t* s, const char* p, const char* endp) {
|
||||
int err;
|
||||
CALLBACK_MAYBE(s, on_header_value_complete, s);
|
||||
CALLBACK_MAYBE(s, on_header_value_complete);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int llhttp__on_headers_complete(llhttp_t* s, const char* p, const char* endp) {
|
||||
int err;
|
||||
CALLBACK_MAYBE(s, on_headers_complete, s);
|
||||
CALLBACK_MAYBE(s, on_headers_complete);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int llhttp__on_message_complete(llhttp_t* s, const char* p, const char* endp) {
|
||||
int err;
|
||||
CALLBACK_MAYBE(s, on_message_complete, s);
|
||||
CALLBACK_MAYBE(s, on_message_complete);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int llhttp__on_body(llhttp_t* s, const char* p, const char* endp) {
|
||||
int err;
|
||||
CALLBACK_MAYBE(s, on_body, s, p, endp - p);
|
||||
SPAN_CALLBACK_MAYBE(s, on_body, p, endp - p);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int llhttp__on_chunk_header(llhttp_t* s, const char* p, const char* endp) {
|
||||
int err;
|
||||
CALLBACK_MAYBE(s, on_chunk_header, s);
|
||||
CALLBACK_MAYBE(s, on_chunk_header);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int llhttp__on_chunk_extension_name(llhttp_t* s, const char* p, const char* endp) {
|
||||
int err;
|
||||
SPAN_CALLBACK_MAYBE(s, on_chunk_extension_name, p, endp - p);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int llhttp__on_chunk_extension_name_complete(llhttp_t* s, const char* p, const char* endp) {
|
||||
int err;
|
||||
CALLBACK_MAYBE(s, on_chunk_extension_name_complete);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int llhttp__on_chunk_extension_value(llhttp_t* s, const char* p, const char* endp) {
|
||||
int err;
|
||||
SPAN_CALLBACK_MAYBE(s, on_chunk_extension_value, p, endp - p);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int llhttp__on_chunk_extension_value_complete(llhttp_t* s, const char* p, const char* endp) {
|
||||
int err;
|
||||
CALLBACK_MAYBE(s, on_chunk_extension_value_complete);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int llhttp__on_chunk_complete(llhttp_t* s, const char* p, const char* endp) {
|
||||
int err;
|
||||
CALLBACK_MAYBE(s, on_chunk_complete, s);
|
||||
CALLBACK_MAYBE(s, on_chunk_complete);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
int llhttp__on_reset(llhttp_t* s, const char* p, const char* endp) {
|
||||
int err;
|
||||
CALLBACK_MAYBE(s, on_reset);
|
||||
return err;
|
||||
}
|
||||
|
||||
|
||||
253
src/3rdparty/llhttp/api.h
vendored
253
src/3rdparty/llhttp/api.h
vendored
@@ -1,253 +0,0 @@
|
||||
#ifndef INCLUDE_LLHTTP_API_H_
|
||||
#define INCLUDE_LLHTTP_API_H_
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
#include <stddef.h>
|
||||
|
||||
#if defined(__wasm__)
|
||||
#define LLHTTP_EXPORT __attribute__((visibility("default")))
|
||||
#else
|
||||
#define LLHTTP_EXPORT
|
||||
#endif
|
||||
|
||||
typedef llhttp__internal_t llhttp_t;
|
||||
typedef struct llhttp_settings_s llhttp_settings_t;
|
||||
|
||||
typedef int (*llhttp_data_cb)(llhttp_t*, const char *at, size_t length);
|
||||
typedef int (*llhttp_cb)(llhttp_t*);
|
||||
|
||||
struct llhttp_settings_s {
|
||||
/* Possible return values 0, -1, `HPE_PAUSED` */
|
||||
llhttp_cb on_message_begin;
|
||||
|
||||
llhttp_data_cb on_url;
|
||||
llhttp_data_cb on_status;
|
||||
llhttp_data_cb on_header_field;
|
||||
llhttp_data_cb on_header_value;
|
||||
|
||||
/* Possible return values:
|
||||
* 0 - Proceed normally
|
||||
* 1 - Assume that request/response has no body, and proceed to parsing the
|
||||
* next message
|
||||
* 2 - Assume absence of body (as above) and make `llhttp_execute()` return
|
||||
* `HPE_PAUSED_UPGRADE`
|
||||
* -1 - Error
|
||||
* `HPE_PAUSED`
|
||||
*/
|
||||
llhttp_cb on_headers_complete;
|
||||
|
||||
llhttp_data_cb on_body;
|
||||
|
||||
/* Possible return values 0, -1, `HPE_PAUSED` */
|
||||
llhttp_cb on_message_complete;
|
||||
|
||||
/* When on_chunk_header is called, the current chunk length is stored
|
||||
* in parser->content_length.
|
||||
* Possible return values 0, -1, `HPE_PAUSED`
|
||||
*/
|
||||
llhttp_cb on_chunk_header;
|
||||
llhttp_cb on_chunk_complete;
|
||||
|
||||
llhttp_cb on_url_complete;
|
||||
llhttp_cb on_status_complete;
|
||||
llhttp_cb on_header_field_complete;
|
||||
llhttp_cb on_header_value_complete;
|
||||
};
|
||||
|
||||
/* Initialize the parser with specific type and user settings.
|
||||
*
|
||||
* NOTE: lifetime of `settings` has to be at least the same as the lifetime of
|
||||
* the `parser` here. In practice, `settings` has to be either a static
|
||||
* variable or be allocated with `malloc`, `new`, etc.
|
||||
*/
|
||||
LLHTTP_EXPORT
|
||||
void llhttp_init(llhttp_t* parser, llhttp_type_t type,
|
||||
const llhttp_settings_t* settings);
|
||||
|
||||
#if defined(__wasm__)
|
||||
|
||||
LLHTTP_EXPORT
|
||||
llhttp_t* llhttp_alloc(llhttp_type_t type);
|
||||
|
||||
LLHTTP_EXPORT
|
||||
void llhttp_free(llhttp_t* parser);
|
||||
|
||||
LLHTTP_EXPORT
|
||||
uint8_t llhttp_get_type(llhttp_t* parser);
|
||||
|
||||
LLHTTP_EXPORT
|
||||
uint8_t llhttp_get_http_major(llhttp_t* parser);
|
||||
|
||||
LLHTTP_EXPORT
|
||||
uint8_t llhttp_get_http_minor(llhttp_t* parser);
|
||||
|
||||
LLHTTP_EXPORT
|
||||
uint8_t llhttp_get_method(llhttp_t* parser);
|
||||
|
||||
LLHTTP_EXPORT
|
||||
int llhttp_get_status_code(llhttp_t* parser);
|
||||
|
||||
LLHTTP_EXPORT
|
||||
uint8_t llhttp_get_upgrade(llhttp_t* parser);
|
||||
|
||||
#endif // defined(__wasm__)
|
||||
|
||||
/* Reset an already initialized parser back to the start state, preserving the
|
||||
* existing parser type, callback settings, user data, and lenient flags.
|
||||
*/
|
||||
LLHTTP_EXPORT
|
||||
void llhttp_reset(llhttp_t* parser);
|
||||
|
||||
/* Initialize the settings object */
|
||||
LLHTTP_EXPORT
|
||||
void llhttp_settings_init(llhttp_settings_t* settings);
|
||||
|
||||
/* Parse full or partial request/response, invoking user callbacks along the
|
||||
* way.
|
||||
*
|
||||
* If any of `llhttp_data_cb` returns errno not equal to `HPE_OK` - the parsing
|
||||
* interrupts, and such errno is returned from `llhttp_execute()`. If
|
||||
* `HPE_PAUSED` was used as an errno, the execution can be resumed with
|
||||
* `llhttp_resume()` call.
|
||||
*
|
||||
* In a special case of CONNECT/Upgrade request/response `HPE_PAUSED_UPGRADE`
|
||||
* is returned after fully parsing the request/response. If the user wishes to
|
||||
* continue parsing, they need to invoke `llhttp_resume_after_upgrade()`.
|
||||
*
|
||||
* NOTE: if this function ever returns a non-pause type error, it will continue
|
||||
* to return the same error upon each successive call up until `llhttp_init()`
|
||||
* is called.
|
||||
*/
|
||||
LLHTTP_EXPORT
|
||||
llhttp_errno_t llhttp_execute(llhttp_t* parser, const char* data, size_t len);
|
||||
|
||||
/* This method should be called when the other side has no further bytes to
|
||||
* send (e.g. shutdown of readable side of the TCP connection.)
|
||||
*
|
||||
* Requests without `Content-Length` and other messages might require treating
|
||||
* all incoming bytes as the part of the body, up to the last byte of the
|
||||
* connection. This method will invoke `on_message_complete()` callback if the
|
||||
* request was terminated safely. Otherwise an error code would be returned.
|
||||
*/
|
||||
LLHTTP_EXPORT
|
||||
llhttp_errno_t llhttp_finish(llhttp_t* parser);
|
||||
|
||||
/* Returns `1` if the incoming message is parsed until the last byte, and has
|
||||
* to be completed by calling `llhttp_finish()` on EOF
|
||||
*/
|
||||
LLHTTP_EXPORT
|
||||
int llhttp_message_needs_eof(const llhttp_t* parser);
|
||||
|
||||
/* Returns `1` if there might be any other messages following the last that was
|
||||
* successfully parsed.
|
||||
*/
|
||||
LLHTTP_EXPORT
|
||||
int llhttp_should_keep_alive(const llhttp_t* parser);
|
||||
|
||||
/* Make further calls of `llhttp_execute()` return `HPE_PAUSED` and set
|
||||
* appropriate error reason.
|
||||
*
|
||||
* Important: do not call this from user callbacks! User callbacks must return
|
||||
* `HPE_PAUSED` if pausing is required.
|
||||
*/
|
||||
LLHTTP_EXPORT
|
||||
void llhttp_pause(llhttp_t* parser);
|
||||
|
||||
/* Might be called to resume the execution after the pause in user's callback.
|
||||
* See `llhttp_execute()` above for details.
|
||||
*
|
||||
* Call this only if `llhttp_execute()` returns `HPE_PAUSED`.
|
||||
*/
|
||||
LLHTTP_EXPORT
|
||||
void llhttp_resume(llhttp_t* parser);
|
||||
|
||||
/* Might be called to resume the execution after the pause in user's callback.
|
||||
* See `llhttp_execute()` above for details.
|
||||
*
|
||||
* Call this only if `llhttp_execute()` returns `HPE_PAUSED_UPGRADE`
|
||||
*/
|
||||
LLHTTP_EXPORT
|
||||
void llhttp_resume_after_upgrade(llhttp_t* parser);
|
||||
|
||||
/* Returns the latest return error */
|
||||
LLHTTP_EXPORT
|
||||
llhttp_errno_t llhttp_get_errno(const llhttp_t* parser);
|
||||
|
||||
/* Returns the verbal explanation of the latest returned error.
|
||||
*
|
||||
* Note: User callback should set error reason when returning the error. See
|
||||
* `llhttp_set_error_reason()` for details.
|
||||
*/
|
||||
LLHTTP_EXPORT
|
||||
const char* llhttp_get_error_reason(const llhttp_t* parser);
|
||||
|
||||
/* Assign verbal description to the returned error. Must be called in user
|
||||
* callbacks right before returning the errno.
|
||||
*
|
||||
* Note: `HPE_USER` error code might be useful in user callbacks.
|
||||
*/
|
||||
LLHTTP_EXPORT
|
||||
void llhttp_set_error_reason(llhttp_t* parser, const char* reason);
|
||||
|
||||
/* Returns the pointer to the last parsed byte before the returned error. The
|
||||
* pointer is relative to the `data` argument of `llhttp_execute()`.
|
||||
*
|
||||
* Note: this method might be useful for counting the number of parsed bytes.
|
||||
*/
|
||||
LLHTTP_EXPORT
|
||||
const char* llhttp_get_error_pos(const llhttp_t* parser);
|
||||
|
||||
/* Returns textual name of error code */
|
||||
LLHTTP_EXPORT
|
||||
const char* llhttp_errno_name(llhttp_errno_t err);
|
||||
|
||||
/* Returns textual name of HTTP method */
|
||||
LLHTTP_EXPORT
|
||||
const char* llhttp_method_name(llhttp_method_t method);
|
||||
|
||||
|
||||
/* Enables/disables lenient header value parsing (disabled by default).
|
||||
*
|
||||
* Lenient parsing disables header value token checks, extending llhttp's
|
||||
* protocol support to highly non-compliant clients/servers. No
|
||||
* `HPE_INVALID_HEADER_TOKEN` will be raised for incorrect header values when
|
||||
* lenient parsing is "on".
|
||||
*
|
||||
* **(USE AT YOUR OWN RISK)**
|
||||
*/
|
||||
LLHTTP_EXPORT
|
||||
void llhttp_set_lenient_headers(llhttp_t* parser, int enabled);
|
||||
|
||||
|
||||
/* Enables/disables lenient handling of conflicting `Transfer-Encoding` and
|
||||
* `Content-Length` headers (disabled by default).
|
||||
*
|
||||
* Normally `llhttp` would error when `Transfer-Encoding` is present in
|
||||
* conjunction with `Content-Length`. This error is important to prevent HTTP
|
||||
* request smuggling, but may be less desirable for small number of cases
|
||||
* involving legacy servers.
|
||||
*
|
||||
* **(USE AT YOUR OWN RISK)**
|
||||
*/
|
||||
LLHTTP_EXPORT
|
||||
void llhttp_set_lenient_chunked_length(llhttp_t* parser, int enabled);
|
||||
|
||||
|
||||
/* Enables/disables lenient handling of `Connection: close` and HTTP/1.0
|
||||
* requests/responses.
|
||||
*
|
||||
* Normally `llhttp` would error on (in strict mode) or discard (in loose mode)
|
||||
* the HTTP request/response after the request/response with `Connection: close`
|
||||
* and `Content-Length`. This is important to prevent cache poisoning attacks,
|
||||
* but might interact badly with outdated and insecure clients. With this flag
|
||||
* the extra request/response will be parsed normally.
|
||||
*
|
||||
* **(USE AT YOUR OWN RISK)**
|
||||
*/
|
||||
void llhttp_set_lenient_keep_alive(llhttp_t* parser, int enabled);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
#endif /* INCLUDE_LLHTTP_API_H_ */
|
||||
29
src/3rdparty/llhttp/http.c
vendored
29
src/3rdparty/llhttp/http.c
vendored
@@ -39,20 +39,41 @@ int llhttp__after_headers_complete(llhttp_t* parser, const char* p,
|
||||
int hasBody;
|
||||
|
||||
hasBody = parser->flags & F_CHUNKED || parser->content_length > 0;
|
||||
if (parser->upgrade && (parser->method == HTTP_CONNECT ||
|
||||
(parser->flags & F_SKIPBODY) || !hasBody)) {
|
||||
if (
|
||||
(parser->upgrade && (parser->method == HTTP_CONNECT ||
|
||||
(parser->flags & F_SKIPBODY) || !hasBody)) ||
|
||||
/* See RFC 2616 section 4.4 - 1xx e.g. Continue */
|
||||
(parser->type == HTTP_RESPONSE && parser->status_code == 101)
|
||||
) {
|
||||
/* Exit, the rest of the message is in a different protocol. */
|
||||
return 1;
|
||||
}
|
||||
|
||||
if (parser->flags & F_SKIPBODY) {
|
||||
if (parser->type == HTTP_RESPONSE && parser->status_code == 100) {
|
||||
/* No body, restart as the message is complete */
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* See RFC 2616 section 4.4 */
|
||||
if (
|
||||
parser->flags & F_SKIPBODY || /* response to a HEAD request */
|
||||
(
|
||||
parser->type == HTTP_RESPONSE && (
|
||||
parser->status_code == 102 || /* Processing */
|
||||
parser->status_code == 103 || /* Early Hints */
|
||||
parser->status_code == 204 || /* No Content */
|
||||
parser->status_code == 304 /* Not Modified */
|
||||
)
|
||||
)
|
||||
) {
|
||||
return 0;
|
||||
} else if (parser->flags & F_CHUNKED) {
|
||||
/* chunked encoding - ignore Content-Length header, prepare for a chunk */
|
||||
return 2;
|
||||
} else if (parser->flags & F_TRANSFER_ENCODING) {
|
||||
if (parser->type == HTTP_REQUEST &&
|
||||
(parser->lenient_flags & LENIENT_CHUNKED_LENGTH) == 0) {
|
||||
(parser->lenient_flags & LENIENT_CHUNKED_LENGTH) == 0 &&
|
||||
(parser->lenient_flags & LENIENT_TRANSFER_ENCODING) == 0) {
|
||||
/* RFC 7230 3.3.3 */
|
||||
|
||||
/* If a Transfer-Encoding header field
|
||||
|
||||
14087
src/3rdparty/llhttp/llhttp.c
vendored
14087
src/3rdparty/llhttp/llhttp.c
vendored
File diff suppressed because it is too large
Load Diff
441
src/3rdparty/llhttp/llhttp.h
vendored
441
src/3rdparty/llhttp/llhttp.h
vendored
@@ -1,14 +1,11 @@
|
||||
|
||||
#ifndef INCLUDE_LLHTTP_H_
|
||||
#define INCLUDE_LLHTTP_H_
|
||||
|
||||
#define LLHTTP_VERSION_MAJOR 5
|
||||
#define LLHTTP_VERSION_MINOR 1
|
||||
#define LLHTTP_VERSION_MAJOR 9
|
||||
#define LLHTTP_VERSION_MINOR 3
|
||||
#define LLHTTP_VERSION_PATCH 0
|
||||
|
||||
#ifndef LLHTTP_STRICT_MODE
|
||||
# define LLHTTP_STRICT_MODE 0
|
||||
#endif
|
||||
|
||||
#ifndef INCLUDE_LLHTTP_ITSELF_H_
|
||||
#define INCLUDE_LLHTTP_ITSELF_H_
|
||||
#ifdef __cplusplus
|
||||
@@ -33,11 +30,12 @@ struct llhttp__internal_s {
|
||||
uint8_t http_major;
|
||||
uint8_t http_minor;
|
||||
uint8_t header_state;
|
||||
uint8_t lenient_flags;
|
||||
uint16_t lenient_flags;
|
||||
uint8_t upgrade;
|
||||
uint8_t finish;
|
||||
uint16_t flags;
|
||||
uint16_t status_code;
|
||||
uint8_t initial_message_completed;
|
||||
void* settings;
|
||||
};
|
||||
|
||||
@@ -49,6 +47,7 @@ int llhttp__internal_execute(llhttp__internal_t* s, const char* p, const char* e
|
||||
#endif
|
||||
#endif /* INCLUDE_LLHTTP_ITSELF_H_ */
|
||||
|
||||
|
||||
#ifndef LLLLHTTP_C_HEADERS_
|
||||
#define LLLLHTTP_C_HEADERS_
|
||||
#ifdef __cplusplus
|
||||
@@ -59,8 +58,10 @@ enum llhttp_errno {
|
||||
HPE_OK = 0,
|
||||
HPE_INTERNAL = 1,
|
||||
HPE_STRICT = 2,
|
||||
HPE_CR_EXPECTED = 25,
|
||||
HPE_LF_EXPECTED = 3,
|
||||
HPE_UNEXPECTED_CONTENT_LENGTH = 4,
|
||||
HPE_UNEXPECTED_SPACE = 30,
|
||||
HPE_CLOSED_CONNECTION = 5,
|
||||
HPE_INVALID_METHOD = 6,
|
||||
HPE_INVALID_URL = 7,
|
||||
@@ -80,7 +81,17 @@ enum llhttp_errno {
|
||||
HPE_PAUSED = 21,
|
||||
HPE_PAUSED_UPGRADE = 22,
|
||||
HPE_PAUSED_H2_UPGRADE = 23,
|
||||
HPE_USER = 24
|
||||
HPE_USER = 24,
|
||||
HPE_CB_URL_COMPLETE = 26,
|
||||
HPE_CB_STATUS_COMPLETE = 27,
|
||||
HPE_CB_METHOD_COMPLETE = 32,
|
||||
HPE_CB_VERSION_COMPLETE = 33,
|
||||
HPE_CB_HEADER_FIELD_COMPLETE = 28,
|
||||
HPE_CB_HEADER_VALUE_COMPLETE = 29,
|
||||
HPE_CB_CHUNK_EXTENSION_NAME_COMPLETE = 34,
|
||||
HPE_CB_CHUNK_EXTENSION_VALUE_COMPLETE = 35,
|
||||
HPE_CB_RESET = 31,
|
||||
HPE_CB_PROTOCOL_COMPLETE = 38
|
||||
};
|
||||
typedef enum llhttp_errno llhttp_errno_t;
|
||||
|
||||
@@ -100,7 +111,14 @@ typedef enum llhttp_flags llhttp_flags_t;
|
||||
enum llhttp_lenient_flags {
|
||||
LENIENT_HEADERS = 0x1,
|
||||
LENIENT_CHUNKED_LENGTH = 0x2,
|
||||
LENIENT_KEEP_ALIVE = 0x4
|
||||
LENIENT_KEEP_ALIVE = 0x4,
|
||||
LENIENT_TRANSFER_ENCODING = 0x8,
|
||||
LENIENT_VERSION = 0x10,
|
||||
LENIENT_DATA_AFTER_CLOSE = 0x20,
|
||||
LENIENT_OPTIONAL_LF_AFTER_CR = 0x40,
|
||||
LENIENT_OPTIONAL_CRLF_AFTER_CHUNK = 0x80,
|
||||
LENIENT_OPTIONAL_CR_BEFORE_LF = 0x100,
|
||||
LENIENT_SPACES_AFTER_CHUNK_SIZE = 0x200
|
||||
};
|
||||
typedef enum llhttp_lenient_flags llhttp_lenient_flags_t;
|
||||
|
||||
@@ -164,16 +182,122 @@ enum llhttp_method {
|
||||
HTTP_SET_PARAMETER = 42,
|
||||
HTTP_REDIRECT = 43,
|
||||
HTTP_RECORD = 44,
|
||||
HTTP_FLUSH = 45
|
||||
HTTP_FLUSH = 45,
|
||||
HTTP_QUERY = 46
|
||||
};
|
||||
typedef enum llhttp_method llhttp_method_t;
|
||||
|
||||
enum llhttp_status {
|
||||
HTTP_STATUS_CONTINUE = 100,
|
||||
HTTP_STATUS_SWITCHING_PROTOCOLS = 101,
|
||||
HTTP_STATUS_PROCESSING = 102,
|
||||
HTTP_STATUS_EARLY_HINTS = 103,
|
||||
HTTP_STATUS_RESPONSE_IS_STALE = 110,
|
||||
HTTP_STATUS_REVALIDATION_FAILED = 111,
|
||||
HTTP_STATUS_DISCONNECTED_OPERATION = 112,
|
||||
HTTP_STATUS_HEURISTIC_EXPIRATION = 113,
|
||||
HTTP_STATUS_MISCELLANEOUS_WARNING = 199,
|
||||
HTTP_STATUS_OK = 200,
|
||||
HTTP_STATUS_CREATED = 201,
|
||||
HTTP_STATUS_ACCEPTED = 202,
|
||||
HTTP_STATUS_NON_AUTHORITATIVE_INFORMATION = 203,
|
||||
HTTP_STATUS_NO_CONTENT = 204,
|
||||
HTTP_STATUS_RESET_CONTENT = 205,
|
||||
HTTP_STATUS_PARTIAL_CONTENT = 206,
|
||||
HTTP_STATUS_MULTI_STATUS = 207,
|
||||
HTTP_STATUS_ALREADY_REPORTED = 208,
|
||||
HTTP_STATUS_TRANSFORMATION_APPLIED = 214,
|
||||
HTTP_STATUS_IM_USED = 226,
|
||||
HTTP_STATUS_MISCELLANEOUS_PERSISTENT_WARNING = 299,
|
||||
HTTP_STATUS_MULTIPLE_CHOICES = 300,
|
||||
HTTP_STATUS_MOVED_PERMANENTLY = 301,
|
||||
HTTP_STATUS_FOUND = 302,
|
||||
HTTP_STATUS_SEE_OTHER = 303,
|
||||
HTTP_STATUS_NOT_MODIFIED = 304,
|
||||
HTTP_STATUS_USE_PROXY = 305,
|
||||
HTTP_STATUS_SWITCH_PROXY = 306,
|
||||
HTTP_STATUS_TEMPORARY_REDIRECT = 307,
|
||||
HTTP_STATUS_PERMANENT_REDIRECT = 308,
|
||||
HTTP_STATUS_BAD_REQUEST = 400,
|
||||
HTTP_STATUS_UNAUTHORIZED = 401,
|
||||
HTTP_STATUS_PAYMENT_REQUIRED = 402,
|
||||
HTTP_STATUS_FORBIDDEN = 403,
|
||||
HTTP_STATUS_NOT_FOUND = 404,
|
||||
HTTP_STATUS_METHOD_NOT_ALLOWED = 405,
|
||||
HTTP_STATUS_NOT_ACCEPTABLE = 406,
|
||||
HTTP_STATUS_PROXY_AUTHENTICATION_REQUIRED = 407,
|
||||
HTTP_STATUS_REQUEST_TIMEOUT = 408,
|
||||
HTTP_STATUS_CONFLICT = 409,
|
||||
HTTP_STATUS_GONE = 410,
|
||||
HTTP_STATUS_LENGTH_REQUIRED = 411,
|
||||
HTTP_STATUS_PRECONDITION_FAILED = 412,
|
||||
HTTP_STATUS_PAYLOAD_TOO_LARGE = 413,
|
||||
HTTP_STATUS_URI_TOO_LONG = 414,
|
||||
HTTP_STATUS_UNSUPPORTED_MEDIA_TYPE = 415,
|
||||
HTTP_STATUS_RANGE_NOT_SATISFIABLE = 416,
|
||||
HTTP_STATUS_EXPECTATION_FAILED = 417,
|
||||
HTTP_STATUS_IM_A_TEAPOT = 418,
|
||||
HTTP_STATUS_PAGE_EXPIRED = 419,
|
||||
HTTP_STATUS_ENHANCE_YOUR_CALM = 420,
|
||||
HTTP_STATUS_MISDIRECTED_REQUEST = 421,
|
||||
HTTP_STATUS_UNPROCESSABLE_ENTITY = 422,
|
||||
HTTP_STATUS_LOCKED = 423,
|
||||
HTTP_STATUS_FAILED_DEPENDENCY = 424,
|
||||
HTTP_STATUS_TOO_EARLY = 425,
|
||||
HTTP_STATUS_UPGRADE_REQUIRED = 426,
|
||||
HTTP_STATUS_PRECONDITION_REQUIRED = 428,
|
||||
HTTP_STATUS_TOO_MANY_REQUESTS = 429,
|
||||
HTTP_STATUS_REQUEST_HEADER_FIELDS_TOO_LARGE_UNOFFICIAL = 430,
|
||||
HTTP_STATUS_REQUEST_HEADER_FIELDS_TOO_LARGE = 431,
|
||||
HTTP_STATUS_LOGIN_TIMEOUT = 440,
|
||||
HTTP_STATUS_NO_RESPONSE = 444,
|
||||
HTTP_STATUS_RETRY_WITH = 449,
|
||||
HTTP_STATUS_BLOCKED_BY_PARENTAL_CONTROL = 450,
|
||||
HTTP_STATUS_UNAVAILABLE_FOR_LEGAL_REASONS = 451,
|
||||
HTTP_STATUS_CLIENT_CLOSED_LOAD_BALANCED_REQUEST = 460,
|
||||
HTTP_STATUS_INVALID_X_FORWARDED_FOR = 463,
|
||||
HTTP_STATUS_REQUEST_HEADER_TOO_LARGE = 494,
|
||||
HTTP_STATUS_SSL_CERTIFICATE_ERROR = 495,
|
||||
HTTP_STATUS_SSL_CERTIFICATE_REQUIRED = 496,
|
||||
HTTP_STATUS_HTTP_REQUEST_SENT_TO_HTTPS_PORT = 497,
|
||||
HTTP_STATUS_INVALID_TOKEN = 498,
|
||||
HTTP_STATUS_CLIENT_CLOSED_REQUEST = 499,
|
||||
HTTP_STATUS_INTERNAL_SERVER_ERROR = 500,
|
||||
HTTP_STATUS_NOT_IMPLEMENTED = 501,
|
||||
HTTP_STATUS_BAD_GATEWAY = 502,
|
||||
HTTP_STATUS_SERVICE_UNAVAILABLE = 503,
|
||||
HTTP_STATUS_GATEWAY_TIMEOUT = 504,
|
||||
HTTP_STATUS_HTTP_VERSION_NOT_SUPPORTED = 505,
|
||||
HTTP_STATUS_VARIANT_ALSO_NEGOTIATES = 506,
|
||||
HTTP_STATUS_INSUFFICIENT_STORAGE = 507,
|
||||
HTTP_STATUS_LOOP_DETECTED = 508,
|
||||
HTTP_STATUS_BANDWIDTH_LIMIT_EXCEEDED = 509,
|
||||
HTTP_STATUS_NOT_EXTENDED = 510,
|
||||
HTTP_STATUS_NETWORK_AUTHENTICATION_REQUIRED = 511,
|
||||
HTTP_STATUS_WEB_SERVER_UNKNOWN_ERROR = 520,
|
||||
HTTP_STATUS_WEB_SERVER_IS_DOWN = 521,
|
||||
HTTP_STATUS_CONNECTION_TIMEOUT = 522,
|
||||
HTTP_STATUS_ORIGIN_IS_UNREACHABLE = 523,
|
||||
HTTP_STATUS_TIMEOUT_OCCURED = 524,
|
||||
HTTP_STATUS_SSL_HANDSHAKE_FAILED = 525,
|
||||
HTTP_STATUS_INVALID_SSL_CERTIFICATE = 526,
|
||||
HTTP_STATUS_RAILGUN_ERROR = 527,
|
||||
HTTP_STATUS_SITE_IS_OVERLOADED = 529,
|
||||
HTTP_STATUS_SITE_IS_FROZEN = 530,
|
||||
HTTP_STATUS_IDENTITY_PROVIDER_AUTHENTICATION_ERROR = 561,
|
||||
HTTP_STATUS_NETWORK_READ_TIMEOUT = 598,
|
||||
HTTP_STATUS_NETWORK_CONNECT_TIMEOUT = 599
|
||||
};
|
||||
typedef enum llhttp_status llhttp_status_t;
|
||||
|
||||
#define HTTP_ERRNO_MAP(XX) \
|
||||
XX(0, OK, OK) \
|
||||
XX(1, INTERNAL, INTERNAL) \
|
||||
XX(2, STRICT, STRICT) \
|
||||
XX(25, CR_EXPECTED, CR_EXPECTED) \
|
||||
XX(3, LF_EXPECTED, LF_EXPECTED) \
|
||||
XX(4, UNEXPECTED_CONTENT_LENGTH, UNEXPECTED_CONTENT_LENGTH) \
|
||||
XX(30, UNEXPECTED_SPACE, UNEXPECTED_SPACE) \
|
||||
XX(5, CLOSED_CONNECTION, CLOSED_CONNECTION) \
|
||||
XX(6, INVALID_METHOD, INVALID_METHOD) \
|
||||
XX(7, INVALID_URL, INVALID_URL) \
|
||||
@@ -194,9 +318,74 @@ typedef enum llhttp_method llhttp_method_t;
|
||||
XX(22, PAUSED_UPGRADE, PAUSED_UPGRADE) \
|
||||
XX(23, PAUSED_H2_UPGRADE, PAUSED_H2_UPGRADE) \
|
||||
XX(24, USER, USER) \
|
||||
XX(26, CB_URL_COMPLETE, CB_URL_COMPLETE) \
|
||||
XX(27, CB_STATUS_COMPLETE, CB_STATUS_COMPLETE) \
|
||||
XX(32, CB_METHOD_COMPLETE, CB_METHOD_COMPLETE) \
|
||||
XX(33, CB_VERSION_COMPLETE, CB_VERSION_COMPLETE) \
|
||||
XX(28, CB_HEADER_FIELD_COMPLETE, CB_HEADER_FIELD_COMPLETE) \
|
||||
XX(29, CB_HEADER_VALUE_COMPLETE, CB_HEADER_VALUE_COMPLETE) \
|
||||
XX(34, CB_CHUNK_EXTENSION_NAME_COMPLETE, CB_CHUNK_EXTENSION_NAME_COMPLETE) \
|
||||
XX(35, CB_CHUNK_EXTENSION_VALUE_COMPLETE, CB_CHUNK_EXTENSION_VALUE_COMPLETE) \
|
||||
XX(31, CB_RESET, CB_RESET) \
|
||||
XX(38, CB_PROTOCOL_COMPLETE, CB_PROTOCOL_COMPLETE) \
|
||||
|
||||
|
||||
#define HTTP_METHOD_MAP(XX) \
|
||||
XX(0, DELETE, DELETE) \
|
||||
XX(1, GET, GET) \
|
||||
XX(2, HEAD, HEAD) \
|
||||
XX(3, POST, POST) \
|
||||
XX(4, PUT, PUT) \
|
||||
XX(5, CONNECT, CONNECT) \
|
||||
XX(6, OPTIONS, OPTIONS) \
|
||||
XX(7, TRACE, TRACE) \
|
||||
XX(8, COPY, COPY) \
|
||||
XX(9, LOCK, LOCK) \
|
||||
XX(10, MKCOL, MKCOL) \
|
||||
XX(11, MOVE, MOVE) \
|
||||
XX(12, PROPFIND, PROPFIND) \
|
||||
XX(13, PROPPATCH, PROPPATCH) \
|
||||
XX(14, SEARCH, SEARCH) \
|
||||
XX(15, UNLOCK, UNLOCK) \
|
||||
XX(16, BIND, BIND) \
|
||||
XX(17, REBIND, REBIND) \
|
||||
XX(18, UNBIND, UNBIND) \
|
||||
XX(19, ACL, ACL) \
|
||||
XX(20, REPORT, REPORT) \
|
||||
XX(21, MKACTIVITY, MKACTIVITY) \
|
||||
XX(22, CHECKOUT, CHECKOUT) \
|
||||
XX(23, MERGE, MERGE) \
|
||||
XX(24, MSEARCH, M-SEARCH) \
|
||||
XX(25, NOTIFY, NOTIFY) \
|
||||
XX(26, SUBSCRIBE, SUBSCRIBE) \
|
||||
XX(27, UNSUBSCRIBE, UNSUBSCRIBE) \
|
||||
XX(28, PATCH, PATCH) \
|
||||
XX(29, PURGE, PURGE) \
|
||||
XX(30, MKCALENDAR, MKCALENDAR) \
|
||||
XX(31, LINK, LINK) \
|
||||
XX(32, UNLINK, UNLINK) \
|
||||
XX(33, SOURCE, SOURCE) \
|
||||
XX(46, QUERY, QUERY) \
|
||||
|
||||
|
||||
#define RTSP_METHOD_MAP(XX) \
|
||||
XX(1, GET, GET) \
|
||||
XX(3, POST, POST) \
|
||||
XX(6, OPTIONS, OPTIONS) \
|
||||
XX(35, DESCRIBE, DESCRIBE) \
|
||||
XX(36, ANNOUNCE, ANNOUNCE) \
|
||||
XX(37, SETUP, SETUP) \
|
||||
XX(38, PLAY, PLAY) \
|
||||
XX(39, PAUSE, PAUSE) \
|
||||
XX(40, TEARDOWN, TEARDOWN) \
|
||||
XX(41, GET_PARAMETER, GET_PARAMETER) \
|
||||
XX(42, SET_PARAMETER, SET_PARAMETER) \
|
||||
XX(43, REDIRECT, REDIRECT) \
|
||||
XX(44, RECORD, RECORD) \
|
||||
XX(45, FLUSH, FLUSH) \
|
||||
|
||||
|
||||
#define HTTP_ALL_METHOD_MAP(XX) \
|
||||
XX(0, DELETE, DELETE) \
|
||||
XX(1, GET, GET) \
|
||||
XX(2, HEAD, HEAD) \
|
||||
@@ -243,14 +432,117 @@ typedef enum llhttp_method llhttp_method_t;
|
||||
XX(43, REDIRECT, REDIRECT) \
|
||||
XX(44, RECORD, RECORD) \
|
||||
XX(45, FLUSH, FLUSH) \
|
||||
XX(46, QUERY, QUERY) \
|
||||
|
||||
|
||||
#define HTTP_STATUS_MAP(XX) \
|
||||
XX(100, CONTINUE, CONTINUE) \
|
||||
XX(101, SWITCHING_PROTOCOLS, SWITCHING_PROTOCOLS) \
|
||||
XX(102, PROCESSING, PROCESSING) \
|
||||
XX(103, EARLY_HINTS, EARLY_HINTS) \
|
||||
XX(110, RESPONSE_IS_STALE, RESPONSE_IS_STALE) \
|
||||
XX(111, REVALIDATION_FAILED, REVALIDATION_FAILED) \
|
||||
XX(112, DISCONNECTED_OPERATION, DISCONNECTED_OPERATION) \
|
||||
XX(113, HEURISTIC_EXPIRATION, HEURISTIC_EXPIRATION) \
|
||||
XX(199, MISCELLANEOUS_WARNING, MISCELLANEOUS_WARNING) \
|
||||
XX(200, OK, OK) \
|
||||
XX(201, CREATED, CREATED) \
|
||||
XX(202, ACCEPTED, ACCEPTED) \
|
||||
XX(203, NON_AUTHORITATIVE_INFORMATION, NON_AUTHORITATIVE_INFORMATION) \
|
||||
XX(204, NO_CONTENT, NO_CONTENT) \
|
||||
XX(205, RESET_CONTENT, RESET_CONTENT) \
|
||||
XX(206, PARTIAL_CONTENT, PARTIAL_CONTENT) \
|
||||
XX(207, MULTI_STATUS, MULTI_STATUS) \
|
||||
XX(208, ALREADY_REPORTED, ALREADY_REPORTED) \
|
||||
XX(214, TRANSFORMATION_APPLIED, TRANSFORMATION_APPLIED) \
|
||||
XX(226, IM_USED, IM_USED) \
|
||||
XX(299, MISCELLANEOUS_PERSISTENT_WARNING, MISCELLANEOUS_PERSISTENT_WARNING) \
|
||||
XX(300, MULTIPLE_CHOICES, MULTIPLE_CHOICES) \
|
||||
XX(301, MOVED_PERMANENTLY, MOVED_PERMANENTLY) \
|
||||
XX(302, FOUND, FOUND) \
|
||||
XX(303, SEE_OTHER, SEE_OTHER) \
|
||||
XX(304, NOT_MODIFIED, NOT_MODIFIED) \
|
||||
XX(305, USE_PROXY, USE_PROXY) \
|
||||
XX(306, SWITCH_PROXY, SWITCH_PROXY) \
|
||||
XX(307, TEMPORARY_REDIRECT, TEMPORARY_REDIRECT) \
|
||||
XX(308, PERMANENT_REDIRECT, PERMANENT_REDIRECT) \
|
||||
XX(400, BAD_REQUEST, BAD_REQUEST) \
|
||||
XX(401, UNAUTHORIZED, UNAUTHORIZED) \
|
||||
XX(402, PAYMENT_REQUIRED, PAYMENT_REQUIRED) \
|
||||
XX(403, FORBIDDEN, FORBIDDEN) \
|
||||
XX(404, NOT_FOUND, NOT_FOUND) \
|
||||
XX(405, METHOD_NOT_ALLOWED, METHOD_NOT_ALLOWED) \
|
||||
XX(406, NOT_ACCEPTABLE, NOT_ACCEPTABLE) \
|
||||
XX(407, PROXY_AUTHENTICATION_REQUIRED, PROXY_AUTHENTICATION_REQUIRED) \
|
||||
XX(408, REQUEST_TIMEOUT, REQUEST_TIMEOUT) \
|
||||
XX(409, CONFLICT, CONFLICT) \
|
||||
XX(410, GONE, GONE) \
|
||||
XX(411, LENGTH_REQUIRED, LENGTH_REQUIRED) \
|
||||
XX(412, PRECONDITION_FAILED, PRECONDITION_FAILED) \
|
||||
XX(413, PAYLOAD_TOO_LARGE, PAYLOAD_TOO_LARGE) \
|
||||
XX(414, URI_TOO_LONG, URI_TOO_LONG) \
|
||||
XX(415, UNSUPPORTED_MEDIA_TYPE, UNSUPPORTED_MEDIA_TYPE) \
|
||||
XX(416, RANGE_NOT_SATISFIABLE, RANGE_NOT_SATISFIABLE) \
|
||||
XX(417, EXPECTATION_FAILED, EXPECTATION_FAILED) \
|
||||
XX(418, IM_A_TEAPOT, IM_A_TEAPOT) \
|
||||
XX(419, PAGE_EXPIRED, PAGE_EXPIRED) \
|
||||
XX(420, ENHANCE_YOUR_CALM, ENHANCE_YOUR_CALM) \
|
||||
XX(421, MISDIRECTED_REQUEST, MISDIRECTED_REQUEST) \
|
||||
XX(422, UNPROCESSABLE_ENTITY, UNPROCESSABLE_ENTITY) \
|
||||
XX(423, LOCKED, LOCKED) \
|
||||
XX(424, FAILED_DEPENDENCY, FAILED_DEPENDENCY) \
|
||||
XX(425, TOO_EARLY, TOO_EARLY) \
|
||||
XX(426, UPGRADE_REQUIRED, UPGRADE_REQUIRED) \
|
||||
XX(428, PRECONDITION_REQUIRED, PRECONDITION_REQUIRED) \
|
||||
XX(429, TOO_MANY_REQUESTS, TOO_MANY_REQUESTS) \
|
||||
XX(430, REQUEST_HEADER_FIELDS_TOO_LARGE_UNOFFICIAL, REQUEST_HEADER_FIELDS_TOO_LARGE_UNOFFICIAL) \
|
||||
XX(431, REQUEST_HEADER_FIELDS_TOO_LARGE, REQUEST_HEADER_FIELDS_TOO_LARGE) \
|
||||
XX(440, LOGIN_TIMEOUT, LOGIN_TIMEOUT) \
|
||||
XX(444, NO_RESPONSE, NO_RESPONSE) \
|
||||
XX(449, RETRY_WITH, RETRY_WITH) \
|
||||
XX(450, BLOCKED_BY_PARENTAL_CONTROL, BLOCKED_BY_PARENTAL_CONTROL) \
|
||||
XX(451, UNAVAILABLE_FOR_LEGAL_REASONS, UNAVAILABLE_FOR_LEGAL_REASONS) \
|
||||
XX(460, CLIENT_CLOSED_LOAD_BALANCED_REQUEST, CLIENT_CLOSED_LOAD_BALANCED_REQUEST) \
|
||||
XX(463, INVALID_X_FORWARDED_FOR, INVALID_X_FORWARDED_FOR) \
|
||||
XX(494, REQUEST_HEADER_TOO_LARGE, REQUEST_HEADER_TOO_LARGE) \
|
||||
XX(495, SSL_CERTIFICATE_ERROR, SSL_CERTIFICATE_ERROR) \
|
||||
XX(496, SSL_CERTIFICATE_REQUIRED, SSL_CERTIFICATE_REQUIRED) \
|
||||
XX(497, HTTP_REQUEST_SENT_TO_HTTPS_PORT, HTTP_REQUEST_SENT_TO_HTTPS_PORT) \
|
||||
XX(498, INVALID_TOKEN, INVALID_TOKEN) \
|
||||
XX(499, CLIENT_CLOSED_REQUEST, CLIENT_CLOSED_REQUEST) \
|
||||
XX(500, INTERNAL_SERVER_ERROR, INTERNAL_SERVER_ERROR) \
|
||||
XX(501, NOT_IMPLEMENTED, NOT_IMPLEMENTED) \
|
||||
XX(502, BAD_GATEWAY, BAD_GATEWAY) \
|
||||
XX(503, SERVICE_UNAVAILABLE, SERVICE_UNAVAILABLE) \
|
||||
XX(504, GATEWAY_TIMEOUT, GATEWAY_TIMEOUT) \
|
||||
XX(505, HTTP_VERSION_NOT_SUPPORTED, HTTP_VERSION_NOT_SUPPORTED) \
|
||||
XX(506, VARIANT_ALSO_NEGOTIATES, VARIANT_ALSO_NEGOTIATES) \
|
||||
XX(507, INSUFFICIENT_STORAGE, INSUFFICIENT_STORAGE) \
|
||||
XX(508, LOOP_DETECTED, LOOP_DETECTED) \
|
||||
XX(509, BANDWIDTH_LIMIT_EXCEEDED, BANDWIDTH_LIMIT_EXCEEDED) \
|
||||
XX(510, NOT_EXTENDED, NOT_EXTENDED) \
|
||||
XX(511, NETWORK_AUTHENTICATION_REQUIRED, NETWORK_AUTHENTICATION_REQUIRED) \
|
||||
XX(520, WEB_SERVER_UNKNOWN_ERROR, WEB_SERVER_UNKNOWN_ERROR) \
|
||||
XX(521, WEB_SERVER_IS_DOWN, WEB_SERVER_IS_DOWN) \
|
||||
XX(522, CONNECTION_TIMEOUT, CONNECTION_TIMEOUT) \
|
||||
XX(523, ORIGIN_IS_UNREACHABLE, ORIGIN_IS_UNREACHABLE) \
|
||||
XX(524, TIMEOUT_OCCURED, TIMEOUT_OCCURED) \
|
||||
XX(525, SSL_HANDSHAKE_FAILED, SSL_HANDSHAKE_FAILED) \
|
||||
XX(526, INVALID_SSL_CERTIFICATE, INVALID_SSL_CERTIFICATE) \
|
||||
XX(527, RAILGUN_ERROR, RAILGUN_ERROR) \
|
||||
XX(529, SITE_IS_OVERLOADED, SITE_IS_OVERLOADED) \
|
||||
XX(530, SITE_IS_FROZEN, SITE_IS_FROZEN) \
|
||||
XX(561, IDENTITY_PROVIDER_AUTHENTICATION_ERROR, IDENTITY_PROVIDER_AUTHENTICATION_ERROR) \
|
||||
XX(598, NETWORK_READ_TIMEOUT, NETWORK_READ_TIMEOUT) \
|
||||
XX(599, NETWORK_CONNECT_TIMEOUT, NETWORK_CONNECT_TIMEOUT) \
|
||||
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
#endif /* LLLLHTTP_C_HEADERS_ */
|
||||
|
||||
|
||||
#ifndef INCLUDE_LLHTTP_API_H_
|
||||
#define INCLUDE_LLHTTP_API_H_
|
||||
#ifdef __cplusplus
|
||||
@@ -274,10 +566,16 @@ struct llhttp_settings_s {
|
||||
/* Possible return values 0, -1, `HPE_PAUSED` */
|
||||
llhttp_cb on_message_begin;
|
||||
|
||||
/* Possible return values 0, -1, HPE_USER */
|
||||
llhttp_data_cb on_protocol;
|
||||
llhttp_data_cb on_url;
|
||||
llhttp_data_cb on_status;
|
||||
llhttp_data_cb on_method;
|
||||
llhttp_data_cb on_version;
|
||||
llhttp_data_cb on_header_field;
|
||||
llhttp_data_cb on_header_value;
|
||||
llhttp_data_cb on_chunk_extension_name;
|
||||
llhttp_data_cb on_chunk_extension_value;
|
||||
|
||||
/* Possible return values:
|
||||
* 0 - Proceed normally
|
||||
@@ -290,10 +588,20 @@ struct llhttp_settings_s {
|
||||
*/
|
||||
llhttp_cb on_headers_complete;
|
||||
|
||||
/* Possible return values 0, -1, HPE_USER */
|
||||
llhttp_data_cb on_body;
|
||||
|
||||
/* Possible return values 0, -1, `HPE_PAUSED` */
|
||||
llhttp_cb on_message_complete;
|
||||
llhttp_cb on_protocol_complete;
|
||||
llhttp_cb on_url_complete;
|
||||
llhttp_cb on_status_complete;
|
||||
llhttp_cb on_method_complete;
|
||||
llhttp_cb on_version_complete;
|
||||
llhttp_cb on_header_field_complete;
|
||||
llhttp_cb on_header_value_complete;
|
||||
llhttp_cb on_chunk_extension_name_complete;
|
||||
llhttp_cb on_chunk_extension_value_complete;
|
||||
|
||||
/* When on_chunk_header is called, the current chunk length is stored
|
||||
* in parser->content_length.
|
||||
@@ -301,11 +609,7 @@ struct llhttp_settings_s {
|
||||
*/
|
||||
llhttp_cb on_chunk_header;
|
||||
llhttp_cb on_chunk_complete;
|
||||
|
||||
llhttp_cb on_url_complete;
|
||||
llhttp_cb on_status_complete;
|
||||
llhttp_cb on_header_field_complete;
|
||||
llhttp_cb on_header_value_complete;
|
||||
llhttp_cb on_reset;
|
||||
};
|
||||
|
||||
/* Initialize the parser with specific type and user settings.
|
||||
@@ -318,8 +622,6 @@ LLHTTP_EXPORT
|
||||
void llhttp_init(llhttp_t* parser, llhttp_type_t type,
|
||||
const llhttp_settings_t* settings);
|
||||
|
||||
#if defined(__wasm__)
|
||||
|
||||
LLHTTP_EXPORT
|
||||
llhttp_t* llhttp_alloc(llhttp_type_t type);
|
||||
|
||||
@@ -344,8 +646,6 @@ int llhttp_get_status_code(llhttp_t* parser);
|
||||
LLHTTP_EXPORT
|
||||
uint8_t llhttp_get_upgrade(llhttp_t* parser);
|
||||
|
||||
#endif // defined(__wasm__)
|
||||
|
||||
/* Reset an already initialized parser back to the start state, preserving the
|
||||
* existing parser type, callback settings, user data, and lenient flags.
|
||||
*/
|
||||
@@ -459,6 +759,9 @@ const char* llhttp_errno_name(llhttp_errno_t err);
|
||||
LLHTTP_EXPORT
|
||||
const char* llhttp_method_name(llhttp_method_t method);
|
||||
|
||||
/* Returns textual name of HTTP status */
|
||||
LLHTTP_EXPORT
|
||||
const char* llhttp_status_name(llhttp_status_t status);
|
||||
|
||||
/* Enables/disables lenient header value parsing (disabled by default).
|
||||
*
|
||||
@@ -467,7 +770,8 @@ const char* llhttp_method_name(llhttp_method_t method);
|
||||
* `HPE_INVALID_HEADER_TOKEN` will be raised for incorrect header values when
|
||||
* lenient parsing is "on".
|
||||
*
|
||||
* **(USE AT YOUR OWN RISK)**
|
||||
* **Enabling this flag can pose a security issue since you will be exposed to
|
||||
* request smuggling attacks. USE WITH CAUTION!**
|
||||
*/
|
||||
LLHTTP_EXPORT
|
||||
void llhttp_set_lenient_headers(llhttp_t* parser, int enabled);
|
||||
@@ -481,7 +785,8 @@ void llhttp_set_lenient_headers(llhttp_t* parser, int enabled);
|
||||
* request smuggling, but may be less desirable for small number of cases
|
||||
* involving legacy servers.
|
||||
*
|
||||
* **(USE AT YOUR OWN RISK)**
|
||||
* **Enabling this flag can pose a security issue since you will be exposed to
|
||||
* request smuggling attacks. USE WITH CAUTION!**
|
||||
*/
|
||||
LLHTTP_EXPORT
|
||||
void llhttp_set_lenient_chunked_length(llhttp_t* parser, int enabled);
|
||||
@@ -496,13 +801,105 @@ void llhttp_set_lenient_chunked_length(llhttp_t* parser, int enabled);
|
||||
* but might interact badly with outdated and insecure clients. With this flag
|
||||
* the extra request/response will be parsed normally.
|
||||
*
|
||||
* **(USE AT YOUR OWN RISK)**
|
||||
* **Enabling this flag can pose a security issue since you will be exposed to
|
||||
* poisoning attacks. USE WITH CAUTION!**
|
||||
*/
|
||||
LLHTTP_EXPORT
|
||||
void llhttp_set_lenient_keep_alive(llhttp_t* parser, int enabled);
|
||||
|
||||
/* Enables/disables lenient handling of `Transfer-Encoding` header.
|
||||
*
|
||||
* Normally `llhttp` would error when a `Transfer-Encoding` has `chunked` value
|
||||
* and another value after it (either in a single header or in multiple
|
||||
* headers whose values are internally joined using `, `).
|
||||
* This is mandated by the spec to reliably determine request body size and thus
|
||||
* avoid request smuggling.
|
||||
* With this flag the extra value will be parsed normally.
|
||||
*
|
||||
* **Enabling this flag can pose a security issue since you will be exposed to
|
||||
* request smuggling attacks. USE WITH CAUTION!**
|
||||
*/
|
||||
LLHTTP_EXPORT
|
||||
void llhttp_set_lenient_transfer_encoding(llhttp_t* parser, int enabled);
|
||||
|
||||
/* Enables/disables lenient handling of HTTP version.
|
||||
*
|
||||
* Normally `llhttp` would error when the HTTP version in the request or status line
|
||||
* is not `0.9`, `1.0`, `1.1` or `2.0`.
|
||||
* With this flag the invalid value will be parsed normally.
|
||||
*
|
||||
* **Enabling this flag can pose a security issue since you will allow unsupported
|
||||
* HTTP versions. USE WITH CAUTION!**
|
||||
*/
|
||||
LLHTTP_EXPORT
|
||||
void llhttp_set_lenient_version(llhttp_t* parser, int enabled);
|
||||
|
||||
/* Enables/disables lenient handling of additional data received after a message ends
|
||||
* and keep-alive is disabled.
|
||||
*
|
||||
* Normally `llhttp` would error when additional unexpected data is received if the message
|
||||
* contains the `Connection` header with `close` value.
|
||||
* With this flag the extra data will be discarded without throwing an error.
|
||||
*
|
||||
* **Enabling this flag can pose a security issue since you will be exposed to
|
||||
* poisoning attacks. USE WITH CAUTION!**
|
||||
*/
|
||||
LLHTTP_EXPORT
|
||||
void llhttp_set_lenient_data_after_close(llhttp_t* parser, int enabled);
|
||||
|
||||
/* Enables/disables lenient handling of incomplete CRLF sequences.
|
||||
*
|
||||
* Normally `llhttp` would error when a CR is not followed by LF when terminating the
|
||||
* request line, the status line, the headers or a chunk header.
|
||||
* With this flag only a CR is required to terminate such sections.
|
||||
*
|
||||
* **Enabling this flag can pose a security issue since you will be exposed to
|
||||
* request smuggling attacks. USE WITH CAUTION!**
|
||||
*/
|
||||
LLHTTP_EXPORT
|
||||
void llhttp_set_lenient_optional_lf_after_cr(llhttp_t* parser, int enabled);
|
||||
|
||||
/*
|
||||
* Enables/disables lenient handling of line separators.
|
||||
*
|
||||
* Normally `llhttp` would error when a LF is not preceded by CR when terminating the
|
||||
* request line, the status line, the headers, a chunk header or a chunk data.
|
||||
* With this flag only a LF is required to terminate such sections.
|
||||
*
|
||||
* **Enabling this flag can pose a security issue since you will be exposed to
|
||||
* request smuggling attacks. USE WITH CAUTION!**
|
||||
*/
|
||||
LLHTTP_EXPORT
|
||||
void llhttp_set_lenient_optional_cr_before_lf(llhttp_t* parser, int enabled);
|
||||
|
||||
/* Enables/disables lenient handling of chunks not separated via CRLF.
|
||||
*
|
||||
* Normally `llhttp` would error when after a chunk data a CRLF is missing before
|
||||
* starting a new chunk.
|
||||
* With this flag the new chunk can start immediately after the previous one.
|
||||
*
|
||||
* **Enabling this flag can pose a security issue since you will be exposed to
|
||||
* request smuggling attacks. USE WITH CAUTION!**
|
||||
*/
|
||||
LLHTTP_EXPORT
|
||||
void llhttp_set_lenient_optional_crlf_after_chunk(llhttp_t* parser, int enabled);
|
||||
|
||||
/* Enables/disables lenient handling of spaces after chunk size.
|
||||
*
|
||||
* Normally `llhttp` would error when a chunk size is followed by one or more
|
||||
* spaces instead of a CRLF or `;`.
|
||||
* With this flag this check is disabled.
|
||||
*
|
||||
* **Enabling this flag can pose a security issue since you will be exposed to
|
||||
* request smuggling attacks. USE WITH CAUTION!**
|
||||
*/
|
||||
LLHTTP_EXPORT
|
||||
void llhttp_set_lenient_spaces_after_chunk_size(llhttp_t* parser, int enabled);
|
||||
|
||||
#ifdef __cplusplus
|
||||
} /* extern "C" */
|
||||
#endif
|
||||
#endif /* INCLUDE_LLHTTP_API_H_ */
|
||||
|
||||
|
||||
#endif /* INCLUDE_LLHTTP_H_ */
|
||||
|
||||
541
src/3rdparty/rapidjson/allocators.h
vendored
541
src/3rdparty/rapidjson/allocators.h
vendored
@@ -1,21 +1,29 @@
|
||||
// Tencent is pleased to support the open source community by making RapidJSON available.
|
||||
//
|
||||
// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
|
||||
//
|
||||
// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
|
||||
//
|
||||
// Licensed under the MIT License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
//
|
||||
// http://opensource.org/licenses/MIT
|
||||
//
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// Unless required by applicable law or agreed to in writing, software distributed
|
||||
// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
|
||||
// CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
// specific language governing permissions and limitations under the License.
|
||||
|
||||
#ifndef RAPIDJSON_ALLOCATORS_H_
|
||||
#define RAPIDJSON_ALLOCATORS_H_
|
||||
|
||||
#include "rapidjson.h"
|
||||
#include "internal/meta.h"
|
||||
|
||||
#include <memory>
|
||||
#include <limits>
|
||||
|
||||
#if RAPIDJSON_HAS_CXX11
|
||||
#include <type_traits>
|
||||
#endif
|
||||
|
||||
RAPIDJSON_NAMESPACE_BEGIN
|
||||
|
||||
@@ -24,10 +32,10 @@ RAPIDJSON_NAMESPACE_BEGIN
|
||||
|
||||
/*! \class rapidjson::Allocator
|
||||
\brief Concept for allocating, resizing and freeing memory block.
|
||||
|
||||
|
||||
Note that Malloc() and Realloc() are non-static but Free() is static.
|
||||
|
||||
So if an allocator need to support Free(), it needs to put its pointer in
|
||||
|
||||
So if an allocator need to support Free(), it needs to put its pointer in
|
||||
the header of memory block.
|
||||
|
||||
\code
|
||||
@@ -75,28 +83,35 @@ concept Allocator {
|
||||
class CrtAllocator {
|
||||
public:
|
||||
static const bool kNeedFree = true;
|
||||
void* Malloc(size_t size) {
|
||||
void* Malloc(size_t size) {
|
||||
if (size) // behavior of malloc(0) is implementation defined.
|
||||
return std::malloc(size);
|
||||
return RAPIDJSON_MALLOC(size);
|
||||
else
|
||||
return NULL; // standardize to returning NULL.
|
||||
}
|
||||
void* Realloc(void* originalPtr, size_t originalSize, size_t newSize) {
|
||||
(void)originalSize;
|
||||
if (newSize == 0) {
|
||||
std::free(originalPtr);
|
||||
RAPIDJSON_FREE(originalPtr);
|
||||
return NULL;
|
||||
}
|
||||
return std::realloc(originalPtr, newSize);
|
||||
return RAPIDJSON_REALLOC(originalPtr, newSize);
|
||||
}
|
||||
static void Free(void *ptr) RAPIDJSON_NOEXCEPT { RAPIDJSON_FREE(ptr); }
|
||||
|
||||
bool operator==(const CrtAllocator&) const RAPIDJSON_NOEXCEPT {
|
||||
return true;
|
||||
}
|
||||
bool operator!=(const CrtAllocator&) const RAPIDJSON_NOEXCEPT {
|
||||
return false;
|
||||
}
|
||||
static void Free(void *ptr) { std::free(ptr); }
|
||||
};
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////////
|
||||
// MemoryPoolAllocator
|
||||
|
||||
//! Default memory allocator used by the parser and DOM.
|
||||
/*! This allocator allocate memory blocks from pre-allocated memory chunks.
|
||||
/*! This allocator allocate memory blocks from pre-allocated memory chunks.
|
||||
|
||||
It does not free memory blocks. And Realloc() only allocates new memory.
|
||||
|
||||
@@ -113,16 +128,64 @@ public:
|
||||
*/
|
||||
template <typename BaseAllocator = CrtAllocator>
|
||||
class MemoryPoolAllocator {
|
||||
//! Chunk header for prepending to each chunk.
|
||||
/*! Chunks are stored as a singly linked list.
|
||||
*/
|
||||
struct ChunkHeader {
|
||||
size_t capacity; //!< Capacity of the chunk in bytes (excluding the header itself).
|
||||
size_t size; //!< Current size of allocated memory in bytes.
|
||||
ChunkHeader *next; //!< Next chunk in the linked list.
|
||||
};
|
||||
|
||||
struct SharedData {
|
||||
ChunkHeader *chunkHead; //!< Head of the chunk linked-list. Only the head chunk serves allocation.
|
||||
BaseAllocator* ownBaseAllocator; //!< base allocator created by this object.
|
||||
size_t refcount;
|
||||
bool ownBuffer;
|
||||
};
|
||||
|
||||
static const size_t SIZEOF_SHARED_DATA = RAPIDJSON_ALIGN(sizeof(SharedData));
|
||||
static const size_t SIZEOF_CHUNK_HEADER = RAPIDJSON_ALIGN(sizeof(ChunkHeader));
|
||||
|
||||
static inline ChunkHeader *GetChunkHead(SharedData *shared)
|
||||
{
|
||||
return reinterpret_cast<ChunkHeader*>(reinterpret_cast<uint8_t*>(shared) + SIZEOF_SHARED_DATA);
|
||||
}
|
||||
static inline uint8_t *GetChunkBuffer(SharedData *shared)
|
||||
{
|
||||
return reinterpret_cast<uint8_t*>(shared->chunkHead) + SIZEOF_CHUNK_HEADER;
|
||||
}
|
||||
|
||||
static const size_t kDefaultChunkCapacity = RAPIDJSON_ALLOCATOR_DEFAULT_CHUNK_CAPACITY; //!< Default chunk capacity.
|
||||
|
||||
public:
|
||||
static const bool kNeedFree = false; //!< Tell users that no need to call Free() with this allocator. (concept Allocator)
|
||||
static const bool kRefCounted = true; //!< Tell users that this allocator is reference counted on copy
|
||||
|
||||
//! Constructor with chunkSize.
|
||||
/*! \param chunkSize The size of memory chunk. The default is kDefaultChunkCapacity.
|
||||
\param baseAllocator The allocator for allocating memory chunks.
|
||||
*/
|
||||
MemoryPoolAllocator(size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) :
|
||||
chunkHead_(0), chunk_capacity_(chunkSize), userBuffer_(0), baseAllocator_(baseAllocator), ownBaseAllocator_(0)
|
||||
explicit
|
||||
MemoryPoolAllocator(size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) :
|
||||
chunk_capacity_(chunkSize),
|
||||
baseAllocator_(baseAllocator ? baseAllocator : RAPIDJSON_NEW(BaseAllocator)()),
|
||||
shared_(static_cast<SharedData*>(baseAllocator_ ? baseAllocator_->Malloc(SIZEOF_SHARED_DATA + SIZEOF_CHUNK_HEADER) : 0))
|
||||
{
|
||||
RAPIDJSON_ASSERT(baseAllocator_ != 0);
|
||||
RAPIDJSON_ASSERT(shared_ != 0);
|
||||
if (baseAllocator) {
|
||||
shared_->ownBaseAllocator = 0;
|
||||
}
|
||||
else {
|
||||
shared_->ownBaseAllocator = baseAllocator_;
|
||||
}
|
||||
shared_->chunkHead = GetChunkHead(shared_);
|
||||
shared_->chunkHead->capacity = 0;
|
||||
shared_->chunkHead->size = 0;
|
||||
shared_->chunkHead->next = 0;
|
||||
shared_->ownBuffer = true;
|
||||
shared_->refcount = 1;
|
||||
}
|
||||
|
||||
//! Constructor with user-supplied buffer.
|
||||
@@ -136,41 +199,101 @@ public:
|
||||
\param baseAllocator The allocator for allocating memory chunks.
|
||||
*/
|
||||
MemoryPoolAllocator(void *buffer, size_t size, size_t chunkSize = kDefaultChunkCapacity, BaseAllocator* baseAllocator = 0) :
|
||||
chunkHead_(0), chunk_capacity_(chunkSize), userBuffer_(buffer), baseAllocator_(baseAllocator), ownBaseAllocator_(0)
|
||||
chunk_capacity_(chunkSize),
|
||||
baseAllocator_(baseAllocator),
|
||||
shared_(static_cast<SharedData*>(AlignBuffer(buffer, size)))
|
||||
{
|
||||
RAPIDJSON_ASSERT(buffer != 0);
|
||||
RAPIDJSON_ASSERT(size > sizeof(ChunkHeader));
|
||||
chunkHead_ = reinterpret_cast<ChunkHeader*>(buffer);
|
||||
chunkHead_->capacity = size - sizeof(ChunkHeader);
|
||||
chunkHead_->size = 0;
|
||||
chunkHead_->next = 0;
|
||||
RAPIDJSON_ASSERT(size >= SIZEOF_SHARED_DATA + SIZEOF_CHUNK_HEADER);
|
||||
shared_->chunkHead = GetChunkHead(shared_);
|
||||
shared_->chunkHead->capacity = size - SIZEOF_SHARED_DATA - SIZEOF_CHUNK_HEADER;
|
||||
shared_->chunkHead->size = 0;
|
||||
shared_->chunkHead->next = 0;
|
||||
shared_->ownBaseAllocator = 0;
|
||||
shared_->ownBuffer = false;
|
||||
shared_->refcount = 1;
|
||||
}
|
||||
|
||||
MemoryPoolAllocator(const MemoryPoolAllocator& rhs) RAPIDJSON_NOEXCEPT :
|
||||
chunk_capacity_(rhs.chunk_capacity_),
|
||||
baseAllocator_(rhs.baseAllocator_),
|
||||
shared_(rhs.shared_)
|
||||
{
|
||||
RAPIDJSON_NOEXCEPT_ASSERT(shared_->refcount > 0);
|
||||
++shared_->refcount;
|
||||
}
|
||||
MemoryPoolAllocator& operator=(const MemoryPoolAllocator& rhs) RAPIDJSON_NOEXCEPT
|
||||
{
|
||||
RAPIDJSON_NOEXCEPT_ASSERT(rhs.shared_->refcount > 0);
|
||||
++rhs.shared_->refcount;
|
||||
this->~MemoryPoolAllocator();
|
||||
baseAllocator_ = rhs.baseAllocator_;
|
||||
chunk_capacity_ = rhs.chunk_capacity_;
|
||||
shared_ = rhs.shared_;
|
||||
return *this;
|
||||
}
|
||||
|
||||
#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
|
||||
MemoryPoolAllocator(MemoryPoolAllocator&& rhs) RAPIDJSON_NOEXCEPT :
|
||||
chunk_capacity_(rhs.chunk_capacity_),
|
||||
baseAllocator_(rhs.baseAllocator_),
|
||||
shared_(rhs.shared_)
|
||||
{
|
||||
RAPIDJSON_NOEXCEPT_ASSERT(rhs.shared_->refcount > 0);
|
||||
rhs.shared_ = 0;
|
||||
}
|
||||
MemoryPoolAllocator& operator=(MemoryPoolAllocator&& rhs) RAPIDJSON_NOEXCEPT
|
||||
{
|
||||
RAPIDJSON_NOEXCEPT_ASSERT(rhs.shared_->refcount > 0);
|
||||
this->~MemoryPoolAllocator();
|
||||
baseAllocator_ = rhs.baseAllocator_;
|
||||
chunk_capacity_ = rhs.chunk_capacity_;
|
||||
shared_ = rhs.shared_;
|
||||
rhs.shared_ = 0;
|
||||
return *this;
|
||||
}
|
||||
#endif
|
||||
|
||||
//! Destructor.
|
||||
/*! This deallocates all memory chunks, excluding the user-supplied buffer.
|
||||
*/
|
||||
~MemoryPoolAllocator() {
|
||||
~MemoryPoolAllocator() RAPIDJSON_NOEXCEPT {
|
||||
if (!shared_) {
|
||||
// do nothing if moved
|
||||
return;
|
||||
}
|
||||
if (shared_->refcount > 1) {
|
||||
--shared_->refcount;
|
||||
return;
|
||||
}
|
||||
Clear();
|
||||
RAPIDJSON_DELETE(ownBaseAllocator_);
|
||||
BaseAllocator *a = shared_->ownBaseAllocator;
|
||||
if (shared_->ownBuffer) {
|
||||
baseAllocator_->Free(shared_);
|
||||
}
|
||||
RAPIDJSON_DELETE(a);
|
||||
}
|
||||
|
||||
//! Deallocates all memory chunks, excluding the user-supplied buffer.
|
||||
void Clear() {
|
||||
while (chunkHead_ && chunkHead_ != userBuffer_) {
|
||||
ChunkHeader* next = chunkHead_->next;
|
||||
baseAllocator_->Free(chunkHead_);
|
||||
chunkHead_ = next;
|
||||
//! Deallocates all memory chunks, excluding the first/user one.
|
||||
void Clear() RAPIDJSON_NOEXCEPT {
|
||||
RAPIDJSON_NOEXCEPT_ASSERT(shared_->refcount > 0);
|
||||
for (;;) {
|
||||
ChunkHeader* c = shared_->chunkHead;
|
||||
if (!c->next) {
|
||||
break;
|
||||
}
|
||||
shared_->chunkHead = c->next;
|
||||
baseAllocator_->Free(c);
|
||||
}
|
||||
if (chunkHead_ && chunkHead_ == userBuffer_)
|
||||
chunkHead_->size = 0; // Clear user buffer
|
||||
shared_->chunkHead->size = 0;
|
||||
}
|
||||
|
||||
//! Computes the total capacity of allocated memory chunks.
|
||||
/*! \return total capacity in bytes.
|
||||
*/
|
||||
size_t Capacity() const {
|
||||
size_t Capacity() const RAPIDJSON_NOEXCEPT {
|
||||
RAPIDJSON_NOEXCEPT_ASSERT(shared_->refcount > 0);
|
||||
size_t capacity = 0;
|
||||
for (ChunkHeader* c = chunkHead_; c != 0; c = c->next)
|
||||
for (ChunkHeader* c = shared_->chunkHead; c != 0; c = c->next)
|
||||
capacity += c->capacity;
|
||||
return capacity;
|
||||
}
|
||||
@@ -178,25 +301,35 @@ public:
|
||||
//! Computes the memory blocks allocated.
|
||||
/*! \return total used bytes.
|
||||
*/
|
||||
size_t Size() const {
|
||||
size_t Size() const RAPIDJSON_NOEXCEPT {
|
||||
RAPIDJSON_NOEXCEPT_ASSERT(shared_->refcount > 0);
|
||||
size_t size = 0;
|
||||
for (ChunkHeader* c = chunkHead_; c != 0; c = c->next)
|
||||
for (ChunkHeader* c = shared_->chunkHead; c != 0; c = c->next)
|
||||
size += c->size;
|
||||
return size;
|
||||
}
|
||||
|
||||
//! Whether the allocator is shared.
|
||||
/*! \return true or false.
|
||||
*/
|
||||
bool Shared() const RAPIDJSON_NOEXCEPT {
|
||||
RAPIDJSON_NOEXCEPT_ASSERT(shared_->refcount > 0);
|
||||
return shared_->refcount > 1;
|
||||
}
|
||||
|
||||
//! Allocates a memory block. (concept Allocator)
|
||||
void* Malloc(size_t size) {
|
||||
RAPIDJSON_NOEXCEPT_ASSERT(shared_->refcount > 0);
|
||||
if (!size)
|
||||
return NULL;
|
||||
|
||||
size = RAPIDJSON_ALIGN(size);
|
||||
if (chunkHead_ == 0 || chunkHead_->size + size > chunkHead_->capacity)
|
||||
if (RAPIDJSON_UNLIKELY(shared_->chunkHead->size + size > shared_->chunkHead->capacity))
|
||||
if (!AddChunk(chunk_capacity_ > size ? chunk_capacity_ : size))
|
||||
return NULL;
|
||||
|
||||
void *buffer = reinterpret_cast<char *>(chunkHead_) + RAPIDJSON_ALIGN(sizeof(ChunkHeader)) + chunkHead_->size;
|
||||
chunkHead_->size += size;
|
||||
void *buffer = GetChunkBuffer(shared_) + shared_->chunkHead->size;
|
||||
shared_->chunkHead->size += size;
|
||||
return buffer;
|
||||
}
|
||||
|
||||
@@ -205,6 +338,7 @@ public:
|
||||
if (originalPtr == 0)
|
||||
return Malloc(newSize);
|
||||
|
||||
RAPIDJSON_NOEXCEPT_ASSERT(shared_->refcount > 0);
|
||||
if (newSize == 0)
|
||||
return NULL;
|
||||
|
||||
@@ -216,10 +350,10 @@ public:
|
||||
return originalPtr;
|
||||
|
||||
// Simply expand it if it is the last allocation and there is sufficient space
|
||||
if (originalPtr == reinterpret_cast<char *>(chunkHead_) + RAPIDJSON_ALIGN(sizeof(ChunkHeader)) + chunkHead_->size - originalSize) {
|
||||
if (originalPtr == GetChunkBuffer(shared_) + shared_->chunkHead->size - originalSize) {
|
||||
size_t increment = static_cast<size_t>(newSize - originalSize);
|
||||
if (chunkHead_->size + increment <= chunkHead_->capacity) {
|
||||
chunkHead_->size += increment;
|
||||
if (shared_->chunkHead->size + increment <= shared_->chunkHead->capacity) {
|
||||
shared_->chunkHead->size += increment;
|
||||
return originalPtr;
|
||||
}
|
||||
}
|
||||
@@ -235,50 +369,325 @@ public:
|
||||
}
|
||||
|
||||
//! Frees a memory block (concept Allocator)
|
||||
static void Free(void *ptr) { (void)ptr; } // Do nothing
|
||||
static void Free(void *ptr) RAPIDJSON_NOEXCEPT { (void)ptr; } // Do nothing
|
||||
|
||||
//! Compare (equality) with another MemoryPoolAllocator
|
||||
bool operator==(const MemoryPoolAllocator& rhs) const RAPIDJSON_NOEXCEPT {
|
||||
RAPIDJSON_NOEXCEPT_ASSERT(shared_->refcount > 0);
|
||||
RAPIDJSON_NOEXCEPT_ASSERT(rhs.shared_->refcount > 0);
|
||||
return shared_ == rhs.shared_;
|
||||
}
|
||||
//! Compare (inequality) with another MemoryPoolAllocator
|
||||
bool operator!=(const MemoryPoolAllocator& rhs) const RAPIDJSON_NOEXCEPT {
|
||||
return !operator==(rhs);
|
||||
}
|
||||
|
||||
private:
|
||||
//! Copy constructor is not permitted.
|
||||
MemoryPoolAllocator(const MemoryPoolAllocator& rhs) /* = delete */;
|
||||
//! Copy assignment operator is not permitted.
|
||||
MemoryPoolAllocator& operator=(const MemoryPoolAllocator& rhs) /* = delete */;
|
||||
|
||||
//! Creates a new chunk.
|
||||
/*! \param capacity Capacity of the chunk in bytes.
|
||||
\return true if success.
|
||||
*/
|
||||
bool AddChunk(size_t capacity) {
|
||||
if (!baseAllocator_)
|
||||
ownBaseAllocator_ = baseAllocator_ = RAPIDJSON_NEW(BaseAllocator)();
|
||||
if (ChunkHeader* chunk = reinterpret_cast<ChunkHeader*>(baseAllocator_->Malloc(RAPIDJSON_ALIGN(sizeof(ChunkHeader)) + capacity))) {
|
||||
shared_->ownBaseAllocator = baseAllocator_ = RAPIDJSON_NEW(BaseAllocator)();
|
||||
if (ChunkHeader* chunk = static_cast<ChunkHeader*>(baseAllocator_->Malloc(SIZEOF_CHUNK_HEADER + capacity))) {
|
||||
chunk->capacity = capacity;
|
||||
chunk->size = 0;
|
||||
chunk->next = chunkHead_;
|
||||
chunkHead_ = chunk;
|
||||
chunk->next = shared_->chunkHead;
|
||||
shared_->chunkHead = chunk;
|
||||
return true;
|
||||
}
|
||||
else
|
||||
return false;
|
||||
}
|
||||
|
||||
static const int kDefaultChunkCapacity = RAPIDJSON_ALLOCATOR_DEFAULT_CHUNK_CAPACITY; //!< Default chunk capacity.
|
||||
static inline void* AlignBuffer(void* buf, size_t &size)
|
||||
{
|
||||
RAPIDJSON_NOEXCEPT_ASSERT(buf != 0);
|
||||
const uintptr_t mask = sizeof(void*) - 1;
|
||||
const uintptr_t ubuf = reinterpret_cast<uintptr_t>(buf);
|
||||
if (RAPIDJSON_UNLIKELY(ubuf & mask)) {
|
||||
const uintptr_t abuf = (ubuf + mask) & ~mask;
|
||||
RAPIDJSON_ASSERT(size >= abuf - ubuf);
|
||||
buf = reinterpret_cast<void*>(abuf);
|
||||
size -= abuf - ubuf;
|
||||
}
|
||||
return buf;
|
||||
}
|
||||
|
||||
//! Chunk header for prepending to each chunk.
|
||||
/*! Chunks are stored as a singly linked list.
|
||||
*/
|
||||
struct ChunkHeader {
|
||||
size_t capacity; //!< Capacity of the chunk in bytes (excluding the header itself).
|
||||
size_t size; //!< Current size of allocated memory in bytes.
|
||||
ChunkHeader *next; //!< Next chunk in the linked list.
|
||||
size_t chunk_capacity_; //!< The minimum capacity of chunk when they are allocated.
|
||||
BaseAllocator* baseAllocator_; //!< base allocator for allocating memory chunks.
|
||||
SharedData *shared_; //!< The shared data of the allocator
|
||||
};
|
||||
|
||||
namespace internal {
|
||||
template<typename, typename = void>
|
||||
struct IsRefCounted :
|
||||
public FalseType
|
||||
{ };
|
||||
template<typename T>
|
||||
struct IsRefCounted<T, typename internal::EnableIfCond<T::kRefCounted>::Type> :
|
||||
public TrueType
|
||||
{ };
|
||||
}
|
||||
|
||||
template<typename T, typename A>
|
||||
inline T* Realloc(A& a, T* old_p, size_t old_n, size_t new_n)
|
||||
{
|
||||
RAPIDJSON_NOEXCEPT_ASSERT(old_n <= (std::numeric_limits<size_t>::max)() / sizeof(T) && new_n <= (std::numeric_limits<size_t>::max)() / sizeof(T));
|
||||
return static_cast<T*>(a.Realloc(old_p, old_n * sizeof(T), new_n * sizeof(T)));
|
||||
}
|
||||
|
||||
template<typename T, typename A>
|
||||
inline T *Malloc(A& a, size_t n = 1)
|
||||
{
|
||||
return Realloc<T, A>(a, NULL, 0, n);
|
||||
}
|
||||
|
||||
template<typename T, typename A>
|
||||
inline void Free(A& a, T *p, size_t n = 1)
|
||||
{
|
||||
static_cast<void>(Realloc<T, A>(a, p, n, 0));
|
||||
}
|
||||
|
||||
#ifdef __GNUC__
|
||||
RAPIDJSON_DIAG_PUSH
|
||||
RAPIDJSON_DIAG_OFF(effc++) // std::allocator can safely be inherited
|
||||
#endif
|
||||
|
||||
template <typename T, typename BaseAllocator = CrtAllocator>
|
||||
class StdAllocator :
|
||||
public std::allocator<T>
|
||||
{
|
||||
typedef std::allocator<T> allocator_type;
|
||||
#if RAPIDJSON_HAS_CXX11
|
||||
typedef std::allocator_traits<allocator_type> traits_type;
|
||||
#else
|
||||
typedef allocator_type traits_type;
|
||||
#endif
|
||||
|
||||
public:
|
||||
typedef BaseAllocator BaseAllocatorType;
|
||||
|
||||
StdAllocator() RAPIDJSON_NOEXCEPT :
|
||||
allocator_type(),
|
||||
baseAllocator_()
|
||||
{ }
|
||||
|
||||
StdAllocator(const StdAllocator& rhs) RAPIDJSON_NOEXCEPT :
|
||||
allocator_type(rhs),
|
||||
baseAllocator_(rhs.baseAllocator_)
|
||||
{ }
|
||||
|
||||
template<typename U>
|
||||
StdAllocator(const StdAllocator<U, BaseAllocator>& rhs) RAPIDJSON_NOEXCEPT :
|
||||
allocator_type(rhs),
|
||||
baseAllocator_(rhs.baseAllocator_)
|
||||
{ }
|
||||
|
||||
#if RAPIDJSON_HAS_CXX11_RVALUE_REFS
|
||||
StdAllocator(StdAllocator&& rhs) RAPIDJSON_NOEXCEPT :
|
||||
allocator_type(std::move(rhs)),
|
||||
baseAllocator_(std::move(rhs.baseAllocator_))
|
||||
{ }
|
||||
#endif
|
||||
#if RAPIDJSON_HAS_CXX11
|
||||
using propagate_on_container_move_assignment = std::true_type;
|
||||
using propagate_on_container_swap = std::true_type;
|
||||
#endif
|
||||
|
||||
/* implicit */
|
||||
StdAllocator(const BaseAllocator& baseAllocator) RAPIDJSON_NOEXCEPT :
|
||||
allocator_type(),
|
||||
baseAllocator_(baseAllocator)
|
||||
{ }
|
||||
|
||||
~StdAllocator() RAPIDJSON_NOEXCEPT
|
||||
{ }
|
||||
|
||||
template<typename U>
|
||||
struct rebind {
|
||||
typedef StdAllocator<U, BaseAllocator> other;
|
||||
};
|
||||
|
||||
ChunkHeader *chunkHead_; //!< Head of the chunk linked-list. Only the head chunk serves allocation.
|
||||
size_t chunk_capacity_; //!< The minimum capacity of chunk when they are allocated.
|
||||
void *userBuffer_; //!< User supplied buffer.
|
||||
BaseAllocator* baseAllocator_; //!< base allocator for allocating memory chunks.
|
||||
BaseAllocator* ownBaseAllocator_; //!< base allocator created by this object.
|
||||
typedef typename traits_type::size_type size_type;
|
||||
typedef typename traits_type::difference_type difference_type;
|
||||
|
||||
typedef typename traits_type::value_type value_type;
|
||||
typedef typename traits_type::pointer pointer;
|
||||
typedef typename traits_type::const_pointer const_pointer;
|
||||
|
||||
#if RAPIDJSON_HAS_CXX11
|
||||
|
||||
typedef typename std::add_lvalue_reference<value_type>::type &reference;
|
||||
typedef typename std::add_lvalue_reference<typename std::add_const<value_type>::type>::type &const_reference;
|
||||
|
||||
pointer address(reference r) const RAPIDJSON_NOEXCEPT
|
||||
{
|
||||
return std::addressof(r);
|
||||
}
|
||||
const_pointer address(const_reference r) const RAPIDJSON_NOEXCEPT
|
||||
{
|
||||
return std::addressof(r);
|
||||
}
|
||||
|
||||
size_type max_size() const RAPIDJSON_NOEXCEPT
|
||||
{
|
||||
return traits_type::max_size(*this);
|
||||
}
|
||||
|
||||
template <typename ...Args>
|
||||
void construct(pointer p, Args&&... args)
|
||||
{
|
||||
traits_type::construct(*this, p, std::forward<Args>(args)...);
|
||||
}
|
||||
void destroy(pointer p)
|
||||
{
|
||||
traits_type::destroy(*this, p);
|
||||
}
|
||||
|
||||
#else // !RAPIDJSON_HAS_CXX11
|
||||
|
||||
typedef typename allocator_type::reference reference;
|
||||
typedef typename allocator_type::const_reference const_reference;
|
||||
|
||||
pointer address(reference r) const RAPIDJSON_NOEXCEPT
|
||||
{
|
||||
return allocator_type::address(r);
|
||||
}
|
||||
const_pointer address(const_reference r) const RAPIDJSON_NOEXCEPT
|
||||
{
|
||||
return allocator_type::address(r);
|
||||
}
|
||||
|
||||
size_type max_size() const RAPIDJSON_NOEXCEPT
|
||||
{
|
||||
return allocator_type::max_size();
|
||||
}
|
||||
|
||||
void construct(pointer p, const_reference r)
|
||||
{
|
||||
allocator_type::construct(p, r);
|
||||
}
|
||||
void destroy(pointer p)
|
||||
{
|
||||
allocator_type::destroy(p);
|
||||
}
|
||||
|
||||
#endif // !RAPIDJSON_HAS_CXX11
|
||||
|
||||
template <typename U>
|
||||
U* allocate(size_type n = 1, const void* = 0)
|
||||
{
|
||||
return RAPIDJSON_NAMESPACE::Malloc<U>(baseAllocator_, n);
|
||||
}
|
||||
template <typename U>
|
||||
void deallocate(U* p, size_type n = 1)
|
||||
{
|
||||
RAPIDJSON_NAMESPACE::Free<U>(baseAllocator_, p, n);
|
||||
}
|
||||
|
||||
pointer allocate(size_type n = 1, const void* = 0)
|
||||
{
|
||||
return allocate<value_type>(n);
|
||||
}
|
||||
void deallocate(pointer p, size_type n = 1)
|
||||
{
|
||||
deallocate<value_type>(p, n);
|
||||
}
|
||||
|
||||
#if RAPIDJSON_HAS_CXX11
|
||||
using is_always_equal = std::is_empty<BaseAllocator>;
|
||||
#endif
|
||||
|
||||
template<typename U>
|
||||
bool operator==(const StdAllocator<U, BaseAllocator>& rhs) const RAPIDJSON_NOEXCEPT
|
||||
{
|
||||
return baseAllocator_ == rhs.baseAllocator_;
|
||||
}
|
||||
template<typename U>
|
||||
bool operator!=(const StdAllocator<U, BaseAllocator>& rhs) const RAPIDJSON_NOEXCEPT
|
||||
{
|
||||
return !operator==(rhs);
|
||||
}
|
||||
|
||||
//! rapidjson Allocator concept
|
||||
static const bool kNeedFree = BaseAllocator::kNeedFree;
|
||||
static const bool kRefCounted = internal::IsRefCounted<BaseAllocator>::Value;
|
||||
void* Malloc(size_t size)
|
||||
{
|
||||
return baseAllocator_.Malloc(size);
|
||||
}
|
||||
void* Realloc(void* originalPtr, size_t originalSize, size_t newSize)
|
||||
{
|
||||
return baseAllocator_.Realloc(originalPtr, originalSize, newSize);
|
||||
}
|
||||
static void Free(void *ptr) RAPIDJSON_NOEXCEPT
|
||||
{
|
||||
BaseAllocator::Free(ptr);
|
||||
}
|
||||
|
||||
private:
|
||||
template <typename, typename>
|
||||
friend class StdAllocator; // access to StdAllocator<!T>.*
|
||||
|
||||
BaseAllocator baseAllocator_;
|
||||
};
|
||||
|
||||
#if !RAPIDJSON_HAS_CXX17 // std::allocator<void> deprecated in C++17
|
||||
template <typename BaseAllocator>
|
||||
class StdAllocator<void, BaseAllocator> :
|
||||
public std::allocator<void>
|
||||
{
|
||||
typedef std::allocator<void> allocator_type;
|
||||
|
||||
public:
|
||||
typedef BaseAllocator BaseAllocatorType;
|
||||
|
||||
StdAllocator() RAPIDJSON_NOEXCEPT :
|
||||
allocator_type(),
|
||||
baseAllocator_()
|
||||
{ }
|
||||
|
||||
StdAllocator(const StdAllocator& rhs) RAPIDJSON_NOEXCEPT :
|
||||
allocator_type(rhs),
|
||||
baseAllocator_(rhs.baseAllocator_)
|
||||
{ }
|
||||
|
||||
template<typename U>
|
||||
StdAllocator(const StdAllocator<U, BaseAllocator>& rhs) RAPIDJSON_NOEXCEPT :
|
||||
allocator_type(rhs),
|
||||
baseAllocator_(rhs.baseAllocator_)
|
||||
{ }
|
||||
|
||||
/* implicit */
|
||||
StdAllocator(const BaseAllocator& baseAllocator) RAPIDJSON_NOEXCEPT :
|
||||
allocator_type(),
|
||||
baseAllocator_(baseAllocator)
|
||||
{ }
|
||||
|
||||
~StdAllocator() RAPIDJSON_NOEXCEPT
|
||||
{ }
|
||||
|
||||
template<typename U>
|
||||
struct rebind {
|
||||
typedef StdAllocator<U, BaseAllocator> other;
|
||||
};
|
||||
|
||||
typedef typename allocator_type::value_type value_type;
|
||||
|
||||
private:
|
||||
template <typename, typename>
|
||||
friend class StdAllocator; // access to StdAllocator<!T>.*
|
||||
|
||||
BaseAllocator baseAllocator_;
|
||||
};
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
RAPIDJSON_DIAG_POP
|
||||
#endif
|
||||
|
||||
RAPIDJSON_NAMESPACE_END
|
||||
|
||||
#endif // RAPIDJSON_ALLOCATORS_H_
|
||||
|
||||
2
src/3rdparty/rapidjson/cursorstreamwrapper.h
vendored
2
src/3rdparty/rapidjson/cursorstreamwrapper.h
vendored
@@ -1,6 +1,6 @@
|
||||
// Tencent is pleased to support the open source community by making RapidJSON available.
|
||||
//
|
||||
// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip. All rights reserved.
|
||||
// Copyright (C) 2015 THL A29 Limited, a Tencent company, and Milo Yip.
|
||||
//
|
||||
// Licensed under the MIT License (the "License"); you may not use this file except
|
||||
// in compliance with the License. You may obtain a copy of the License at
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user