1
0
mirror of https://github.com/xmrig/xmrig.git synced 2025-12-09 00:33:33 -05:00
Files
xmrig/src/3rdparty/hwloc/src/cpukinds.c
2021-03-04 03:23:26 +07:00

650 lines
20 KiB
C

/*
* Copyright © 2020-2021 Inria. All rights reserved.
* See COPYING in top-level directory.
*/
#include "private/autogen/config.h"
#include "hwloc.h"
#include "private/private.h"
#include "private/debug.h"
/*****************
* Basics
*/
void
hwloc_internal_cpukinds_init(struct hwloc_topology *topology)
{
topology->cpukinds = NULL;
topology->nr_cpukinds = 0;
topology->nr_cpukinds_allocated = 0;
}
void
hwloc_internal_cpukinds_destroy(struct hwloc_topology *topology)
{
unsigned i;
for(i=0; i<topology->nr_cpukinds; i++) {
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i];
hwloc_bitmap_free(kind->cpuset);
hwloc__free_infos(kind->infos, kind->nr_infos);
}
free(topology->cpukinds);
topology->cpukinds = NULL;
topology->nr_cpukinds = 0;
}
int
hwloc_internal_cpukinds_dup(hwloc_topology_t new, hwloc_topology_t old)
{
struct hwloc_tma *tma = new->tma;
struct hwloc_internal_cpukind_s *kinds;
unsigned i;
kinds = hwloc_tma_malloc(tma, old->nr_cpukinds * sizeof(*kinds));
if (!kinds)
return -1;
new->cpukinds = kinds;
new->nr_cpukinds = old->nr_cpukinds;
memcpy(kinds, old->cpukinds, old->nr_cpukinds * sizeof(*kinds));
for(i=0;i<old->nr_cpukinds; i++) {
kinds[i].cpuset = hwloc_bitmap_tma_dup(tma, old->cpukinds[i].cpuset);
if (!kinds[i].cpuset) {
new->nr_cpukinds = i;
goto failed;
}
if (hwloc__tma_dup_infos(tma,
&kinds[i].infos, &kinds[i].nr_infos,
old->cpukinds[i].infos, old->cpukinds[i].nr_infos) < 0) {
assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */
hwloc_bitmap_free(kinds[i].cpuset);
new->nr_cpukinds = i;
goto failed;
}
}
return 0;
failed:
hwloc_internal_cpukinds_destroy(new);
return -1;
}
void
hwloc_internal_cpukinds_restrict(hwloc_topology_t topology)
{
unsigned i;
int removed = 0;
for(i=0; i<topology->nr_cpukinds; i++) {
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i];
hwloc_bitmap_and(kind->cpuset, kind->cpuset, hwloc_get_root_obj(topology)->cpuset);
if (hwloc_bitmap_iszero(kind->cpuset)) {
hwloc_bitmap_free(kind->cpuset);
hwloc__free_infos(kind->infos, kind->nr_infos);
memmove(kind, kind+1, (topology->nr_cpukinds - i - 1)*sizeof(*kind));
i--;
topology->nr_cpukinds--;
removed = 1;
}
}
if (removed)
hwloc_internal_cpukinds_rank(topology);
}
/********************
* Registering
*/
static __hwloc_inline int
hwloc__cpukind_check_duplicate_info(struct hwloc_internal_cpukind_s *kind,
const char *name, const char *value)
{
unsigned i;
for(i=0; i<kind->nr_infos; i++)
if (!strcmp(kind->infos[i].name, name)
&& !strcmp(kind->infos[i].value, value))
return 1;
return 0;
}
static __hwloc_inline void
hwloc__cpukind_add_infos(struct hwloc_internal_cpukind_s *kind,
const struct hwloc_info_s *infos, unsigned nr_infos)
{
unsigned i;
for(i=0; i<nr_infos; i++) {
if (hwloc__cpukind_check_duplicate_info(kind, infos[i].name, infos[i].value))
continue;
hwloc__add_info(&kind->infos, &kind->nr_infos, infos[i].name, infos[i].value);
}
}
int
hwloc_internal_cpukinds_register(hwloc_topology_t topology, hwloc_cpuset_t cpuset,
int forced_efficiency,
const struct hwloc_info_s *infos, unsigned nr_infos,
unsigned long flags)
{
struct hwloc_internal_cpukind_s *kinds;
unsigned i, max, bits, oldnr, newnr;
if (hwloc_bitmap_iszero(cpuset)) {
hwloc_bitmap_free(cpuset);
errno = EINVAL;
return -1;
}
if (flags & ~HWLOC_CPUKINDS_REGISTER_FLAG_OVERWRITE_FORCED_EFFICIENCY) {
errno = EINVAL;
return -1;
}
/* TODO: for now, only windows provides a forced efficiency.
* if another backend ever provides a conflicting value, the first backend value will be kept.
* (user-provided values are not an issue, they are meant to overwrite)
*/
/* If we have N kinds currently, we may need 2N+1 kinds after inserting the new one:
* - each existing kind may get split into which PUs are in the new kind and which aren't.
* - some PUs might not have been in any kind yet.
*/
max = 2 * topology->nr_cpukinds + 1;
/* Allocate the power-of-two above 2N+1. */
bits = hwloc_flsl(max-1) + 1;
max = 1U<<bits;
/* Allocate 8 minimum to avoid multiple reallocs */
if (max < 8)
max = 8;
/* Create or enlarge the array of kinds if needed */
kinds = topology->cpukinds;
if (max > topology->nr_cpukinds_allocated) {
kinds = realloc(kinds, max * sizeof(*kinds));
if (!kinds) {
hwloc_bitmap_free(cpuset);
return -1;
}
memset(&kinds[topology->nr_cpukinds_allocated], 0, (max - topology->nr_cpukinds_allocated) * sizeof(*kinds));
topology->nr_cpukinds_allocated = max;
topology->cpukinds = kinds;
}
newnr = oldnr = topology->nr_cpukinds;
for(i=0; i<oldnr; i++) {
int res = hwloc_bitmap_compare_inclusion(cpuset, kinds[i].cpuset);
if (res == HWLOC_BITMAP_INTERSECTS || res == HWLOC_BITMAP_INCLUDED) {
/* new kind with intersection of cpusets and union of infos */
kinds[newnr].cpuset = hwloc_bitmap_alloc();
kinds[newnr].efficiency = HWLOC_CPUKIND_EFFICIENCY_UNKNOWN;
kinds[newnr].forced_efficiency = forced_efficiency;
hwloc_bitmap_and(kinds[newnr].cpuset, cpuset, kinds[i].cpuset);
hwloc__cpukind_add_infos(&kinds[newnr], kinds[i].infos, kinds[i].nr_infos);
hwloc__cpukind_add_infos(&kinds[newnr], infos, nr_infos);
/* remove cpuset PUs from the existing kind that we just split */
hwloc_bitmap_andnot(kinds[i].cpuset, kinds[i].cpuset, kinds[newnr].cpuset);
/* clear cpuset PUs that were taken care of */
hwloc_bitmap_andnot(cpuset, cpuset, kinds[newnr].cpuset);
newnr++;
} else if (res == HWLOC_BITMAP_CONTAINS
|| res == HWLOC_BITMAP_EQUAL) {
/* append new info to existing smaller (or equal) kind */
hwloc__cpukind_add_infos(&kinds[i], infos, nr_infos);
if ((flags & HWLOC_CPUKINDS_REGISTER_FLAG_OVERWRITE_FORCED_EFFICIENCY)
|| kinds[i].forced_efficiency == HWLOC_CPUKIND_EFFICIENCY_UNKNOWN)
kinds[i].forced_efficiency = forced_efficiency;
/* clear cpuset PUs that were taken care of */
hwloc_bitmap_andnot(cpuset, cpuset, kinds[i].cpuset);
} else {
assert(res == HWLOC_BITMAP_DIFFERENT);
/* nothing to do */
}
/* don't compare with anything else if already empty */
if (hwloc_bitmap_iszero(cpuset))
break;
}
/* add a final kind with remaining PUs if any */
if (!hwloc_bitmap_iszero(cpuset)) {
kinds[newnr].cpuset = cpuset;
kinds[newnr].efficiency = HWLOC_CPUKIND_EFFICIENCY_UNKNOWN;
kinds[newnr].forced_efficiency = forced_efficiency;
hwloc__cpukind_add_infos(&kinds[newnr], infos, nr_infos);
newnr++;
} else {
hwloc_bitmap_free(cpuset);
}
topology->nr_cpukinds = newnr;
return 0;
}
int
hwloc_cpukinds_register(hwloc_topology_t topology, hwloc_cpuset_t _cpuset,
int forced_efficiency,
unsigned nr_infos, struct hwloc_info_s *infos,
unsigned long flags)
{
hwloc_bitmap_t cpuset;
int err;
if (flags) {
errno = EINVAL;
return -1;
}
if (!_cpuset || hwloc_bitmap_iszero(_cpuset)) {
errno = EINVAL;
return -1;
}
cpuset = hwloc_bitmap_dup(_cpuset);
if (!cpuset)
return -1;
if (forced_efficiency < 0)
forced_efficiency = HWLOC_CPUKIND_EFFICIENCY_UNKNOWN;
err = hwloc_internal_cpukinds_register(topology, cpuset, forced_efficiency, infos, nr_infos, HWLOC_CPUKINDS_REGISTER_FLAG_OVERWRITE_FORCED_EFFICIENCY);
if (err < 0)
return err;
hwloc_internal_cpukinds_rank(topology);
return 0;
}
/*********************
* Ranking
*/
static int
hwloc__cpukinds_check_duplicate_rankings(struct hwloc_topology *topology)
{
unsigned i,j;
for(i=0; i<topology->nr_cpukinds; i++)
for(j=i+1; j<topology->nr_cpukinds; j++)
if (topology->cpukinds[i].ranking_value == topology->cpukinds[j].ranking_value)
/* if any duplicate, fail */
return -1;
return 0;
}
static int
hwloc__cpukinds_try_rank_by_forced_efficiency(struct hwloc_topology *topology)
{
unsigned i;
hwloc_debug("Trying to rank cpukinds by forced efficiency...\n");
for(i=0; i<topology->nr_cpukinds; i++) {
if (topology->cpukinds[i].forced_efficiency == HWLOC_CPUKIND_EFFICIENCY_UNKNOWN)
/* if any unknown, fail */
return -1;
topology->cpukinds[i].ranking_value = topology->cpukinds[i].forced_efficiency;
}
return hwloc__cpukinds_check_duplicate_rankings(topology);
}
struct hwloc_cpukinds_info_summary {
int have_max_freq;
int have_base_freq;
int have_intel_core_type;
struct hwloc_cpukind_info_summary {
unsigned intel_core_type; /* 1 for atom, 2 for core */
unsigned max_freq, base_freq; /* MHz, hence < 100000 */
} * summaries;
};
static void
hwloc__cpukinds_summarize_info(struct hwloc_topology *topology,
struct hwloc_cpukinds_info_summary *summary)
{
unsigned i, j;
summary->have_max_freq = 1;
summary->have_base_freq = 1;
summary->have_intel_core_type = 1;
for(i=0; i<topology->nr_cpukinds; i++) {
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i];
for(j=0; j<kind->nr_infos; j++) {
struct hwloc_info_s *info = &kind->infos[j];
if (!strcmp(info->name, "FrequencyMaxMHz")) {
summary->summaries[i].max_freq = atoi(info->value);
} else if (!strcmp(info->name, "FrequencyBaseMHz")) {
summary->summaries[i].base_freq = atoi(info->value);
} else if (!strcmp(info->name, "CoreType")) {
if (!strcmp(info->value, "IntelAtom"))
summary->summaries[i].intel_core_type = 1;
else if (!strcmp(info->value, "IntelCore"))
summary->summaries[i].intel_core_type = 2;
}
}
hwloc_debug("cpukind #%u has intel_core_type %u max_freq %u base_freq %u\n",
i, summary->summaries[i].intel_core_type,
summary->summaries[i].max_freq, summary->summaries[i].base_freq);
if (!summary->summaries[i].base_freq)
summary->have_base_freq = 0;
if (!summary->summaries[i].max_freq)
summary->have_max_freq = 0;
if (!summary->summaries[i].intel_core_type)
summary->have_intel_core_type = 0;
}
}
enum hwloc_cpukinds_ranking {
HWLOC_CPUKINDS_RANKING_DEFAULT, /* forced + frequency on ARM, forced + coretype_frequency otherwise */
HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY, /* default without forced */
HWLOC_CPUKINDS_RANKING_FORCED_EFFICIENCY,
HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY,
HWLOC_CPUKINDS_RANKING_CORETYPE,
HWLOC_CPUKINDS_RANKING_FREQUENCY,
HWLOC_CPUKINDS_RANKING_FREQUENCY_MAX,
HWLOC_CPUKINDS_RANKING_FREQUENCY_BASE,
HWLOC_CPUKINDS_RANKING_NONE
};
static int
hwloc__cpukinds_try_rank_by_info(struct hwloc_topology *topology,
enum hwloc_cpukinds_ranking heuristics,
struct hwloc_cpukinds_info_summary *summary)
{
unsigned i;
if (HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY == heuristics) {
hwloc_debug("Trying to rank cpukinds by coretype+frequency...\n");
/* we need intel_core_type + (base or max freq) for all kinds */
if (!summary->have_intel_core_type
|| (!summary->have_max_freq && !summary->have_base_freq))
return -1;
/* rank first by coretype (Core>>Atom) then by frequency, base if available, max otherwise */
for(i=0; i<topology->nr_cpukinds; i++) {
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i];
if (summary->have_base_freq)
kind->ranking_value = (summary->summaries[i].intel_core_type << 20) + summary->summaries[i].base_freq;
else
kind->ranking_value = (summary->summaries[i].intel_core_type << 20) + summary->summaries[i].max_freq;
}
} else if (HWLOC_CPUKINDS_RANKING_CORETYPE == heuristics) {
hwloc_debug("Trying to rank cpukinds by coretype...\n");
/* we need intel_core_type */
if (!summary->have_intel_core_type)
return -1;
/* rank by coretype (Core>>Atom) */
for(i=0; i<topology->nr_cpukinds; i++) {
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i];
kind->ranking_value = (summary->summaries[i].intel_core_type << 20);
}
} else if (HWLOC_CPUKINDS_RANKING_FREQUENCY == heuristics) {
hwloc_debug("Trying to rank cpukinds by frequency...\n");
/* we need base or max freq for all kinds */
if (!summary->have_max_freq && !summary->have_base_freq)
return -1;
/* rank first by frequency, base if available, max otherwise */
for(i=0; i<topology->nr_cpukinds; i++) {
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i];
if (summary->have_base_freq)
kind->ranking_value = summary->summaries[i].base_freq;
else
kind->ranking_value = summary->summaries[i].max_freq;
}
} else if (HWLOC_CPUKINDS_RANKING_FREQUENCY_MAX == heuristics) {
hwloc_debug("Trying to rank cpukinds by frequency max...\n");
/* we need max freq for all kinds */
if (!summary->have_max_freq)
return -1;
/* rank first by frequency, base if available, max otherwise */
for(i=0; i<topology->nr_cpukinds; i++) {
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i];
kind->ranking_value = summary->summaries[i].max_freq;
}
} else if (HWLOC_CPUKINDS_RANKING_FREQUENCY_BASE == heuristics) {
hwloc_debug("Trying to rank cpukinds by frequency base...\n");
/* we need max freq for all kinds */
if (!summary->have_base_freq)
return -1;
/* rank first by frequency, base if available, max otherwise */
for(i=0; i<topology->nr_cpukinds; i++) {
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[i];
kind->ranking_value = summary->summaries[i].base_freq;
}
} else assert(0);
return hwloc__cpukinds_check_duplicate_rankings(topology);
}
static int hwloc__cpukinds_compare_ranking_values(const void *_a, const void *_b)
{
const struct hwloc_internal_cpukind_s *a = _a;
const struct hwloc_internal_cpukind_s *b = _b;
return a->ranking_value - b->ranking_value;
}
/* this function requires ranking values to be unique */
static void
hwloc__cpukinds_finalize_ranking(struct hwloc_topology *topology)
{
unsigned i;
/* sort */
qsort(topology->cpukinds, topology->nr_cpukinds, sizeof(*topology->cpukinds), hwloc__cpukinds_compare_ranking_values);
/* define our own efficiency between 0 and N-1 */
for(i=0; i<topology->nr_cpukinds; i++)
topology->cpukinds[i].efficiency = i;
}
int
hwloc_internal_cpukinds_rank(struct hwloc_topology *topology)
{
enum hwloc_cpukinds_ranking heuristics;
char *env;
unsigned i;
int err;
if (!topology->nr_cpukinds)
return 0;
if (topology->nr_cpukinds == 1) {
topology->cpukinds[0].efficiency = 0;
return 0;
}
heuristics = HWLOC_CPUKINDS_RANKING_DEFAULT;
env = getenv("HWLOC_CPUKINDS_RANKING");
if (env) {
if (!strcmp(env, "default"))
heuristics = HWLOC_CPUKINDS_RANKING_DEFAULT;
else if (!strcmp(env, "none"))
heuristics = HWLOC_CPUKINDS_RANKING_NONE;
else if (!strcmp(env, "coretype+frequency"))
heuristics = HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY;
else if (!strcmp(env, "coretype"))
heuristics = HWLOC_CPUKINDS_RANKING_CORETYPE;
else if (!strcmp(env, "frequency"))
heuristics = HWLOC_CPUKINDS_RANKING_FREQUENCY;
else if (!strcmp(env, "frequency_max"))
heuristics = HWLOC_CPUKINDS_RANKING_FREQUENCY_MAX;
else if (!strcmp(env, "frequency_base"))
heuristics = HWLOC_CPUKINDS_RANKING_FREQUENCY_BASE;
else if (!strcmp(env, "forced_efficiency"))
heuristics = HWLOC_CPUKINDS_RANKING_FORCED_EFFICIENCY;
else if (!strcmp(env, "no_forced_efficiency"))
heuristics = HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY;
else if (!hwloc_hide_errors())
fprintf(stderr, "Failed to recognize HWLOC_CPUKINDS_RANKING value %s\n", env);
}
if (heuristics == HWLOC_CPUKINDS_RANKING_DEFAULT
|| heuristics == HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY) {
/* default is forced_efficiency first */
struct hwloc_cpukinds_info_summary summary;
enum hwloc_cpukinds_ranking subheuristics;
const char *arch;
if (heuristics == HWLOC_CPUKINDS_RANKING_DEFAULT)
hwloc_debug("Using default ranking strategy...\n");
else
hwloc_debug("Using custom ranking strategy from HWLOC_CPUKINDS_RANKING=%s\n", env);
if (heuristics != HWLOC_CPUKINDS_RANKING_NO_FORCED_EFFICIENCY) {
err = hwloc__cpukinds_try_rank_by_forced_efficiency(topology);
if (!err)
goto ready;
}
summary.summaries = calloc(topology->nr_cpukinds, sizeof(*summary.summaries));
if (!summary.summaries)
goto failed;
hwloc__cpukinds_summarize_info(topology, &summary);
arch = hwloc_obj_get_info_by_name(topology->levels[0][0], "Architecture");
/* TODO: rather coretype_frequency only on x86/Intel? */
if (arch && (!strncmp(arch, "arm", 3) || !strncmp(arch, "aarch", 5)))
/* then frequency on ARM */
subheuristics = HWLOC_CPUKINDS_RANKING_FREQUENCY;
else
/* or coretype+frequency otherwise */
subheuristics = HWLOC_CPUKINDS_RANKING_CORETYPE_FREQUENCY;
err = hwloc__cpukinds_try_rank_by_info(topology, subheuristics, &summary);
free(summary.summaries);
if (!err)
goto ready;
} else if (heuristics == HWLOC_CPUKINDS_RANKING_FORCED_EFFICIENCY) {
hwloc_debug("Using custom ranking strategy from HWLOC_CPUKINDS_RANKING=%s\n", env);
err = hwloc__cpukinds_try_rank_by_forced_efficiency(topology);
if (!err)
goto ready;
} else if (heuristics != HWLOC_CPUKINDS_RANKING_NONE) {
/* custom heuristics */
struct hwloc_cpukinds_info_summary summary;
hwloc_debug("Using custom ranking strategy from HWLOC_CPUKINDS_RANKING=%s\n", env);
summary.summaries = calloc(topology->nr_cpukinds, sizeof(*summary.summaries));
if (!summary.summaries)
goto failed;
hwloc__cpukinds_summarize_info(topology, &summary);
err = hwloc__cpukinds_try_rank_by_info(topology, heuristics, &summary);
free(summary.summaries);
if (!err)
goto ready;
}
failed:
/* failed to rank, clear efficiencies */
for(i=0; i<topology->nr_cpukinds; i++)
topology->cpukinds[i].efficiency = HWLOC_CPUKIND_EFFICIENCY_UNKNOWN;
hwloc_debug("Failed to rank cpukinds.\n\n");
return 0;
ready:
for(i=0; i<topology->nr_cpukinds; i++)
hwloc_debug("cpukind #%u got ranking value %llu\n", i, (unsigned long long) topology->cpukinds[i].ranking_value);
hwloc__cpukinds_finalize_ranking(topology);
#ifdef HWLOC_DEBUG
for(i=0; i<topology->nr_cpukinds; i++)
assert(topology->cpukinds[i].efficiency == (int) i);
#endif
hwloc_debug("\n");
return 0;
}
/*****************
* Consulting
*/
int
hwloc_cpukinds_get_nr(hwloc_topology_t topology, unsigned long flags)
{
if (flags) {
errno = EINVAL;
return -1;
}
return topology->nr_cpukinds;
}
int
hwloc_cpukinds_get_info(hwloc_topology_t topology,
unsigned id,
hwloc_bitmap_t cpuset,
int *efficiencyp,
unsigned *nr_infosp, struct hwloc_info_s **infosp,
unsigned long flags)
{
struct hwloc_internal_cpukind_s *kind;
if (flags) {
errno = EINVAL;
return -1;
}
if (id >= topology->nr_cpukinds) {
errno = ENOENT;
return -1;
}
kind = &topology->cpukinds[id];
if (cpuset)
hwloc_bitmap_copy(cpuset, kind->cpuset);
if (efficiencyp)
*efficiencyp = kind->efficiency;
if (nr_infosp && infosp) {
*nr_infosp = kind->nr_infos;
*infosp = kind->infos;
}
return 0;
}
int
hwloc_cpukinds_get_by_cpuset(hwloc_topology_t topology,
hwloc_const_bitmap_t cpuset,
unsigned long flags)
{
unsigned id;
if (flags) {
errno = EINVAL;
return -1;
}
if (!cpuset || hwloc_bitmap_iszero(cpuset)) {
errno = EINVAL;
return -1;
}
for(id=0; id<topology->nr_cpukinds; id++) {
struct hwloc_internal_cpukind_s *kind = &topology->cpukinds[id];
int res = hwloc_bitmap_compare_inclusion(cpuset, kind->cpuset);
if (res == HWLOC_BITMAP_EQUAL || res == HWLOC_BITMAP_INCLUDED) {
return (int) id;
} else if (res == HWLOC_BITMAP_INTERSECTS || res == HWLOC_BITMAP_CONTAINS) {
errno = EXDEV;
return -1;
}
}
errno = ENOENT;
return -1;
}