mirror of
https://github.com/xmrig/xmrig.git
synced 2026-06-22 20:12:38 -04:00
Use internal hwloc for MSVC.
This commit is contained in:
920
src/3rdparty/hwloc/src/distances.c
vendored
Normal file
920
src/3rdparty/hwloc/src/distances.c
vendored
Normal file
@@ -0,0 +1,920 @@
|
||||
/*
|
||||
* Copyright © 2010-2018 Inria. All rights reserved.
|
||||
* Copyright © 2011-2012 Université Bordeaux
|
||||
* Copyright © 2011 Cisco Systems, Inc. All rights reserved.
|
||||
* See COPYING in top-level directory.
|
||||
*/
|
||||
|
||||
#include <private/autogen/config.h>
|
||||
#include <hwloc.h>
|
||||
#include <private/private.h>
|
||||
#include <private/debug.h>
|
||||
#include <private/misc.h>
|
||||
|
||||
#include <float.h>
|
||||
#include <math.h>
|
||||
|
||||
/******************************************************
|
||||
* Global init, prepare, destroy, dup
|
||||
*/
|
||||
|
||||
/* called during topology init() */
|
||||
void hwloc_internal_distances_init(struct hwloc_topology *topology)
|
||||
{
|
||||
topology->first_dist = topology->last_dist = NULL;
|
||||
topology->next_dist_id = 0;
|
||||
}
|
||||
|
||||
/* called at the beginning of load() */
|
||||
void hwloc_internal_distances_prepare(struct hwloc_topology *topology)
|
||||
{
|
||||
char *env;
|
||||
hwloc_localeswitch_declare;
|
||||
|
||||
topology->grouping = 1;
|
||||
if (topology->type_filter[HWLOC_OBJ_GROUP] == HWLOC_TYPE_FILTER_KEEP_NONE)
|
||||
topology->grouping = 0;
|
||||
env = getenv("HWLOC_GROUPING");
|
||||
if (env && !atoi(env))
|
||||
topology->grouping = 0;
|
||||
|
||||
if (topology->grouping) {
|
||||
topology->grouping_next_subkind = 0;
|
||||
|
||||
HWLOC_BUILD_ASSERT(sizeof(topology->grouping_accuracies)/sizeof(*topology->grouping_accuracies) == 5);
|
||||
topology->grouping_accuracies[0] = 0.0f;
|
||||
topology->grouping_accuracies[1] = 0.01f;
|
||||
topology->grouping_accuracies[2] = 0.02f;
|
||||
topology->grouping_accuracies[3] = 0.05f;
|
||||
topology->grouping_accuracies[4] = 0.1f;
|
||||
topology->grouping_nbaccuracies = 5;
|
||||
|
||||
hwloc_localeswitch_init();
|
||||
env = getenv("HWLOC_GROUPING_ACCURACY");
|
||||
if (!env) {
|
||||
/* only use 0.0 */
|
||||
topology->grouping_nbaccuracies = 1;
|
||||
} else if (strcmp(env, "try")) {
|
||||
/* use the given value */
|
||||
topology->grouping_nbaccuracies = 1;
|
||||
topology->grouping_accuracies[0] = (float) atof(env);
|
||||
} /* otherwise try all values */
|
||||
hwloc_localeswitch_fini();
|
||||
|
||||
topology->grouping_verbose = 0;
|
||||
env = getenv("HWLOC_GROUPING_VERBOSE");
|
||||
if (env)
|
||||
topology->grouping_verbose = atoi(env);
|
||||
}
|
||||
}
|
||||
|
||||
static void hwloc_internal_distances_free(struct hwloc_internal_distances_s *dist)
|
||||
{
|
||||
free(dist->indexes);
|
||||
free(dist->objs);
|
||||
free(dist->values);
|
||||
free(dist);
|
||||
}
|
||||
|
||||
/* called during topology destroy */
|
||||
void hwloc_internal_distances_destroy(struct hwloc_topology * topology)
|
||||
{
|
||||
struct hwloc_internal_distances_s *dist, *next = topology->first_dist;
|
||||
while ((dist = next) != NULL) {
|
||||
next = dist->next;
|
||||
hwloc_internal_distances_free(dist);
|
||||
}
|
||||
topology->first_dist = topology->last_dist = NULL;
|
||||
}
|
||||
|
||||
static int hwloc_internal_distances_dup_one(struct hwloc_topology *new, struct hwloc_internal_distances_s *olddist)
|
||||
{
|
||||
struct hwloc_tma *tma = new->tma;
|
||||
struct hwloc_internal_distances_s *newdist;
|
||||
unsigned nbobjs = olddist->nbobjs;
|
||||
|
||||
newdist = hwloc_tma_malloc(tma, sizeof(*newdist));
|
||||
if (!newdist)
|
||||
return -1;
|
||||
|
||||
newdist->type = olddist->type;
|
||||
newdist->nbobjs = nbobjs;
|
||||
newdist->kind = olddist->kind;
|
||||
newdist->id = olddist->id;
|
||||
|
||||
newdist->indexes = hwloc_tma_malloc(tma, nbobjs * sizeof(*newdist->indexes));
|
||||
newdist->objs = hwloc_tma_calloc(tma, nbobjs * sizeof(*newdist->objs));
|
||||
newdist->objs_are_valid = 0;
|
||||
newdist->values = hwloc_tma_malloc(tma, nbobjs*nbobjs * sizeof(*newdist->values));
|
||||
if (!newdist->indexes || !newdist->objs || !newdist->values) {
|
||||
assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */
|
||||
hwloc_internal_distances_free(newdist);
|
||||
return -1;
|
||||
}
|
||||
|
||||
memcpy(newdist->indexes, olddist->indexes, nbobjs * sizeof(*newdist->indexes));
|
||||
memcpy(newdist->values, olddist->values, nbobjs*nbobjs * sizeof(*newdist->values));
|
||||
|
||||
newdist->next = NULL;
|
||||
newdist->prev = new->last_dist;
|
||||
if (new->last_dist)
|
||||
new->last_dist->next = newdist;
|
||||
else
|
||||
new->first_dist = newdist;
|
||||
new->last_dist = newdist;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* This function may be called with topology->tma set, it cannot free() or realloc() */
|
||||
int hwloc_internal_distances_dup(struct hwloc_topology *new, struct hwloc_topology *old)
|
||||
{
|
||||
struct hwloc_internal_distances_s *olddist;
|
||||
int err;
|
||||
new->next_dist_id = old->next_dist_id;
|
||||
for(olddist = old->first_dist; olddist; olddist = olddist->next) {
|
||||
err = hwloc_internal_distances_dup_one(new, olddist);
|
||||
if (err < 0)
|
||||
return err;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/******************************************************
|
||||
* Remove distances from the topology
|
||||
*/
|
||||
|
||||
int hwloc_distances_remove(hwloc_topology_t topology)
|
||||
{
|
||||
if (!topology->is_loaded) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
hwloc_internal_distances_destroy(topology);
|
||||
return 0;
|
||||
}
|
||||
|
||||
int hwloc_distances_remove_by_depth(hwloc_topology_t topology, int depth)
|
||||
{
|
||||
struct hwloc_internal_distances_s *dist, *next;
|
||||
hwloc_obj_type_t type;
|
||||
|
||||
if (!topology->is_loaded) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* switch back to types since we don't support groups for now */
|
||||
type = hwloc_get_depth_type(topology, depth);
|
||||
if (type == (hwloc_obj_type_t)-1) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
next = topology->first_dist;
|
||||
while ((dist = next) != NULL) {
|
||||
next = dist->next;
|
||||
if (dist->type == type) {
|
||||
if (next)
|
||||
next->prev = dist->prev;
|
||||
else
|
||||
topology->last_dist = dist->prev;
|
||||
if (dist->prev)
|
||||
dist->prev->next = dist->next;
|
||||
else
|
||||
topology->first_dist = dist->next;
|
||||
hwloc_internal_distances_free(dist);
|
||||
}
|
||||
}
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
/******************************************************
|
||||
* Add distances to the topology
|
||||
*/
|
||||
|
||||
static void
|
||||
hwloc__groups_by_distances(struct hwloc_topology *topology, unsigned nbobjs, struct hwloc_obj **objs, uint64_t *values, unsigned long kind, unsigned nbaccuracies, float *accuracies, int needcheck);
|
||||
|
||||
/* insert a distance matrix in the topology.
|
||||
* the caller gives us the distances and objs pointers, we'll free them later.
|
||||
*/
|
||||
static int
|
||||
hwloc_internal_distances__add(hwloc_topology_t topology,
|
||||
hwloc_obj_type_t type, unsigned nbobjs, hwloc_obj_t *objs, uint64_t *indexes, uint64_t *values,
|
||||
unsigned long kind)
|
||||
{
|
||||
struct hwloc_internal_distances_s *dist = calloc(1, sizeof(*dist));
|
||||
if (!dist)
|
||||
goto err;
|
||||
|
||||
dist->type = type;
|
||||
dist->nbobjs = nbobjs;
|
||||
dist->kind = kind;
|
||||
|
||||
if (!objs) {
|
||||
assert(indexes);
|
||||
/* we only have indexes, we'll refresh objs from there */
|
||||
dist->indexes = indexes;
|
||||
dist->objs = calloc(nbobjs, sizeof(hwloc_obj_t));
|
||||
if (!dist->objs)
|
||||
goto err_with_dist;
|
||||
dist->objs_are_valid = 0;
|
||||
|
||||
} else {
|
||||
unsigned i;
|
||||
assert(!indexes);
|
||||
/* we only have objs, generate the indexes arrays so that we can refresh objs later */
|
||||
dist->objs = objs;
|
||||
dist->objs_are_valid = 1;
|
||||
dist->indexes = malloc(nbobjs * sizeof(*dist->indexes));
|
||||
if (!dist->indexes)
|
||||
goto err_with_dist;
|
||||
if (dist->type == HWLOC_OBJ_PU || dist->type == HWLOC_OBJ_NUMANODE) {
|
||||
for(i=0; i<nbobjs; i++)
|
||||
dist->indexes[i] = objs[i]->os_index;
|
||||
} else {
|
||||
for(i=0; i<nbobjs; i++)
|
||||
dist->indexes[i] = objs[i]->gp_index;
|
||||
}
|
||||
}
|
||||
|
||||
dist->values = values;
|
||||
|
||||
dist->id = topology->next_dist_id++;
|
||||
|
||||
if (topology->last_dist)
|
||||
topology->last_dist->next = dist;
|
||||
else
|
||||
topology->first_dist = dist;
|
||||
dist->prev = topology->last_dist;
|
||||
dist->next = NULL;
|
||||
topology->last_dist = dist;
|
||||
return 0;
|
||||
|
||||
err_with_dist:
|
||||
free(dist);
|
||||
err:
|
||||
free(objs);
|
||||
free(indexes);
|
||||
free(values);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int hwloc_internal_distances_add_by_index(hwloc_topology_t topology,
|
||||
hwloc_obj_type_t type, unsigned nbobjs, uint64_t *indexes, uint64_t *values,
|
||||
unsigned long kind, unsigned long flags)
|
||||
{
|
||||
if (nbobjs < 2) {
|
||||
errno = EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
/* cannot group without objects,
|
||||
* and we don't group from XML anyway since the hwloc that generated the XML should have grouped already.
|
||||
*/
|
||||
if (flags & HWLOC_DISTANCES_ADD_FLAG_GROUP) {
|
||||
errno = EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
return hwloc_internal_distances__add(topology, type, nbobjs, NULL, indexes, values, kind);
|
||||
|
||||
err:
|
||||
free(indexes);
|
||||
free(values);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int hwloc_internal_distances_add(hwloc_topology_t topology,
|
||||
unsigned nbobjs, hwloc_obj_t *objs, uint64_t *values,
|
||||
unsigned long kind, unsigned long flags)
|
||||
{
|
||||
if (nbobjs < 2) {
|
||||
errno = EINVAL;
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (topology->grouping && (flags & HWLOC_DISTANCES_ADD_FLAG_GROUP)) {
|
||||
float full_accuracy = 0.f;
|
||||
float *accuracies;
|
||||
unsigned nbaccuracies;
|
||||
|
||||
if (flags & HWLOC_DISTANCES_ADD_FLAG_GROUP_INACCURATE) {
|
||||
accuracies = topology->grouping_accuracies;
|
||||
nbaccuracies = topology->grouping_nbaccuracies;
|
||||
} else {
|
||||
accuracies = &full_accuracy;
|
||||
nbaccuracies = 1;
|
||||
}
|
||||
|
||||
if (topology->grouping_verbose) {
|
||||
unsigned i, j;
|
||||
int gp = (objs[0]->type != HWLOC_OBJ_NUMANODE && objs[0]->type != HWLOC_OBJ_PU);
|
||||
fprintf(stderr, "Trying to group objects using distance matrix:\n");
|
||||
fprintf(stderr, "%s", gp ? "gp_index" : "os_index");
|
||||
for(j=0; j<nbobjs; j++)
|
||||
fprintf(stderr, " % 5d", (int)(gp ? objs[j]->gp_index : objs[j]->os_index));
|
||||
fprintf(stderr, "\n");
|
||||
for(i=0; i<nbobjs; i++) {
|
||||
fprintf(stderr, " % 5d", (int)(gp ? objs[i]->gp_index : objs[i]->os_index));
|
||||
for(j=0; j<nbobjs; j++)
|
||||
fprintf(stderr, " % 5lld", (long long) values[i*nbobjs + j]);
|
||||
fprintf(stderr, "\n");
|
||||
}
|
||||
}
|
||||
|
||||
hwloc__groups_by_distances(topology, nbobjs, objs, values,
|
||||
kind, nbaccuracies, accuracies, 1 /* check the first matrice */);
|
||||
}
|
||||
|
||||
return hwloc_internal_distances__add(topology, objs[0]->type, nbobjs, objs, NULL, values, kind);
|
||||
|
||||
err:
|
||||
free(objs);
|
||||
free(values);
|
||||
return -1;
|
||||
}
|
||||
|
||||
#define HWLOC_DISTANCES_KIND_FROM_ALL (HWLOC_DISTANCES_KIND_FROM_OS|HWLOC_DISTANCES_KIND_FROM_USER)
|
||||
#define HWLOC_DISTANCES_KIND_MEANS_ALL (HWLOC_DISTANCES_KIND_MEANS_LATENCY|HWLOC_DISTANCES_KIND_MEANS_BANDWIDTH)
|
||||
#define HWLOC_DISTANCES_KIND_ALL (HWLOC_DISTANCES_KIND_FROM_ALL|HWLOC_DISTANCES_KIND_MEANS_ALL)
|
||||
#define HWLOC_DISTANCES_ADD_FLAG_ALL (HWLOC_DISTANCES_ADD_FLAG_GROUP|HWLOC_DISTANCES_ADD_FLAG_GROUP_INACCURATE)
|
||||
|
||||
/* The actual function exported to the user
|
||||
*/
|
||||
int hwloc_distances_add(hwloc_topology_t topology,
|
||||
unsigned nbobjs, hwloc_obj_t *objs, hwloc_uint64_t *values,
|
||||
unsigned long kind, unsigned long flags)
|
||||
{
|
||||
hwloc_obj_type_t type;
|
||||
unsigned i;
|
||||
uint64_t *_values;
|
||||
hwloc_obj_t *_objs;
|
||||
int err;
|
||||
|
||||
if (nbobjs < 2 || !objs || !values || !topology->is_loaded) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
if ((kind & ~HWLOC_DISTANCES_KIND_ALL)
|
||||
|| hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_FROM_ALL) != 1
|
||||
|| hwloc_weight_long(kind & HWLOC_DISTANCES_KIND_MEANS_ALL) != 1
|
||||
|| (flags & ~HWLOC_DISTANCES_ADD_FLAG_ALL)) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* no strict need to check for duplicates, things shouldn't break */
|
||||
|
||||
type = objs[0]->type;
|
||||
if (type == HWLOC_OBJ_GROUP) {
|
||||
/* not supported yet, would require we save the subkind together with the type. */
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
for(i=1; i<nbobjs; i++)
|
||||
if (!objs[i] || objs[i]->type != type) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* copy the input arrays and give them to the topology */
|
||||
_objs = malloc(nbobjs*sizeof(hwloc_obj_t));
|
||||
_values = malloc(nbobjs*nbobjs*sizeof(*_values));
|
||||
if (!_objs || !_values)
|
||||
goto out_with_arrays;
|
||||
|
||||
memcpy(_objs, objs, nbobjs*sizeof(hwloc_obj_t));
|
||||
memcpy(_values, values, nbobjs*nbobjs*sizeof(*_values));
|
||||
err = hwloc_internal_distances_add(topology, nbobjs, _objs, _values, kind, flags);
|
||||
if (err < 0)
|
||||
goto out; /* _objs and _values freed in hwloc_internal_distances_add() */
|
||||
|
||||
/* in case we added some groups, see if we need to reconnect */
|
||||
hwloc_topology_reconnect(topology, 0);
|
||||
|
||||
return 0;
|
||||
|
||||
out_with_arrays:
|
||||
free(_values);
|
||||
free(_objs);
|
||||
out:
|
||||
return -1;
|
||||
}
|
||||
|
||||
/******************************************************
|
||||
* Refresh objects in distances
|
||||
*/
|
||||
|
||||
static hwloc_obj_t hwloc_find_obj_by_type_and_gp_index(hwloc_topology_t topology, hwloc_obj_type_t type, uint64_t gp_index)
|
||||
{
|
||||
hwloc_obj_t obj = hwloc_get_obj_by_type(topology, type, 0);
|
||||
while (obj) {
|
||||
if (obj->gp_index == gp_index)
|
||||
return obj;
|
||||
obj = obj->next_cousin;
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static void
|
||||
hwloc_internal_distances_restrict(struct hwloc_internal_distances_s *dist,
|
||||
hwloc_obj_t *objs,
|
||||
unsigned disappeared)
|
||||
{
|
||||
unsigned nbobjs = dist->nbobjs;
|
||||
unsigned i, newi;
|
||||
unsigned j, newj;
|
||||
|
||||
for(i=0, newi=0; i<nbobjs; i++)
|
||||
if (objs[i]) {
|
||||
for(j=0, newj=0; j<nbobjs; j++)
|
||||
if (objs[j]) {
|
||||
dist->values[newi*(nbobjs-disappeared)+newj] = dist->values[i*nbobjs+j];
|
||||
newj++;
|
||||
}
|
||||
newi++;
|
||||
}
|
||||
|
||||
for(i=0, newi=0; i<nbobjs; i++)
|
||||
if (objs[i]) {
|
||||
objs[newi] = objs[i];
|
||||
dist->indexes[newi] = dist->indexes[i];
|
||||
newi++;
|
||||
}
|
||||
|
||||
dist->nbobjs -= disappeared;
|
||||
}
|
||||
|
||||
static int
|
||||
hwloc_internal_distances_refresh_one(hwloc_topology_t topology,
|
||||
struct hwloc_internal_distances_s *dist)
|
||||
{
|
||||
hwloc_obj_type_t type = dist->type;
|
||||
unsigned nbobjs = dist->nbobjs;
|
||||
hwloc_obj_t *objs = dist->objs;
|
||||
uint64_t *indexes = dist->indexes;
|
||||
unsigned disappeared = 0;
|
||||
unsigned i;
|
||||
|
||||
if (dist->objs_are_valid)
|
||||
return 0;
|
||||
|
||||
for(i=0; i<nbobjs; i++) {
|
||||
hwloc_obj_t obj;
|
||||
/* TODO use cpuset/nodeset to find pus/numas from the root?
|
||||
* faster than traversing the entire level?
|
||||
*/
|
||||
if (type == HWLOC_OBJ_PU)
|
||||
obj = hwloc_get_pu_obj_by_os_index(topology, (unsigned) indexes[i]);
|
||||
else if (type == HWLOC_OBJ_NUMANODE)
|
||||
obj = hwloc_get_numanode_obj_by_os_index(topology, (unsigned) indexes[i]);
|
||||
else
|
||||
obj = hwloc_find_obj_by_type_and_gp_index(topology, type, indexes[i]);
|
||||
objs[i] = obj;
|
||||
if (!obj)
|
||||
disappeared++;
|
||||
}
|
||||
|
||||
if (nbobjs-disappeared < 2)
|
||||
/* became useless, drop */
|
||||
return -1;
|
||||
|
||||
if (disappeared)
|
||||
hwloc_internal_distances_restrict(dist, objs, disappeared);
|
||||
|
||||
dist->objs_are_valid = 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
/* This function may be called with topology->tma set, it cannot free() or realloc() */
|
||||
void
|
||||
hwloc_internal_distances_refresh(hwloc_topology_t topology)
|
||||
{
|
||||
struct hwloc_internal_distances_s *dist, *next;
|
||||
|
||||
for(dist = topology->first_dist; dist; dist = next) {
|
||||
next = dist->next;
|
||||
|
||||
if (hwloc_internal_distances_refresh_one(topology, dist) < 0) {
|
||||
assert(!topology->tma || !topology->tma->dontfree); /* this tma cannot fail to allocate */
|
||||
if (dist->prev)
|
||||
dist->prev->next = next;
|
||||
else
|
||||
topology->first_dist = next;
|
||||
if (next)
|
||||
next->prev = dist->prev;
|
||||
else
|
||||
topology->last_dist = dist->prev;
|
||||
hwloc_internal_distances_free(dist);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
hwloc_internal_distances_invalidate_cached_objs(hwloc_topology_t topology)
|
||||
{
|
||||
struct hwloc_internal_distances_s *dist;
|
||||
for(dist = topology->first_dist; dist; dist = dist->next)
|
||||
dist->objs_are_valid = 0;
|
||||
}
|
||||
|
||||
/******************************************************
|
||||
* User API for getting distances
|
||||
*/
|
||||
|
||||
void
|
||||
hwloc_distances_release(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
struct hwloc_distances_s *distances)
|
||||
{
|
||||
free(distances->values);
|
||||
free(distances->objs);
|
||||
free(distances);
|
||||
}
|
||||
|
||||
static struct hwloc_distances_s *
|
||||
hwloc_distances_get_one(hwloc_topology_t topology __hwloc_attribute_unused,
|
||||
struct hwloc_internal_distances_s *dist)
|
||||
{
|
||||
struct hwloc_distances_s *distances;
|
||||
unsigned nbobjs;
|
||||
|
||||
distances = malloc(sizeof(*distances));
|
||||
if (!distances)
|
||||
return NULL;
|
||||
|
||||
nbobjs = distances->nbobjs = dist->nbobjs;
|
||||
|
||||
distances->objs = malloc(nbobjs * sizeof(hwloc_obj_t));
|
||||
if (!distances->objs)
|
||||
goto out;
|
||||
memcpy(distances->objs, dist->objs, nbobjs * sizeof(hwloc_obj_t));
|
||||
|
||||
distances->values = malloc(nbobjs * nbobjs * sizeof(*distances->values));
|
||||
if (!distances->values)
|
||||
goto out_with_objs;
|
||||
memcpy(distances->values, dist->values, nbobjs*nbobjs*sizeof(*distances->values));
|
||||
|
||||
distances->kind = dist->kind;
|
||||
return distances;
|
||||
|
||||
out_with_objs:
|
||||
free(distances->objs);
|
||||
out:
|
||||
free(distances);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static int
|
||||
hwloc__distances_get(hwloc_topology_t topology,
|
||||
hwloc_obj_type_t type,
|
||||
unsigned *nrp, struct hwloc_distances_s **distancesp,
|
||||
unsigned long kind, unsigned long flags __hwloc_attribute_unused)
|
||||
{
|
||||
struct hwloc_internal_distances_s *dist;
|
||||
unsigned nr = 0, i;
|
||||
|
||||
/* We could return the internal arrays (as const),
|
||||
* but it would require to prevent removing distances between get() and free().
|
||||
* Not performance critical anyway.
|
||||
*/
|
||||
|
||||
if (flags) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* we could refresh only the distances that match, but we won't have many distances anyway,
|
||||
* so performance is totally negligible.
|
||||
*
|
||||
* This is also useful in multithreaded apps that modify the topology.
|
||||
* They can call any valid hwloc_distances_get() to force a refresh after
|
||||
* changing the topology, so that future concurrent get() won't cause
|
||||
* concurrent refresh().
|
||||
*/
|
||||
hwloc_internal_distances_refresh(topology);
|
||||
|
||||
for(dist = topology->first_dist; dist; dist = dist->next) {
|
||||
unsigned long kind_from = kind & HWLOC_DISTANCES_KIND_FROM_ALL;
|
||||
unsigned long kind_means = kind & HWLOC_DISTANCES_KIND_MEANS_ALL;
|
||||
|
||||
if (type != HWLOC_OBJ_TYPE_NONE && type != dist->type)
|
||||
continue;
|
||||
|
||||
if (kind_from && !(kind_from & dist->kind))
|
||||
continue;
|
||||
if (kind_means && !(kind_means & dist->kind))
|
||||
continue;
|
||||
|
||||
if (nr < *nrp) {
|
||||
struct hwloc_distances_s *distances = hwloc_distances_get_one(topology, dist);
|
||||
if (!distances)
|
||||
goto error;
|
||||
distancesp[nr] = distances;
|
||||
}
|
||||
nr++;
|
||||
}
|
||||
|
||||
for(i=nr; i<*nrp; i++)
|
||||
distancesp[i] = NULL;
|
||||
*nrp = nr;
|
||||
return 0;
|
||||
|
||||
error:
|
||||
for(i=0; i<nr; i++)
|
||||
hwloc_distances_release(topology, distancesp[i]);
|
||||
return -1;
|
||||
}
|
||||
|
||||
int
|
||||
hwloc_distances_get(hwloc_topology_t topology,
|
||||
unsigned *nrp, struct hwloc_distances_s **distancesp,
|
||||
unsigned long kind, unsigned long flags)
|
||||
{
|
||||
if (flags || !topology->is_loaded) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
return hwloc__distances_get(topology, HWLOC_OBJ_TYPE_NONE, nrp, distancesp, kind, flags);
|
||||
}
|
||||
|
||||
int
|
||||
hwloc_distances_get_by_depth(hwloc_topology_t topology, int depth,
|
||||
unsigned *nrp, struct hwloc_distances_s **distancesp,
|
||||
unsigned long kind, unsigned long flags)
|
||||
{
|
||||
hwloc_obj_type_t type;
|
||||
|
||||
if (flags || !topology->is_loaded) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
/* switch back to types since we don't support groups for now */
|
||||
type = hwloc_get_depth_type(topology, depth);
|
||||
if (type == (hwloc_obj_type_t)-1) {
|
||||
errno = EINVAL;
|
||||
return -1;
|
||||
}
|
||||
|
||||
return hwloc__distances_get(topology, type, nrp, distancesp, kind, flags);
|
||||
}
|
||||
|
||||
/******************************************************
|
||||
* Grouping objects according to distances
|
||||
*/
|
||||
|
||||
static void hwloc_report_user_distance_error(const char *msg, int line)
|
||||
{
|
||||
static int reported = 0;
|
||||
|
||||
if (!reported && !hwloc_hide_errors()) {
|
||||
fprintf(stderr, "****************************************************************************\n");
|
||||
fprintf(stderr, "* hwloc %s was given invalid distances by the user.\n", HWLOC_VERSION);
|
||||
fprintf(stderr, "*\n");
|
||||
fprintf(stderr, "* %s\n", msg);
|
||||
fprintf(stderr, "* Error occurred in topology.c line %d\n", line);
|
||||
fprintf(stderr, "*\n");
|
||||
fprintf(stderr, "* Please make sure that distances given through the programming API\n");
|
||||
fprintf(stderr, "* do not contradict any other topology information.\n");
|
||||
fprintf(stderr, "* \n");
|
||||
fprintf(stderr, "* hwloc will now ignore this invalid topology information and continue.\n");
|
||||
fprintf(stderr, "****************************************************************************\n");
|
||||
reported = 1;
|
||||
}
|
||||
}
|
||||
|
||||
static int hwloc_compare_values(uint64_t a, uint64_t b, float accuracy)
|
||||
{
|
||||
if (accuracy != 0.0f && fabsf((float)a-(float)b) < (float)a * accuracy)
|
||||
return 0;
|
||||
return a < b ? -1 : a == b ? 0 : 1;
|
||||
}
|
||||
|
||||
/*
|
||||
* Place objects in groups if they are in a transitive graph of minimal values.
|
||||
* Return how many groups were created, or 0 if some incomplete distance graphs were found.
|
||||
*/
|
||||
static unsigned
|
||||
hwloc__find_groups_by_min_distance(unsigned nbobjs,
|
||||
uint64_t *_values,
|
||||
float accuracy,
|
||||
unsigned *groupids,
|
||||
int verbose)
|
||||
{
|
||||
uint64_t min_distance = UINT64_MAX;
|
||||
unsigned groupid = 1;
|
||||
unsigned i,j,k;
|
||||
unsigned skipped = 0;
|
||||
|
||||
#define VALUE(i, j) _values[(i) * nbobjs + (j)]
|
||||
|
||||
memset(groupids, 0, nbobjs*sizeof(*groupids));
|
||||
|
||||
/* find the minimal distance */
|
||||
for(i=0; i<nbobjs; i++)
|
||||
for(j=0; j<nbobjs; j++) /* check the entire matrix, it may not be perfectly symmetric depending on the accuracy */
|
||||
if (i != j && VALUE(i, j) < min_distance) /* no accuracy here, we want the real minimal */
|
||||
min_distance = VALUE(i, j);
|
||||
hwloc_debug(" found minimal distance %llu between objects\n", (unsigned long long) min_distance);
|
||||
|
||||
if (min_distance == UINT64_MAX)
|
||||
return 0;
|
||||
|
||||
/* build groups of objects connected with this distance */
|
||||
for(i=0; i<nbobjs; i++) {
|
||||
unsigned size;
|
||||
unsigned firstfound;
|
||||
|
||||
/* if already grouped, skip */
|
||||
if (groupids[i])
|
||||
continue;
|
||||
|
||||
/* start a new group */
|
||||
groupids[i] = groupid;
|
||||
size = 1;
|
||||
firstfound = i;
|
||||
|
||||
while (firstfound != (unsigned)-1) {
|
||||
/* we added new objects to the group, the first one was firstfound.
|
||||
* rescan all connections from these new objects (starting at first found) to any other objects,
|
||||
* so as to find new objects minimally-connected by transivity.
|
||||
*/
|
||||
unsigned newfirstfound = (unsigned)-1;
|
||||
for(j=firstfound; j<nbobjs; j++)
|
||||
if (groupids[j] == groupid)
|
||||
for(k=0; k<nbobjs; k++)
|
||||
if (!groupids[k] && !hwloc_compare_values(VALUE(j, k), min_distance, accuracy)) {
|
||||
groupids[k] = groupid;
|
||||
size++;
|
||||
if (newfirstfound == (unsigned)-1)
|
||||
newfirstfound = k;
|
||||
if (i == j)
|
||||
hwloc_debug(" object %u is minimally connected to %u\n", k, i);
|
||||
else
|
||||
hwloc_debug(" object %u is minimally connected to %u through %u\n", k, i, j);
|
||||
}
|
||||
firstfound = newfirstfound;
|
||||
}
|
||||
|
||||
if (size == 1) {
|
||||
/* cancel this useless group, ignore this object and try from the next one */
|
||||
groupids[i] = 0;
|
||||
skipped++;
|
||||
continue;
|
||||
}
|
||||
|
||||
/* valid this group */
|
||||
groupid++;
|
||||
if (verbose)
|
||||
fprintf(stderr, " Found transitive graph with %u objects with minimal distance %llu accuracy %f\n",
|
||||
size, (unsigned long long) min_distance, accuracy);
|
||||
}
|
||||
|
||||
if (groupid == 2 && !skipped)
|
||||
/* we created a single group containing all objects, ignore it */
|
||||
return 0;
|
||||
|
||||
/* return the last id, since it's also the number of used group ids */
|
||||
return groupid-1;
|
||||
}
|
||||
|
||||
/* check that the matrix is ok */
|
||||
static int
|
||||
hwloc__check_grouping_matrix(unsigned nbobjs, uint64_t *_values, float accuracy, int verbose)
|
||||
{
|
||||
unsigned i,j;
|
||||
for(i=0; i<nbobjs; i++) {
|
||||
for(j=i+1; j<nbobjs; j++) {
|
||||
/* should be symmetric */
|
||||
if (hwloc_compare_values(VALUE(i, j), VALUE(j, i), accuracy)) {
|
||||
if (verbose)
|
||||
fprintf(stderr, " Distance matrix asymmetric ([%u,%u]=%llu != [%u,%u]=%llu), aborting\n",
|
||||
i, j, (unsigned long long) VALUE(i, j), j, i, (unsigned long long) VALUE(j, i));
|
||||
return -1;
|
||||
}
|
||||
/* diagonal is smaller than everything else */
|
||||
if (hwloc_compare_values(VALUE(i, j), VALUE(i, i), accuracy) <= 0) {
|
||||
if (verbose)
|
||||
fprintf(stderr, " Distance to self not strictly minimal ([%u,%u]=%llu <= [%u,%u]=%llu), aborting\n",
|
||||
i, j, (unsigned long long) VALUE(i, j), i, i, (unsigned long long) VALUE(i, i));
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
/*
|
||||
* Look at object physical distances to group them.
|
||||
*/
|
||||
static void
|
||||
hwloc__groups_by_distances(struct hwloc_topology *topology,
|
||||
unsigned nbobjs,
|
||||
struct hwloc_obj **objs,
|
||||
uint64_t *_values,
|
||||
unsigned long kind,
|
||||
unsigned nbaccuracies,
|
||||
float *accuracies,
|
||||
int needcheck)
|
||||
{
|
||||
HWLOC_VLA(unsigned, groupids, nbobjs);
|
||||
unsigned nbgroups = 0;
|
||||
unsigned i,j;
|
||||
int verbose = topology->grouping_verbose;
|
||||
|
||||
if (nbobjs <= 2)
|
||||
return;
|
||||
|
||||
if (!(kind & HWLOC_DISTANCES_KIND_MEANS_LATENCY))
|
||||
/* don't know use to use those for grouping */
|
||||
/* TODO hwloc__find_groups_by_max_distance() for bandwidth */
|
||||
return;
|
||||
|
||||
for(i=0; i<nbaccuracies; i++) {
|
||||
if (verbose)
|
||||
fprintf(stderr, "Trying to group %u %s objects according to physical distances with accuracy %f\n",
|
||||
nbobjs, hwloc_obj_type_string(objs[0]->type), accuracies[i]);
|
||||
if (needcheck && hwloc__check_grouping_matrix(nbobjs, _values, accuracies[i], verbose) < 0)
|
||||
continue;
|
||||
nbgroups = hwloc__find_groups_by_min_distance(nbobjs, _values, accuracies[i], groupids, verbose);
|
||||
if (nbgroups)
|
||||
break;
|
||||
}
|
||||
if (!nbgroups)
|
||||
return;
|
||||
|
||||
{
|
||||
HWLOC_VLA(hwloc_obj_t, groupobjs, nbgroups);
|
||||
HWLOC_VLA(unsigned, groupsizes, nbgroups);
|
||||
HWLOC_VLA(uint64_t, groupvalues, nbgroups*nbgroups);
|
||||
unsigned failed = 0;
|
||||
|
||||
/* create new Group objects and record their size */
|
||||
memset(&(groupsizes[0]), 0, sizeof(groupsizes[0]) * nbgroups);
|
||||
for(i=0; i<nbgroups; i++) {
|
||||
/* create the Group object */
|
||||
hwloc_obj_t group_obj, res_obj;
|
||||
group_obj = hwloc_alloc_setup_object(topology, HWLOC_OBJ_GROUP, HWLOC_UNKNOWN_INDEX);
|
||||
group_obj->cpuset = hwloc_bitmap_alloc();
|
||||
group_obj->attr->group.kind = HWLOC_GROUP_KIND_DISTANCE;
|
||||
group_obj->attr->group.subkind = topology->grouping_next_subkind;
|
||||
for (j=0; j<nbobjs; j++)
|
||||
if (groupids[j] == i+1) {
|
||||
/* assemble the group sets */
|
||||
hwloc_obj_add_other_obj_sets(group_obj, objs[j]);
|
||||
groupsizes[i]++;
|
||||
}
|
||||
hwloc_debug_1arg_bitmap("adding Group object with %u objects and cpuset %s\n",
|
||||
groupsizes[i], group_obj->cpuset);
|
||||
res_obj = hwloc__insert_object_by_cpuset(topology, NULL, group_obj,
|
||||
(kind & HWLOC_DISTANCES_KIND_FROM_USER) ? hwloc_report_user_distance_error : hwloc_report_os_error);
|
||||
/* res_obj may be NULL on failure to insert. */
|
||||
if (!res_obj)
|
||||
failed++;
|
||||
/* or it may be different from groupobjs if we got groups from XML import before grouping */
|
||||
groupobjs[i] = res_obj;
|
||||
}
|
||||
topology->grouping_next_subkind++;
|
||||
|
||||
if (failed)
|
||||
/* don't try to group above if we got a NULL group here, just keep this incomplete level */
|
||||
return;
|
||||
|
||||
/* factorize values */
|
||||
memset(&(groupvalues[0]), 0, sizeof(groupvalues[0]) * nbgroups * nbgroups);
|
||||
#undef VALUE
|
||||
#define VALUE(i, j) _values[(i) * nbobjs + (j)]
|
||||
#define GROUP_VALUE(i, j) groupvalues[(i) * nbgroups + (j)]
|
||||
for(i=0; i<nbobjs; i++)
|
||||
if (groupids[i])
|
||||
for(j=0; j<nbobjs; j++)
|
||||
if (groupids[j])
|
||||
GROUP_VALUE(groupids[i]-1, groupids[j]-1) += VALUE(i, j);
|
||||
for(i=0; i<nbgroups; i++)
|
||||
for(j=0; j<nbgroups; j++) {
|
||||
unsigned groupsize = groupsizes[i]*groupsizes[j];
|
||||
GROUP_VALUE(i, j) /= groupsize;
|
||||
}
|
||||
#ifdef HWLOC_DEBUG
|
||||
hwloc_debug("%s", "generated new distance matrix between groups:\n");
|
||||
hwloc_debug("%s", " index");
|
||||
for(j=0; j<nbgroups; j++)
|
||||
hwloc_debug(" % 5d", (int) j); /* print index because os_index is -1 for Groups */
|
||||
hwloc_debug("%s", "\n");
|
||||
for(i=0; i<nbgroups; i++) {
|
||||
hwloc_debug(" % 5d", (int) i);
|
||||
for(j=0; j<nbgroups; j++)
|
||||
hwloc_debug(" %llu", (unsigned long long) GROUP_VALUE(i, j));
|
||||
hwloc_debug("%s", "\n");
|
||||
}
|
||||
#endif
|
||||
|
||||
hwloc__groups_by_distances(topology, nbgroups, groupobjs, groupvalues, kind, nbaccuracies, accuracies, 0 /* no need to check generated matrix */);
|
||||
}
|
||||
}
|
||||
Reference in New Issue
Block a user