/*
 * Copyright © 2020-2024 Inria. All rights reserved.
 * See COPYING in top-level directory.
 */

#include "private/autogen/config.h"
#include "hwloc.h"
#include "private/private.h"
#include "private/debug.h"


/*****************************
 * Attributes
 */

static __hwloc_inline int
hwloc__memattr_get_convenience_value(hwloc_memattr_id_t id,
                                     hwloc_obj_t node,
                                     hwloc_uint64_t *valuep)
{
  if (id == HWLOC_MEMATTR_ID_CAPACITY) {
    if (node->type != HWLOC_OBJ_NUMANODE) {
      errno = EINVAL;
      return -1;
    }
    *valuep = node->attr->numanode.local_memory;
    return 0;

  } else if (id == HWLOC_MEMATTR_ID_LOCALITY) {
    if (!node->cpuset) {
      errno = EINVAL;
      return -1;
    }
    *valuep = hwloc_bitmap_weight(node->cpuset);
    return 0;

  } else
    assert(0);
  return 0; /* shut up the compiler */
}

void
hwloc_internal_memattrs_init(struct hwloc_topology *topology)
{
  topology->nr_memattrs = 0;
  topology->memattrs = NULL;
}

static void
hwloc__setup_memattr(struct hwloc_internal_memattr_s *imattr,
                     char *name,
                     unsigned long flags,
                     unsigned long iflags)
{
  imattr->name = name;
  imattr->flags = flags;
  imattr->iflags = iflags;

  imattr->nr_targets = 0;
  imattr->targets = NULL;
}

void
hwloc_internal_memattrs_prepare(struct hwloc_topology *topology)
{
  topology->memattrs = malloc(HWLOC_MEMATTR_ID_MAX * sizeof(*topology->memattrs));
  if (!topology->memattrs)
    return;

  hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_CAPACITY],
                       (char *) "Capacity",
                       HWLOC_MEMATTR_FLAG_HIGHER_FIRST,
                       HWLOC_IMATTR_FLAG_STATIC_NAME|HWLOC_IMATTR_FLAG_CONVENIENCE);

  hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_LOCALITY],
                       (char *) "Locality",
                       HWLOC_MEMATTR_FLAG_LOWER_FIRST,
                       HWLOC_IMATTR_FLAG_STATIC_NAME|HWLOC_IMATTR_FLAG_CONVENIENCE);

  hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_BANDWIDTH],
                       (char *) "Bandwidth",
                       HWLOC_MEMATTR_FLAG_HIGHER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR,
                       HWLOC_IMATTR_FLAG_STATIC_NAME);

  hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_READ_BANDWIDTH],
                       (char *) "ReadBandwidth",
                       HWLOC_MEMATTR_FLAG_HIGHER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR,
                       HWLOC_IMATTR_FLAG_STATIC_NAME);

  hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_WRITE_BANDWIDTH],
                       (char *) "WriteBandwidth",
                       HWLOC_MEMATTR_FLAG_HIGHER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR,
                       HWLOC_IMATTR_FLAG_STATIC_NAME);

  hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_LATENCY],
                       (char *) "Latency",
                       HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR,
                       HWLOC_IMATTR_FLAG_STATIC_NAME);

  hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_READ_LATENCY],
                       (char *) "ReadLatency",
                       HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR,
                       HWLOC_IMATTR_FLAG_STATIC_NAME);

  hwloc__setup_memattr(&topology->memattrs[HWLOC_MEMATTR_ID_WRITE_LATENCY],
                       (char *) "WriteLatency",
                       HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_NEED_INITIATOR,
                       HWLOC_IMATTR_FLAG_STATIC_NAME);

  topology->nr_memattrs = HWLOC_MEMATTR_ID_MAX;
}

static void
hwloc__imi_destroy(struct hwloc_internal_memattr_initiator_s *imi)
{
  if (imi->initiator.type == HWLOC_LOCATION_TYPE_CPUSET)
    hwloc_bitmap_free(imi->initiator.location.cpuset);
}

static void
hwloc__imtg_destroy(struct hwloc_internal_memattr_s *imattr,
                    struct hwloc_internal_memattr_target_s *imtg)
{
  if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) {
    /* only attributes with initiators may have something to free() in the array */
    unsigned k;
    for(k=0; k<imtg->nr_initiators; k++)
      hwloc__imi_destroy(&imtg->initiators[k]);
  }
  free(imtg->initiators);
}

void
hwloc_internal_memattrs_destroy(struct hwloc_topology *topology)
{
  unsigned id;
  for(id=0; id<topology->nr_memattrs; id++) {
    struct hwloc_internal_memattr_s *imattr = &topology->memattrs[id];
    unsigned j;
    for(j=0; j<imattr->nr_targets; j++)
      hwloc__imtg_destroy(imattr, &imattr->targets[j]);
    free(imattr->targets);
    if (!(imattr->iflags & HWLOC_IMATTR_FLAG_STATIC_NAME))
      free(imattr->name);
  }
  free(topology->memattrs);

  topology->memattrs = NULL;
  topology->nr_memattrs = 0;
}

int
hwloc_internal_memattrs_dup(struct hwloc_topology *new, struct hwloc_topology *old)
{
  struct hwloc_tma *tma = new->tma;
  struct hwloc_internal_memattr_s *imattrs;
  hwloc_memattr_id_t id;

  /* old->nr_memattrs is always > 0 thanks to default memattrs */
  imattrs = hwloc_tma_malloc(tma, old->nr_memattrs * sizeof(*imattrs));
  if (!imattrs)
    return -1;
  new->memattrs = imattrs;
  new->nr_memattrs = old->nr_memattrs;
  memcpy(imattrs, old->memattrs, old->nr_memattrs * sizeof(*imattrs));

  for(id=0; id<old->nr_memattrs; id++) {
    struct hwloc_internal_memattr_s *oimattr = &old->memattrs[id];
    struct hwloc_internal_memattr_s *nimattr = &imattrs[id];
    unsigned j;

    assert(oimattr->name);
    nimattr->name = hwloc_tma_strdup(tma, oimattr->name);
    if (!nimattr->name) {
      assert(!tma || !tma->dontfree); /* this tma cannot fail to allocate */
      new->nr_memattrs = id;
      goto failed;
    }
    nimattr->iflags &= ~HWLOC_IMATTR_FLAG_STATIC_NAME;
    nimattr->iflags &= ~HWLOC_IMATTR_FLAG_CACHE_VALID; /* cache will need refresh */

    if (!oimattr->nr_targets)
      continue;

    nimattr->targets = hwloc_tma_malloc(tma, oimattr->nr_targets * sizeof(*nimattr->targets));
    if (!nimattr->targets) {
      free(nimattr->name);
      new->nr_memattrs = id;
      goto failed;
    }
    memcpy(nimattr->targets, oimattr->targets, oimattr->nr_targets * sizeof(*nimattr->targets));

    for(j=0; j<oimattr->nr_targets; j++) {
      struct hwloc_internal_memattr_target_s *oimtg = &oimattr->targets[j];
      struct hwloc_internal_memattr_target_s *nimtg = &nimattr->targets[j];
      unsigned k;

      nimtg->obj = NULL; /* cache will need refresh */

      if (!oimtg->nr_initiators)
        continue;

      nimtg->initiators = hwloc_tma_malloc(tma, oimtg->nr_initiators * sizeof(*nimtg->initiators));
      if (!nimtg->initiators) {
        nimattr->nr_targets = j;
        new->nr_memattrs = id+1;
        goto failed;
      }
      memcpy(nimtg->initiators, oimtg->initiators, oimtg->nr_initiators * sizeof(*nimtg->initiators));

      for(k=0; k<oimtg->nr_initiators; k++) {
        struct hwloc_internal_memattr_initiator_s *oimi = &oimtg->initiators[k];
        struct hwloc_internal_memattr_initiator_s *nimi = &nimtg->initiators[k];

        if (oimi->initiator.type == HWLOC_LOCATION_TYPE_CPUSET) {
          nimi->initiator.location.cpuset = hwloc_bitmap_tma_dup(tma, oimi->initiator.location.cpuset);
          if (!nimi->initiator.location.cpuset) {
            nimtg->nr_initiators = k;
            nimattr->nr_targets = j+1;
            new->nr_memattrs = id+1;
            goto failed;
          }
        } else if (oimi->initiator.type == HWLOC_LOCATION_TYPE_OBJECT) {
          nimi->initiator.location.object.obj = NULL; /* cache will need refresh */
        }
      }
    }
  }
  return 0;

 failed:
  hwloc_internal_memattrs_destroy(new);
  return -1;
}

int
hwloc_memattr_get_by_name(hwloc_topology_t topology,
                          const char *name,
                          hwloc_memattr_id_t *idp)
{
  unsigned id;
  for(id=0; id<topology->nr_memattrs; id++) {
    if (!strcmp(topology->memattrs[id].name, name)) {
      *idp = id;
      return 0;
    }
  }
  errno = EINVAL;
  return -1;
}

int
hwloc_memattr_get_name(hwloc_topology_t topology,
                       hwloc_memattr_id_t id,
                       const char **namep)
{
  if (id >= topology->nr_memattrs) {
    errno = EINVAL;
    return -1;
  }
  *namep = topology->memattrs[id].name;
  return 0;
}
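/* Usage sketch (not part of the library): how a caller might combine the
 * lookup helpers above to find an attribute by name and check whether values
 * must be queried with an initiator. Variable names are illustrative and
 * error handling is shortened.
 *
 *   hwloc_memattr_id_t id;
 *   unsigned long flags;
 *   if (!hwloc_memattr_get_by_name(topology, "Bandwidth", &id)
 *       && !hwloc_memattr_get_flags(topology, id, &flags)) {
 *     if (flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) {
 *       // values of this attribute depend on an initiator location
 *     }
 *   }
 */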
int
hwloc_memattr_get_flags(hwloc_topology_t topology,
                        hwloc_memattr_id_t id,
                        unsigned long *flagsp)
{
  if (id >= topology->nr_memattrs) {
    errno = EINVAL;
    return -1;
  }
  *flagsp = topology->memattrs[id].flags;
  return 0;
}

int
hwloc_memattr_register(hwloc_topology_t topology,
                       const char *_name,
                       unsigned long flags,
                       hwloc_memattr_id_t *id)
{
  struct hwloc_internal_memattr_s *newattrs;
  char *name;
  unsigned i;

  /* check flags */
  if (flags & ~(HWLOC_MEMATTR_FLAG_NEED_INITIATOR|HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_HIGHER_FIRST)) {
    errno = EINVAL;
    return -1;
  }
  if (!(flags & (HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_HIGHER_FIRST))) {
    errno = EINVAL;
    return -1;
  }
  if ((flags & (HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_HIGHER_FIRST))
      == (HWLOC_MEMATTR_FLAG_LOWER_FIRST|HWLOC_MEMATTR_FLAG_HIGHER_FIRST)) {
    errno = EINVAL;
    return -1;
  }

  if (!_name) {
    errno = EINVAL;
    return -1;
  }

  /* check the name isn't already used */
  for(i=0; i<topology->nr_memattrs; i++) {
    if (!strcmp(_name, topology->memattrs[i].name)) {
      errno = EBUSY;
      return -1;
    }
  }

  name = strdup(_name);
  if (!name)
    return -1;

  newattrs = realloc(topology->memattrs, (topology->nr_memattrs + 1) * sizeof(*topology->memattrs));
  if (!newattrs) {
    free(name);
    return -1;
  }

  hwloc__setup_memattr(&newattrs[topology->nr_memattrs], name, flags, 0);

  /* memattr valid when just created */
  newattrs[topology->nr_memattrs].iflags |= HWLOC_IMATTR_FLAG_CACHE_VALID;

  *id = topology->nr_memattrs;
  topology->nr_memattrs++;
  topology->memattrs = newattrs;
  return 0;
}
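/* Usage sketch (not part of the library): registering an application-specific
 * attribute and attaching a value to a NUMA node. The attribute name and the
 * value are made up for the example; error handling is shortened.
 *
 *   hwloc_memattr_id_t myid;
 *   hwloc_obj_t node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 0);
 *   if (!hwloc_memattr_register(topology, "MyBandwidthEstimate",
 *                               HWLOC_MEMATTR_FLAG_HIGHER_FIRST, &myid))
 *     hwloc_memattr_set_value(topology, myid, node, NULL, 0, 42);
 */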
/***************************
 * Internal Locations
 */

/* return 1 if the cpuset/obj matches the existing initiator location,
 * for instance if the cpuset of the query is included in the cpuset of the existing one
 */
static int
match_internal_location(struct hwloc_internal_location_s *iloc,
                        struct hwloc_internal_memattr_initiator_s *imi)
{
  if (iloc->type != imi->initiator.type)
    return 0;
  switch (iloc->type) {
  case HWLOC_LOCATION_TYPE_CPUSET:
    return hwloc_bitmap_isincluded(iloc->location.cpuset, imi->initiator.location.cpuset);
  case HWLOC_LOCATION_TYPE_OBJECT:
    return iloc->location.object.type == imi->initiator.location.object.type
      && iloc->location.object.gp_index == imi->initiator.location.object.gp_index;
  default:
    return 0;
  }
}

static int
to_internal_location(struct hwloc_internal_location_s *iloc,
                     struct hwloc_location *location)
{
  iloc->type = location->type;

  switch (location->type) {
  case HWLOC_LOCATION_TYPE_CPUSET:
    if (!location->location.cpuset || hwloc_bitmap_iszero(location->location.cpuset)) {
      errno = EINVAL;
      return -1;
    }
    iloc->location.cpuset = location->location.cpuset;
    return 0;
  case HWLOC_LOCATION_TYPE_OBJECT:
    if (!location->location.object) {
      errno = EINVAL;
      return -1;
    }
    iloc->location.object.gp_index = location->location.object->gp_index;
    iloc->location.object.type = location->location.object->type;
    return 0;
  default:
    errno = EINVAL;
    return -1;
  }
}

static int
from_internal_location(struct hwloc_internal_location_s *iloc,
                       struct hwloc_location *location)
{
  location->type = iloc->type;

  switch (iloc->type) {
  case HWLOC_LOCATION_TYPE_CPUSET:
    location->location.cpuset = iloc->location.cpuset;
    return 0;
  case HWLOC_LOCATION_TYPE_OBJECT:
    /* requires the cache to be refreshed */
    location->location.object = iloc->location.object.obj;
    if (!location->location.object)
      return -1;
    return 0;
  default:
    errno = EINVAL;
    return -1;
  }
}


/************************
 * Refreshing
 */

static int
hwloc__imi_refresh(struct hwloc_topology *topology,
                   struct hwloc_internal_memattr_initiator_s *imi)
{
  switch (imi->initiator.type) {
  case HWLOC_LOCATION_TYPE_CPUSET: {
    hwloc_bitmap_and(imi->initiator.location.cpuset, imi->initiator.location.cpuset, topology->levels[0][0]->cpuset);
    if (hwloc_bitmap_iszero(imi->initiator.location.cpuset)) {
      hwloc__imi_destroy(imi);
      return -1;
    }
    return 0;
  }
  case HWLOC_LOCATION_TYPE_OBJECT: {
    hwloc_obj_t obj = hwloc_get_obj_by_type_and_gp_index(topology,
                                                         imi->initiator.location.object.type,
                                                         imi->initiator.location.object.gp_index);
    if (!obj) {
      hwloc__imi_destroy(imi);
      return -1;
    }
    imi->initiator.location.object.obj = obj;
    return 0;
  }
  default:
    assert(0);
  }
  return -1;
}

static int
hwloc__imtg_refresh(struct hwloc_topology *topology,
                    struct hwloc_internal_memattr_s *imattr,
                    struct hwloc_internal_memattr_target_s *imtg)
{
  hwloc_obj_t node;

  /* no need to refresh convenience memattrs */
  assert(!(imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE));

  /* check the target object */
  if (imtg->gp_index == (hwloc_uint64_t) -1) {
    /* only NUMA and PU may work with os_index, and only NUMA is currently used internally */
    if (imtg->type == HWLOC_OBJ_NUMANODE)
      node = hwloc_get_numanode_obj_by_os_index(topology, imtg->os_index);
    else if (imtg->type == HWLOC_OBJ_PU)
      node = hwloc_get_pu_obj_by_os_index(topology, imtg->os_index);
    else
      node = NULL;
  } else {
    node = hwloc_get_obj_by_type_and_gp_index(topology, imtg->type, imtg->gp_index);
  }
  if (!node) {
    hwloc__imtg_destroy(imattr, imtg);
    return -1;
  }

  /* save the gp_index in case it wasn't initialized yet */
  imtg->gp_index = node->gp_index;
  /* cache the object */
  imtg->obj = node;

  if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) {
    /* check the initiators */
    unsigned k, l;
    for(k=0, l=0; k<imtg->nr_initiators; k++) {
      int err = hwloc__imi_refresh(topology, &imtg->initiators[k]);
      if (err < 0)
        continue;
      if (k != l)
        memcpy(&imtg->initiators[l], &imtg->initiators[k], sizeof(*imtg->initiators));
      l++;
    }
    imtg->nr_initiators = l;
    if (!imtg->nr_initiators) {
      hwloc__imtg_destroy(imattr, imtg);
      return -1;
    }
  }
  return 0;
}

static void
hwloc__imattr_refresh(struct hwloc_topology *topology,
                      struct hwloc_internal_memattr_s *imattr)
{
  unsigned j, k;
  for(j=0, k=0; j<imattr->nr_targets; j++) {
    int ret = hwloc__imtg_refresh(topology, imattr, &imattr->targets[j]);
    if (!ret) {
      /* target still valid, move it if some former targets were removed */
      if (j != k)
        memcpy(&imattr->targets[k], &imattr->targets[j], sizeof(*imattr->targets));
      k++;
    }
  }
  imattr->nr_targets = k;
  imattr->iflags |= HWLOC_IMATTR_FLAG_CACHE_VALID;
}

void
hwloc_internal_memattrs_refresh(struct hwloc_topology *topology)
{
  unsigned id;
  for(id=0; id<topology->nr_memattrs; id++) {
    struct hwloc_internal_memattr_s *imattr = &topology->memattrs[id];
    if (imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID)
      /* nothing to refresh */
      continue;
    hwloc__imattr_refresh(topology, imattr);
  }
}

void
hwloc_internal_memattrs_need_refresh(struct hwloc_topology *topology)
{
  unsigned id;
  for(id=0; id<topology->nr_memattrs; id++) {
    struct hwloc_internal_memattr_s *imattr = &topology->memattrs[id];
    if (imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE)
      /* no need to refresh convenience memattrs */
      continue;
    imattr->iflags &= ~HWLOC_IMATTR_FLAG_CACHE_VALID;
  }
}


/********************************
 * Targets
 */

static struct hwloc_internal_memattr_target_s *
hwloc__memattr_get_target(struct hwloc_internal_memattr_s *imattr,
                          hwloc_obj_type_t target_type,
                          hwloc_uint64_t target_gp_index,
                          unsigned target_os_index,
                          int create)
{
  struct hwloc_internal_memattr_target_s *news, *new;
  unsigned j;

  for(j=0; j<imattr->nr_targets; j++) {
    if (target_type == imattr->targets[j].type)
      if ((target_gp_index != (hwloc_uint64_t)-1 && target_gp_index == imattr->targets[j].gp_index)
          || (target_os_index != (unsigned)-1 && target_os_index == imattr->targets[j].os_index))
        return &imattr->targets[j];
  }
  if (!create)
    return NULL;

  news = realloc(imattr->targets, (imattr->nr_targets+1)*sizeof(*imattr->targets));
  if (!news)
    return NULL;
  imattr->targets = news;

  /* FIXME sort targets? by logical index at the end of load? */

  new = &news[imattr->nr_targets];
  new->type = target_type;
  new->gp_index = target_gp_index;
  new->os_index = target_os_index;

  /* cached object will be refreshed later on actual access */
  new->obj = NULL;
  imattr->iflags &= ~HWLOC_IMATTR_FLAG_CACHE_VALID;
  /* When setting a value after load(), the caller has the target object
   * (and initiator object, if not a CPU set). Hence, we could avoid invalidating
   * the cache here.
   * The overhead of the imattr-wide refresh isn't high enough so far
   * to justify making the cache management more complex.
   */

  new->nr_initiators = 0;
  new->initiators = NULL;
  new->noinitiator_value = 0;
  imattr->nr_targets++;
  return new;
}

static struct hwloc_internal_memattr_initiator_s *
hwloc__memattr_get_initiator_from_location(struct hwloc_internal_memattr_s *imattr,
                                           struct hwloc_internal_memattr_target_s *imtg,
                                           struct hwloc_location *location);

int
hwloc_memattr_get_targets(hwloc_topology_t topology,
                          hwloc_memattr_id_t id,
                          struct hwloc_location *initiator,
                          unsigned long flags,
                          unsigned *nrp, hwloc_obj_t *targets, hwloc_uint64_t *values)
{
  struct hwloc_internal_memattr_s *imattr;
  unsigned i, found = 0, max;

  if (flags) {
    errno = EINVAL;
    return -1;
  }

  if (!nrp || (*nrp && !targets)) {
    errno = EINVAL;
    return -1;
  }
  max = *nrp;

  if (id >= topology->nr_memattrs) {
    errno = EINVAL;
    return -1;
  }
  imattr = &topology->memattrs[id];

  if (imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE) {
    /* convenience attributes */
    for(i=0; ; i++) {
      hwloc_obj_t node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, i);
      if (!node)
        break;
      if (found < max) {
        targets[found] = node;
        if (values)
          hwloc__memattr_get_convenience_value(id, node, &values[found]);
      }
      found++;
    }
    goto done;
  }

  /* normal attributes */

  if (!(imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
    hwloc__imattr_refresh(topology, imattr);

  for(i=0; i<imattr->nr_targets; i++) {
    struct hwloc_internal_memattr_target_s *imtg = &imattr->targets[i];
    hwloc_uint64_t value = 0;

    if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) {
      if (initiator) {
        /* find a matching initiator */
        struct hwloc_internal_memattr_initiator_s *imi = hwloc__memattr_get_initiator_from_location(imattr, imtg, initiator);
        if (!imi)
          continue;
        value = imi->value;
      }
    } else {
      value = imtg->noinitiator_value;
    }

    if (found < max) {
      targets[found] = imtg->obj;
      if (values)
        values[found] = value;
    }
    found++;
  }

 done:
  *nrp = found;
  return 0;
}
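/* Usage sketch (not part of the library): the usual two-call pattern for
 * hwloc_memattr_get_targets(), first asking for the count, then for the
 * actual arrays. Names are illustrative and error handling is shortened.
 *
 *   unsigned nr = 0;
 *   hwloc_memattr_get_targets(topology, HWLOC_MEMATTR_ID_CAPACITY, NULL, 0,
 *                             &nr, NULL, NULL);
 *   {
 *     hwloc_obj_t *targets = malloc(nr * sizeof(*targets));
 *     hwloc_uint64_t *values = malloc(nr * sizeof(*values));
 *     hwloc_memattr_get_targets(topology, HWLOC_MEMATTR_ID_CAPACITY, NULL, 0,
 *                               &nr, targets, values);
 *     // ... use targets[0..nr-1] and values[0..nr-1] ...
 *   }
 */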
/************************
 * Initiators
 */

static struct hwloc_internal_memattr_initiator_s *
hwloc__memattr_target_get_initiator(struct hwloc_internal_memattr_target_s *imtg,
                                    struct hwloc_internal_location_s *iloc,
                                    int create)
{
  struct hwloc_internal_memattr_initiator_s *news, *new;
  unsigned k;

  for(k=0; k<imtg->nr_initiators; k++) {
    struct hwloc_internal_memattr_initiator_s *imi = &imtg->initiators[k];
    if (match_internal_location(iloc, imi)) {
      return imi;
    }
  }

  if (!create)
    return NULL;

  news = realloc(imtg->initiators, (imtg->nr_initiators+1)*sizeof(*imtg->initiators));
  if (!news)
    return NULL;
  new = &news[imtg->nr_initiators];

  new->initiator = *iloc;
  if (iloc->type == HWLOC_LOCATION_TYPE_CPUSET) {
    new->initiator.location.cpuset = hwloc_bitmap_dup(iloc->location.cpuset);
    if (!new->initiator.location.cpuset)
      goto out_with_realloc;
  }

  imtg->nr_initiators++;
  imtg->initiators = news;
  return new;

 out_with_realloc:
  imtg->initiators = news;
  return NULL;
}

static struct hwloc_internal_memattr_initiator_s *
hwloc__memattr_get_initiator_from_location(struct hwloc_internal_memattr_s *imattr,
                                           struct hwloc_internal_memattr_target_s *imtg,
                                           struct hwloc_location *location)
{
  struct hwloc_internal_memattr_initiator_s *imi;
  struct hwloc_internal_location_s iloc;

  assert(imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR);

  /* use the initiator value */
  if (!location) {
    errno = EINVAL;
    return NULL;
  }

  if (to_internal_location(&iloc, location) < 0) {
    errno = EINVAL;
    return NULL;
  }

  imi = hwloc__memattr_target_get_initiator(imtg, &iloc, 0);
  if (!imi) {
    errno = EINVAL;
    return NULL;
  }

  return imi;
}

int
hwloc_memattr_get_initiators(hwloc_topology_t topology,
                             hwloc_memattr_id_t id,
                             hwloc_obj_t target_node,
                             unsigned long flags,
                             unsigned *nrp, struct hwloc_location *initiators, hwloc_uint64_t *values)
{
  struct hwloc_internal_memattr_s *imattr;
  struct hwloc_internal_memattr_target_s *imtg;
  unsigned i, max;

  if (flags || !target_node) {
    errno = EINVAL;
    return -1;
  }

  if (!nrp || (*nrp && !initiators)) {
    errno = EINVAL;
    return -1;
  }
  max = *nrp;

  if (id >= topology->nr_memattrs) {
    errno = EINVAL;
    return -1;
  }
  imattr = &topology->memattrs[id];
  if (!(imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR)) {
    *nrp = 0;
    return 0;
  }

  /* all convenience attributes have no initiators */
  assert(!(imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE));

  if (!(imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
    hwloc__imattr_refresh(topology, imattr);

  imtg = hwloc__memattr_get_target(imattr, target_node->type, target_node->gp_index, target_node->os_index, 0);
  if (!imtg) {
    errno = EINVAL;
    return -1;
  }

  for(i=0; i<imtg->nr_initiators && i<max; i++) {
    struct hwloc_internal_memattr_initiator_s *imi = &imtg->initiators[i];
    int err = from_internal_location(&imi->initiator, &initiators[i]);
    assert(!err);
    if (values)
      /* no need to handle capacity/locality special cases here, those are initiator-less attributes */
      values[i] = imi->value;
  }

  *nrp = imtg->nr_initiators;
  return 0;
}


/**************************
 * Values
 */

int
hwloc_memattr_get_value(hwloc_topology_t topology,
                        hwloc_memattr_id_t id,
                        hwloc_obj_t target_node,
                        struct hwloc_location *initiator,
                        unsigned long flags,
                        hwloc_uint64_t *valuep)
{
  struct hwloc_internal_memattr_s *imattr;
  struct hwloc_internal_memattr_target_s *imtg;

  if (flags || !target_node) {
    errno = EINVAL;
    return -1;
  }

  if (id >= topology->nr_memattrs) {
    errno = EINVAL;
    return -1;
  }
  imattr = &topology->memattrs[id];

  if (imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE) {
    /* convenience attributes */
    return hwloc__memattr_get_convenience_value(id, target_node, valuep);
  }

  /* normal attributes */

  if (!(imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
    hwloc__imattr_refresh(topology, imattr);

  imtg = hwloc__memattr_get_target(imattr, target_node->type, target_node->gp_index, target_node->os_index, 0);
  if (!imtg) {
    errno = EINVAL;
    return -1;
  }

  if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) {
    /* find the initiator and return its value */
    struct hwloc_internal_memattr_initiator_s *imi = hwloc__memattr_get_initiator_from_location(imattr, imtg, initiator);
    if (!imi)
      return -1;
    *valuep = imi->value;
  } else {
    /* get the no-initiator value */
    *valuep = imtg->noinitiator_value;
  }
  return 0;
}
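/* Usage sketch (not part of the library): reading the bandwidth seen from a
 * given set of cores. "cpuset" stands for any valid hwloc_cpuset_t (e.g.
 * from hwloc_get_cpubind()) and "node" for any NUMA node object (e.g. from
 * hwloc_get_obj_by_type()); error handling is shortened.
 *
 *   struct hwloc_location initiator;
 *   hwloc_uint64_t value;
 *   initiator.type = HWLOC_LOCATION_TYPE_CPUSET;
 *   initiator.location.cpuset = cpuset;
 *   if (!hwloc_memattr_get_value(topology, HWLOC_MEMATTR_ID_BANDWIDTH,
 *                                node, &initiator, 0, &value))
 *     printf("local bandwidth %llu\n", (unsigned long long) value);
 */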
static int
hwloc__internal_memattr_set_value(hwloc_topology_t topology,
                                  hwloc_memattr_id_t id,
                                  hwloc_obj_type_t target_type,
                                  hwloc_uint64_t target_gp_index,
                                  unsigned target_os_index,
                                  struct hwloc_internal_location_s *initiator,
                                  hwloc_uint64_t value)
{
  struct hwloc_internal_memattr_s *imattr;
  struct hwloc_internal_memattr_target_s *imtg;

  if (id >= topology->nr_memattrs) {
    /* something bad happened during init */
    errno = EINVAL;
    return -1;
  }
  imattr = &topology->memattrs[id];

  if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) {
    /* check the given initiator */
    if (!initiator) {
      errno = EINVAL;
      return -1;
    }
  }

  if (imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE) {
    /* convenience attributes are read-only */
    errno = EINVAL;
    return -1;
  }

  if (topology->is_loaded && !(imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
    /* don't refresh when adding values during load (some nodes might not be ready yet),
     * we'll refresh later
     */
    hwloc__imattr_refresh(topology, imattr);

  imtg = hwloc__memattr_get_target(imattr, target_type, target_gp_index, target_os_index, 1);
  if (!imtg)
    return -1;

  if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) {
    /* find/add the initiator and set its value */
    // FIXME what if the cpuset is larger than an existing one?
    struct hwloc_internal_memattr_initiator_s *imi = hwloc__memattr_target_get_initiator(imtg, initiator, 1);
    if (!imi)
      return -1;
    imi->value = value;

  } else {
    /* set the no-initiator value */
    imtg->noinitiator_value = value;
  }
  return 0;
}

int
hwloc_internal_memattr_set_value(hwloc_topology_t topology,
                                 hwloc_memattr_id_t id,
                                 hwloc_obj_type_t target_type,
                                 hwloc_uint64_t target_gp_index,
                                 unsigned target_os_index,
                                 struct hwloc_internal_location_s *initiator,
                                 hwloc_uint64_t value)
{
  assert(id != HWLOC_MEMATTR_ID_CAPACITY);
  assert(id != HWLOC_MEMATTR_ID_LOCALITY);

  return hwloc__internal_memattr_set_value(topology, id, target_type, target_gp_index, target_os_index, initiator, value);
}

int
hwloc_memattr_set_value(hwloc_topology_t topology,
                        hwloc_memattr_id_t id,
                        hwloc_obj_t target_node,
                        struct hwloc_location *initiator,
                        unsigned long flags,
                        hwloc_uint64_t value)
{
  struct hwloc_internal_location_s iloc, *ilocp;

  if (flags || !target_node) {
    errno = EINVAL;
    return -1;
  }

  if (initiator) {
    if (to_internal_location(&iloc, initiator) < 0) {
      errno = EINVAL;
      return -1;
    }
    ilocp = &iloc;
  } else {
    ilocp = NULL;
  }

  return hwloc__internal_memattr_set_value(topology, id, target_node->type, target_node->gp_index, target_node->os_index, ilocp, value);
}


/**********************
 * Best target
 */

static void
hwloc__update_best_target(hwloc_obj_t *best_obj, hwloc_uint64_t *best_value, int *found,
                          hwloc_obj_t new_obj, hwloc_uint64_t new_value,
                          int keep_highest)
{
  if (*found) {
    if (keep_highest) {
      if (new_value <= *best_value)
        return;
    } else {
      if (new_value >= *best_value)
        return;
    }
  }

  *best_obj = new_obj;
  *best_value = new_value;
  *found = 1;
}

int
hwloc_memattr_get_best_target(hwloc_topology_t topology,
                              hwloc_memattr_id_t id,
                              struct hwloc_location *initiator,
                              unsigned long flags,
                              hwloc_obj_t *bestp, hwloc_uint64_t *valuep)
{
  struct hwloc_internal_memattr_s *imattr;
  hwloc_uint64_t best_value = 0; /* shut up the compiler */
  hwloc_obj_t best = NULL;
  int found = 0;
  unsigned j;

  if (flags) {
    errno = EINVAL;
    return -1;
  }

  if (id >= topology->nr_memattrs) {
    errno = EINVAL;
    return -1;
  }
  imattr = &topology->memattrs[id];

  if (imattr->iflags & HWLOC_IMATTR_FLAG_CONVENIENCE) {
    /* convenience attributes */
    for(j=0; ; j++) {
      hwloc_obj_t node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, j);
      hwloc_uint64_t value = 0;
      if (!node)
        break;
      hwloc__memattr_get_convenience_value(id, node, &value);
      hwloc__update_best_target(&best, &best_value, &found,
                                node, value,
                                imattr->flags & HWLOC_MEMATTR_FLAG_HIGHER_FIRST);
    }
    goto done;
  }

  /* normal attributes */

  if (!(imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
    /* not strictly needed */
    hwloc__imattr_refresh(topology, imattr);

  for(j=0; j<imattr->nr_targets; j++) {
    struct hwloc_internal_memattr_target_s *imtg = &imattr->targets[j];
    hwloc_uint64_t value;
    if (imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR) {
      /* find the initiator and get its value */
      struct hwloc_internal_memattr_initiator_s *imi = hwloc__memattr_get_initiator_from_location(imattr, imtg, initiator);
      if (!imi)
        continue;
      value = imi->value;
    } else {
      /* get the no-initiator value */
      value = imtg->noinitiator_value;
    }
    hwloc__update_best_target(&best, &best_value, &found,
                              imtg->obj, value,
                              imattr->flags & HWLOC_MEMATTR_FLAG_HIGHER_FIRST);
  }

 done:
  if (found) {
    assert(best);
    *bestp = best;
    if (valuep)
      *valuep = best_value;
    return 0;
  } else {
    errno = ENOENT;
    return -1;
  }
}


/**********************
 * Best initiators
 */

static void
hwloc__update_best_initiator(struct hwloc_internal_location_s *best_initiator, hwloc_uint64_t *best_value, int *found,
                             struct hwloc_internal_location_s *new_initiator, hwloc_uint64_t new_value,
                             int keep_highest)
{
  if (*found) {
    if (keep_highest) {
      if (new_value <= *best_value)
        return;
    } else {
      if (new_value >= *best_value)
        return;
    }
  }

  *best_initiator = *new_initiator;
  *best_value = new_value;
  *found = 1;
}

int
hwloc_memattr_get_best_initiator(hwloc_topology_t topology,
                                 hwloc_memattr_id_t id,
                                 hwloc_obj_t target_node,
                                 unsigned long flags,
                                 struct hwloc_location *bestp, hwloc_uint64_t *valuep)
{
  struct hwloc_internal_memattr_s *imattr;
  struct hwloc_internal_memattr_target_s *imtg;
  struct hwloc_internal_location_s best_initiator;
  hwloc_uint64_t best_value;
  int found;
  unsigned i;

  if (flags || !target_node) {
    errno = EINVAL;
    return -1;
  }

  if (id >= topology->nr_memattrs) {
    errno = EINVAL;
    return -1;
  }
  imattr = &topology->memattrs[id];

  if (!(imattr->flags & HWLOC_MEMATTR_FLAG_NEED_INITIATOR)) {
    errno = EINVAL;
    return -1;
  }

  if (!(imattr->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
    /* not strictly needed */
    hwloc__imattr_refresh(topology, imattr);

  imtg = hwloc__memattr_get_target(imattr, target_node->type, target_node->gp_index, target_node->os_index, 0);
  if (!imtg) {
    errno = EINVAL;
    return -1;
  }

  found = 0;
  for(i=0; i<imtg->nr_initiators; i++) {
    struct hwloc_internal_memattr_initiator_s *imi = &imtg->initiators[i];
    hwloc__update_best_initiator(&best_initiator, &best_value, &found,
                                 &imi->initiator, imi->value,
                                 imattr->flags & HWLOC_MEMATTR_FLAG_HIGHER_FIRST);
  }

  if (found) {
    if (valuep)
      *valuep = best_value;
    return from_internal_location(&best_initiator, bestp);
  } else {
    errno = ENOENT;
    return -1;
  }
}
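/* Usage sketch (not part of the library): finding the NUMA node with the
 * highest bandwidth from the calling thread's binding. Names are
 * illustrative and error handling is shortened.
 *
 *   hwloc_bitmap_t set = hwloc_bitmap_alloc();
 *   struct hwloc_location initiator;
 *   hwloc_obj_t best;
 *   hwloc_uint64_t bw;
 *   hwloc_get_cpubind(topology, set, HWLOC_CPUBIND_THREAD);
 *   initiator.type = HWLOC_LOCATION_TYPE_CPUSET;
 *   initiator.location.cpuset = set;
 *   if (!hwloc_memattr_get_best_target(topology, HWLOC_MEMATTR_ID_BANDWIDTH,
 *                                      &initiator, 0, &best, &bw)) {
 *     // allocate on "best"
 *   }
 *   hwloc_bitmap_free(set);
 */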
/****************************
 * Listing local nodes
 */

static __hwloc_inline int
match_local_obj_cpuset(hwloc_obj_t node, hwloc_cpuset_t cpuset, unsigned long flags)
{
  if (flags & HWLOC_LOCAL_NUMANODE_FLAG_ALL)
    return 1;
  if ((flags & HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY)
      && hwloc_bitmap_isincluded(cpuset, node->cpuset))
    return 1;
  if ((flags & HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY)
      && hwloc_bitmap_isincluded(node->cpuset, cpuset))
    return 1;
  return hwloc_bitmap_isequal(node->cpuset, cpuset);
}

int
hwloc_get_local_numanode_objs(hwloc_topology_t topology,
                              struct hwloc_location *location,
                              unsigned *nrp,
                              hwloc_obj_t *nodes,
                              unsigned long flags)
{
  hwloc_cpuset_t cpuset;
  hwloc_obj_t node;
  unsigned i;

  if (flags & ~(HWLOC_LOCAL_NUMANODE_FLAG_SMALLER_LOCALITY
                | HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY
                | HWLOC_LOCAL_NUMANODE_FLAG_ALL)) {
    errno = EINVAL;
    return -1;
  }

  if (!nrp || (*nrp && !nodes)) {
    errno = EINVAL;
    return -1;
  }

  if (!location) {
    if (!(flags & HWLOC_LOCAL_NUMANODE_FLAG_ALL)) {
      errno = EINVAL;
      return -1;
    }
    cpuset = NULL; /* unused */

  } else {
    if (location->type == HWLOC_LOCATION_TYPE_CPUSET) {
      cpuset = location->location.cpuset;
    } else if (location->type == HWLOC_LOCATION_TYPE_OBJECT) {
      hwloc_obj_t obj = location->location.object;
      while (!obj->cpuset)
        obj = obj->parent;
      cpuset = obj->cpuset;
    } else {
      errno = EINVAL;
      return -1;
    }
  }

  i = 0;
  for(node = hwloc_get_obj_by_type(topology, HWLOC_OBJ_NUMANODE, 0);
      node;
      node = node->next_cousin) {
    if (!match_local_obj_cpuset(node, cpuset, flags))
      continue;
    if (i < *nrp)
      nodes[i] = node;
    i++;
  }

  *nrp = i;
  return 0;
}
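/* Usage sketch (not part of the library): listing the NUMA nodes whose
 * locality contains a given cpuset, with the same two-call sizing pattern as
 * above. "cpuset" stands for any valid hwloc_cpuset_t; error handling is
 * shortened.
 *
 *   struct hwloc_location loc;
 *   unsigned nr = 0;
 *   loc.type = HWLOC_LOCATION_TYPE_CPUSET;
 *   loc.location.cpuset = cpuset;
 *   hwloc_get_local_numanode_objs(topology, &loc, &nr, NULL,
 *                                 HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY);
 *   {
 *     hwloc_obj_t *nodes = malloc(nr * sizeof(*nodes));
 *     hwloc_get_local_numanode_objs(topology, &loc, &nr, nodes,
 *                                   HWLOC_LOCAL_NUMANODE_FLAG_LARGER_LOCALITY);
 *     // ... use nodes[0..nr-1] ...
 *   }
 */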
/**************************************
 * Using memattrs to identify HBM/DRAM
 */

enum hwloc_memory_tier_type_e {
  /* WARNING: keep higher BW types first for compare_tiers_by_bw_and_type() when BW info is missing */
  HWLOC_MEMORY_TIER_HBM  = 1UL<<0,
  HWLOC_MEMORY_TIER_DRAM = 1UL<<1,
  HWLOC_MEMORY_TIER_GPU  = 1UL<<2,
  HWLOC_MEMORY_TIER_SPM  = 1UL<<3, /* Specific-Purpose Memory is usually HBM, we'll use BW to confirm or force */
  HWLOC_MEMORY_TIER_NVM  = 1UL<<4,
  HWLOC_MEMORY_TIER_CXL  = 1UL<<5
};
typedef unsigned long hwloc_memory_tier_type_t;
#define HWLOC_MEMORY_TIER_UNKNOWN 0UL

static const char *
hwloc_memory_tier_type_snprintf(hwloc_memory_tier_type_t type)
{
  switch (type) {
  case HWLOC_MEMORY_TIER_DRAM: return "DRAM";
  case HWLOC_MEMORY_TIER_HBM: return "HBM";
  case HWLOC_MEMORY_TIER_GPU: return "GPUMemory";
  case HWLOC_MEMORY_TIER_SPM: return "SPM";
  case HWLOC_MEMORY_TIER_NVM: return "NVM";
  case HWLOC_MEMORY_TIER_CXL:
  case HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_DRAM: return "CXL-DRAM";
  case HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_HBM: return "CXL-HBM";
  case HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_GPU: return "CXL-GPUMemory";
  case HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_SPM: return "CXL-SPM";
  case HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_NVM: return "CXL-NVM";
  default: return NULL;
  }
}

static hwloc_memory_tier_type_t
hwloc_memory_tier_type_sscanf(const char *name)
{
  if (!strcasecmp(name, "DRAM"))
    return HWLOC_MEMORY_TIER_DRAM;
  if (!strcasecmp(name, "HBM"))
    return HWLOC_MEMORY_TIER_HBM;
  if (!strcasecmp(name, "GPUMemory"))
    return HWLOC_MEMORY_TIER_GPU;
  if (!strcasecmp(name, "SPM"))
    return HWLOC_MEMORY_TIER_SPM;
  if (!strcasecmp(name, "NVM"))
    return HWLOC_MEMORY_TIER_NVM;
  if (!strcasecmp(name, "CXL-DRAM"))
    return HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_DRAM;
  if (!strcasecmp(name, "CXL-HBM"))
    return HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_HBM;
  if (!strcasecmp(name, "CXL-GPUMemory"))
    return HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_GPU;
  if (!strcasecmp(name, "CXL-SPM"))
    return HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_SPM;
  if (!strcasecmp(name, "CXL-NVM"))
    return HWLOC_MEMORY_TIER_CXL|HWLOC_MEMORY_TIER_NVM;
  return 0;
}

/* factorized tier, grouping multiple nodes */
struct hwloc_memory_tier_s {
  hwloc_nodeset_t nodeset;
  uint64_t local_bw_min, local_bw_max;
  uint64_t local_lat_min, local_lat_max;
  hwloc_memory_tier_type_t type;
};

/* early tier discovery, one entry per node */
struct hwloc_memory_node_info_s {
  hwloc_obj_t node;
  uint64_t local_bw;
  uint64_t local_lat;
  hwloc_memory_tier_type_t type;
  unsigned rank;
};

static int compare_node_infos_by_type_and_bw(const void *_a, const void *_b)
{
  const struct hwloc_memory_node_info_s *a = _a, *b = _b;
  /* sort by type of node first */
  if (a->type != b->type)
    return a->type - b->type;
  /* then by bandwidth */
  if (a->local_bw > b->local_bw)
    return -1;
  else if (a->local_bw < b->local_bw)
    return 1;
  return 0;
}

static int compare_tiers_by_bw_and_type(const void *_a, const void *_b)
{
  const struct hwloc_memory_tier_s *a = _a, *b = _b;
  /* sort by (average) BW first */
  if (a->local_bw_min && b->local_bw_min) {
    if (a->local_bw_min + a->local_bw_max > b->local_bw_min + b->local_bw_max)
      return -1;
    else if (a->local_bw_min + a->local_bw_max < b->local_bw_min + b->local_bw_max)
      return 1;
  }
  /* then by tier type */
  if (a->type != b->type)
    return a->type - b->type;
  return 0;
}

static struct hwloc_memory_tier_s *
hwloc__group_memory_tiers(hwloc_topology_t topology,
                          unsigned *nr_tiers_p)
{
  struct hwloc_internal_memattr_s *imattr_bw, *imattr_lat;
  struct hwloc_memory_node_info_s *nodeinfos;
  struct hwloc_memory_tier_s *tiers;
  unsigned nr_tiers;
  float bw_threshold = 0.1;
  float lat_threshold = 0.1;
  const char *env;
  unsigned i, j, n;

  n = hwloc_get_nbobjs_by_depth(topology, HWLOC_TYPE_DEPTH_NUMANODE);
  assert(n);

  env = getenv("HWLOC_MEMTIERS_BANDWIDTH_THRESHOLD");
  if (env)
    bw_threshold = atof(env);

  env = getenv("HWLOC_MEMTIERS_LATENCY_THRESHOLD");
  if (env)
    lat_threshold = atof(env);

  imattr_bw = &topology->memattrs[HWLOC_MEMATTR_ID_BANDWIDTH];
  imattr_lat = &topology->memattrs[HWLOC_MEMATTR_ID_LATENCY];

  if (!(imattr_bw->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
    hwloc__imattr_refresh(topology, imattr_bw);
  if (!(imattr_lat->iflags & HWLOC_IMATTR_FLAG_CACHE_VALID))
    hwloc__imattr_refresh(topology, imattr_lat);

  nodeinfos = malloc(n * sizeof(*nodeinfos));
  if (!nodeinfos)
    return NULL;

  for(i=0; i<n; i++) {
    hwloc_obj_t node;
    const char *daxtype;
    struct hwloc_internal_location_s iloc;
    struct hwloc_internal_memattr_target_s *imtg;

    node = hwloc_get_obj_by_depth(topology, HWLOC_TYPE_DEPTH_NUMANODE, i);
    assert(node);
    nodeinfos[i].node = node;
    nodeinfos[i].local_bw = 0;
    nodeinfos[i].local_lat = 0;
    nodeinfos[i].type = HWLOC_MEMORY_TIER_UNKNOWN;

    daxtype = hwloc_obj_get_info_by_name(node, "DAXType");
    /* mark NVM, SPM and GPU nodes */
    if (node->subtype && !strcmp(node->subtype, "GPUMemory"))
      nodeinfos[i].type = HWLOC_MEMORY_TIER_GPU;
    else if (daxtype && !strcmp(daxtype, "NVM"))
      nodeinfos[i].type = HWLOC_MEMORY_TIER_NVM;
    else if (daxtype && !strcmp(daxtype, "SPM"))
      nodeinfos[i].type = HWLOC_MEMORY_TIER_SPM;
    /* add CXL flag */
    if (hwloc_obj_get_info_by_name(node, "CXLDevice") != NULL) {
      /* CXL is always SPM for now. HBM and DRAM not possible here yet.
       * Hence remove all but NVM first.
       */
      nodeinfos[i].type &= HWLOC_MEMORY_TIER_NVM;
      nodeinfos[i].type |= HWLOC_MEMORY_TIER_CXL;
    }

    /* get local bandwidth */
    imtg = NULL;
    for(j=0; j<imattr_bw->nr_targets; j++)
      if (imattr_bw->targets[j].obj == node) {
        imtg = &imattr_bw->targets[j];
        break;
      }
    if (imtg && !hwloc_bitmap_iszero(node->cpuset)) {
      struct hwloc_internal_memattr_initiator_s *imi;
      iloc.type = HWLOC_LOCATION_TYPE_CPUSET;
      iloc.location.cpuset = node->cpuset;
      imi = hwloc__memattr_target_get_initiator(imtg, &iloc, 0);
      if (imi)
        nodeinfos[i].local_bw = imi->value;
    }

    /* get local latency */
    imtg = NULL;
    for(j=0; j<imattr_lat->nr_targets; j++)
      if (imattr_lat->targets[j].obj == node) {
        imtg = &imattr_lat->targets[j];
        break;
      }
    if (imtg && !hwloc_bitmap_iszero(node->cpuset)) {
      struct hwloc_internal_memattr_initiator_s *imi;
      iloc.type = HWLOC_LOCATION_TYPE_CPUSET;
      iloc.location.cpuset = node->cpuset;
      imi = hwloc__memattr_target_get_initiator(imtg, &iloc, 0);
      if (imi)
        nodeinfos[i].local_lat = imi->value;
    }
  }

  /* Sort nodes.
   * We could also sort by the existing subtype.
   * KNL is the only case where subtypes are set in backends, but we set memattrs as well there.
   * Also HWLOC_MEMTIERS_REFRESH would be a special value to ignore existing subtypes.
   */
  hwloc_debug("Sorting memory node infos...\n");
  qsort(nodeinfos, n, sizeof(*nodeinfos), compare_node_infos_by_type_and_bw);
#ifdef HWLOC_DEBUG
  for(i=0; i<n; i++)
    hwloc_debug("  node L#%u P#%u type %lx bw %llu lat %llu\n",
                nodeinfos[i].node->logical_index, nodeinfos[i].node->os_index,
                nodeinfos[i].type,
                (unsigned long long) nodeinfos[i].local_bw,
                (unsigned long long) nodeinfos[i].local_lat);
#endif

  /* now we have UNKNOWN nodes (sorted by BW only), then known ones */
  /* Iterate among them and add a rank value.
   * Start from rank 0 and switch to the next rank when the type changes
   * or when the BW or latency difference is > threshold.
   */
  hwloc_debug("Starting memory tier #0 and iterating over nodes...\n");
  nodeinfos[0].rank = 0;
  for(i=1; i<n; i++) {
    /* reuse the same rank by default */
    nodeinfos[i].rank = nodeinfos[i-1].rank;
    /* comparing type */
    if (nodeinfos[i].type != nodeinfos[i-1].type) {
      hwloc_debug("  Switching to memory tier #%u starting with node L#%u P#%u because of type\n",
                  nodeinfos[i].rank, nodeinfos[i].node->logical_index, nodeinfos[i].node->os_index);
      nodeinfos[i].rank++;
      continue;
    }
    /* comparing bandwidth */
    if (nodeinfos[i].local_bw && nodeinfos[i-1].local_bw) {
      float bw_ratio = (float)nodeinfos[i].local_bw/(float)nodeinfos[i-1].local_bw;
      if (bw_ratio < 1.)
        bw_ratio = 1./bw_ratio;
      if (bw_ratio > 1.0 + bw_threshold) {
        nodeinfos[i].rank++;
        hwloc_debug("  Switching to memory tier #%u starting with node L#%u P#%u because of bandwidth\n",
                    nodeinfos[i].rank, nodeinfos[i].node->logical_index, nodeinfos[i].node->os_index);
        continue;
      }
    }
    /* comparing latency */
    if (nodeinfos[i].local_lat && nodeinfos[i-1].local_lat) {
      float lat_ratio = (float)nodeinfos[i].local_lat/(float)nodeinfos[i-1].local_lat;
      if (lat_ratio < 1.)
        lat_ratio = 1./lat_ratio;
      if (lat_ratio > 1.0 + lat_threshold) {
        hwloc_debug("  Switching to memory tier #%u starting with node L#%u P#%u because of latency\n",
                    nodeinfos[i].rank, nodeinfos[i].node->logical_index, nodeinfos[i].node->os_index);
        nodeinfos[i].rank++;
        continue;
      }
    }
  }
  /* FIXME: if there are cpuset-intersecting nodes in the same tier, split again? */

  hwloc_debug("  Found %u tiers total\n", nodeinfos[n-1].rank + 1);

  /* now group nodeinfos into factorized tiers */
  nr_tiers = nodeinfos[n-1].rank + 1;
  tiers = calloc(nr_tiers, sizeof(*tiers));
  if (!tiers)
    goto out_with_nodeinfos;
  for(i=0; i<nr_tiers; i++) {
    tiers[i].nodeset = hwloc_bitmap_alloc();
    if (!tiers[i].nodeset)
      goto out_with_tiers;
  }
  for(i=0; i<n; i++) {
    unsigned rank = nodeinfos[i].rank;
    hwloc_bitmap_set(tiers[rank].nodeset, nodeinfos[i].node->os_index);
    assert(tiers[rank].type == HWLOC_MEMORY_TIER_UNKNOWN
           || tiers[rank].type == nodeinfos[i].type);
    tiers[rank].type = nodeinfos[i].type;
    /* nodeinfos are sorted in BW order, no need to compare */
    if (!tiers[rank].local_bw_min)
      tiers[rank].local_bw_min = nodeinfos[i].local_bw;
    tiers[rank].local_bw_max = nodeinfos[i].local_bw;
    /* compare latencies to update min/max */
    if (!tiers[rank].local_lat_min || nodeinfos[i].local_lat < tiers[rank].local_lat_min)
      tiers[rank].local_lat_min = nodeinfos[i].local_lat;
    if (!tiers[rank].local_lat_max || nodeinfos[i].local_lat > tiers[rank].local_lat_max)
      tiers[rank].local_lat_max = nodeinfos[i].local_lat;
  }

  free(nodeinfos);
  *nr_tiers_p = nr_tiers;
  return tiers;

 out_with_tiers:
  for(i=0; i<nr_tiers; i++)
    hwloc_bitmap_free(tiers[i].nodeset);
  free(tiers);
 out_with_nodeinfos:
  free(nodeinfos);
  return NULL;
}

enum hwloc_guess_memtiers_flag {
  HWLOC_GUESS_MEMTIERS_FLAG_SPM_IS_HBM = 1UL<<0,
  HWLOC_GUESS_MEMTIERS_FLAG_NODE0_IS_DRAM = 1UL<<1
};

static int
hwloc__guess_dram_hbm_tiers(struct hwloc_memory_tier_s *tier1,
                            struct hwloc_memory_tier_s *tier2,
                            unsigned long flags)
{
  struct hwloc_memory_tier_s *tmp;

  if (!tier1->local_bw_min || !tier2->local_bw_min) {
    hwloc_debug("  Missing BW info\n");
    return -1;
  }

  /* reorder tiers by BW */
  if (tier1->local_bw_min > tier2->local_bw_min) {
    tmp = tier1;
    tier1 = tier2;
    tier2 = tmp;
  }
  /* tier1 < tier2 */

  hwloc_debug("  tier1 BW %llu-%llu vs tier2 BW %llu-%llu\n",
              (unsigned long long) tier1->local_bw_min,
              (unsigned long long) tier1->local_bw_max,
              (unsigned long long) tier2->local_bw_min,
              (unsigned long long) tier2->local_bw_max);
  if (tier2->local_bw_min <= tier1->local_bw_max * 2) {
    /* tier2 BW isn't 2x tier1, we cannot guess HBM */
    hwloc_debug("  BW difference isn't >2x\n");
    return -1;
  }
  /* tier2 BW is >2x tier1 */

  if ((flags & HWLOC_GUESS_MEMTIERS_FLAG_NODE0_IS_DRAM)
      && hwloc_bitmap_isset(tier2->nodeset, 0)) {
    /* node0 would not be DRAM, and we assume that's not possible */
    hwloc_debug("  node0 shouldn't have HBM BW\n");
    return -1;
  }

  /* assume tier1 == DRAM and tier2 == HBM */
  tier1->type = HWLOC_MEMORY_TIER_DRAM;
  tier2->type = HWLOC_MEMORY_TIER_HBM;
  hwloc_debug("  Success\n");
  return 0;
}
static int
hwloc__guess_memory_tiers_types(hwloc_topology_t topology __hwloc_attribute_unused,
                                unsigned nr_tiers,
                                struct hwloc_memory_tier_s *tiers)
{
  unsigned long flags;
  const char *env;
  unsigned nr_unknown, nr_spm;
  struct hwloc_memory_tier_s *unknown_tier[2], *spm_tier;
  unsigned i;

  flags = 0;
  env = getenv("HWLOC_MEMTIERS_GUESS");
  if (env) {
    if (!strcmp(env, "none"))
      return 0;
    /* by default, we don't guess anything unsure */
    if (!strcmp(env, "all"))
      /* enable all typical cases */
      flags = ~0UL;
    if (strstr(env, "spm_is_hbm")) {
      hwloc_debug("Assuming SPM-tier is HBM, ignore bandwidth\n");
      flags |= HWLOC_GUESS_MEMTIERS_FLAG_SPM_IS_HBM;
    }
    if (strstr(env, "node0_is_dram")) {
      hwloc_debug("Assuming node0 is DRAM\n");
      flags |= HWLOC_GUESS_MEMTIERS_FLAG_NODE0_IS_DRAM;
    }
  }

  if (nr_tiers == 1)
    /* Likely DRAM only, but could also be HBM-only in non-SPM mode.
     * We cannot be sure, but it doesn't matter since there's a single tier.
     */
    return 0;

  nr_unknown = nr_spm = 0;
  unknown_tier[0] = unknown_tier[1] = spm_tier = NULL;
  for(i=0; i<nr_tiers; i++) {
    switch (tiers[i].type) {
    case HWLOC_MEMORY_TIER_UNKNOWN:
      if (nr_unknown < 2)
        unknown_tier[nr_unknown] = &tiers[i];
      nr_unknown++;
      break;
    case HWLOC_MEMORY_TIER_SPM:
      spm_tier = &tiers[i];
      nr_spm++;
      break;
    default:
      break;
    }
  }

  /* Try to guess DRAM + HBM in common cases:
   * unknown + unknown => DRAM or HBM? HBM won't be SPM on HBM-only CPUs
   * unknown + CXL DRAM => DRAM or HBM?
   */
  if (nr_unknown == 2 && !nr_spm) {
    /* 2 unknown, could be DRAM + non-SPM HBM */
    hwloc_debug("  Trying to guess 2 unknown tiers using BW\n");
    hwloc__guess_dram_hbm_tiers(unknown_tier[0], unknown_tier[1], flags);
  } else if (nr_unknown == 1 && nr_spm == 1) {
    /* 1 unknown + 1 SPM, could be DRAM + SPM HBM */
    hwloc_debug("  Trying to guess 1 unknown + 1 SPM tiers using BW\n");
    hwloc__guess_dram_hbm_tiers(unknown_tier[0], spm_tier, flags);
  }

  if (flags & HWLOC_GUESS_MEMTIERS_FLAG_SPM_IS_HBM) {
    /* force mark SPM as HBM */
    for(i=0; i<nr_tiers; i++)
      if (tiers[i].type == HWLOC_MEMORY_TIER_SPM)
        tiers[i].type = HWLOC_MEMORY_TIER_HBM;
  }

  return 0;
}

static void
hwloc__apply_memory_tiers_subtypes(hwloc_topology_t topology,
                                   unsigned nr_tiers,
                                   struct hwloc_memory_tier_s *tiers,
                                   int force)
{
  hwloc_obj_t node = NULL;
  while ((node = hwloc_get_next_obj_by_type(topology, HWLOC_OBJ_NUMANODE, node)) != NULL) {
    unsigned j;
    for(j=0; j<nr_tiers; j++) {
      if (hwloc_bitmap_isset(tiers[j].nodeset, node->os_index)) {
        const char *subtype = hwloc_memory_tier_type_snprintf(tiers[j].type);
        if (!node->subtype || force) { /* don't overwrite the existing subtype unless forced */
          if (subtype) { /* don't set a subtype for unknown tiers */
            hwloc_debug("  marking node L#%u P#%u as %s (was %s)\n",
                        node->logical_index, node->os_index, subtype, node->subtype);
            free(node->subtype);
            node->subtype = strdup(subtype);
          }
        } else
          hwloc_debug("  node L#%u P#%u already marked as %s, not setting %s\n",
                      node->logical_index, node->os_index, node->subtype, subtype);
        if (nr_tiers > 1) {
          char tmp[20];
          snprintf(tmp, sizeof(tmp), "%u", j);
          hwloc__add_info_nodup(&node->infos, &node->infos_count, "MemoryTier", tmp, 1);
        }
        break; /* each node is in a single tier */
      }
    }
  }

  if (nr_tiers > 1) {
    hwloc_obj_t root = hwloc_get_root_obj(topology);
    char tmp[20];
    snprintf(tmp, sizeof(tmp), "%u", nr_tiers);
    hwloc__add_info_nodup(&root->infos, &root->infos_count, "MemoryTiersNr", tmp, 1);
  }
}

int
hwloc_internal_memattrs_guess_memory_tiers(hwloc_topology_t topology, int force_subtype)
{
  struct hwloc_memory_tier_s *tiers;
  unsigned nr_tiers;
  unsigned i;
  const char *env;

  env = getenv("HWLOC_MEMTIERS");
  if (env) {
    if (!strcmp(env, "none"))
      goto out;
    tiers = hwloc__force_memory_tiers(topology, &nr_tiers, env);
    if (tiers) {
      assert(nr_tiers > 0);
      force_subtype = 1;
      goto ready;
    }
  }

  tiers = hwloc__group_memory_tiers(topology, &nr_tiers);
  if (!tiers)
    goto out;

  hwloc__guess_memory_tiers_types(topology, nr_tiers, tiers);

  /* sort tiers by BW first, then by type */
  hwloc_debug("Sorting memory tiers...\n");
  qsort(tiers, nr_tiers, sizeof(*tiers), compare_tiers_by_bw_and_type);

 ready:
#ifdef HWLOC_DEBUG
  for(i=0; i