Skip to content

Commit 950b488

Browse files
committed
Improve hetero node handling
Compression has gotten very good now, so just send the topology from every node (amounts to a few kbytes). Use the xml string as the signature. NOTE: have to strip identification tags from both the base topology and the xml string as HWLOC is adding things that make the topology unique to both the node AND the process that creates the xml representation. Signed-off-by: Ralph Castain <rhc@pmix.org>
1 parent c0ca4a0 commit 950b488

27 files changed

Lines changed: 369 additions & 957 deletions

src/docs/prrte-rst-content/Makefile.am

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
#
22
# Copyright (c) 2023-2025 Jeffrey M. Squyres. All rights reserved.
3-
# Copyright (c) 2023-2025 Nanook Consulting All rights reserved.
3+
# Copyright (c) 2023-2026 Nanook Consulting All rights reserved.
44
#
55
# $COPYRIGHT$
66
#
@@ -33,7 +33,6 @@ dist_rst_DATA = \
3333
cli-dvm.rst \
3434
cli-forward-signals.rst \
3535
cli-general.rst \
36-
cli-hetero-nodes.rst \
3736
cli-launcher-hostfile.rst \
3837
cli-leave-session-attached.rst \
3938
cli-map-by.rst \

src/docs/prrte-rst-content/cli-hetero-nodes.rst

Lines changed: 0 additions & 24 deletions
This file was deleted.

src/hwloc/hwloc-internal.h

Lines changed: 3 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@
77
* Copyright (c) 2018 Research Organization for Information Science
88
* and Technology (RIST). All rights reserved.
99
*
10-
* Copyright (c) 2021-2025 Nanook Consulting All rights reserved.
10+
* Copyright (c) 2021-2026 Nanook Consulting All rights reserved.
1111
* Copyright (c) 2023 Advanced Micro Devices, Inc. All rights reserved.
1212
* $COPYRIGHT$
1313
*
@@ -259,6 +259,8 @@ PRTE_EXPORT int prte_hwloc_base_get_topology(void);
259259
*/
260260
PRTE_EXPORT int prte_hwloc_base_set_topology(char *topofile);
261261

262+
/* export the given topology as an xml string, dropping every line that
 * carries the "ProcessName" tag. On success, *xml points to the filtered
 * string and *len holds its length as reported by strlen.
 * NOTE(review): the string is produced by PMIx_Argv_join, so the caller
 * is presumably responsible for freeing it - confirm */
PRTE_EXPORT int prte_hwloc_get_xml(hwloc_topology_t t, char **xml, int *len);
263+
262264
PRTE_EXPORT void prte_hwloc_base_setup_summary(hwloc_topology_t topo);
263265

264266
PRTE_EXPORT hwloc_cpuset_t prte_hwloc_base_generate_cpuset(hwloc_topology_t topo,
@@ -345,10 +347,6 @@ PRTE_EXPORT void prte_hwloc_get_binding_info(hwloc_const_cpuset_t cpuset,
345347
PRTE_EXPORT hwloc_obj_t prte_hwloc_base_get_pu(hwloc_topology_t topo, bool use_hwthread_cpus,
346348
int lid);
347349

348-
/* get the topology "signature" so we can check for differences - caller
349-
 * is responsible for freeing the returned string */
350-
PRTE_EXPORT char *prte_hwloc_base_get_topo_signature(hwloc_topology_t topo);
351-
352350
/* get a string describing the locality of a given process */
353351
PRTE_EXPORT char *prte_hwloc_base_get_locality_string(hwloc_topology_t topo, char *bitmap);
354352

src/hwloc/hwloc_base_util.c

Lines changed: 63 additions & 194 deletions
Original file line numberDiff line numberDiff line change
@@ -360,6 +360,65 @@ static void fill_cache_line_size(void)
360360
}
361361
}
362362

363+
/**
 * Export a topology as an XML string with process-specific lines removed.
 *
 * The XML produced by hwloc is split on newlines and every line that
 * contains the "ProcessName" tag is dropped; the remaining lines are
 * re-joined with '\n'. Empty lines are not preserved.
 *
 * @param t    topology to export
 * @param xml  on success, receives the filtered XML string as built by
 *             PMIx_Argv_join; ownership passes to the caller
 * @param len  on success, receives strlen(*xml)
 *
 * @return PRTE_SUCCESS on success. PRTE_ERROR if an output pointer is
 *         NULL, if hwloc cannot export the topology, or if nothing is
 *         left to return after filtering. The outputs are untouched on
 *         failure.
 */
int prte_hwloc_get_xml(hwloc_topology_t t, char **xml, int *len)
{
    char *xmlbuffer = NULL, *line, *eol;
    char **output = NULL, *xmlout;
    int length = 0;

    if (NULL == xml || NULL == len) {
        return PRTE_ERROR;
    }

    if (0 != hwloc_topology_export_xmlbuffer(t, &xmlbuffer, &length, 0)) {
        return PRTE_ERROR;
    }

    /* Filter the xml to remove undesirable cruft. The buffer is split in
     * place on '\n' without strtok, so no hidden static tokenizer state is
     * touched. As with strtok, empty lines are skipped. */
    line = xmlbuffer;
    while (NULL != line && '\0' != *line) {
        eol = strchr(line, '\n');
        if (NULL != eol) {
            *eol = '\0';
        }
        if ('\0' != *line && NULL == strstr(line, "ProcessName")) {
            PMIx_Argv_append_nosize(&output, line);
        }
        line = (NULL == eol) ? NULL : eol + 1;
    }

    /* the buffer belongs to hwloc and must go back through its own
     * release function rather than free() */
    hwloc_free_xmlbuffer(t, xmlbuffer);

    xmlout = PMIx_Argv_join(output, '\n');
    PMIx_Argv_free(output);
    if (NULL == xmlout) {
        /* nothing survived the filter - do not hand strlen a NULL */
        return PRTE_ERROR;
    }

    *xml = xmlout;
    *len = (int) strlen(xmlout);
    return PRTE_SUCCESS;
}
389+
390+
static void strip_topo(void)
391+
{
392+
hwloc_obj_t obj;
393+
unsigned j, k;
394+
395+
/* remove the hostname from the topology. Unfortunately, hwloc
396+
* decided to add the source hostname to the "topology", thus
397+
* rendering it unusable as a pure topological description. So
398+
* we remove that information here.
399+
*/
400+
obj = hwloc_get_root_obj(prte_hwloc_topology);
401+
for (k = 0; k < obj->infos_count; k++) {
402+
if (NULL == obj->infos ||
403+
NULL == obj->infos[k].name ||
404+
NULL == obj->infos[k].value) {
405+
continue;
406+
}
407+
if (0 == strncmp(obj->infos[k].name, "HostName", strlen("HostName"))) {
408+
free(obj->infos[k].name);
409+
free(obj->infos[k].value);
410+
/* left justify the array */
411+
for (j = k; j < obj->infos_count - 1; j++) {
412+
obj->infos[j] = obj->infos[j + 1];
413+
}
414+
obj->infos[obj->infos_count - 1].name = NULL;
415+
obj->infos[obj->infos_count - 1].value = NULL;
416+
obj->infos_count--;
417+
break;
418+
}
419+
}
420+
}
421+
363422
int prte_hwloc_base_get_topology(void)
364423
{
365424
int rc;
@@ -381,6 +440,7 @@ int prte_hwloc_base_get_topology(void)
381440
PRTE_ERROR_LOG(PRTE_ERR_NOT_SUPPORTED);
382441
return PRTE_ERR_NOT_SUPPORTED;
383442
}
443+
384444
} else {
385445
pmix_output_verbose(1, prte_hwloc_base_output,
386446
"hwloc:base loading topology from file %s",
@@ -391,6 +451,9 @@ int prte_hwloc_base_get_topology(void)
391451
prte_hwloc_synthetic_topo = true;
392452
}
393453

454+
// clean out cruft
455+
strip_topo();
456+
394457
/* fill prte_cache_line_size global with the smallest L1 cache
395458
line size */
396459
fill_cache_line_size();
@@ -402,8 +465,6 @@ int prte_hwloc_base_get_topology(void)
402465

403466
int prte_hwloc_base_set_topology(char *topofile)
404467
{
405-
hwloc_obj_t obj;
406-
unsigned j, k;
407468
int rc;
408469
struct hwloc_topology_support *support;
409470

@@ -445,36 +506,6 @@ int prte_hwloc_base_set_topology(char *topofile)
445506
return PRTE_ERR_NOT_SUPPORTED;
446507
}
447508

448-
/* remove the hostname from the topology. Unfortunately, hwloc
449-
* decided to add the source hostname to the "topology", thus
450-
* rendering it unusable as a pure topological description. So
451-
* we remove that information here.
452-
*/
453-
obj = hwloc_get_root_obj(prte_hwloc_topology);
454-
for (k = 0; k < obj->infos_count; k++) {
455-
if (NULL == obj->infos ||
456-
NULL == obj->infos[k].name ||
457-
NULL == obj->infos[k].value) {
458-
continue;
459-
}
460-
if (0 == strncmp(obj->infos[k].name, "HostName", strlen("HostName"))) {
461-
free(obj->infos[k].name);
462-
free(obj->infos[k].value);
463-
/* left justify the array */
464-
for (j = k; j < obj->infos_count - 1; j++) {
465-
obj->infos[j] = obj->infos[j + 1];
466-
}
467-
obj->infos[obj->infos_count - 1].name = NULL;
468-
obj->infos[obj->infos_count - 1].value = NULL;
469-
obj->infos_count--;
470-
break;
471-
}
472-
}
473-
474-
/* fill prte_cache_line_size global with the smallest L1 cache
475-
line size */
476-
fill_cache_line_size();
477-
478509
/* all done */
479510
return PRTE_SUCCESS;
480511
}
@@ -1642,168 +1673,6 @@ char *prte_hwloc_base_cset2str(hwloc_const_cpuset_t cpuset,
16421673
return result;
16431674
}
16441675

1645-
/* Run-length encode a NULL-terminated array of strings into a single
 * comma-separated string. A value that appears once is emitted as-is;
 * a run of N identical consecutive values is emitted as "N:value".
 * A NULL array yields "-". The result comes from strdup or
 * PMIx_Argv_join and is handed to the caller. */
static char* construct_range(char **vals)
{
    char buf[4096], **ans = NULL, *str;
    int n, cnt = 1;

    if (NULL == vals) {
        return strdup("-");
    }

    for (n = 0; NULL != vals[n]; n++) {
        /* still inside a run of identical values - keep counting */
        if (NULL != vals[n + 1] && 0 == strcmp(vals[n], vals[n + 1])) {
            cnt++;
            continue;
        }
        /* the run ends here (value changes or array ends) - emit it */
        if (1 == cnt) {
            PMIx_Argv_append_nosize(&ans, vals[n]);
        } else {
            snprintf(buf, sizeof(buf), "%d:%s", cnt, vals[n]);
            PMIx_Argv_append_nosize(&ans, buf);
        }
        cnt = 1;
    }

    str = PMIx_Argv_join(ans, ',');
    /* the joined string is a separate result, so release the pieces */
    PMIx_Argv_free(ans);
    return str;
}
1682-
1683-
char *prte_hwloc_base_get_topo_signature(hwloc_topology_t topo)
1684-
{
1685-
char *sig = NULL, *arch = NULL, *endian;
1686-
hwloc_obj_t obj;
1687-
unsigned i, nobjs, n, ncpus;
1688-
char buffer[4096], **scratch = NULL, **answer = NULL;
1689-
int rc;
1690-
hwloc_cpuset_t avail, available;
1691-
1692-
rc = hwloc_topology_export_synthetic(topo, buffer, 4096,
1693-
HWLOC_TOPOLOGY_EXPORT_SYNTHETIC_FLAG_NO_ATTRS);
1694-
if (0 > rc) {
1695-
// create out own signature - start with packages
1696-
scratch = NULL;
1697-
available = hwloc_bitmap_alloc();
1698-
avail = prte_hwloc_base_filter_cpus(prte_hwloc_topology);
1699-
nobjs = prte_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_PACKAGE);
1700-
for (n=0; n < nobjs; n++) {
1701-
obj = prte_hwloc_base_get_obj_by_type(prte_hwloc_topology, HWLOC_OBJ_PACKAGE, n);
1702-
hwloc_bitmap_and(available, avail, obj->cpuset);
1703-
ncpus = hwloc_bitmap_weight(available);
1704-
snprintf(buffer, 4096, "%u", ncpus);
1705-
PMIx_Argv_append_nosize(&scratch, buffer);
1706-
}
1707-
sig = construct_range(scratch);
1708-
snprintf(buffer, 4096, "PKG[%s]", sig);
1709-
free(sig);
1710-
PMIx_Argv_free(scratch);
1711-
PMIx_Argv_append_nosize(&answer, buffer);
1712-
// now account for NUMA
1713-
scratch = NULL;
1714-
nobjs = prte_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_NUMANODE);
1715-
for (n=0; n < nobjs; n++) {
1716-
obj = prte_hwloc_base_get_obj_by_type(prte_hwloc_topology, HWLOC_OBJ_NUMANODE, n);
1717-
hwloc_bitmap_and(available, avail, obj->cpuset);
1718-
ncpus = hwloc_bitmap_weight(available);
1719-
snprintf(buffer, 4096, "%u", ncpus);
1720-
PMIx_Argv_append_nosize(&scratch, buffer);
1721-
}
1722-
sig = construct_range(scratch);
1723-
snprintf(buffer, 4096, "NUMA[%s]", sig);
1724-
free(sig);
1725-
PMIx_Argv_free(scratch);
1726-
PMIx_Argv_append_nosize(&answer, buffer);
1727-
// L3caches
1728-
scratch = NULL;
1729-
nobjs = prte_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_L3CACHE);
1730-
for (n=0; n < nobjs; n++) {
1731-
obj = prte_hwloc_base_get_obj_by_type(prte_hwloc_topology, HWLOC_OBJ_L3CACHE, n);
1732-
hwloc_bitmap_and(available, avail, obj->cpuset);
1733-
ncpus = hwloc_bitmap_weight(available);
1734-
snprintf(buffer, 4096, "%u", ncpus);
1735-
PMIx_Argv_append_nosize(&scratch, buffer);
1736-
}
1737-
sig = construct_range(scratch);
1738-
snprintf(buffer, 4096, "L3[%s]", sig);
1739-
free(sig);
1740-
PMIx_Argv_free(scratch);
1741-
PMIx_Argv_append_nosize(&answer, buffer);
1742-
// L2caches
1743-
scratch = NULL;
1744-
nobjs = prte_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_L2CACHE);
1745-
for (n=0; n < nobjs; n++) {
1746-
obj = prte_hwloc_base_get_obj_by_type(prte_hwloc_topology, HWLOC_OBJ_L2CACHE, n);
1747-
hwloc_bitmap_and(available, avail, obj->cpuset);
1748-
ncpus = hwloc_bitmap_weight(available);
1749-
snprintf(buffer, 4096, "%u", ncpus);
1750-
PMIx_Argv_append_nosize(&scratch, buffer);
1751-
}
1752-
sig = construct_range(scratch);
1753-
snprintf(buffer, 4096, "L2[%s]", sig);
1754-
free(sig);
1755-
PMIx_Argv_free(scratch);
1756-
PMIx_Argv_append_nosize(&answer, buffer);
1757-
// L1caches
1758-
scratch = NULL;
1759-
nobjs = prte_hwloc_base_get_nbobjs_by_type(topo, HWLOC_OBJ_L1CACHE);
1760-
for (n=0; n < nobjs; n++) {
1761-
obj = prte_hwloc_base_get_obj_by_type(prte_hwloc_topology, HWLOC_OBJ_L1CACHE, n);
1762-
hwloc_bitmap_and(available, avail, obj->cpuset);
1763-
ncpus = hwloc_bitmap_weight(available);
1764-
snprintf(buffer, 4096, "%u", ncpus);
1765-
PMIx_Argv_append_nosize(&scratch, buffer);
1766-
}
1767-
sig = construct_range(scratch);
1768-
snprintf(buffer, 4096, "L1[%s]", sig);
1769-
free(sig);
1770-
PMIx_Argv_free(scratch);
1771-
PMIx_Argv_append_nosize(&answer, buffer);
1772-
// setup the signature
1773-
sig = PMIx_Argv_join(answer, ';');
1774-
snprintf(buffer, 4096, "%s", sig);
1775-
free(sig);
1776-
PMIx_Argv_free(answer);
1777-
hwloc_bitmap_free(avail);
1778-
}
1779-
1780-
/* get the root object so we can add the processor architecture */
1781-
obj = hwloc_get_root_obj(topo);
1782-
for (i = 0; i < obj->infos_count; i++) {
1783-
if (0 == strcmp(obj->infos[i].name, "Architecture")) {
1784-
arch = obj->infos[i].value;
1785-
break;
1786-
}
1787-
}
1788-
if (NULL == arch) {
1789-
arch = "unknown";
1790-
}
1791-
1792-
#ifdef __BYTE_ORDER
1793-
# if __BYTE_ORDER == __LITTLE_ENDIAN
1794-
endian = "le";
1795-
# else
1796-
endian = "be";
1797-
# endif
1798-
#else
1799-
endian = "unknown";
1800-
#endif
1801-
1802-
// form the final signature
1803-
pmix_asprintf(&sig, "%s:%s:%s", buffer, arch, endian);
1804-
return sig;
1805-
}
1806-
18071676
static int prte_hwloc_base_get_locality_string_by_depth(hwloc_topology_t topo, int d,
18081677
hwloc_cpuset_t cpuset,
18091678
hwloc_cpuset_t result)

0 commit comments

Comments
 (0)