[mvapich-discuss] SIGSEGV in cm_completion_handler
Alexander Melnikov
alex.i.melnikov at gmail.com
Thu Oct 3 04:21:02 EDT 2019
The mvapich2 library is multithreaded, so you should avoid using MT-unsafe
calls like setenv.
For example, the setenv call in hwloc_bind.c sometimes crashes the program
with SIGSEGV, because it can run concurrently with a getenv call in another
thread. The two call chains involved are as follows:
- in the cm_completion_handler thread: cm_handle_msg->cm_accept->cm_qp_create->cm_qp_conn_create->set_pkey_index->getenv
- in the main thread: MPID_Init->MPIDI_CH3I_set_affinity->setenv
The problem was solved with the following patch, which replaces the setenv
calls with local flags that record the enforced defaults:
--- mvapich2-2.3.2-orig/src/mpid/ch3/channels/common/src/affinity/hwloc_bind.c 2019-08-09 04:26:25.000000000 +0500
+++ mvapich2-2.3.2/src/mpid/ch3/channels/common/src/affinity/hwloc_bind.c 2019-08-19 09:58:34.352721117 +0500
@@ -2784,6 +2784,8 @@
int num_local_procs;
long N_CPUs_online;
mv2_arch_type arch_type;
+ int enforce_hybrid = 0;
+ int enforce_hybrid_numa = 0;
MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3I_SET_AFFINITY);
MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3I_SET_AFFINITY);
@@ -2805,13 +2807,15 @@
arch_type == MV2_ARCH_INTEL_PLATINUM_8160_2S_48 ||
arch_type == MV2_ARCH_AMD_EPYC_7551_64 /* EPYC */ ||
arch_type == MV2_ARCH_AMD_EPYC_7742_128 /* rome */) {
- setenv ("MV2_CPU_BINDING_POLICY", "hybrid", 0);
+ if (getenv("MV2_CPU_BINDING_POLICY") == NULL)
+ enforce_hybrid = 1;
/* if CPU is EPYC, further force hybrid_binding_policy to NUMA */
if (arch_type == MV2_ARCH_AMD_EPYC_7551_64 ||
arch_type == MV2_ARCH_AMD_EPYC_7742_128 /* rome */) {
- setenv ("MV2_HYBRID_BINDING_POLICY", "numa", 0);
- }
+ if (getenv("MV2_HYBRID_BINDING_POLICY") == NULL)
+ enforce_hybrid_numa = 1;
+ }
}
if (mv2_enable_affinity && (num_local_procs > N_CPUs_online)) {
@@ -2844,7 +2848,8 @@
if (mv2_enable_affinity && (value = getenv("MV2_CPU_MAPPING")) == NULL) {
/* Affinity is on and the user has not specified a mapping string */
- if ((value = getenv("MV2_CPU_BINDING_POLICY")) != NULL) {
+ value = enforce_hybrid ? "hybrid" : getenv("MV2_CPU_BINDING_POLICY");
+ if (value != NULL) {
/* User has specified a binding policy */
if (!strcmp(value, "bunch") || !strcmp(value, "BUNCH")) {
mv2_binding_policy = POLICY_BUNCH;
@@ -2900,7 +2905,8 @@
/* since mv2_threads_per_proc > 0, check if any threads
* binding policy have been explicitly specified */
- if ((value = getenv("MV2_HYBRID_BINDING_POLICY")) != NULL) {
+ value = enforce_hybrid_numa ? "numa" : getenv("MV2_HYBRID_BINDING_POLICY");
+ if (value != NULL) {
if (!strcmp(value, "linear") || !strcmp(value, "LINEAR")) {
mv2_hybrid_binding_policy = HYBRID_LINEAR;
} else if (!strcmp(value, "compact") ||
!strcmp(value, "COMPACT")) {
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://mailman.cse.ohio-state.edu/pipermail/mvapich-discuss/attachments/20191003/25cd76c5/attachment.html>
More information about the mvapich-discuss
mailing list