[mvapich-discuss] Problem with MPI_Init

Matthew Koop koop at cse.ohio-state.edu
Thu Dec 7 11:51:57 EST 2006


Michael,

Thanks for the additional information. The issue is that the optimized
shared-memory collectives added in MVAPICH2 0.9.8 did not include all of
the code needed to support multi-threading.
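
For reference, the pattern the patch applies is simple: every MPI call
made from inside the shared-memory collective and communicator-setup code
is bracketed by MPIR_Nest_incr()/MPIR_Nest_decr(), so the nested call is
treated as library-internal rather than as a fresh top-level entry
(presumably what led to the hang in MPI_Init when the library was built
with MULTI_THREAD=yes). A rough illustration of the pattern is below; this
is not MVAPICH2 source -- the stub macros and the sketch routine are only
stand-ins for the real MPICH2-internal ones in the attached patch:

/* Illustrative sketch only.  The real MPIR_Nest_incr()/MPIR_Nest_decr()
 * are MPICH2-internal macros; the stubs here just make the pattern
 * compilable in isolation. */
#include <mpi.h>
#include <stdio.h>

static int nest_count = 0;            /* stand-in for the nest counter */
#define MPIR_Nest_incr() (++nest_count)
#define MPIR_Nest_decr() (--nest_count)

/* Same shape as the create_2level_comm()/check_comm_registry() changes
 * in the attached patch: wrap the internal MPI calls in incr/decr. */
static void query_comm_sketch(MPI_Comm comm)
{
    int rank, size;

    MPIR_Nest_incr();                 /* entering nested MPI calls */
    MPI_Comm_rank(comm, &rank);
    MPI_Comm_size(comm, &size);
    MPIR_Nest_decr();                 /* done with nested MPI calls */

    printf("rank %d of %d\n", rank, size);
}

int main(int argc, char **argv)
{
    MPI_Init(&argc, &argv);
    query_comm_sketch(MPI_COMM_WORLD);
    MPI_Finalize();
    return 0;
}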

Our testing for multithreading was done with the optimized SMP collectives
turned off. If you'd like to use it that way, please change line 118 of
make.mvapich2.gen2 from:

COLL_FLAG="-D_SHMEM_COLL_"
to
COLL_FLAG="-"

Alternatively, I've made the changes needed to enable the shared-memory
collectives with multithreading. I've attached the patch to this mail and
have also checked it into the MVAPICH2 SVN trunk. With the patch applied
there is no need to make the above change.

In the main source directory:
patch -p0 < smp_coll_thread.patch

Please let us know if you run into any other issues, and thanks for
reporting this one!

Matt

> I have now tried different configurations with and without
> multithreading support.
>
> Values set in make.mvapich2.gen2:
> OPT_FLAG=-O2
> ENABLE_CKPT=no
> PTMALLOC=yes
> MULTI_THREAD=yes
> RDMA_CM_SUPPORT=no
> ROMIO=yes
>
> The result is that my program
> and osu_latency hang in MPI_Init as
> mentioned in my original posting.
>
> When I change MULTI_THREAD=yes to
> MULTI_THREAD=no, the result is that
> the programs work.
-------------- next part --------------
Index: src/mpi/coll/allreduce.c
===================================================================
--- src/mpi/coll/allreduce.c	(revision 820)
+++ src/mpi/coll/allreduce.c	(working copy)
@@ -720,6 +720,7 @@
             /* intracommunicator */
 #ifdef _SMP_
             if (enable_shmem_collectives){
+                MPIR_Nest_incr();
                 mpi_errno = NMPI_Type_get_true_extent(datatype, &true_lb, &true_extent);  
                 MPIU_ERR_CHKANDJUMP((mpi_errno), mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPID_Datatype_get_extent_macro(datatype, extent);
@@ -751,9 +752,11 @@
                         else
                             uop = (MPI_User_function *) op_ptr->function.f77_function;
                 }
+                MPIR_Nest_decr();
             }
             if ((comm_ptr->shmem_coll_ok == 1)&&(stride < SHMEM_COLL_ALLREDUCE_THRESHOLD)&&
                     (disable_shmem_allreduce == 0) &&(is_commutative) &&(enable_shmem_collectives) &&(check_comm_registry(comm))){
+                MPIR_Nest_incr();
                 my_rank = comm_ptr->rank;
                 MPI_Comm_size(comm, &total_size);
                 shmem_comm = comm_ptr->shmem_comm;
@@ -764,6 +767,7 @@
 
                 leader_comm = comm_ptr->leader_comm;
                 MPID_Comm_get_ptr(leader_comm, leader_commptr);
+                MPIR_Nest_decr();
 
 
                 if (local_rank == 0){
@@ -816,7 +820,9 @@
 
                 /* Broadcasting the mesage from leader to the rest*/
                 if (local_size > 1){
+                    MPIR_Nest_incr();
                     MPI_Bcast(recvbuf, count, datatype, 0, shmem_comm);
+                    MPIR_Nest_decr();
                 }
 
             }
Index: src/mpi/coll/reduce.c
===================================================================
--- src/mpi/coll/reduce.c	(revision 820)
+++ src/mpi/coll/reduce.c	(working copy)
@@ -866,6 +866,7 @@
             /* intracommunicator */
 #ifdef _SMP_
             if (enable_shmem_collectives){
+                MPIR_Nest_incr();
                 mpi_errno = NMPI_Type_get_true_extent(datatype, &true_lb, &true_extent);  
                 MPIU_ERR_CHKANDJUMP((mpi_errno), mpi_errno, MPI_ERR_OTHER, "**fail");
                 MPID_Datatype_get_extent_macro(datatype, extent);
@@ -897,10 +898,12 @@
                         else
                             uop = (MPI_User_function *) op_ptr->function.f77_function;
                 }
+                MPIR_Nest_decr();
             }
 
             if ((comm_ptr->shmem_coll_ok == 1)&&(stride < SHMEM_COLL_REDUCE_THRESHOLD)&&
                     (disable_shmem_reduce == 0) &&(is_commutative==1) &&(enable_shmem_collectives)&&(check_comm_registry(comm))){
+                MPIR_Nest_incr();
                 my_rank = comm_ptr->rank;
                 MPI_Comm_size(comm, &total_size);
                 shmem_comm = comm_ptr->shmem_comm;
@@ -911,14 +914,17 @@
 
                 leader_comm = comm_ptr->leader_comm;
                 MPID_Comm_get_ptr(leader_comm, leader_commptr);
+                MPIR_Nest_decr();
 
 
                 if (local_rank == 0){
                     global_rank = leader_commptr->rank;
                     MPIU_CHKLMEM_MALLOC(tmpbuf, void *, count*(MPIR_MAX(extent,true_extent)), mpi_errno, "receive buffer");
                     tmpbuf = (void *)((char*)tmpbuf - true_lb);
+                    MPIR_Nest_incr();
                     mpi_errno = MPIR_Localcopy(sendbuf, count, datatype, tmpbuf,
                             count, datatype);
+                    MPIR_Nest_decr();
                 }
 
                 if (local_size > 1){
@@ -949,20 +955,26 @@
 
                     leader_root = comm_ptr->leader_rank[leader_of_root];
                     if (local_size != total_size){
+                        MPIR_Nest_incr();
                         mpi_errno = MPIR_Reduce(tmpbuf, recvbuf, count, datatype,
                                 op, leader_root, leader_commptr); 
+                        MPIR_Nest_decr();
                     }
                     else if (root == my_rank){
+                        MPIR_Nest_incr();
                         mpi_errno = MPIR_Localcopy(tmpbuf, count, datatype, recvbuf,
                                 count, datatype);
+                        MPIR_Nest_decr();
                         goto fn_exit;
                     }
 
                 }
                 else{
                     local_buf = (char*)shmem_buf + stride*local_rank;
+                    MPIR_Nest_incr();
                     mpi_errno = MPIR_Localcopy(sendbuf, count, datatype, local_buf,
                             count, datatype);
+                    MPIR_Nest_decr();
                     MPIDI_CH3I_SHMEM_COLL_SetGatherComplete(local_size, local_rank, shmem_comm_rank);
                 }
 
@@ -970,6 +982,7 @@
                 /* Copying data from leader to the root incase leader is
                  * not the root */
                 if (local_size > 1){
+                    MPIR_Nest_incr();
                     /* Send the message to the root if the leader is not the
                      * root of the reduce operation */
                     if ((local_rank == 0) && (root != my_rank) && (leader_root == global_rank)){
@@ -986,7 +999,9 @@
                     if ((local_rank != 0) && (root == my_rank)){
                         mpi_errno = MPIC_Recv ( recvbuf, count, datatype, leader_of_root, 
                                 MPIR_REDUCE_TAG, comm, &status);
+
                     }
+                    MPIR_Nest_decr();
                 }
 
             }
Index: src/mpi/comm/comm_dup.c
===================================================================
--- src/mpi/comm/comm_dup.c	(revision 820)
+++ src/mpi/comm/comm_dup.c	(working copy)
@@ -182,6 +182,7 @@
     if (enable_shmem_collectives){
         if (split_comm == 1){
             if (*newcomm != MPI_COMM_NULL){
+                MPIR_Nest_incr();
                 MPI_Comm_test_inter(*newcomm, &flag);
                 if (flag == 0){
                     int my_id, size;
@@ -191,6 +192,7 @@
                     create_2level_comm(*newcomm, size, my_id);
                     split_comm = 1;
                 }
+                MPIR_Nest_decr();
             }
         }
     }
Index: src/mpi/comm/comm_split.c
===================================================================
--- src/mpi/comm/comm_split.c	(revision 820)
+++ src/mpi/comm/comm_split.c	(working copy)
@@ -260,6 +260,7 @@
     if (enable_shmem_collectives){
         if (split_comm == 1){
             if (*newcomm != MPI_COMM_NULL){
+                MPIR_Nest_incr();
                 MPI_Comm_test_inter(*newcomm, &flag);
                 if (flag == 0){
                     int my_id, size;
@@ -269,6 +270,7 @@
                     create_2level_comm(*newcomm, size, my_id);
                     split_comm = 1;
                 }
+                MPIR_Nest_decr();
             }
         }
     }
Index: src/mpi/comm/comm_create.c
===================================================================
--- src/mpi/comm/comm_create.c	(revision 820)
+++ src/mpi/comm/comm_create.c	(working copy)
@@ -237,6 +237,7 @@
     if (enable_shmem_collectives){
         if (split_comm == 1){
             if (*newcomm != MPI_COMM_NULL){
+                MPIR_Nest_incr();
                 MPI_Comm_test_inter(*newcomm, &flag);
                 if (flag == 0){
                     int my_id, size;
@@ -246,6 +247,7 @@
                     create_2level_comm(*newcomm, size, my_id);
                     split_comm = 1;
                 }
+                MPIR_Nest_decr();
             }
         }
     }
Index: src/mpi/comm/create_2level_comm.c
===================================================================
--- src/mpi/comm/create_2level_comm.c	(revision 820)
+++ src/mpi/comm/create_2level_comm.c	(working copy)
@@ -35,6 +35,8 @@
 
     if (comm_count > MAX_ALLOWED_COMM) return;
 
+    MPIR_Nest_incr();
+
     int* shmem_group = malloc(sizeof(int) * size);
     if (NULL == shmem_group){
         printf("Couldn't malloc shmem_group\n");
@@ -144,6 +146,7 @@
     }
 
     ++comm_count;
+    MPIR_Nest_decr();
 }
 
 int check_comm_registry(MPI_Comm comm)
@@ -153,8 +156,10 @@
     int context_id = 0, i =0, my_rank, size;
     context_id = comm_ptr->context_id;
 
+    MPIR_Nest_incr();
     MPI_Comm_rank(comm, &my_rank);
     MPI_Comm_size(comm, &size);
+    MPIR_Nest_decr();
 
     for (i = 0; i < comm_registered; i++){
         if (comm_registry[i] == context_id){
Index: src/mpi/init/init.c
===================================================================
--- src/mpi/init/init.c	(revision 820)
+++ src/mpi/init/init.c	(working copy)
@@ -131,12 +131,14 @@
 
     if (enable_shmem_collectives){
     if (split_comm == 1){
+        MPIR_Nest_incr();
         int my_id, size;
         MPI_Comm_rank(MPI_COMM_WORLD, &my_id);
         MPI_Comm_size(MPI_COMM_WORLD, &size);
         split_comm = 0;
         create_2level_comm(MPI_COMM_WORLD, size, my_id);
         split_comm = 1;
+        MPIR_Nest_decr();
     }
     }
 #endif

