[mvapich-discuss] Problem with MPI_Init
Matthew Koop
koop at cse.ohio-state.edu
Thu Dec 7 11:51:57 EST 2006
Michael,
Thanks for the additional information. The issue is that the optimized
shared memory collectives introduced in MVAPICH2 0.9.8 do not include all
of the code needed to support multi-threading.
Our testing for multithreading was done with the optimized SMP collectives
turned off. If you'd like to use it that way please change line 118 of
make.mvapich2.gen2:
COLL_FLAG="-D_SHMEM_COLL_"
to
COLL_FLAG=""
Alternatively, I've made the changes needed so that the shared memory
collectives work with multithreading enabled. I've attached the patch to this
mail and I've also checked it into the MVAPICH2 svn trunk. With this there
is no reason to make the above change.
In the main source directory:
patch -p0 < smp_coll_thread.patch
Please let us know if you have any other issues. Thanks for letting us
know!
Matt
> I tried now different configurations with and without
> multithreading-support.
>
> Values set in make.mvapich2.gen2:
> OPT_FLAG=-O2
> ENABLE_CKPT=no
> PTMALLOC=yes
> MULTI_THREAD=yes
> RDMA_CM_SUPPORT=no
> ROMIO=yes
>
> The result is that my program
> and osu_latency hang in MPI_Init as
> mentioned in my original posting.
>
> When I change MULTI_THREAD=yes to
> MULTI_THREAD=no, the result is that
> the programs work.
-------------- next part --------------
Index: src/mpi/coll/allreduce.c
===================================================================
--- src/mpi/coll/allreduce.c (revision 820)
+++ src/mpi/coll/allreduce.c (working copy)
@@ -720,6 +720,7 @@
/* intracommunicator */
#ifdef _SMP_
if (enable_shmem_collectives){
+ MPIR_Nest_incr();
mpi_errno = NMPI_Type_get_true_extent(datatype, &true_lb, &true_extent);
MPIU_ERR_CHKANDJUMP((mpi_errno), mpi_errno, MPI_ERR_OTHER, "**fail");
MPID_Datatype_get_extent_macro(datatype, extent);
@@ -751,9 +752,11 @@
else
uop = (MPI_User_function *) op_ptr->function.f77_function;
}
+ MPIR_Nest_decr();
}
if ((comm_ptr->shmem_coll_ok == 1)&&(stride < SHMEM_COLL_ALLREDUCE_THRESHOLD)&&
(disable_shmem_allreduce == 0) &&(is_commutative) &&(enable_shmem_collectives) &&(check_comm_registry(comm))){
+ MPIR_Nest_incr();
my_rank = comm_ptr->rank;
MPI_Comm_size(comm, &total_size);
shmem_comm = comm_ptr->shmem_comm;
@@ -764,6 +767,7 @@
leader_comm = comm_ptr->leader_comm;
MPID_Comm_get_ptr(leader_comm, leader_commptr);
+ MPIR_Nest_decr();
if (local_rank == 0){
@@ -816,7 +820,9 @@
/* Broadcasting the mesage from leader to the rest*/
if (local_size > 1){
+ MPIR_Nest_incr();
MPI_Bcast(recvbuf, count, datatype, 0, shmem_comm);
+ MPIR_Nest_decr();
}
}
Index: src/mpi/coll/reduce.c
===================================================================
--- src/mpi/coll/reduce.c (revision 820)
+++ src/mpi/coll/reduce.c (working copy)
@@ -866,6 +866,7 @@
/* intracommunicator */
#ifdef _SMP_
if (enable_shmem_collectives){
+ MPIR_Nest_incr();
mpi_errno = NMPI_Type_get_true_extent(datatype, &true_lb, &true_extent);
MPIU_ERR_CHKANDJUMP((mpi_errno), mpi_errno, MPI_ERR_OTHER, "**fail");
MPID_Datatype_get_extent_macro(datatype, extent);
@@ -897,10 +898,12 @@
else
uop = (MPI_User_function *) op_ptr->function.f77_function;
}
+ MPIR_Nest_decr();
}
if ((comm_ptr->shmem_coll_ok == 1)&&(stride < SHMEM_COLL_REDUCE_THRESHOLD)&&
(disable_shmem_reduce == 0) &&(is_commutative==1) &&(enable_shmem_collectives)&&(check_comm_registry(comm))){
+ MPIR_Nest_incr();
my_rank = comm_ptr->rank;
MPI_Comm_size(comm, &total_size);
shmem_comm = comm_ptr->shmem_comm;
@@ -911,14 +914,17 @@
leader_comm = comm_ptr->leader_comm;
MPID_Comm_get_ptr(leader_comm, leader_commptr);
+ MPIR_Nest_decr();
if (local_rank == 0){
global_rank = leader_commptr->rank;
MPIU_CHKLMEM_MALLOC(tmpbuf, void *, count*(MPIR_MAX(extent,true_extent)), mpi_errno, "receive buffer");
tmpbuf = (void *)((char*)tmpbuf - true_lb);
+ MPIR_Nest_incr();
mpi_errno = MPIR_Localcopy(sendbuf, count, datatype, tmpbuf,
count, datatype);
+ MPIR_Nest_decr();
}
if (local_size > 1){
@@ -949,20 +955,26 @@
leader_root = comm_ptr->leader_rank[leader_of_root];
if (local_size != total_size){
+ MPIR_Nest_incr();
mpi_errno = MPIR_Reduce(tmpbuf, recvbuf, count, datatype,
op, leader_root, leader_commptr);
+ MPIR_Nest_decr();
}
else if (root == my_rank){
+ MPIR_Nest_incr();
mpi_errno = MPIR_Localcopy(tmpbuf, count, datatype, recvbuf,
count, datatype);
+ MPIR_Nest_decr();
goto fn_exit;
}
}
else{
local_buf = (char*)shmem_buf + stride*local_rank;
+ MPIR_Nest_incr();
mpi_errno = MPIR_Localcopy(sendbuf, count, datatype, local_buf,
count, datatype);
+ MPIR_Nest_decr();
MPIDI_CH3I_SHMEM_COLL_SetGatherComplete(local_size, local_rank, shmem_comm_rank);
}
@@ -970,6 +982,7 @@
/* Copying data from leader to the root incase leader is
* not the root */
if (local_size > 1){
+ MPIR_Nest_incr();
/* Send the message to the root if the leader is not the
* root of the reduce operation */
if ((local_rank == 0) && (root != my_rank) && (leader_root == global_rank)){
@@ -986,7 +999,9 @@
if ((local_rank != 0) && (root == my_rank)){
mpi_errno = MPIC_Recv ( recvbuf, count, datatype, leader_of_root,
MPIR_REDUCE_TAG, comm, &status);
+
}
+ MPIR_Nest_decr();
}
}
Index: src/mpi/comm/comm_dup.c
===================================================================
--- src/mpi/comm/comm_dup.c (revision 820)
+++ src/mpi/comm/comm_dup.c (working copy)
@@ -182,6 +182,7 @@
if (enable_shmem_collectives){
if (split_comm == 1){
if (*newcomm != MPI_COMM_NULL){
+ MPIR_Nest_incr();
MPI_Comm_test_inter(*newcomm, &flag);
if (flag == 0){
int my_id, size;
@@ -191,6 +192,7 @@
create_2level_comm(*newcomm, size, my_id);
split_comm = 1;
}
+ MPIR_Nest_decr();
}
}
}
Index: src/mpi/comm/comm_split.c
===================================================================
--- src/mpi/comm/comm_split.c (revision 820)
+++ src/mpi/comm/comm_split.c (working copy)
@@ -260,6 +260,7 @@
if (enable_shmem_collectives){
if (split_comm == 1){
if (*newcomm != MPI_COMM_NULL){
+ MPIR_Nest_incr();
MPI_Comm_test_inter(*newcomm, &flag);
if (flag == 0){
int my_id, size;
@@ -269,6 +270,7 @@
create_2level_comm(*newcomm, size, my_id);
split_comm = 1;
}
+ MPIR_Nest_decr();
}
}
}
Index: src/mpi/comm/comm_create.c
===================================================================
--- src/mpi/comm/comm_create.c (revision 820)
+++ src/mpi/comm/comm_create.c (working copy)
@@ -237,6 +237,7 @@
if (enable_shmem_collectives){
if (split_comm == 1){
if (*newcomm != MPI_COMM_NULL){
+ MPIR_Nest_incr();
MPI_Comm_test_inter(*newcomm, &flag);
if (flag == 0){
int my_id, size;
@@ -246,6 +247,7 @@
create_2level_comm(*newcomm, size, my_id);
split_comm = 1;
}
+ MPIR_Nest_decr();
}
}
}
Index: src/mpi/comm/create_2level_comm.c
===================================================================
--- src/mpi/comm/create_2level_comm.c (revision 820)
+++ src/mpi/comm/create_2level_comm.c (working copy)
@@ -35,6 +35,8 @@
if (comm_count > MAX_ALLOWED_COMM) return;
+ MPIR_Nest_incr();
+
int* shmem_group = malloc(sizeof(int) * size);
if (NULL == shmem_group){
printf("Couldn't malloc shmem_group\n");
@@ -144,6 +146,7 @@
}
++comm_count;
+ MPIR_Nest_decr();
}
int check_comm_registry(MPI_Comm comm)
@@ -153,8 +156,10 @@
int context_id = 0, i =0, my_rank, size;
context_id = comm_ptr->context_id;
+ MPIR_Nest_incr();
MPI_Comm_rank(comm, &my_rank);
MPI_Comm_size(comm, &size);
+ MPIR_Nest_decr();
for (i = 0; i < comm_registered; i++){
if (comm_registry[i] == context_id){
Index: src/mpi/init/init.c
===================================================================
--- src/mpi/init/init.c (revision 820)
+++ src/mpi/init/init.c (working copy)
@@ -131,12 +131,14 @@
if (enable_shmem_collectives){
if (split_comm == 1){
+ MPIR_Nest_incr();
int my_id, size;
MPI_Comm_rank(MPI_COMM_WORLD, &my_id);
MPI_Comm_size(MPI_COMM_WORLD, &size);
split_comm = 0;
create_2level_comm(MPI_COMM_WORLD, size, my_id);
split_comm = 1;
+ MPIR_Nest_decr();
}
}
#endif
More information about the mvapich-discuss
mailing list