[mvapich-discuss] Strange error with MPI_REDUCE

amith rajith mamidala mamidala at cse.ohio-state.edu
Sun Dec 30 13:31:45 EST 2007


Hi Christian,

I am attaching a patch for the MPI_REDUCE error with MVAPICH2. This patch is
very similar to the one posted earlier. I tested it with my own test case,
which is also attached to this email. Could you please check whether it
works for you?


Thanks,
Amith.

On Fri, 21 Dec 2007, Christian Boehme wrote:

> Hi Amith,
>
> amith rajith mamidala schrieb:
> > If you are using MVAPICH1, I am attaching a minor patch related to freeing
> > of memory with MPI_REDUCE. Can you apply this one too?
> >
>
> The error regarding the wrong behavior of MPI_REDUCE was fixed with your
> previous patch. How should I test this one?
>
> >> I am attaching the patch for MVAPICH2 (0.9.8) along with this mail. Can
> >> you please try this out?
> >>
>
> Thanks a lot. However, this patch did not seem to work; the behavior of
> MPI_REDUCE is still as described before.
>
> I'm away for the rest of this year, so I won't get back to this problem
> before January 7th. Have a good start into 2008!
>
> Christian
>
>
-------------- next part --------------
Index: reduce.c
===================================================================
--- reduce.c	(revision 1746)
+++ reduce.c	(working copy)
@@ -729,14 +729,14 @@
     MPI_Comm shmem_comm, leader_comm;
     MPID_Comm *shmem_commptr = 0, *leader_commptr = 0;
     int local_rank = -1, global_rank = -1, local_size=0, my_rank;
-    void* local_buf, *tmpbuf;
+    void* local_buf, *tmpbuf, *tmpbuf1;
     MPI_Aint   true_lb, true_extent, extent;
     MPI_User_function *uop;
     int stride = 0, i, is_commutative, size;
     MPID_Op *op_ptr;
     MPI_Status status;
     int leader_root, total_size, shmem_comm_rank;
-    MPIU_CHKLMEM_DECL(1);
+    MPIU_CHKLMEM_DECL(2);
 #ifdef HAVE_CXX_BINDING
     int is_cxx_uop = 0;
 #endif
@@ -918,6 +918,8 @@
                     global_rank = leader_commptr->rank;
                     MPIU_CHKLMEM_MALLOC(tmpbuf, void *, count*(MPIR_MAX(extent,true_extent)), mpi_errno, "receive buffer");
                     tmpbuf = (void *)((char*)tmpbuf - true_lb);
+                    MPIU_CHKLMEM_MALLOC(tmpbuf1, void *, count*(MPIR_MAX(extent,true_extent)), mpi_errno, "receive buffer");
+                    tmpbuf1 = (void *)((char*)tmpbuf1 - true_lb);
                     MPIR_Nest_incr();
                     mpi_errno = MPIR_Localcopy(sendbuf, count, datatype, tmpbuf,
                             count, datatype);
@@ -953,7 +955,7 @@
                     leader_root = comm_ptr->leader_rank[leader_of_root];
                     if (local_size != total_size){
                         MPIR_Nest_incr();
-                        mpi_errno = MPIR_Reduce(tmpbuf, recvbuf, count, datatype,
+                        mpi_errno = MPIR_Reduce(tmpbuf, tmpbuf1, count, datatype,
                                 op, leader_root, leader_commptr); 
                         MPIR_Nest_decr();
                     }
@@ -975,6 +977,13 @@
                     MPIDI_CH3I_SHMEM_COLL_SetGatherComplete(local_size, local_rank, shmem_comm_rank);
                 }
 
+                if ((local_rank == 0) && (root == my_rank)){
+                    MPIR_Nest_incr();
+                    mpi_errno = MPIR_Localcopy(tmpbuf1, count, datatype, recvbuf,
+                            count, datatype);
+                    MPIR_Nest_decr();
+                    goto fn_exit;
+                }
 
                 /* Copying data from leader to the root incase leader is
                  * not the root */
@@ -988,7 +997,7 @@
                                     MPIR_REDUCE_TAG, comm );
                         }
                         else{
-                            mpi_errno  = MPIC_Send( recvbuf, count, datatype, root, 
+                            mpi_errno  = MPIC_Send( tmpbuf1, count, datatype, root, 
                                     MPIR_REDUCE_TAG, comm );
                         }
                     }
-------------- next part --------------
/* -*- Mode: C; c-basic-offset:4 ; -*- */
/*
 *
 *  (C) 2003 by Argonne National Laboratory.
 *      See COPYRIGHT in top-level directory.
 */
#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>


/*
 * Test case for MPI_Reduce with a fixed, non-zero root rank.
 *
 * Every rank contributes COUNT integers set to 1 and the values are summed
 * at rank ROOT.  Each rank prints the first element of its receive buffer
 * (which was pre-set to 0); on the root the expected value is the number of
 * processes in MPI_COMM_WORLD.
 *
 * Returns 0 on success.  Returns EXIT_FAILURE when the job has too few
 * processes for rank ROOT to exist, or when the root observes a wrong sum.
 */
int main( int argc, char *argv[] )
{
    enum { COUNT = 1, ROOT = 3 };
    int errs = 0;
    int rank, size, i;
    int sendbuf[COUNT], recvbuf[COUNT];

    MPI_Init(&argc, &argv);
    MPI_Comm_rank( MPI_COMM_WORLD, &rank );
    MPI_Comm_size( MPI_COMM_WORLD, &size );

    /* ROOT must be a valid rank.  Every rank sees the same size, so all
     * ranks take this exit together and nobody is left waiting inside the
     * collective. */
    if (size <= ROOT) {
        if (rank == 0) {
            fprintf(stderr, "This test needs at least %d processes (got %d)\n",
                    ROOT + 1, size);
        }
        MPI_Finalize();
        return EXIT_FAILURE;
    }

    for (i = 0; i < COUNT; i++) {
        sendbuf[i] = 1;
        recvbuf[i] = 0;
    }

    MPI_Reduce( sendbuf, recvbuf, COUNT, MPI_INT, MPI_SUM,
            ROOT, MPI_COMM_WORLD );
    printf("[%d]recvbuf:%d\n", rank, recvbuf[0]);

    /* Only the root's receive buffer is defined after the reduction; the
     * sum of one "1" per process must equal the communicator size. */
    if (rank == ROOT) {
        for (i = 0; i < COUNT; i++) {
            if (recvbuf[i] != size) {
                errs++;
                fprintf(stderr, "[%d]recvbuf[%d] is %d, expected %d\n",
                        rank, i, recvbuf[i], size);
            }
        }
    }

    MPI_Finalize();
    return errs ? EXIT_FAILURE : 0;
}


More information about the mvapich-discuss mailing list