[mvapich-discuss] MVAPICH Assertion Error

Gopal Santhanaraman santhana at cse.ohio-state.edu
Sat Jul 7 05:54:22 EDT 2007


 Hi Thomas,

    We have debugged the problem further with respect to the Fortran
    test code you had sent earlier.
    Please find attached a patch against mvapich2-0.9.8p2.

    Could you please try it out and let us know how it goes?

 Thanks
 Gopal

On Thu, 5 Jul 2007, OShea, Thomas T. wrote:

> Any luck debugging this problem?
>
> Thanks,
>
> Tom
>
> ------------------------------------------------------------------------
>
> Hi, I posted about a problem with mvapich2 and communicating inside a
> node using SMP. My computer died and along with it my email, so I'm
> using this email account to post a test code that I put together to
> illustrate the error I'm getting.
>
> This code builds 2 arrays on each processor, and then tries to have
> the master process (rank = 0) grab the arrays from each of the other
> ranks using remote memory communication with passive synchronization.
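
The per-target access pattern the test exercises, reduced to a minimal sketch (jproc, x20, len_x2, targ_disp, winx2, and ierr are the names used in the attached program; this only condenses the attached listing):

      ! take a shared lock on the target's window, read one block into
      ! a local buffer, then release the lock (passive target access)
      CALL MPI_WIN_LOCK(MPI_LOCK_SHARED,jproc,0,winx2,ierr)
      CALL MPI_GET(x20,len_x2,MPI_DOUBLE_PRECISION,jproc,targ_disp,
     &             len_x2,MPI_DOUBLE_PRECISION,winx2,ierr)
      CALL MPI_WIN_UNLOCK(jproc,winx2,ierr)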
>
> On our system this code will run until it gets to a process that is
> on the same node, and then it gives the SMP assertion error.
>
> It was a hard bug to track down because so many things will make it
> go away, such as switching the order of the 2 arrays that are passed.
> If you pass the larger array first (buff_x), there is no error.
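
A sketch of the reordering described above, assuming the only change is swapping the two lock/get/unlock blocks inside the mb loop of the attached test (all names are taken from that code):

      ! workaround ordering: fetch the large x block through winx first
      targ_disp = len_x*(mb-1)
      CALL MPI_WIN_LOCK(MPI_LOCK_SHARED,jproc,0,winx,ierr)
      CALL MPI_GET(x0,len_x,MPI_DOUBLE_PRECISION,jproc,targ_disp,
     &             len_x,MPI_DOUBLE_PRECISION,winx,ierr)
      CALL MPI_WIN_UNLOCK(jproc,winx,ierr)

      ! ... and only then the small x2 block through winx2
      targ_disp = len_x2*(mb-1)
      CALL MPI_WIN_LOCK(MPI_LOCK_SHARED,jproc,0,winx2,ierr)
      CALL MPI_GET(x20,len_x2,MPI_DOUBLE_PRECISION,jproc,targ_disp,
     &             len_x2,MPI_DOUBLE_PRECISION,winx2,ierr)
      CALL MPI_WIN_UNLOCK(jproc,winx2,ierr)

With this ordering the error reportedly does not occur, which appears consistent with the patch further below compiling out the eager single-operation lock/get/unlock path that a small transfer such as the x2 one could take.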
>
> Hope this helps, and thanks for your time in looking into this.
>
> Thomas O'Shea
> SAIC
>
> -------------- next part --------------
>
>       program smp_test
>
> c      use mpi
>       include 'mpif.h'
>       parameter(iu_bnd=20,ju_bnd=20,ku_bnd=20,maxblocks=2)
>       parameter(len_x=iu_bnd*ju_bnd*ku_bnd,len_x2=iu_bnd)
>       integer mype,nprocs,jproc,i,j,k,mb,ierr,winx,winx2
>       integer(kind=MPI_ADDRESS_KIND) winsize,targ_disp
>
>       common/var/x(iu_bnd,ju_bnd,ku_bnd,maxblocks),
>      1           x2(iu_bnd,maxblocks)
>       real*8 ,target :: x
>       real*8 ,target :: x2
>
>       call MPI_INIT(ierr)
>       call MPI_COMM_SIZE(MPI_COMM_WORLD, nprocs, ierr)
>       call MPI_COMM_RANK(MPI_COMM_WORLD, mype, ierr)
>
>       do mb = 1,maxblocks
>        do k = 1,ku_bnd
>         do j = 1,ju_bnd
>          do i = 1,iu_bnd
>           x(i,j,k,mb) =10*mb+ mype+0.01*i+0.0001*j+.000001*k
>          enddo
>         enddo
>         x2(k,mb) = 10*mb+mype+0.01*k
>        enddo
>       enddo
>
>       call test(mype)
>
>       call MPI_BARRIER(MPI_COMM_WORLD,ierr)
>       print *,'Final Barrier',mype
>       CALL MPI_FINALIZE(ierr)
>       end
>
> c ---------------------------------------------------------------------
>
>       subroutine test(mype)
> c      use mpi
>       include 'mpif.h'
>       parameter(iu_bnd=20,ju_bnd=20,ku_bnd=20,maxblocks=2)
>       parameter(len_x=iu_bnd*ju_bnd*ku_bnd,len_x2=iu_bnd)
>       integer mype,nprocs,jproc,i,j,k,mb,ierr,winx,winx2
>       integer(kind=MPI_ADDRESS_KIND) winsize,targ_disp
>
>       common/var/x(iu_bnd,ju_bnd,ku_bnd,maxblocks),
>      1           x2(iu_bnd,maxblocks)
>       real*8 ,target :: x
>       real*8 ,target :: x2
>
>       real*8 x0(iu_bnd,ju_bnd,ku_bnd)
>       real*8 x20(iu_bnd)
>
>       real*8 buff_x,buff_x2
>       pointer(p_x,buff_x(iu_bnd,ju_bnd,ku_bnd,maxblocks))
>       pointer(p_x2,buff_x2(iu_bnd,maxblocks))
>
>       call MPI_COMM_SIZE(MPI_COMM_WORLD, nprocs, ierr)
>
>       winsize = 8*len_x*maxblocks
>       CALL MPI_ALLOC_MEM(winsize, MPI_INFO_NULL, p_x, ierr)
>
>       winsize = 8*len_x2*maxblocks
>       CALL MPI_ALLOC_MEM(winsize, MPI_INFO_NULL, p_x2, ierr)
>
>       winsize = 8*len_x*maxblocks
>       CALL MPI_WIN_CREATE(buff_x,winsize,8,MPI_INFO_NULL,
>      &                    MPI_COMM_WORLD,winx,ierr)
>
>       winsize = 8*len_x2*maxblocks
>       CALL MPI_WIN_CREATE(buff_x2,winsize,8,MPI_INFO_NULL,
>      &                    MPI_COMM_WORLD,winx2,ierr)
>
>       buff_x = x
>       buff_x2 = x2
>
>       if(mype.eq.0) then ! collect arrays from other ranks
>       do jproc=0,(nprocs-1)
>        do mb = 1,2
>         targ_disp = len_x2*(mb-1)
>         CALL MPI_WIN_LOCK(MPI_LOCK_SHARED,jproc,0,winx2,ierr)
>         CALL MPI_GET(x20,len_x2,MPI_DOUBLE_PRECISION,jproc,targ_disp,
>      &               len_x2,MPI_DOUBLE_PRECISION,winx2,ierr)
>         CALL MPI_WIN_UNLOCK(jproc,winx2,ierr)
>
>         print *,'2nd RMA: jproc, mb',jproc,mb
>         targ_disp = len_x*(mb-1)
>         CALL MPI_WIN_LOCK(MPI_LOCK_SHARED,jproc,0,winx,ierr)
>         CALL MPI_GET(x0,len_x,MPI_DOUBLE_PRECISION,jproc,targ_disp,
>      &               len_x,MPI_DOUBLE_PRECISION,winx,ierr)
>         CALL MPI_WIN_UNLOCK(jproc,winx,ierr)
>
>        enddo ! mb
>       enddo ! jproc
>       endif ! mype=0
>
> ! Freeing Windows and Memory
>       CALL MPI_WIN_FREE(winx,ierr)
>       CALL MPI_WIN_FREE(winx2,ierr)
>       CALL MPI_FREE_MEM(buff_x,ierr)
>       CALL MPI_FREE_MEM(buff_x2,ierr)
>
>       end subroutine test
>
-------------- next part --------------
diff -ruN mvapich2-0.9.8p2/src/mpid/osu_ch3/src/ch3u_rma_sync.c mvapich2-0.9.8p2/src/mpid/osu_ch3/src/ch3u_rma_sync.c
--- mvapich2-0.9.8p2/src/mpid/osu_ch3/src/ch3u_rma_sync.c	2006-10-03 14:22:56.000000000 -0400
+++ mvapich2-0.9.8p2/src/mpid/osu_ch3/src/ch3u_rma_sync.c	2007-07-07 04:42:32.000000000 -0400
@@ -1980,6 +1980,7 @@
 	if ((HANDLE_GET_KIND(curr_op->target_datatype) ==
 	     HANDLE_KIND_BUILTIN)
 	    && MPIDI_CH3_Eager_ok(vc, type_size * curr_op->origin_count)) {
+#if 0
 	    single_op_opt = 1;
 	    /* Set the lock granted flag to 1 */
 	    win_ptr->lock_granted = 1;
@@ -1993,6 +1994,7 @@
 	    if (mpi_errno) {
 		MPIU_ERR_POP(mpi_errno);
 	    }
+#endif
 	}
     }
 

