[mvapich-discuss] MVAPICH Assertion Error
Gopal Santhanaraman
santhana at cse.ohio-state.edu
Sat Jul 7 05:54:22 EDT 2007
Hi Thomas,

We have debugged the problem further using the Fortran test code you
sent earlier. Please find attached a patch against mvapich2-0.9.8p2.
Could you try it out and let us know how it goes?

Thanks,
Gopal
On Thu, 5 Jul 2007, OShea, Thomas T. wrote:
> Any luck debugging this problem?
>
> Thanks,
> Tom
>
> ------------------------------------------------------------------------
>
> Hi, I posted about a problem with mvapich2 and communicating inside a
> node using SMP. My computer died and along with it my email, so I'm
> using this email account to post a test code that I put together to
> illustrate the error I'm getting.
>
> This code builds 2 arrays on each processor, and then has the master
> process (rank = 0) grab the arrays from each of the other ranks using
> remote memory communication with passive synchronization.
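>
> The passive-target pattern boils down to roughly the sketch below
> (condensed by hand from the full program attached further down; the
> window and buffer names here are illustrative only, and this
> stripped-down version just shows the lock/get/unlock structure rather
> than reproducing the error):
>
>       program rma_sketch
> c     Minimal passive-target RMA pattern: rank 0 locks a window on
> c     each rank in turn, issues a single MPI_GET, and unlocks.
>       include 'mpif.h'
>       parameter(n=20)
>       integer ierr,mype,nprocs,win,jproc
>       integer(kind=MPI_ADDRESS_KIND) winsize,disp
>       real*8 base(n),dest(n)
>
>       call MPI_INIT(ierr)
>       call MPI_COMM_RANK(MPI_COMM_WORLD, mype, ierr)
>       call MPI_COMM_SIZE(MPI_COMM_WORLD, nprocs, ierr)
>
> c     Expose a local buffer through an RMA window.
>       base = dble(mype)
>       winsize = 8*n
>       call MPI_WIN_CREATE(base, winsize, 8, MPI_INFO_NULL,
>      &                    MPI_COMM_WORLD, win, ierr)
>
> c     Rank 0 fetches each rank's buffer inside a lock/unlock epoch.
>       if (mype .eq. 0) then
>         do jproc = 0, nprocs-1
>           disp = 0
>           call MPI_WIN_LOCK(MPI_LOCK_SHARED, jproc, 0, win, ierr)
>           call MPI_GET(dest, n, MPI_DOUBLE_PRECISION, jproc, disp,
>      &                 n, MPI_DOUBLE_PRECISION, win, ierr)
>           call MPI_WIN_UNLOCK(jproc, win, ierr)
>         enddo
>       endif
>
>       call MPI_WIN_FREE(win, ierr)
>       call MPI_FINALIZE(ierr)
>       end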
>
> On our system the test code runs until it gets to a process that is on
> the same node, and then it gives the SMP assertion error.
>
> It was a hard bug to track down because so many things make it go
> away, such as switching the order of the 2 arrays that are passed. If
> you pass the larger array first (buff_x), there is no error.
>
> Hope this helps, and thanks for your time in looking into this.
>
> Thomas O'Shea
> SAIC
> -------------- next part --------------
>
>       program smp_test
>
> c     use mpi
>       include 'mpif.h'
>       parameter(iu_bnd=20,ju_bnd=20,ku_bnd=20,maxblocks=2)
>       parameter(len_x=iu_bnd*ju_bnd*ku_bnd,len_x2=iu_bnd)
>       integer mype,nprocs,jproc,i,j,k,mb,ierr,winx,winx2
>       integer(kind=MPI_ADDRESS_KIND) winsize,targ_disp
>
>       common/var/x(iu_bnd,ju_bnd,ku_bnd,maxblocks),
>      1           x2(iu_bnd,maxblocks)
>       real*8 ,target :: x
>       real*8 ,target :: x2
>
>       call MPI_INIT(ierr)
>       call MPI_COMM_SIZE(MPI_COMM_WORLD, nprocs, ierr)
>       call MPI_COMM_RANK(MPI_COMM_WORLD, mype, ierr)
>
>       do mb = 1,maxblocks
>         do k = 1,ku_bnd
>           do j = 1,ju_bnd
>             do i = 1,iu_bnd
>               x(i,j,k,mb) = 10*mb+mype+0.01*i+0.0001*j+.000001*k
>             enddo
>           enddo
>           x2(k,mb) = 10*mb+mype+0.01*k
>         enddo
>       enddo
>
>       call test(mype)
>
>       call MPI_BARRIER(MPI_COMM_WORLD,ierr)
>       print *,'Final Barrier',mype
>       CALL MPI_FINALIZE(ierr)
>       end
>
> c --------------------------------------------------------------------
>
>       subroutine test(mype)
> c     use mpi
>       include 'mpif.h'
>       parameter(iu_bnd=20,ju_bnd=20,ku_bnd=20,maxblocks=2)
>       parameter(len_x=iu_bnd*ju_bnd*ku_bnd,len_x2=iu_bnd)
>       integer mype,nprocs,jproc,i,j,k,mb,ierr,winx,winx2
>       integer(kind=MPI_ADDRESS_KIND) winsize,targ_disp
>
>       common/var/x(iu_bnd,ju_bnd,ku_bnd,maxblocks),
>      1           x2(iu_bnd,maxblocks)
>       real*8 ,target :: x
>       real*8 ,target :: x2
>
>       real*8 x0(iu_bnd,ju_bnd,ku_bnd)
>       real*8 x20(iu_bnd)
>
>       real*8 buff_x,buff_x2
>       pointer(p_x,buff_x(iu_bnd,ju_bnd,ku_bnd,maxblocks))
>       pointer(p_x2,buff_x2(iu_bnd,maxblocks))
>
>       call MPI_COMM_SIZE(MPI_COMM_WORLD, nprocs, ierr)
>
>       winsize = 8*len_x*maxblocks
>       CALL MPI_ALLOC_MEM(winsize, MPI_INFO_NULL, p_x, ierr)
>
>       winsize = 8*len_x2*maxblocks
>       CALL MPI_ALLOC_MEM(winsize, MPI_INFO_NULL, p_x2, ierr)
>
>       winsize = 8*len_x*maxblocks
>       CALL MPI_WIN_CREATE(buff_x,winsize,8,MPI_INFO_NULL,
>      &                    MPI_COMM_WORLD,winx,ierr)
>
>       winsize = 8*len_x2*maxblocks
>       CALL MPI_WIN_CREATE(buff_x2,winsize,8,MPI_INFO_NULL,
>      &                    MPI_COMM_WORLD,winx2,ierr)
>
>       buff_x = x
>       buff_x2 = x2
>
>       if(mype.eq.0) then ! collect arrays from other ranks
>         do jproc=0,(nprocs-1)
>           do mb = 1,2
>             targ_disp = len_x2*(mb-1)
>             CALL MPI_WIN_LOCK(MPI_LOCK_SHARED,jproc,0,winx2,ierr)
>             CALL MPI_GET(x20,len_x2,MPI_DOUBLE_PRECISION,jproc,
>      &                   targ_disp,len_x2,MPI_DOUBLE_PRECISION,
>      &                   winx2,ierr)
>             CALL MPI_WIN_UNLOCK(jproc,winx2,ierr)
>
>             print *,'2nd RMA: jproc, mb',jproc,mb
>             targ_disp = len_x*(mb-1)
>             CALL MPI_WIN_LOCK(MPI_LOCK_SHARED,jproc,0,winx,ierr)
>             CALL MPI_GET(x0,len_x,MPI_DOUBLE_PRECISION,jproc,
>      &                   targ_disp,len_x,MPI_DOUBLE_PRECISION,
>      &                   winx,ierr)
>             CALL MPI_WIN_UNLOCK(jproc,winx,ierr)
>           enddo ! mb
>         enddo ! jproc
>       endif ! mype=0
>
>       ! Freeing Windows and Memory
>       CALL MPI_WIN_FREE(winx,ierr)
>       CALL MPI_WIN_FREE(winx2,ierr)
>       CALL MPI_FREE_MEM(buff_x,ierr)
>       CALL MPI_FREE_MEM(buff_x2,ierr)
>
>       end subroutine test
>
-------------- next part --------------
diff -ruN mvapich2-0.9.8p2/src/mpid/osu_ch3/src/ch3u_rma_sync.c mvapich2-0.9.8p2/src/mpid/osu_ch3/src/ch3u_rma_sync.c
--- mvapich2-0.9.8p2/src/mpid/osu_ch3/src/ch3u_rma_sync.c 2006-10-03 14:22:56.000000000 -0400
+++ mvapich2-0.9.8p2/src/mpid/osu_ch3/src/ch3u_rma_sync.c 2007-07-07 04:42:32.000000000 -0400
@@ -1980,6 +1980,7 @@
if ((HANDLE_GET_KIND(curr_op->target_datatype) ==
HANDLE_KIND_BUILTIN)
&& MPIDI_CH3_Eager_ok(vc, type_size * curr_op->origin_count)) {
+#if 0
single_op_opt = 1;
/* Set the lock granted flag to 1 */
win_ptr->lock_granted = 1;
@@ -1993,6 +1994,7 @@
if (mpi_errno) {
MPIU_ERR_POP(mpi_errno);
}
+#endif
}
}