[mvapich-discuss] NaNs from non-blocking comms
Dan Kokron
daniel.kokron at nasa.gov
Tue Apr 5 15:00:41 EDT 2011
I'm resending this because some scanner didn't like my previous attachment.
Using mvapich2-1.6, configured and built under x86_64 Linux with the
Intel-11.0.083 compiler suite:
./configure CC=icc CXX=icpc F77=ifort F90=ifort CFLAGS=-fpic \
    CXXFLAGS=-fpic FFLAGS=-fpic F90FLAGS=-fpic \
    --prefix=/home/dkokron/play/mvapich2-1.6/install/intel \
    --enable-f77 --enable-f90 --enable-cxx --enable-romio --with-hwloc
The attached example code produces NaNs as output from MPI_Recv if
MV2_ON_DEMAND_THRESHOLD is set below the number of processes used.
The example also produces NaNs under IntelMPI-4.0.1.002 when
I_MPI_USE_DYNAMIC_CONNECTIONS=enable.
See the 'commands' file in the tarball for more information.
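For what it's worth, a receiver-side sanity check along the lines of the
sketch below (not part of the attached code; pstatus and nelem are
illustrative names) can at least confirm whether a full-length message
arrives before the payload is inspected. It would replace the MPI_Recv
in the receiver branch of the attached program:

  ! Sketch only: probe the pending message and verify its element
  ! count before posting the receive.
  integer :: pstatus(MPI_STATUS_SIZE), nelem
  call MPI_Probe(0, 1, MPI_COMM_WORLD, pstatus, ierr)
  call MPI_Get_count(pstatus, MPI_REAL, nelem, ierr)
  if (nelem /= size(var)) print *, 'short message: ', nelem, size(var)
  call MPI_Recv(var, size(var), MPI_REAL, 0, 1, MPI_COMM_WORLD, pstatus, ierr)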
--
Dan Kokron
Global Modeling and Assimilation Office
NASA Goddard Space Flight Center
Greenbelt, MD 20771
Daniel.S.Kokron at nasa.gov
Phone: (301) 614-5192
Fax: (301) 614-5304
-------------- next part --------------
module load comp/intel-11.0.083 mpi/mv2-1.6/intel-11.0.083
mpif90 -g -O0 -ftz -align all -fno-alias -traceback -debug -nolib-inline -fno-inline-functions -assume protect_parens,minus0 -prec-div -prec-sqrt -check bounds -check uninit -fp-stack-check -ftrapuv TestNonBlocking.F90
# This env var is critical to allow running under MVAPICH2-1.6
setenv MV2_ON_DEMAND_THRESHOLD 72
mpiexec.hydra -prepend-rank -launcher-exec /usr/bin/sshmpi -np 72 ./a.out
[3] NaN found 65 9 640
[69] NaN found 65 8 568
#### Intel MPI
module load comp/intel-11.0.083 mpi/impi-4.0.1.002-beta
mpiifort -g -O0 -ftz -align all -fno-alias -traceback -debug -nolib-inline -fno-inline-functions -assume protect_parens,minus0 -prec-div -prec-sqrt -check bounds -check uninit -fp-stack-check -ftrapuv TestNonBlocking.F90
set NUMNODES=`sort -u $PBS_NODEFILE | wc -l`
mpdboot -v --totalnum=$NUMNODES --file=$PBS_NODEFILE --rsh=sshmpi
# This env var is critical to allow running under IntelMPI
setenv I_MPI_USE_DYNAMIC_CONNECTIONS 0
mpiexec -l -perhost 8 -n 72 ./a.out
-------------- next part --------------
program nonblocking
implicit none
include "mpif.h"
real(kind=4), parameter :: pi = 3.14159265358979323846
real(kind=4), allocatable, dimension (: ) :: var
real(kind=4), allocatable, dimension (:,:) :: array, DstArray
integer, parameter :: im_world=288
integer, parameter :: jm_world=181
integer, parameter :: NX=4
integer, parameter :: NY=18
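! NX*NY = 72: the decomposition below assumes the job runs on exactly NX*NY ranks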
integer, allocatable :: i1(:),in(:),j1(:),jn(:),im(:),jm(:)
integer, allocatable :: IMs(:), JMs(:)
integer :: i,j,k,n
integer :: npes, mype, col, row, kount
character(len=128), parameter :: FILENAME="output.dat"
character(len=128) :: FNAME
integer status(MPI_STATUS_SIZE)
integer :: sreq   ! MPI_ISend request handle (overwritten each iteration)
integer (kind=4) :: ierr
call mpi_init(ierr)
call MPI_COMM_SIZE(MPI_COMM_WORLD,npes,ierr)
call MPI_COMM_RANK(MPI_COMM_WORLD,mype,ierr)
allocate( IMs(0:NX-1), JMs(0:NY-1), stat=ierr)
call MAPL_DecomposeDim ( im_world, IMs, NX )
call MAPL_DecomposeDim ( jm_world, JMs, NY )
! print*,'My ims ',ims
! print*,'My jms ',jms
allocate (i1(0:npes-1), stat=ierr)
allocate (in(0:npes-1), stat=ierr)
allocate (j1(0:npes-1), stat=ierr)
allocate (jn(0:npes-1), stat=ierr)
allocate (im(0:npes-1), stat=ierr)
allocate (jm(0:npes-1), stat=ierr)
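! Global index range (i1:in, j1:jn) and local extents (im, jm) owned
! by each rank in the NX x NY process grid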
do n=0,npes-1
col = mod(n,NX)
i1(n) = sum(IMs(:col))-IMs(col)+1
in(n) = sum(IMs(:col))
! print*,'My i1 in ',i1,in
row = n/NX
j1(n) = sum(JMs(:row))-JMs(row)+1
jn(n) = sum(JMs(:row))
! print*,'My j1 jn ',j1,jn
im(n) = in(n) - i1(n) + 1
jm(n) = jn(n) - j1(n) + 1
end do
allocate(DstArray(IM(mype), JM(mype) ),stat=ierr)
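! Fill with a NaN bit pattern (Intel extension: BOZ constant assigned
! to a real) so any element the communication never writes stands out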
DstArray = Z'7FA00000'
if(mype == 0) then
allocate(array(im_world,jm_world))
do j=1,jm_world
do i=1,im_world
array(i,j) = sin(2*pi*float(i)/float(im_world))*cos(pi*float(j)/float(jm_world))
end do
end do
do n=0,npes-1
kount = IM(n)*JM(n)
if(n == mype) then
DstArray = array(i1(n):in(n),j1(n):jn(n))
else
! print*,'ISend: ',n,i1(n),in(n),j1(n),jn(n),kount
! The array section below is passed through a compiler temporary, and
! sreq is dropped without an MPI_Wait; MPI requires the send buffer to
! stay valid until the request completes.
call MPI_ISend(array(i1(n):in(n),j1(n):jn(n)), kount, MPI_REAL, &
n, 1, MPI_COMM_WORLD, sreq, ierr)
end if
end do
else
allocate(var(0:IM(mype)*JM(mype)-1) ,stat=ierr)
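! Pre-fill the receive buffer with the same NaN pattern as DstArray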
var = Z'7FA00000'
call MPI_Recv(var, size(var), MPI_REAL, 0, 1, MPI_COMM_WORLD, status, ierr)
! print*,'Recv: ',status
k=0
do J=1,JM(mype)
do I=1,IM(mype)
if(isnan(var(k))) print*,'NaN found ',i,j,k
DstArray(I,J) = var(k)
k = k+1
end do
end do
deallocate(var)
end if
call MPI_Barrier(MPI_COMM_WORLD, ierr)
if(mype == 0) deallocate(array)
deallocate(i1)
deallocate(in)
deallocate(j1)
deallocate(jn)
deallocate(im)
deallocate(jm)
deallocate(DstArray)
call MPI_FINALIZE(ierr)
end program nonblocking
subroutine MAPL_DecomposeDim ( dim_world,dim,NDEs )
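! Divide dim_world points as evenly as possible across NDEs domains;
! the first mod(dim_world,NDEs) domains get one extra point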
implicit none
integer dim_world, NDEs
integer dim(0:NDEs-1)
integer n,im,rm
im = dim_world/NDEs
rm = dim_world-NDEs*im
do n=0,NDEs-1
dim(n) = im
if( n.le.rm-1 ) dim(n) = im+1
enddo
end subroutine MAPL_DecomposeDim
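! --- Sketch only; not part of the original reproducer. ---
! For comparison, the same distribution loop with each message packed
! into a contiguous staging column that stays allocated until
! MPI_Waitall has completed every send. Names (sbuf, reqs, stats,
! nreq) are illustrative; everything else comes from the program above.
integer :: nreq
integer, allocatable :: reqs(:), stats(:,:)
real(kind=4), allocatable :: sbuf(:,:)
allocate(reqs(npes-1), stats(MPI_STATUS_SIZE,npes-1))
allocate(sbuf(maxval(im)*maxval(jm), 0:npes-1))
nreq = 0
do n=0,npes-1
kount = IM(n)*JM(n)
if(n == mype) then
DstArray = array(i1(n):in(n),j1(n):jn(n))
else
nreq = nreq + 1
! pack the section; sbuf(1,n) then passes the contiguous column start
sbuf(1:kount,n) = reshape(array(i1(n):in(n),j1(n):jn(n)), (/kount/))
call MPI_ISend(sbuf(1,n), kount, MPI_REAL, &
n, 1, MPI_COMM_WORLD, reqs(nreq), ierr)
end if
end do
! completing the requests guarantees the buffers were safe to reuse
call MPI_Waitall(nreq, reqs, stats, ierr)
deallocate(sbuf, reqs, stats)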