[mvapich-discuss] SIGSEV in F90: An MPI bug?

Brian Curtis curtisbr at cse.ohio-state.edu
Wed Jan 23 15:23:03 EST 2008


David,

Sorry to hear you are experiencing problems with the MVAPICH2 Fortran 90 
interface.  I will be investigating this issue, but need some additional 
information about your setup.  What is the exact version of MVAPICH2 1.0 
you are utilizing (daily tarball or release)?  Have you tried MVAPICH2 
1.0.1?
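
In the meantime, one workaround you could try - sketched below against the
NON_CONTIGUOUS_FAILS routine in your test program (the BCAST_SECTION name is
just for illustration), and not something we have verified on your setup - is
to pass MPI_BCAST a contiguous copy of the array section rather than the
non-contiguous pointer, so the MPI call never sees a compiler-generated
temporary:

    SUBROUTINE BCAST_SECTION
      ! Sketch only: assumes the same module variables (data, m, ipt, MYID)
      ! and the USE MPI association provided by the vars module in your test.
      USE vars
      IMPLICIT NONE
      INTEGER, ALLOCATABLE :: tmp(:,:)
      INTEGER :: IERR

      ! Copy the non-contiguous section data(1:m,1:m) into a contiguous buffer.
      ALLOCATE(tmp(m,m))
      tmp = data(1:m,1:m)

      ! Broadcast the contiguous buffer from rank 0.
      CALL MPI_BCAST(tmp, m*m, MPI_INTEGER, 0, MPI_COMM_WORLD, IERR)
      IF (IERR /= 0) WRITE(ipt,*) "BAD BCAST", MYID

      ! Copy the received values back into the original array section.
      data(1:m,1:m) = tmp
      DEALLOCATE(tmp)
    END SUBROUTINE BCAST_SECTION

Broadcasting the whole allocated array "data" (n*m elements) would also avoid
the temporary; the explicit copy just keeps the message size at m*m, as in
your test case.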

Brian

David Stuebe wrote:
> Hello MVAPICH
> I have found a strange bug in MVAPICH2 using IFORT. The behavior is very
> strange indeed - it seems to be related to how ifort deals with passing
> pointers to the MVAPICH FORTRAN 90 INTERFACE.
> The MPI call returns successfully, but later calls to a dummy subroutine
> cause a SIGSEGV.
>
>  Please look at the following code:
>
> !=================================================================================
> !=================================================================================
> !=================================================================================
> ! TEST CODE FOR A POSSIBLE BUG IN MVAPICH2 COMPILED WITH IFORT
> ! WRITTEN BY: DAVID STUEBE
> ! DATE: JAN 23, 2008
> !
> ! COMPILE WITH: mpif90 -xP mpi_prog.f90 -o xtest
> !
> ! KNOWN BEHAVIOR:
> ! PASSING A NON-CONTIGUOUS POINTER TO MPI_BCAST CAUSES FAILURE OF
> ! SUBROUTINES USING MULTI-DIMENSIONAL EXPLICIT-SHAPE ARRAYS WITHOUT AN
> ! INTERFACE -
> ! EVEN THOUGH THE MPI_BCAST COMPLETES SUCCESSFULLY, RETURNING VALID DATA.
> !
> ! COMMENTS:
> ! I REALIZE PASSING NON-CONTIGUOUS POINTERS IS DANGEROUS - SHAME ON
> ! ME FOR MAKING THAT MISTAKE. HOWEVER, IT SHOULD EITHER WORK OR NOT.
> ! RETURNING SUCCESSFULLY BUT CAUSING INTERFACE ERRORS LATER IS
> ! EXTREMELY DIFFICULT TO DEBUG!
> !
> ! CONDITIONS FOR OCCURRENCE:
> !    COMPILER MUST OPTIMIZE USING 'VECTORIZATION'
> !    ARRAY MUST BE 'LARGE' - SYSTEM DEPENDENT?
> !    MUST BE RUN ON MORE THAN ONE NODE TO CAUSE CRASH...
> !    ie  Running inside one SMP box does not crash.
> !
> !    RUNNING UNDER MPD, ALL PROCESSES SIGSEGV
> !    RUNNING UNDER MPIEXEC 0.82 FOR PBS,
> !       ONLY SOME PROCESSES SIGSEGV?
> !
> ! ENVIRONMENTAL INFO:
> ! NODES: DELL 1850 3.0GHZ, 2GB RAM, INFINIBAND PCI-EX 4X
> ! SYSTEM: ROCKS 4.2
> ! gcc version 3.4.6 20060404 (Red Hat 3.4.6-3)
> !
> ! IFORT/ICC:
> !   Intel(R) Fortran Compiler for Intel(R) EM64T-based applications,
> !   Version 9.1 Build 20061101 Package ID: l_fc_c_9.1.040
> !
> ! MVAPICH2: mpif90 for mvapich2-1.0
> ! ./configure --prefix=/usr/local/share/mvapich2/1.0
> !    --with-device=osu_ch3:mrail --with-rdma=vapi --with-pm=mpd --enable-f90
> !    --enable-cxx --disable-romio --without-mpe
> !
> !=================================================================================
> !=================================================================================
> !=================================================================================
>
> Module vars
>   USE MPI
>   implicit none
>
>
>   integer :: n,m,MYID,NPROCS
>   integer :: ipt
>
>   integer, allocatable, target :: data(:,:)
>
>   contains
>
>     subroutine alloc_vars
>       implicit none
>
>       integer Status
>
>       allocate(data(n,m),stat=status)
>       if (status /=0) then
>          write(ipt,*) "allocation error"
>          stop
>       end if
>
>       data = 0
>
>     end subroutine alloc_vars
>
>    SUBROUTINE INIT_MPI_ENV(ID,NP)
> !===================================================================================|
> !  INITIALIZE MPI ENVIRONMENT                                                       |
> !===================================================================================|
>      INTEGER, INTENT(OUT) :: ID,NP
>      INTEGER IERR
>
>      IERR=0
>
>      CALL MPI_INIT(IERR)
>      IF(IERR/=0) WRITE(*,*) "BAD MPI_INIT", ID
>      CALL MPI_COMM_RANK(MPI_COMM_WORLD,ID,IERR)
>      IF(IERR/=0) WRITE(*,*) "BAD MPI_COMM_RANK", ID
>      CALL MPI_COMM_SIZE(MPI_COMM_WORLD,NP,IERR)
>      IF(IERR/=0) WRITE(*,*) "BAD MPI_COMM_SIZE", ID
>
>    END SUBROUTINE INIT_MPI_ENV
>
>
> !==============================================================================|
>   SUBROUTINE PSHUTDOWN
>
> !==============================================================================|
>     INTEGER IERR
>
>     IERR=0
>     CALL MPI_FINALIZE(IERR)
>     if(ierr /=0) write(ipt,*) "BAD MPI_FINALIZE", MYID
>     close(IPT)
>     STOP
>
>   END SUBROUTINE PSHUTDOWN
>
>
>   SUBROUTINE CONTIGUOUS_WORKS
>     IMPLICIT NONE
>     INTEGER, pointer :: ptest(:,:)
>     INTEGER :: IERR, I,J
>
>
>     write(ipt,*) "START CONTIGUOUS:"
>     n=2000 ! Set size here...
>     m=n+10
>
>     call alloc_vars
>     write(ipt,*) "ALLOCATED DATA"
>     ptest => data(1:N,1:N)
>
>     IF (MYID == 0) ptest=6
>     write(ipt,*) "Made POINTER"
>
>     call MPI_BCAST(ptest,N*N,MPI_INTEGER,0,MPI_COMM_WORLD,IERR)
>     IF(IERR /= 0) WRITE(IPT,*) "BAD BCAST", MYID
>
>     write(ipt,*) "BROADCAST Data; a value:",data(1,6)
>
>     DO I = 1,N
>        DO J = 1,N
>           if(data(I,J) /= 6) &
>                & write(ipt,*) "INCORRECT VALUE!", I,J,data(I,J)
>        END DO
>
>        DO J = N+1,M
>           if(data(I,J) /= 0) &
>                & write(ipt,*) "INCORRECT VALUE!", I,J,data(I,J)
>        END DO
>
>     END DO
>
>     ! CALL THREE DIFFERENT EXAMPLES OF SUBROUTINES W/OUT AN INTERFACE
>     ! THAT USE AN EXPLICIT SHAPE ARRAY
>     write(ipt,*) "CALLING DUMMY1"
>     CALL DUMMY1
>
>     write(ipt,*) "CALLING DUMMY2"
>     call Dummy2(m,n)
>
>     write(ipt,*) "CALLING DUMMY3"
>     call Dummy3
>     write(ipt,*) "FINISHED!"
>
>   END SUBROUTINE CONTIGUOUS_WORKS
>
>   SUBROUTINE NON_CONTIGUOUS_FAILS
>     IMPLICIT NONE
>     INTEGER, pointer :: ptest(:,:)
>     INTEGER :: IERR, I,J
>
>
>     write(ipt,*) "START NON_CONTIGUOUS:"
>
>     m=200 ! Set size here - crash is size dependent!
>     n=m+10
>
>     call alloc_vars
>     write(ipt,*) "ALLOCATED DATA"
>     ptest => data(1:M,1:M)
>
> !===================================================
> ! IF YOU CALL DUMMY2 HERE TOO, THEN EVERYTHING PASSES  ???
> !===================================================
> !    CALL DUMMY1 ! THIS ONE HAS NO EFFECT
> !    CALL DUMMY2 ! THIS ONE 'FIXES' THE BUG
>
>     IF (MYID == 0) ptest=6
>     write(ipt,*) "Made POINTER"
>
>     call MPI_BCAST(ptest,M*M,MPI_INTEGER,0,MPI_COMM_WORLD,IERR)
>     IF(IERR /= 0) WRITE(IPT,*) "BAD BCAST"
>
>     write(ipt,*) "BROADCAST Data; a value:",data(1,6)
>
>     DO I = 1,M
>        DO J = 1,M
>           if(data(J,I) /= 6) &
>                & write(ipt,*) "INCORRECT VALUE!", I,J,data(J,I)
>        END DO
>
>        DO J = M+1,N
>           if(data(J,I) /= 0) &
>                & write(ipt,*) "INCORRECT VALUE!", I,J,data(J,I)
>        END DO
>     END DO
>
>     ! CALL THREE DIFFERENT EXAMPLES OF SUBROUTINES W/OUT AN INTERFACE
>     ! THAT USE AN EXPLICIT SHAPE ARRAY
>     write(ipt,*) "CALLING DUMMY1"
>     CALL DUMMY1
>
>     write(ipt,*) "CALLING DUMMY2"
>     call Dummy2(m,n) ! SHOULD CRASH HERE!
>
>     write(ipt,*) "CALLING DUMMY3"
>     call Dummy3
>     write(ipt,*) "FINISHED!"
>
>   END SUBROUTINE NON_CONTIGUOUS_FAILS
>
>
>   End Module vars
>
>
> Program main
>   USE vars
>   implicit none
>
>
>   CALL INIT_MPI_ENV(MYID,NPROCS)
>
>   ipt=myid+10
>   OPEN(ipt)
>
>
>   write(ipt,*) "Start memory test!"
>
>   CALL NON_CONTIGUOUS_FAILS
>
> !  CALL CONTIGUOUS_WORKS
>
>   write(ipt,*) "End memory test!"
>
>   CALL PSHUTDOWN
>
> END Program main
>
>
>
> ! THREE DUMMY SUBROUTINES WITH EXPLICIT-SHAPE ARRAYS
> ! DUMMY1 DECLARES A VECTOR  - THIS ONE NEVER CAUSES FAILURE
> ! DUMMY2 DECLARES AN ARRAY  - THIS ONE CAUSES FAILURE
> ! DUMMY3 DECLARES AN ARRAY USING THE MODULE VARIABLES m AND n
>
> SUBROUTINE DUMMY1
>   USE vars
>   implicit none
>   real, dimension(m) :: my_data
>
>   write(ipt,*) "m,n",m,n
>
>   write(ipt,*) "DUMMY 1", size(my_data)
>
> END SUBROUTINE DUMMY1
>
>
> SUBROUTINE DUMMY2(i,j)
>   USE vars
>   implicit none
>   INTEGER, INTENT(IN) ::i,j
>
>
>   real, dimension(i,j) :: my_data
>
>   write(ipt,*) "start: DUMMY 2", size(my_data)
>
>
> END SUBROUTINE DUMMY2
>
> SUBROUTINE DUMMY3
>   USE vars
>   implicit none
>
>
>   real, dimension(m,n) :: my_data
>
>
>   write(ipt,*) "start: DUMMY 3", size(my_data)
>
>
> END SUBROUTINE DUMMY3
>
>   
> ------------------------------------------------------------------------
>
> _______________________________________________
> mvapich-discuss mailing list
> mvapich-discuss at cse.ohio-state.edu
> http://mail.cse.ohio-state.edu/mailman/listinfo/mvapich-discuss
>   


More information about the mvapich-discuss mailing list