[mvapich-discuss] Possible RMA passive mode deadlock

James Dinan dinan at mcs.anl.gov
Fri Feb 18 12:39:35 EST 2011


Hi,

I've been trying to pin down a deadlock problem in my application and I 
think I've narrowed it down to a small MPI test that locks up under 
MVAPICH2 1.5 through the latest 1.6 release candidate.  I've tried the 
same test on MPICH2 version 1.2.1p1 and 1.3.2p1 and it does not appear 
to have this problem.

The deadlock appears to occur when multiple passive mode put operations 
with exclusive locks contend with one another.  I've attached my test 
case; this test locks up for me on our Fusion cluster with 4+ processes 
and 1 process per node.  I've also attached a stack trace for each 
process at the point when things lock up.

Any suggestions on how to proceed?

Thanks,
  ~Jim.
-------------- next part --------------
/** Contended RMA put test -- James Dinan <dinan at mcs.anl.gov>
  *
  * Each process issues COUNT put operations to non-overlapping locations on
  * every other process.
  */

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <mpi.h>

#define MAXELEMS      6400
#define COUNT         1000

static int me, nproc;
static const int verbose = 0;

/* Contended passive-target RMA test: every rank issues COUNT exclusive-lock
 * put operations into a non-overlapping slice of every other rank's window.
 * (Reproducer for a lock-contention deadlock seen under MVAPICH2 1.5-1.6rc.) */
void test_put() {
  MPI_Win  dst_win;
  double  *dst_buf;
  double   src_buf[MAXELEMS];
  int      i, j;

  /* Window holds one MAXELEMS-sized slice per source rank; the displacement
   * unit is 1 byte, so put offsets below are computed in bytes. */
  MPI_Alloc_mem(sizeof(double)*nproc*MAXELEMS, MPI_INFO_NULL, &dst_buf);
  MPI_Win_create(dst_buf, sizeof(double)*nproc*MAXELEMS, 1, MPI_INFO_NULL, MPI_COMM_WORLD, &dst_win);

  for (i = 0; i < MAXELEMS; i++)
    src_buf[i] = me + 1.0;

  /* Zero the local window under an exclusive self-lock.
   * BUG FIX: the rank and assert arguments were transposed --
   * MPI_Win_lock(MPI_LOCK_EXCLUSIVE, 0, me, dst_win) -- which locked rank 0
   * with an invalid assert value (`me`) and then unlocked rank `me` below,
   * an erroneous mismatch on every rank except 0.  The signature is
   * MPI_Win_lock(lock_type, rank, assert, win); lock our own window. */
  MPI_Win_lock(MPI_LOCK_EXCLUSIVE, me, 0, dst_win);

  for (i = 0; i < nproc*MAXELEMS; i++)
    dst_buf[i] = 0.0;

  MPI_Win_unlock(me, dst_win);

  MPI_Barrier(MPI_COMM_WORLD);

  for(i = 0; i < nproc; i++) {
    /* int target = (me + i) % nproc; */
    int target = i;
    for(j = 0; j < COUNT; j++) {
      printf("%2d -> %2d [%2d]\n", me, target, j); 
      MPI_Win_lock(MPI_LOCK_EXCLUSIVE, target, 0, dst_win);
      /* One 8-byte put into this rank's slice of the target's window. */
      MPI_Put(&src_buf[j], sizeof(double), MPI_BYTE, target, (me*MAXELEMS+j)*sizeof(double), sizeof(double), MPI_BYTE, dst_win);
      MPI_Win_unlock(target, dst_win);
    }
  }

  MPI_Barrier(MPI_COMM_WORLD);

  MPI_Win_free(&dst_win);
  MPI_Free_mem(dst_buf);
}


/* Driver: bring up MPI, run the contended put test, and shut down.
 * Progress messages come from rank 0 only, and only when `verbose` is set. */
int main(int argc, char* argv[]) {
  int announce;

  MPI_Init(&argc, &argv);
  MPI_Comm_size(MPI_COMM_WORLD, &nproc);
  MPI_Comm_rank(MPI_COMM_WORLD, &me);

  /* test_put() indexes src_buf[0..COUNT-1], so COUNT must fit in MAXELEMS. */
  assert(COUNT <= MAXELEMS);

  announce = (me == 0) && verbose;
  if (announce) {
    printf("Test starting on %d processes\n", nproc);
    fflush(stdout);
  }

  test_put();
  MPI_Barrier(MPI_COMM_WORLD);
  MPI_Finalize();

  if (announce) {
    printf("Test completed.\n");
    fflush(stdout);
  }

  return 0;
}
-------------- next part --------------
== 0/4 =====================================================================================

i = 1
j = 7

#0  MPIDI_CH3I_SMP_write_progress (pg=0xf531158) at ch3_smp_progress.c:397
#1  0x00000000004267a9 in MPIDI_CH3I_Progress (is_blocking=1, state=0x2b3282cdfe00) at ch3_progress.c:176
#2  0x0000000000465f48 in MPIDI_CH3I_Send_lock_put_or_acc (dest=<value optimized out>, win_ptr=0x72c520)
    at ch3u_rma_sync.c:3296
#3  MPIDI_Win_unlock (dest=<value optimized out>, win_ptr=0x72c520) at ch3u_rma_sync.c:2797
#4  0x0000000000415b5c in PMPI_Win_unlock (rank=1, win=-1610612736) at win_unlock.c:102
#5  0x000000000040459c in test_put () at test-put.c:46
#6  0x0000000000404657 in main (argc=1, argv=0x7fffb7786cd8) at test-put.c:69

== 1/4 =====================================================================================

i = 4
j = 1000

#0  0x000000360f00b725 in pthread_spin_lock () from /lib64/libpthread.so.0
#1  0x00002b20516a8a7b in ibv_cmd_create_qp () from /usr/lib64/libmlx4-rdmav2.so
#2  0x000000000043f032 in ibv_poll_cq (vbuf_handle=0x7fff53b892e8, vc_req=0x0, receiving=0, is_blocking=1)
    at /usr/include/infiniband/verbs.h:884
#3  MPIDI_CH3I_MRAILI_Cq_poll (vbuf_handle=0x7fff53b892e8, vc_req=0x0, receiving=0, is_blocking=1)
    at ibv_channel_manager.c:599
#4  0x0000000000426e12 in MPIDI_CH3I_read_progress (vc_pptr=0x7fff53b892f0, v_ptr=0x7fff53b892e8, is_blocking=1)
    at ch3_read_progress.c:146
#5  0x0000000000426729 in MPIDI_CH3I_Progress (is_blocking=1, state=<value optimized out>) at ch3_progress.c:206
#6  0x0000000000407703 in MPIC_Wait (request_ptr=0x730fc8) at helper_fns.c:518
#7  0x0000000000408ae2 in MPIC_Sendrecv (sendbuf=0x0, sendcount=0, sendtype=1275068685, dest=2, sendtag=1, 
    recvbuf=0x0, recvcount=0, recvtype=1275068685, source=0, recvtag=1, comm=-2080374784, status=0x1)
    at helper_fns.c:163
#8  0x0000000000404739 in MPIR_Barrier (comm_ptr=<value optimized out>) at barrier.c:86
#9  0x0000000000492435 in MPIR_Barrier_OSU (comm_ptr=<value optimized out>) at barrier_osu.c:105
#10 0x0000000000404a30 in PMPI_Barrier (comm=1140850688) at barrier.c:423
#11 0x00000000004045ca in test_put () at test-put.c:50
#12 0x0000000000404657 in main (argc=1, argv=0x7fff53b96008) at test-put.c:69

== 2/4 =====================================================================================

i = 4
j = 1000

#0  0x00000000004267e2 in MPIDI_CH3I_Progress (is_blocking=1, state=<value optimized out>) at ch3_progress.c:185
#1  0x0000000000407703 in MPIC_Wait (request_ptr=0x730fc8) at helper_fns.c:518
#2  0x0000000000408ae2 in MPIC_Sendrecv (sendbuf=0x0, sendcount=0, sendtype=1275068685, dest=0, sendtag=1, 
    recvbuf=0x0, recvcount=0, recvtype=1275068685, source=0, recvtag=1, comm=-2080374784, status=0x1)
    at helper_fns.c:163
#3  0x0000000000404739 in MPIR_Barrier (comm_ptr=<value optimized out>) at barrier.c:86
#4  0x0000000000492435 in MPIR_Barrier_OSU (comm_ptr=<value optimized out>) at barrier_osu.c:105
#5  0x0000000000404a30 in PMPI_Barrier (comm=1140850688) at barrier.c:423
#6  0x00000000004045ca in test_put () at test-put.c:50
#7  0x0000000000404657 in main (argc=1, argv=0x7fff64df0838) at test-put.c:69

== 3/4 =====================================================================================

i = 4
j = 1000

#0  0x00000034a580b725 in pthread_spin_lock () from /lib64/libpthread.so.0
#1  0x00002b0c91dc0a7b in ibv_cmd_create_qp () from /usr/lib64/libmlx4-rdmav2.so
#2  0x000000000043f032 in ibv_poll_cq (vbuf_handle=0x7fffe694c628, vc_req=0x0, receiving=0, is_blocking=1)
    at /usr/include/infiniband/verbs.h:884
#3  MPIDI_CH3I_MRAILI_Cq_poll (vbuf_handle=0x7fffe694c628, vc_req=0x0, receiving=0, is_blocking=1)
    at ibv_channel_manager.c:599
#4  0x0000000000426e12 in MPIDI_CH3I_read_progress (vc_pptr=0x7fffe694c630, v_ptr=0x7fffe694c628, is_blocking=1)
    at ch3_read_progress.c:146
#5  0x0000000000426729 in MPIDI_CH3I_Progress (is_blocking=1, state=<value optimized out>) at ch3_progress.c:206
#6  0x0000000000407703 in MPIC_Wait (request_ptr=0x730498) at helper_fns.c:518
#7  0x0000000000408ae2 in MPIC_Sendrecv (sendbuf=0x0, sendcount=0, sendtype=1275068685, dest=1, sendtag=1, 
    recvbuf=0x0, recvcount=0, recvtype=1275068685, source=1, recvtag=1, comm=-2080374784, status=0x1)
    at helper_fns.c:163
#8  0x0000000000404739 in MPIR_Barrier (comm_ptr=<value optimized out>) at barrier.c:86
#9  0x0000000000492435 in MPIR_Barrier_OSU (comm_ptr=<value optimized out>) at barrier_osu.c:105
#10 0x0000000000404a30 in PMPI_Barrier (comm=1140850688) at barrier.c:423
#11 0x00000000004045ca in test_put () at test-put.c:50
#12 0x0000000000404657 in main (argc=1, argv=0x7fffe6959348) at test-put.c:69



More information about the mvapich-discuss mailing list