[mvapich-discuss] MVAPICH 2 Progress Code improvement for RDMA_FAST_PATH

Sylvain Jeaugey sylvain.jeaugey at bull.net
Wed Apr 4 08:30:11 EDT 2007


Hi all,

Sorry for the delay, I was preempted by other tasks. I have just taken a few 
minutes to send you this patch. It is certainly not the best way to do it, 
but it works fine. Feel free to adapt it to fit the rest of the code, as long 
as the performance is preserved.

The patch applies on top of mvapich2 0.9.8.
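
For anyone who wants to try it: the patch adds an MV2_TARGETED_POLLING
environment variable (read via getenv() in rdma_get_user_parameters(),
default 1), so the old full-set polling is one variable away. A hypothetical
invocation (the exact way to pass environment variables to the ranks depends
on your launcher):

    MV2_TARGETED_POLLING=0 mpiexec -n 64 ./your_mpi_app   # old behaviour
    MV2_TARGETED_POLLING=1 mpiexec -n 64 ./your_mpi_app   # targeted polling (default)

The other tuning variables (MV2_NUM_RDMA_BUFFER, MV2_POLLING_SET_THRESHOLD,
...) are untouched.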

Sylvain

On Sun, 25 Mar 2007, wei huang wrote:

> Hi Sylvain,
>
> Thanks for your effort on helping us improve the rdma fast path code. Your
> proposal looks good to us. There may be some corner cases in the progress
> engine that need to be considered, but we should be able to take care of
> them later. We are actually working on a later 1.0 release, which will
> have more features, including enhanced messaging rate, enhanced
> collectives, etc. Now should be the right time to incorporate such an
> enhancement. We will thus have time to systematically test and evaluate
> the changes. A patch from you will definitely help us move faster in this
> direction. A patch against 0.9.8 should work fine.
>
> Thanks again and looking forward to discussing with you further.
>
> -- Wei
>
>> [ADAPTIVE_]RDMA_FAST_PATH is an optimization to provide low latency in
>> mvapich2. The issue is that latency increases as the total number of
>> processes grows. Eventually, when you launch a job with over 32 processes,
>> latency becomes worse than with the standard send/recv protocol.
>>
>> The reason for that is very simple. Unlike the send/recv protocol, which
>> gets its receives from a single completion queue, the RDMA fast path has
>> to poll _every_ RDMA queue to find out which queue has data to receive.
>>
>> My first attempt to improve this was to poll only the VCs associated with
>> the requests passed to MPID_Progress. That did not work well because,
>> unfortunately, well-written MPI applications are scarce, and calling
>> MPI_Wait on the wrong request resulted in a deadlock.
>>
>> My second attempt works a lot better. The RDMA polling set is now
>> restricted to:
>>   * VCs on which we have pending posted receives;
>>   * VCs on which we have a rendez-vous send in progress.
>> ... and it seems to work fine and fast, since polling is almost always
>> directed to the right VC (see the sketch after the quoted thread below).
>>
>> Does anyone already have a good (better) solution for this? Am I totally
>> mistaken in my understanding of the MVAPICH2 code? If not, I will consider
>> cleaning things up and proposing a patch against 0.9.8, unless I should
>> wait for 0.9.9?
>>
>> Thanks in advance for your opinions/comments/flames on that,
>>
>> Sylvain
>> _______________________________________________
>> mvapich-discuss mailing list
>> mvapich-discuss at cse.ohio-state.edu
>> http://mail.cse.ohio-state.edu/mailman/listinfo/mvapich-discuss
>>
>
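
To make the idea easier to follow, here is a minimal sketch of the restricted
polling set described in the quoted thread. This is NOT the MVAPICH2 code:
the types and helpers below (vc_t, poll_vc(), targeted_poll()) are simplified
placeholders invented for illustration. In the actual patch, the "posted
receives" part comes from MPIDI_CH3U_Recvq_get_AVT() and the rendez-vous list
is maintained by the ENQUEUE_RVIP_LIST / DEQUEUE_RVIP_LIST macros.

#include <stddef.h>

/* Simplified placeholder types -- not the MVAPICH2 structures. */
typedef struct vc {
    struct vc *next_rvip;   /* next VC with a rendez-vous in progress */
    int        has_data;    /* stands for "an RDMA fast-path buffer arrived" */
} vc_t;

/* Placeholder for polling one VC's RDMA fast-path buffers. */
static int poll_vc(vc_t *vc) { return vc->has_data; }

/* Return the first VC with data, or NULL.
 * any_source: 1 if an MPI_ANY_SOURCE receive is posted
 * posted_vcs: VCs matching the currently posted receives
 * all_vcs   : the full RDMA polling set (fallback)
 * rvip_head : head of the rendez-vous-in-progress list */
static vc_t *targeted_poll(int any_source,
                           vc_t **posted_vcs, int n_posted,
                           vc_t **all_vcs, int n_all,
                           vc_t *rvip_head)
{
    int i;
    vc_t *vc;

    if (any_source) {                   /* cannot restrict: poll everything */
        for (i = 0; i < n_all; i++)
            if (poll_vc(all_vcs[i]))
                return all_vcs[i];
        return NULL;
    }
    for (vc = rvip_head; vc != NULL; vc = vc->next_rvip)
        if (poll_vc(vc))                /* rendez-vous sends in progress */
            return vc;
    for (i = 0; i < n_posted; i++)
        if (poll_vc(posted_vcs[i]))     /* posted receives */
            return posted_vcs[i];
    return NULL;
}

When an MPI_ANY_SOURCE receive is posted we cannot restrict the set, so the
full polling set is scanned exactly as before; otherwise only a handful of
VCs are touched, which is what avoids the per-process polling cost described
above.
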
-------------- next part --------------
diff -ur mvapich2-0.9.8/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_channel_manager.c mvapich2-0.9.8-tpp/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_channel_manager.c
--- mvapich2-0.9.8/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_channel_manager.c	2006-10-13 16:50:28.000000000 +0200
+++ mvapich2-0.9.8-tpp/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_channel_manager.c	2007-03-26 17:04:40.000000000 +0200
@@ -240,16 +240,73 @@
     return type;
 }
 
+int MPIDI_CH3I_MRAILI_Get_next_vbuf_on_vc(MPIDI_VC_t * vc, MPIDI_VC_t ** vc_ptr, vbuf ** vbuf_ptr) {
+    vbuf *v;
+    int seq;
+    volatile VBUF_FLAG_TYPE *tail;
+
+    v		= NULL;
+
+    if (vc->mrail.rfp.RDMA_recv_buf == NULL) {
+	vbuf_fast_rdma_alloc(vc, 1);
+	vbuf_address_send(vc);
+    }
+
+    seq  = GetSeqNumVbuf(vc->mrail.cmanager.msg_channels[INDEX_LOCAL(&vc->mrail.cmanager,0)].v_queue_head);
+    if (seq == PKT_IS_NULL) {
+	v    = &(vc->mrail.rfp.RDMA_recv_buf[vc->mrail.rfp.p_RDMA_recv]);
+	tail = v->head_flag;
+
+	if (*tail && vc->mrail.rfp.p_RDMA_recv != vc->mrail.rfp.p_RDMA_recv_tail) {
+	    DEBUG_PRINT("Get one!!!!!!!!!!!!!!\n");
+	    if (++(vc->mrail.rfp.p_RDMA_recv) >= num_rdma_buffer)
+		vc->mrail.rfp.p_RDMA_recv = 0;
+	    MRAILI_FAST_RDMA_VBUF_START(v, *tail, v->pheader)
+		v->content_size = *v->head_flag;
+
+	    seq = GetSeqNumVbuf(v);
+	    if (seq == vc->seqnum_recv) {
+		DEBUG_PRINT("Get one exact seq: %d\n", seq);
+		vc->seqnum_recv ++;
+		*vbuf_ptr = v;
+		*vc_ptr   = v->vc;
+		return T_CHANNEL_EXACT_ARRIVE;
+	    } else if( seq == PKT_NO_SEQ_NUM) {
+		DEBUG_PRINT("[vbuf_local]: get control msg\n");
+		*vbuf_ptr = v;
+		*vc_ptr   = v->vc;
+		return T_CHANNEL_CONTROL_MSG_ARRIVE;
+	    } else {
+		DEBUG_PRINT("Get one out of order seq: %d, expecting %d\n", 
+			    seq, vc->seqnum_recv);
+		VQUEUE_ENQUEUE(&vc->mrail.cmanager,
+			       INDEX_LOCAL(&vc->mrail.cmanager,0), v);
+		return T_CHANNEL_NO_ARRIVE;
+	    }
+	} else
+	    return T_CHANNEL_NO_ARRIVE;
+    } 
+
+    if (seq == vc->seqnum_recv) {
+	*vbuf_ptr = VQUEUE_DEQUEUE(&vc->mrail.cmanager, INDEX_LOCAL(&vc->mrail.cmanager,0));
+	*vc_ptr   = (*vbuf_ptr)->vc;
+	vc->seqnum_recv ++;
+	return T_CHANNEL_EXACT_ARRIVE;
+    } else if (seq == PKT_NO_SEQ_NUM) {
+	*vbuf_ptr = VQUEUE_DEQUEUE(&vc->mrail.cmanager, INDEX_LOCAL(&vc->mrail.cmanager,0));
+	*vc_ptr   = (*vbuf_ptr)->vc;
+	return T_CHANNEL_CONTROL_MSG_ARRIVE;
+    }
+    return T_CHANNEL_NO_ARRIVE;
+}
+
+extern MPIDI_VC_t * rvip_list;
+
 int MPIDI_CH3I_MRAILI_Get_next_vbuf(MPIDI_VC_t ** vc_ptr, vbuf ** vbuf_ptr)
 {
-    MPIDI_VC_t *vc;
     int type = T_CHANNEL_NO_ARRIVE;
     int i;
-    int seq;
-    vbuf *v;
-    volatile VBUF_FLAG_TYPE *tail;
 
-    v		= NULL;
     *vc_ptr 	= NULL;
     *vbuf_ptr 	= NULL;
 
@@ -265,58 +322,44 @@
     if (num_rdma_buffer == 0)
         goto fn_exit;
 
-    /* no msg is queued, poll rdma polling set */
-    for (i = 0; i < MPIDI_CH3I_RDMA_Process.polling_group_size; i++) {
-        vc   = MPIDI_CH3I_RDMA_Process.polling_set[i];
-        seq  = GetSeqNumVbuf(vc->mrail.cmanager.msg_channels[INDEX_LOCAL(&vc->mrail.cmanager,0)].v_queue_head);
-        if (seq == PKT_IS_NULL) {
-            v    = &(vc->mrail.rfp.RDMA_recv_buf[vc->mrail.rfp.p_RDMA_recv]);
-            tail = v->head_flag;
-
-            if (*tail && vc->mrail.rfp.p_RDMA_recv != vc->mrail.rfp.p_RDMA_recv_tail) {
-                DEBUG_PRINT("Get one!!!!!!!!!!!!!!\n");
-                if (++(vc->mrail.rfp.p_RDMA_recv) >= num_rdma_buffer)
-                    vc->mrail.rfp.p_RDMA_recv = 0;
-                MRAILI_FAST_RDMA_VBUF_START(v, *tail, v->pheader)
-                    v->content_size = *v->head_flag;
-
-                seq = GetSeqNumVbuf(v);
-                if (seq == vc->seqnum_recv) {
-                    DEBUG_PRINT("Get one exact seq: %d\n", seq);
-                    type = T_CHANNEL_EXACT_ARRIVE;
-                    vc->seqnum_recv ++;
-                    *vbuf_ptr = v;
-                    *vc_ptr   = v->vc;
-                    goto fn_exit;
-                } else if( seq == PKT_NO_SEQ_NUM) {
-                    type = T_CHANNEL_CONTROL_MSG_ARRIVE;
-                    DEBUG_PRINT("[vbuf_local]: get control msg\n");
-                    *vbuf_ptr = v;
-                    *vc_ptr   = v->vc;
-                    goto fn_exit;
-                } else {
-                    DEBUG_PRINT("Get one out of order seq: %d, expecting %d\n", 
-                            seq, vc->seqnum_recv);
-                    VQUEUE_ENQUEUE(&vc->mrail.cmanager,
-                            INDEX_LOCAL(&vc->mrail.cmanager,0), v);
-                    continue;
-                }
-            } else
-                continue;
-        } 
-
-        if (seq == vc->seqnum_recv) {
-            *vbuf_ptr = VQUEUE_DEQUEUE(&vc->mrail.cmanager, INDEX_LOCAL(&vc->mrail.cmanager,0));
-            *vc_ptr   = (*vbuf_ptr)->vc;
-            vc->seqnum_recv ++;
-            type = T_CHANNEL_EXACT_ARRIVE;
-            goto fn_exit;
-        } else if (seq == PKT_NO_SEQ_NUM) {
-            *vbuf_ptr = VQUEUE_DEQUEUE(&vc->mrail.cmanager, INDEX_LOCAL(&vc->mrail.cmanager,0));
-            *vc_ptr   = (*vbuf_ptr)->vc;
-            type = T_CHANNEL_CONTROL_MSG_ARRIVE;
-            goto fn_exit;
-        }
+    if (rdma_targeted_polling) {
+	/* New Progress Path */
+	/* no msg is queued, poll rdma polling set */
+	MPIDI_VC_t ** vcs;
+	int count;
+	MPIDI_CH3U_Recvq_get_AVT(&vcs, &count);
+
+	if (!vcs && count == -2) { /* any source receives : poll everything */
+	    MPIDI_CH3U_Recvq_release_AVT();
+	    for (i = 0; i < MPIDI_CH3I_RDMA_Process.polling_group_size; i++)
+		if ((type = MPIDI_CH3I_MRAILI_Get_next_vbuf_on_vc(MPIDI_CH3I_RDMA_Process.polling_set[i], vc_ptr, vbuf_ptr)) != T_CHANNEL_NO_ARRIVE)
+		    goto fn_exit;
+	    goto fn_exit;
+	} else {
+	    /* progress on rendez-vous transfers */
+	    {
+		MPIDI_VC_t * vc = rvip_list;
+		while (vc) {
+		    if ((type = MPIDI_CH3I_MRAILI_Get_next_vbuf_on_vc(vc, vc_ptr, vbuf_ptr)) != T_CHANNEL_NO_ARRIVE) {
+			MPIDI_CH3U_Recvq_release_AVT();
+			goto fn_exit;
+		    }
+		    vc = vc->mrail.next_rvip_vc;
+		}
+	    }
+	    /* progress on receives */
+	    for (i = 0; i < count; i++) {
+		if ((type = MPIDI_CH3I_MRAILI_Get_next_vbuf_on_vc(vcs[i], vc_ptr, vbuf_ptr)) != T_CHANNEL_NO_ARRIVE) {
+		    MPIDI_CH3U_Recvq_release_AVT();
+		    goto fn_exit;
+		}
+	    }
+	    MPIDI_CH3U_Recvq_release_AVT();
+	}
+    } else {
+	for (i = 0; i < MPIDI_CH3I_RDMA_Process.polling_group_size; i++)
+	    if ((type = MPIDI_CH3I_MRAILI_Get_next_vbuf_on_vc(MPIDI_CH3I_RDMA_Process.polling_set[i], vc_ptr, vbuf_ptr)) != T_CHANNEL_NO_ARRIVE)
+		goto fn_exit;
     }
 fn_exit:
     return type;
diff -ur mvapich2-0.9.8/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_param.c mvapich2-0.9.8-tpp/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_param.c
--- mvapich2-0.9.8/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_param.c	2006-11-10 20:07:37.000000000 +0100
+++ mvapich2-0.9.8-tpp/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_param.c	2007-03-26 17:26:56.000000000 +0200
@@ -59,6 +59,7 @@
 int           rdma_max_inline_size;
 unsigned int  rdma_ndreg_entries = RDMA_NDREG_ENTRIES;
 int           num_rdma_buffer;
+int           rdma_targeted_polling;
 
 /* max (total) number of vbufs to allocate, after which process
  * terminates with a fatal error.
@@ -447,6 +448,8 @@
     } else {
         rdma_credit_preserve = 3;
     }
+
+    rdma_targeted_polling = 1;
 }
 
 void rdma_get_user_parameters(int num_proc, int me)
@@ -508,6 +511,9 @@
     if ((value = getenv("MV2_NUM_RDMA_BUFFER")) != NULL) { 
         num_rdma_buffer = (int)atoi(value);
     }
+    if ((value = getenv("MV2_TARGETED_POLLING")) != NULL) { 
+        rdma_targeted_polling = (int)atoi(value);
+    }
     if ((value = getenv("MV2_POLLING_SET_THRESHOLD")) != NULL
         && MPIDI_CH3I_RDMA_Process.has_adaptive_fast_path) {
         rdma_polling_set_threshold = atoi(value);
diff -ur mvapich2-0.9.8/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_param.h mvapich2-0.9.8-tpp/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_param.h
--- mvapich2-0.9.8/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_param.h	2006-10-03 20:22:56.000000000 +0200
+++ mvapich2-0.9.8-tpp/src/mpid/osu_ch3/channels/mrail/src/gen2/ibv_param.h	2007-03-26 17:02:05.000000000 +0200
@@ -42,6 +42,7 @@
 extern int                  rdma_read_reserve;
 extern float                rdma_credit_update_threshold;   
 extern int                  num_rdma_buffer;
+extern int                  rdma_targeted_polling;
 extern int                  rdma_iba_eager_threshold;
 extern char                 rdma_iba_hca[32];
 extern unsigned int         rdma_ndreg_entries;
diff -ur mvapich2-0.9.8/src/mpid/osu_ch3/channels/mrail/src/gen2/mpidi_ch3_rdma_post.h mvapich2-0.9.8-tpp/src/mpid/osu_ch3/channels/mrail/src/gen2/mpidi_ch3_rdma_post.h
--- mvapich2-0.9.8/src/mpid/osu_ch3/channels/mrail/src/gen2/mpidi_ch3_rdma_post.h	2006-10-03 20:22:56.000000000 +0200
+++ mvapich2-0.9.8-tpp/src/mpid/osu_ch3/channels/mrail/src/gen2/mpidi_ch3_rdma_post.h	2007-03-26 17:02:05.000000000 +0200
@@ -126,6 +126,51 @@
     }                                                           \
 }
 
+extern MPIDI_VC_t *rvip_list;
+
+/* Maintain rvip list */
+#define ENQUEUE_RVIP_LIST(v) { \
+    int already_in_list = 0; \
+    MPIDI_VC_t *last_v_, *v_ = rvip_list; \
+    while (v_) { \
+	if (v_ == v) {\
+	    already_in_list = 1; \
+	    break; \
+	} \
+	last_v_ = v_; \
+	v_ = v_->mrail.next_rvip_vc; \
+    } \
+    if (!already_in_list) { \
+	if (rvip_list) \
+	    last_v_->mrail.next_rvip_vc = v; \
+	else \
+	    rvip_list = v; \
+	v->mrail.next_rvip_vc = NULL; \
+	v->mrail.rvip_count = 1; \
+    } else { \
+	v->mrail.rvip_count++; \
+    } \
+}
+
+#define DEQUEUE_RVIP_LIST(v) { \
+    MPIDI_VC_t *last_v_, *v_ = rvip_list; \
+    while (v_) { \
+	if (v_ == v) {\
+	    if (v->mrail.rvip_count > 1) { \
+	    	v->mrail.rvip_count--; \
+	    } else { \
+		if (v == rvip_list) \
+		rvip_list = v->mrail.next_rvip_vc; \
+		else \
+		last_v_->mrail.next_rvip_vc = v->mrail.next_rvip_vc; \
+	    } \
+	    break; \
+	} \
+	last_v_ = v_; \
+	v_ = v_->mrail.next_rvip_vc; \
+    } \
+}
+
 /*
  * Attached to each connection is a list of send handles that
  * represent rendezvous sends that have been started and acked but not
@@ -166,6 +211,8 @@
         if (NULL == (c)->mrail.sreq_head) {                     \
             (c)->mrail.sreq_tail = NULL;                        \
         }                                                       \
+    if ((c)->mrail.sreq_head == NULL)                           \
+    DEQUEUE_RVIP_LIST(c);                                       \
 }
 
 #define MPIDI_CH3I_MRAIL_SET_PKT_RNDV(_pkt, _req)               \
diff -ur mvapich2-0.9.8/src/mpid/osu_ch3/channels/mrail/src/gen2/mpidi_ch3_rdma_pre.h mvapich2-0.9.8-tpp/src/mpid/osu_ch3/channels/mrail/src/gen2/mpidi_ch3_rdma_pre.h
--- mvapich2-0.9.8/src/mpid/osu_ch3/channels/mrail/src/gen2/mpidi_ch3_rdma_pre.h	2006-11-10 20:07:37.000000000 +0100
+++ mvapich2-0.9.8-tpp/src/mpid/osu_ch3/channels/mrail/src/gen2/mpidi_ch3_rdma_pre.h	2007-03-26 17:02:05.000000000 +0200
@@ -273,6 +273,11 @@
      */
     void    *nextflow;
     int     inflow;
+    
+    /* rvip_list construction */
+    void    *next_rvip_vc;
+    int     rvip_count;
+
     /* used to distinguish which VIA barrier synchronozations have
      * completed on this connection.  Currently, only used during
      * process teardown.
diff -ur mvapich2-0.9.8/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_istartrndvmsg.c mvapich2-0.9.8-tpp/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_istartrndvmsg.c
--- mvapich2-0.9.8/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_istartrndvmsg.c	2006-11-10 20:07:37.000000000 +0100
+++ mvapich2-0.9.8-tpp/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_istartrndvmsg.c	2007-03-26 17:02:05.000000000 +0200
@@ -168,6 +168,9 @@
     DEBUG_PRINT("[send rts]successful complete\n");
     MPIDI_DBG_PRINTF((50, FCNAME, "exiting"));
     MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3_ISTARTRNDVMSG);
+    
+    ENQUEUE_RVIP_LIST(vc);
+    
     return mpi_errno;
 
 }
@@ -243,6 +246,9 @@
     MPIDI_CH3I_CR_unlock();
 #endif
     DEBUG_PRINT("[send rts]successful complete\n");
+
+    ENQUEUE_RVIP_LIST(vc);
+
     return mpi_errno;
 
 }
@@ -286,6 +292,8 @@
     MPIDI_CH3I_CR_unlock();
 #endif
 
+    ENQUEUE_RVIP_LIST(vc);
+
     return mpi_errno;
 }
 
diff -ur mvapich2-0.9.8/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_rndvtransfer.c mvapich2-0.9.8-tpp/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_rndvtransfer.c
--- mvapich2-0.9.8/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_rndvtransfer.c	2006-11-10 20:07:37.000000000 +0100
+++ mvapich2-0.9.8-tpp/src/mpid/osu_ch3/channels/mrail/src/rdma/ch3_rndvtransfer.c	2007-03-26 17:02:05.000000000 +0200
@@ -28,6 +28,9 @@
 
 MPIDI_VC_t *flowlist;
 
+/* rendez-vous in progress list */
+MPIDI_VC_t *rvip_list = NULL;
+
 #undef DEBUG_PRINT
 #ifdef DEBUG
 #define DEBUG_PRINT(args...) \
diff -ur mvapich2-0.9.8/src/mpid/osu_ch3/src/ch3u_recvq.c mvapich2-0.9.8-tpp/src/mpid/osu_ch3/src/ch3u_recvq.c
--- mvapich2-0.9.8/src/mpid/osu_ch3/src/ch3u_recvq.c	2006-08-04 16:03:58.000000000 +0200
+++ mvapich2-0.9.8-tpp/src/mpid/osu_ch3/src/ch3u_recvq.c	2007-03-26 17:06:22.000000000 +0200
@@ -503,3 +503,105 @@
     MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_RECVQ_FDP_OR_AEU);
     return rreq;
 }
+
+/*
+ * MPIDI_CH3U_Recvq_get_AVT()
+ *
+ * Get the Active VCs Table to know which VCs to poll. 
+ * The table is global, MPIDI_CH3U_Recvq_release_AVT must be called after use.
+ */
+#undef FUNCNAME
+#define FUNCNAME MPIDI_CH3U_Recvq_get_AVT
+#undef FCNAME
+#define FCNAME MPIDI_QUOTE(FUNCNAME)
+void MPIDI_CH3U_Recvq_get_AVT(MPIDI_VC_t *** vcs, int *count)
+{
+    static MPIDI_VC_t ** vcs_array = NULL;
+#ifdef VCS_DEBUG
+    static MPIDI_VC_t ** last_vcs_array = NULL;
+#endif
+    static int vcs_array_size = 16;
+    MPID_Request * rreq;
+    int i, n_vcs = 0;
+    MPIDI_VC_t *vc;
+
+    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECVQ_GET_AVT);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_RECVQ_GET_AVT);
+    
+    /* FIXME Lock */
+    
+    if (vcs_array == NULL) { /* Done once */
+	vcs_array = (MPIDI_VC_t **)(malloc(sizeof(MPIDI_VC_t *) * vcs_array_size));
+#ifdef VCS_DEBUG
+	last_vcs_array = (MPIDI_VC_t **)(malloc(sizeof(MPIDI_VC_t *) * vcs_array_size));
+#endif
+    }
+    
+    rreq = recvq_posted_head;
+    while (rreq) {
+	if (rreq->dev.match.rank == -2) {
+	    /* ANY_SOURCE ! */
+	    *vcs = NULL;
+	    *count = -2;
+	    goto fn_exit;
+	}
+
+	/* request is not any_source */
+	MPIDI_Comm_get_vc(rreq->comm, rreq->dev.match.rank, &vc);
+
+	for (i=0; i<n_vcs; i++) {
+	    if (vcs_array[i] == vc)
+		goto next_req;
+	}
+	
+	/* we don't have this vc yet ; add it */
+	vcs_array[n_vcs++] = vc;
+	if (n_vcs >= vcs_array_size) {
+	    vcs_array_size *= 2;
+	    vcs_array = (MPIDI_VC_t **)(realloc(vcs_array, sizeof(MPIDI_VC_t *) * vcs_array_size));
+#ifdef VCS_DEBUG
+	    last_vcs_array = (MPIDI_VC_t **)(realloc(last_vcs_array, sizeof(MPIDI_VC_t *) * vcs_array_size));
+#endif
+	}
+next_req:
+	rreq=rreq->dev.next;
+    }    
+    *count = n_vcs;
+    *vcs = vcs_array;
+
+#ifdef VCS_DEBUG
+    for (i=0; i<n_vcs; i++) {
+	if (vcs_array[i] != last_vcs_array[i]) {
+	    printf("vcs_array changed to :");
+	    for (i=0; i<n_vcs; i++)
+		printf("[%d] %p (%d)\n", i, vcs_array[i], vcs_array[i]->pg_rank);
+	    break;
+	}	    
+    }
+    
+    for (i=0; i<n_vcs; i++)
+	last_vcs_array[i] = vcs_array[i];
+#endif
+
+fn_exit:
+    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_RECVQ_GET_AVT);
+}
+
+/*
+ * MPIDI_CH3U_Recvq_release_AVT()
+ *
+ * Let other threads add/remove/get requests.
+ * 
+ */
+int MPIDI_CH3U_Recvq_release_AVT() 
+{
+    MPIDI_STATE_DECL(MPID_STATE_MPIDI_CH3U_RECVQ_RELEASE_AVT);
+
+    MPIDI_FUNC_ENTER(MPID_STATE_MPIDI_CH3U_RECVQ_RELEASE_AVT);
+    
+    /* FIXME unlock */
+
+    MPIDI_FUNC_EXIT(MPID_STATE_MPIDI_CH3U_RECVQ_RELEASE_AVT);
+    return 0;
+}
diff -ur mvapich2-0.9.8/src/mpid/osu_ch3/src/mpid_finalize.c mvapich2-0.9.8-tpp/src/mpid/osu_ch3/src/mpid_finalize.c
--- mvapich2-0.9.8/src/mpid/osu_ch3/src/mpid_finalize.c	2006-04-09 19:57:00.000000000 +0200
+++ mvapich2-0.9.8-tpp/src/mpid/osu_ch3/src/mpid_finalize.c	2007-03-26 17:02:05.000000000 +0200
@@ -83,6 +83,9 @@
 	MPIU_ERR_POP(mpi_errno);
     }
 		
+    /* Stop targeted polling since we do not enqueue requests the standard way */
+    rdma_targeted_polling = 0;
+    
     /*
      * Initiate close protocol for all active VCs
      */


More information about the mvapich-discuss mailing list