[mvapich-discuss] greater than 32-bit malloc problems

Bill Barth bbarth at tacc.utexas.edu
Sat Dec 29 01:28:02 EST 2007


We were having some trouble getting mvapich (1 & 2) to let us allocate
more than 2GB (i.e., limit of a signed 32-bit integer) of memory on our
new machine. The problem manifests as an apparent infinite loop in
dreg.c, though I think it's just a really long loop due to a size
variable being cast from a signed 32-bit integer near the upper limit to
an unsigned 64-bit integer. I think I've tracked it down to a handful of
places where 'unsigned' is used rather than 'size_t' and 'int' is used
rather than 'intptr_t'. The sneaky part is that this works just fine on
ILP32 and ILP64 architectures but not LP64 architectures. Below is a patch
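for MVAPICH2 (against the 11-05-2007 nightly tarball) that seems to
alleviate our problems. (Note that the diff was generated with my modified
tree as the first operand, so the lines marked '-' are the fixed versions
and the lines marked '+' are the original code; it has to be applied in
reverse, e.g. with 'patch -R'.) Also below is a sample program that
demonstrates the problem.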

If this has been fixed since, I apologize for spamming the list and
would appreciate a pointer to the fixed code. I'd be interested in
feedback on whether I've fixed the problem in the right way and on
whether there are other places where I should look for similar problems.
Finally, I'd be grateful if somebody would port the same changes into
MVAPICH1 and push it into the nightly tarball.

Thanks,
Bill.

---------------patch---------------------
diff --exclude='*~' -r -u -U 2
./src/mpe2/src/graphics/include/mpetools.h
../mvapich2-1.0-2007-12-03/src/mpe2/src/graphics/include/mpetools.h
--- ./src/mpe2/src/graphics/include/mpetools.h  2007-12-28
13:34:11.782155746 -0600
+++ ../mvapich2-1.0-2007-12-03/src/mpe2/src/graphics/include/mpetools.h
2007-07-25 20:07:38.000000000 -0500
@@ -18,10 +18,10 @@
 #include <stdlib.h>
 #endif
 
-#define MALLOC(a)    malloc((size_t)(a))
+#define MALLOC(a)    malloc((unsigned)(a))
 #define FREE(a)      free((char *)(a))
-#define CALLOC(a,b)    calloc((size_t)(a),(size_t)(b))
-#define REALLOC(a,b)   realloc(a,(size_t)(b))
+#define CALLOC(a,b)    calloc((unsigned)(a),(unsigned)(b))
+#define REALLOC(a,b)   realloc(a,(unsigned)(b))
 
 #define NEW(a)    (a *)MALLOC(sizeof(a))
 
diff --exclude='*~' -r -u -U 2 ./src/mpe2/src/logging/include/clog_mem.h
../mvapich2-1.0-2007-12-03/src/mpe2/src/logging/include/clog_mem.h
--- ./src/mpe2/src/logging/include/clog_mem.h   2007-12-28
13:34:11.782155746 -0600
+++ ../mvapich2-1.0-2007-12-03/src/mpe2/src/logging/include/clog_mem.h
2007-07-25 20:07:38.000000000 -0500
@@ -12,7 +12,7 @@
 #if defined(MPIR_MEMDEBUG)
 /* Enable memory tracing.  This requires MPICH's mpid/util/tr2.c codes
*/
 #include "mpimem.h"             /* Chameleon memory debugging stuff */
-#define MALLOC(a)       MPID_trmalloc((size_t)(a),__LINE__,__FILE__)
+#define MALLOC(a)       MPID_trmalloc((unsigned)(a),__LINE__,__FILE__)
 #define FREE(a)         MPID_trfree(a,__LINE__,__FILE__)
 #define REALLOC(a,b)    realloc(a,b)
 #else
diff --exclude='*~' -r -u -U 2 ./src/mpe2/src/wrappers/src/mpe_proff.c
../mvapich2-1.0-2007-12-03/src/mpe2/src/wrappers/src/mpe_proff.c
--- ./src/mpe2/src/wrappers/src/mpe_proff.c     2007-12-28
13:34:11.782155746 -0600
+++ ../mvapich2-1.0-2007-12-03/src/mpe2/src/wrappers/src/mpe_proff.c
2007-07-25 20:07:38.000000000 -0500
@@ -75,7 +75,7 @@
 */
 #if defined(MPIR_MEMDEBUG)
 /* Enable memory tracing.  This requires MPICH's mpid/util/tr2.c codes
*/
-#define MALLOC(a)    MPID_trmalloc((size_t)(a),__LINE__,__FILE__)
+#define MALLOC(a)    MPID_trmalloc((unsigned)(a),__LINE__,__FILE__)
 #define FREE(a)      MPID_trfree(a,__LINE__,__FILE__)
 
 #else
diff --exclude='*~' -r -u -U 2
./src/mpid/osu_ch3/channels/mrail/src/gen2/mem_hooks.c
../mvapich2-1.0-2007-12-03/src/mpid/osu_ch3/channels/mrail/src/gen2/mem_
hooks.c
--- ./src/mpid/osu_ch3/channels/mrail/src/gen2/mem_hooks.c
2007-12-28 16:06:44.395942463 -0600
+++
../mvapich2-1.0-2007-12-03/src/mpid/osu_ch3/channels/mrail/src/gen2/mem_
hooks.c     2007-07-27 13:38:48.000000000 -0500
@@ -118,7 +118,7 @@
 
 #ifndef DISABLE_TRAP_SBRK
 
-void *mvapich2_sbrk(intptr_t delta)
+void *mvapich2_sbrk(int delta)
 {
     if (delta < 0) {
 
diff --exclude='*~' -r -u -U 2
./src/mpid/osu_ch3/channels/mrail/src/gen2/mem_hooks.h
../mvapich2-1.0-2007-12-03/src/mpid/osu_ch3/channels/mrail/src/gen2/mem_
hooks.h
--- ./src/mpid/osu_ch3/channels/mrail/src/gen2/mem_hooks.h
2007-12-28 16:06:53.556016593 -0600
+++
../mvapich2-1.0-2007-12-03/src/mpid/osu_ch3/channels/mrail/src/gen2/mem_
hooks.h     2007-07-27 13:38:48.000000000 -0500
@@ -46,7 +46,7 @@
 #endif
 
 #ifndef DISABLE_TRAP_SBRK
-void *mvapich2_sbrk(intptr_t delta);
+void *mvapich2_sbrk(int delta);
 #endif /* DISABLE_TRAP_SBRK */
 
 #endif /* DISABLE_PTMALLOC */
diff --exclude='*~' -r -u -U 2
./src/mpid/osu_ch3/channels/mrail/src/gen2/rdma_impl.h
../mvapich2-1.0-2007-12-03/src/mpid/osu_ch3/channels/mrail/src/gen2/rdma
_impl.h
--- ./src/mpid/osu_ch3/channels/mrail/src/gen2/rdma_impl.h
2007-12-28 13:34:11.802155907 -0600
+++
../mvapich2-1.0-2007-12-03/src/mpid/osu_ch3/channels/mrail/src/gen2/rdma
_impl.h     2007-07-25 20:07:38.000000000 -0500
@@ -328,8 +328,8 @@
 #undef MALLOC
 #undef FREE
 
-#define MALLOC(a)    malloc((size_t)(a))
-#define CALLOC(a,b)  calloc((size_t)(a),(size_t)(b))
+#define MALLOC(a)    malloc((unsigned)(a))
+#define CALLOC(a,b)  calloc((unsigned)(a),(unsigned)(b))
 #define FREE(a)      free((char *)(a))
 #define NEW(a)    (a *)MALLOC(sizeof(a))
 #define STRDUP(a)   strdup(a)
diff --exclude='*~' -r -u -U 2
./src/mpid/osu_ch3/channels/mrail/src/udapl/udapl_header.h
../mvapich2-1.0-2007-12-03/src/mpid/osu_ch3/channels/mrail/src/udapl/uda
pl_header.h
--- ./src/mpid/osu_ch3/channels/mrail/src/udapl/udapl_header.h
2007-12-28 13:34:11.802155907 -0600
+++
../mvapich2-1.0-2007-12-03/src/mpid/osu_ch3/channels/mrail/src/udapl/uda
pl_header.h 2007-07-25 20:30:05.000000000 -0500
@@ -38,8 +38,8 @@
 #undef MALLOC
 #undef FREE
 /* src/env/initutil.c NEW not defined */
-#define MALLOC(a)    malloc((size_t)(a))
-#define CALLOC(a,b)  calloc((size_t)(a),(size_t)(b))
+#define MALLOC(a)    malloc((unsigned)(a))
+#define CALLOC(a,b)  calloc((unsigned)(a),(unsigned)(b))
 #define FREE(a)      free((char *)(a))
 #define NEW(a)    (a *)MALLOC(sizeof(a))
 #define STRDUP(a)      strdup(a)
diff --exclude='*~' -r -u -U 2
./src/mpid/osu_ch3/channels/mrail/src/vapi/vapi_header.h
../mvapich2-1.0-2007-12-03/src/mpid/osu_ch3/channels/mrail/src/vapi/vapi
_header.h
--- ./src/mpid/osu_ch3/channels/mrail/src/vapi/vapi_header.h
2007-12-28 13:34:11.802155907 -0600
+++
../mvapich2-1.0-2007-12-03/src/mpid/osu_ch3/channels/mrail/src/vapi/vapi
_header.h   2007-07-25 20:30:05.000000000 -0500
@@ -35,8 +35,8 @@
 #undef MALLOC
 #undef FREE
 /* src/env/initutil.c NEW not defined */
-#define MALLOC(a)    malloc((size_t)(a))
-#define CALLOC(a,b)  calloc((size_t)(a),(size_t)(b))
+#define MALLOC(a)    malloc((unsigned)(a))
+#define CALLOC(a,b)  calloc((unsigned)(a),(unsigned)(b))
 #define FREE(a)      free((char *)(a))
 #define NEW(a)    (a *)MALLOC(sizeof(a))
 #define STRDUP(a)      strdup(a)
-------------------test code-------------------------
#ifdef PARALLEL
#  include "mpi.h"
#endif
#include <unistd.h>
#include <stdlib.h>
#include <stdio.h>
#include <signal.h>
#include <assert.h>
#include <sys/time.h>

/*
 * Allocation test driver.
 *
 * Usage: prog <count>
 *
 * Allocates <count> doubles with malloc(), writes a value into every
 * element, sleeps for five seconds, then frees the buffer.  When built with
 * PARALLEL defined the program also initializes MPI, prefixes its output
 * with the rank, and synchronizes all ranks before freeing.
 *
 * Exits with status 1 (via MPI_Abort in a PARALLEL build) when the argument
 * is missing, is not a valid non-negative element count, or when the
 * allocation fails; returns 0 otherwise.
 */
int main(int argc, char *argv[])
{
  /* Both remain -1 in a serial build, so the output prefix reads "-01". */
  int np=-1, i_am=-1;

#ifdef PARALLEL
  MPI_Init (&argc,&argv);
  MPI_Comm_size (MPI_COMM_WORLD, &np);
  MPI_Comm_rank (MPI_COMM_WORLD, &i_am);
#endif

  if (argc<2)
    {
#   ifdef PARALLEL
      MPI_Abort(MPI_COMM_WORLD,1);
#   else
      exit(1);
#   endif
    }

  /*
   * Parse the element count strictly.  strtoull() maps a leading '-' to a
   * huge unsigned value, which the range check below rejects together with
   * any count whose byte size would not fit in a size_t.
   */
  char *end=NULL;
  unsigned long long requested=strtoull(argv[1],&end,10);
  if (end == argv[1] || *end != '\0' ||
      requested > ((size_t)-1)/sizeof(double))
    {
      printf("%03d: Invalid element count '%s'\n",i_am,argv[1]);
      fflush(stdout);
#   ifdef PARALLEL
      MPI_Abort(MPI_COMM_WORLD,1);
#   else
      exit(1);
#   endif
    }

  size_t size=(size_t)requested;

  /* size*sizeof(double) cannot wrap: it was range-checked above. */
  printf("%03d: About to allocate %zu KB\n",i_am,
         size*sizeof(double)/1024);fflush(stdout);
  double *buf=malloc(sizeof(double)*size);
  if (buf == NULL)
    {
      printf("%03d: Failed to allocate!\n",i_am);fflush(stdout);
#   ifdef PARALLEL
      MPI_Abort(MPI_COMM_WORLD,1);
#   else
      exit(1);
#   endif
    }
  printf("%03d: Done.\n",i_am);fflush(stdout);

  printf("%03d: About to fill.\n",i_am);fflush(stdout);
  /* The index must be size_t: an int would overflow for counts above
     INT_MAX, which is exactly the range this test is meant to reach. */
  for (size_t i=0; i < size; ++i)
    buf[i]=-123.;
  printf("%03d: Done.\n",i_am);fflush(stdout);

  sleep(5);
#ifdef PARALLEL
  MPI_Barrier(MPI_COMM_WORLD);
#endif
  free(buf);
#ifdef PARALLEL
  MPI_Finalize();
#endif

  return 0;
}

--
Bill Barth, Ph.D., Manager HPC Applications Group 
bbarth at tacc.utexas.edu        |   Phone: (512) 232-7069
Office: ROC 1.405             |   Fax:   (512) 475-9445




More information about the mvapich-discuss mailing list