[mvapich-discuss] greater than 32-bit malloc problems

Matthew Koop koop at cse.ohio-state.edu
Sat Dec 29 18:56:50 EST 2007


Bill,

Thanks for sending this patch and investigating the issue. We'll test
things out on our end and apply changes to both MVAPICH and MVAPICH2 in
the near future.

Thanks,

Matt

On Sat, 29 Dec 2007, Bill Barth wrote:

> We were having some trouble getting mvapich (1 & 2) to let us allocate
> more than 2GB (i.e., limit of a signed 32-bit integer) of memory on our
> new machine. The problem manifests as an apparent infinite loop in
> dreg.c, though I think it's just a really long loop due to a size
> variable being cast from a signed 32-bit integer near the upper limit to
> an unsigned 64-bit integer. I think I've tracked it down to a handful of
> places where 'unsigned' is used rather than 'size_t' and 'int' is used
> rather than 'intptr_t'. The sneaky part is that this works just fine on
> ILP32 and ILP64 architectures but not LP64 architectures. Below is a patch
> for MVAPICH2 (against the 11-05-2007 nightly tarball) that seems to
> alleviate our problems. Also below is a sample program that demonstrates
> the problem.
>
> If this has been fixed since, I apologize for spamming the list and
> would appreciate a pointer to the fixed code. I'd be interested in
> feedback on whether I've fixed the problem in the right way and on
> whether there are other places where I should look for similar problems.
> Finally, I'd be grateful if somebody would port the same changes into
> MVAPICH1 and push it into the nightly tarball.
>
> Thanks,
> Bill.
>
> ---------------patch---------------------
> diff --exclude='*~' -r -u -U 2
> ./src/mpe2/src/graphics/include/mpetools.h
> ../mvapich2-1.0-2007-12-03/src/mpe2/src/graphics/include/mpetools.h
> --- ./src/mpe2/src/graphics/include/mpetools.h  2007-12-28
> 13:34:11.782155746 -0600
> +++ ../mvapich2-1.0-2007-12-03/src/mpe2/src/graphics/include/mpetools.h
> 2007-07-25 20:07:38.000000000 -0500
> @@ -18,10 +18,10 @@
>  #include <stdlib.h>
>  #endif
>
> -#define MALLOC(a)    malloc((size_t)(a))
> +#define MALLOC(a)    malloc((unsigned)(a))
>  #define FREE(a)      free((char *)(a))
> -#define CALLOC(a,b)    calloc((size_t)(a),(size_t)(b))
> -#define REALLOC(a,b)   realloc(a,(size_t)(b))
> +#define CALLOC(a,b)    calloc((unsigned)(a),(unsigned)(b))
> +#define REALLOC(a,b)   realloc(a,(unsigned)(b))
>
>  #define NEW(a)    (a *)MALLOC(sizeof(a))
>
> diff --exclude='*~' -r -u -U 2 ./src/mpe2/src/logging/include/clog_mem.h
> ../mvapich2-1.0-2007-12-03/src/mpe2/src/logging/include/clog_mem.h
> --- ./src/mpe2/src/logging/include/clog_mem.h   2007-12-28
> 13:34:11.782155746 -0600
> +++ ../mvapich2-1.0-2007-12-03/src/mpe2/src/logging/include/clog_mem.h
> 2007-07-25 20:07:38.000000000 -0500
> @@ -12,7 +12,7 @@
>  #if defined(MPIR_MEMDEBUG)
>  /* Enable memory tracing.  This requires MPICH's mpid/util/tr2.c codes
> */
>  #include "mpimem.h"             /* Chameleon memory debugging stuff */
> -#define MALLOC(a)       MPID_trmalloc((size_t)(a),__LINE__,__FILE__)
> +#define MALLOC(a)       MPID_trmalloc((unsigned)(a),__LINE__,__FILE__)
>  #define FREE(a)         MPID_trfree(a,__LINE__,__FILE__)
>  #define REALLOC(a,b)    realloc(a,b)
>  #else
> diff --exclude='*~' -r -u -U 2 ./src/mpe2/src/wrappers/src/mpe_proff.c
> ../mvapich2-1.0-2007-12-03/src/mpe2/src/wrappers/src/mpe_proff.c
> --- ./src/mpe2/src/wrappers/src/mpe_proff.c     2007-12-28
> 13:34:11.782155746 -0600
> +++ ../mvapich2-1.0-2007-12-03/src/mpe2/src/wrappers/src/mpe_proff.c
> 2007-07-25 20:07:38.000000000 -0500
> @@ -75,7 +75,7 @@
>  */
>  #if defined(MPIR_MEMDEBUG)
>  /* Enable memory tracing.  This requires MPICH's mpid/util/tr2.c codes
> */
> -#define MALLOC(a)    MPID_trmalloc((size_t)(a),__LINE__,__FILE__)
> +#define MALLOC(a)    MPID_trmalloc((unsigned)(a),__LINE__,__FILE__)
>  #define FREE(a)      MPID_trfree(a,__LINE__,__FILE__)
>
>  #else
> diff --exclude='*~' -r -u -U 2
> ./src/mpid/osu_ch3/channels/mrail/src/gen2/mem_hooks.c
> ../mvapich2-1.0-2007-12-03/src/mpid/osu_ch3/channels/mrail/src/gen2/mem_
> hooks.c
> --- ./src/mpid/osu_ch3/channels/mrail/src/gen2/mem_hooks.c
> 2007-12-28 16:06:44.395942463 -0600
> +++
> ../mvapich2-1.0-2007-12-03/src/mpid/osu_ch3/channels/mrail/src/gen2/mem_
> hooks.c     2007-07-27 13:38:48.000000000 -0500
> @@ -118,7 +118,7 @@
>
>  #ifndef DISABLE_TRAP_SBRK
>
> -void *mvapich2_sbrk(intptr_t delta)
> +void *mvapich2_sbrk(int delta)
>  {
>      if (delta < 0) {
>
> diff --exclude='*~' -r -u -U 2
> ./src/mpid/osu_ch3/channels/mrail/src/gen2/mem_hooks.h
> ../mvapich2-1.0-2007-12-03/src/mpid/osu_ch3/channels/mrail/src/gen2/mem_
> hooks.h
> --- ./src/mpid/osu_ch3/channels/mrail/src/gen2/mem_hooks.h
> 2007-12-28 16:06:53.556016593 -0600
> +++
> ../mvapich2-1.0-2007-12-03/src/mpid/osu_ch3/channels/mrail/src/gen2/mem_
> hooks.h     2007-07-27 13:38:48.000000000 -0500
> @@ -46,7 +46,7 @@
>  #endif
>
>  #ifndef DISABLE_TRAP_SBRK
> -void *mvapich2_sbrk(intptr_t delta);
> +void *mvapich2_sbrk(int delta);
>  #endif /* DISABLE_TRAP_SBRK */
>
>  #endif /* DISABLE_PTMALLOC */
> diff --exclude='*~' -r -u -U 2
> ./src/mpid/osu_ch3/channels/mrail/src/gen2/rdma_impl.h
> ../mvapich2-1.0-2007-12-03/src/mpid/osu_ch3/channels/mrail/src/gen2/rdma
> _impl.h
> --- ./src/mpid/osu_ch3/channels/mrail/src/gen2/rdma_impl.h
> 2007-12-28 13:34:11.802155907 -0600
> +++
> ../mvapich2-1.0-2007-12-03/src/mpid/osu_ch3/channels/mrail/src/gen2/rdma
> _impl.h     2007-07-25 20:07:38.000000000 -0500
> @@ -328,8 +328,8 @@
>  #undef MALLOC
>  #undef FREE
>
> -#define MALLOC(a)    malloc((size_t)(a))
> -#define CALLOC(a,b)  calloc((size_t)(a),(size_t)(b))
> +#define MALLOC(a)    malloc((unsigned)(a))
> +#define CALLOC(a,b)  calloc((unsigned)(a),(unsigned)(b))
>  #define FREE(a)      free((char *)(a))
>  #define NEW(a)    (a *)MALLOC(sizeof(a))
>  #define STRDUP(a)   strdup(a)
> diff --exclude='*~' -r -u -U 2
> ./src/mpid/osu_ch3/channels/mrail/src/udapl/udapl_header.h
> ../mvapich2-1.0-2007-12-03/src/mpid/osu_ch3/channels/mrail/src/udapl/uda
> pl_header.h
> --- ./src/mpid/osu_ch3/channels/mrail/src/udapl/udapl_header.h
> 2007-12-28 13:34:11.802155907 -0600
> +++
> ../mvapich2-1.0-2007-12-03/src/mpid/osu_ch3/channels/mrail/src/udapl/uda
> pl_header.h 2007-07-25 20:30:05.000000000 -0500
> @@ -38,8 +38,8 @@
>  #undef MALLOC
>  #undef FREE
>  /* src/env/initutil.c NEW not defined */
> -#define MALLOC(a)    malloc((size_t)(a))
> -#define CALLOC(a,b)  calloc((size_t)(a),(size_t)(b))
> +#define MALLOC(a)    malloc((unsigned)(a))
> +#define CALLOC(a,b)  calloc((unsigned)(a),(unsigned)(b))
>  #define FREE(a)      free((char *)(a))
>  #define NEW(a)    (a *)MALLOC(sizeof(a))
>  #define STRDUP(a)      strdup(a)
> diff --exclude='*~' -r -u -U 2
> ./src/mpid/osu_ch3/channels/mrail/src/vapi/vapi_header.h
> ../mvapich2-1.0-2007-12-03/src/mpid/osu_ch3/channels/mrail/src/vapi/vapi
> _header.h
> --- ./src/mpid/osu_ch3/channels/mrail/src/vapi/vapi_header.h
> 2007-12-28 13:34:11.802155907 -0600
> +++
> ../mvapich2-1.0-2007-12-03/src/mpid/osu_ch3/channels/mrail/src/vapi/vapi
> _header.h   2007-07-25 20:30:05.000000000 -0500
> @@ -35,8 +35,8 @@
>  #undef MALLOC
>  #undef FREE
>  /* src/env/initutil.c NEW not defined */
> -#define MALLOC(a)    malloc((size_t)(a))
> -#define CALLOC(a,b)  calloc((size_t)(a),(size_t)(b))
> +#define MALLOC(a)    malloc((unsigned)(a))
> +#define CALLOC(a,b)  calloc((unsigned)(a),(unsigned)(b))
>  #define FREE(a)      free((char *)(a))
>  #define NEW(a)    (a *)MALLOC(sizeof(a))
>  #define STRDUP(a)      strdup(a)
> -------------------test code-------------------------
> #ifdef PARALLEL
> #  include "mpi.h"
> #endif
> #include <unistd.h>
> #include <stdlib.h>
> #include <stdio.h>
> #include <signal.h>
> #include <assert.h>
> #include <sys/time.h>
>
> int main(int argc, char *argv[])
> {
>   int np=-1, i_am=-1;
>
> #ifdef PARALLEL
>   MPI_Init (&argc,&argv);
>   MPI_Comm_size (MPI_COMM_WORLD, &np);
>   MPI_Comm_rank (MPI_COMM_WORLD, &i_am);
> #endif
>
>   if (argc<2)
>     {
> #   ifdef PARALLEL
>       MPI_Abort(MPI_COMM_WORLD,1);
> #   else
>       exit(1);
> #   endif
>     }
>
>
>   size_t size=atoll(argv[1]);
>
>   printf("%03d: About to allocate %llu KB\n",i_am,
> size*8/1024);fflush(stdout);
>   double *buf=malloc(sizeof(double)*size);
>   if (buf == NULL)
>     {
>       printf("%03d: Failed to allocate!\n",i_am);fflush(stdout);
> #   ifdef PARALLEL
>       MPI_Abort(MPI_COMM_WORLD,1);
> #   else
>       exit(1);
> #   endif
>     }
>   printf("%03d: Done.\n",i_am);fflush(stdout);
>
>   printf("%03d: About to fill.\n",i_am);fflush(stdout);
>   for (int i=0; i < size; ++i)
>     buf[i]=-123.;
>   printf("%03d: Done.\n",i_am);fflush(stdout);
>
>   sleep(5);
> #ifdef PARALLEL
>   MPI_Barrier(MPI_COMM_WORLD);
> #endif
>   free(buf);
> #ifdef PARALLEL
>   MPI_Finalize();
> #endif
>
> }
>
> --
> Bill Barth, Ph.D., Manager HPC Applications Group
> bbarth at tacc.utexas.edu        |   Phone: (512) 232-7069
> Office: ROC 1.405             |   Fax:   (512) 475-9445
>
>
> _______________________________________________
> mvapich-discuss mailing list
> mvapich-discuss at cse.ohio-state.edu
> http://mail.cse.ohio-state.edu/mailman/listinfo/mvapich-discuss
>



More information about the mvapich-discuss mailing list