[mvapich-discuss] Memory leaks were detected when checking with cuda-memcheck

Yussuf Ali yussuf.ali at jaea.go.jp
Wed Oct 10 00:28:02 EDT 2018


Dear MVAPICH-GDR developers and users,

We used the cuda-memcheck tool to check for memory leaks in a small
example program which uses MVAPICH-GDR 2.3a.

At the end of the program several leaks were detected; however, we are
not sure where these leaks come from. Is something wrong with this MPI
program?

We set MV2_USE_CUDA=1 and execute the program with the following command
using two MPI processes:

mpiexec -np 2 cuda-memcheck --leak-check full ./a.out
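To keep the reports of the two ranks apart, the per-rank output can also
be written to separate files with cuda-memcheck's --log-file option; a
sketch, assuming the launcher exports MV2_COMM_WORLD_RANK for the %q{}
substitution:

mpiexec -np 2 cuda-memcheck --leak-check full --log-file memcheck.rank%q{MV2_COMM_WORLD_RANK} ./a.out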

 

According to the NVIDIA documentation, cudaDeviceReset() is necessary
for cuda-memcheck to detect memory leaks, so this call was inserted
after MPI_Finalize.

#include <stdio.h>
#include <cuda_runtime.h>
#include "mpi.h"

int main(int argc, char *argv[])
{
  MPI_Init(&argc, &argv);
  int rank = 0;
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  cudaSetDevice(rank);

  /* One double on the device for sending and one for receiving. */
  double *s_buf;
  double *r_buf;
  cudaMalloc((void **)&s_buf, sizeof(double));
  cudaMalloc((void **)&r_buf, sizeof(double));

  /* With two ranks, each rank exchanges one value with the other. */
  int dst = (rank == 0) ? 1 : 0;
  double data = (rank == 0) ? 0.0 : 1.0;
  printf("Rank: %i Data is: %f \n", rank, data);

  cudaMemcpy(s_buf, &data, sizeof(double), cudaMemcpyHostToDevice);
  MPI_Request req[2];
  MPI_Irecv(r_buf, 1, MPI_DOUBLE, dst, 123, MPI_COMM_WORLD, &req[0]);
  MPI_Isend(s_buf, 1, MPI_DOUBLE, dst, 123, MPI_COMM_WORLD, &req[1]);
  MPI_Waitall(2, req, MPI_STATUSES_IGNORE);

  double check;
  cudaMemcpy(&check, r_buf, sizeof(double), cudaMemcpyDeviceToHost);
  MPI_Barrier(MPI_COMM_WORLD);
  printf("Rank: %i Received: %f \n", rank, check);

  cudaFree(s_buf);
  cudaFree(r_buf);
  cudaDeviceSynchronize();
  MPI_Finalize();
  /* cudaDeviceReset() goes after MPI_Finalize so that cuda-memcheck
     can run its leak check on program exit. */
  cudaDeviceReset();
  return 0;
}
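The listing above ignores return codes for brevity. To confirm that the
CUDA calls themselves succeed, each call can be wrapped in a small
checking macro; a minimal sketch (CHECK_CUDA is our own helper name, not
part of the original program):

#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>

/* Abort with a readable message if a CUDA runtime call fails. */
#define CHECK_CUDA(call)                                          \
  do {                                                            \
    cudaError_t err_ = (call);                                    \
    if (err_ != cudaSuccess) {                                    \
      fprintf(stderr, "CUDA error: %s at %s:%d\n",                \
              cudaGetErrorString(err_), __FILE__, __LINE__);      \
      exit(EXIT_FAILURE);                                         \
    }                                                             \
  } while (0)

/* Example: CHECK_CUDA(cudaMalloc((void **)&s_buf, sizeof(double))); */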

 

The output of cuda-memcheck is:

========= CUDA-MEMCHECK
========= Leaked 524288 bytes at 0x7fff98d80000
=========     Saved host backtrace up to driver entry point at cudaMalloc time
=========     Host Frame:/lib64/libcuda.so.1 (cuMemAlloc_v2 + 0x17f) [0x22bedf]
=========     Host Frame:/lustre/app/mvapich2-gdr/ofed4.2/gnu/lib64/libmpi.so [0x3cd0e0]
=========     Host Frame:/lustre/app/acc/cuda/9.0.176/lib64/libcudart.so.9.0 [0x31b73]
=========     Host Frame:/lustre/app/acc/cuda/9.0.176/lib64/libcudart.so.9.0 [0x10d7b]
=========     Host Frame:/lustre/app/acc/cuda/9.0.176/lib64/libcudart.so.9.0 (cudaMalloc + 0x178) [0x42138]
=========     Host Frame:/lustre/app/mvapich2-gdr/ofed4.2/gnu/lib64/libmpi.so (cudaipc_allocate_ipc_region + 0x119) [0x3c79e9]
=========     Host Frame:/lustre/app/mvapich2-gdr/ofed4.2/gnu/lib64/libmpi.so (cuda_init_dynamic + 0x31d) [0x3c175d]
=========     Host Frame:/lustre/app/mvapich2-gdr/ofed4.2/gnu/lib64/libmpi.so (is_device_buffer + 0x124) [0x3c19a4]
=========     Host Frame:/lustre/app/mvapich2-gdr/ofed4.2/gnu/lib64/libmpi.so (MPID_Irecv + 0x1e0) [0x35a130]
=========     Host Frame:/lustre/app/mvapich2-gdr/ofed4.2/gnu/lib64/libmpi.so (MPI_Irecv + 0x5a5) [0x2df4d5]
=========     Host Frame:./a.out [0x1171]
=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x21b15]
=========     Host Frame:./a.out [0xf69]
=========
========= Leaked 524288 bytes at 0x7fff98d00000
=========     Saved host backtrace up to driver entry point at cudaMalloc time
=========     Host Frame:/lib64/libcuda.so.1 (cuMemAlloc_v2 + 0x17f) [0x22bedf]
=========     Host Frame:/lustre/app/mvapich2-gdr/ofed4.2/gnu/lib64/libmpi.so [0x3cd0e0]
=========     Host Frame:/lustre/app/acc/cuda/9.0.176/lib64/libcudart.so.9.0 [0x31b73]
=========     Host Frame:/lustre/app/acc/cuda/9.0.176/lib64/libcudart.so.9.0 [0x10d7b]
=========     Host Frame:/lustre/app/acc/cuda/9.0.176/lib64/libcudart.so.9.0 (cudaMalloc + 0x178) [0x42138]
=========     Host Frame:/lustre/app/mvapich2-gdr/ofed4.2/gnu/lib64/libmpi.so (cudaipc_allocate_ipc_region + 0x119) [0x3c79e9]
=========     Host Frame:/lustre/app/mvapich2-gdr/ofed4.2/gnu/lib64/libmpi.so (cuda_init_dynamic + 0x31d) [0x3c175d]
=========     Host Frame:/lustre/app/mvapich2-gdr/ofed4.2/gnu/lib64/libmpi.so (is_device_buffer + 0x124) [0x3c19a4]
=========     Host Frame:/lustre/app/mvapich2-gdr/ofed4.2/gnu/lib64/libmpi.so (MPID_Irecv + 0x1e0) [0x35a130]
=========     Host Frame:/lustre/app/mvapich2-gdr/ofed4.2/gnu/lib64/libmpi.so (MPI_Irecv + 0x5a5) [0x2df4d5]
=========     Host Frame:./a.out [0x1171]
=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x21b15]
=========     Host Frame:./a.out [0xf69]
=========
========= LEAK SUMMARY: 1048576 bytes leaked in 2 allocations
========= ERROR SUMMARY: 2 errors
========= CUDA-MEMCHECK
========= Leaked 524288 bytes at 0x7fff98c80000
=========     Saved host backtrace up to driver entry point at cudaMalloc time
=========     Host Frame:/lib64/libcuda.so.1 (cuMemAlloc_v2 + 0x17f) [0x22bedf]
=========     Host Frame:/lustre/app/mvapich2-gdr/ofed4.2/gnu/lib64/libmpi.so [0x3cd0e0]
=========     Host Frame:/lustre/app/acc/cuda/9.0.176/lib64/libcudart.so.9.0 [0x31b73]
=========     Host Frame:/lustre/app/acc/cuda/9.0.176/lib64/libcudart.so.9.0 [0x10d7b]
=========     Host Frame:/lustre/app/acc/cuda/9.0.176/lib64/libcudart.so.9.0 (cudaMalloc + 0x178) [0x42138]
=========     Host Frame:/lustre/app/mvapich2-gdr/ofed4.2/gnu/lib64/libmpi.so (cudaipc_allocate_ipc_region + 0x119) [0x3c79e9]
=========     Host Frame:/lustre/app/mvapich2-gdr/ofed4.2/gnu/lib64/libmpi.so (cuda_init_dynamic + 0x31d) [0x3c175d]
=========     Host Frame:/lustre/app/mvapich2-gdr/ofed4.2/gnu/lib64/libmpi.so (is_device_buffer + 0x124) [0x3c19a4]
=========     Host Frame:/lustre/app/mvapich2-gdr/ofed4.2/gnu/lib64/libmpi.so (MPID_Irecv + 0x1e0) [0x35a130]
=========     Host Frame:/lustre/app/mvapich2-gdr/ofed4.2/gnu/lib64/libmpi.so (MPI_Irecv + 0x5a5) [0x2df4d5]
=========     Host Frame:./a.out [0x1171]
=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x21b15]
=========     Host Frame:./a.out [0xf69]
=========
========= Leaked 524288 bytes at 0x7fff98c00000
=========     Saved host backtrace up to driver entry point at cudaMalloc time
=========     Host Frame:/lib64/libcuda.so.1 (cuMemAlloc_v2 + 0x17f) [0x22bedf]
=========     Host Frame:/lustre/app/mvapich2-gdr/ofed4.2/gnu/lib64/libmpi.so [0x3cd0e0]
=========     Host Frame:/lustre/app/acc/cuda/9.0.176/lib64/libcudart.so.9.0 [0x31b73]
=========     Host Frame:/lustre/app/acc/cuda/9.0.176/lib64/libcudart.so.9.0 [0x10d7b]
=========     Host Frame:/lustre/app/acc/cuda/9.0.176/lib64/libcudart.so.9.0 (cudaMalloc + 0x178) [0x42138]
=========     Host Frame:/lustre/app/mvapich2-gdr/ofed4.2/gnu/lib64/libmpi.so (cudaipc_allocate_ipc_region + 0x119) [0x3c79e9]
=========     Host Frame:/lustre/app/mvapich2-gdr/ofed4.2/gnu/lib64/libmpi.so (cuda_init_dynamic + 0x31d) [0x3c175d]
=========     Host Frame:/lustre/app/mvapich2-gdr/ofed4.2/gnu/lib64/libmpi.so (is_device_buffer + 0x124) [0x3c19a4]
=========     Host Frame:/lustre/app/mvapich2-gdr/ofed4.2/gnu/lib64/libmpi.so (MPID_Irecv + 0x1e0) [0x35a130]
=========     Host Frame:/lustre/app/mvapich2-gdr/ofed4.2/gnu/lib64/libmpi.so (MPI_Irecv + 0x5a5) [0x2df4d5]
=========     Host Frame:./a.out [0x1171]
=========     Host Frame:/lib64/libc.so.6 (__libc_start_main + 0xf5) [0x21b15]
=========     Host Frame:./a.out [0xf69]
=========
========= LEAK SUMMARY: 1048576 bytes leaked in 2 allocations
========= ERROR SUMMARY: 2 errors
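
Since every backtrace ends in cudaipc_allocate_ipc_region inside
libmpi.so, we suspect the two leaked 512 KB blocks per process are CUDA
IPC staging buffers set up by the library during the first MPI_Irecv,
not allocations made by our own code. As a test, CUDA IPC could be
disabled to see whether the reports disappear; a sketch, assuming this
build honors the MV2_CUDA_IPC run-time parameter from the MVAPICH2-GDR
user guide:

MV2_USE_CUDA=1 MV2_CUDA_IPC=0 mpiexec -np 2 cuda-memcheck --leak-check full ./a.out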

 

How can we resolve this issue? 

 

Thank you for your help,

Yussuf