Compute Sanitizer unable to detect memory leaks when using cuMemAlloc and OpenACC

I found a bug in CUDA-memcheck and Compute Sanitizer: neither tool detects memory leaks when cuMemAlloc is used together with OpenACC data directives.
compiler: nvhpc/22.5
MPI: OpenMPI/4.1.4
source code:

#include <stdio.h>
#include <stdlib.h> // malloc, exit
#include <mpi.h>
#include <cuda.h>
#include <openacc.h>
#include <string.h>
#include <limits.h>

int rank;
enum {N = 10};
enum {GB = 1073741824}; //1024*1024*1024

// This will output the proper CUDA error strings
// in the event that a CUDA host call returns an error
#define checkCudaErrors(err)  __checkCudaErrors (err, __FILE__, __LINE__)

static inline void __checkCudaErrors( CUresult err, const char *file, const int line )
{
    if( CUDA_SUCCESS != err) {
        fprintf(stderr,
                "CUDA Driver API error = %04d from file <%s>, line %i.\n",
                err, file, line );
        exit(-1);
    }
}

void print_mem_device() {
  // cuMemGetInfo ( size_t* free, size_t* total )
  // Note: the result is not checked; before a CUDA context exists the call
  // fails and the zero-initialized values below are printed, which is why
  // the first output line reads "free: 0 GB, total: 0 GB."
  size_t free_bytes = 0, total_bytes = 0;
  cuMemGetInfo( &free_bytes, &total_bytes );
  printf("free: %zu GB, total: %zu GB.\n", free_bytes / GB, total_bytes / GB);
}

void testCudaDriverAPIOpenACC() {
  int *a = malloc(sizeof(int) * GB); // 4GB
  int *b = malloc(sizeof(int) * GB);
  int *c = malloc(sizeof(int) * GB);
  int *d = malloc(sizeof(int) * GB);
  int *e = malloc(sizeof(int) * GB);
  int in[N];
  int out[N];
  for (int i = 0; i < N; ++i) {
    in[i] = -1;
    out[i] = 1;
  }

  CUdeviceptr d_leak[5];
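  // One slot per test case below; none of these allocations is ever freed.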
  int test = 0;
  print_mem_device();
  
#pragma acc data copy(a[0:GB])
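  // copy: a device copy of a[] is allocated at region entry and copied in;
  // at region exit it is copied back to the host and freed automatically.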
  {
    // Memory leak: neither d_data nor d_leak[test] is explicitly freed.
    CUdeviceptr d_data;
    checkCudaErrors( cuMemAlloc(&d_leak[test], sizeof(int) * GB) );
    checkCudaErrors( cuMemAlloc(&d_data, sizeof(int) * N) );
    checkCudaErrors( cuMemcpyHtoD(d_data, in, sizeof(int) * N) );
    checkCudaErrors( cuMemcpyDtoH(out, d_data, sizeof(int) * N) );
  }
  print_mem_device();
  test++;

#pragma acc data copyin(b[0:GB])
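  // copyin: device copy allocated and filled at entry; freed at exit
  // without copying back.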
  {
    // memory leak, d_leak is not explicitly freed.
    checkCudaErrors( cuMemAlloc(&d_leak[test], sizeof(int) * GB) );
  }
  print_mem_device();
  test++;

#pragma acc data copyout(c[0:GB])
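  // copyout: device copy allocated (uninitialized) at entry; copied back
  // to the host and freed at exit.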
  {
    // memory leak, d_leak is not explicitly freed.
    checkCudaErrors( cuMemAlloc(&d_leak[test], sizeof(int) * GB) );
  }
  print_mem_device();
  test++;

#pragma acc data create(d[0:GB])
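  // create: device copy allocated at entry; freed at exit, with no
  // host/device transfers.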
  {
    // memory leak, d_leak is not explicitly freed.
    checkCudaErrors( cuMemAlloc(&d_leak[test], sizeof(int) * GB) );
  }
  print_mem_device();
  test++;

#pragma acc data copy(e[0:GB])
  {
    ;
  }
  // Allocate GPU memory outside any OpenACC data region.
  checkCudaErrors( cuMemAlloc(&d_leak[test], sizeof(int) * GB) );
  print_mem_device();
}

int main() {
  int size;
  MPI_Init(NULL, NULL);
  MPI_Comm_rank(MPI_COMM_WORLD, &rank);
  MPI_Comm_size(MPI_COMM_WORLD, &size);

  if (rank == 0)
    printf("size: %d.\n", size);
  printf("rank: %d.\n", rank);

  testCudaDriverAPIOpenACC();          // Compute Sanitizer failed with a segmentation fault; cuda-memcheck detected no leak

  MPI_Finalize();
  return 0;
}
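
For reference, a leak-free variant would pair every cuMemAlloc with a cuMemFree before the function returns (d_data would additionally need a cuMemFree inside the first data region, where it is still in scope). A minimal sketch of the cleanup that testCudaDriverAPIOpenACC() deliberately omits, placed at the end of that function:

  // Hypothetical cleanup, intentionally left out above so the sanitizers
  // have something to report: release the five 4 GB device blocks in
  // d_leak, then the five 4 GB host buffers.
  for (int i = 0; i < 5; ++i)
    checkCudaErrors( cuMemFree(d_leak[i]) );
  free(a); free(b); free(c); free(d); free(e);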

The compilation script is:

CFLAGS="-O0 -g -I${CUDA_ROOT}/include -acc=gpu -gpu=cc80,nordc,debug -Minfo"
LDFLAGS="-g -lnvToolsExt -lcuda -acc=gpu -gpu=cc80,nordc,debug"

mpicc -O0 -g -I${CUDA_ROOT}/include -acc=gpu -gpu=cc80,nordc,debug -Minfo -lnvToolsExt -lcuda -o mem_test mem_test.c

CUDA-memcheck results:

$ srun -n 1 cuda-memcheck --tool memcheck --leak-check full mem_test
size: 1.
rank: 0.
free: 0 GB, total: 0 GB.
free: 70 GB, total: 79 GB.
free: 66 GB, total: 79 GB.
free: 62 GB, total: 79 GB.
free: 58 GB, total: 79 GB.
free: 54 GB, total: 79 GB.
========= LEAK SUMMARY: 0 bytes leaked in 0 allocations

The Compute Sanitizer results are very similar; neither tool detected the memory leaked through d_leak.
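
As a control (not part of the runs above), a pure Driver API program with no OpenACC regions can show whether OpenACC involvement is what masks the leak: if the sanitizers flag the leak below but stay silent on the mixed program, the OpenACC runtime is the trigger. A minimal sketch:

#include <stdio.h>
#include <cuda.h>

// Control case: leak one megabyte through the Driver API alone, no OpenACC.
// compute-sanitizer --tool memcheck --leak-check full is expected to report it.
int main(void) {
  CUdevice dev;
  CUcontext ctx;
  CUdeviceptr d_leak;
  if (cuInit(0) != CUDA_SUCCESS) return 1;
  if (cuDeviceGet(&dev, 0) != CUDA_SUCCESS) return 1;
  if (cuCtxCreate(&ctx, 0, dev) != CUDA_SUCCESS) return 1;
  if (cuMemAlloc(&d_leak, 1 << 20) != CUDA_SUCCESS) return 1;
  printf("leaking %d bytes on purpose.\n", 1 << 20);
  // Destroy the context while d_leak is still allocated; the sanitizer
  // docs recommend tearing down contexts before exit so that unfreed
  // allocations can be attributed and reported.
  cuCtxDestroy(ctx);
  return 0;
}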

I cannot reproduce your issue using the latest Docker image for nvhpc. Can you please try the following and confirm it works on your side?

$ cat exec.sh
#!/bin/sh

set -e
set -x

cd $(dirname $0)

HPC_LIBS_DIR=/opt/nvidia/hpc_sdk/Linux_x86_64/23.1/cuda/12.0/targets/x86_64-linux/lib/

mpicc -O0 -g -I${CUDA_ROOT}/include -acc=gpu -gpu=cc80,nordc,debug -Minfo -L$HPC_LIBS_DIR -lcuda -lnvToolsExt -o mem_test mem_test.c
mpirun -q --allow-run-as-root -n 1 compute-sanitizer --show-backtrace no --tool memcheck --leak-check full mem_test
$ sudo docker run --gpus all -it -v $(pwd):/workspace --rm nvcr.io/nvidia/nvhpc:23.1-devel-cuda12.0-ubuntu22.04 /workspace/exec.sh

====================
== NVIDIA HPC SDK ==
====================

NVIDIA HPC SDK version 23.1

Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES.  All rights reserved.

+ dirname /workspace/exec.sh
+ cd /workspace
+ HPC_LIBS_DIR=/opt/nvidia/hpc_sdk/Linux_x86_64/23.1/cuda/12.0/targets/x86_64-linux/lib/
+ mpicc -O0 -g -I/include -acc=gpu -gpu=cc80,nordc,debug -Minfo -L/opt/nvidia/hpc_sdk/Linux_x86_64/23.1/cuda/12.0/targets/x86_64-linux/lib/ -lcuda -lnvToolsExt -o mem_test mem_test.c
testCudaDriverAPIOpenACC:
     51, Generating copy(a[:1073741824]) [if not already present]
     63, Generating copyin(b[:1073741824]) [if not already present]
     71, Generating copyout(c[:1073741824]) [if not already present]
     79, Generating create(d[:1073741824]) [if not already present]
     87, Generating copy(e[:1073741824]) [if not already present]
+ mpirun -q --allow-run-as-root -n 1 compute-sanitizer --show-backtrace no --tool memcheck --leak-check full mem_test
--------------------------------------------------------------------------
WARNING: Open MPI tried to bind a process but failed.  This is a
warning only; your job will continue, though performance may
be degraded.

  Local host:        16126427ff9a
  Application name:  /opt/nvidia/hpc_sdk/Linux_x86_64/23.1/compilers/bin/compute-sanitizer
  Error message:     failed to bind memory
  Location:          ../../../../../orte/mca/rtc/hwloc/rtc_hwloc.c:447

--------------------------------------------------------------------------
========= COMPUTE-SANITIZER
size: 1.
rank: 0.
free: 0 GB, total: 0 GB.
========= Program hit CUDA_ERROR_OUT_OF_MEMORY (error 2) due to "out of memory" on CUDA API call to cuMemAlloc_v2.
=========
CUDA Driver API error = 0002 from file <mem_test.c>, line 54.
========= Leaked 67108864 bytes at 0x7f3f88000000
=========
========= Leaked 4294967296 bytes at 0x7f3f8c000000
=========
========= Leaked 1024000 bytes at 0x7f40b3200000
=========
========= Leaked 1024000 bytes at 0x7f40b3400000
=========
========= Target application returned an error
========= LEAK SUMMARY: 4364124160 bytes leaked in 4 allocations
========= ERROR SUMMARY: 5 errors

We haven’t heard from you recently, so we are going to close this topic in 2 weeks. If you have another issue, feel free to reopen this topic or start a new one. Thanks!

This topic was automatically closed after 10 days. New replies are no longer allowed.