Cuda allocate device memory failed

shunkang1997 · January 31, 2019, 1:48am

My device is an RTX 2080 Ti, and I am trying to write a CUDA program for it. However, when I try to allocate device memory, I always get an “out of memory” error from one specific call. I have made sure that there is enough free GPU memory, and the strangest thing is that I can allocate even more GPU memory for the other pointers; only that one allocation fails. My code is as follows.

float caltrace_gpu(float* W, float* WTWInv, float * data, int N, long long M,
                int C, int num_block, float* mean, float* rv,  cublasHandle_t handle)
{
    // Set cuda context
    cudaError_t cudaStat;
    cublasStatus_t stat;
    
    // Initialize device pointer
    float* d_data;
    float* d_W;
    float* d_WTWInv;
    float* d_v;
    float* d_result;
    float* d_result2;
    float* d_temp;
    float* d_one;
    float* d_mean;
    float prod;
    float* d_cby1;
    float* d_cby2;
    float* d_cby3;
    float* d_R;
    float* d_data_final;
    float* d_temp_final;
    //Initialize one vector in order to perform element wise sub
    float* vector_ones = new float[N];
    for (long long i = 0; i < N; i++)
    {
        vector_ones[i] = 1.0f;
    }
    // The coefficient needed in computation
    float al = 1.0f;
    float al_n = -1.0f;
    float beta = 0.0f;
    
    // settings for the blocks
    int P = num_block;
    int SNIPs = static_cast<int>(M / P + 1);
    int SNIPs_final = M - SNIPs * (P - 1);
    
    // Allocate memory on device
    cudaStat = cudaMalloc((void**)&d_data, SNIPs * N * sizeof(float));
    if (cudaStat != cudaSuccess) {
        cout << "device memory allocation failed 1" << endl;
        return EXIT_FAILURE;
    }
    cudaStat = cudaMalloc((void**)&d_W, N * C * sizeof(float));
    if (cudaStat != cudaSuccess) {
        cout << "device memory allocation failed 2" << endl;
        return EXIT_FAILURE;
    }
    cudaStat = cudaMalloc((void**)&d_WTWInv, C * C * sizeof(float));
    if (cudaStat != cudaSuccess) {
        cout << "device memory allocation failed 3" << endl;
        return EXIT_FAILURE;
    }
    cudaStat = cudaMalloc((void**)&d_v, N * sizeof(float));
    if (cudaStat != cudaSuccess) {
        cout << "device memory allocation failed 4" << endl;
        return EXIT_FAILURE;
    }
    cudaStat = cudaMalloc((void**)&d_result2, N * sizeof(float));
    if (cudaStat != cudaSuccess) {
        cout << "device memory allocation failed 5" << endl;
        return EXIT_FAILURE;
    }
    cudaStat = cudaMalloc((void**)&d_result, N * sizeof(float));
    if (cudaStat != cudaSuccess) {
        cout << "device memory allocation failed 6" << endl;
        return EXIT_FAILURE;
    }
    cudaStat = cudaMalloc((void**)&d_temp, SNIPs * sizeof(float));
    if (cudaStat != cudaSuccess) {
        cout << "device memory allocation failed 7" << endl;
        return EXIT_FAILURE;
    }
    cudaStat = cudaMalloc((void**)&d_one, N * sizeof(float));
    if (cudaStat != cudaSuccess) {
        cout << "device memory allocation failed 8" << endl;
        return EXIT_FAILURE;
    }
    // cudaStat = cudaMalloc((void**)&d_mean, SNIPs * sizeof(float));
    cudaStat = cudaMalloc((void**)&d_cby1, C * sizeof(float));
    if (cudaStat != cudaSuccess) {
        cout << "device memory allocation failed 9" << endl;
        return EXIT_FAILURE;
    }
    cudaStat = cudaMalloc((void**)&d_cby2, C * sizeof(float));
    if (cudaStat != cudaSuccess) {
        cout << "device memory allocation failed 10" << endl;
        return EXIT_FAILURE;
    }
    cudaStat = cudaMalloc((void**)&d_cby3, N * sizeof(float));
    if (cudaStat != cudaSuccess) {
        cout << "device memory allocation failed 11" << endl;
        return EXIT_FAILURE;
    }

The allocation for d_WTWInv always fails. However, when I remove that call, the allocations for the later pointers succeed. C is equal to 20.

Topic		Replies	Views
cuda_driver failed_to_allocate problem CUDA_ERROR_OUT_OF_MEMORY CUDA Programming and Performance	0	1749	April 18, 2019
cudaMalloc3DArray out of memory can not allocate the available amount of memory CUDA Programming and Performance	3	1813	January 31, 2011
CUDA_ERROR_OUT_OF_MEMORY: out of memory on Nvidia Quadro 8000, with more than enough available memory Frameworks tensorflow	3	2839	October 6, 2020
How to solve memory allocation problem in cuda?? CUDA Programming and Performance	4	31024	February 2, 2015
How to clear cuda errors? CUDA Programming and Performance	7	438	June 14, 2024
bug in memory allocation? CUDA Programming and Performance	6	4166	May 24, 2012
Why nvidia-smi, nor cudaMemGetInfo do not throw error with over-occupied device memory? CUDA Programming and Performance cuda	6	574	June 8, 2023
Extreme performance degradation (<1/100) due to allocate unified memory area? CUDA Programming and Performance	0	514	June 19, 2017
Trouble allocating device memory for a struct CUDA Programming and Performance cuda	8	590	March 8, 2022
Cuda C++ Out of memory CUDA Programming and Performance cuda	4	28	April 16, 2025

Cuda allocate device memory failed

Related topics