cudaMalloc failing, 2D array error code 11


I’m have attempted to implement a 2D array in CUDA as follows:

u_int32_t **device_fb = 0;

    u_int32_t **host_fb = 0;

cudaMalloc((void **)&device_fb, (block_size*grid_size)*sizeof(u_int32_t*));

for(int i=0; i<(block_size*grid_size); i++)


        cudaMalloc((void **)&host_fb[i], numOpsPerCore*sizeof(u_int32_t));


    cudaMemcpy(device_fb, host_fb, (block_size*grid_size)*sizeof(u_int32_t*), cudaMemcpyHostToDevice);

On testing, host_fb is NULL. In addition, when I grab the error code for the first iteration of “cudaMalloc((void **)&host_fb[i], numOpsPerCore*sizeof(u_int32_t));” I get cudaErrorInvalidValue. What am I doing wrong? Thanks!