cuBLAS cublasSgemv fails on Jetson Nano for larger matrix sizes

This code, which tests the cublasSgemv routine on a Jetson Nano, inexplicably fails when the matrix is larger. If I set M_ROWS = 2000, M_COLUMNS = 1000 and V_ROWS = 1000, I get:

File SgemvTester.cpp, Line 141: CUDA Runtime Error: unspecified launch failure

With the smaller sizes below, it works:

const int M_ROWS = 200;    /* Rows of matrix M; also passed to cublasSgemv as the leading dimension */
const int M_COLUMNS = 100; /* Columns of matrix M */
const int V_ROWS = 100;    /* Elements in input vector V; must equal M_COLUMNS for the product M * V */


/**
 * Fills a column-major ("Fortran order") matrix with a single value.
 *
 * Element (row, col) lives at M[col * N_Rows + row], i.e. the leading
 * dimension of the matrix equals N_Rows.
 *
 * @param M       Host-accessible buffer with room for N_Rows * N_Cols floats.
 * @param N_Rows  Number of rows (and leading dimension) of the matrix.
 * @param N_Cols  Number of columns of the matrix.
 * @param initVal Value stored, converted to float, in every element.
 *
 * Nothing is written when either dimension is zero or negative.
 */
void initializeFortranMatrix(float *M, const int N_Rows, const int N_Cols, int initVal)
{
    /* Convert once instead of once per element. */
    const float fillValue = static_cast<float>(initVal);

    for (int col = 0; col < N_Cols; col++)
    {
        for (int row = 0; row < N_Rows; row++)
        {
            /* Widen before multiplying: col * N_Rows can exceed INT_MAX for
               large matrices and would overflow in plain int arithmetic. */
            M[static_cast<long long>(col) * N_Rows + row] = fillValue;
        }
    }
}

/**
 * Sets the first N_Rows entries of a vector to the same value.
 *
 * @param V       Host-accessible buffer with room for N_Rows floats.
 * @param N_Rows  Number of entries to write; nothing is written if <= 0.
 * @param initVal Value stored, converted to float, in every entry.
 */
void initializeVector(float *V, const int N_Rows, int initVal)
{
    const float fill = static_cast<float>(initVal);
    int idx = 0;

    while (idx < N_Rows)
    {
        V[idx] = fill;
        ++idx;
    }
}

int main(int argc, char **argv)
{
    float *M;
    float *V;
    float *Y;

    cublasHandle_t handle;

    checkCUBLAS(cublasCreate(&handle));

    /* Allocate managed storage */
    checkCuda(cudaMallocManaged(&M, sizeof(float) * M_ROWS * M_COLUMNS));
    checkCuda(cudaMallocManaged(&V, sizeof(float) * V_ROWS));
    checkCuda(cudaMallocManaged(&Y, sizeof(float) * V_ROWS));

    initializeFortranMatrix(M, M_ROWS, M_COLUMNS, 11); /* Assuming column-wise storage */
    initializeVector(V, V_ROWS, 1);                    /* Column */
    initializeVector(Y, V_ROWS, 0);                    /* Column */

    const float alpha = 1.0f;
    const float beta = 0.0f;


    checkCUBLAS(cublasSgemv(handle,
	                       CUBLAS_OP_N,
	                       M_ROWS, M_COLUMNS, &alpha, M, M_ROWS, V, 1, &beta, Y, 1));

    checkCuda(cudaDeviceSynchronize());

    checkCuda(cudaFree(M));
    checkCuda(cudaFree(V));
    checkCuda(cudaFree(Y));

    checkCuda(cudaDeviceReset());

    return 0;
}

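The problem was the size of the result vector Y: with CUBLAS_OP_N it must hold M_ROWS elements, not M_COLUMNS (it was allocated with V_ROWS, which equals M_COLUMNS). With the larger sizes, cublasSgemv wrote past the end of Y, which caused the launch failure. I was confused by the column-major convention when passing the arguments.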