Excuse me, I would like to ask a question about how to use the cublasZgemmBatched function.

I called the cublasZgemmBatched function to split a 181 * 35 * 3 matrix and a 35 * 1 * 3 matrix into three blocks for batch processing, that is, into sub-matrices of 181 * 35 and 35 * 1 that are multiplied together, three times in total. However, I do not understand the parameters very well, and the code I wrote never runs correctly.
The code is as follows:
/* Problem dimensions: every batch entry multiplies a BF_num x M_Array matrix
 * by an M_Array x NF matrix, producing a BF_num x NF result. */
int BF_num=181;
int M_Array=35;
int NF=1;

    /* Host buffers; each holds the three sub-matrices of one operand back to back. */
    cuDoubleComplex* a = (cuDoubleComplex*)malloc(BF_num * M_Array * 3 * sizeof(cuDoubleComplex));
    cuDoubleComplex* b = (cuDoubleComplex*)malloc(M_Array * NF * 3 * sizeof(cuDoubleComplex));
    cuDoubleComplex* c = (cuDoubleComplex*)malloc(BF_num * NF * 3 * sizeof(cuDoubleComplex));

    /* GEMM sizes: result is m x n, shared inner dimension is k. */
    int m = BF_num, n = NF, k = M_Array;

    /* Fill the operands with their linear index in both the real (.x) and
     * imaginary (.y) parts so results are easy to check by hand. */
    for(int i = 0; i < BF_num * M_Array * 3; i++) {
        a[i].x = i;
        a[i].y = i;
    }
    for(int j = 0; j < NF * M_Array * 3; j++) {
        b[j].x = j;
        b[j].y = j;   /* was "b[j].x=j" twice, which left .y uninitialised */
    }
    for(int j = 0; j < NF * BF_num * 3; j++) {
        c[j].x = j;
        c[j].y = j;
    }
    cublasHandle_t handle_p; 
    cublasCreate(&handle_p);
  
   cuDoubleComplex alpha = {1.0,0};//cublas
   cuDoubleComplex beta = {0,0};//cublas
    int batch=3;
    int Ida = batch, Idb = batch, Idc = batch;
   cuDoubleComplex *a_array[batch];
   cuDoubleComplex *b_array[batch];
   cuDoubleComplex *c_array[batch];
    for (int i = 0; i < 3; i++) {
            a_array[i] = a + i * BF_num ;
            b_array[i] = b + i * M_Array;
            c_array[i] = c + i * BF_num;
    }

#pragma acc enter data copyin(a[0:BF_num * M_Array * batch],b[0:M_Array * NF * batch],c[0:BF_num * NF * batch],a_array[0:batch],b_array[0:batch],c_array[0:batch])
#pragma acc host_data use_device(a,b,c,a_array,b_array,c_array)
cublasZgemmBatched(handle_p,CUBLAS_OP_N,CUBLAS_OP_N,n,m,k,&alpha,b_array,batch,a_array,batch,&beta,c_array,batch,batch);
#pragma acc exit data copyout(c[0:BF_num * 1 * 3])

   /* Print the first BF_num / 3 result entries. c[j] is a cuDoubleComplex
    * struct, so its real (.x) and imaginary (.y) parts must be passed to
    * printf separately; passing the struct itself to %f is undefined. */
   for(int j = 0; j < BF_num / 3; j++)
       printf("c[%d]=(%f,%f)\n",j,c[j].x,c[j].y);

See if this example helps.