simple transpose issue

Hi,

I am doing a simple transpose operation as below:

const dblcmplx cmplx_1=make_cuDoubleComplex(1.0, 0.0);
const dblcmplx cmplx_0=make_cuDoubleComplex(0.0, 0.0);

dblcmplx * a, *b, * b_dev, * a_dev;

a = (dblcmplx*)malloc(sizeof(dblcmplx)32);
b = (dblcmplx*)malloc(sizeof(dblcmplx)23);

cudaMalloc((void**) &a_dev, sizeof(dblcmplx)32);
cudaMalloc((void**) &b_dev, sizeof(dblcmplx)23);

a[0] = make_cuDoubleComplex(1,1);
a[1] = make_cuDoubleComplex(2,2);
a[2] = make_cuDoubleComplex(3,3);
a[3] = make_cuDoubleComplex(4,4);
a[4] = make_cuDoubleComplex(5,5);
a[5] = make_cuDoubleComplex(6,6);

for (int i = 0; i< 3*2; i++){
printf(“%16.24lf %16.24lf\n”, creal(a[i]), cimag(a[i]));
}

cudaMemcpy(a_dev, a, sizeof(dblcmplx)32, cudaMemcpyHostToDevice);

cublasZgeam(blasHandle, CUBLAS_OP_T, CUBLAS_OP_N, 3, 2, &cmplx_1, a_dev, 3, &cmplx_0, a_dev, 3, b_dev, 3 );

cudaMemcpy(b, b_dev, sizeof(dblcmplx)23, cudaMemcpyDeviceToHost);

for (int i = 0; i< 2*3; i++){
printf(“%16.24lf %16.24lf\n”, creal(b[i]), cimag(b[i]));
}

The output of the above code is

1.000000000000000000000000 1.000000000000000000000000
4.000000000000000000000000 4.000000000000000000000000
0.000000000000000000000000 0.000000000000000000000000
2.000000000000000000000000 2.000000000000000000000000
5.000000000000000000000000 5.000000000000000000000000
0.000000000000000000000000 0.000000000000000000000000

I was expecting it to be

1.000000000000000000000000 1.000000000000000000000000
4.000000000000000000000000 4.000000000000000000000000
2.000000000000000000000000 2.000000000000000000000000
5.000000000000000000000000 5.000000000000000000000000
3.000000000000000000000000 3.000000000000000000000000
6.000000000000000000000000 6.000000000000000000000000

which is the transpose of matrix

1.000000000000000000000000 1.000000000000000000000000
2.000000000000000000000000 2.000000000000000000000000
3.000000000000000000000000 3.000000000000000000000000
4.000000000000000000000000 4.000000000000000000000000
5.000000000000000000000000 5.000000000000000000000000
6.000000000000000000000000 6.000000000000000000000000

what am I doing wrong? Please help.