Matrix of struct

Hi! I’m a newbie in CUDA programming. I’m using a matrix of structs (index_line, element); I sort this matrix and I want to count the number of equal elements.

// Pair stored in each matrix cell. The post describes the pair as
// (index_line, element); which of the two fields below holds which value is
// not shown here -- TODO confirm.
// NOTE(review): the closing of this definition is missing from the excerpt.
typedef struct cpl { //struct (index ,element)
int nbr, id;
// Kernel: each thread works on the column selected by threadIdx.x and writes
// one int into dk_list[threadIdx.x].
//   dmat_pr - table of row pointers, indexed as dmat_pr[row][column]
//   dk_list - output array, one slot per thread
// NOTE(review): the loop body and the closing braces are missing from this
// excerpt, so how k advances (and whether the store below sits inside or
// after the loop) cannot be seen here.
__global__ void k_comp(cpl **dmat_pr  , int *dk_list) {
	// i is not used in any of the lines shown.
	int i = blockIdx.x ;
	int k = 0;
	// Compares the nbr field of rows k and k+1 in this thread's column.
	// NOTE(review): no upper bound on k is visible, so row k+1 may be read
	// past the last row -- confirm against the full source.
	while (dmat_pr[k][threadIdx.x].nbr == dmat_pr[k+1][threadIdx.x].nbr) {
	// Stores the current k in this thread's output slot.
	dk_list[threadIdx.x] = k;
// Host driver: builds a matrix of cpl on the host, sorts it, launches k_comp
// with one block of nbr_col threads, and prints one value per column.
// NOTE(review): this is an excerpt -- the braces, the code that fills dim[]
// from the file, and the declaration of `mat` are not shown.
// NOTE(review): none of the CUDA calls below have their return value checked.
int main()
        int dim[2]; 
	// NOTE(review): the backslashes in this path are not escaped, so the
	// compiler treats \U, \u, \D, \Z and \t as escape sequences.
	FILE *file = fopen("C:\Users\user\Documents\Zekri\test.txt", "r");
	// NOTE(review): repeating `int` after the comma is not valid declaration
	// syntax; dim[] is not assigned anywhere in the lines shown.
	int nbr_col = dim[0], int nbr_line = dim[1];
	// Host matrix: an array of nbr_line row pointers, each row allocated
	// separately with nbr_col elements (rows are not contiguous in memory).
        cpl** mat_pr = (cpl**)malloc(nbr_line * sizeof(cpl*)); 
	for (int i = 0; i<nbr_line; ++i)
	mat_pr[i] = (cpl*)malloc(nbr_col * sizeof(cpl));
	// create_mat_cpl, display_mat and matrix_sort are defined outside this
	// excerpt; `mat` is not declared in the lines shown.
	create_mat_cpl(mat, mat_pr, nbr_col, nbr_line);
	display_mat(mat_pr, nbr_col, nbr_line);
	matrix_sort(mat_pr, nbr_col, nbr_line);
	printf("----------------------Matrix sorted-------------------------\n");
	// Host buffer for the kernel results; fixed at 30 entries while the copy
	// and print below use nbr_col entries.
	int hk_list [30]; 
        int *dk_list;
	// A second pointer table with per-row buffers is allocated with host
	// malloc here...
	cpl **dmat_pr = (cpl**)malloc(nbr_line * sizeof(cpl*));
	for (int i = 0; i < nbr_line; ++i)
	// NOTE(review): the statement below has no terminating semicolon.
        dmat_pr[i] = (cpl*)malloc(nbr_col * sizeof(cpl))
	// ...and then dmat_pr is overwritten by cudaMalloc, so the host
	// allocations above are no longer reachable.
	// NOTE(review): only sizeof(cpl) bytes are requested -- room for a single
	// element, not for a pointer table or for nbr_line * nbr_col elements.
	cudaMalloc((void **)&dmat_pr, sizeof(cpl));
	// NOTE(review): the source mat_pr is the host array of row pointers, not
	// contiguous cpl data, and the byte count exceeds both that array's size
	// (nbr_line * sizeof(cpl*)) and the sizeof(cpl) device allocation above.
	// The kernel indexes dmat_pr[k][col], so it needs device-side row
	// pointers, which this copy does not provide.
	cudaMemcpy(dmat_pr, mat_pr, nbr_line * nbr_col * sizeof(cpl), cudaMemcpyHostToDevice);
	// NOTE(review): space for one int is allocated, but the kernel writes
	// dk_list[threadIdx.x] for nbr_col threads and the copy below moves
	// nbr_col ints.
	cudaMalloc(&dk_list, sizeof(int));
	// One block of nbr_col threads: one thread per column.
	k_comp <<<1, nbr_col >> >(dmat_pr,dk_list) ; 
	// NOTE(review): the destination is the host array and the source is the
	// device buffer, yet the direction flag is cudaMemcpyHostToDevice.
        cudaMemcpy(hk_list, dk_list, nbr_col * sizeof(int), cudaMemcpyHostToDevice);
	// Print one result per column.
	for (int i = 0; i < nbr_col; i++)
		printf("K[%d] = %d\n", i, hk_list[i]);
	// Neither the host nor the device allocations are freed in the lines shown.
	return 0;

When I run the program, it does not display the expected result. Can anyone tell me whether the problem is in the allocation of the matrix or in the struct that I’m using?
Thank you

In line 36 (the cudaMemcpy that copies dk_list into hk_list), you’re using cudaMemcpyHostToDevice where cudaMemcpyDeviceToHost is needed.

Always check return values for errors, that can help identify typos like this.

Thank you for the reply. I fixed line 36, but the problem is that the matrix is neither allocated on nor copied to the device.