hi everyone,
i am trying to create a matrix and trying to copy that matrix into device memory, manipulate it and send it back out to the host to check if i sent my pointers properly…but i can’t for the life of me figure out what is wrong with my code.
i’m either getting all zero’s back or random numbers back…
can someone take a look and help me out? thanks!
[codebox]global void transpose(int* matrix){
//int i = threadIdx.x;
matrix[threadIdx.x] = 1;
}
/************************************************************
************/
/* HelloCUDA */
/************************************************************
************/
int main(int argc, char* argv)
{
if(!InitCUDA()) {
return 0;
}
int N = 15;
int rows, columns, matrix_length, stop;
size_t pitch;
printf("CUDA Matrix Transpose v1.0\n");
printf("===========================\n");
printf("This program will randomly generate numbers for a M x N matrix and transpose it.\n");
printf("Please enter the dimensions of the matrix dimensions:\n\n");
printf("Number of Rows: ");
scanf("%d", &rows);
printf("\nNumber of Columns: ");
scanf("%d", &columns);
matrix_length = rows * columns;
int* h_matrix;
h_matrix = (int*)malloc(sizeof(int) * matrix_length);
int* d_matrix;
cudaMalloc((void**) &d_matrix, sizeof(int) * matrix_length);
for(int i = 0; i < matrix_length; i++)
{
int temp = rand() % 100;
h_matrix[i] = temp;
}
printf("CPU\n===\n");
for (int i = 0; i < matrix_length;){
printf("| ");
for (int j = 0; j < columns; j++){
printf("%d ", h_matrix[j]);
}
printf("|");
printf("\n");
i += columns;
}
cudaMemcpy(d_matrix, h_matrix, sizeof(h_matrix) * matrix_length, cudaMemcpyHostToDevice);
transpose<<<1, matrix_length>>>(d_matrix);
cudaMemcpy(h_matrix, d_matrix, sizeof(d_matrix) * matrix_length, cudaMemcpyDeviceToHost);
printf("\nGPU\n===\n");
for (int i = 0; i < matrix_length;){
printf("| ");
for (int j = 0; j < columns; j++){
printf("%d ", d_matrix[j]);
}
printf("|");
printf("\n");
i += columns;
}[/codebox]