Hi,
I am learning cuda and trying to implement a 2D matrix. However, my code doesn’t work so far and I couldn’t figure out the problem. So, I am posting my code here hoping to get some help.
The code is simple: it initiate a matrix and assign each element’s value according to its thread ID. The code compiles fine by hang the system whenever I run it.
Thanks!
#include "./common/book.h"
#define WIDTH 10
#define HEIGHT 10
typedef struct
{
float *data;
int *width;
} matrixStruct;
__device__ void SetMatElement(matrixStruct *mat, int x, int y, float val)
{
int width;
width = *(mat->width);
*(mat->data+y*width+x) = val;
}
__global__ void SetMat(matrixStruct *mat)
{
int x = threadIdx.x+blockIdx.x*blockDim.x;
int y = threadIdx.y+blockIdx.y*blockDim.y;
int offset = x+y*blockDim.x*gridDim.x;
SetMatElement(mat, x, y, (float)offset);
}
int main(int argc, char *argv[])
{
matrixStruct *mat;
int width;
float *data;
data = (float*)malloc(10*10*sizeof(float));
for(int i = 0; i < WIDTH*HEIGHT; i++)
{
data[i] = 0.0f;
}
width = WIDTH;
HANDLE_ERROR(cudaMalloc((void**)&mat, sizeof(matrixStruct)));
HANDLE_ERROR(cudaMalloc((void**)&(mat->data), WIDTH*HEIGHT*sizeof(float)));
HANDLE_ERROR(cudaMemcpy(mat->data, data, WIDTH*HEIGHT*sizeof(float), cudaMemcpyHostToDevice));
HANDLE_ERROR(cudaMalloc((void**)&(mat->width), sizeof(float)));
HANDLE_ERROR(cudaMemcpy(mat->width, &width, sizeof(int), cudaMemcpyHostToDevice));
dim3 grids(WIDTH/2,HEIGHT/2);
dim3 threads(2,2);
SetMat<<<grids,threads>>>(mat);
HANDLE_ERROR(cudaMemcpy(&width, mat->width, sizeof(int), cudaMemcpyDeviceToHost));
printf("width = %d\n", width);
HANDLE_ERROR(cudaMemcpy(data, mat->data, WIDTH*HEIGHT*sizeof(float), cudaMemcpyDeviceToHost));
for(int i = 0; i < WIDTH; i++)
{
printf("data[%d] = %f\n",i,data[i]);
}
HANDLE_ERROR(cudaFree(mat->data));
HANDLE_ERROR(cudaFree(mat->width));
HANDLE_ERROR(cudaFree(mat));
free(data);
system("pause");
return(0);
}