Hi everybody
I’m trying to pass a 2D matrix of 0’s to device to calculate in the gpu a simple operation:
for example if I have a matrix[height][width] where heigh = 2 an width = 2
matrix[0][0] = 0
matrix[0][1] = 1
matrix[1][0] = 2
matrix[1][1] = 3
Finally I want copy the results from device to host and print results. The code works fine if height = widht, but it doesn’t work if height != width
Any Ideas?
Thank you. I’m desperate :(
#include <stdio.h>
#include <stdlib.h>
#include <cuda.h>
#include <curand_kernel.h>
__global__ void MyKernel(float** dev_matrix, size_t pitch, int width, int height)
{
int number = 0;
for (int i = 0; i < height; ++i)
{
float* row = (float*)((char*)dev_matrix + i*pitch);
for (int j = 0; j < width; ++j)
{
row[j] = number;
number++;
}
}
}
int main (int argc , char * argv [])
{
int width = 4, height = 2, i, j;
float matrix[width][height];
float **dev_matrix;
size_t pitch;
printf("\nMATRIX MANIPULATION\n");
for (i = 0; i < height; i++)
for (j = 0; j < width; j++)
matrix[i][j] = 0.0;
printf("Matrix in host memory\n");
for (i = 0; i < height; i++)
{
for (j = 0; j < width; j++)
printf("%f ", matrix[i][j]);
printf("\n");
}
cudaMallocPitch(&dev_matrix, &pitch, width * sizeof(float), height);
cudaMemcpy2D(dev_matrix, pitch, matrix, width * sizeof(float), width * sizeof(float), height, cudaMemcpyHostToDevice);
MyKernel<<<1, 1>>>(dev_matrix, pitch, width, height);
cudaMemcpy2D(matrix, width * sizeof(float), dev_matrix, pitch, width * sizeof(float), height, cudaMemcpyDeviceToHost);
printf("Matrix after calculate elements in the gpu\n");
for (i = 0; i < height; i++)
{
for (j = 0; j < width; j++)
printf("%f ", matrix[i][j]);
printf("\n");
}
cudaFree(dev_matrix);
return 0;
}