cuda image mirror error

HI I’m a new cuda learner, i’d like to read a 512*512 image(lena.bmp) and mirror display it ,

this is my code, i can finish compiler it, but i got segmatantion error,

can anyone help me!!

thanks!!!

#include “stdio.h”
#include “stdlib.h”
#include “cuda.h”

int bmp_read(unsigned char *, int , int , char *);
int bmp_write(unsigned char *, int , int , char *);
global void Mirror(unsigned char *, unsigned char *, int , int);

int main() {
unsigned char *ori, *tar;
unsigned char *d_ori,*d_tar;

int xsize = 512;
int ysize = 512;
int n=xsize*ysize*3;

double t_start,t_end;

ori = (unsigned char *)malloc((size_t)xsize * ysize * 3);
tar = (unsigned char *)malloc((size_t)xsize * ysize * 3);

bmp_read(ori, xsize, ysize, "lena.bmp");

cudaMalloc((void**)&d_tar,n*sizeof(char));
cudaMalloc((void**)&d_ori,n*sizeof(char));
cudaMemcpy(d_ori,ori,n*sizeof(char),cudaMemcpyHostToDevice);
cudaMemcpy(d_tar,tar,n*sizeof(char),cudaMemcpyHostToDevice);    

t_start=clock(); 

dim3 dimGrid(32,96);
dim3 dimBlock(16,16);


Mirror<<<dimGrid,dimBlock>>>(d_tar, d_ori, xsize ,ysize);   
t_end=clock();

cudaMemcpy(tar,d_tar,n*sizeof(char),cudaMemcpyDeviceToHost);

bmp_write(d_tar, xsize, ysize, "lena_cuda");
cudaFree(d_tar);
cudaFree(d_ori);
printf("run time:%lfs",(double)((t_end-t_start)/CLOCKS_PER_SEC));

}

int bmp_read(unsigned char *image, int xsize, int ysize, char *filename) {
char fname_bmp[128];
sprintf(fname_bmp, “%s.bmp”, filename);

FILE *fp;
if (!(fp = fopen(fname_bmp, "rb"))) 
  return -1;
  
unsigned char header[54];
fread(header, sizeof(unsigned char), 54, fp);
fread(image, sizeof(unsigned char), (size_t)(long)xsize * ysize * 3, fp);

fclose(fp);
return 0;

}

int bmp_write(unsigned char *image, int xsize, int ysize, char *filename) {
unsigned char header[54] = {
0x42, 0x4d, 0, 0, 0, 0, 0, 0, 0, 0,
54, 0, 0, 0, 40, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 24, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0
};

long file_size = (long)xsize * (long)ysize * 3 + 54;
header[2] = (unsigned char)(file_size &0x000000ff);
header[3] = (file_size >> 8) & 0x000000ff;
header[4] = (file_size >> 16) & 0x000000ff;
header[5] = (file_size >> 24) & 0x000000ff;

long width = xsize;
header[18] = width & 0x000000ff;
header[19] = (width >> 8) &0x000000ff;
header[20] = (width >> 16) &0x000000ff;
header[21] = (width >> 24) &0x000000ff;

long height = ysize;
header[22] = height &0x000000ff;
header[23] = (height >> 8) &0x000000ff;
header[24] = (height >> 16) &0x000000ff;
header[25] = (height >> 24) &0x000000ff;

char fname_bmp[128];
sprintf(fname_bmp, "%s.bmp", filename);

FILE *fp;
if (!(fp = fopen(fname_bmp, "wb"))) 
  return -1;
  
fwrite(header, sizeof(unsigned char), 54, fp);
fwrite(image, sizeof(unsigned char), (size_t)(long)xsize * ysize * 3, fp);

fclose(fp);
return 0;

}

global void Mirror(unsigned char *ori,unsigned char *tar,int xsize,int ysize)

{
int n=xsizeysize3;
int x=blockIdx.xblockDim.x+threadIdx.x;
int y=blockIdx.y
blockDim.y+threadIdx.y;
int index=y*xsize+x;
if(index<=n)
{
tar[index]=ori[n-index];
}
}

bmp_write(d_tar, xsize, ysize, "lena_cuda");

Shouldn’t it be ‘tar’ instead of ‘d_tar’?

MK

P.S.
Use CUDA events for any measurements (CUDA Events).