I just want to read points from a file and get the distance between points in the map. To decreasing time, i use shared memory function, but my kernels doesn’t work anymore.Can anyone help me?
These are my codes,really simple:
#include<iostream>
#include<sstream>
#include<string>
#include<fstream>
const int maprow=32;
const int mapcolumn=32;
const int points=maprow*mapcolumn;
//const int maxnumber=9999;
//const int startpoint=1;
//const int endpoint=15;
void readfile(int* map)
{
std::ifstream read;
read.open("/home/wuhaoran/gridmap.txt");
for(int i=0;i<maprow;i++)
{
for(int j=0;j<mapcolumn;j++)
{
read>>map[maprow*i+j];
}
}
/*for(int i=0;i<points;i++)
{
std::cout<<map[i]<<" ";
}*/
}
__global__ void calDistance(int* dev_map, int* dev_distance)
{
int blockId = blockIdx.x + blockIdx.y * gridDim.x;
int threadID = blockId * (blockDim.x * blockDim.y)+ (threadIdx.y * blockDim.x) + threadIdx.x;
//int pointA=threadID/points;
//int pointB=threadID%points;
int pointA=blockId%gridDim.x*gridDim.x+threadIdx.x;
int pointB=blockId/gridDim.x*gridDim.x+threadIdx.y;//two points distance
__shared__ int mapmap[points];
__shared__ int distancedistance[points];
mapmap[threadIdx.x+threadIdx.y*blockDim.x]=dev_map[threadIdx.x+threadIdx.y*blockDim.x];
__syncthreads();
if(pointB==pointA)
{distancedistance[threadIdx.x+threadIdx.y*blockDim.x]=0;}
else if(mapmap[pointB]==1||mapmap[pointA]==1)
{distancedistance[threadIdx.x+threadIdx.y*blockDim.x]=10000;}
else if(pointB==pointA+32||pointB==pointA-32||pointB==pointA+1||pointB==pointA-1)
{
if(mapmap[pointB+1]==1||mapmap[pointB-1]==1||mapmap[pointB+32]==1||mapmap[pointB-32]==1)
distancedistance[threadIdx.x+threadIdx.y*blockDim.x]=20;
else
distancedistance[threadIdx.x+threadIdx.y*blockDim.x]=2;
}
else if(pointB==pointA+32+1||pointB==pointA-32+1||pointB==pointA+32-1||pointB==pointA-32-1)
{
if(mapmap[pointB+1]==1||mapmap[pointB-1]==1||mapmap[pointB+32]==1||mapmap[pointB-32]==1)
distancedistance[threadIdx.x+threadIdx.y*blockDim.x]=200;
else
distancedistance[threadIdx.x+threadIdx.y*blockDim.x]=3;
}
else
{distancedistance[threadIdx.x+threadIdx.y*blockDim.x]=10000;}
__syncthreads();
dev_distance[pointA+pointB*points]=distancedistance[threadIdx.x+threadIdx.y*blockDim.x];
}
int main()
{
int map[points]={0};
int distance[points*points]={0};
readfile(map);
//initialize the distance map
for(int i=0;i<points*points;i++)
{distance[i]=10000;}
int* dev_distance, *dev_map;
cudaMalloc( (void**)&dev_distance, points*points*sizeof(int) );
cudaMalloc( (void**)&dev_map, points*sizeof(int) );
cudaMemcpy(dev_map,map,points*sizeof(int),cudaMemcpyHostToDevice );
dim3 blockpergrid(32,32);
dim3 threadperblock(32,32);
calDistance<<<blockpergrid,threadperblock>>>(dev_map, dev_distance);
cudaMemcpy(distance,dev_distance,points*points*sizeof(int),cudaMemcpyDeviceToHost);
cudaDeviceSynchronize();
for(int i=0;i<(points*points);i++)
{
std::cout<<distance[i]<<" ";
if(i%points==31)
{std::cout<<std::endl;}
}
cudaFree(dev_distance);
cudaFree(dev_map);
}