The problem is probably very obvious, but this is my first CUDA program and I can't figure out what's wrong with it. It works fine with 32000 elements in the array, but when I run it with 64000 I get errors when copying data from the device to the host. I have probably made some mistake when accessing global memory. In emulation mode it works fine. I'm using a GeForce 8600M GT as the CUDA device.
#include <iostream>
#include <vector>
#include <iostream>
// Owning wrapper around a buffer of COUNT elements of type T obtained from
// cudaMalloc. The buffer is allocated in the constructor and released with
// cudaFree in the destructor.
//
// The object converts implicitly to T*, so it can be passed directly as a
// kernel argument.
template <typename T,std::size_t COUNT>
class memblock {
public:
    // Allocates sizeof(T)*COUNT bytes with cudaMalloc.
    // Throws std::bad_alloc when the allocation does not succeed.
    memblock() : m_ptr(NULL) {
        if(cudaMalloc((void**)&m_ptr,sizeof(T)*COUNT) != cudaSuccess)
            throw(std::bad_alloc());
    }

    // Releases the buffer. The status of cudaFree is deliberately ignored:
    // a destructor has no way to report it.
    ~memblock() {
        cudaFree(m_ptr);
    }

    // Copies COUNT elements from the host array `tab` into the buffer.
    // `tab` must point to at least COUNT readable elements.
    // A failure is reported on std::cerr together with the CUDA error text.
    void send(const T* tab) {
        const cudaError_t status =
            cudaMemcpy(m_ptr,tab,sizeof(T)*COUNT,cudaMemcpyHostToDevice);
        if(status != cudaSuccess)
            std::cerr<<"send error: "<<cudaGetErrorString(status)<<'\n';
    }

    // Copies COUNT elements from the buffer into the host array `tab`.
    // `tab` must point to at least COUNT writable elements.
    // A failure is reported on std::cerr together with the CUDA error text.
    // The status returned by cudaMemcpy may also stem from an earlier
    // asynchronous kernel launch, so printing it is what makes such a
    // failure diagnosable.
    void get(T* tab) {
        const cudaError_t status =
            cudaMemcpy(tab,m_ptr,sizeof(T)*COUNT,cudaMemcpyDeviceToHost);
        if(status != cudaSuccess)
            std::cerr<<"get error: "<<cudaGetErrorString(status)<<'\n';
    }

    // Raw pointer to the buffer, e.g. for use as a kernel argument.
    operator T* () { return m_ptr; }

private:
    // Not copyable: a copy would hold the same pointer and the buffer would
    // be passed to cudaFree twice. Declared but intentionally not defined.
    memblock(const memblock&);
    memblock& operator=(const memblock&);

    T* m_ptr;   // buffer returned by cudaMalloc
};
// One input/output record: four floats, aligned to 16 bytes.
struct __align__(16) point {
    float x, y;   // coordinates compared by test()
    float r;      // per-point radius term summed by test()
    float any;    // counter incremented by compute() for each passing test()
};
// Writes `pt` to `strm` as "| x y r any |" and returns `strm` so that
// insertions can be chained.
// The stream type is spelled std::ostream: the file has no using-directive,
// so the unqualified name does not resolve.
std::ostream& operator<< (std::ostream& strm,const point& pt) {
    strm<<"| "<<pt.x<<' '<<pt.y<<' '<<pt.r<<' '<<pt.any<<" |";
    return strm;
}
// Returns true when the squared distance between (pt1.x, pt1.y) and
// (pt2.x, pt2.y) is strictly smaller than the square of (pt1.r + pt2.r).
// Callable from both host and device code.
__host__ __device__ bool test(const point& pt1,const point& pt2) {
    const float dx    = pt1.x - pt2.x;
    const float dy    = pt1.y - pt2.y;
    const float reach = pt1.r + pt2.r;
    return (dx*dx + dy*dy) < (reach*reach);
}
// For each element, counts how many of the other elements satisfy test()
// against it and adds that count to the element's `any` field.
//
// Launch layout: 1D grid of 1D blocks, one thread per element. The thread
// with index threadIdx.x + blockIdx.x * blockDim.x handles that element;
// threads whose index is >= size do nothing, so the grid may be larger than
// the array. No shared memory is used.
//
// points: array of at least `size` elements, readable and writable from the
//         device. Only the `any` field of each element is written.
// size:   number of elements in `points`.
//
// Every thread iterates over all `size` elements, so the total work grows
// with size * size.
__global__ void compute(point* points,const unsigned size) {
    const unsigned pos = threadIdx.x + blockIdx.x * blockDim.x;
    // Guard the grid tail: without it, surplus threads would read and write
    // past the end of the array.
    if(pos >= size) return;

    const point current = points[pos];
    float hits = current.any;
    for(unsigned i=0;i<size;++i) {
        if( (i != pos) && test(current,points[i]) ) hits += 1.0f;
    }
    // Store only the counter. Other threads read x/y/r of this element
    // concurrently, so rewriting the whole struct would needlessly write
    // to memory that is being read.
    points[pos].any = hits;
}
// Fills an array with pseudo-random points, runs compute() over it on the
// device, copies the result back and prints the sum of all `any` counters
// followed by the last element.
// Returns 0 on success and 1 when any CUDA step fails.
int main() {
    const unsigned count = 64000;
    const unsigned threadsPerBlock = 320;

    // The launch below uses exactly one thread per element, so refuse a
    // configuration where the grid would not cover the array exactly.
    if(count % threadsPerBlock != 0) {
        std::cerr<<"count must be a multiple of the block size\n";
        return 1;
    }

    std::vector<point> tab(count);
    std::vector<point> tab1(count);
    for(unsigned i=0;i<count;++i) {
        tab[i].x = rand()%100;
        tab[i].y = rand()%100;
        tab[i].r = rand()%10;
        tab[i].any = 0;
    }

    try {
        memblock<point,count> pts;

        pts.send(&tab[0]);
        // send() reports a failure itself; the runtime's last-error state
        // tells us whether to go on.
        if(cudaGetLastError() != cudaSuccess)
            return 1;

        compute<<<count/threadsPerBlock,threadsPerBlock>>>(pts,count);
        // A launch returns no status. cudaGetLastError() reports a rejected
        // launch configuration; cudaDeviceSynchronize() waits for the kernel
        // and reports a failure that happened while it was running.
        // NOTE(review): on a GPU that also drives a display, a kernel that
        // runs for too long may be terminated by the driver; that would be
        // reported here - confirm on the target machine.
        cudaError_t status = cudaGetLastError();
        if(status == cudaSuccess)
            status = cudaDeviceSynchronize();
        if(status != cudaSuccess) {
            std::cerr<<"compute kernel failed: "<<cudaGetErrorString(status)<<'\n';
            return 1;
        }

        pts.get(&tab1[0]);
        // Do not print results that were never copied back.
        if(cudaGetLastError() != cudaSuccess)
            return 1;
    } catch(const std::bad_alloc&) {
        std::cerr<<"device allocation failed\n";
        return 1;
    }

    float total = 0;
    for(unsigned i=0;i<count;++i)
        total += tab1[i].any;
    std::cout<<total<<'\n';
    std::cout<<tab1[count-1]<<'\n';
    return 0;
}