hey
i’m working at a simple parallel bucket sort and one of my kernels keep returning me this error.
__global__ static void filler (int * poleDat, int * vyslednePole, int * pocty, int pocetPrvkov, int velkost, int pocetSubPostupnosti)
{
int i=0;
int j= 0;
int tmp=0;
int tx = threadIdx.x;
int ini = 0;
if(pocty[tx] != 0) {
for(i=0;i<tx;i++)
{
ini = ini + pocty[i]; //counting the position for the first element of the bucket
}
for (i=0;i<pocetPrvkov;i++) //looping through the array
{
if ((poleDat[i]>=tx*velkost) && (poleDat[i]<=((tx+1)*velkost))) //determin if the current element fits into the array
{
vyslednePole[ini+j]=poleDat[i]; //if it does, save it to the first free position in the bucket
j++; //position counter in the bucket
}
}
}
}
it seems that the error is caused by the line
vyslednePole[ini+j]=poleDat[i];
the variable vyslednePole is allocated in the host part this way
int * poleVysledne = AllocateOnDevice(pocetPrvkov);
int * AllocateOnDevice (int pocet)
{
int * Mdevice;
int size = pocet * sizeof(int);
cudaMalloc((void**)&Mdevice, size);
return Mdevice;
}
and then passed like an argument to the kernel. poleDat is allocated just the same way and also passed like an argument and the number of elemets is also pocetPrvkov so i see no problem here. the strange thing is that i keep getting this error even if i try to do just a simple copy of the array.
for (i=0;i<pocetPrvkov;i++) vyslednePole[i]=poleDat[i];
i would really appreciate any ideas what could it be caused with :">