Hi,
when trying to find a specific number in an array (smallest) it just works for several array lengths and not for all ones.
For example, if N=3560 or less it works but if N=4560 or bigger it does not works…
Is there any limits i am not respecting?
Note: I get the return (minor number) on the first element of the array, on exit…
const int N=4560;
const int threadsperblock=512;
const int blocksPerGrid=((N/2)/threadsperblock)+1;
__global__ void minor(int *array)
{
int tid=threadIdx.x + (blockIdx.x * blockDim.x);
int nMiddle=(N/2)+1;
while(nMiddle > 0)
{
if((tid < nMiddle) && (tid+nMiddle < N))
{
if(array[tid+nMiddle] < array[tid])
{
array[tid]=array[tid+nMiddle];
}
}
__syncthreads();
if(nMiddle >= 3)
nMiddle=(nMiddle/2) + 1;
else
nMiddle=(nMiddle/2);
}
}
...
for (int i=0; i<N; i++)
{
a[i] = N-i;
printf("%d ",N-i);
}
ret=cudaMemcpy(dev_a,a,N*sizeof(int),cudaMemcpyHostToDevice);
if(ret != cudaSuccess)
{
printf("Error cudaMemcpy 1\n");
exit(1);
}
minor<<<blocksPerGrid,threadsperblock>>>(dev_a);
...
Env: windows 7, NVIdia QUADRO FX580
Thanks
JoseBB