hi
i am trying to modify the in a way that i can specify number of threads, the is i get the error from cudaMemcpy with value of 4.
when i specify the thread with value of arraySize/2 it is ok but …
and i have no idea what is the problem.
__global__ void swapOnKernel(int *a, int size, int thread)
{
int i = blockDim.x * blockIdx.x + threadIdx.x * 2;
i = i* thread;
int cacheFirst;
int cacheSecond;
int cacheThird;
for (int k = 0; k < thread; k++) {
for (int j = 0; j < size / 2 + 1; j++) {
if (i + 1 < size) {
cacheFirst = a[i];
cacheSecond = a[i + 1];
if (cacheFirst > cacheSecond) {
int temp = cacheFirst;
a[i] = cacheSecond;
cacheSecond = a[i + 1] = temp;
}
}
if (i + 2 < size) {
cacheThird = a[i + 2];
if (cacheSecond > cacheThird) {
int temp = cacheSecond;
a[i + 1] = cacheThird;
a[i + 2] = temp;
}
}
__syncthreads();
}
i++;
__syncthreads();
}
}
this fuction is a bubble sort
kernel.cu (5.02 KB)