Hello, I have problem with my algorithms I can not work well with threads. I got a problem, publish a comparison of a sorting method in C with a sorting method in CUDA. Following is the algorithm that tried to use CUDA, but he did not compile anymore. I need help or tips on how to work with the threads inside the sorting method. This model was compiling but I returned to the numbers entered. Algorithm:
Thank you in advance.
#include<stdlib.h>
#include<stdio.h>
void __global__ insertion(int *v, int tam){
int i = blockDim.x * blockIdx.x + threadIdx.x;
int j = blockDim.y * blockIdx.y + threadIdx.y;
int a;
if(i<=tam){
a = v[i];
j = i;
while(v[j-1] > a){
v[j] = v[j-1];
j--;
}
v[j] = a;
}
}
int main(void){
int tamanho;
int *h_vetor;
int i,j;
int *d_vetor;
printf("Digite o tamanho do vetor: ");
scanf("%d",&tamanho);
h_vetor = (int*)malloc(sizeof(int));
cudaMalloc((void**)&d_vetor,sizeof(int));
for(i=0;i<tamanho;i++){
printf("Digite os numeros:");
scanf("%d",&h_vetor[i]);
}
cudaMemcpy(d_vetor,h_vetor,tamanho,cudaMemcpyHostToDevice);
dim3 dimBlock(16,16);
dim3 dimGrid((tamanho * dimBlock.x)/dimBlock.x,(tamanho * dimBlock.y)/dimBlock.y);
insertion<<<dimGrid,dimBlock>>>(d_vetor,tamanho);
cudaMemcpy(h_vetor,d_vetor,tamanho,cudaMemcpyDeviceToHost);
printf("Vetor ordenado: ");
for(j=0;j<tamanho;j++){
printf(" [ %d ] ",h_vetor[j]);
}
return 0;
}