# Question, Algoritm Help with kernel invocation

Hello, I have problem with my algorithms I can not work well with threads. I got a problem, publish a comparison of a sorting method in C with a sorting method in CUDA. Following is the algorithm that tried to use CUDA, but he did not compile anymore. I need help or tips on how to work with the threads inside the sorting method. This model was compiling but I returned to the numbers entered. Algorithm:

``````#include<stdlib.h>

#include<stdio.h>

void __global__ insertion(int *v, int tam){

int i = blockDim.x * blockIdx.x + threadIdx.x;

int j = blockDim.y * blockIdx.y + threadIdx.y;

int a;

if(i<=tam){

a = v[i];

j = i;

while(v[j-1] > a){

v[j] = v[j-1];

j--;

}

v[j] = a;

}

}

int main(void){

int tamanho;

int *h_vetor;

int i,j;

int *d_vetor;

printf("Digite o tamanho do vetor: ");

scanf("%d",&tamanho);

h_vetor = (int*)malloc(sizeof(int));

cudaMalloc((void**)&d_vetor,sizeof(int));

for(i=0;i<tamanho;i++){

printf("Digite os numeros:");

scanf("%d",&h_vetor[i]);

}

cudaMemcpy(d_vetor,h_vetor,tamanho,cudaMemcpyHostToDevice);

dim3 dimBlock(16,16);

dim3 dimGrid((tamanho * dimBlock.x)/dimBlock.x,(tamanho * dimBlock.y)/dimBlock.y);

insertion<<<dimGrid,dimBlock>>>(d_vetor,tamanho);

cudaMemcpy(h_vetor,d_vetor,tamanho,cudaMemcpyDeviceToHost);

for(j=0;j<tamanho;j++){

printf(" [ %d ] ",h_vetor[j]);

}

return 0;

}
``````

Hello, I have problem with my algorithms I can not work well with threads. I got a problem, publish a comparison of a sorting method in C with a sorting method in CUDA. Following is the algorithm that tried to use CUDA, but he did not compile anymore. I need help or tips on how to work with the threads inside the sorting method. This model was compiling but I returned to the numbers entered. Algorithm:

``````#include<stdlib.h>

#include<stdio.h>

void __global__ insertion(int *v, int tam){

int i = blockDim.x * blockIdx.x + threadIdx.x;

int j = blockDim.y * blockIdx.y + threadIdx.y;

int a;

if(i<=tam){

a = v[i];

j = i;

while(v[j-1] > a){

v[j] = v[j-1];

j--;

}

v[j] = a;

}

}

int main(void){

int tamanho;

int *h_vetor;

int i,j;

int *d_vetor;

printf("Digite o tamanho do vetor: ");

scanf("%d",&tamanho);

h_vetor = (int*)malloc(sizeof(int));

cudaMalloc((void**)&d_vetor,sizeof(int));

for(i=0;i<tamanho;i++){

printf("Digite os numeros:");

scanf("%d",&h_vetor[i]);

}

cudaMemcpy(d_vetor,h_vetor,tamanho,cudaMemcpyHostToDevice);

dim3 dimBlock(16,16);

dim3 dimGrid((tamanho * dimBlock.x)/dimBlock.x,(tamanho * dimBlock.y)/dimBlock.y);

insertion<<<dimGrid,dimBlock>>>(d_vetor,tamanho);

cudaMemcpy(h_vetor,d_vetor,tamanho,cudaMemcpyDeviceToHost);

for(j=0;j<tamanho;j++){

printf(" [ %d ] ",h_vetor[j]);

}

return 0;

}
``````