/* I'm trying to solve a convolution problem, but the kernel does not appear to launch in this program. */
#include<stdio.h>
#include<stdlib.h>
#include<cuda.h>
#include<math.h>
/*
 * Forward declaration of the kernel launched from main().
 * The __global__ qualifier marks it as a device entry point that the host
 * starts with the <<<grid, block>>> launch syntax.
 */
__global__ void convo_kernel(int *d_input, int *d_output, int n);
/* Prints a diagnostic and terminates if a CUDA runtime call did not succeed. */
static void check_cuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        fprintf(stderr, "CUDA error during %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

/*
 * Host driver.
 *
 * Allocates n-element int buffers on the host and on the device, fills the
 * host input with 1s, copies it to the device, launches convo_kernel with
 * enough 4-thread blocks to cover n elements, and waits for the kernel to
 * finish before releasing every buffer.
 *
 * Returns 0 on success, 1 if a host allocation fails; any CUDA failure
 * terminates the process via check_cuda().
 */
int main()
{
    int i, n = 16;
    size_t bytes = (size_t)n * sizeof(int);

    int *h_input = (int *)malloc(bytes);
    int *h_output = (int *)malloc(bytes);
    if (h_input == NULL || h_output == NULL) {
        fprintf(stderr, "host allocation failed\n");
        free(h_input);
        free(h_output);
        return 1;
    }

    /* Device buffers must be pointers: cudaMalloc stores a device address in them. */
    int *d_input = NULL;
    int *d_output = NULL;
    check_cuda(cudaMalloc((void **)&d_input, bytes), "cudaMalloc(d_input)");
    check_cuda(cudaMalloc((void **)&d_output, bytes), "cudaMalloc(d_output)");

    // printf("\n Input Array: ");
    for (i = 0; i < n; i++) {
        h_input[i] = 1;
        // printf(" %d ", h_input[i]);
    }

    check_cuda(cudaMemcpy(d_input, h_input, bytes, cudaMemcpyHostToDevice),
               "cudaMemcpy(host -> device)");

    /* Ceil-divide so the grid covers all n elements for any n (4 blocks for n = 16). */
    int Threads = 4;
    int Block = (n + Threads - 1) / Threads;
    convo_kernel<<<Block, Threads>>>(d_input, d_output, n);

    /* A launch returns no status itself; a bad configuration shows up here. */
    check_cuda(cudaGetLastError(), "convo_kernel launch");

    /*
     * Launches are asynchronous. Without this wait the process can exit before
     * the kernel has run and before the device-side printf buffer is flushed,
     * which looks exactly like "the kernel never launched".
     */
    check_cuda(cudaDeviceSynchronize(), "convo_kernel execution");

    /* h_output is not filled yet: the kernel does not write d_output. */
    check_cuda(cudaFree(d_input), "cudaFree(d_input)");
    check_cuda(cudaFree(d_output), "cudaFree(d_output)");
    free(h_input);
    free(h_output);
    return 0;
}
/*
 * Kernel entry point for a 1-D launch.
 *
 * Each thread computes its flat global index from blockIdx.x, blockDim.x and
 * threadIdx.x and prints it. d_input and d_output are accepted but not yet
 * read or written by this body.
 *
 * d_input  - device pointer to the input array (currently unused)
 * d_output - device pointer to the output array (currently unused)
 * n        - element count; threads whose index is >= n do nothing
 */
__global__ void convo_kernel(int *d_input, int *d_output, int n)
{
    int i = blockIdx.x * blockDim.x + threadIdx.x;

    /* Guard the grid tail so an oversized launch cannot act past n elements. */
    if (i < n) {
        /* Device printf is buffered; it appears only after the host synchronizes. */
        printf("\n i = %d", i);
    }
}