#include <iostream>
#include <cuda.h>
#include <stdlib.h>
using namespace std;
__global__ void function(int a[], int size)
{
if(threadIdx.y < size)
{
a[threadIdx.y] = threadIdx.y;
}
}
void checkCUDAError(const char *msg){
cudaError_t err = cudaGetLastError();
if( cudaSuccess != err)
{
fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString( err) );
exit(EXIT_FAILURE);
}
}
int main()
{
const int N = 10;
int a[N];
int *dev_a;
cudaMalloc((void**) &dev_a, N * sizeof(int));
cudaMemcpy(dev_a, a, N * sizeof(int), cudaMemcpyHostToDevice);
dim3 block = (1,10,1);
function<<<1,block>>>(dev_a,N);
cudaThreadSynchronize();
checkCUDAError("kernel invocation");
cudaMemcpy(a, dev_a, N * sizeof(int), cudaMemcpyDeviceToHost);
// Check for any CUDA errors
checkCUDAError("memcpy");
for(int i=0; i < N; i++)
{
cout << a[i] << endl;
}
cudaFree(dev_a);
cout << "Correct!" << endl;
cin.get();
return 0;
}
So i have the following code and the way i called it was:
dim3 block = (1,10,1);
function<<<1,block>>>(dev_a,N);
So i should have 1 block with 10 threads in the Y direction and in the kernel i try to get the threads to write their IDs to the array however wrong numbers are written there!!!
WHY??
The error function does not return anything and “Correct” is printed at the end.
I appreciate any help. If anyone could run the code on their machine and post here the ouput i would appreciate it as well.
#include <iostream>
#include <cuda.h>
#include <stdlib.h>
using namespace std;
__global__ void function(int a[], int size)
{
if(threadIdx.y < size)
{
a[threadIdx.y] = threadIdx.y;
}
}
void checkCUDAError(const char *msg){
cudaError_t err = cudaGetLastError();
if( cudaSuccess != err)
{
fprintf(stderr, "Cuda error: %s: %s.\n", msg, cudaGetErrorString( err) );
exit(EXIT_FAILURE);
}
}
int main()
{
const int N = 10;
int a[N];
int *dev_a;
cudaMalloc((void**) &dev_a, N * sizeof(int));
cudaMemcpy(dev_a, a, N * sizeof(int), cudaMemcpyHostToDevice);
dim3 block = (1,10,1);
function<<<1,block>>>(dev_a,N);
cudaThreadSynchronize();
checkCUDAError("kernel invocation");
cudaMemcpy(a, dev_a, N * sizeof(int), cudaMemcpyDeviceToHost);
// Check for any CUDA errors
checkCUDAError("memcpy");
for(int i=0; i < N; i++)
{
cout << a[i] << endl;
}
cudaFree(dev_a);
cout << "Correct!" << endl;
cin.get();
return 0;
}
So i have the following code and the way i called it was:
dim3 block = (1,10,1);
function<<<1,block>>>(dev_a,N);
So i should have 1 block with 10 threads in the Y direction and in the kernel i try to get the threads to write their IDs to the array however wrong numbers are written there!!!
WHY??
The error function does not return anything and “Correct” is printed at the end.
I appreciate any help. If anyone could run the code on their machine and post here the ouput i would appreciate it as well.