threadIdx.y question

thecoder3 · January 14, 2012, 8:16pm

#include <iostream>

#include <cuda.h>

#include <stdlib.h>

using namespace std;

__global__ void function(int a[], int size)

{

	if(threadIdx.y < size)

	{

		a[threadIdx.y] = threadIdx.y;

	}

}

void checkCUDAError(const char *msg){    

	cudaError_t err = cudaGetLastError();    

	

	if( cudaSuccess != err)

	{       

		fprintf(stderr, "Cuda error: %s: %s.\n", msg,   cudaGetErrorString( err) );        

		exit(EXIT_FAILURE);    

	}                         

 }

int main()

{

	const int N = 10;

	int a[N];

	int *dev_a;

	

	cudaMalloc((void**) &dev_a, N * sizeof(int));

	cudaMemcpy(dev_a, a, N * sizeof(int), cudaMemcpyHostToDevice);

	

	dim3 block = (1,10,1);

	function<<<1,block>>>(dev_a,N);

	cudaThreadSynchronize();

	checkCUDAError("kernel invocation");

	

	cudaMemcpy(a, dev_a, N * sizeof(int), cudaMemcpyDeviceToHost);

	

	// Check for any CUDA errors    

	checkCUDAError("memcpy");

	

	for(int i=0; i < N; i++)

	{

		cout << a[i] << endl;

	}

	

	cudaFree(dev_a);

	

	cout << "Correct!" << endl;

	cin.get();

	return 0;

}

So i have the following code and the way i called it was:

dim3 block = (1,10,1);

function<<<1,block>>>(dev_a,N);

So i should have 1 block with 10 threads in the Y direction and in the kernel i try to get the threads to write their IDs to the array however wrong numbers are written there!!!

WHY??

The error function does not return anything and “Correct” is printed at the end.

I appreciate any help. If anyone could run the code on their machine and post here the ouput i would appreciate it as well.

Thanks in advance.

thecoder3 · January 14, 2012, 8:16pm

#include <iostream>

#include <cuda.h>

#include <stdlib.h>

using namespace std;

__global__ void function(int a[], int size)

{

	if(threadIdx.y < size)

	{

		a[threadIdx.y] = threadIdx.y;

	}

}

void checkCUDAError(const char *msg){    

	cudaError_t err = cudaGetLastError();    

	

	if( cudaSuccess != err)

	{       

		fprintf(stderr, "Cuda error: %s: %s.\n", msg,   cudaGetErrorString( err) );        

		exit(EXIT_FAILURE);    

	}                         

 }

int main()

{

	const int N = 10;

	int a[N];

	int *dev_a;

	

	cudaMalloc((void**) &dev_a, N * sizeof(int));

	cudaMemcpy(dev_a, a, N * sizeof(int), cudaMemcpyHostToDevice);

	

	dim3 block = (1,10,1);

	function<<<1,block>>>(dev_a,N);

	cudaThreadSynchronize();

	checkCUDAError("kernel invocation");

	

	cudaMemcpy(a, dev_a, N * sizeof(int), cudaMemcpyDeviceToHost);

	

	// Check for any CUDA errors    

	checkCUDAError("memcpy");

	

	for(int i=0; i < N; i++)

	{

		cout << a[i] << endl;

	}

	

	cudaFree(dev_a);

	

	cout << "Correct!" << endl;

	cin.get();

	return 0;

}

So i have the following code and the way i called it was:

dim3 block = (1,10,1);

function<<<1,block>>>(dev_a,N);

So i should have 1 block with 10 threads in the Y direction and in the kernel i try to get the threads to write their IDs to the array however wrong numbers are written there!!!

WHY??

The error function does not return anything and “Correct” is printed at the end.

I appreciate any help. If anyone could run the code on their machine and post here the ouput i would appreciate it as well.

Thanks in advance.

mfatica · January 14, 2012, 9:13pm

Change it to:
dim3 block = dim3(1,10,1);

and it will work.

mfatica · January 14, 2012, 9:13pm

Change it to:
dim3 block = dim3(1,10,1);

and it will work.

thecoder3 · January 14, 2012, 11:44pm

It did work indeed. Thanks for the input. Would you mind explaining why my previous version failed?

thecoder3 · January 14, 2012, 11:44pm

It did work indeed. Thanks for the input. Would you mind explaining why my previous version failed?

devkec · January 21, 2012, 6:24pm

your previous version used the comma operator, it was like writing:

int x = 1,10,1;

this really works! the comma operator when not used in function calls gives you the last element.

rapastranac · September 21, 2015, 7:04pm

It works thanks

Topic		Replies	Views
Simple question on passing to the kernel CUDA Programming and Performance	15	3391	January 15, 2012
How to realise 3 dimension calculation? CUDA Programming and Performance	9	242	August 28, 2023
cudaMemcpy problem CUDA Programming and Performance	2	1573	June 29, 2012
CUDA code giving wrong result CUDA Programming and Performance	0	399	May 4, 2020
Getting an Error Using CudaMalloc3d CUDA Programming and Performance	10	2459	December 10, 2015
Thread Synchronisation in parallel array write CUDA Programming and Performance	4	571	April 1, 2017
Cuda makes my pc crazy CUDA Programming and Performance	10	7386	September 16, 2010
PLease debug the code! CUDA Programming and Performance	2	434	July 15, 2011
I can't not get true answer at 3D array calculation CUDA Programming and Performance	12	1334	January 13, 2017
Strange Error in cuda CUDA Programming and Performance	2	487	March 6, 2017

threadIdx.y question

Related topics