cuda on quadro NVS 285? simple kernel doesn't work...

Hi,

I currently work at a company that doesn’t allow me to download the CUDA SDK. Therefore I can’t run the device query test or the memory bandwidth test.

So I wrote a (very) simple kernel, which builds without any problem but doesn’t give me the expected results. Instead, I get a zero-filled vector.

This kernel computes the sum of two vectors of size N (single-precision floating point) and stores the result in a third vector.

The code is very simple, so I don’t think I’ve made a mistake in it. I don’t understand what could be wrong… perhaps a hardware problem, or a careless mistake…

I use the CUDA toolkit v3.2 and NVIDIA driver 6.14.12.5957, dated 01/09/2010, under Windows XP.

Thank you very much for the attention you’ll give to my request.

R.Portalez

Here is my code :

main.cpp

[codebox]#include <stdlib.h>

extern "C" void addGPU(int N, float* a, float* b, float* c) ;

// Host driver: fills two N-element float vectors with 0, 1, ..., N-1, asks
// addGPU to compute their element-wise sum into cc, then releases the buffers.
// Returns 0 on success and 1 if a host allocation fails.
int main(int argc, char** argv)
{
	const int N = 5 ;

	// The buffers hold floats, so size them with sizeof(float) rather than
	// sizeof(int); the two only happen to match on common platforms.
	float* a = (float*) malloc(N*sizeof(float)) ;
	float* b = (float*) malloc(N*sizeof(float)) ;
	float* cc = (float*) malloc(N*sizeof(float)) ;

	// malloc returns a null pointer on failure; free() accepts null, so the
	// partial-allocation case can be cleaned up unconditionally.
	if (!a || !b || !cc)
	{
		free(a) ;
		free(b) ;
		free(cc) ;
		return 1 ;
	}

	for (int i = 0 ; i < N ; ++i)
	{
		a[i] = float(i) ;
		b[i] = float(i) ;
		cc[i] = 0.f ;	// stays zero if addGPU never writes the result back
	}

	addGPU(N, a, b, cc) ;

	// Release the host buffers that the original listing leaked.
	free(a) ;
	free(b) ;
	free(cc) ;

	return 0 ;
}

[/codebox]

kernel.cu

[codebox]#include <cstdio>

// Element-wise vector addition on the device: c[k] = a[k] + b[k] for k in [0, N).
// Each thread starts at its global index and advances by the total number of
// launched threads (grid-stride loop), so any 1D launch configuration covers
// all N elements and threads whose index is >= N do nothing.
// a, b and c must be device pointers to at least N floats.
__global__ void kernel_addGPU(int N, const float* a, const float* b, float* c)
{
	int step = blockDim.x*gridDim.x ;

	for (int k = threadIdx.x + blockIdx.x*blockDim.x ; k < N ; k += step)
	{
		c[k] = a[k] + b[k] ;
	}
}

// Returns true when `status` is cudaSuccess; otherwise reports the failing
// step and the runtime's error string on stderr and returns false.
static bool cudaSucceeded(cudaError_t status, const char* what)
{
	if (status == cudaSuccess)
	{
		return true ;
	}
	fprintf(stderr, "addGPU: %s failed: %s\n", what, cudaGetErrorString(status)) ;
	return false ;
}

// Host wrapper: copies the N-element host vectors a and b to the device, runs
// kernel_addGPU, and copies the sum back into the host vector c.
// Every CUDA call is checked. On the first failure the error is printed to
// stderr, the device buffers are released, and c is left untouched, so
// problems such as a driver/toolkit mismatch no longer show up only as a
// result vector that was never written.
// Does nothing when N <= 0.
extern "C" void addGPU(int N, float* a, float* b, float* c)
{
	if (N <= 0)
	{
		return ;
	}

	// size_t avoids narrowing the byte count into an int.
	const size_t size = (size_t) N * sizeof(float) ;

	// Null-initialised so the cleanup below is safe after a partial failure.
	float *d_a = 0, *d_b = 0, *d_c = 0 ;

	bool ok = cudaSucceeded(cudaMalloc((void**) &d_a, size), "cudaMalloc(d_a)")
	       && cudaSucceeded(cudaMalloc((void**) &d_b, size), "cudaMalloc(d_b)")
	       && cudaSucceeded(cudaMalloc((void**) &d_c, size), "cudaMalloc(d_c)")
	       && cudaSucceeded(cudaMemcpy(d_a, a, size, cudaMemcpyHostToDevice), "cudaMemcpy(a -> d_a)")
	       && cudaSucceeded(cudaMemcpy(d_b, b, size, cudaMemcpyHostToDevice), "cudaMemcpy(b -> d_b)") ;

	if (ok)
	{
		kernel_addGPU <<< 1, 8 >>> (N, d_a, d_b, d_c) ;

		// A launch returns no status: cudaGetLastError reports launch failures,
		// and the synchronize call reports errors raised while the kernel ran.
		// cudaThreadSynchronize is kept for the CUDA 3.2 toolkit named in the
		// post; later toolkits deprecate it in favour of cudaDeviceSynchronize.
		ok = cudaSucceeded(cudaGetLastError(), "kernel_addGPU launch")
		  && cudaSucceeded(cudaThreadSynchronize(), "kernel_addGPU execution") ;
	}

	if (ok)
	{
		cudaSucceeded(cudaMemcpy(c, d_c, size, cudaMemcpyDeviceToHost), "cudaMemcpy(d_c -> c)") ;
	}

	// cudaFree performs no operation on a null pointer.
	cudaFree(d_a) ;
	cudaFree(d_b) ;
	cudaFree(d_c) ;
}[/codebox]

Hi,

I currently work at a company that doesn’t allow me to download the CUDA SDK. Therefore I can’t run the device query test or the memory bandwidth test.

So I wrote a (very) simple kernel, which builds without any problem but doesn’t give me the expected results. Instead, I get a zero-filled vector.

This kernel computes the sum of two vectors of size N (single-precision floating point) and stores the result in a third vector.

The code is very simple, so I don’t think I’ve made a mistake in it. I don’t understand what could be wrong… perhaps a hardware problem, or a careless mistake…

I use the CUDA toolkit v3.2 and NVIDIA driver 6.14.12.5957, dated 01/09/2010, under Windows XP.

Thank you very much for the attention you’ll give to my request.

R.Portalez

Here is my code :

main.cpp

[codebox]#include <stdlib.h>

extern "C" void addGPU(int N, float* a, float* b, float* c) ;

// Host driver: fills two N-element float vectors with 0, 1, ..., N-1, asks
// addGPU to compute their element-wise sum into cc, then releases the buffers.
// Returns 0 on success and 1 if a host allocation fails.
int main(int argc, char** argv)
{
	const int N = 5 ;

	// The buffers hold floats, so size them with sizeof(float) rather than
	// sizeof(int); the two only happen to match on common platforms.
	float* a = (float*) malloc(N*sizeof(float)) ;
	float* b = (float*) malloc(N*sizeof(float)) ;
	float* cc = (float*) malloc(N*sizeof(float)) ;

	// malloc returns a null pointer on failure; free() accepts null, so the
	// partial-allocation case can be cleaned up unconditionally.
	if (!a || !b || !cc)
	{
		free(a) ;
		free(b) ;
		free(cc) ;
		return 1 ;
	}

	for (int i = 0 ; i < N ; ++i)
	{
		a[i] = float(i) ;
		b[i] = float(i) ;
		cc[i] = 0.f ;	// stays zero if addGPU never writes the result back
	}

	addGPU(N, a, b, cc) ;

	// Release the host buffers that the original listing leaked.
	free(a) ;
	free(b) ;
	free(cc) ;

	return 0 ;
}

[/codebox]

kernel.cu

[codebox]#include <cstdio>

// Element-wise vector addition on the device: c[k] = a[k] + b[k] for k in [0, N).
// Each thread starts at its global index and advances by the total number of
// launched threads (grid-stride loop), so any 1D launch configuration covers
// all N elements and threads whose index is >= N do nothing.
// a, b and c must be device pointers to at least N floats.
__global__ void kernel_addGPU(int N, const float* a, const float* b, float* c)
{
	int step = blockDim.x*gridDim.x ;

	for (int k = threadIdx.x + blockIdx.x*blockDim.x ; k < N ; k += step)
	{
		c[k] = a[k] + b[k] ;
	}
}

// Returns true when `status` is cudaSuccess; otherwise reports the failing
// step and the runtime's error string on stderr and returns false.
static bool cudaSucceeded(cudaError_t status, const char* what)
{
	if (status == cudaSuccess)
	{
		return true ;
	}
	fprintf(stderr, "addGPU: %s failed: %s\n", what, cudaGetErrorString(status)) ;
	return false ;
}

// Host wrapper: copies the N-element host vectors a and b to the device, runs
// kernel_addGPU, and copies the sum back into the host vector c.
// Every CUDA call is checked. On the first failure the error is printed to
// stderr, the device buffers are released, and c is left untouched, so
// problems such as a driver/toolkit mismatch no longer show up only as a
// result vector that was never written.
// Does nothing when N <= 0.
extern "C" void addGPU(int N, float* a, float* b, float* c)
{
	if (N <= 0)
	{
		return ;
	}

	// size_t avoids narrowing the byte count into an int.
	const size_t size = (size_t) N * sizeof(float) ;

	// Null-initialised so the cleanup below is safe after a partial failure.
	float *d_a = 0, *d_b = 0, *d_c = 0 ;

	bool ok = cudaSucceeded(cudaMalloc((void**) &d_a, size), "cudaMalloc(d_a)")
	       && cudaSucceeded(cudaMalloc((void**) &d_b, size), "cudaMalloc(d_b)")
	       && cudaSucceeded(cudaMalloc((void**) &d_c, size), "cudaMalloc(d_c)")
	       && cudaSucceeded(cudaMemcpy(d_a, a, size, cudaMemcpyHostToDevice), "cudaMemcpy(a -> d_a)")
	       && cudaSucceeded(cudaMemcpy(d_b, b, size, cudaMemcpyHostToDevice), "cudaMemcpy(b -> d_b)") ;

	if (ok)
	{
		kernel_addGPU <<< 1, 8 >>> (N, d_a, d_b, d_c) ;

		// A launch returns no status: cudaGetLastError reports launch failures,
		// and the synchronize call reports errors raised while the kernel ran.
		// cudaThreadSynchronize is kept for the CUDA 3.2 toolkit named in the
		// post; later toolkits deprecate it in favour of cudaDeviceSynchronize.
		ok = cudaSucceeded(cudaGetLastError(), "kernel_addGPU launch")
		  && cudaSucceeded(cudaThreadSynchronize(), "kernel_addGPU execution") ;
	}

	if (ok)
	{
		cudaSucceeded(cudaMemcpy(c, d_c, size, cudaMemcpyDeviceToHost), "cudaMemcpy(d_c -> c)") ;
	}

	// cudaFree performs no operation on a null pointer.
	cudaFree(d_a) ;
	cudaFree(d_b) ;
	cudaFree(d_c) ;
}[/codebox]

I found an answer

My driver is too old, and the most recent driver I can download is only compatible with the CUDA 3.1 toolkit.

Thanks
rportalez

I found an answer

My driver is too old, and the most recent driver I can download is only compatible with the CUDA 3.1 toolkit.

Thanks
rportalez

So Can I try CUDA programming on a machine that has NVS 285?

What are the requirements/steps to prepare machine for the same?