Hello Community.
I need some help for a test on a simple code. My graphic card is quite old and I need to know if the following code give the right results on a Fermi GPU (it should be any NVIDIA with a name GTX480 or bigger if I am not wrong).
The code is the following:
#include <cuda.h>
#include <iostream>
#include <curand_kernel.h>
using namespace std;
struct test{
float *value;
curandState B;
__device__ void rand(){value[0]=curand_uniform(&B); value[1]=curand_uniform(&B); }
__device__ void foo(){rand();}
void init_func();
};
void test::init_func(){
value = new float[2];
}
__global__ void setup_kernel(curandState *state)
{
const int id=blockIdx.x;
curand_init(1234, id, 0, &state[id]);
}
__global__ void fill_mat(struct test *anobj, curandState *state)
{
const int Idx=blockIdx.x;
curandState localState = state[Idx];
anobj[Idx].B=localState;
anobj[Idx].foo();
}
int main()
{
int num=3;
curandState *devStates;
cudaMalloc( (void **)&devStates, num*sizeof(curandState) );
setup_kernel<<<num, 1>>>(devStates);
struct test *results = (struct test*)malloc(num*sizeof(struct test));
for(int j=0;j<num;++j)
{
results[j].init_func();
}
struct test *to_device;
cudaMalloc ( (void **)&to_device, num*sizeof(struct test));
for(int i=0;i<num;i++)
for(int k=0;k<2;k++)
cout<<"Results i="<<i<<" k:"<<k<<" before the kernel"<<results[i].value[k]<<endl;
cudaMemcpy(to_device,results,num*sizeof(struct test),cudaMemcpyHostToDevice);
fill_mat<<<num,1>>>(to_device, devStates);
cudaMemcpy(results,to_device,num*sizeof(struct test),cudaMemcpyDeviceToHost);
for(int i=0;i<num;i++)
for(int k=0;k<2;k++)
cout<<"Results i="<<i<<" k:"<<k<<" after the kernel"<<results[i].value[k]<<endl;
return 0;
}
As far I have understand in old graphic cards this code should give wrong results. Instead in new cards it should correctly give a series of zeros in the first “for loop” and a series of random number (between zero and one) in the second “for loop”.
If somebody could test it (with a GTX 480…) it would be really nice.
Thanks.