Simple code test for the Fermi architecture

Hello Community.

I need some help testing a simple piece of code. My graphics card is quite old, and I need to know whether the following code gives the right results on a Fermi GPU (that should be any NVIDIA card named GTX 480 or higher, if I am not mistaken).

The code is the following:

#include <cuda.h>

#include <cuda_runtime.h>

#include <curand_kernel.h>

#include <cstdlib>

#include <iostream>

using namespace std;

// Per-element payload of the test: a pointer to a two-float buffer plus a
// private cuRAND generator state.
//
// rand() and foo() run on the device and dereference both members there, so
// before either is called:
//   * `value` must point to at least two floats the device can access, and
//   * `B` must hold a state prepared by curand_init().
// Nothing in this struct enforces either precondition.
struct test{

    float *value;      // two-element output buffer written by rand()

    curandState B;     // generator state read and advanced by rand()

    // Draws two uniform samples from B and stores them in value[0], value[1].
    // curand_uniform() returns values in (0, 1].
    __device__ void rand(){
        value[0]=curand_uniform(&B);
        value[1]=curand_uniform(&B);
    }

    // Thin wrapper that forwards to rand().
    __device__ void foo(){ rand(); }

    // Host-side allocation of `value`; defined out of line.
    void init_func();

};

// Allocates the two-element `value` buffer and zero-fills it.
//
// The buffer is page-locked, mapped host memory rather than `new float[2]`:
// a plain heap pointer cannot be dereferenced by a kernel, and `new float[2]`
// leaves the contents uninitialized. With unified virtual addressing (64-bit
// process, compute capability 2.0 or newer) the pointer returned by
// cudaHostAlloc() is valid on both host and device.
// NOTE(review): on a platform without UVA the device would need the pointer
// from cudaHostGetDevicePointer() instead - confirm the target setup.
//
// Terminates the process if the allocation fails. The buffer must eventually
// be released with cudaFreeHost().
void test::init_func(){

    void *buf = 0;
    const cudaError_t err = cudaHostAlloc(&buf, 2*sizeof(float), cudaHostAllocMapped);
    if (err != cudaSuccess) {
        std::cerr << "init_func: cudaHostAlloc failed: "
                  << cudaGetErrorString(err) << std::endl;
        std::exit(EXIT_FAILURE);
    }

    value = static_cast<float*>(buf);
    value[0] = 0.0f;
    value[1] = 0.0f;

}
// Seeds one cuRAND state per block.
//
// Launch layout: one block per state, indexed by blockIdx.x only. `state`
// must hold at least gridDim.x elements. Every thread of a block writes the
// same element, so launch with a single thread per block.
__global__ void setup_kernel(curandState *state)
{
    const int id=blockIdx.x;

    // Fixed seed 1234, subsequence = block index, offset 0.
    curand_init(1234, id, 0, &state[id]);
}
// Fills anobj[b].value[0..1] with uniform random numbers, one object per block.
//
// Launch layout: one block per object, indexed by blockIdx.x only, with a
// single thread per block. `anobj` and `state` must each hold at least
// gridDim.x elements, `state` must already be seeded, and every
// anobj[b].value must point to device-accessible memory.
//
// NOTE(review): the body below the state load is reconstructed. The listing
// only loaded the state and did nothing with it, which cannot produce the
// random output the surrounding text describes.
__global__ void fill_mat(struct test *anobj, curandState *state)
{
    const int Idx=blockIdx.x;

    // Hand the per-block generator state to the object, since rand() draws
    // from the object's own member B.
    curandState localState = state[Idx];
    anobj[Idx].B = localState;

    anobj[Idx].foo();

    // Store the advanced state so a later launch continues the sequence
    // instead of repeating the same numbers.
    state[Idx] = anobj[Idx].B;
}
// Terminates the process with a message when a CUDA runtime call failed.
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess) {
        std::cerr << what << " failed: " << cudaGetErrorString(status) << std::endl;
        std::exit(EXIT_FAILURE);
    }
}

// Test driver: seeds `num` generator states, prepares `num` test objects on
// the host, prints their values, runs fill_mat on a device copy of the
// objects, copies the objects back and prints the values again.
//
// The struct copies move only the `value` pointer, not the floats it points
// to, so the printed numbers are meaningful only if init_func() stored a
// pointer that is valid on both host and device.
int main()
{
    const int num=3;

    curandState *devStates = 0;
    checkCuda(cudaMalloc((void **)&devStates, num*sizeof(curandState)),
              "cudaMalloc(devStates)");

    setup_kernel<<<num, 1>>>(devStates);
    checkCuda(cudaGetLastError(), "setup_kernel launch");

    struct test *results = (struct test*)malloc(num*sizeof(struct test));
    if (results == 0) {
        std::cerr << "malloc(results) failed" << std::endl;
        return EXIT_FAILURE;
    }

    // The listing shows this loop without a body; initialising every element
    // is the only use of the loop consistent with the reads of
    // results[i].value below.
    for(int j=0;j<num;++j)
        results[j].init_func();

    struct test *to_device = 0;
    checkCuda(cudaMalloc((void **)&to_device, num*sizeof(struct test)),
              "cudaMalloc(to_device)");

    for(int i=0;i<num;i++)
        for(int k=0;k<2;k++)
            cout<<"Results i="<<i<<"  k:"<<k<<"  before the kernel"<<results[i].value[k]<<endl;

    checkCuda(cudaMemcpy(to_device,results,num*sizeof(struct test),cudaMemcpyHostToDevice),
              "cudaMemcpy(host to device)");

    fill_mat<<<num,1>>>(to_device, devStates);
    checkCuda(cudaGetLastError(), "fill_mat launch");

    // This blocking copy also waits for the kernel, so a fault raised while
    // the kernel ran is reported here.
    checkCuda(cudaMemcpy(results,to_device,num*sizeof(struct test),cudaMemcpyDeviceToHost),
              "cudaMemcpy(device to host)");

    for(int i=0;i<num;i++)
        for(int k=0;k<2;k++)
            cout<<"Results i="<<i<<"  k:"<<k<<"  after the kernel"<<results[i].value[k]<<endl;

    // The per-object `value` buffers are not released here: the struct offers
    // no matching release function, and the process exits right after.
    checkCuda(cudaFree(to_device), "cudaFree(to_device)");
    checkCuda(cudaFree(devStates), "cudaFree(devStates)");
    free(results);

    return 0;
}
As far as I understand, on older graphics cards this code should give wrong results. On newer cards, instead, it should correctly print a series of zeros in the first “for” loop and a series of random numbers (between zero and one) in the second “for” loop.

If somebody could test it (with a GTX 480…), that would be really nice.