Main function:
// Host-side test: fills the device buffers with the init kernel, copies them
// back and prints element 1 of each (expected output: 0 and -1).
//
// NOTE(review): `weight` (short*) and `volumn` (float*) are device pointers that
// are not declared in this fragment; they are assumed to be declared elsewhere.
// NOTE(review): the early `return 1` assumes this code sits directly inside
// `int main()` — adjust if it is moved into another function.

// Every CUDA runtime call returns a cudaError_t. Without checking it, a failed
// allocation or kernel launch goes unnoticed and the copies below just return
// whatever bytes happened to be in the freshly allocated device memory.
#define CUDA_CHECK(call)                                                    \
    do {                                                                    \
        cudaError_t err_ = (call);                                          \
        if (err_ != cudaSuccess) {                                          \
            std::cerr << "CUDA error at line " << __LINE__ << ": "          \
                      << cudaGetErrorString(err_) << std::endl;             \
            return 1;                                                       \
        }                                                                   \
    } while (0)

// Number of elements in the 256 x 256 x 256 volume.
const size_t num_voxels = (size_t)256 * 256 * 256;

float *test_volumn = new float[num_voxels];
short *test_weight = new short[num_voxels];

CUDA_CHECK(cudaMalloc(&weight, num_voxels * sizeof(short)));
CUDA_CHECK(cudaMalloc(&volumn, num_voxels * sizeof(float)));

// One block per (high, wide) pair, one thread per element along the last axis.
dim3 dimblock(256);
dim3 dimgrid(256, 256);
save_output_image_init_dev<<<dimgrid, dimblock>>>(weight, volumn);

// A kernel launch returns no status itself. cudaGetLastError() reports launch
// failures (for example, no kernel image built for this GPU's architecture —
// check the nvcc -arch/-gencode flags if this fires on only one machine), and
// cudaDeviceSynchronize() reports faults raised while the kernel was running.
CUDA_CHECK(cudaGetLastError());
CUDA_CHECK(cudaDeviceSynchronize());

CUDA_CHECK(cudaMemcpy(test_volumn, volumn, num_voxels * sizeof(float), cudaMemcpyDeviceToHost));
CUDA_CHECK(cudaMemcpy(test_weight, weight, num_voxels * sizeof(short), cudaMemcpyDeviceToHost));

cout << test_weight[1] << endl;
cout << test_volumn[1];
Kernel function:
/**
 * Initialises the reconstruction buffers: sets every weight to 0 and every
 * volume element to -1.
 *
 * Expected launch layout: grid = (256, 256), block = (256). Each thread writes
 * exactly one element; the strides 65536 (= 256 * 256) and 256 below are
 * hard-coded for that layout, so both buffers must hold at least
 * 256 * 256 * 256 elements.
 *
 * @param weight  device buffer of shorts, written with 0
 * @param volumn  device buffer of floats, written with -1
 *
 * NOTE(review): a __global__ function cannot be a class member. If
 * `reconstruction` is a class rather than a namespace, this kernel has to
 * become a free function — confirm against the declaration.
 */
__global__ void reconstruction::save_output_image_init_dev(short *weight, float *volumn)
{
    const int _high = blockIdx.x;
    const int _wide = blockIdx.y;
    const int _long = threadIdx.x;

    // threadIdx.x is the fastest-varying term, so neighbouring threads of a
    // block write neighbouring elements.
    const int index = _high * 65536 + _wide * 256 + _long;

    weight[index] = 0;
    volumn[index] = -1.0f;
}
When I run this program on a GT9600 (compute capability 1.1), the output is 0 and -1, which is correct.
But when I run it on another computer equipped with a GT630 (compute capability 2.1), the output is no longer 0 and -1. It seems to be random and is not the same from run to run, which is wrong.
Why?