What do you mean by "get value"? Do you want to read it inside a thread, or read it on the host side?
I presume you want to do something like this:
// Plain pair of floats; one element of the array that the kernel fills in
// and the host reads back.
struct GPUdata_struct
{
    float x; // first value of the pair
    float y; // second value of the pair
};
// Fills GPUdata with sample values: the element at the thread's flattened
// global index i receives x = 5 * cos(i / 4) and y = 3 * sin(i).
//
// Launch layout: a 1D or 2D grid of 1D or 2D blocks. The row width used for
// flattening is taken from the launch configuration (gridDim.x * blockDim.x),
// so every launched thread maps to a distinct index.
//
// Precondition: GPUdata must hold at least one element per launched thread.
// The kernel receives no element count, so it cannot bounds-check; launch
// exactly as many threads as there are elements.
__global__ void function(GPUdata_struct *GPUdata)
{
    const int x = blockDim.x * blockIdx.x + threadIdx.x;
    const int y = blockDim.y * blockIdx.y + threadIdx.y;
    // Total number of threads along x across the whole grid.
    const int width = gridDim.x * blockDim.x;
    const int gpuIdx = y * width + x;
    // Single-precision math and literals: the fields are float, so there is
    // no reason to pay for double-precision evaluation on the device.
    GPUdata[gpuIdx].x = 5.0f * cosf(gpuIdx / 4.0f);
    GPUdata[gpuIdx].y = 3.0f * sinf((float)gpuIdx);
}
// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaSucceeded(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    fprintf(stderr, "%s failed: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Allocates an array of GPUdata_struct on the device, lets the kernel fill it,
// copies the result back to the host and prints it.
// Returns 0 on success and 1 if any CUDA call fails.
int main()
{
    const int size = 32;
    GPUdata_struct *data_d = NULL; // data on the device
    GPUdata_struct data[size];     // data on the host (real storage, not a dangling pointer)
    int exitCode = 1;

    // reserve memory on the GPU
    if (!cudaSucceeded(cudaMalloc((void **)&data_d, size * sizeof(GPUdata_struct)), "cudaMalloc"))
        return exitCode;

    function<<< 1, size >>>(data_d); // fill in the data on the GPU

    // A kernel launch returns nothing, so launch errors are fetched explicitly.
    // The blocking cudaMemcpy then waits for the kernel and reports any
    // error that occurred while it was running.
    if (cudaSucceeded(cudaGetLastError(), "kernel launch") &&
        cudaSucceeded(cudaMemcpy(data, data_d, size * sizeof(GPUdata_struct), cudaMemcpyDeviceToHost), "cudaMemcpy"))
    {
        for (int i = 0; i < size; ++i) // print data
            printf("data[%d]:\tx:%f\ty:%f\n", i, data[i].x, data[i].y);
        exitCode = 0;
    }

    cudaFree(data_d); // release the device buffer on both success and failure paths
    return exitCode;
}
Of course, you can also do it the other way around: initialize the data on the host, copy it to the GPU, and use it there.
In my case, I need to do the calculation only at some [gpuIdx] indices, not in every GPU thread.
So I need to know which indices pass my condition, and then I will calculate only in some GPU blocks.
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
const int gpuIdx = y * widthp2 + x;
if (GPUdata[gpuIdx] > 0.15) <----- for selecting gpu block
{
do something 1
}
if (GPUdata[x] > 1) <----- for calculate in some block that choose from do something 1
{
do something 2
}
In the example above, how can I get the x position of gpuIdx inside the __global__ function?