Hi,
I have an float2 array of size n and another float2 array of size n*m. What I need to do is to fill the second array with the first array.
__global__ void
cudaCombineCopy_32fc(float *cuMainSrc, float *cuDst, int elementN, int count)
{
int tid = threadIdx.x;
int idx = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
if (idx < elementN * count)
{
cuDst[idx] = cuMainSrc[tid];
__syncthreads();
}
}
void Main()
{
cudaCombineCopy_32fc<<<Count, elemenCount>>>
(InputArray, OutputArray, elementCount, Count);
}
Is this method correct?
Tigga
August 28, 2008, 10:55am
2
Hi,
I have an float2 array of size n and another float2 array of size n*m. What I need to do is to fill the second array with the first array.
__global__ void
cudaCombineCopy_32fc(float *cuMainSrc, float *cuDst, int elementN, int count)
{
  int tid = threadIdx.x;
  int idx = (blockIdx.y * gridDim.x + blockIdx.x) * blockDim.x + threadIdx.x;
  if (idx < elementN * count)
  {
    cuDst[idx] = cuMainSrc[tid];
    __syncthreads();
  }
}
void Main()
{
  cudaCombineCopy_32fc<<<Count, elemenCount>>>
    (InputArray, OutputArray, elementCount, Count);
}
Is this method correct?
[snapback]431201[/snapback]
The __syncthreads is both unessesary and unsafe. An individual __syncthreads call must be called by every thread in a block or bad things happen. Having said that, the compiler probably optimises it away.