Why do these two kernels/calls not provide the same answer?
func call
ComplexMul (d_input, d_filter, n_elems);
.
.
.
// Complex multiplication
static __global__ void ComplexMul(cuComplex *a, cuComplex *b, int size)
{
const int numThreads = blockDim.x * gridDim.x;
const int threadID = blockIdx.x * blockDim.x + threadIdx.x;
for (int i = threadID; i < size; i += numThreads)
{
a[i].x = a[i].x*b[i].x-a[i].y*b[i].y;
a[i].y = a[i].x*b[i].y+a[i].y*b[i].x;
}
}
ComplexMul (d_input, d_filter, d_input, n_elems);
.
.
.
// Complex multiplication
static __global__ void ComplexMul(cuComplex *a, cuComplex *b, cuComplex *c, int size)
{
const int numThreads = blockDim.x * gridDim.x;
const int threadID = blockIdx.x * blockDim.x + threadIdx.x;
for (int i = threadID; i < size; i += numThreads)
{
c[i].x = a[i].x*b[i].x-a[i].y*b[i].y;
c[i].y = a[i].x*b[i].y+a[i].y*b[i].x;
}
}