This implementation does not work. I get an error during the compilation.
Here is my errorinformation: operator “=” matches these operands
Here is my complete code:
// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
// includes, project
#include <cufft.h>
#include <cutil_inline.h>
// Complex data type
typedef float2 Complex;
static global void absolute_value(Complex*);
////////////////////////////////////////////////////////////////////////////////
// declaration, forward
void runTest(int argc, char** argv);
// The filter size is assumed to be a number smaller than the signal size
#define SIGNAL_SIZE 512*256
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char** argv)
{
runTest(argc, argv);
cutilExit(argc, argv);
}
////////////////////////////////////////////////////////////////////////////////
//! Run a simple test for CUDA
////////////////////////////////////////////////////////////////////////////////
void runTest(int argc, char** argv)
{
if( cutCheckCmdLineFlag(argc, (const char**)argv, “device”) )
cutilDeviceInit(argc, argv);
else
cudaSetDevice( cutGetMaxGflopsDeviceId() );
// Allocate host memory for the signal
Complex* h_signal_a = (Complex*)malloc(sizeof(Complex) * SIGNAL_SIZE);
// Initalize the memory for the signal
for(unsigned int i = 0; i < SIGNAL_SIZE; ++i)
{
h_signal_a[i].x = i;
h_signal_a[i].y = 0;
printf(“Wert: %f \n”, h_signal_a[i].x);
}
int mem_size = sizeof(Complex) * SIGNAL_SIZE;
// Allocate device memory for signal
Complex* d_signal_a;
cutilSafeCall(cudaMalloc((void**)&d_signal_a, mem_size));
// Copy host memory to device
cutilSafeCall(cudaMemcpy(d_signal_a, h_signal_a, mem_size, cudaMemcpyHostToDevice));
absolute_value<<<SIGNAL_SIZE/512, 512>>>(d_signal_a);
// Check if kernel execution generated and error
cutilCheckMsg("Kernel execution failed [ ComplexPointwiseMulAndScale ]");
// Allocate host memory for the result
Complex* h_result = (Complex*)malloc(sizeof(Complex) * SIGNAL_SIZE);
// Copy device memory to host
cutilSafeCall(cudaMemcpy(h_signal_a, d_signal_a, mem_size, cudaMemcpyDeviceToHost));
// Das Ergebnis anzeigen
for (unsigned int i = 0; i < SIGNAL_SIZE; ++i)
{
printf(" Ergebnis: %f \n", h_signal_a[i].x);
}
// cleanup memory
free(h_signal_a);
cutilSafeCall(cudaFree(d_signal_a));
cudaThreadExit();
}
// Betrag berechnen
global void absolute_value(Complex *indata)
{
int index = threadIdx.x + blockIdx.x * blockDim.x;
float2 val = indata[index];
indata[index] <b>=</b> ((val.x * val.x + val.y * val.y)/4096.0f); // <b>here is the error</b> the compiler dont know the "=" operand for float2
}