Hi,
I am trying create a multiplication function, I have an array of floats however they don’t seem to have been transformed.
Also are there any book that detail using fft with examples for arithmetic calculations? So far I have read the fft library guide and created the below.
extern "C" void mpmul(const Uchar* u,
const Uchar* v, Uchar* w) {
w = (Uchar*)malloc((strlen((char*)u) + strlen((char*)v))* sizeof(Uchar));
const double base = 256.0;
int j = 0, nn = 1, n = strlen((char*)u), m = strlen((char*)v), p = strlen((char*)w), n_max = MAX(m, n);
double carry, temp;
while (nn < n_max)
nn <<= 1;
nn <<= 1;
// Change upstream may cause this bit to change
//std::vector<double> a, b;
// Create array used for calculations
float *a; float *b;
a = (float*)malloc(nn*sizeof(float));
b = (float*)malloc(nn*sizeof(float));
// Fill Array with Zeros
for(; j < nn; j++ ){
a[j] = '0';
b[j] = '0';
}
for (j = 0; j < n; j++)
a[j] = (u[j] & 15);
for (j = 0; j < m; j++)
b[j] = (v[j] & 15);
for (j = 0; j < n; j++)
printf("%f", a[j]);
printf("\n");
for (j = 0; j < m; j++)
printf("%f", b[j]);
printf("\n");
int *d_nn;
float *d_a; float *d_b;
//GPU memory allocation
cudaMalloc((void**) &d_a, sizeof(float) * nn);
cudaMalloc((void**) &d_b, sizeof(float) * nn);
cudaMalloc((void**) &d_nn, sizeof(int) * nn);
//transfer to GPU memory
cudaMemcpy(d_a, a, sizeof(float) * nn, cudaMemcpyHostToDevice);
cudaMemcpy(d_b, b, sizeof(float) * nn, cudaMemcpyHostToDevice);
cudaMemcpy(d_nn, (int*)nn, sizeof(int)*nn, cudaMemcpyHostToDevice);
cufftHandle plan;
// creates 1D FFT plan
cufftPlan1d(&plan, nn, CUFFT_R2C, 1);
cufftExecR2C(plan, (cufftReal*)d_a, (cufftComplex*)d_a);
cufftExecR2C(plan, (cufftReal*)d_b, (cufftComplex*)d_b);
cudaMemcpy(b, d_b, sizeof(float)*nn, cudaMemcpyDeviceToHost);
cudaMemcpy(a, d_a, sizeof(float)*nn, cudaMemcpyDeviceToHost);
for (int i = 0; i < nn; i++) {
printf("%f\n", b[i]);
}
printf("\n");
b[0] *= a[0];
b[1] *= a[1];
for (j = 2; j < nn; j += 2) {
b[j] = (temp = b[j]) * a[j] - b[j + 1] * a[j + 1];
b[j + 1] = temp * a[j + 1] + b[j + 1] * a[j];
}
}