Thank you txbob,
I have four files:
Header, cpp file, cu file and makefile.
My make file is:
NVCC=nvcc
CUDAFLAGS= -arch=sm_30
OPT= -g -G
RM=/bin/rm -f
all: OFDM
main: OFDM.o Generate.o
${NVCC} ${OPT} -o main OFDM.o Generate.o
Generate.o: Header.cuh Generate.cpp
${NVCC} ${OPT} ${CUDAFLAGS} -std=c++11 -c Generate.cpp
OFDM.o: Header.cuh OFDM.cu
$(NVCC) ${OPT} $(CUDAFLAGS) -std=c++11 -c OFDM.cu -lcufft
OFDM: OFDM.o Generate.o
${NVCC} ${CUDAFLAGS} -o OFDM OFDM.o Generate.o -lcufft
clean:
${RM} *.o OFDM
Header:
#define BATCH 1
#define FFT_size 512
#define numberOfUEs 10
#define numberOfBlocks 64
#define numberOfThreads 64
double getGeneratedRandom();
cuDoubleComplex getModulatedSignal();
cuDoubleComplex *getiFFT_main(cuDoubleComplex *inputSignal);
Generate.cpp:
#include "device_launch_parameters.h"
#include <cuda_runtime.h>
#include "device_functions.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cufft.h>
#include <time.h>
#include <iostream>
#include <cuComplex.h>
#include "Header.cuh"
double getGeneratedRandom()
{
int randomNumberInt = rand() % ((100 - (-100)) + 1) - 100;
double randomNumber = (double)randomNumberInt / 100;
return randomNumber;
}
cuDoubleComplex getModulatedSignal()
{
double modulatedSignalReal = getGeneratedRandom();
double modulatedSignalComplex = getGeneratedRandom();
if (modulatedSignalReal > 0.5)
modulatedSignalReal = 1;
else
modulatedSignalReal = -1;
if (modulatedSignalComplex > 0.5)
modulatedSignalComplex = 1;
else
modulatedSignalComplex = -1;
static cuDoubleComplex signal;
signal = make_cuDoubleComplex(modulatedSignalReal, modulatedSignalComplex);
return signal;
}
cuDoubleComplex * getiFFT_main(cuDoubleComplex *inputSignal)
{
int mem_size = sizeof(cuDoubleComplex)*FFT_size;
cufftHandle plan;
cufftComplex *d_signal_in, *d_signal_out;
cudaMalloc(&d_signal_in, mem_size);
cudaMalloc(&d_signal_out, mem_size);
cudaMemcpy(d_signal_in, inputSignal, mem_size, cudaMemcpyHostToDevice);
cufftPlan1d(&plan, FFT_size, CUFFT_C2C, BATCH);
cufftExecC2C(plan, d_signal_in, d_signal_out, CUFFT_INVERSE);
cudaDeviceSynchronize();
cudaMemcpy(inputSignal, d_signal_out, FFT_size * sizeof(cuDoubleComplex), cudaMemcpyDeviceToHost);
cufftDestroy(plan);
cudaFree(d_signal_in);
cudaFree(d_signal_out);
return inputSignal;
}
OFDM.cu:
#include "device_launch_parameters.h"
#include "device_functions.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <cuda_runtime.h>
#include <cufft.h>
#include <time.h>
#include "Header.cuh"
/*
for n = 1:N
for carrier_index = 1:FFT_size
Tx_carrier_signal(carrier_index) = QPSK_mod(rand(1),rand(1));
end
Time_signal(n,:) = sqrt(coeffs(n))*ifft(Tx_carrier_signal, FFT_size).* sqrt(FFT_size);
end
*/
int main()
{
srand((unsigned)time(0));
double coefficientsArray[numberOfUEs] = { 0.01, 0.05, 0.08, 0.09, 0.1, 0.11, 0.12, 0.13, 0.15, 0.16 };
cuDoubleComplex signalArray[FFT_size];
cuDoubleComplex timeSignalArray[numberOfUEs][FFT_size];
for (int a = 0; a < numberOfUEs; a++)
{
for (int carrier_index = 0; carrier_index < FFT_size; carrier_index++)
{
signalArray[carrier_index] = getModulatedSignal();
//printf("generated signal: %d %.2f, %.2f\n", carrier_index, cuCreal(signalArray[carrier_index]), cuCimag(signalArray[carrier_index]));
}
cuDoubleComplex * iFFTsignalArray = getiFFT_main(signalArray);
for (int b = 0; b < FFT_size; b++)
{
printf("generated ifft signal: [%d] [%d] %.2f, %.2f\n", a, b, cuCreal(iFFTsignalArray[b]), cuCimag(iFFTsignalArray[b]));
}
for (int b = 0; b < FFT_size; b++)
{
timeSignalArray[a][b] = cuCmul(iFFTsignalArray[b], make_cuDoubleComplex(sqrt((double)coefficientsArray[a])*sqrt((double)FFT_size), 0));
//printf("generated time signal array: [%d][%d] %.2f, %.2f\n", a, b , cuCreal(timeSignalArray[a][b]), cuCimag(timeSignalArray[a][b]));
}
}
//printf("generated ifft signal: %d %.2f, %.2f\n", a, cuCreal(iFFTsignalArray[a]), cuCimag(iFFTsignalArray[a]));
/*
for (int b = 0; b < FFT_size; b++)
{
timeSignalArray[a][b] = cuCmul(iFFTsignalArray[b], make_cuDoubleComplex(sqrt((double)coefficientsArray[a])*sqrt((double)FFT_size), 0));
//printf("generated time signal array: [%d][%d] %.2f, %.2f\n", a, b , cuCreal(timeSignalArray[a][b]), cuCimag(timeSignalArray[a][b]));
}
*/
return 0;
}
output is too large two-dimensional (10x512), so I expect all values to be filled, but after 255 all the values are 0 in each row:
generated ifft signal: [0] [0] -16285016252571713536.00, -14810003504.49
generated ifft signal: [0] [1] 1995785480.20, 1096887.76
generated ifft signal: [0] [2] 1305770.25, -7722831.03
generated ifft signal: [0] [3] -56837415062.17, 1400109.75
generated ifft signal: [0] [4] -0.00, -375201729.35
generated ifft signal: [0] [5] -32483348621.50, -27009093713.53
generated ifft signal: [0] [6] -482210497.30, -7249689.01
generated ifft signal: [0] [7] 0.23, 9407738.01
generated ifft signal: [0] [8] -29685420161.93, 166338848.64
generated ifft signal: [0] [9] -26213.95, -88940.27
generated ifft signal: [0] [10] -142348.84, 909034109.31
generated ifft signal: [0] [11] 122958160.49, 1111066.26
and after the 255th
generated ifft signal: [0] [253] -56837398678.17, -7722833.03
generated ifft signal: [0] [254] 1305767.75, 1096887.26
generated ifft signal: [0] [255] 1995784968.20, -14809999408.49
generated ifft signal: [0] [256] 0.00, 0.00
generated ifft signal: [0] [257] 0.00, 0.00
generated ifft signal: [0] [258] 0.00, 0.00
generated ifft signal: [0] [259] 0.00, 0.00
generated ifft signal: [0] [260] 0.00, 0.00
generated ifft signal: [0] [261] 0.00, 0.00
generated ifft signal: [0] [262] 0.00, 0.00
generated ifft signal: [0] [263] 0.00, 0.00
generated ifft signal: [0] [264] 0.00, 0.00