This code was crashed in if ch = 1.
Of course this code is not complete. There are no device running.
But crashing, I can’t understood.
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <cutil.h>
#include <math_constants.h>
const int DATASIZE = 48000;
const int FIR_CHMAX = 10;
const float FIR_FLOAT_FS = 48000.0F;
short* indata;
float* h;
short* outdata;
short* refdata;
struct FilterDefinitionF {
int nTap;
float fFH;
float fFL;
};
struct SourceSignal {
short* lpwSource;
int nSourceSize;
};
float FIR_ComputeFilterReferenceKernel(FilterDefinitionF* def, short* work, float* h, int i, int nn) {
float z = 0.0F;
for(int n=0; n < def->nTap; n++) {
short x0 = work[i + n] + work[i + nn - n];
z += h[n] * static_cast<float>(x0);
}
return z;
}
void FIR_ComputeFilterFloat(FilterDefinitionF* def, SourceSignal source, short* destination) {
float* h = (float )malloc(def->nTap * sizeof(float));
float fWH = 2.0F * CUDART_PI_F * def->fFH / FIR_FLOAT_FS;
float fWL = 2.0F * CUDART_PI_F * def->fFL / FIR_FLOAT_FS;
float d_h = NULL;
const int nThreads = 1;
FilterDefinitionF* d_def = NULL;
int datnum = def->nTap + source.nSourceSize;
int datnumtmp = def->nTap + source.nSourceSize + def->nTap;
int nn = 2 * def->nTap;
short* work = (short *)malloc(datnumtmp * sizeof(short));
short* d_work = NULL;
float* d_c = NULL;
float* c;
cudaError_t result;
for(int n = def->nTap; n > 0; n--) {
h[def->nTap - n] = sinf(fWH * static_cast<float>(n)) - sinf(fWL* static_cast<float>(n)) / (static_cast<float>(CUDART_PI_F) * static_cast<float>(n));
}
int s = def->nTap;
int s2 = def->nTap + source.nSourceSize;
for(int i=0; i < def->nTap; i++) {
work[i]=0;
}
for(int i=0; i < source.nSourceSize; i++) {
work[s + i]= source.lpwSource[i];
}
for(int i=0; i < def->nTap; i++) {
work[s2 + i]=0;
}
result = CUDA_SAFE_CALL(cudaMalloc((void**)&d_def, sizeof(FilterDefinitionF)));
if(result == cudaErrorMemoryAllocation) {
puts("Error");
return;
}
result = CUDA_SAFE_CALL(cudaMalloc((void**)&d_work, sizeof(short) * datnumtmp));
if(result == cudaErrorMemoryAllocation) {
puts("Error");
return;
}
result = CUDA_SAFE_CALL(cudaMalloc((void**)&d_h, sizeof(float) * def->nTap));
if(result == cudaErrorMemoryAllocation) {
puts("Error");
return;
}
result = CUDA_SAFE_CALL(cudaMalloc((void**)&d_c, sizeof(float) * nThreads));
if(result == cudaErrorMemoryAllocation) {
puts("Error");
return;
}
c = (float *)malloc(sizeof(float) * nThreads);
CUDA_SAFE_CALL(cudaMemcpy(d_def, def, sizeof(FilterDefinitionF), cudaMemcpyHostToDevice));
CUDA_SAFE_CALL(cudaMemcpy(d_work, work, sizeof(short) * datnumtmp, cudaMemcpyHostToDevice));
CUDA_SAFE_CALL(cudaMemcpy(d_h, h, sizeof(float) * def->nTap, cudaMemcpyHostToDevice));
for(int i=0; i < source.nSourceSize; i++)
{
float z = 0.0F;
for(int n=0; n < def->nTap; n++) {
short x0 = work[i + n] + work[i + nn - n];
z += h[n] * static_cast<float>(x0);
}
destination[i] = static_cast<short>( z + h[def->nTap] * static_cast<float>(work[ i + def->nTap]));
}
free(work);
free(h);
CUDA_SAFE_CALL(cudaFree((void**)&d_def));
CUDA_SAFE_CALL(cudaFree((void**)&d_work));
CUDA_SAFE_CALL(cudaFree((void**)&d_h));
CUDA_SAFE_CALL(cudaFree((void**)&d_c));
free©;
}
void FIR_ComputeFilterReference(FilterDefinitionF* def, SourceSignal source, short* destination) {
float* h = (float *)malloc(def->nTap * sizeof(float));
float fWH = 2.0F * CUDART_PI_F * def->fFH / FIR_FLOAT_FS;
float fWL = 2.0F * CUDART_PI_F * def->fFL / FIR_FLOAT_FS;
int datnum = def->nTap + source.nSourceSize;
int datnumtmp = def->nTap + source.nSourceSize + def->nTap;
int nn = 2 * def->nTap;
short* work = (short *)malloc(datnumtmp * sizeof(short));
for(int n = def->nTap; n > 0; n--) {
h[def->nTap - n] = sinf(fWH * static_cast<float>(n)) - sinf(fWL* static_cast<float>(n)) / (static_cast<float>(CUDART_PI_F) * static_cast<float>(n));
}
int s = def->nTap;
int s2 = def->nTap + source.nSourceSize;
for(int i=0; i < def->nTap; i++) {
work[i]=0;
}
for(int i=0; i < source.nSourceSize; i++) {
work[s + i]= source.lpwSource[i];
}
for(int i=0; i < def->nTap; i++) {
work[s2 + i]=0;
}
for(int i=0; i < source.nSourceSize; i++)
{
float z = FIR_ComputeFilterReferenceKernel(def, work, h, i, nn);
destination[i] = static_cast<short>( z + h[def->nTap] * static_cast<float>(work[ i + def->nTap]));
}
free(work);
free(h);
}
bool FIR_ApplyFilterBankFloat(int ch, SourceSignal source, short* destination) {
const float fL[10]={ 20.0, 40.0, 80.0, 160.0, 320.0, 640.0, 1280.0, 2560.0, 5120.0, 10280.0 };
const float fH[10]={ 40.0, 80.0, 160.0, 320.0, 640.0, 1280.0, 2560.0, 5120.0, 10240.0, 20480.0 };
const int TAP[10]={ 29000, 25000, 12500, 6250, 3125, 1600, 1600, 1600, 1600, 1600 };
printf("Dev CH: %d\n", ch);
if(ch >= 0 && ch < FIR_CHMAX) {
FilterDefinitionF* def = (FilterDefinitionF*)malloc(sizeof(FilterDefinitionF));
def->nTap = TAP[ch];
def->fFL = fL[ch];
def->fFH = fH[ch];
FIR_ComputeFilterFloat(def, source, destination);
free(def);
return true;
}
else {
return false;
}
}
bool FIR_ApplyFilterBankReference(int ch, SourceSignal source, short* destination) {
const float fL[10]={ 20.0, 40.0, 80.0, 160.0, 320.0, 640.0, 1280.0, 2560.0, 5120.0, 10280.0 };
const float fH[10]={ 40.0, 80.0, 160.0, 320.0, 640.0, 1280.0, 2560.0, 5120.0, 10240.0, 20480.0 };
const int TAP[10]={ 29000, 25000, 12500, 6250, 3125, 1600, 1600, 1600, 1600, 1600 };
printf("Ref CH: %d\n", ch);
if(ch >= 0 && ch < FIR_CHMAX) {
FilterDefinitionF* def = (FilterDefinitionF*)malloc(sizeof(FilterDefinitionF));
def->nTap = TAP[ch];
def->fFL = fL[ch];
def->fFH = fH[ch];
FIR_ComputeFilterReference(def, source, destination);
free(def);
return true;
}
else {
return false;
}
}
void PrepareMemory() {
indata = (short *)malloc(DATASIZE * sizeof(short));
outdata = (short *)malloc(DATASIZE * sizeof(short));
refdata = (short *)malloc(DATASIZE * sizeof(short));
}
void ReleaseMemory() {
free(indata);
free(outdata);
free(refdata);
}
void InitSample() {
for(int i = 0; i < DATASIZE; i++) {
// indata[i] = 0.0F;
indata[i] = i;
}
indata[DATASIZE / 2] = 255.0F;
}
void DiffRef() {
double z = 0.0;
for(int i = 0; i < DATASIZE; i++) {
z += fabs((double)outdata[i] - (double)refdata[i]);
}
printf("Diff: %f\n", z);
}
void RunTest() {
bool result;
PrepareMemory();
InitSample();
SourceSignal ss;
ss.lpwSource = indata;
ss.nSourceSize = DATASIZE;
unsigned int timer = 0;
for(int n = 0; n < FIR_CHMAX; n++) {
CUT_SAFE_CALL( cutCreateTimer( &timer));
CUT_SAFE_CALL( cutStartTimer( timer));
result = FIR_ApplyFilterBankFloat(n, ss, outdata);
CUT_SAFE_CALL( cutStopTimer( timer));
printf( "Device Processing time: %f (ms)\n", cutGetTimerValue( timer));
CUT_SAFE_CALL( cutDeleteTimer( timer));
if(result) printf("TRUE\n");
CUT_SAFE_CALL( cutCreateTimer( &timer));
CUT_SAFE_CALL( cutStartTimer( timer));
result = FIR_ApplyFilterBankReference(n, ss, refdata);
CUT_SAFE_CALL( cutStopTimer( timer));
printf( "Reference Processing time: %f (ms)\n", cutGetTimerValue( timer));
CUT_SAFE_CALL( cutDeleteTimer( timer));
if(result) printf("TRUE\n");
DiffRef();
}
ReleaseMemory();
}
int main(int argc, char** argv) {
CUT_DEVICE_INIT();
RunTest();
CUT_EXIT(argc, argv);
}