Hello,
I’m not very experienced in Cuda and now I’ve a programm which crashes but I can’t understand why.
The code doesn’t make much sense any more, because I changed some parts in order to locate the mistake.
#define ANZAHL_GAUSE 4
#define ANZAHL_FELDER 1460
#define PI 3.141592654f
#define ANZAHL_SCHRITTE 50000
#define dt 0.005f
#define t0 -125.0f
struct Feld
{
float widthSQ2[ANZAHL_FELDER];
float amplitude[ANZAHL_FELDER];
float freq[ANZAHL_FELDER];
float phase[ANZAHL_FELDER];
};
Feld Daten[ANZAHL_GAUSE];
texture<float> d_Daten;
float EFeld_Ausgabe[ANZAHL_SCHRITTE*ANZAHL_FELDER];
float d_EFeld_Ausgabe[ANZAHL_SCHRITTE*ANZAHL_FELDER];
__device__ float d_EFeld(float t, short index)
{
float E = 0.0f;
float Wert;
for (int i=0; i< ANZAHL_GAUSE; i++)
{
Wert = tex1Dfetch(d_Daten, i*sizeof(Feld)/sizeof(float)+2*ANZAHL_FELDER+index)*t+
tex1Dfetch(d_Daten, i*sizeof(Feld)/sizeof(float)+3*ANZAHL_FELDER+index);
E += __expf(-t*t/tex1Dfetch(d_Daten, i*sizeof(Feld)/sizeof(float)+index))*
tex1Dfetch(d_Daten, i*sizeof(Feld)/sizeof(float)+ANZAHL_FELDER+index)*__cosf(Wert);
}
return E*0.001f;
}
__global__ void kernel(float* Pointer)
{
short tid = threadIdx.x + blockIdx.x * blockDim.x; //Nummer des Feldes
float t = t0;
float dummy;
if(tid < ANZAHL_FELDER)
{
for(int i = 0; i < ANZAHL_SCHRITTE; i++)
{
dummy = d_EFeld(5000.0f, tid);
Pointer[0] = dummy*t;
Pointer[tid+i*ANZAHL_FELDER] = 5.0;
t+=dt;
}
}
}
---------------------------------------------
float* dev_E; //Die berechneten Ergebnisse
float* dev_Data; //Die Gause - wird an eine Textur gebunden!
cutilSafeCall(cudaMalloc( (void**)&dev_Data, sizeof(Feld)*ANZAHL_GAUSE));
cutilSafeCall(cudaMemcpy(dev_Data , Daten, sizeof(Feld)*ANZAHL_GAUSE,cudaMemcpyHostToDevice));
cutilSafeCall(cudaBindTexture( NULL, d_Daten, dev_Data, sizeof(Feld)*ANZAHL_GAUSE));
cutilSafeCall(cudaMalloc( (void**)&dev_E, sizeof(float)*ANZAHL_FELDER*ANZAHL_SCHRITTE));
kernel<<<ANZAHL_FELDER, 1>>>(dev_E);
cutilSafeCall(cudaMemcpy(d_EFeld_Ausgabe , dev_E, sizeof(float)*ANZAHL_FELDER*ANZAHL_SCHRITTE,cudaMemcpyDeviceToHost));
cutilSafeCall(cudaFree( dev_E ));
cutilSafeCall(cudaUnbindTexture( d_Daten ));
cutilSafeCall(cudaFree(dev_Data));
I’m using a GeForce GT 8800 and Visual Studio 2008. The depicted Code crashes - I get a black screen for a few seconds while the driver restarts.
I get the message “e:/cppIntegration/cppIntegration.cu(190) : cudaSafeCall() Runtime API error : the launch timed out and was terminated.” in the console.
Line 190 is:
cutilSafeCall(cudaMemcpy(d_EFeld_Ausgabe , dev_E, sizeof(float)*ANZAHL_FELDER*ANZAHL_SCHRITTE,cudaMemcpyDeviceToHost));
This doesn’t help me - why is there a timeout - the programm ran only a few seconds. I changed the code in the kernel slightly and the effect was that the application doesn’t crash anymore:
for(int i = 0; i < ANZAHL_SCHRITTE; i++)
{
dummy = d_EFeld(5000.0, tid);
Pointer[0] = dummy*t;
//Pointer[tid+i*ANZAHL_FELDER] = 5.0;
t+=dt;
}
or
for(int i = 0; i < ANZAHL_SCHRITTE; i++)
{
dummy = d_EFeld(5000.0, tid);
Pointer[0] = dummy;//*t;
Pointer[tid+i*ANZAHL_FELDER] = 5.0;
t+=dt;
}
…and so on; there are some more configurations (e.g. changing the 5000.0 to 0.0)
I don’t understand why these changes prevent the crashing - e.g. the last code does only replace Pointer[0] = dummy*t;
with Pointer[0] = dummy;//*t;
Do you know what I’m doing wrong?
Thank you very much!
Andy