Hello All! Can you help please with the next kernel:
[codebox]
#define SHARED
device int FindAndDelete(float *okno, float element, int ArrayLenght, int width)
{
for (int p = 0; p < ArrayLenght; p++)
{
if (okno[p * width] == element)
{
for (int k = p; k < (ArrayLenght - 1); k++)
okno[k * width] = okno[(k + 1) * width];
return p;
}
}
return -1;
//return 0;
}
// Âñòà âëÿåò â óïîðÿäî÷åÃÃûé ïî âîçðà ñòà Ãèþ ìà ññèâ çà äà ÃÃîé äëèÃû ýëåìåÃò ñ çà äà ÃÃûì çÃà ÷åÃèåì
device int InsertWithSort(float *okno, float element, int ArrayLenght, int width)
{
int index1 = 0;
int index2 = ArrayLenght;
while (index1 != index2)
{
if (okno[((index1 + index2) / 2) * width] > element)
index2 = (index1 + index2) / 2;
else
index1 = (index1 + index2) / 2 + 1;
}
// Âñòà âèòü ýëåìåÃò â ïîëîæåÃèå index1, ñäâèÃóâ âñå ñëåäóþùèå ýëåìåÃòû â ìà ññèâå Ãà îäÃó ïîçèöèþ
for (int p = ArrayLenght; p > index1; p--)
okno[p * width] = okno[(p - 1) * width];
okno[index1 * width] = element;
return index1;
}
extern “C” global void Detection(float *signal, float *okno, float *medium, int NumSweeps, int NumSamples, int window_size, int start_index, int end_index)
{
#if defined SHARED
__shared__ float windowmemory[16][208 + 1]; //+1 to eliminate bank conflicts
int sweep = blockIdx.x * blockDim.x + threadIdx.x;
if (sweep >= NumSweeps)
return;
//1. çà ïîëÃèòü ÎêÃî ñ ñîðòèðîâêîé
for (int sample = 0; sample < window_size; sample++)
#if defined SHARED
InsertWithSort(windowmemory[threadIdx.x], signal[sample * NumSweeps + sweep], sample, 1);
#else
InsertWithSort(okno + sweep, signal[sample * NumSweeps + sweep], sample, NumSweeps);
#endif
//2. ïîäñ÷èòà òü ñðåäÃåå ÎêÃÃ
float med = 0;
for (int sample = start_index; sample <= end_index; sample++)
#if defined SHARED
med += windowmemory[threadIdx.x][sample];
#else
med += okno[sample * NumSweeps + sweep];
#endif
medium[(window_size / 2) * NumSweeps + sweep] = med / (end_index - start_index + 1);
//3. ïîäñ÷¸ò ñðåäÃåé ïðè äâèæåÃèè ÎêÃÃ
for (int sample = window_size; sample < NumSamples; sample++)
{
//1. óäà ëèòü ñòà ðûé ýëåìåÃò
#if defined SHARED
int deleted_index = FindAndDelete(windowmemory[threadIdx.x], signal[(sample - window_size) * NumSweeps + sweep], window_size, 1);
#else
int deleted_index = FindAndDelete(okno + sweep, signal[(sample - window_size) * NumSweeps + sweep], window_size, NumSweeps);
#endif
//3. âñòà âèòü Ãîâûé ýëåìåÃò
#if defined SHARED
int inserted_index = InsertWithSort(windowmemory[threadIdx.x], signal[sample * NumSweeps + sweep], window_size - 1, 1);
#else
int inserted_index = InsertWithSort(okno + sweep, signal[sample * NumSweeps + sweep], window_size - 1, NumSweeps);
#endif
//4. ïåðåñ÷èòà òü ñðåäÃåå
if (deleted_index < start_index)
{
if (inserted_index < start_index) { }
if (inserted_index >= start_index)
if (inserted_index <= end_index)
{
#if defined SHARED
med -= windowmemory[threadIdx.x][(start_index - 1)];
#else
med -= okno[(start_index - 1) * NumSweeps + sweep];
#endif
med += signal[sample * NumSweeps + sweep];
}
if (inserted_index > end_index)
{
#if defined SHARED
med -= windowmemory[threadIdx.x][(start_index - 1)];
med += windowmemory[threadIdx.x][end_index];
#else
med -= okno[(start_index - 1) * NumSweeps + sweep];
med += okno[end_index * NumSweeps + sweep];
#endif
}
}
if (deleted_index >= start_index)
if (deleted_index <= end_index)
{
med -= signal[(sample - window_size) * NumSweeps + sweep];
if (inserted_index <= start_index)
#if defined SHARED
med += windowmemory[threadIdx.x][start_index];
#else
med += okno[start_index * NumSweeps + sweep];
#endif
if (inserted_index > start_index)
if (inserted_index <= end_index)
med += signal[sample * NumSweeps + sweep];
if (inserted_index > end_index)
#if defined SHARED
med += windowmemory[threadIdx.x][end_index];
#else
med += okno[end_index * NumSweeps + sweep];
#endif
}
if (deleted_index > end_index)
{
if (inserted_index < start_index)
{
#if defined SHARED
med -= windowmemory[threadIdx.x][(end_index + 1)];
med += windowmemory[threadIdx.x][start_index];
#else
med -= okno[(end_index + 1) * NumSweeps + sweep];
med += okno[start_index * NumSweeps + sweep];
#endif
}
if (inserted_index >= start_index)
if (inserted_index <= end_index)
{
#if defined SHARED
med -= windowmemory[threadIdx.x][(end_index + 1)];
#else
med -= okno[(end_index + 1) * NumSweeps + sweep];
#endif
med += signal[sample * NumSweeps + sweep];
}
if (inserted_index > end_index) { }
}
medium[(sample - window_size / 2) * NumSweeps + sweep] = med / (end_index - start_index + 1);
}
//ñðåäÃÿÿ äëÿ Ãà ÷à ëÃ
for (int sample = 0; sample < window_size / 2; sample++)
medium[sample * NumSweeps + sweep] = medium[(window_size / 2) * NumSweeps + sweep];
//ñðåäÃÿÿ äëÿ êîÃöÃ
for (int sample = 0; sample < window_size / 2; sample++)
medium[(NumSamples - 1 - sample) * NumSweeps + sweep] = medium[(NumSamples - 1 - window_size / 2) * NumSweeps + sweep];
}
[/codebox]