Hello everyone,
I want to parallelize and convert this nested for loop to GPU code:
for (row = (minRow - m_nWidth); row<maxRow; row++)
{
for (col = (minCol - m_nWidth); col<maxCol; col++)
{
....
}
}
I wrote the following kernel body (the `__global__` function signature is omitted here), but it is not working:
int idx = blockIdx.x * blockDim.x + threadIdx.x;
int idy = blockIdx.y * blockDim.y + threadIdx.y;
row = idx;
col = idy;
if (row >= (minRow - m_nWidth) && row < maxRow)
{
if (col >= (minCol - m_nWidth) && col < maxCol)
{
.....
}
}
}
// Call
// Host-side wrapper that launches the parallelCD1 kernel over a 2D index space.
//
// Launch layout: 2D grid of 16x16-thread blocks. The x dimension of the launch
// is sized from maxRow and the y dimension from maxCol, matching the kernel
// fragment, which derives `row` from the x index and `col` from the y index.
//
// The previous configuration (1 thread per block, m_nWidth x m_nWidth blocks)
// only produced global indices in [0, m_nWidth), so every (row, col) outside
// that square was never processed. The grid now covers [0, maxRow) x
// [0, maxCol); the kernel's own range guard discards indices below
// (minRow - m_nWidth) / (minCol - m_nWidth) and the padding threads created
// by rounding the grid up to whole blocks.
//
// NOTE(review): if (minRow - m_nWidth) or (minCol - m_nWidth) can be negative,
// those negative indices are still unreachable, because the kernel uses the
// raw non-negative global index as row/col. Covering them would require the
// kernel to add the lower bound as an offset -- confirm against the callers.
// NOTE(review): all pointer arguments are forwarded unchanged to the kernel;
// presumably they are device (or managed) pointers -- verify at the call site.
// NOTE(review): launch errors are not inspected here; callers should query
// cudaGetLastError() after this function returns.
void CurveDetParA::parallel_CDA(int row, int col, int minRow, int minCol, int maxRow, int maxCol, int *m_ngI, int *m_ngpI, int *m_ngppI, int counter,
int m_nWidth, int nIndexl, int nIndexu, int lPixel, int uPixel, int nG, int nGP, int nGPP, int *m_ng, int *m_ngp, int *m_ngpp, IMAGEDATA *m_pImage)
{
    // No non-negative index can satisfy `index < max` when max <= 0, so there
    // is no work; returning also avoids an invalid zero-sized grid dimension.
    if (maxRow <= 0 || maxCol <= 0)
    {
        return;
    }

    // 16 x 16 = 256 threads per block: a multiple of the warp size.
    const int kBlockEdge = 16;
    dim3 dimBlock(kBlockEdge, kBlockEdge, 1);

    // Ceil-divide so the last partial tile of rows/columns is still covered.
    const unsigned int blocksX = static_cast<unsigned int>((maxRow + kBlockEdge - 1) / kBlockEdge);
    const unsigned int blocksY = static_cast<unsigned int>((maxCol + kBlockEdge - 1) / kBlockEdge);
    dim3 dimGrid(blocksX, blocksY, 1);

    parallelCD1<<<dimGrid, dimBlock>>>(row, col, minRow, minCol, maxRow, maxCol, m_ngI, m_ngpI, m_ngppI, counter, m_nWidth, nIndexl, nIndexu, lPixel, uPixel, nG, nGP, nGPP, m_ng, m_ngp, m_ngpp, m_pImage);
}
Any help will be appreciated. TIA!