I am trying to use cuda to apply connected component labelling algorithm using recursive function (dfs) and i got stuck and i need some help

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>


// the 4 directions for each 1 in the matrix 
int dx[4] = { 0, 0, -1, 1 };
int dy[4] = { 1, -1, 0, 0 };

size_t cols, rows;

int** readmatrix(size_t* rows, size_t* cols, const char* filename,int number)
{
    if (rows == NULL || cols == NULL || filename == NULL)
        return NULL;

    *rows = 0;
    *cols = 0;

    FILE* fp = fopen(filename, "r");

    if (fp == NULL)
    {
        fprintf(stderr, "could not open %s: %s\n", filename, strerror(errno));
        return NULL;
    }

    int** matrix = NULL, ** tmp;

    char line[1024];

    while (fgets(line, sizeof line, fp))
    {
        if (*cols == 0)
        {
            // determine the size of the columns based on
            // the first row
            char* scan = line;
            int dummy;
            int offset = 0;
            while (sscanf(scan, "%d%n", &dummy, &offset) == 1)
            {
                scan += offset;
                (*cols)++;
            }
        }

        tmp = realloc(matrix, (*rows + 1) * sizeof * matrix);

        if (tmp == NULL)
        {
            fclose(fp);
            return matrix; // return all you've parsed so far
        }

        matrix = tmp;

        matrix[*rows] = calloc(*cols, sizeof * matrix[*rows]);

        if (matrix[*rows] == NULL)
        {
            fclose(fp);
            if (*rows == 0) // failed in the first row, free everything
            {
                fclose(fp);
                free(matrix);
                return NULL;
            }

            return matrix; // return all you've parsed so far
        }

        int offset = 0;
        char* scan = line;
        for (size_t j = 0; j < *cols; ++j)
        {
            if (sscanf(scan, "%d%n", matrix[*rows] + j, &offset) == 1)
                scan += offset;
            else
                matrix[*rows][j] = 0; // could not read, set cell to 0
        }

        // incrementing rows
        (*rows)++;
    }

    fclose(fp);

    if (*rows == number && *rows == cols)
    {
        return matrix;
    }

    else if (*rows >= number && number <= *cols)
    {
        *rows = number;
        *cols = number;
        int** tmp_matrix = malloc(sizeof(int*) * number);
        for (int i = 0; i < number; ++i)
            tmp_matrix[i] = malloc(sizeof(int) * number);


        for (int i = 0; i < number; i++)
            for (int j = 0; j < number; j++)
                tmp_matrix[i][j] = matrix[i][j];


        return tmp_matrix;
    }

    else
    {
        return matrix = NULL;
    }
    
}
//DFS function to iterate through the matrix
void dfs(int** m, int** h, int x, int y, int c)
{
    m[x][y] = c;

    for (int i = 0; i < 4; i++)
    {
        int nx = x + dx[i], ny = y + dy[i];

        if (nx < 0 || nx >= rows || ny < 0 || ny >= cols)
        {
            continue;
        }

        if (h[nx][ny] && !m[nx][ny])
            dfs(m, h, nx, ny, c);
    }
}

int main(int argc, char* argv[])
{
 // input parameters   
    int nbrows = 0;
    int set = 0;
    char c[1000];
    char k[1000];
    int** w;
    
    if (argc < 2)
    {

        printf("Input file:\n");
        scanf("%s", &c);
        printf("\nOutput file:\n");
        scanf("%s", &k);
        printf("\nMatrix size:\n");
        scanf("%d", &nbrows);
    }
    else
    {
        strcpy(c, argv[1]);
        strcpy(k, argv[2]);
        nbrows = atoi(argv[3]);


        if (!k)
        {
            printf("output file not found");
            return 1;
        }

        if (!c)
        {
            printf("Input file not found");
            return 1;
        }


    }
    
    FILE* pf = fopen(&k, "w");
    fprintf(pf, "Program description:\nThis program reads NxN  binary matrix and find the connected components and label them using DFS (depth first search) .\n\n");

    
    int** matrix = readmatrix(&rows, &cols, &c,nbrows);

    if (rows == cols)
    { 
        if (matrix == NULL)
        {
            fprintf(stderr, "could not read matrix\n");
            return 1;
        }

        w = (int**)malloc(sizeof(int*) * rows);
        for (int i = 0; i < rows; ++i)
        {
            w[i] = (int*)malloc(cols * sizeof(int));
        }

        for (int i = 0; i < rows; i++)
            for (int j = 0; j < cols; j++)
            {
                w[i][j] = 0;
            }

        for (int i = 0; i < rows; i++)
            for (int j = 0; j < cols; j++)
            {
                if (matrix[i][j] && !w[i][j])
                    dfs(w, matrix, i, j, ++set);
            }

        fprintf(pf, "Input:\n");
        for (size_t i = 0; i < rows; ++i)
        {
            for (size_t j = 0; j < cols; ++j)
                fprintf(pf,"%3d ", matrix[i][j]);
            fprintf(pf,"\n");
        }

        fprintf(pf,"\nOutput:\n");
        for (size_t i = 0; i < rows; ++i)
        {
            for (size_t j = 0; j < cols; ++j)
                fprintf(pf,"%3d ", w[i][j]);
            fprintf(pf, "\n");
        }

        // freeing memory
        for (size_t i = 0; i < rows; ++i)
            free(matrix[i]);
        free(matrix);

        for (size_t i = 0; i < rows; ++i)
            free(w[i]);
        free(w);

    }

    else
    {
        fprintf(pf, "Sorry!\nThis program for NxN matrix");
    }
    
    fclose(pf);
    return 0;
}

please properly format code that you paste here. one simple way to do it:

  • edit your post above by clicking on the pencil edit icon
  • select all of your code
  • click the </> button at the top of the edit window
  • save your changes

This algorithm won’t be trivially convertible to a CUDA kernel. If you simply want connected component labelling in CUDA, you could use npp. Otherwise, if you want to write your own code to do it, I would suggest studying the literature. There are a number of papers that specifically describe a parallel approach and there is this