Simple CUDA programming question?

Please take mercy upon a CUDA newbie; I would very much appreciate some assistance. Simply put, I’m writing a heap sort code utilizing CUDA (really easy, I know, I’m just taking baby steps here), and I’ve hit a bit of a snag:

#include <stdio.h>

#include <stdlib.h>

#include <time.h>

#include <string.h>

/*
 * generate - kernel that fills array1[low .. high-1] with pseudo-random
 * values in the range 0..9999999, writes each value on its own line to
 * "numbers.txt", and then calls makeHeap() on array1.
 *
 *   low    - first index of array1 that is written
 *   high   - one past the last index of array1 that is written
 *   array1 - buffer that receives the generated values
 *
 * NOTE(review): fopen/fprintf/fclose are host C library calls inside a
 * __global__ function; the discussion that follows this listing explains
 * that device code has no access to the filesystem. srand/rand/time are
 * also host library routines and presumably hit the same restriction.
 * NOTE(review): makeHeap is defined further down and no declaration of it
 * is visible at the point of the call below.
 */
__global__ void generate (int low, int high, int array1[])

{

  int output, x;

  FILE *outfile = fopen ("numbers.txt", "w");

/* Seed the generator from the current time, so every run differs. */
srand ((unsigned) time(NULL));

for (x=low; x < high; x++)

    {

      /* Keep the value below ten million. */
      output = (rand() % 10000000);

      fprintf(outfile, "%d\n", output);

      array1 [x] = output;

    }

  fclose (outfile);

  /* The heap size is the literal 1000000, independent of low and high. */
  makeHeap (array1, 1000000);

}

/*
 * makeHeap - rearrange array1[0 .. count-1] into a max-heap (every parent
 * is >= its children, parent of index s is (s-1)/2), then pass the heap on
 * to heapSort().
 *
 *   array1 - buffer to heapify in place
 *   count  - number of elements of array1 to heapify and sort
 *
 * NOTE(review): heapSort is defined later in the file and no declaration of
 * it is visible here; the discussion that follows the listing covers the
 * resulting "identifier is undefined" error.
 */
__device__ void makeHeap (int array1 [], int count)
{
  int i, val, s, f;

  /* Grow the heap one element at a time: array1[0 .. i-1] is already a
     heap, and array1[i] is sifted up into it. */
  for (i = 1; i < count; i++)
    {
      val = array1[i];
      s = i;
      f = (s - 1)/2;

      /* Shift smaller ancestors down until val's slot is found. */
      while (s > 0 && array1[f] < val)
        {
          array1[s] = array1[f];
          s = f;
          f = (s-1)/2;
        }

      array1[s] = val;
    }

  /* Sort exactly the elements that were heapified above, instead of a
     hard-coded 1000000 that ignores the count argument. */
  heapSort(array1, count);
}

/*
 * heapSort - sort array1[0 .. count-1] into ascending order in place,
 * assuming it already forms a max-heap, then write the sorted values one
 * per line to "heapNum1.txt".
 *
 *   array1 - max-heap to sort in place
 *   count  - number of elements in array1
 *
 * NOTE(review): fopen/fprintf/fclose are host C library calls inside a
 * __device__ function; the discussion that follows this listing explains
 * that device code has no access to the filesystem. The calls are kept so
 * the listing still matches that discussion.
 */
__device__ void heapSort (int array1[], int count)
{
  int i, s, f, ivalue, x;
  FILE *outfile2 = fopen("heapNum1.txt", "w");

  /* Each pass moves the current maximum (the root) to slot i and re-inserts
     the displaced value into the shrunken heap array1[0 .. i-1]. */
  for (i = count-1; i > 0; i--)
    {
      ivalue = array1[i];
      array1[i] = array1[0];
      f = 0;

      /* Pick the larger child of the root. When i == 1 the heap has only
         the root left, so there is no child to compare against. (The
         original tested i == 0, which can never hold inside this loop and
         left the two smallest elements in the wrong order.) */
      if (i == 1)
        s = -1;
      else
        s = 1;
      if (i > 2 && array1[2] > array1[1])
        s = 2;

      /* Sift down: promote larger children until ivalue fits at f. */
      while (s >= 0 && ivalue < array1[s])
        {
          array1[f] = array1[s];
          f = s;
          s = 2 * f + 1;

          /* Prefer the right child when it exists and is larger. */
          if (s+1 <= i - 1 && array1[s] < array1[s+1])
            s++;

          /* Past the end of the shrunken heap: no more children. */
          if (s > i-1)
            s = -1;
        }

      array1[f] = ivalue;
    }

  /* Write out the elements that were sorted (count of them, not a
     hard-coded 1000000) and release the file handle. */
  if (outfile2 != NULL)
    {
      for (x = 0; x < count; x++)
        {
          fprintf(outfile2, "%d\n", array1[x]);
        }

      fclose(outfile2);
    }
}

/*
 * main - launch the generate kernel with a single thread over a
 * 1000000-element buffer and report the elapsed time in milliseconds.
 *
 * Returns 0 on success, 1 if the kernel launch or execution reports a CUDA
 * error.
 *
 * NOTE(review): holder1 is an ordinary host array and its address is handed
 * to the kernel unchanged; the discussion that follows this listing covers
 * why a host pointer should not be passed to a __global__ function.
 */
int main ()
{
  /* static storage: 4 MB of ints is too large to place safely on the
     stack of every platform. */
  static int holder1 [1000000];
  double timeCount;
  clock_t startClock, stopClock;
  cudaError_t status;

  startClock = clock();

  generate <<<1,1>>> (0, 1000000, holder1);

  /* A launch returns before the kernel has run. Pick up launch errors,
     then wait for completion so the timing covers the kernel itself and
     execution errors are reported instead of silently dropped. */
  status = cudaGetLastError();
  if (status == cudaSuccess)
    status = cudaDeviceSynchronize();

  stopClock = clock();

  if (status != cudaSuccess)
    {
      fprintf(stderr, "generate kernel failed: %s\n",
              cudaGetErrorString(status));
      return (1);
    }

  /* clock() counts in units of 1/CLOCKS_PER_SEC seconds; convert in
     floating point so fractions of a millisecond are not truncated. */
  timeCount = 1000.0 * (double) (stopClock - startClock) / CLOCKS_PER_SEC;

  printf("Elapsed time is %.2f milliseconds\n", timeCount);

  return (0);
}

When I try to compile, I get the following errors:

/home/user/sorts.cu(20): error: identifier “makeHeap” is undefined

/home/user/sorts.cu(39): error: identifier “heapSort” is undefined

I’m not really sure what I’ve done wrong. I’m in the process of reading a CUDA book to try to learn more, but I’d like to get this code working. And no, it is not for any sort of a class, just a bored man with a lot of free time. Any help would be appreciated. Thanks!

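The immediate cause of the compiler error is that you are calling the functions makeHeap and heapSort before they have been declared: generate() calls makeHeap, and makeHeap calls heapSort. The easiest way to fix this is to move the definitions of these functions above the definition of generate(), with heapSort first (since makeHeap calls it), or to add prototypes for both at the top of the file.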

However, once you do that, this code will still fail to compile because you are calling fopen() from a global function. The CUDA device does not have access to the filesystem (or any other OS services), so what you have written cannot work. Furthermore, you are passing a host pointer to your global function, which is also something you generally do not want to do.

You should think of CUDA as a way to run parallel calculations on data stored in device (i.e. GPU) memory. The job of the code running on the CPU is to do any necessary I/O, load the data into device memory, and then launch a CUDA kernel to operate on that device memory. When you are done using whatever kernels you want to operate on the device memory, you can copy the result back to host (i.e. CPU) memory for output to screen/disk/etc.

Heap sort doesn’t really work well as a parallel calculation. I would suggest you try something more like an element-wise multiplication between two input vectors as a starter program. That is trivially parallel and easy to map to CUDA.