Qualifying Pointers referring to const/shared data

Is there any way in CUDA to explicitly declare whether a pointer points to constant or shared memory space as opposed to the global device space?

Apparently nvcc tries to figure it out, but cannot always come to a conclusion. In CUDA 1.0, it silently assumed global device space. In CUDA 1.1beta, it warns about the situation and then assume global device space. This is clearly a step in the right direction. Unfortunately, even if you are not using any constant or shared data, these ambiguities arise, and I still gets lots of warnings, which now obfuscate any real problems.

So there are 2 problem:

(1) I can’t use constant or shared space when I want/need to.

(2) I can’t always make the compiler shut up even when I am using global space

If there’s a way to specify to qualify the type of the referenced data, that would solve both problems.

Below is small example. You can see the various combinations of problem by changing the two “#if 0” statements to “#if 1”.

Thanks for any insight.

#include <stdio.h>

#include <cuda.h>

#define CUDA_ERROR                              \

{                                               \

    cudaError_t error;                          \

    error = cudaGetLastError();                 \

    fprintf(stderr, "Cuda Error (%s:%d): %s\n", \

            __FILE__, __LINE__,                 \

            cudaGetErrorString(error));         \

}

typedef struct {

	unsigned short a;

} type1;

typedef struct {

	type1 **b;

	type1  *c[1];

} type2;

#if 0

__device__ type1 table[2] = {

#else

__device__ __constant__ type1 table[2] = {

#endif

	{0x5601},

	{0x5601}

};

__device__ unsigned short test;

__device__ type2 glob;

__global__

void sample()

{

        type2 *foo;

        foo = &glob;

#if 1

        for (int i=0; i<1; i++)

            foo->c[i] = &table[0];

#else

        foo->c[0] = &table[0];

#endif

        foo->b = &(foo->c[0]);

        test = (*foo->b)->a;

       return;

}

int main()

{

    int deviceCount;

    cudaGetDeviceCount(&deviceCount);

    if (deviceCount == 0) {

        fprintf(stderr, "There is no device.\n");

        exit(1);

    }

    int dev;

    for (dev = 0; dev < deviceCount; ++dev) {

        cudaDeviceProp deviceProp;

        cudaGetDeviceProperties(&deviceProp, dev);

        if (deviceProp.major >= 1)

            break;

    }

    if (dev == deviceCount) {

        fprintf(stderr, "There is no device supporting CUDA.\n");

        exit(1);

    }

    else

    {

        cudaSetDevice(dev);

        fprintf(stderr, "CUDA device is set.\n");

    }

    

    CUDA_ERROR;

   // lauch grid

    dim3 dimBlock(1, 1, 1);

    dim3 dimGrid(1,1,1);

    sample<<<dimGrid, dimBlock>>>();

    CUDA_ERROR;

    sample<<<dimGrid, dimBlock>>>();

    CUDA_ERROR;

    sample<<<dimGrid, dimBlock>>>();

    CUDA_ERROR;

    sample<<<dimGrid, dimBlock>>>();

    CUDA_ERROR;

    sample<<<dimGrid, dimBlock>>>();

    CUDA_ERROR;

   return 0;

}

Hey Jon,

I don’t think it’s legal to define the contents of a constant array directly in the file that way. If I change the code to the following it runs without reporting launch errors. Key point: use cudaMemcpyToSymbol() to define constant arrays.

It still has the warning though, so if you determine it’s not working correctly, let me know.

Mark

#include <stdio.h>

#include <cuda.h>

#define CUDA_ERROR                              \

{                                               \

   cudaError_t error;                          \

   error = cudaGetLastError();                 \

   fprintf(stderr, "Cuda Error (%s:%d): %s\n", \

           __FILE__, __LINE__,                 \

           cudaGetErrorString(error));         \

}

typedef struct {

unsigned short a;

} type1;

typedef struct {

type1 **b;

type1  *c[1];

} type2;

#if 0

__device__ type1 table[2] = {

#else

__device__ __constant__ type1 table[2];

#endif

__device__ unsigned short test;

__device__ type2 glob;

__global__

void sample()

{

       type2 *foo;

       foo = &glob;

#if 1

       for (int i=0; i<1; i++)

           foo->c[i] = &table[0];

#else

       foo->c[0] = &table[0];

#endif

       foo->b = &(foo->c[0]);

       test = (*foo->b)->a;

      return;

}

int main()

{

   int deviceCount;

   cudaGetDeviceCount(&deviceCount);

   if (deviceCount == 0) {

       fprintf(stderr, "There is no device.\n");

       exit(1);

   }

   int dev;

   for (dev = 0; dev < deviceCount; ++dev) {

       cudaDeviceProp deviceProp;

       cudaGetDeviceProperties(&deviceProp, dev);

       if (deviceProp.major >= 1)

           break;

   }

   if (dev == deviceCount) {

       fprintf(stderr, "There is no device supporting CUDA.\n");

       exit(1);

   }

   else

   {

       cudaSetDevice(dev);

       fprintf(stderr, "CUDA device is set.\n");

   }

  type1 tableData[2] = {{0x5601}, {0x5601}};

   cudaMemcpyToSymbol(table, tableData, 2);

   

   CUDA_ERROR;

  // lauch grid

   dim3 dimBlock(1, 1, 1);

   dim3 dimGrid(1,1,1);

   sample<<<dimGrid, dimBlock>>>();

   CUDA_ERROR;

   sample<<<dimGrid, dimBlock>>>();

   CUDA_ERROR;

   sample<<<dimGrid, dimBlock>>>();

   CUDA_ERROR;

   sample<<<dimGrid, dimBlock>>>();

   CUDA_ERROR;

   sample<<<dimGrid, dimBlock>>>();

   CUDA_ERROR;

  return 0;

}

Internal discussion leads me to believe that this might be legal after all, so I’ve filed a bug.

Thanks,
Mark

Jon, one of our compiler engineers pointed out a flaw in your code:

You declare table like this:

__device__ __constant__ type1 table[2];

This means that it will be stored in constant memory. Constant memory is a different memory address space than global memory. So it is illegal to assign the address of table to a global pointer variable, as you do like so:

__device__ type2 glob;

...

...

      type2 *foo;

      foo = &glob;

...

...

      foo->c[i] = &table[0];

This is likely causing the error you get. It would be nice to get a better error than an unspecified launch error (in fact, it would be nice to get a compile-time error!), so I’m glad I filed the bug. But this should help you work around similar errors in your real code.

Thanks,

Mark

Very interesting. I’m going to have to experiment with whether pointers to constant tables cannot be used at all, then. Also, I’m curious to find out if I can have pointers to shared memory embedded in data structures in my local registers.

So if I take away the constant label, why do I get a warning from the compiler (1.1) that it cannot determine which address space my pointer is in?

OK, update from our compiler engineers. First, I have to retract my statement that this is illegal. It is legal to assign a constant address to a global object. The problem is that the compiler is getting confused by all the indirection (hence the warning) and as a result generating a global load instruction rather than a constant load.

So, there is a compiler bug preventing the compiler to determine the type of the pointer due to the multi-field struct. An engineer is investigating a fix. I realize the following is ugly, but he offered a temporary workaround for the problem:

typedef struct {

    type1 **b;

} type3;

typedef struct {

    type1 *c[1];

} type4;

...

__device__ type3 glob1;

__device__ type4 glob2;

...

type3 *foo1;

type4 *foo2;

foo1 = &glob1;

foo2 = &glob2;

for (int i=0; i<1; i++)

    foo2->c[i] = &table[0];

foo1->b = &(foo2->c[0]);

test = (*foo1->b)->a;

Mark

That’s very useful information. So when there is any confusion by the compiler (about the memory space a pointer points to), we can avoid the situation by removing the pointer from multi-field structures. I think this may actually work for me.

I can envision a lot of data structures where this could be problematic.Hopefully the fix will improve the situation.

I do wish there were simply a way to hint to the compiler what type of memory the pointer points to, which should be known at compile time for any feasible code. I guess from the compiler-writer’s perspective, this shouldn’t really be necessary due to automatic static analysis. I guess it depends if it is simply a “bug”, or if it is a “hard problem” where there are levels of indirection.

Thanks again for the useful information!!