CUDA Global Memory

siyam · June 24, 2011, 8:58am

Hi
this is my code

 #include "stdafx.h"
  #include <iostream>
   using namespace std;

  // Element count of the statically sized device array below.
  // NOTE: as a macro, every later identifier spelled exactly `n` is replaced by 10.
  #define n 10
  // Fixed-size int array declared with __device__; the size has to be a
  // compile-time constant, which is why it comes from the macro above.
  __device__ int glMem[n];

  /**
   * Sets every element of the device array glMem to 0.
   *
   * Written as a grid-stride loop over a 1-D launch: each element is written
   * by exactly one thread whatever the grid/block size is, so a <<<1,1>>>
   * launch still covers the whole array and larger launches do not repeat
   * the same writes in every thread.
   */
  __global__ void initVals()
  {
      const int stride = gridDim.x * blockDim.x;
      for (int idx = blockIdx.x * blockDim.x + threadIdx.x; idx < n; idx += stride)
          glMem[idx] = 0;
  }

 /**
  * Stores the value 10 in out[0] .. out[n-1].
  *
  * out must point to device memory with room for at least n ints.
  * Uses a grid-stride loop over a 1-D launch, so each element is written by
  * exactly one thread for any launch configuration (including <<<1,1>>>).
  */
 __global__ void test(int *out)
 {
     const int stride = gridDim.x * blockDim.x;
     for (int idx = blockIdx.x * blockDim.x + threadIdx.x; idx < n; idx += stride)
         out[idx] = 10;
 }

// Reports a failed CUDA runtime call on stderr.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaOk(cudaError_t status, const char *what)
{
    if (status == cudaSuccess)
        return true;
    cerr << what << " failed: " << cudaGetErrorString(status) << endl;
    return false;
}

// Zeroes glMem on the device, fills a freshly allocated device buffer with 10s,
// copies that buffer back to the host and prints it, one value per line.
// Returns 0 on success and 1 if any CUDA call or kernel launch fails.
int main()
{
    const size_t sz = size_t(n) * sizeof(int);

    initVals<<<1, 1>>>();
    // A kernel launch returns nothing; launch errors are read back explicitly.
    if (!cudaOk(cudaGetLastError(), "initVals launch"))
        return 1;

    int *devMem = 0;
    if (!cudaOk(cudaMalloc((void **)&devMem, sz), "cudaMalloc"))
        return 1;

    test<<<1, 1>>>(devMem);
    bool ok = cudaOk(cudaGetLastError(), "test launch");

    int *hoMem = new int[n];
    // The blocking cudaMemcpy waits for the kernels above, so errors raised
    // while they were executing are reported by this call as well.
    ok = ok && cudaOk(cudaMemcpy(hoMem, devMem, sz, cudaMemcpyDeviceToHost), "cudaMemcpy");

    //print (only when the copy succeeded, otherwise hoMem is uninitialized)
    if (ok)
    {
        for (int i = 0; i < n; i++)
            cout << hoMem[i] << endl;
    }

    // Release host and device buffers on every path that allocated them.
    delete[] hoMem;
    ok = cudaOk(cudaFree(devMem), "cudaFree") && ok;

    return ok ? 0 : 1;
}

In this code I define

glMem

with size n. If I don't know the size in advance, how can I define it?
For example, I need to define it like this:

__device__ int *glMem;

It doesn't work. Please give me a code sample…

Skybuck · June 24, 2011, 2:34pm

Maybe you need to add some threadIdx.x to the kernel to make it work.

Try something like:

// for(int i=0;i<n;i++)
out[threadIdx.x] = 10;

Also launch with 10 threads:

test<<<1, 10>>>(devMem);

Oh wait, I think I see the problem… you need to pass the address of the array.

Normally this would be:

// test<<<1, 10>>>(&devMem); // probably wrong example of me *

But maybe devMem is already an address :) Yeah, probably so *

Anyway it seems you want to define memory dynamically.

So it would be something like:

// Zeroes the first MemoryElements ints of MemoryPointer, one element per thread.
// Expects a 1-D, single-block launch with at least MemoryElements threads.
// Threads whose index is >= MemoryElements do nothing, so launching more
// threads than elements cannot write past the end of the buffer.
__global__ void KernelInitializeMemory( int *MemoryPointer, int MemoryElements )
{
    const int Index = threadIdx.x;
    if (Index < MemoryElements)
    {
        MemoryPointer[Index] = 0;
    }
}

// Stores 10 in the first MemoryElements ints of MemoryPointer, one element per thread.
// Expects a 1-D, single-block launch with at least MemoryElements threads.
// Threads whose index is >= MemoryElements do nothing, so launching more
// threads than elements cannot write past the end of the buffer.
__global__ void KernelUseMemory( int *MemoryPointer, int MemoryElements )
{
    const int Index = threadIdx.x;
    if (Index < MemoryElements)
    {
        MemoryPointer[Index] = 10;
    }
}

// Allocate CUDA device memory from the host; see the CUDA runtime API.

// Host-side fragment: the element count is an ordinary runtime value, so the
// buffer size does not have to be known at compile time.
int MemoryElements;
int *CudaMemory;

MemoryElements = 10;

// cudaMalloc takes the ADDRESS of the pointer and stores a device address in it.
// It returns a cudaError_t that real code should compare against cudaSuccess.
cudaMalloc( (void **)&CudaMemory, MemoryElements * sizeof(int) );

// call kernels: one thread per element
// (assumes MemoryElements does not exceed the maximum threads per block)
KernelInitializeMemory<<<1, MemoryElements>>>( CudaMemory, MemoryElements );

KernelUseMemory<<<1, MemoryElements>>>( CudaMemory, MemoryElements );

// copy back

// clean up etc.

If your GPU has compute capability 2.0 or higher (e.g. 2.1), you can also allocate memory inside the kernel itself with malloc, like so:

// Device-side pointer that receives a buffer allocated from inside a kernel.
__device__ int *MyMemory;

// Allocates room for N ints with the in-kernel malloc and stores the result in
// MyMemory. N is the desired element count and must be defined by the
// surrounding code (it is not shown in this snippet).
// Only thread 0 of block 0 allocates; without the guard every launched thread
// would allocate its own buffer and overwrite the pointer, leaking all but one.
__global__ void Kernel()
{
    if (blockIdx.x == 0 && threadIdx.x == 0)
    {
        // malloc returns void*, which C++ does not convert implicitly.
        // The result is NULL when the device heap cannot satisfy the request.
        MyMemory = (int *)malloc( sizeof(int) * N );
    }
}

etc…

Then, in another kernel, release it:

// Meant to run inside a kernel: releases the buffer that the in-kernel malloc
// stored in MyMemory. NOTE(review): call it from a single thread only, and
// only after the allocating kernel has finished — confirm against the caller.
free( MyMemory );