This is more of a comment really.
It seems to be possible with the existing CUDA 3.0 and FERMI to implement dynamic memory allocation using new and delete.
[codebox]
// Device-side manager for all the memory blocks that make up the pool.
// Per the design note, allocation is meant to be coordinated with atomics
// and synchronization; the implementation is not part of this sketch.
class Allocator
{
public:
    // Intended to hand out `size` units of pool memory satisfying `alignment`.
    // NOTE(review): behaviour when the pool is exhausted is not specified
    // in this sketch -- confirm against the implementation.
    __device__ void * allocate(unsigned int size, unsigned int alignment);

    // Intended to give a block back to the pool; `size` is presumably the
    // size that was originally requested -- TODO confirm.
    __device__ void deallocate(void *, unsigned int size);
};
// Intended to be launched with a single CUDA thread: adds the memory "d" of
// the given size to the pool and stores a memory block id (presumably
// written through memBlockId -- TODO confirm).
__global__ void addToPool(void * d, unsigned int size, int * memBlockId);
// Counterpart to addToPool: presumably removes the memory block identified
// by memBlockId from the pool (implementation not shown -- TODO confirm).
__global__ void rmFromPool(int memBlockId);
// Base class providing a class-specific operator new that forwards to the
// pool allocator; classes deriving from it pick that operator up.
// NOTE(review): `allocator` is not declared in this snippet -- presumably a
// device-visible Allocator instance defined elsewhere; confirm.
class dynamic_alloc
{
public:
// Forwards the requested size `t` to allocator.allocate().
// `XXX (alignment)` is a placeholder left by the author: the alignment
// value to request still has to be filled in.
__device__ void * operator new(size_t t) { return allocator.allocate(t, XXX (alignment)); }
// delete as well...
// (the matching operator delete is only mentioned, not shown, in this snippet)
};
// Example type: by deriving publicly from dynamic_alloc, `new SomeObject`
// resolves to the operator new declared in that base class.
class SomeObject : public dynamic_alloc {};
// Example kernel: creates a SomeObject from device code, going through the
// operator new that SomeObject inherits from dynamic_alloc.
// NOTE(review): the object is never released here; the sketch only mentions
// a matching delete ("delete as well...") without defining it.
__global__ void func()
{
    SomeObject * a = new SomeObject;
}
[/codebox]