Global Memory across kernel calls

Hey all,

Say I have the following piece of code…

INSIDE KERNEL :

[codebox]__global__ void d_kernel(float *globalMem, int numBlocks, int i)

{

//Compute tempx, tempy

if(i==0)

globalMem[tempx+(tempy*X_SIZE)] = 2.5;

else if(i==1)

globalMem[tempx+(tempy*X_SIZE)] = tex2D(tex00,tempx,tempy);

}

[/codebox]

INSIDE MAIN :

[codebox]

texture<float, 2, cudaReadModeElementType> tex00;

float *globalMem,*globalMem2;

cudaMallocPitch((void**) &globalMem, &sizeGM, sizeof(float) * X_SIZE,Y_SIZE);

cudaMallocPitch((void**) &globalMem2, &sizeGM, sizeof(float) * X_SIZE,Y_SIZE);

d_kernel<<<dim3(numBlocks,numBlocks),dim3(BLOCK_DIM,BLOCK_DIM,1),0,0>>>(globalMem,numBlocks,0);

// I compute some value for globalMem in the above kernel. Now it is used as a texture for the next “globalMem2” - for faster accesses…

cudaBindTexture(0,tex00,globalMem,channelDesc,sizeof(float)*X_SIZE*Y_SIZE);

d_kernel<<<dim3(numBlocks,numBlocks),dim3(BLOCK_DIM,BLOCK_DIM,1),0,0>>>(globalMem2,numBlocks,1);

int z =0;

CUDA_SAFE_CALL(cudaMemcpy2DAsync(&returned[X_SIZE*Y_SIZE*z], X_SIZE*sizeof(float), globalMem2, X_SIZE*sizeof(float), X_SIZE*sizeof(float),Y_SIZE, cudaMemcpyDeviceToHost,0));

[/codebox]

When I then inspect “returned”, shouldn’t it contain the value 2.5? It doesn’t. So I conclude that global memory gets flushed after every kernel execution. Is this true? Is there any way to keep the data in global memory across kernel calls?

Thanks,

Vandhan

You aren’t checking for errors after your kernel launch. I’m guessing that the kernel is not launching because your texture is not properly bound. 2D textures must be bound to an array with cudaBindTextureToArray(). cudaBindTexture is only for 1D data read with tex1Dfetch.

Thanks a lot… But it doesn’t seem to work with tex1Dfetch either. Can’t I use a texture on globalMem? My new code is below:

[codebox]

texture<float, 1, cudaReadModeElementType> tex00;

float *globalMem, *globalMem2;

cudaMalloc((void**) &globalMem, sizeof(float) * X_SIZE,Y_SIZE);

cudaMalloc((void**) &globalMem2, sizeof(float) * X_SIZE,Y_SIZE);

cudaBindTexture(0,tex00,globalMem,channelDesc,sizeof(float)*X_SIZE*Y_SIZE);

// I STILL GET AN ERROR HERE :(

d_kernel<<<dim3(numBlocks,numBlocks),dim3(BLOCK_DIM,BLOCK_DIM,1),0,0>>>(globalMem,globalMem2,numBlocks,0);

int z =0;

cudaMemcpy2DAsync(&returned[X_SIZE*Y_SIZE*z], X_SIZE*sizeof(float), globalMem2, X_SIZE*sizeof(float), X_SIZE*sizeof(float),Y_SIZE, cudaMemcpyDeviceToHost,0);

[/codebox]

Kernel :

[codebox]

__global__ void d_kernel(float *globalMem, float *globalMem2, int numBlocks, int i)

{

//Compute tempx, tempy

if(i==0)

globalMem[tempx+(tempy*X_SIZE)] = 2.5;

else if(i==1)

globalMem2[tempx+(tempy*X_SIZE)] = tex1Dfetch(tex00,tempx+(tempy*X_SIZE));

}

[/codebox]

I basically need a way to quickly read some data JUST written into global memory without going back to the CPU…

Thanks in advance,

Vandhan