How to pass the answers after applying tex3d ?

bbadal · August 24, 2018, 7:04am

I am trying to use texture reference API by using tex3d. But I am unable to get its value after all calculations. Last, of the program, I am printing all values, but there no values are printing. Can you help me to get all values after kernel calculations?

#include<stdio.h>
#include<cuda.h>

texture<float,cudaTextureType3D,cudaReadModeElementType> texreference;

__global__ void kernel(float* dmatrix, int size)
{
	int loop;
	int xindex;
	int yindex;
	int zindex;
	
	// calculate each thread global index
	xindex=blockIdx.x*blockDim.x+threadIdx.x;
	yindex=blockIdx.y*blockDim.y+threadIdx.y;
	
	for (loop=0;loop<size;loop++)
	{
		zindex=loop;
	
		// fetch cuda array through texture reference
		dmatrix[zindex*size*size + yindex*size+xindex]=
			tex3D(texreference,xindex,yindex,zindex);
			printf("output = %f\n",dmatrix[zindex*size*size + yindex*size+xindex]);
	}
	return;
}

int main(int argc, char** argv)
{
	int size=8;

	dim3 blocknum;
	dim3 blocksize;
	
	float* hmatrix;
	float* dmatrix;
	
	cudaArray* cudaarray;
	cudaExtent volumesize;
	cudaChannelFormatDesc channel;
	
	cudaMemcpy3DParms copyparms={0};
	
	// allocate host and device memory
	hmatrix=(float*)malloc(sizeof(float)*size*size*size);
	cudaMalloc((void**)&dmatrix,sizeof(float)*size*size*size);
	
	// initialize host array before usage
	for(int loop=0; loop<size*size*size;loop++)
	{
		hmatrix[loop]= loop ;
		printf("%f\n", hmatrix[loop] );
	}

	// set cuda array volume size
	volumesize=make_cudaExtent(size,size,size);	
		
	// create channel to describe data type
	channel=cudaCreateChannelDesc<float>();	

	// allocate device memory for cuda array
	cudaMalloc3DArray(&cudaarray,&channel,volumesize);

	// set cuda array copy parameters
	copyparms.extent=volumesize;
	copyparms.dstArray=cudaarray;
	copyparms.kind=cudaMemcpyHostToDevice;
	
	copyparms.srcPtr= make_cudaPitchedPtr((void*)hmatrix,sizeof(float)*size,size,size);
	
	cudaMemcpy3D(&copyparms);

	// set texture filter mode property
	// use cudaFilterModePoint or cudaFilterModeLinear
	texreference.filterMode=cudaFilterModePoint;

	// set texture address mode property
	// use cudaAddressModeClamp or cudaAddressModeWrap
	texreference.addressMode[0]=cudaAddressModeWrap;
	texreference.addressMode[1]=cudaAddressModeWrap;
	texreference.addressMode[2]=cudaAddressModeClamp;

	// bind texture reference with cuda array
	cudaBindTextureToArray(texreference,cudaarray,channel);

	
	blocksize.x=8;
	blocksize.y=8;
	blocksize.z=8;
	
	blocknum.x=(int)ceil((float)size/8);
	blocknum.y=(int)ceil((float)size/8);
	blocknum.z=(int)ceil((float)size/8);
	
		
	// execute device kernel
	kernel<<<blocknum,blocksize>>>(dmatrix,size);
	
	// unbind texture reference to free resource
	cudaUnbindTexture(texreference);
	
	// copy result array from device to host memory
	const int bytes = sizeof(float)*size*size*size;
	cudaMemcpy(hmatrix,dmatrix,bytes,cudaMemcpyDeviceToHost);
	
	// free host and device memory

	cudaFree(dmatrix);
	cudaFreeArray(cudaarray);
	
	for(int loop=0; loop<size*size*size;loop++)
		printf("%f\n", hmatrix[loop] );
   
	free(hmatrix);
	
  	
	return 0;
}

This is my code which I am using.
I am printing values from code line numbers 113. this all is same as previous as input.

Robert_Crovella · August 24, 2018, 1:06pm

add proper CUDA error checking to your code. (google that, if need be)
run your code with cuda-memcheck

Do these things before asking others for help.