(driver api)Can not get the right values form 3d texture... texture3d, driver api

Hi, I’m learning CUDA from GPU Computing SDK 4.0 and trying to write simpleTexture3D with driver api.(Win7-64, GTX 570, 8.17.12.7032)

Right now I can create a 3d CUarray and bind a CUtexref on it, all with the CUDA_SUCCESS result. But the values from tex3D function in cu file are always zero.

Here are the codes, please help, thanks!

Host

#include <cuda.h>

#include <vector>

typedef    unsigned short    ui16;

typedef             float     f32;

void build_array3d_ui16( CUarray& array3d, size_t width, size_t height, size_t depth, const void* data )

{

	CUresult result = CUDA_ERROR_UNKNOWN;

	CUDA_ARRAY3D_DESCRIPTOR descriptor3d;

	descriptor3d.Width       = width;

	descriptor3d.Height      = height;

	descriptor3d.Depth       = depth;

	descriptor3d.Format      = CU_AD_FORMAT_UNSIGNED_INT16;

	descriptor3d.NumChannels = 1;

	descriptor3d.Flags       = CUDA_ARRAY3D_LAYERED;

	result = cuArray3DCreate( &array3d, &descriptor3d );

	CUDA_MEMCPY3D memcpy3d;

	memset( &memcpy3d, 0, sizeof( memcpy3d ) );

	memcpy3d.WidthInBytes = sizeof( ui16 ) * width;

	memcpy3d.Height       =                  height;

	memcpy3d.Depth        =                  depth;

	memcpy3d.srcMemoryType = CU_MEMORYTYPE_HOST;

	memcpy3d.srcHost = data;

	memcpy3d.srcPitch  = sizeof( ui16 ) * width;

	memcpy3d.srcHeight =                  height;

	memcpy3d.dstMemoryType = CU_MEMORYTYPE_ARRAY;

	memcpy3d.dstArray = array3d;

	result = cuMemcpy3D( &memcpy3d );

}

void create_device_buffer_f32( CUdeviceptr& deviceptr, size_t size )

{

	CUresult result = CUDA_ERROR_UNKNOWN;

	result = cuMemAlloc( &deviceptr, size * sizeof( f32 ) );

}

void copy_device_buffer_data_f32( std::vector< f32 >& data, CUdeviceptr deviceptr, size_t size )

{

	CUresult result = CUDA_ERROR_UNKNOWN;

	data.resize( size );

	result = cuMemcpyDtoH( &data.front(), deviceptr, size * sizeof( f32 ) );

}

int cuda_api_driver()

{

	CUresult result = CUDA_ERROR_UNKNOWN;

	const unsigned int flag = 0;

	result = cuInit( flag );

	CUdevice device_id = 0;

	CUcontext device_context = 0;

	result = cuCtxCreate( &device_context, 0, device_id );

	size_t width  = 2;

	size_t height = 2;

	size_t depth  = 2;

	size_t full_size = width * height * depth;

	std::vector< ui16 > data_host( full_size );

	for( size_t i = 0; i < full_size; ++i )

	{

		data_host[i] = 1 << i;

	}

	CUarray array3d = 0;

	build_array3d_ui16( array3d, width, height, depth, &data_host.front() );

	CUmodule module = 0;

	result = cuModuleLoad( &module, "test.cubin" );

	CUfunction function = 0;

	result = cuModuleGetFunction( &function, module, "test_t3d" );

	CUtexref texref = 0;

	result = cuModuleGetTexRef( &texref, module, "t3d" );

	result = cuTexRefSetArray( texref, array3d, CU_TRSA_OVERRIDE_FORMAT );

	result = cuTexRefSetAddressMode( texref, 0, CU_TR_ADDRESS_MODE_WRAP );

	result = cuTexRefSetAddressMode( texref, 1, CU_TR_ADDRESS_MODE_WRAP );

	result = cuTexRefSetAddressMode( texref, 2, CU_TR_ADDRESS_MODE_WRAP );

	result = cuTexRefSetFilterMode( texref, CU_TR_FILTER_MODE_LINEAR );

	result = cuTexRefSetFlags( texref, CU_TRSF_NORMALIZED_COORDINATES );

	result = cuTexRefSetFormat( texref, CU_AD_FORMAT_UNSIGNED_INT16, 1 );

	CUdeviceptr device_buffer = 0;

	create_device_buffer_f32( device_buffer, full_size );

	int offset = 0;

	char launch_params[256] = { 0 };

	*( ( CUdeviceptr* )&launch_params[offset] ) = device_buffer;

	offset += sizeof( device_buffer );

	void* kernel_launch_config[5] =

	{

		CU_LAUNCH_PARAM_BUFFER_POINTER,  launch_params,

		CU_LAUNCH_PARAM_BUFFER_SIZE,    &offset,

		CU_LAUNCH_PARAM_END

	};

	result = cuLaunchKernel

		(

		function,

		1, 1, 1, // just for test

		1 ,1, 1, // just for test

		0, 0, 0, ( void** )&kernel_launch_config

		);

	result = cuCtxSynchronize();

	std::vector< f32 > data;

	copy_device_buffer_data_f32( data, device_buffer, 8 ); // expecting 1, 8, 16, 128, 0, 0, 0, 0 but all 0

	result = cuMemFree( device_buffer );

	result = cuArrayDestroy( array3d );

	result = cuCtxDestroy( device_context );

	return 0;

}

Device test.cu

texture< unsigned short, 3, cudaReadModeNormalizedFloat > t3d;

extern "C" __global__ void test_t3d( float* output_data )

{

	float f = ( unsigned short )( -1 );

	output_data[0] = tex3D( t3d, 0.0f, 0.0f, 0.0f ) * f;

	output_data[1] = tex3D( t3d, 1.0f, 0.0f, 0.0f ) * f;

	output_data[2] = tex3D( t3d, 0.0f, 1.0f, 0.0f ) * f;

	output_data[3] = tex3D( t3d, 1.0f, 1.0f, 0.0f ) * f;

}

<img src=‘The Official NVIDIA Forums | NVIDIA<#EMO_DIR#>/crying.gif’ class=‘bbc_emoticon’ alt=‘:’(’ /> I hate to ask this but…is it even possible to do that? I can not find any example or code about 3d texture with dirver api.

I have the same problem. Did you ever solve this problem?