compile as static lib - runtime issue

I’ve downloaded the little VisStudio project helper and used it to create a little test for a static lib. The lib compiles clean, and links in clean with my core codebase, but fails to run. “… cudaError at memory location …”

The code is intended to simply make a copy of a 640x480 byte array. Once I can do that I’ll move on to something actually worthy of the hardware. I have dual 8700 GTs running CUDA 2.0 and Visual Studio 2005 on XP SP2.

I’ve validated the data I’m handing the lib. Both char arrays are 307200 bytes in size and initialized properly. It’s crashing on the kernel call.

Am I missing some basic concept? Maybe crossing a hardware limit?

Thanks for any help anyone can give.

– Troy

Here’s the code…

(I’d be happy to provide more details about env and main codebase if needed)

/********************************************************************

*  sample.cu

*  This is an example of a CUDA program.

*********************************************************************/

#include <stdio.h>

#include <stdlib.h>

#include <cuda_runtime.h>

#include <cutil.h>

// 2D texture reference over unsigned char texels, fetched as raw element
// values (cudaReadModeElementType). TestHelloCUDA binds it to a cudaArray
// holding the input image and the HelloCUDA kernel samples it with tex2D.
texture< unsigned char, 2, cudaReadModeElementType > tex;

/************************************************************************/

/* Init CUDA                                                            */

/************************************************************************/

#if __DEVICE_EMULATION__

// Emulation build: there is no hardware to pick, so initialization always succeeds.
bool InitCUDA(void){return true;}

#else

/**
 * Select the first device that reports compute capability major >= 1.
 *
 * Prints a diagnostic to stderr and returns false when the device count
 * cannot be queried, when no device is present, when no device qualifies,
 * or when the chosen device cannot be made current. On success prints
 * "CUDA initialized." to stdout and returns true.
 */
bool InitCUDA(void)
{
	int count = 0;
	int i = 0;

	// Do not trust `count` unless the query itself succeeded.
	cudaError_t err = cudaGetDeviceCount(&count);
	if(err != cudaSuccess) {
		fprintf(stderr, "cudaGetDeviceCount failed: %s\n", cudaGetErrorString(err));
		return false;
	}

	if(count == 0) {
		fprintf(stderr, "There is no device.\n");
		return false;
	}

	// Stop at the first device whose properties can be read and that
	// reports a major compute capability of at least 1.
	for(i = 0; i < count; i++) {
		cudaDeviceProp prop;
		if(cudaGetDeviceProperties(&prop, i) == cudaSuccess) {
			if(prop.major >= 1) {
				break;
			}
		}
	}

	if(i == count) {
		fprintf(stderr, "There is no device supporting CUDA.\n");
		return false;
	}

	err = cudaSetDevice(i);
	if(err != cudaSuccess) {
		// cudaSetDevice may be rejected when the calling thread already has an
		// active context (e.g. this function is called a second time). That is
		// harmless as long as the device already in use is the one we wanted.
		int current = -1;
		if(cudaGetDevice(&current) != cudaSuccess || current != i) {
			fprintf(stderr, "cudaSetDevice(%d) failed: %s\n", i, cudaGetErrorString(err));
			return false;
		}
		// Clear the recorded error so that later cudaGetLastError-based checks
		// do not report this benign failure.
		cudaGetLastError();
	}

	printf("CUDA initialized.\n");
	return true;
}

#endif

/************************************************************************/

/* Example                                                              */

/************************************************************************/

/**
 * HelloCUDA - copy the image bound to the texture `tex` into `result`.
 *
 * Launch layout: 2D grid of 2D blocks, one thread per pixel. The grid may
 * overshoot the image; threads outside width x height exit without writing.
 *
 * Preconditions: `tex` is bound to a cudaArray of at least width x height
 * texels and uses unnormalized coordinates; `result` points to at least
 * width * height bytes of device memory, stored row-major with no padding.
 */
__global__ static void HelloCUDA( unsigned char * result, unsigned int width, unsigned int height )
{
  // unnormalized pixel coordinates handled by this thread
  const unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; // column
  const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; // row

  // the grid is rounded up to whole blocks, so tail threads must not write
  if( x >= width || y >= height )
    return;

  // Sample at the texel centre (+0.5): with unnormalized coordinates this
  // addresses exactly texel (x, y).
  result[(size_t) y*width + x] = tex2D( tex, (float) x + 0.5f, (float) y + 0.5f );
}

/************************************************************************/
/* HelloCUDA                                                            */
/************************************************************************/

/**
 * Copy a width x height 8-bit image from inImage to outImage by routing it
 * through a 2D texture on the device.
 *
 * @param inImage   host buffer holding width * height bytes, row-major
 * @param outImage  host buffer receiving width * height bytes
 * @param width     image width in pixels
 * @param height    image height in pixels
 * @return 0 in every case (including InitCUDA failure and empty input), as
 *         before; the return value does not signal success or failure.
 *
 * CUDA errors are reported through the cutil CUDA_SAFE_CALL / CUT_CHECK_ERROR
 * macros. NOTE(review): in the SDK's cutil.h these macros appear to be active
 * only in _DEBUG builds - confirm before relying on them in release builds.
 */
int TestHelloCUDA(unsigned char * inImage, unsigned char * outImage, unsigned int width, unsigned int height )
{
  if(!InitCUDA()) {
    return 0;
  }

  // Nothing to copy; also avoids zero-sized allocations and launches.
  if( inImage == NULL || outImage == NULL || width == 0 || height == 0 ) {
    return 0;
  }

  /*------------------------------------------------------------------------*/
  /* Set up device blocking                                                 */
  /*------------------------------------------------------------------------*/
  // 16x16 = 256 threads per block (a multiple of the warp size). The grid is
  // the image size divided by the block size, rounded up, so any image size
  // is covered; the kernel discards the overshoot. width and height are
  // non-zero here, so (n - 1) / b + 1 cannot wrap around.
  const unsigned int blockW = 16;
  const unsigned int blockH = 16;
  dim3 dimBlock( blockW, blockH );
  dim3 dimGrid( (width - 1) / blockW + 1, (height - 1) / blockH + 1 );

  /*------------------------------------------------------------------------*/
  /* prepare memory                                                         */
  /*------------------------------------------------------------------------*/
  unsigned char * d_data = NULL;
  // size_t so that width * height cannot overflow a 32-bit product
  const size_t size = (size_t) width * (size_t) height;
  cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(8, 0, 0, 0, cudaChannelFormatKindUnsigned);
  cudaArray* cu_array = NULL;

  // set texture parameters
  tex.addressMode[0] = cudaAddressModeClamp;
  tex.addressMode[1] = cudaAddressModeClamp;
  // Point sampling: linear filtering is not supported for a texture that
  // returns a non-float type (unsigned char with cudaReadModeElementType)
  // and made the kernel launch fail.
  tex.filterMode = cudaFilterModePoint;
  tex.normalized = false;

  // allocate device memory for result
  CUDA_SAFE_CALL( cudaMalloc( (void**) &d_data, size * sizeof( *d_data ) ));

  // allocate array and copy image data
  CUDA_SAFE_CALL( cudaMallocArray( &cu_array, &channelDesc, width, height ));
  CUDA_SAFE_CALL( cudaMemcpyToArray( cu_array, 0, 0, inImage, size, cudaMemcpyHostToDevice));
  CUDA_SAFE_CALL( cudaBindTextureToArray( tex, cu_array, channelDesc));

  /*------------------------------------------------------------------------*/
  /* make kernel call                                                       */
  /*------------------------------------------------------------------------*/
  CUT_CHECK_ERROR("prefail");
  HelloCUDA<<< dimGrid, dimBlock >>>( d_data, width, height );
  CUT_CHECK_ERROR("postfailed");
  CUDA_SAFE_CALL( cudaThreadSynchronize() );

  /*------------------------------------------------------------------------*/
  /* copy result from device to host                                        */
  /*------------------------------------------------------------------------*/
  CUDA_SAFE_CALL( cudaMemcpy( outImage, d_data, size, cudaMemcpyDeviceToHost) );

  /*------------------------------------------------------------------------*/
  /* cleanup memory                                                         */
  /*------------------------------------------------------------------------*/
  // unbind first so the texture never refers to a freed array
  CUDA_SAFE_CALL(cudaUnbindTexture(tex));
  CUDA_SAFE_CALL(cudaFree(d_data));
  CUDA_SAFE_CALL(cudaFreeArray(cu_array));

  return 0;
}

In an effort to try and fix this I’ve moved all my code into a single project.
The error still exists so I’ve ruled out the static lib linking as the issue.
It seems that the unsigned char * I pass to the kernel is somehow invalid (d_data).
I get a successful return on the cudaMalloc call but when the kernel call executes I get an error on cudaLaunch.

If anyone would take the time to look at this and provide some feedback or guidance, I’d be forever grateful.
–Troy

Eureka!!
this post details my issue
http://forums.nvidia.com/lofiversion/index.php?t30448.html

In a nutshell:
the linear filtering texture option (cudaFilterModeLinear) is not supported for textures that return non-float types.

On a side note, as I’ve become more familiar with this forum, I’ve realized that this post is in the wrong spot. If an admin would be kind enough to move it to the “programming and development” section I’d appreciate it.

– Troy