I’ve downloaded the little VisStudio project helper and used it to create a little test for a static lib. The lib compiles clean, and links in clean with my core codebase, but fails to run. “… cudaError at memory location …”
The code is intened to simply make a copy of a 640x480 byte array. Once I can do that I’ll move on to something actually worthy of the hardware. I have dual 8700 GTs running CUDA 2.0. and visStudio05 on XP sp2
I’ve validated the data I’m handing the lib. Both char arrays are 307200 in size and initialized properly. Its crashing on the kernel call.
Am I missing some basic concept? Maybe crossing a hardware limit?
Thanks for any help anyone can give.
– Troy
Here’s the code…
(I’d be happy to provide more details about env and main codebase if needed)
/********************************************************************
* sample.cu
* This is a example of the CUDA program.
*********************************************************************/
#include <stdio.h>
#include <stdlib.h>
#include <cuda_runtime.h>
#include <cutil.h>
texture< unsigned char, 2, cudaReadModeElementType > tex;
/************************************************************************/
/* Init CUDA */
/************************************************************************/
#if __DEVICE_EMULATION__
bool InitCUDA(void){return true;}
#else
bool InitCUDA(void)
{
int count = 0;
int i = 0;
cudaGetDeviceCount(&count);
if(count == 0) {
fprintf(stderr, "There is no device.\n");
return false;
}
for(i = 0; i < count; i++) {
cudaDeviceProp prop;
if(cudaGetDeviceProperties(&prop, i) == cudaSuccess) {
if(prop.major >= 1) {
break;
}
}
}
if(i == count) {
fprintf(stderr, "There is no device supporting CUDA.\n");
return false;
}
cudaSetDevice(i);
printf("CUDA initialized.\n");
return true;
}
#endif
/************************************************************************/
/* Example */
/************************************************************************/
__global__ static void HelloCUDA( unsigned char * result, unsigned int width, unsigned int height )
{
// calculate normalized texture coordinates
unsigned int x = blockIdx.x*blockDim.x + threadIdx.x; //width ( col ) ?
unsigned int y = blockIdx.y*blockDim.y + threadIdx.y; //height ( row )?
result[y*width + x] = tex2D( tex, (float) x, (float) y );
}
/************************************************************************/
/* HelloCUDA */
/************************************************************************/
int TestHelloCUDA(unsigned char * inImage, unsigned char * outImage, unsigned int width, unsigned int height )
{
if(!InitCUDA()) {
return 0;
}
/*------------------------------------------------------------------------*/
/* Set up device blocking */
/*------------------------------------------------------------------------*/
dim3 dimBlock( 20, 15 );
dim3 dimGrid( 32, 32 );
/*------------------------------------------------------------------------*/
/* prepare memory */
/*------------------------------------------------------------------------*/
unsigned char * d_data = NULL;
unsigned int size = width * height;
cudaChannelFormatDesc channelDesc = cudaCreateChannelDesc(8, 0, 0, 0, cudaChannelFormatKindUnsigned);
cudaArray* cu_array;
// set texture parameters
tex.addressMode[0] = cudaAddressModeClamp;
tex.addressMode[1] = cudaAddressModeClamp;
tex.filterMode = cudaFilterModeLinear;
tex.normalized = false;
// allocate device memory for result
CUDA_SAFE_CALL( cudaMalloc( (void**) &d_data, size * sizeof( *d_data ) ));
// allocate array and copy image data
CUDA_SAFE_CALL( cudaMallocArray( &cu_array, &channelDesc, width, height ));
CUDA_SAFE_CALL( cudaMemcpyToArray( cu_array, 0, 0, inImage, size, cudaMemcpyHostToDevice));
CUDA_SAFE_CALL( cudaBindTextureToArray( tex, cu_array, channelDesc));
/*------------------------------------------------------------------------*/
/* make kernel call */
/*------------------------------------------------------------------------*/
CUT_CHECK_ERROR("prefail");
HelloCUDA<<< dimGrid, dimBlock >>>( d_data, width, height );
CUT_CHECK_ERROR("postfailed");
CUDA_SAFE_CALL( cudaThreadSynchronize() );
/*------------------------------------------------------------------------*/
/* copy result from device to host */
/*------------------------------------------------------------------------*/
CUDA_SAFE_CALL( cudaMemcpy( outImage, d_data, size, cudaMemcpyDeviceToHost) );
/*------------------------------------------------------------------------*/
/* cleanup memory */
/*------------------------------------------------------------------------*/
CUDA_SAFE_CALL(cudaFree(d_data));
CUDA_SAFE_CALL(cudaFreeArray(cu_array));
return 0;
}