Compilation errors when attempting to use the sm_11 target in a Makefile

I have written a small program that transfers data to a GPU, performs a simple atomic add, and then transfers the data back. It is based on the simpleAtomicIntrinsics and bandwidthTest example programs. The problem I keep running into when running make is: make: *** No rule to make target 'obj/release/atomicAddBandwithTest.cu_sm_11.o', needed by '../../bin/linux/release/atomicAddBandwidthTest'. Stop.

I am running on RedHat 5.3, all my environment variables are set correctly, and the other examples build just fine. The program I am attempting to build is in the same location as the other examples (~/NVIDIA_CUDA_SDK/projects). I am compiling for a GTX285.

My Makefile looks like this:

[codebox]# Add source files here

EXECUTABLE := atomicAddBandwidthTest

# Cuda source files (compiled with cudacc)
# NOTE: the name listed here must match the .cu file on disk exactly. The
# previous value, "atomicAddBandwithTest.cu" (missing the 'd' in "Bandwidth"),
# makes make look for a source file that does not exist, which produces
# "No rule to make target obj/release/atomicAddBandwithTest.cu_sm_11.o".
# Assumes the source file is named like the executable - confirm on disk.
CUFILES_sm_11 := atomicAddBandwidthTest.cu

#SMVERSIONFLAGS := sm_11

#CUDEPS := atomicAddBandwidthTest_kernel.cu

# C/C++ source files (compiled with gcc / c++)
CCFILES :=

################################################################################
# Rules and targets

include ../../common/common.mk

[/codebox]

my cu file is:

[codebox]// includes, system

#include <stdlib.h>

#include <stdio.h>

#include <string.h>

#include <math.h>

// includes, project

#include "cutil_inline.h"

// includes, kernels

#include "atomicAddBandwidthTest_kernel.cu"

////////////////////////////////////////////////////////////////////////////////

// declaration, forward

// Host-side test driver.
void runTest( int argc, char** argv);

// Kernel prototype. A function launched with <<<...>>> must be declared
// __global__ and return void; without both, this line is not a valid
// declaration.
__global__ void testKernel(int* g_odata);

////////////////////////////////////////////////////////////////////////////////

// Program main

////////////////////////////////////////////////////////////////////////////////

// Entry point: forwards the command line to runTest(), then hands the same
// arguments to cutilExit() for the SDK's shutdown handling.
int main(int argc, char** argv)
{
    runTest(argc, argv);
    cutilExit(argc, argv);
}

////////////////////////////////////////////////////////////////////////////////

//! Run a simple test for CUDA

////////////////////////////////////////////////////////////////////////////////

//! Selects a CUDA device, runs testKernel over a zero-initialised device
//! buffer, copies the buffer back to the host and prints the elapsed time.
//!
//! @param argc  number of command-line arguments (forwarded to the cutil helpers)
//! @param argv  command-line arguments; a "device" flag selects the device
void
runTest( int argc, char **argv)
{
    // Use the command-line specified CUDA device, otherwise the device that
    // cutGetMaxGflopsDeviceId() reports.
    if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") )
        cutilDeviceInit(argc, argv);
    else
        cutilSafeCall( cudaSetDevice( cutGetMaxGflopsDeviceId() ) );

    // Query the device that is actually current instead of calling
    // cudaChooseDevice() with an uninitialised property struct, which could
    // report a different device than the one selected above.
    // NOTE(review): presumes cutilDeviceInit() makes its device current - confirm.
    int dev = 0;
    cutilSafeCall( cudaGetDevice(&dev) );

    cudaDeviceProp deviceProp;
    cutilSafeCall( cudaGetDeviceProperties(&deviceProp, dev) );

    if(deviceProp.major > 1 || deviceProp.minor > 0)
    {
        printf("Using Device %d: \"%s\"\n", dev, deviceProp.name);
    }
    else
    {
        printf("There is no device supporting CUDA compute capability 1.1.\n");
        printf("TEST PASSED");
        cudaThreadExit();
        cutilExit(argc, argv);
        // Never launch the kernel on an unsupported device, even if
        // cutilExit() were to return.
        return;
    }

    unsigned int timer = 0;
    cutilCheckError( cutCreateTimer( &timer));
    cutilCheckError( cutStartTimer( timer));

    const unsigned int numThreads = 256;
    const unsigned int numBlocks = 64;

    // The kernel writes g_odata[tid] for every launched thread, so the buffer
    // needs one int per thread. A single-element buffer would be written far
    // out of bounds.
    const unsigned int numData = numThreads * numBlocks;
    const size_t memSize = sizeof(int) * numData;

    // allocate and zero memory for the result on the host side
    int *h_odata = (int *)malloc(memSize);
    if(h_odata == NULL)
    {
        fprintf(stderr, "Failed to allocate %lu bytes of host memory\n",
                (unsigned long)memSize);
        cudaThreadExit();
        exit(EXIT_FAILURE);
    }
    memset(h_odata, 0, memSize);

    // allocate device memory for the result
    int *d_odata = NULL;
    cutilSafeCall( cudaMalloc( (void**) &d_odata, memSize));

    // copy host memory to device to initialize it to zeros
    cutilSafeCall( cudaMemcpy( d_odata, h_odata, memSize, cudaMemcpyHostToDevice) );

    // execute the kernel
    testKernel<<<numBlocks, numThreads>>>(d_odata);
    cutilCheckMsg("Kernel execution failed");

    // copy result from device to host
    cutilSafeCall( cudaMemcpy( h_odata, d_odata, memSize, cudaMemcpyDeviceToHost) );

    cutilCheckError( cutStopTimer( timer));
    printf( "Processing time: %f (ms)\n", cutGetTimerValue( timer));
    cutilCheckError( cutDeleteTimer( timer));

    // cleanup memory
    free(h_odata);
    cutilSafeCall(cudaFree(d_odata));

    cudaThreadExit();
}

//! Kernel: every thread atomically adds 10 to its own element of g_odata.
//!
//! Launch layout: 1D grid of 1D blocks; each thread uses the flat index
//! blockDim.x * blockIdx.x + threadIdx.x.
//! Precondition: g_odata must point to device memory holding at least
//! gridDim.x * blockDim.x ints. There is no bounds check because the kernel
//! does not receive the element count.
//! Requires compute capability 1.1+ (integer atomicAdd on global memory).
//!
//! @param g_odata  device buffer updated in place
__global__ void
testKernel(int* g_odata)
{
    // flat global thread index
    const unsigned int tid = blockDim.x * blockIdx.x + threadIdx.x;

    // Arithmetic atomic instruction: atomic addition
    atomicAdd(&g_odata[tid], 10);
}

Any constructive suggestions would be greatly appreciated.