I have written a small program that transfers data to a GPU, performs a simple atomic add, and then transfers the data back. It is based on the simpleAtomicIntrinsics and bandwidthTest example programs. When I run make, I keep running into the following error:
make: *** No rule to make target `obj/release/atomicAddBandwithTest.cu_sm_11.o', needed by `../../bin/linux/release/atomicAddBandwidthTest'. Stop.
I am running on RedHat 5.3. All my environment variables are set correctly, and the other examples build just fine. The program I am attempting to build is in the same location as the other examples (~/NVIDIA_CUDA_SDK/projects). I am compiling for a GTX285.
My Makefile looks like this:
[codebox]# Add source files here
# Name of the binary to build.
EXECUTABLE := atomicAddBandwidthTest
# Cuda source files (compiled with cudacc)
# The name listed here must match the .cu file on disk exactly. The earlier
# spelling "atomicAddBandwithTest.cu" (missing the second 'd') is the name make
# reported in "No rule to make target obj/release/atomicAddBandwithTest.cu_sm_11.o".
# NOTE(review): assumes the source file is named atomicAddBandwidthTest.cu,
# matching EXECUTABLE -- confirm against the project directory.
CUFILES_sm_11 := atomicAddBandwidthTest.cu
#SMVERSIONFLAGS := sm_11
#CUDEPS := atomicAddBandwidthTest_kernel.cu
# C/C++ source files (compiled with gcc / c++)
CCFILES :=
################################################################################
# Rules and targets
include ../../common/common.mk
[/codebox]
My .cu file is:
[codebox]// includes, system
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <math.h>
// includes, project
#include “cutil_inline.h”
// includes, kernels
#include “atomicAddBandwidthTest_kernel.cu”
////////////////////////////////////////////////////////////////////////////////
// declaration, forward
// Host-side driver: selects a device, launches testKernel once and prints the
// elapsed time.
void runTest( int argc, char** argv);
// Device kernel launched from runTest(). A kernel has to be declared
// __global__ with a void return type; a declaration without them is not valid
// C++ and cannot be launched with the <<<...>>> syntax.
__global__ void testKernel(int* g_odata);
////////////////////////////////////////////////////////////////////////////////
// Program main
////////////////////////////////////////////////////////////////////////////////
int main(int argc, char** argv)
{
    // All of the work is done by runTest(); main only forwards the
    // command-line arguments to it.
    runTest(argc, argv);

    // SDK helper invoked last, with the same command-line arguments.
    cutilExit(argc, argv);
}
////////////////////////////////////////////////////////////////////////////////
//! Run a simple test for CUDA
////////////////////////////////////////////////////////////////////////////////
// Host-side driver for the atomic-add timing test.
//
// Steps: select a CUDA device (the one requested with the "device"
// command-line flag, otherwise the one returned by cutGetMaxGflopsDeviceId()),
// require a compute capability above 1.0, then allocate and zero a buffer,
// copy it to the device, launch testKernel once, copy the result back and
// print the elapsed time.
//
// argc, argv: the program's command-line arguments, forwarded to the cutil
//             helpers.
void
runTest( int argc, char **argv)
{
    cudaDeviceProp deviceProp;
    deviceProp.major = 0;
    deviceProp.minor = 0;
    // Initialised so it never holds an indeterminate value on the "device"
    // flag path; cudaChooseDevice() below overwrites it in either case.
    int dev = 0;

    // use command-line specified CUDA device, otherwise use device with highest Gflops/s
    if( cutCheckCmdLineFlag(argc, (const char**)argv, "device") )
        cutilDeviceInit(argc, argv);
    else
        cutilSafeCall( cudaSetDevice( dev = cutGetMaxGflopsDeviceId() ) );

    cutilSafeCall( cudaChooseDevice(&dev, &deviceProp) );
    cutilSafeCall( cudaGetDeviceProperties(&deviceProp, dev) );

    if(deviceProp.major > 1 || deviceProp.minor > 0)
    {
        printf("Using Device %d: \"%s\"\n", dev, deviceProp.name);
    }
    else
    {
        printf("There is no device supporting CUDA compute capability 1.1.\n");
        printf("TEST PASSED");
        cudaThreadExit();
        cutilExit(argc, argv);
        // NOTE(review): cutilExit() is presumably expected to end the process;
        // return anyway so the kernel is never launched on an unsupported
        // device if it does not.
        return;
    }

    // The timer is started before any allocation, so the reported time covers
    // the allocations, both copies and the kernel launch.
    unsigned int timer = 0;
    cutilCheckError( cutCreateTimer( &timer));
    cutilCheckError( cutStartTimer( timer));

    const unsigned int numThreads = 256;
    const unsigned int numBlocks = 64;
    // testKernel updates g_odata[tid] for every launched thread, so the buffer
    // needs one int per thread. A single-element buffer would be overrun by
    // every thread except thread 0.
    const unsigned int numData = numThreads * numBlocks;
    const unsigned int memSize = sizeof(int) * numData;

    // allocate mem for the result on host side
    int *h_odata = (int *)malloc(memSize);
    if (h_odata == NULL)
    {
        fprintf(stderr, "Failed to allocate %u bytes of host memory\n", memSize);
        exit(EXIT_FAILURE);
    }

    // initialize the memory to zero
    memset(h_odata, 0, memSize);

    // allocate device memory for result
    int *d_odata;
    cutilSafeCall( cudaMalloc( (void**) &d_odata, memSize));

    // copy host memory to device to initialize to zeros
    cutilSafeCall( cudaMemcpy( d_odata, h_odata, memSize, cudaMemcpyHostToDevice) );

    // execute the kernel
    testKernel<<<numBlocks, numThreads>>>(d_odata);
    cutilCheckMsg("Kernel execution failed");

    // Copy result from device to host
    cutilSafeCall( cudaMemcpy( h_odata, d_odata, memSize, cudaMemcpyDeviceToHost) );

    cutilCheckError( cutStopTimer( timer));
    printf( "Processing time: %f (ms)\n", cutGetTimerValue( timer));
    cutilCheckError( cutDeleteTimer( timer));

    // cleanup memory
    free(h_odata);
    cutilSafeCall(cudaFree(d_odata));
    cudaThreadExit();
}
// Kernel: every thread atomically adds 10 to its own element of g_odata.
//
// Launch layout: 1D grid of 1D blocks; the element index is
// blockDim.x * blockIdx.x + threadIdx.x.
// Precondition: g_odata must point to at least gridDim.x * blockDim.x ints of
// device memory. The kernel takes no length argument, so it cannot check
// bounds itself.
// The host code and Makefile target compute capability 1.1 (sm_11) for this
// atomicAdd.
__global__ void
testKernel(int* g_odata)
{
    // access thread id
    const unsigned int tid = blockDim.x * blockIdx.x + threadIdx.x;

    // Atomic addition on this thread's own element
    atomicAdd(&g_odata[tid], 10);
}[/codebox]
Any constructive suggestions would be greatly appreciated.