Since upgrading to Nsight 1.5 and CUDA 3.2, some of my code has broken.
I have instantiated an object of this class:
/**
 * Host-side wrapper around the seek steering kernel.
 *
 * The object is driven through three steps, in order: init(), run(), close().
 * The vehicle group it works on is handed to the AbstractCUDAKernel base class
 * by the constructor; the seek target is stored here.
 */
class SteerForSeekCUDA : public AbstractCUDAKernel
{
public:
    /**
     * @param pVehicleGroup  Vehicle group forwarded to the base class.
     * @param target         Position to seek; copied into m_target.
     */
    SteerForSeekCUDA(VehicleGroup *pVehicleGroup, const float3 &target);
    ~SteerForSeekCUDA() {}

    /** Allocates the device buffer and uploads the vehicle data to it. */
    virtual void init();
    /** Launches SteerForSeekKernel on the uploaded data. */
    virtual void run();
    /** Downloads the vehicle data back to the host and frees the device buffer. */
    virtual void close();

protected:
    /** Seek target, passed by value to the kernel in run(). */
    float3 m_target;
};
Its definition:
// Forward declaration of the device kernel launched by SteerForSeekCUDA::run().
// It is declared with C linkage, so the symbol name is not C++-mangled.
//   vehicleData - device pointer to the vehicle data buffer
//   target      - seek target, passed by value
//   numAgents   - agent count, as returned by getNumberOfAgents() at the call site
extern "C" __global__ void SteerForSeekKernel(vehicle_data *vehicleData,
                                              float3 target,
                                              int numAgents);
/**
 * Builds a seek kernel wrapper for the given vehicle group.
 *
 * @param vehicleGroup  Vehicle group forwarded to the AbstractCUDAKernel base.
 * @param target        Position to seek; stored by value in m_target.
 */
SteerForSeekCUDA::SteerForSeekCUDA(VehicleGroup *vehicleGroup, const float3 &target)
    : AbstractCUDAKernel(vehicleGroup),
      m_target(target)
{
    // m_threadsPerBlock is not declared in this class, so it cannot appear
    // in the initializer list and is assigned here instead.
    m_threadsPerBlock = 128;
}
/**
 * Prepares the device for a run: allocates a device buffer of
 * getDataSizeInBytes() bytes and uploads the host vehicle data into it.
 * Both CUDA calls are wrapped in HANDLE_ERROR.
 */
void SteerForSeekCUDA::init(void)
{
    const size_t numBytes = getDataSizeInBytes();

    // Reserve the device-side buffer; m_pdVehicleData receives its address.
    HANDLE_ERROR(cudaMalloc(reinterpret_cast<void**>(&m_pdVehicleData), numBytes));

    // Upload: host vehicle data -> freshly allocated device buffer.
    HANDLE_ERROR(cudaMemcpy(m_pdVehicleData,
                            (void*)getVehicleData(),
                            numBytes,
                            cudaMemcpyHostToDevice));
}
/**
 * Launches SteerForSeekKernel over the device vehicle data.
 *
 * The launch configuration comes from gridDim() / blockDim(). The kernel
 * receives the device buffer, the seek target by value, and the agent count.
 *
 * A kernel launch returns no status of its own, so the launch result is read
 * back with cudaGetLastError() and routed through HANDLE_ERROR like every
 * other CUDA call in this class. This catches launch-time failures such as an
 * invalid configuration. Faults raised while the kernel executes surface at
 * the next synchronizing call instead.
 */
void SteerForSeekCUDA::run(void)
{
    dim3 grid = gridDim();
    dim3 block = blockDim();

    SteerForSeekKernel<<<grid, block>>>(m_pdVehicleData, m_target, getNumberOfAgents());

    // Without this check a failed launch would go unnoticed here.
    HANDLE_ERROR(cudaGetLastError());
}
/**
 * Finishes a run: downloads the vehicle data from the device buffer into the
 * host vehicle data, frees the device buffer, and resets the device pointer
 * to NULL. Both CUDA calls are wrapped in HANDLE_ERROR.
 */
void SteerForSeekCUDA::close(void)
{
    const size_t numBytes = getDataSizeInBytes();

    // Download: device buffer -> host vehicle data.
    HANDLE_ERROR(cudaMemcpy((void*)getVehicleData(),
                            m_pdVehicleData,
                            numBytes,
                            cudaMemcpyDeviceToHost));

    // Release the device buffer and clear the now-dangling pointer.
    HANDLE_ERROR(cudaFree(m_pdVehicleData));
    m_pdVehicleData = NULL;
}
The base classes simply maintain the data and device pointers. My instantiation code is as follows:
/**
 * Runs the seek steering kernel once for a vehicle group.
 *
 * Builds a stack-local SteerForSeekCUDA and drives it through
 * init(), run() and close(), in that order.
 *
 * @param vehicleGroup  Group whose address is handed to the kernel wrapper.
 * @param target        Position to seek.
 */
void CUDAGroupSteerLibrarySingleton::steerForSeek(VehicleGroup &vehicleGroup, const float3 &target)
{
    // Debug aid (disabled): dump the input data before the run.
    //vehicleGroup.OutputDataToFile("vehicledata.txt");

    SteerForSeekCUDA seekKernel(&vehicleGroup, target);

    seekKernel.init();   // allocate + upload
    seekKernel.run();    // launch
    seekKernel.close();  // download + free
}
All the data is set correctly (I have written it out to files for verification, as the VS debugger just shows me junk). The problem is with the call to kernel.init(), which throws the following error:
All of the SDK sample code that I have tested works fine, and I’m sure no changes were made to the code that worked with CUDA 3.1 and Nsight 1.0, although my SVN repo is older than the classes, so score 1 for poor practices. Can anyone see what I’m doing wrong, or is it a problem caused by the toolkit update?
Many thanks in advance :)