Method invocation failure on a C++ object compiled with nvcc

Since upgrading to Nsight 1.5 and CUDA 3.2, some of my code has stopped working.

I have instantiated an object of this class:

// Host-side wrapper around the SteerForSeekKernel launch. It keeps the target
// handed to the constructor and exposes an init / run / close life cycle.
class SteerForSeekCUDA : public AbstractCUDAKernel
{
public:
	SteerForSeekCUDA(VehicleGroup *pVehicleGroup, const float3 &target);
	~SteerForSeekCUDA() {}

	// Allocates the device buffer and uploads the vehicle data to it.
	virtual void init();
	// Launches SteerForSeekKernel on the uploaded data.
	virtual void run();
	// Downloads the vehicle data again and releases the device buffer.
	virtual void close();

protected:
	// Target value forwarded by value to the kernel in run().
	float3 m_target;
};

definition:

// Declaration of the device kernel launched from SteerForSeekCUDA::run().
// Only the prototype is visible here; extern "C" gives it C linkage.
extern "C" __global__ void SteerForSeekKernel(vehicle_data *vehicleData, float3 target, int numAgents);

// Forwards vehicleGroup to the AbstractCUDAKernel constructor, stores a copy of
// target in m_target and sets m_threadsPerBlock to 128.
SteerForSeekCUDA::SteerForSeekCUDA(VehicleGroup *vehicleGroup, const float3 &target)
	: AbstractCUDAKernel(vehicleGroup),
	  m_target(target)
{
	// m_threadsPerBlock is not declared in this class (presumably inherited),
	// so it cannot appear in the initializer list and is assigned here.
	m_threadsPerBlock = 128;
}

// Allocates the device-side vehicle buffer and uploads the host vehicle data
// into it. Both CUDA calls are checked through HANDLE_ERROR.
void SteerForSeekCUDA::init(void)
{
	// Reserve getDataSizeInBytes() bytes on the device; cudaMalloc writes the
	// resulting address into m_pdVehicleData.
	void **deviceSlot = (void**)&m_pdVehicleData;
	HANDLE_ERROR(cudaMalloc(deviceSlot, getDataSizeInBytes()));

	// Host -> device transfer of the data returned by getVehicleData().
	void *hostData = (void*)getVehicleData();
	HANDLE_ERROR(cudaMemcpy(m_pdVehicleData, hostData, getDataSizeInBytes(), cudaMemcpyHostToDevice));
}

// Launches SteerForSeekKernel on the device buffer with the launch dimensions
// returned by gridDim() / blockDim(), then checks that the launch was accepted.
void SteerForSeekCUDA::run(void)
{
	dim3 grid = gridDim();
	dim3 block = blockDim();

	SteerForSeekKernel<<<grid, block>>>(m_pdVehicleData, m_target, getNumberOfAgents());

	// A <<<...>>> launch has no return value, so a rejected launch (for
	// example an invalid grid/block configuration) is only visible through
	// cudaGetLastError(). Without this check the failure is silently dropped.
	HANDLE_ERROR(cudaGetLastError());
}

// Copies the vehicle data from the device back to the host, frees the device
// buffer and resets m_pdVehicleData to NULL.
void SteerForSeekCUDA::close(void)
{
	// This function leaves m_pdVehicleData as NULL once the buffer is freed,
	// so a NULL pointer means there is nothing to copy back or release.
	// Returning here makes a repeated close() a no-op instead of a failing
	// cudaMemcpy from a NULL device pointer.
	if (m_pdVehicleData == NULL)
		return;

	// Copy vehicle data back to the host memory.
	HANDLE_ERROR(cudaMemcpy((void*)getVehicleData(), m_pdVehicleData, getDataSizeInBytes(), cudaMemcpyDeviceToHost));

	// Deallocate device memory.
	HANDLE_ERROR(cudaFree(m_pdVehicleData));
	m_pdVehicleData = NULL;
}

The base classes simply maintain the data and device pointers. My instantiation code is as follows:

// Builds a SteerForSeekCUDA for the given group and target and drives it
// through its complete init -> run -> close cycle.
void CUDAGroupSteerLibrarySingleton::steerForSeek(VehicleGroup &vehicleGroup, const float3 &target)
{
	// Debug aid (disabled): vehicleGroup.OutputDataToFile("vehicledata.txt");

	SteerForSeekCUDA seekKernel(&vehicleGroup, target);

	seekKernel.init();   // allocate device memory and upload the vehicle data
	seekKernel.run();    // launch the kernel
	seekKernel.close();  // download the results and free device memory
}

All the data is set up correctly (I have written it out to files to verify, as the VS debugger just shows me junk). The problem is with the call to kernel.init(), which throws the following error:

All of the SDK sample code that I have tested works fine, and I’m sure no changes were made to the code that was working (with CUDA 3.1 and Nsight 1.0), although my svn repo is older than these classes, so score one for poor practices. Can anyone see what I’m doing wrong, or is it a problem caused by the toolkit update?

Many thanks in advance :)

Since upgrading to Nsight 1.5 and CUDA 3.2, some of my code has stopped working.

I have instantiated an object of this class:

// Host-side wrapper around the SteerForSeekKernel launch. It keeps the target
// handed to the constructor and exposes an init / run / close life cycle.
class SteerForSeekCUDA : public AbstractCUDAKernel
{
public:
	SteerForSeekCUDA(VehicleGroup *pVehicleGroup, const float3 &target);
	~SteerForSeekCUDA() {}

	// Allocates the device buffer and uploads the vehicle data to it.
	virtual void init();
	// Launches SteerForSeekKernel on the uploaded data.
	virtual void run();
	// Downloads the vehicle data again and releases the device buffer.
	virtual void close();

protected:
	// Target value forwarded by value to the kernel in run().
	float3 m_target;
};

definition:

// Declaration of the device kernel launched from SteerForSeekCUDA::run().
// Only the prototype is visible here; extern "C" gives it C linkage.
extern "C" __global__ void SteerForSeekKernel(vehicle_data *vehicleData, float3 target, int numAgents);

// Forwards vehicleGroup to the AbstractCUDAKernel constructor, stores a copy of
// target in m_target and sets m_threadsPerBlock to 128.
SteerForSeekCUDA::SteerForSeekCUDA(VehicleGroup *vehicleGroup, const float3 &target)
	: AbstractCUDAKernel(vehicleGroup),
	  m_target(target)
{
	// m_threadsPerBlock is not declared in this class (presumably inherited),
	// so it cannot appear in the initializer list and is assigned here.
	m_threadsPerBlock = 128;
}

// Allocates the device-side vehicle buffer and uploads the host vehicle data
// into it. Both CUDA calls are checked through HANDLE_ERROR.
void SteerForSeekCUDA::init(void)
{
	// Reserve getDataSizeInBytes() bytes on the device; cudaMalloc writes the
	// resulting address into m_pdVehicleData.
	void **deviceSlot = (void**)&m_pdVehicleData;
	HANDLE_ERROR(cudaMalloc(deviceSlot, getDataSizeInBytes()));

	// Host -> device transfer of the data returned by getVehicleData().
	void *hostData = (void*)getVehicleData();
	HANDLE_ERROR(cudaMemcpy(m_pdVehicleData, hostData, getDataSizeInBytes(), cudaMemcpyHostToDevice));
}

// Launches SteerForSeekKernel on the device buffer with the launch dimensions
// returned by gridDim() / blockDim(), then checks that the launch was accepted.
void SteerForSeekCUDA::run(void)
{
	dim3 grid = gridDim();
	dim3 block = blockDim();

	SteerForSeekKernel<<<grid, block>>>(m_pdVehicleData, m_target, getNumberOfAgents());

	// A <<<...>>> launch has no return value, so a rejected launch (for
	// example an invalid grid/block configuration) is only visible through
	// cudaGetLastError(). Without this check the failure is silently dropped.
	HANDLE_ERROR(cudaGetLastError());
}

// Copies the vehicle data from the device back to the host, frees the device
// buffer and resets m_pdVehicleData to NULL.
void SteerForSeekCUDA::close(void)
{
	// This function leaves m_pdVehicleData as NULL once the buffer is freed,
	// so a NULL pointer means there is nothing to copy back or release.
	// Returning here makes a repeated close() a no-op instead of a failing
	// cudaMemcpy from a NULL device pointer.
	if (m_pdVehicleData == NULL)
		return;

	// Copy vehicle data back to the host memory.
	HANDLE_ERROR(cudaMemcpy((void*)getVehicleData(), m_pdVehicleData, getDataSizeInBytes(), cudaMemcpyDeviceToHost));

	// Deallocate device memory.
	HANDLE_ERROR(cudaFree(m_pdVehicleData));
	m_pdVehicleData = NULL;
}

The base classes simply maintain the data and device pointers. My instantiation code is as follows:

// Builds a SteerForSeekCUDA for the given group and target and drives it
// through its complete init -> run -> close cycle.
void CUDAGroupSteerLibrarySingleton::steerForSeek(VehicleGroup &vehicleGroup, const float3 &target)
{
	// Debug aid (disabled): vehicleGroup.OutputDataToFile("vehicledata.txt");

	SteerForSeekCUDA seekKernel(&vehicleGroup, target);

	seekKernel.init();   // allocate device memory and upload the vehicle data
	seekKernel.run();    // launch the kernel
	seekKernel.close();  // download the results and free device memory
}

All the data is set up correctly (I have written it out to files to verify, as the VS debugger just shows me junk). The problem is with the call to kernel.init(), which throws the following error:

[attachment=24095:cuda_error.png]

All of the SDK sample code that I have tested works fine, and I’m sure no changes were made to the code that was working (with CUDA 3.1 and Nsight 1.0), although my svn repo is older than these classes, so score one for poor practices. Can anyone see what I’m doing wrong, or is it a problem caused by the toolkit update?

Many thanks in advance :)

I just downloaded and reinstalled the older versions of the toolkit and SDK (3.1) and Nsight (1.0), and everything works fine with them. The only difference I can see is that I used the 32-bit version of the 3.1 toolkit/SDK, whereas for 3.2 I used the 64-bit version. Would a 32-bit app trying to call code compiled with the 64-bit release of nvcc cause this problem? I don’t want to download too much over my family’s internet connection, so I’ll try the 32-bit release of the toolkit/SDK when I get home after the weekend.

If it was caused by the target machine/sdk version mismatch, maybe a more verbose message or warning is needed.

I just downloaded and reinstalled the older versions of the toolkit and SDK (3.1) and Nsight (1.0), and everything works fine with them. The only difference I can see is that I used the 32-bit version of the 3.1 toolkit/SDK, whereas for 3.2 I used the 64-bit version. Would a 32-bit app trying to call code compiled with the 64-bit release of nvcc cause this problem? I don’t want to download too much over my family’s internet connection, so I’ll try the 32-bit release of the toolkit/SDK when I get home after the weekend.

If it was caused by the target machine/sdk version mismatch, maybe a more verbose message or warning is needed.