Hi!
I have just started using cuda for a project at work.
We have a .NET application in which we would like to use CUDA, via CUDA.NET, to do some image analysis.
However, I am having a hard time getting it to work.
For some reason I can’t initialize CUDA in the constructor of my class; I get an “invalid context” error if I try to do it that way.
Do I have to make new cuda object for every frame?
But if I create a new CUDA object for every frame, I get a memory leak, and after ~10-20 frames CUDA crashes because it runs out of memory. Do I have to do something more than just cuda.Free()?
I can see that all the memory for the array is released, so it is something else that is eating memory.
If I check how much memory is available right after “new CUDA.CUDA(0, true)” and again after I release the device memory for the array, it is the same. But on the next frame there is less memory available after “new CUDA.CUDA(0, true)”, and so on until it crashes. What am I doing wrong?
My thought was to have it something like this:
using System;
using System.IO;
using GASS.CUDA;
using GASS.CUDA.Types;
namespace Imaging
{
    /// <summary>
    /// Wraps a single CUDA.NET context together with the "threshold" kernel
    /// loaded from <c>test_kernel.cubin</c>.
    /// </summary>
    /// <remarks>
    /// The context and module are created once in the constructor and released
    /// once in <see cref="Dispose"/>; <see cref="DoCudaStuff"/> only allocates and
    /// frees the per-frame device buffer. Creating a new context per frame without
    /// disposing it is what exhausts device memory.
    /// NOTE(review): an "invalid context" error when the context is created in the
    /// constructor is presumably a thread-affinity issue (context created on one
    /// thread, used on another) - confirm that the constructor and
    /// <see cref="DoCudaStuff"/> run on the same thread.
    /// </remarks>
    public class CudaClass : IDisposable
    {
        // Threads per block in each dimension (16 x 16 = 256 threads per block).
        private const int BlockSizeX = 16;
        private const int BlockSizeY = 16;

        private GASS.CUDA.CUDA cuda;
        private CUfunction threshold;

        /// <summary>
        /// Creates the CUDA context on device 0 and loads the threshold kernel.
        /// </summary>
        public CudaClass()
        {
            InitializeCuda();
        }

        /// <summary>
        /// Releases the CUDA context. Allows the class to be used in a
        /// <c>using</c> statement; forwards to <see cref="dispose"/>.
        /// </summary>
        public void Dispose()
        {
            dispose();
        }

        /// <summary>
        /// Releases the CUDA context if it is still alive. Safe to call more
        /// than once. Kept for callers that already use the lower-case name.
        /// </summary>
        public void dispose()
        {
            if (this.cuda != null)
            {
                this.cuda.Dispose();
                this.cuda = null;
            }
        }

        /// <summary>
        /// Initializes CUDA on the first device, loads <c>test_kernel.cubin</c>
        /// from the current directory and looks up the "threshold" function.
        /// </summary>
        private void InitializeCuda()
        {
            // Init and select 1st device.
            cuda = new GASS.CUDA.CUDA(0, true);
            try
            {
                // Load the module and resolve the kernel entry point.
                cuda.LoadModule(Path.Combine(Environment.CurrentDirectory, "test_kernel.cubin"));
                threshold = cuda.GetModuleFunction("threshold");
            }
            catch
            {
                // Do not leave a half-initialized context behind if loading fails.
                dispose();
                throw;
            }
        }

        /// <summary>
        /// Uploads one frame to the device, runs the threshold kernel over it and
        /// copies the result back to the host.
        /// </summary>
        /// <param name="source">Frame pixels to upload to the device.</param>
        /// <param name="frameWidth">Frame width in pixels.</param>
        /// <param name="frameHeight">Frame height in pixels.</param>
        /// <remarks>
        /// Any exception raised while talking to CUDA is written to the console
        /// and not rethrown. The device buffer is always freed, even on failure.
        /// NOTE(review): the downloaded pixels are currently discarded because the
        /// method returns <c>void</c>; return or store them once the caller's
        /// needs are known.
        /// </remarks>
        public void DoCudaStuff(float[] source, int frameWidth, int frameHeight)
        {
            int frameSize = frameWidth * frameHeight;

            try
            {
                // Copy host memory to device.
                CUdeviceptr deviceInData = cuda.CopyHostToDevice<float>(source);
                try
                {
                    // Number of blocks needed to cover the frame, rounding up so
                    // partial blocks at the right/bottom edges are included.
                    // Assumes the kernel derives its pixel coordinates from
                    // blockIdx * blockDim + threadIdx and bounds-checks against
                    // the width/height it is given - TODO confirm against the kernel.
                    int gridWidth = (frameWidth + BlockSizeX - 1) / BlockSizeX;
                    int gridHeight = (frameHeight + BlockSizeY - 1) / BlockSizeY;

                    // Block shape is the number of threads per block...
                    cuda.SetFunctionBlockShape(threshold, BlockSizeX, BlockSizeY, 1);

                    // NOTE(review): the offsets below assume every kernel parameter
                    // occupies IntPtr.Size bytes, while the values are passed as
                    // 4-byte uints. That matches a (float*, int, int) kernel only in
                    // a 32-bit process - verify against the kernel signature.
                    cuda.SetParameter(threshold, 0, (uint)deviceInData.Pointer);
                    cuda.SetParameter(threshold, IntPtr.Size, (uint)frameWidth);
                    cuda.SetParameter(threshold, IntPtr.Size * 2, (uint)frameHeight);
                    cuda.SetParameterSize(threshold, (uint)IntPtr.Size * 3);

                    // ...and the launch takes the grid size in blocks.
                    cuda.Launch(threshold, gridWidth, gridHeight);

                    float[] newPixels = new float[frameSize];
                    cuda.CopyDeviceToHost<float>(deviceInData, newPixels);
                }
                finally
                {
                    // Free the per-frame buffer even if the launch or copy failed,
                    // otherwise every failed frame leaks device memory.
                    cuda.Free(deviceInData);
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex);
            }
        }
    }
}