I’m trying to calculate the histogram of a series of images, but I keep getting errors. Here’s a self-contained example (I use a test image where every pixel is set to a constant value, so I expect the histogram to have only one non-zero bin).
#include <cstdio>

#include "cuda_runtime.h"
#include "npp.h"
void TestHistogram()
{
int srcWidth = 1344;
int srcHeight = 1344;
NppiSize roiSize = {srcWidth,srcHeight};
int allocSize = srcWidth*srcHeight;
int nLevels = 65537;
float* srcData;
cudaMalloc((void**)&srcData,allocSize*4);
//every value of srcData is set to 9
nppiSet_32f_C1R(9.0,srcData,srcWidth*4,roiSize);
//levels used in histogram calculation
array<float> ^levels;
levels = gcnew array<float> (nLevels);
for (int k=0; k<nLevels; k++) {
levels[k] = (float)k*(65536/float(nLevels-1));
}
//levels are copied on gpu memory
float* pLevels;
cudaMalloc((void**)&pLevels,nLevels*4);
pin_ptr<float> LevelPtr = &levels[0];
cudaMemcpy(pLevels,LevelPtr,nLevels*4,cudaMemcpyHostToDevice);
//creation of the scratch buffer
int hostBufferSize;
int status=nppiHistogramRangeGetBufferSize_32f_C1R(roiSize,nLevels,&hostBufferSize);
//nppiMeanGetBufferHostSize_32f_C1R(roiSize,&hostBufferSize);
unsigned char* deviceBuffer;
cudaMalloc((void**)&deviceBuffer,hostBufferSize);
int* histDevice;
cudaMalloc((void**)&histDevice,(nLevels-1)*4);
int status2=nppiHistogramRange_32f_C1R(srcData,srcWidth*4,roiSize,histDevice,pLevels,nLevels,deviceBuffer);
//the result is copied from the gpu
array<int> ^histHost = gcnew array<int> (nLevels-1);
pin_ptr<int> hostPtr = &histHost[0];
int status3 = cudaMemcpy(hostPtr,histDevice,(nLevels1)*4,cudaMemcpyDeviceToHost);
}
I get error 77 (cudaErrorIllegalAddress) from the final cudaMemcpy, the one that copies the histogram (histDevice) back to the host.
At one point I replaced nppiHistogramRangeGetBufferSize_32f_C1R with nppiMeanGetBufferHostSize_32f_C1R by mistake, and the code appeared to work, but after a certain number of executions I got the same error.
Are there any limitations on the number of possible levels?