Npp HistogramRange

I’m trying to calculate the histogram of a series of images, but I keep getting errors. Here’s a working example (I use a test image where every pixel is set to a constant value, so I expect the histogram to have only one non-zero value).

#include "cuda_runtime.h"
#include "npp.h"

void TestHistogram() 
   int srcWidth = 1344;
   int srcHeight = 1344;
   NppiSize roiSize = {srcWidth,srcHeight};
   int allocSize = srcWidth*srcHeight;
   int nLevels = 65537;
   float* srcData;

   //every value of srcData is set to 9

   //levels used in histogram calculation
   array<float> ^levels;
   levels = gcnew array<float> (nLevels);
   for  (int k=0; k<nLevels; k++)   {
   levels[k] = (float)k*(65536/float(nLevels-1));

   //levels are copied on gpu memory
   float* pLevels;
   pin_ptr<float> LevelPtr = &levels[0];

   //creation of the scratch buffer
   int hostBufferSize;
   int status=nppiHistogramRangeGetBufferSize_32f_C1R(roiSize,nLevels,&hostBufferSize);
   unsigned char* deviceBuffer;

   int* histDevice;
   int status2=nppiHistogramRange_32f_C1R(srcData,srcWidth*4,roiSize,histDevice,pLevels,nLevels,deviceBuffer);      

   //the result is copied from the gpu
   array<int> ^histHost = gcnew array<int> (nLevels-1);
   pin_ptr<int> hostPtr = &histHost[0];
   int status3 = cudaMemcpy(hostPtr,histDevice,(nLevels1)*4,cudaMemcpyDeviceToHost);

I get error 77 (cudaErrorIllegalAddress) when I do the cudaMemcpy that copies histDevice back to the host.
By mistake, I changed nppiHistogramRangeGetBufferSize_32f_C1R to nppiMeanGetBufferHostSize_32f_C1R and the code appeared to work, but after a certain number of executions I get the same error.

Are there any limitations on the number of possible levels?