“GeForce GTX 960”
CUDA Driver Version / Runtime Version 8.0 / 8.0
CUDA Capability Major/Minor version number: 5.2
I am running the Cuda Sample FilterBorderControlNPP (in Cuda Libraries Sample Codes). Everything is Ok if I run the original program. If I put the whole main function inside a loop, then the second time one of the npp functions throw and NPP exception.
int main(int argc, char *argv)
{
printf(“%s Starting…\n\n”, argv[0]);
for (int i = 0; i < 10; ++i)
{
try
{
const char * inputFile = "Lena.pgm";
std::string sFilename = inputFile;
std::string sOutputDir = "./";
cudaDeviceInit(argc, (const char **)argv);
if (printfNPPinfo(argc, argv) == false)
{
cudaDeviceReset();
exit(EXIT_SUCCESS);
}
char *filePath;
if (checkCmdLineFlag(argc, (const char **)argv, "input"))
{
getCmdLineArgumentString(argc, (const char **)argv, "input", &filePath);
}
else
{
filePath = sdkFindFilePath(inputFile, argv[0]);
}
if (!filePath)
{
std::cerr << "Couldn't find input file " << sFilename << std::endl;
exit(1);
}
sFilename = filePath;
// if we specify the filename at the command line, then we only test sFilename[0].
int file_errors = 0;
std::ifstream infile(sFilename.data(), std::ifstream::in);
if (infile.good())
{
std::cout << "gradientFilterBorderNPP opened <" << sFilename.data() << "> successfully!" << std::endl;
file_errors = 0;
infile.close();
}
else
{
std::cout << "gradientFilterBorderNPP unable to open <" << sFilename.data() << ">" << std::endl;
file_errors++;
infile.close();
}
if (file_errors > 0)
{
cudaDeviceReset();
exit(EXIT_FAILURE);
}
std::string sResultBaseFilename = sFilename;
std::string::size_type dot = sResultBaseFilename.rfind('.');
if (dot != std::string::npos)
{
sResultBaseFilename = sResultBaseFilename.substr(0, dot);
}
std::string sResultXFilename = sOutputDir + sFilename + "_gradientVectorPrewittBorderX_Vertical.pgm";
std::string sResultYFilename = sResultBaseFilename;
// sResultXFilename += "_gradientVectorPrewittBorderX_Vertical.pgm";
sResultYFilename += "_gradientVectorPrewittBorderY_Horizontal.pgm";
// if (checkCmdLineFlag(argc, (const char **)argv, "output"))
// {
// char *outputFilePath;
// getCmdLineArgumentString(argc, (const char **)argv, "output", &outputFilePath);
// sResultBaseFilename = outputFilePath;
// }
// declare a host image object for an 8-bit grayscale image
npp::ImageCPU_8u_C1 oHostSrc;
// load gray-scale image from disk
npp::loadImage(sFilename, oHostSrc);
// declare a device image and copy construct from the host image,
// i.e. upload host to device
npp::ImageNPP_8u_C1 oDeviceSrc(oHostSrc);
NppiSize oSrcSize = { (int)oDeviceSrc.width(), (int)oDeviceSrc.height() };
NppiPoint oSrcOffset = { 0, 0 };
// create struct with ROI size
NppiSize oSizeROI = { (int)oDeviceSrc.width() , (int)oDeviceSrc.height() };
// allocate device destination images of appropriatedly size
npp::ImageNPP_16s_C1 oDeviceDstX(oSizeROI.width, oSizeROI.height);
npp::ImageNPP_16s_C1 oDeviceDstY(oSizeROI.width, oSizeROI.height);
// run Prewitt edge detection gradient vector filter
NPP_CHECK_NPP(
nppiGradientVectorPrewittBorder_8u16s_C1R(oDeviceSrc.data(), oDeviceSrc.pitch(),
oSrcSize, oSrcOffset,
oDeviceDstX.data(), oDeviceDstX.pitch(),
oDeviceDstY.data(), oDeviceDstY.pitch(),
0, 0,
0, 0,
oSizeROI, NPP_MASK_SIZE_3_X_3, nppiNormL1, NPP_BORDER_REPLICATE));
// allocate device destination images of appropriatedly size
npp::ImageNPP_8u_C1 oDeviceDstOutX(oSizeROI.width, oSizeROI.height);
npp::ImageNPP_8u_C1 oDeviceDstOutY(oSizeROI.width, oSizeROI.height);
// convert 16s_C1 result images to binary 8u_C1 output images using constant value to adjust amount of visible detail
NPP_CHECK_NPP(
nppiCompareC_16s_C1R(oDeviceDstX.data(), oDeviceDstX.pitch(), 32,
oDeviceDstOutX.data(), oDeviceDstOutX.pitch(),
oSizeROI, NPP_CMP_GREATER_EQ));
NPP_CHECK_NPP(
nppiCompareC_16s_C1R(oDeviceDstY.data(), oDeviceDstY.pitch(), 32,
oDeviceDstOutY.data(), oDeviceDstOutY.pitch(),
oSizeROI, NPP_CMP_GREATER_EQ));
// create host images for the results
npp::ImageCPU_8u_C1 oHostDstX(oDeviceDstOutX.size());
npp::ImageCPU_8u_C1 oHostDstY(oDeviceDstOutY.size());
// and copy the device result data into them
oDeviceDstOutX.copyTo(oHostDstX.data(), oHostDstX.pitch());
oDeviceDstOutY.copyTo(oHostDstY.data(), oHostDstY.pitch());
saveImage(sResultXFilename, oHostDstX);
std::cout << "Saved image: " << sResultXFilename << std::endl;
saveImage(sResultYFilename, oHostDstY);
std::cout << "Saved image: " << sResultYFilename << std::endl;
// now use the Prewitt gradient border filter function in such a way that no border replication operations will be applied
// create a Prewitt filter mask size object, Prewitt uses a 3x3 filter kernel
NppiSize oMaskSize = { 3, 3 };
// create a size object for the enlarged source image
NppiSize oEnlargedSrcSize = { oSrcSize.width + oMaskSize.width - 1, oSrcSize.height + oMaskSize.height - 1 };
// create an enlarged device source image
npp::ImageNPP_8u_C1 oEnlargedDeviceSrc(oEnlargedSrcSize.width, oEnlargedSrcSize.height);
// copy and enlarge the original device source image and surround it with a white edge (border)
NPP_CHECK_NPP(
nppiCopyConstBorder_8u_C1R(oDeviceSrc.data(), oDeviceSrc.pitch(), oSrcSize,
oEnlargedDeviceSrc.data(), oEnlargedDeviceSrc.pitch(),
oEnlargedSrcSize, oMaskSize.width / 2, oMaskSize.height / 2, 255));
// adjust oEnlargedDeviceSrc pixel pointer to point to the first pixel of the original source image in the enlarged source image
const Npp8u * pTemp = reinterpret_cast<const Npp8u *>(oEnlargedDeviceSrc.data());
pTemp += (oMaskSize.height / 2) * oEnlargedDeviceSrc.pitch();
const Npp8u * pAdjustedSrc = reinterpret_cast<const Npp8u *>((void *)(pTemp));
pAdjustedSrc += oMaskSize.width / 2;
// create device output images for the no source border results
npp::ImageNPP_8u_C1 oDeviceDstOutXNoBorders(oSizeROI.width, oSizeROI.height);
npp::ImageNPP_8u_C1 oDeviceDstOutYNoBorders(oSizeROI.width, oSizeROI.height);
// tell the filter function what cartesian pixel position pAdjustedSrc is pointing to within the enlarged source image
oSrcOffset.x += oMaskSize.width / 2;
oSrcOffset.y += oMaskSize.height / 2;
// run Prewitt edge detection gradient vector filter bypassing border control due to enlarged source image
NPP_CHECK_NPP(
nppiGradientVectorPrewittBorder_8u16s_C1R(pAdjustedSrc, oEnlargedDeviceSrc.pitch(),
oEnlargedSrcSize, oSrcOffset,
oDeviceDstX.data(), oDeviceDstX.pitch(),
oDeviceDstY.data(), oDeviceDstY.pitch(),
0, 0,
0, 0,
oSizeROI, NPP_MASK_SIZE_3_X_3, nppiNormL1, NPP_BORDER_REPLICATE));
// convert 16s_C1 result images to binary 8u_C1 output images using constant value to adjust amount of visible detail
NPP_CHECK_NPP(
nppiCompareC_16s_C1R(oDeviceDstX.data(), oDeviceDstX.pitch(), 32,
oDeviceDstOutXNoBorders.data(), oDeviceDstOutXNoBorders.pitch(),
oSizeROI, NPP_CMP_GREATER_EQ));
NPP_CHECK_NPP(
nppiCompareC_16s_C1R(oDeviceDstY.data(), oDeviceDstY.pitch(), 32,
oDeviceDstOutYNoBorders.data(), oDeviceDstOutYNoBorders.pitch(),
oSizeROI, NPP_CMP_GREATER_EQ));
// create additional output files
std::string sResultXNoBordersFilename = sResultBaseFilename;
std::string sResultYNoBordersFilename = sResultBaseFilename;
sResultXNoBordersFilename += "_gradientVectorPrewittBorderX_Vertical_WithNoSourceBorders.pgm";
sResultYNoBordersFilename += "_gradientVectorPrewittBorderY_Horizontal_WithNoSourceBorders.pgm";
// copy the device result data into the host output images
oDeviceDstOutXNoBorders.copyTo(oHostDstX.data(), oHostDstX.pitch());
oDeviceDstOutYNoBorders.copyTo(oHostDstY.data(), oHostDstY.pitch());
saveImage(sResultXNoBordersFilename, oHostDstX);
std::cout << "Saved image: " << sResultXNoBordersFilename << std::endl;
saveImage(sResultYNoBordersFilename, oHostDstY);
std::cout << "Saved image: " << sResultYNoBordersFilename << std::endl;
// now diff the two output images, one using border control and one bypassing border control
// create device output images for the diff results
npp::ImageNPP_8u_C1 oDeviceDstOutXDiff(oSizeROI.width, oSizeROI.height);
npp::ImageNPP_8u_C1 oDeviceDstOutYDiff(oSizeROI.width, oSizeROI.height);
// diff the two 8u_C1 result images one with and one without border control
NPP_CHECK_NPP(
nppiAbsDiff_8u_C1R(oDeviceDstOutXNoBorders.data(), oDeviceDstOutXNoBorders.pitch(),
oDeviceDstOutX.data(), oDeviceDstOutX.pitch(),
oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(),
oSizeROI));
NPP_CHECK_NPP(
nppiAbsDiff_8u_C1R(oDeviceDstOutYNoBorders.data(), oDeviceDstOutYNoBorders.pitch(),
oDeviceDstOutY.data(), oDeviceDstOutY.pitch(),
oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(),
oSizeROI));
// create additional output files
std::string sResultXDiffFilename = sResultBaseFilename;
std::string sResultYDiffFilename = sResultBaseFilename;
sResultXDiffFilename += "_gradientVectorPrewittBorderX_Vertical_BorderDiffs.pgm";
sResultYDiffFilename += "_gradientVectorPrewittBorderY_Horizontal_BorderDiffs.pgm";
// copy the device result data into the host output images
oDeviceDstOutXDiff.copyTo(oHostDstX.data(), oHostDstX.pitch());
oDeviceDstOutYDiff.copyTo(oHostDstY.data(), oHostDstY.pitch());
saveImage(sResultXDiffFilename, oHostDstX);
std::cout << "Saved image: " << sResultXDiffFilename << std::endl;
saveImage(sResultYDiffFilename, oHostDstY);
std::cout << "Saved image: " << sResultYDiffFilename << std::endl;
// if you closely examine the above difference files (recommend using GIMP for viewing using scaling with no interpolation) you will see several
// single pixel differences (white pixels) along the right and bottom edges of the default vs. borderless images
// this happens because border pixels in the original source image are duplicated when the filter kernels overlap the edge of the source image
// when using the first version of the filter call but are actually sampled from the enlarged source image when using the second version
// of the filter call
// the technique used in the second filter call can be used with any filter border function in NPP to duplicate results
// that would be generated from a non-border filter function call by filling the border pixel outside the embedded source image
// with the appropriate border pixel values
// here is how to use border control to process a source image in multiple calls and get correct output in the destination image
// since the source image pointer already points to the beginning of the source image in the enlarged source image it doesn't need changed
// tighten up the top and left source image borders - this will enable border replication on the left and top borders of the original source image
oSrcOffset.x = 0;
oSrcOffset.y = 0;
// tighten up the right and bottom side source image borders - this will enable border replication on the right and bottom borders of the original source image
oEnlargedSrcSize.width = oSrcSize.width;
oEnlargedSrcSize.height = oSrcSize.height;
// create device output images for the mixed edge results
npp::ImageNPP_8u_C1 oDeviceDstOutXMixedBorders(oSizeROI.width, oSizeROI.height);
npp::ImageNPP_8u_C1 oDeviceDstOutYMixedBorders(oSizeROI.width, oSizeROI.height);
// shrink output ROI width so that only the left half of the destination image will be generated
// however since oEnlargedSrcSize.width is still set to oSrcSize.width then border control will be disabled
// when the filter needs to access source pixels beyond the right side of the left half of the source image
int nLeftWidth = oSizeROI.width / 2;
int nRightWidth = oSizeROI.width - nLeftWidth;
oSizeROI.width = nLeftWidth;
// run Prewitt edge detection gradient vector filter to generate the left side of the output image
NPP_CHECK_NPP(
nppiGradientVectorPrewittBorder_8u16s_C1R(pAdjustedSrc, oEnlargedDeviceSrc.pitch(),
oEnlargedSrcSize, oSrcOffset,
oDeviceDstX.data(), oDeviceDstX.pitch(),
oDeviceDstY.data(), oDeviceDstY.pitch(),
0, 0,
0, 0,
oSizeROI, NPP_MASK_SIZE_3_X_3, nppiNormL1, NPP_BORDER_REPLICATE));
// now move the enlarged source pointer to the horizontal middle of the enlarged source image and tell the function where it was moved to
pAdjustedSrc += nLeftWidth;
// and adjust the source offset parameter accordingly - this will in effect turn off border control for the left border allowing the necessary source pixels to be used
oSrcOffset.x += nLeftWidth;
// update oSizeROI.width so that only enough destination pixels will be produced to fill the right half of the destination image
oSizeROI.width = nRightWidth;
// run Prewitt edge detection gradient vector filter to generate the right side of the output image adjusting the destination image pointers appropriately
NPP_CHECK_NPP(
nppiGradientVectorPrewittBorder_8u16s_C1R(pAdjustedSrc, oEnlargedDeviceSrc.pitch(),
oEnlargedSrcSize, oSrcOffset,
oDeviceDstX.data() + nLeftWidth, oDeviceDstX.pitch(),
oDeviceDstY.data() + nLeftWidth, oDeviceDstY.pitch(),
0, 0,
0, 0,
oSizeROI, NPP_MASK_SIZE_3_X_3, nppiNormL1, NPP_BORDER_REPLICATE));
// convert 16s_C1 result images to binary 8u_C1 output images using constant value to adjust amount of visible detail
NPP_CHECK_NPP(
nppiCompareC_16s_C1R(oDeviceDstX.data(), oDeviceDstX.pitch(), 32,
oDeviceDstOutXMixedBorders.data(), oDeviceDstOutXMixedBorders.pitch(),
oSizeROI, NPP_CMP_GREATER_EQ));
NPP_CHECK_NPP(
nppiCompareC_16s_C1R(oDeviceDstY.data(), oDeviceDstY.pitch(), 32,
oDeviceDstOutYMixedBorders.data(), oDeviceDstOutYMixedBorders.pitch(),
oSizeROI, NPP_CMP_GREATER_EQ));
// create additional output files
std::string sResultXMixedBordersFilename = sResultBaseFilename;
std::string sResultYMixedBordersFilename = sResultBaseFilename;
sResultXMixedBordersFilename += "_gradientVectorPrewittBorderX_Vertical_WithMixedBorders.pgm";
sResultYMixedBordersFilename += "_gradientVectorPrewittBorderY_Horizontal_WithMixedBorders.pgm";
// copy the device result data into the host output images
oDeviceDstOutXMixedBorders.copyTo(oHostDstX.data(), oHostDstX.pitch());
oDeviceDstOutYMixedBorders.copyTo(oHostDstY.data(), oHostDstY.pitch());
saveImage(sResultXMixedBordersFilename, oHostDstX);
std::cout << "Saved image: " << sResultXMixedBordersFilename << std::endl;
saveImage(sResultYMixedBordersFilename, oHostDstY);
std::cout << "Saved image: " << sResultYMixedBordersFilename << std::endl;
// diff the original 8u_C1 result images with border control and the mixed border control images, they should match (diff image will be all black)
NPP_CHECK_NPP(
nppiAbsDiff_8u_C1R(oDeviceDstOutXMixedBorders.data(), oDeviceDstOutXMixedBorders.pitch(),
oDeviceDstOutX.data(), oDeviceDstOutX.pitch(),
oDeviceDstOutXDiff.data(), oDeviceDstOutXDiff.pitch(),
oSizeROI));
NPP_CHECK_NPP(
nppiAbsDiff_8u_C1R(oDeviceDstOutYMixedBorders.data(), oDeviceDstOutYMixedBorders.pitch(),
oDeviceDstOutY.data(), oDeviceDstOutY.pitch(),
oDeviceDstOutYDiff.data(), oDeviceDstOutYDiff.pitch(),
oSizeROI));
// create additional output files
std::string sResultXMixedDiffFilename = sResultBaseFilename;
std::string sResultYMixedDiffFilename = sResultBaseFilename;
sResultXMixedDiffFilename += "_gradientVectorPrewittBorderX_Vertical_MixedBorderDiffs.pgm";
sResultYMixedDiffFilename += "_gradientVectorPrewittBorderY_Horizontal_MixedBorderDiffs.pgm";
// copy the device result data into the host output images
oDeviceDstOutXDiff.copyTo(oHostDstX.data(), oHostDstX.pitch());
oDeviceDstOutYDiff.copyTo(oHostDstY.data(), oHostDstY.pitch());
saveImage(sResultXMixedDiffFilename, oHostDstX);
std::cout << "Saved image: " << sResultXMixedDiffFilename << std::endl;
saveImage(sResultYMixedDiffFilename, oHostDstY);
std::cout << "Saved image: " << sResultYMixedDiffFilename << std::endl;
nppiFree(oDeviceSrc.data());
nppiFree(oDeviceDstX.data());
nppiFree(oDeviceDstY.data());
nppiFree(oDeviceDstOutX.data());
nppiFree(oDeviceDstOutY.data());
nppiFree(oDeviceDstOutXNoBorders.data());
nppiFree(oDeviceDstOutYNoBorders.data());
nppiFree(oDeviceDstOutXDiff.data());
nppiFree(oDeviceDstOutYDiff.data());
nppiFree(oDeviceDstOutXMixedBorders.data());
nppiFree(oDeviceDstOutYMixedBorders.data());
nppiFree(oEnlargedDeviceSrc.data());
//cudaDeviceReset();
//exit(EXIT_SUCCESS);
}
catch (npp::Exception &rException)
{
std::cerr << "Program error! The following exception occurred: \n";
std::cerr << rException << std::endl;
std::cerr << "Aborting." << std::endl;
cudaDeviceReset();
exit(EXIT_FAILURE);
}
catch (...)
{
std::cerr << "Program error! An unknow type of exception occurred. \n";
std::cerr << "Aborting." << std::endl;
cudaDeviceReset();
exit(EXIT_FAILURE);
return -1;
}
}
return 0;
}
I have experiencing this issue in other examples.
Does anybody know how to repeat calls to NPP functions several times without getting these errors?