Hi,
I am trying to filter an image with nppiFilter_8u_C1R, but I am having trouble: when nppiFilter_8u_C1R is called, the returned NppStatus is -24 (NPP_TEXTURE_BIND_ERROR). I cannot figure out what is wrong with my code; this is my snippet:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <nppdefs.h>
#include <nppcore.h>
#include <npp.h>
#include <nppi.h>
#include <npps.h>
#include <stdio.h>
#include <iostream>
using namespace std;
#define LENGTH 10
#define KERNEL 5
void main(char *str[])
{
Npp8u
*hImage = (Npp8u *)malloc(LENGTH * LENGTH * sizeof(Npp8u)),
*hOutput = (Npp8u *)malloc(LENGTH * LENGTH * sizeof(Npp8u));
Npp8u
*dImage,
*dOutput;
Npp32s
*hKernel = (Npp32s *)malloc(KERNEL * KERNEL * sizeof(Npp32s)),
*dKernel;
size_t
pImage,
pOutput;
NppiSize
sizeImage,
sizeKernel;
sizeImage.height =
LENGTH;
sizeImage.width =
LENGTH;
sizeKernel.height =
KERNEL;
sizeKernel.width =
KERNEL;
for( int i = 0; i < LENGTH; i++ )
for( int j = 0; j < LENGTH; j++)
hImage[i * LENGTH + j] = i;
for( int i = 0 ; i < KERNEL; i++)
for( int j = 0; j < LENGTH; j++)
hKernel[i * KERNEL + j] = 2;
cudaMalloc<Npp32s>((Npp32s **)&dKernel, KERNEL * KERNEL * sizeof(Npp32s));
cudaMallocPitch<Npp8u>((Npp8u **)&dImage, &pImage, LENGTH, LENGTH);
cudaMallocPitch<Npp8u>((Npp8u **)&dOutput, &pOutput, LENGTH, LENGTH);
cudaMemcpy2D( dKernel, KERNEL * sizeof(Npp32s), hKernel, KERNEL * sizeof(Npp32s), KERNEL, KERNEL, cudaMemcpyHostToDevice );
cudaMemcpy2D( dImage, pImage, hImage, LENGTH * sizeof(Npp8u), LENGTH, LENGTH, cudaMemcpyHostToDevice );
NppiPoint k;
k.x =
0;
k.y =
0;
NppStatus p =
nppiFilter_8u_C1R(dImage, pImage, dOutput, pOutput, sizeImage, dKernel, sizeKernel, k, 1);
cout << "NppSatus: " << p << "\n";
cudaMemcpy2D( hOutput, LENGTH * sizeof(Npp8u), dOutput, pOutput, LENGTH, LENGTH, cudaMemcpyDeviceToHost );
cout << "Kernel" << "\n";
for( int i = 0; i < KERNEL; i++ )
{
for( int j = 0; j < KERNEL; j++)
{
cout << (int)hKernel[i * KERNEL + j] << " ";
}
cout << "\n";
}
cout << "Image" << "\n";
for( int i = 0; i < LENGTH; i++ )
{
for( int j = 0; j < LENGTH; j++)
{
cout << (int)hImage[i * LENGTH + j] << " ";
}
cout << "\n";
}
cudaMemcpy2D( hKernel, KERNEL * sizeof(Npp32s), dKernel, KERNEL * sizeof(Npp32s), KERNEL, KERNEL, cudaMemcpyDeviceToHost );
cout << "\nKernel" << "\n";
for( int i = 0; i < KERNEL; i++ )
{
for( int j = 0; j < KERNEL; j++)
{
cout << (int)hKernel[i * KERNEL + j] << " ";
}
cout << "\n";
}
cudaMemcpy2D( hImage, LENGTH * sizeof(Npp8u), dImage, pImage, LENGTH, LENGTH, cudaMemcpyDeviceToHost );
cout << "Image" << "\n";
for( int i = 0; i < LENGTH; i++ )
{
for( int j = 0; j < LENGTH; j++)
{
cout << (int)hImage[i * LENGTH + j] << " ";
}
cout << "\n";
}
cout << "Output" << "\n";
for( int i = 0; i < LENGTH; i++ )
{
for( int j = 0; j < LENGTH; j++)
{
cout << (int)hOutput[i * LENGTH + j] << " ";
}
cout << "\n";
}
getchar();
}
Please help me