Problem using NPP sum: having trouble using the reduction sum with NPP

Hi all,

I can’t seem to get NPP to do a reduction sum. Can you help? I have put together a simple example below.

/*

 * reductiontest.cu

 *

 *  Created on: 2 Aug 2011

 *      Author: tim

 */

#include <stdio.h>
#include <stdlib.h>

#include <cuda.h>
#include <cuda_runtime_api.h>
#include <cuda_runtime.h>

#include <npp.h>
#include <nppcore.h>

/*
 * Report a failed CUDA runtime call and terminate the program.
 *
 * err  - status returned by the CUDA runtime call being checked
 * file - source file name to show in the message
 * line - source line number to show in the message
 *
 * Does nothing when err is cudaSuccess. Otherwise prints the runtime's
 * description of the error together with file and line, then exits with
 * EXIT_FAILURE.
 */
static void HandleError( cudaError_t err,
                         const char *file,
                         int line ) {
    /* Fast path: the call succeeded, nothing to report. */
    if (err == cudaSuccess) {
        return;
    }

    const char *message = cudaGetErrorString( err );
    printf( "%s in %s at line %d\n", message, file, line );
    exit( EXIT_FAILURE );
}

// Wraps a CUDA runtime call: forwards its status to HandleError together with
// the file and line of the call site.
#define HANDLE_ERROR( err ) (HandleError( err, __FILE__, __LINE__ ))

// Number of threads per block used for the init_array launch.
const int THREAD_DIM = 2;

// Number of blocks used for the init_array launch.
const int BLOCK_DIM = 2;

// Total number of threads launched; also used as the element count of the
// data arrays (despite the name, this is not the grid dimension itself).
const int GRID_DIM = THREAD_DIM * BLOCK_DIM;

/*
 * Kernel: sets one element of data_d to 1.0f per thread.
 *
 * data_d - device array to fill; the thread with global index
 *          threadIdx.x + blockIdx.x * blockDim.x writes data_d[index].
 *
 * Uses only the x components of the thread and block indices. There is no
 * bounds check, so the launch must not supply more threads than data_d has
 * elements; elements beyond the number of launched threads are left
 * untouched.
 */
__global__ void init_array(Npp32f *data_d)
{
	const int tid = threadIdx.x + blockIdx.x * blockDim.x;

	// Float literal: avoids a double constant in a single-precision store.
	data_d[tid] = 1.0f;
}

/*
 * Terminates the program with a message when an NPP call does not report
 * NPP_NO_ERROR. Mirrors HandleError, which covers the CUDA runtime calls.
 *
 * status - value returned by the NPP call
 * call   - name of the NPP function, for the message
 * file   - source file name to show in the message
 * line   - source line number to show in the message
 */
static void HandleNppStatus( NppStatus status,
		const char *call,
		const char *file,
		int line )
{
	if (status != NPP_NO_ERROR) {
		printf( "%s returned NPP status %d in %s at line %d\n",
				call, (int) status, file, line );
		exit( EXIT_FAILURE );
	}
}

/*
 * Test driver: fills a GRID_DIM-element device array with ones, prints it,
 * sums it with nppsSum_32f and prints the sum.
 *
 * Returns EXIT_SUCCESS when every step succeeds. Any failing CUDA runtime or
 * NPP call prints a message and exits with EXIT_FAILURE.
 *
 * NOTE(review): the NPP calls keep the signatures this file was written
 * against (nppsReductionGetBufferSize_32f, and nppsSum_32f taking an
 * algorithm hint); confirm they match the installed NPP release.
 */
int main()
{
	printf("Start test\n");

	Npp32f *data_h = NULL;
	Npp32f *data_d = NULL;
	Npp32f *res_h = NULL;
	Npp32f *res_d = NULL;
	int bufferSize_h = 0;
	Npp8u *buffer_d = NULL;

	// Host and device storage for the input array and the one-element result.
	HANDLE_ERROR( cudaHostAlloc((void**) &data_h, sizeof(Npp32f)*GRID_DIM,
			cudaHostAllocDefault) );
	HANDLE_ERROR( cudaMalloc((void**) &data_d, sizeof(Npp32f)*GRID_DIM) );
	HANDLE_ERROR( cudaHostAlloc((void**) &res_h, sizeof(Npp32f),
			cudaHostAllocDefault) );
	HANDLE_ERROR( cudaMalloc((void**) &res_d, sizeof(Npp32f)) );

	// Query the scratch-buffer size for the length that is actually summed
	// (previously a hard-coded 10 that did not match the array).
	HandleNppStatus( nppsReductionGetBufferSize_32f(GRID_DIM, &bufferSize_h),
			"nppsReductionGetBufferSize_32f", __FILE__, __LINE__ );

	buffer_d = nppsMalloc_8u( bufferSize_h );
	if (buffer_d == NULL) {
		printf( "nppsMalloc_8u failed to allocate %d bytes\n", bufferSize_h );
		exit( EXIT_FAILURE );
	}

	// Call init kernel: BLOCK_DIM blocks of THREAD_DIM threads, one thread
	// per array element.
	init_array<<<BLOCK_DIM,THREAD_DIM>>>(data_d);

	// A kernel launch returns no status itself; pick up launch errors here.
	HANDLE_ERROR( cudaGetLastError() );

	// Copy the initialised input array to the host and print it.
	HANDLE_ERROR( cudaMemcpy(data_h, data_d,
			sizeof(Npp32f)*GRID_DIM, cudaMemcpyDeviceToHost) );

	for (int i = 0; i < GRID_DIM; i++) {
		printf("%f\n",data_h[i]);
	}

	// Sum all values together; the result is written to device memory.
	HandleNppStatus( nppsSum_32f(data_d, GRID_DIM, res_d, nppAlgHintNone,
			buffer_d),
			"nppsSum_32f", __FILE__, __LINE__ );

	// Copy the sum to the host and print it.
	HANDLE_ERROR( cudaMemcpy(res_h, res_d,
			sizeof(Npp32f), cudaMemcpyDeviceToHost) );

	printf("%.7f\n",(float) *res_h);

	// Release everything that was allocated above.
	nppsFree( buffer_d );
	HANDLE_ERROR( cudaFree(res_d) );
	HANDLE_ERROR( cudaFree(data_d) );
	HANDLE_ERROR( cudaFreeHost(res_h) );
	HANDLE_ERROR( cudaFreeHost(data_h) );

	return EXIT_SUCCESS;
}

Thanks in advance for any help ;)

Kisty