CUDPP access violation

My program is terminating with the following Application Error.

The application failed to initialize properly (0xc0000005). Click on OK to terminate the application.

I think that this has something to do with CUDPP. Any suggestions or thoughts would be appreciated.

The following is my scan.cu file, which is linked into the program and called from my main file.

#include <cutil.h>

#include <cutil_math.h>

#include <cudpp/cudpp.h>

#include <float.h>

#include <math.h>

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

// Entry point exported with C linkage so it can be called from another
// translation unit; the definition follows below in this file.
extern "C" void run_scan(void);   

// CPU reference for the scan, also exported with C linkage. Writes the
// running sum of h_idata into reference for numElements entries; the
// definition below does not read `config`.
extern "C" void computSumScanGold( float* reference, float* h_idata, int 

     numElements, CUDPPConfiguration config);  

void run_scan(void) 

{  

    printf("initializing CUDA\n");  

     CUT_DEVICE_INIT(0, &"");    

     unsigned int numElements = 32768;  

     unsigned int memSize = sizeof(float) * numElements;    

    printf("allocating memory\n");  

     float* h_idata = (float*) malloc(memSize);    

     for(unsigned int i = 0; i < numElements; ++i)  

     {  	

          h_idata[i] = (float) (rand() & 0xf);  

     }    

     float* d_idata;  

    CUDA_SAFE_CALL( cudaMalloc( (void**) &d_idata, memSize) );         

     CUDA_SAFE_CALL( cudaMemcpy( d_idata, 

          h_idata, 

          memSize, 

          cudaMemcpyHostToDevice) );      

    	

     float* d_odata;  

    CUDA_SAFE_CALL( cudaMalloc( (void**) &d_odata, 

          memSize) );          

     printf("initializing CUDPP\n");  

    CUDPPConfiguration config;  

     config.op = CUDPP_ADD;  

     config.datatype = CUDPP_FLOAT;  

     config.algorithm = CUDPP_SCAN;  

     config.options = 

          CUDPP_OPTION_FORWARD | CUDPP_OPTION_EXCLUSIVE;         

     

     CUDPPHandle scanplan = 0;  

    CUDPPResult result = cudppPlan(&scanplan, config, numElements, 1, 0);    

    if(CUDPP_SUCCESS != result)  

     {  	

          printf("error creating CUDPPPlan\n");  	

          return;  

     }    

    printf("executing\n");  

     cudppScan(scanplan, d_odata, d_idata, numElements);    

    float* h_odata = (float*) malloc(memSize);  

     CUDA_SAFE_CALL( cudaMemcpy( h_odata, 

          d_odata,                

          memSize,           

          cudaMemcpyDeviceToHost) );  

        	

     float* reference = (float*) malloc(memSize);  

    printf("computing reference\n"); 

     computSumScanGold( reference, h_idata, numElements, config);    

    CUTBoolean res = cutComparef( reference, h_odata, numElements);  

     printf( "Test %s\n", (1 == res) ? "PASSED" : "FAILED");    

    printf("freeing memory\n");  

     cudaFree(d_idata);  

     cudaFree(d_odata);    

    return; 

}  

void computSumScanGold( float* reference, 

     float* h_idata, int numElements,	CUDPPConfiguration config) 

{  

     reference[0] = 0;  

     for(unsigned int i = 1; i < numElements; ++i)  

     {  	

          reference[i] = h_idata[i - 1] + reference[i - 1];  

     }    

     return; 

}