Can anyone help me out with this following code? The timer function for any one method is running f

/*

  • Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
  • Please refer to the NVIDIA end user license agreement (EULA) associated
  • with this source code for terms and conditions that govern your use of
  • this software. Any use, reproduction, disclosure, or distribution of
  • this software and related documentation outside the terms of the EULA
  • is strictly prohibited.

*/

/*

  • This sample demonstrates two adaptive image denoising technqiues:
  • KNN and NLM, based on computation of both geometric and color distance
  • between texels. While both techniques are already implemented in the
  • DirectX SDK using shaders, massively speeded up variation
  • of the latter techique, taking advantage of shared memory, is implemented
  • in addition to DirectX counterparts.
  • See supplied whitepaper for more explanations.
    */

// OpenGL Graphics includes
#include <GL/glew.h>
#if defined(APPLE) || defined(MACOSX)
#include <GLUT/glut.h>
#else
#include <GL/freeglut.h>
#endif

// CUDA utilities and system includes
#include <cuda_runtime.h>
#include <cuda_gl_interop.h>

// Includes
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include “imageDenoising.h”

// includes, project
#include <helper_functions.h> // includes for helper utility functions
#include <helper_cuda.h> // includes for cuda error checking and initialization

const char *sSDKsample = “CUDA ImageDenoising”;

const char *filterMode =
{
“Passthrough”,
“KNN method”,
“NLM method”,
“Quick NLM(NLM2) method”,
NULL
};

// Define the files that are to be save and the reference images for validation
const char *sOriginal =
{
“image_passthru.ppm”,
“image_knn.ppm”,
“image_nlm.ppm”,
“image_nlm2.ppm”,
NULL
};

const char *sReference =
{
“ref_passthru.ppm”,
“ref_knn.ppm”,
“ref_nlm.ppm”,
“ref_nlm2.ppm”,
NULL
};

////////////////////////////////////////////////////////////////////////////////
// Global data handlers and parameters
////////////////////////////////////////////////////////////////////////////////
//OpenGL PBO and texture “names”
GLuint gl_PBO, gl_Tex;
struct cudaGraphicsResource *cuda_pbo_resource; // handles OpenGL-CUDA exchange
//Source image on the host side
uchar4 *h_Src;
int imageW, imageH;
GLuint shader;

////////////////////////////////////////////////////////////////////////////////
// Main program
////////////////////////////////////////////////////////////////////////////////
int g_Kernel = 0;
bool g_FPS = false;
bool g_Diag = false;
StopWatchInterface *timer = NULL;

//Algorithms global parameters
const float noiseStep = 0.025f;
const float lerpStep = 0.025f;
static float knnNoise = 0.32f;
static float nlmNoise = 1.45f;
static float lerpC = 0.2f;

const int frameN = 24;
int frameCounter = 0;

#define BUFFER_DATA(i) ((char *)0 + i)

// Auto-Verification Code
const int frameCheckNumber = 4;
int fpsCount = 0; // FPS count for averaging
int fpsLimit = 1; // FPS limit for sampling
unsigned int frameCount = 0;
unsigned int g_TotalErrors = 0;

int *pArgc = NULL;
char **pArgv = NULL;

#define MAX_EPSILON_ERROR 5
#define REFRESH_DELAY 10 //ms

void computeFPS()
{
frameCount++;
fpsCount++;

if (fpsCount == fpsLimit)
{
    char fps[256];
    float ifps = 1.f / (sdkGetAverageTimerValue(&timer) / 1000.f);
    sprintf(fps, "<%s>: %3.1f fps", filterMode[g_Kernel], ifps);

    glutSetWindowTitle(fps);
    fpsCount = 0;

    //fpsLimit = (int)MAX(ifps, 1.f);
    sdkResetTimer(&timer);
}

}

void runImageFilters(TColor *d_dst)
{
sdkResetTimer(&timer);
sdkStartTimer(&timer);

 switch (g_Kernel)
{
    case 0:
        cuda_Copy(d_dst, imageW, imageH);
        break;

    case 1:
        if (!g_Diag)
        {
            cuda_KNN(d_dst, imageW, imageH, 1.0f / (knnNoise * knnNoise), lerpC);
        }
        else
        {
            cuda_KNNdiag(d_dst, imageW, imageH, 1.0f / (knnNoise * knnNoise), lerpC);
        }

        break;

    case 2:
        if (!g_Diag)
        {
            cuda_NLM(d_dst, imageW, imageH, 1.0f / (nlmNoise * nlmNoise), lerpC);
        }
        else
        {
            cuda_NLMdiag(d_dst, imageW, imageH, 1.0f / (nlmNoise * nlmNoise), lerpC);
        }

        break;

    case 3:
        if (!g_Diag)
        {
            cuda_NLM2(d_dst, imageW, imageH, 1.0f / (nlmNoise * nlmNoise), lerpC);
        }
        else
        {
            cuda_NLM2diag(d_dst, imageW, imageH, 1.0f / (nlmNoise * nlmNoise), lerpC);
        }

        break;
}

   printf("TimeTaken for execution: %f \n  seconds", sdkGetTimerValue(&timer) * 0.001);
   sdkStopTimer(&timer);
getLastCudaError("Filtering kernel execution failed.\n");

}

void displayFunc(void)
{
sdkStartTimer(&timer);
TColor *d_dst = NULL;
size_t num_bytes;

if (frameCounter++ == 0)
{
    sdkResetTimer(&timer);
}

// DEPRECATED: checkCudaErrors(cudaGLMapBufferObject((void**)&d_dst, gl_PBO));
checkCudaErrors(cudaGraphicsMapResources(1, &cuda_pbo_resource, 0));
getLastCudaError("cudaGraphicsMapResources failed");
checkCudaErrors(cudaGraphicsResourceGetMappedPointer((void **)&d_dst, &num_bytes, cuda_pbo_resource));
getLastCudaError("cudaGraphicsResourceGetMappedPointer failed");

checkCudaErrors(CUDA_Bind2TextureArray());

runImageFilters(d_dst);

checkCudaErrors(CUDA_UnbindTexture());
// DEPRECATED: checkCudaErrors(cudaGLUnmapBufferObject(gl_PBO));
checkCudaErrors(cudaGraphicsUnmapResources(1, &cuda_pbo_resource, 0));

// Common display code path
{
    glClear(GL_COLOR_BUFFER_BIT);

    glTexSubImage2D(GL_TEXTURE_2D, 0, 0, 0, imageW, imageH, GL_RGBA, GL_UNSIGNED_BYTE, BUFFER_DATA(0));
    glBegin(GL_TRIANGLES);
    glTexCoord2f(0, 0);
    glVertex2f(-1, -1);
    glTexCoord2f(2, 0);
    glVertex2f(+3, -1);
    glTexCoord2f(0, 2);
    glVertex2f(-1, +3);
    glEnd();
    glFinish();
}

if (frameCounter == frameN)
{
    frameCounter = 0;

    if (g_FPS)
    {
        printf("FPS: %3.1f\n", frameN / (sdkGetTimerValue(&timer) * 0.001));
        g_FPS = false;
    }
}

glutSwapBuffers();

sdkStopTimer(&timer);
computeFPS();

}

void timerEvent(int value)
{
glutPostRedisplay();
glutTimerFunc(REFRESH_DELAY, timerEvent, 0);
}

void shutDown(unsigned char k, int /x/, int /y/)
{
switch (k)
{
case ‘\033’:
case ‘q’:
case ‘Q’:
printf(“Shutting down…\n”);

        sdkStopTimer(&timer);
        sdkDeleteTimer(&timer);

        checkCudaErrors(CUDA_FreeArray());
        free(h_Src);

        exit(EXIT_SUCCESS);
        break;

    case '1':
        printf("Passthrough.\n");
        g_Kernel = 0;
        break;

    case '2':
        printf("KNN method \n");
        g_Kernel = 1;
        break;

    case '3':
        printf("NLM method\n");
        g_Kernel = 2;
        break;

    case '4':
        printf("Quick NLM(NLM2) method\n");
        g_Kernel = 3;
        break;

    case '*':
        printf(g_Diag ? "LERP highlighting mode.\n" : "Normal mode.\n");
        g_Diag = !g_Diag;
        break;

    case 'n':
        printf("Decrease noise level.\n");
        knnNoise -= noiseStep;
        nlmNoise -= noiseStep;
        break;

    case 'N':
        printf("Increase noise level.\n");
        knnNoise += noiseStep;
        nlmNoise += noiseStep;
        break;

    case 'l':
        printf("Decrease LERP quotent.\n");
        lerpC = MAX(lerpC - lerpStep, 0.0f);
        break;

    case 'L':
        printf("Increase LERP quotent.\n");
        lerpC = MIN(lerpC + lerpStep, 1.0f);
        break;

    case 'f' :
    case 'F':
        g_FPS = true;
        break;

    case '?':
        printf("lerpC = %5.5f\n", lerpC);
        printf("knnNoise = %5.5f\n", knnNoise);
        printf("nlmNoise = %5.5f\n", nlmNoise);
        break;
}

}

int initGL(int *argc, char **argv)
{
printf(“Initializing GLUT…\n”);
glutInit(argc, argv);
glutInitDisplayMode(GLUT_RGBA | GLUT_DOUBLE);
glutInitWindowSize(imageW, imageH);
glutInitWindowPosition(512 - imageW / 2, 384 - imageH / 2);
glutCreateWindow(argv[0]);
printf(“OpenGL window created.\n”);

glewInit();
printf("Loading extensions: %s\n", glewGetErrorString(glewInit()));

if (!glewIsSupported("GL_VERSION_1_5 GL_ARB_vertex_buffer_object GL_ARB_pixel_buffer_object"))
{
    fprintf(stderr, "Error: failed to get minimal extensions for demo\n");
    fprintf(stderr, "This sample requires:\n");
    fprintf(stderr, "  OpenGL version 1.5\n");
    fprintf(stderr, "  GL_ARB_vertex_buffer_object\n");
    fprintf(stderr, "  GL_ARB_pixel_buffer_object\n");
    fflush(stderr);
    return false;
}

return 0;

}

// shader for displaying floating-point texture
static const char *shader_code =
“!!ARBfp1.0\n”
“TEX result.color, fragment.texcoord, texture[0], 2D; \n”
“END”;

GLuint compileASMShader(GLenum program_type, const char *code)
{
GLuint program_id;
glGenProgramsARB(1, &program_id);
glBindProgramARB(program_type, program_id);
glProgramStringARB(program_type, GL_PROGRAM_FORMAT_ASCII_ARB, (GLsizei) strlen(code), (GLubyte *) code);

GLint error_pos;
glGetIntegerv(GL_PROGRAM_ERROR_POSITION_ARB, &error_pos);

if (error_pos != -1)
{
    const GLubyte *error_string;
    error_string = glGetString(GL_PROGRAM_ERROR_STRING_ARB);
    fprintf(stderr, "Program error at position: %d\n%s\n", (int)error_pos, error_string);
    return 0;
}

return program_id;

}

void initOpenGLBuffers()
{
printf(“Creating GL texture…\n”);
glEnable(GL_TEXTURE_2D);
glGenTextures(1, &gl_Tex);
glBindTexture(GL_TEXTURE_2D, gl_Tex);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA8, imageW, imageH, 0, GL_RGBA, GL_UNSIGNED_BYTE, h_Src);
printf(“Texture created.\n”);

printf("Creating PBO...\n");
glGenBuffers(1, &gl_PBO);
glBindBuffer(GL_PIXEL_UNPACK_BUFFER_ARB, gl_PBO);
glBufferData(GL_PIXEL_UNPACK_BUFFER_ARB, imageW * imageH * 4, h_Src, GL_STREAM_COPY);
//While a PBO is registered to CUDA, it can't be used
//as the destination for OpenGL drawing calls.
//But in our particular case OpenGL is only used
//to display the content of the PBO, specified by CUDA kernels,
//so we need to register/unregister it only once.
// DEPRECATED: checkCudaErrors(cudaGLRegisterBufferObject(gl_PBO) );
checkCudaErrors(cudaGraphicsGLRegisterBuffer(&cuda_pbo_resource, gl_PBO,
                                             cudaGraphicsMapFlagsWriteDiscard));
GLenum gl_error = glGetError();

if (gl_error != GL_NO_ERROR)
{

#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
char tmpStr[512];
// NOTE: "%s(%i) : " allows Visual Studio to directly jump to the file at the right line
// when the user double clicks on the error line in the Output pane. Like any compile error.
sprintf_s(tmpStr, 255, “\n%s(%i) : GL Error : %s\n\n”, FILE, LINE, gluErrorString(gl_error));
OutputDebugString(tmpStr);
#endif
fprintf(stderr, “GL Error in file ‘%s’ in line %d :\n”, FILE, LINE);
fprintf(stderr, “%s\n”, gluErrorString(gl_error));
exit(EXIT_FAILURE);
}

printf("PBO created.\n");

// load shader program
shader = compileASMShader(GL_FRAGMENT_PROGRAM_ARB, shader_code);

}

void cleanup()
{
sdkDeleteTimer(&timer);

glDeleteProgramsARB(1, &shader);

}

void runAutoTest(int argc, char **argv, const char *filename, int kernel_param)
{
printf(“[%s] - (automated testing w/ readback)\n”, sSDKsample);

int devID = findCudaDevice(argc, (const char **)argv);

// First load the image, so we know what the size of the image (imageW and imageH)
printf("Allocating host and CUDA memory and loading image file...\n");
const char *image_path = sdkFindFilePath("portrait_noise.bmp", argv[0]);

if (image_path == NULL)
{
    printf("imageDenoisingGL was unable to find and load image file <portrait_noise.bmp>.\nExiting...\n");
    exit(EXIT_FAILURE);
}

LoadBMPFile(&h_Src, &imageW, &imageH, image_path);
printf("Data init done.\n");

checkCudaErrors(CUDA_MallocArray(&h_Src, imageW, imageH));

TColor *d_dst = NULL;
unsigned char *h_dst = NULL;
checkCudaErrors(cudaMalloc((void **)&d_dst, imageW*imageH*sizeof(TColor)));
h_dst = (unsigned char *)malloc(imageH*imageW*4);

{
    g_Kernel = kernel_param;
    printf("[AutoTest]: %s <%s>\n", sSDKsample, filterMode[g_Kernel]);
    checkCudaErrors(CUDA_Bind2TextureArray());
    runImageFilters(d_dst);
    checkCudaErrors(CUDA_UnbindTexture());
    checkCudaErrors(cudaDeviceSynchronize());

    checkCudaErrors(cudaMemcpy(h_dst, d_dst, imageW*imageH*sizeof(TColor), cudaMemcpyDeviceToHost));
    sdkSavePPM4ub(filename, h_dst, imageW, imageH);
}

checkCudaErrors(CUDA_FreeArray());
free(h_Src);

checkCudaErrors(cudaFree(d_dst));
free(h_dst);

printf("\n[%s] -> Kernel %d, Saved: %s\n", sSDKsample, kernel_param, filename);

// cudaDeviceReset causes the driver to clean up all state. While
// not mandatory in normal operation, it is good practice.  It is also
// needed to ensure correct operation when the application is being
// profiled. Calling cudaDeviceReset causes all profile data to be
// flushed before the application exits
cudaDeviceReset();
exit(g_TotalErrors == 0 ? EXIT_SUCCESS : EXIT_FAILURE);

}

int main(int argc, char **argv)
{
char *dump_file = NULL;

pArgc = &argc;
pArgv = argv;

printf("%s Starting...\n\n", sSDKsample);

if (checkCmdLineFlag(argc, (const char **)argv, "file"))
{
    getCmdLineArgumentString(argc, (const char **)argv,
                             "file", (char **) &dump_file);

    int kernel = 1;

    if (checkCmdLineFlag(argc, (const char **)argv, "kernel"))
    {
        kernel = getCmdLineArgumentInt(argc, (const char **)argv, "kernel");
    }

    runAutoTest(argc, argv, dump_file, kernel);
}
else
{
    printf("[%s]\n", sSDKsample);

    // use command-line specified CUDA device, otherwise use device with highest Gflops/s
    if (checkCmdLineFlag(argc, (const char **)argv, "device"))
    {
        printf("[%s]\n", argv[0]);
        printf("   Does not explicitly support -device=n in OpenGL mode\n");
        printf("   To use -device=n, the sample must be running w/o OpenGL\n\n");
        printf(" > %s -device=n -qatest\n", argv[0]);
        printf("exiting...\n");
        exit(EXIT_SUCCESS);
    }

    // First load the image, so we know what the size of the image (imageW and imageH)
    printf("Allocating host and CUDA memory and loading image file...\n");
    const char *image_path = sdkFindFilePath("portrait_noise.bmp", argv[0]);

    if (image_path == NULL)
    {
        printf("imageDenoisingGL was unable to find and load image file <portrait_noise.bmp>.\nExiting...\n");
        exit(EXIT_FAILURE);
    }

    LoadBMPFile(&h_Src, &imageW, &imageH, image_path);
    printf("Data init done.\n");

    // First initialize OpenGL context, so we can properly set the GL for CUDA.
    // This is necessary in order to achieve optimal performance with OpenGL/CUDA interop.
    initGL(&argc, argv);
    cudaGLSetGLDevice(gpuGetMaxGflopsDeviceId());

    checkCudaErrors(CUDA_MallocArray(&h_Src, imageW, imageH));

    initOpenGLBuffers();
}

printf("Starting GLUT main loop...\n");
printf("Press [1] to view noisy image\n");
printf("Press [2] to view image restored with knn filter\n");
printf("Press [3] to view image restored with nlm filter\n");
printf("Press [4] to view image restored with modified nlm filter\n");
printf("Press [*] to view smooth/edgy areas [RED/BLUE] Ct's when a filter is active\n");
printf("Press [f] to print frame rate\n");
printf("Press [?] to print Noise and Lerp Ct's\n");
printf("Press [q] to exit\n");

glutDisplayFunc(displayFunc);
glutKeyboardFunc(shutDown);

sdkCreateTimer(&timer);
sdkStartTimer(&timer);

glutTimerFunc(REFRESH_DELAY, timerEvent,10000000);
glutMainLoop();

// cudaDeviceReset causes the driver to clean up all state. While
// not mandatory in normal operation, it is good practice.  It is also
// needed to ensure correct operation when the application is being
// profiled. Calling cudaDeviceReset causes all profile data to be
// flushed before the application exits
cudaDeviceReset();
exit(EXIT_SUCCESS);

}