Kernel modification – help!

Hi,

I have written a kernel for Monte Carlo option pricing. Can anyone tell me how I can improve this code so that I can run it with an N*N grid size as well as with a 3-dimensional block size?

[codebox]// Monte Carlo option-pricing kernel.
//
// Each thread handles options tid, tid + totalThreads, tid + 2 * totalThreads, ...
// For every option it walks all pathN samples, passes each one to
// endCallValue() together with the option's parameters, and accumulates
//   - the sum of the returned values        -> d_CallValue[opt].Expected
//   - the sum of their squares              -> d_CallValue[opt].Confidence
//
// Parameters:
//   optN      - number of options to process
//   d_Samples - array of samples; elements [0, pathN) are read for every option
//   pathN     - number of samples consumed per option
//
// d_OptionData (input) and d_CallValue (output) are declared outside this
// function and are indexed by option number.
//
// The thread and block indices are flattened over all three dimensions, so
// the kernel may be launched with 1-D, 2-D or 3-D grids and blocks. For a
// 1-D launch the .y/.z indices are 0 and the .y/.z extents are 1, so the
// result is the usual blockDim.x * blockIdx.x + threadIdx.x.
static __global__ void MonteCarlokernel(int optN, float *d_Samples, int pathN)
{
    // Number of threads in one block and this thread's linear index inside it.
    const int threadsPerBlock = blockDim.x * blockDim.y * blockDim.z;
    const int threadInBlock   = threadIdx.x
                              + blockDim.x * (threadIdx.y + blockDim.y * threadIdx.z);

    // Number of blocks in the grid and this block's linear index inside it.
    const int blocksInGrid = gridDim.x * gridDim.y * gridDim.z;
    const int blockInGrid  = blockIdx.x
                           + gridDim.x * (blockIdx.y + gridDim.y * blockIdx.z);

    // Globally unique thread index.
    const int tid = blockInGrid * threadsPerBlock + threadInBlock;

    // Total number of threads in the execution grid (loop stride).
    const int totalThreads = threadsPerBlock * blocksInGrid;

    for (int opt = tid; opt < optN; opt += totalThreads)
    {
        // Per-option parameters.
        const real        S = d_OptionData[opt].S;
        const real        X = d_OptionData[opt].X;
        const real    MuByT = d_OptionData[opt].MuByT;
        const real VBySqrtT = d_OptionData[opt].VBySqrtT;

        // Running sum and sum of squares over all samples.
        __TOptionValue sumCall = {0, 0};

        for (int pos = 0; pos < pathN; pos++)
        {
            const real r         = d_Samples[pos];
            const real callValue = endCallValue(S, X, r, MuByT, VBySqrtT);

            sumCall.Expected   += callValue;
            sumCall.Confidence += callValue * callValue;
        }

        d_CallValue[opt].Expected   = sumCall.Expected;
        d_CallValue[opt].Confidence = sumCall.Confidence;
    }
}

[/codebox]

I am calling the kernel in this way:

// Kernel launch: BLOCK is passed as the grid dimension and THREAD_N as the
// block dimension; the arguments are the option count, the sample array and
// the number of samples.
// NOTE(review): the kernel listed above is named MonteCarlokernel, but this
// launch names MonteCarloMultipleOptionsPerBlock - confirm which is intended.
MonteCarloMultipleOptionsPerBlock<<<BLOCK, THREAD_N>>>(plan->optionCount,

        plan->d_Samples,

        plan->pathN

    );

where d_Samples is an array with pathN random numbers and pathN = 1<<18.