Hi,

I have written a kernel for binomial option pricing. Can anyone tell me how I can improve this code so that it runs with an N*N (two-dimensional) grid as well as with a three-dimensional block size?

[codebox]
/**
 * Prices optN options with a binomial tree, one option per thread per loop
 * iteration.
 *
 * The kernel is launch-shape agnostic: grid and block indices are flattened
 * into a single linear thread id, so it can be launched with a 1D, 2D or 3D
 * grid and a 1D, 2D or 3D block. Each thread starts at its linear id and
 * advances by the total number of launched threads, so every option in
 * [0, optN) is handled exactly once no matter how many threads are launched.
 *
 * Reads d_OptionData[opt] and writes d_CallValue[opt] for each option it
 * owns; uses a (NUM_STEPS + 1)-element slice of d_CallBuffer as scratch.
 *
 * @param optN Number of options to price.
 */
static __global__ void bOptionsKernel(const unsigned int optN)
{
    // Linear index of this thread inside its (possibly 3D) block.
    const unsigned int threadsPerBlock = blockDim.x * blockDim.y * blockDim.z;
    const unsigned int localTid =
        (threadIdx.z * blockDim.y + threadIdx.y) * blockDim.x + threadIdx.x;

    // Linear index of this block inside the (possibly 2D/3D) grid.
    const unsigned int blockId =
        (blockIdx.z * gridDim.y + blockIdx.y) * gridDim.x + blockIdx.x;
    const unsigned int blockCount = gridDim.x * gridDim.y * gridDim.z;

    // Widen before multiplying so very large launches cannot wrap around.
    const size_t globalTid =
        static_cast<size_t>(blockId) * threadsPerBlock + localTid;
    const size_t totalThreads =
        static_cast<size_t>(blockCount) * threadsPerBlock;

    // Per-thread scratch holding one tree level (NUM_STEPS + 1 nodes).
    // NOTE(review): d_CallBuffer's declaration is not visible here. Indexing
    // by the block-local thread id is only race-free if the buffer is private
    // to each block (e.g. __shared__) and holds at least
    // threadsPerBlock * (NUM_STEPS + 1) elements. If it is a single global
    // buffer, index it with globalTid instead and size it for totalThreads.
    real *const d_Call = &d_CallBuffer[localTid * (NUM_STEPS + 1)];

    // No per-block early exit is needed: a thread whose first index is
    // already >= optN simply never enters the loop.
    for (size_t opt = globalTid; opt < optN; opt += totalThreads)
    {
        const real S      = d_OptionData[opt].S;
        const real X      = d_OptionData[opt].X;
        const real vDt    = d_OptionData[opt].vDt;
        const real puByDf = d_OptionData[opt].puByDf;
        const real pdByDf = d_OptionData[opt].pdByDf;

        // Values at the final tree level (expiry), one per node.
        for (int node = 0; node <= NUM_STEPS; node++)
            d_Call[node] = expiryCallValue(S, X, vDt, node);

        // Walk the tree backwards; each level has one node fewer than the
        // one after it, and the update is done in place from low to high
        // index so d_Call[node + 1] is still the next level's value.
        for (int level = NUM_STEPS; level > 0; level--)
            for (int node = 0; node <= level - 1; node++)
                d_Call[node] = puByDf * d_Call[node + 1] + pdByDf * d_Call[node];

        // The root of the tree is the option value.
        d_CallValue[opt] = static_cast<float>(d_Call[0]);
    }
}
[/codebox]

I am calling my kernel in this way:

// Kernel launch: the first launch argument (theGrid) is the grid dimension, the second (THREAD) is the block dimension.
bOptionsKernel<<<theGrid, THREAD>>>(optN);

theGrid is a 2-dimensional grid.