Hi,
I want to change the NVIDIA CUDA SDK's Monte Carlo kernel so that it can handle lakhs (hundreds of thousands) of options, initially using one block with a single thread, so that later I can change the number of blocks without changing the number of options.
In the Monte Carlo sample, one of two kernel functions, MonteCarloKernel() or MonteCarloOneBlockPerOption(), is used depending on the condition doMultiBlock = (plan->pathN / plan->optionCount) >= 8192, where pathN = 1 << 18 and optionCount = 256.
[codebox]
// Launch the Monte Carlo pricing kernels. The launch layout is selected by
// doMultiBlock (computed by the caller).
if (doMultiBlock) {
    // Several blocks cooperate on each option: every thread of
    // MonteCarloKernel stores one partial sum in plan->d_Buffer, then
    // MonteCarloReduce is launched with one block per option over that buffer.
    const int blocksPerOption = (plan->optionCount < 16) ? 64 : 16;
    // Number of partial sums stored per option (threads per option).
    const int accumN = THREAD_N * blocksPerOption;
    // x-dimension: blocks working on one option; y-dimension: option index.
    // NOTE(review): gridDim.y is capped at 65535 by CUDA, so option counts
    // above that need a different option-to-block mapping -- confirm before
    // scaling optionCount up.
    const dim3 gridMain(blocksPerOption, plan->optionCount);

    MonteCarloKernel<<<gridMain, THREAD_N>>>(
        (__TOptionValue *)plan->d_Buffer,
        plan->d_Samples,
        plan->pathN
    );
    cutilCheckMsg("MonteCarloKernel() execution failed\n");

    MonteCarloReduce<<<plan->optionCount, THREAD_N>>>(
        (__TOptionValue *)plan->d_Buffer,
        accumN
    );
    cutilCheckMsg("MonteCarloReduce() execution failed\n");
} else {
    // One block per option: blockIdx.x is the option index inside the kernel.
    MonteCarloOneBlockPerOption<<<plan->optionCount, THREAD_N>>>(
        plan->d_Samples,
        plan->pathN
    );
    // Check this launch too, consistent with the multi-block branch.
    cutilCheckMsg("MonteCarloOneBlockPerOption() execution failed\n");
}
[/codebox]
The kernel function MonteCarloKernel() is:
[codebox]
/*
 * Computes per-thread partial sums of the call payoff and its square for one
 * option and stores them in d_Buffer.
 *
 * Launch layout (as read from the indexing below):
 *   - blockIdx.y selects the option (index into d_OptionData);
 *   - the threads of all blocks along x share the samples of that option,
 *     each thread starting at its flat x index and stepping by the total
 *     number of threads along x.
 *
 * Parameters:
 *   d_Buffer  - output; needs at least gridDim.y * gridDim.x * blockDim.x
 *               elements, one per thread.
 *   d_Samples - input samples, pathN elements are read.
 *   pathN     - number of samples to process per option.
 *
 * d_OptionData and endCallValue() are defined outside this snippet.
 */
static __global__ void MonteCarloKernel(
    __TOptionValue *d_Buffer,
    float *d_Samples,
    int pathN
){
    const int optionIndex = blockIdx.y;
    const real        S = d_OptionData[optionIndex].S;
    const real        X = d_OptionData[optionIndex].X;
    const real    MuByT = d_OptionData[optionIndex].MuByT;
    const real VBySqrtT = d_OptionData[optionIndex].VBySqrtT;

    // One thread per partial integral
    const int   iSum = blockIdx.x * blockDim.x + threadIdx.x;
    // Threads per option along x; also the loop stride and the per-option
    // row length in d_Buffer.
    const int accumN = blockDim.x * gridDim.x;

    // Cycle through the entire samples array:
    // derive end stock price for each path,
    // accumulate into the intermediate global memory array
    __TOptionValue sumCall = {0, 0};
    for(int i = iSum; i < pathN; i += accumN){
        real         r = d_Samples[i];
        real callValue = endCallValue(S, X, r, MuByT, VBySqrtT);
        sumCall.Expected   += callValue;
        sumCall.Confidence += callValue * callValue;
    }

    // Row optionIndex, column iSum of the partial-sum buffer.
    d_Buffer[optionIndex * accumN + iSum] = sumCall;
}
[/codebox]
The kernel function MonteCarloOneBlockPerOption() is:
[codebox]
/*
 * Prices one option per thread block: accumulates the call payoff and its
 * square over all samples into shared-memory slots, reduces them with
 * sumReduce(), and has thread 0 store the result in d_CallValue.
 *
 * Launch layout (as read from the indexing below):
 *   - blockIdx.x selects the option (index into d_OptionData / d_CallValue);
 *   - each thread fills the shared slots threadIdx.x, threadIdx.x + blockDim.x,
 *     ... below SUM_N (== THREAD_N).
 *   NOTE(review): sumReduce is instantiated with THREAD_N as its block-size
 *   argument; presumably the kernel expects blockDim.x == THREAD_N -- confirm
 *   against sumReduce before launching with fewer threads.
 *
 * Parameters:
 *   d_Samples - input samples, pathN elements are read.
 *   pathN     - number of samples to process per option.
 *
 * d_OptionData, d_CallValue, endCallValue() and sumReduce() are defined
 * outside this snippet.
 */
static __global__ void MonteCarloOneBlockPerOption(
    float *d_Samples,
    int pathN
){
    const int SUM_N = THREAD_N;
    __shared__ real s_SumCall[SUM_N];
    __shared__ real s_Sum2Call[SUM_N];

    const int optionIndex = blockIdx.x;
    const real        S = d_OptionData[optionIndex].S;
    const real        X = d_OptionData[optionIndex].X;
    const real    MuByT = d_OptionData[optionIndex].MuByT;
    const real VBySqrtT = d_OptionData[optionIndex].VBySqrtT;

    // Cycle through the entire samples array:
    // derive end stock price for each path,
    // accumulate partial integrals into the intermediate shared memory buffer
    for(int iSum = threadIdx.x; iSum < SUM_N; iSum += blockDim.x){
        __TOptionValue sumCall = {0, 0};
        // Slot iSum covers samples iSum, iSum + SUM_N, iSum + 2 * SUM_N, ...
        for(int i = iSum; i < pathN; i += SUM_N){
            real         r = d_Samples[i];
            real callValue = endCallValue(S, X, r, MuByT, VBySqrtT);
            sumCall.Expected   += callValue;
            sumCall.Confidence += callValue * callValue;
        }
        s_SumCall[iSum]  = sumCall.Expected;
        s_Sum2Call[iSum] = sumCall.Confidence;
    }

    // Reduce shared memory accumulators
    // and write the final result to global memory
    sumReduce<real, SUM_N, THREAD_N>(s_SumCall, s_Sum2Call);
    if(threadIdx.x == 0){
        __TOptionValue t = {s_SumCall[0], s_Sum2Call[0]};
        d_CallValue[optionIndex] = t;
    }
}
[/codebox]
Can anyone give me some ideas on how to make these changes?
Thank you in advance.