I am running the cuda program below to perform a monte carlo simulation but once my kernel finishes, it gets stuck and doesn’t move on from cudaDeviceSynchronize()
. Please help.
Program:
/**
* NEED to WRITE
*/
#include <thrust/device_vector.h>
#include <thrust/functional.h>
#include <thrust/random.h>
#include <thrust/reduce.h>
#include <algorithm> // Needed for the "max" function
#include <chrono>
#include <cmath>
#include <iostream>
using namespace std::chrono;
__global__ void gaussian_box_muller(int n, float *gaussBMArray, float *S_cur,
double S_adjust, const double v,
const double T, const double K,
float *payoff_sum, float *rand1,
float *rand2) {
int index = blockIdx.x * blockDim.x + threadIdx.x;
int stride = blockDim.x * gridDim.x;
double x = 0.0;
double y = 0.0;
double euclid_sq = 0.0;
thrust::maximum<float> mx;
for (int i = index; i < n; i += stride) {
do {
x = 2.0 * rand1[i] / static_cast<double>(RAND_MAX) - 1;
y = 2.0 * rand2[i] / static_cast<double>(RAND_MAX) - 1;
euclid_sq = x * x + y * y;
} while (euclid_sq >= 1.0);
printf(" euclid_sq: %f \n", euclid_sq);
gaussBMArray[i] = (x * sqrt(-2 * log(euclid_sq) / euclid_sq));
printf(" gaussBMArray: %f \n", gaussBMArray[i]);
S_cur[i] = S_adjust * exp(sqrt(v * v * T) * gaussBMArray[i]);
printf(" S_cur[i]: %f \n", S_cur[i]);
payoff_sum[i] = mx(K - S_cur[i], 0.0);
printf(" payoff_sum[i]: %f \n", payoff_sum[i]);
}
}
__global__ void plus(float sum, float*arr) {
sum+=arr[blockIdx.x];
}
// Pricing a European vanilla call option with a Monte Carlo method
double monte_carlo_call_price(const int num_sims, const double S,
const double K, const double r, const double v,
const double T) {
printf("\nENTERING monte_carlo_call_price\n");
double S_adjust = S * exp(T * (r - 0.5 * v * v));
float *gaussBMArray;
float *S_cur;
float *payoff_sum;
float *rand1;
float *rand2;
printf("---allocating shared memory---\n");
cudaMallocManaged(&gaussBMArray, num_sims * sizeof(float));
cudaMallocManaged(&S_cur, num_sims * sizeof(float));
cudaMallocManaged(&payoff_sum, num_sims * sizeof(float));
cudaMallocManaged(&rand1, num_sims * sizeof(float));
cudaMallocManaged(&rand2, num_sims * sizeof(float));
printf("---filling random values in arrays---\n");
for (int i = 0; i < num_sims; i++) {
rand1[i] = rand();
rand2[i] = rand();
}
printf("\nENTERING gausian_box_muller (device func)\n");
// int blockSize = 256;
// int numBlocks = (num_sims + blockSize - 1) / blockSize;
gaussian_box_muller<<<num_sims, 1>>>(num_sims, gaussBMArray, S_cur, S_adjust, v, T, K, payoff_sum, rand1, rand2);
cudaDeviceSynchronize();
printf("\nENTERING monte_carlo_call_price\n");
// for (int i = 0; i < num_sims; i++) {
// std::cout << " final payoff: " << payoff_sum[i] << std::endl;
// } // for (int i = 0; i < num_sims; i++) {
// std::cout << " final payoff: " << payoff_sum[i] << std::endl;
// }
float price = 0;
plus<<<num_sims,1>>>(price, payoff_sum);
std::cout << " price is " << price << std::endl;
printf("---freeing memory---\n");
cudaFree(gaussBMArray);
cudaFree(S_cur);
cudaFree(payoff_sum);
cudaFree(rand1);
cudaFree(rand2);
return 0;
// return (payoff_sum / static_cast<double>(num_sims)) * exp(-r * T);
}
int main(int argc, char **argv) {
printf("ENTERING: main\n");
int num_sims = 1 << 2; // Number of simulated asset paths
double S = 100.0; // Option price
double K = 100.0; // Strike price
double r = 0.05; // Risk-free rate (5%)
double v = 0.2; // Volatility of the underlying (20%)
double T = 1.0; // One year until expiry
printf("================================\n");
printf("PARAMS:\n");
std::cout << " Number of Paths: " << num_sims << std::endl;
std::cout << " Underlying: " << S << std::endl;
std::cout << " Strike: " << K << std::endl;
std::cout << " Risk-Free Rate: " << r << std::endl;
std::cout << " Volatility: " << v << std::endl;
std::cout << " Maturity: " << T << std::endl;
printf("================================\n");
printf("ACTION: monte carlo call pricing\n");
printf("================================\n");
double call = monte_carlo_call_price(num_sims, S, K, r, v, T);
printf("\nENTERING: main\n");
printf("================================\n");
printf("FINAL OUTPUT:\n");
std::cout << " Call Price: " << call << std::endl;
printf("================================\n");
return 0;
}
Output:
ENTERING: main
================================
PARAMS:
Number of Paths: 4
Underlying: 100
Strike: 100
Risk-Free Rate: 0.05
Volatility: 0.2
Maturity: 1
================================
ACTION: monte carlo call pricing
================================
ENTERING monte_carlo_call_price
---allocating shared memory---
---filling random values in arrays---
ENTERING gausian_box_muller (device func)
euclid_sq: 0.396395
euclid_sq: 0.676847
euclid_sq: 0.507531
gaussBMArray: -0.712082
gaussBMArray: 0.608056
gaussBMArray: 1.112272
S_cur[i]: 89.367203
S_cur[i]: 116.370781
S_cur[i]: 128.718063
payoff_sum[i]: 10.632797
payoff_sum[i]: 0.000000
payoff_sum[i]: 0.000000
It gets stuck over here and doesn’t move on. It should print “ENTERING monte_carlo_call_price”