/*
 * Copyright 1993-2010 NVIDIA Corporation.  All rights reserved.
 *
 * Please refer to the NVIDIA end user license agreement (EULA) associated
 * with this source code for terms and conditions that govern your use of
 * this software. Any use, reproduction, disclosure, or distribution of
 * this software and related documentation outside the terms of the EULA
 * is strictly prohibited.
 *
 */

/*
 * This sample evaluates fair call price for a
 * given set of European options using Monte Carlo approach.
 * See supplied whitepaper for more explanations.
 */

// NOTE(review): the eight directives below carry no header name in this copy
// of the file; the <...> targets appear to have been stripped by whatever
// extracted the text. Restore them from the original sample before building.
#include
#include
#include
#include
#include
#include
#include
#include

#include "MonteCarlo_common.h"

// Used by solverThread() as the argc/argv pair handed to shrQAFinishExit().
// NOTE(review): presumably assigned from main()'s arguments; that assignment
// is not visible in this copy.
int *pArgc = NULL;
char **pArgv = NULL;

// On WIN32 builds, spell strcasecmp as strcmpi.
#ifdef WIN32
#define strcasecmp strcmpi
#endif

////////////////////////////////////////////////////////////////////////////////
// Common functions
////////////////////////////////////////////////////////////////////////////////

// Returns a pseudo-random value obtained by linearly interpolating between
// `low` and `high` with weight t = rand() / RAND_MAX.
// Draws from the C library rand(), so results depend on the current seed.
float randFloat(float low, float high){
    float t = (float)rand() / (float)RAND_MAX;
    return (1.0f - t) * low + t * high;
}

///////////////////////////////////////////////////////////////////////////////
// CPU reference functions
///////////////////////////////////////////////////////////////////////////////

// Declared here, defined elsewhere. Writes its result into `callValue`;
// main() passes NULL for h_Random and PATH_N for pathN.
extern "C" void MonteCarloCPU(
    TOptionValue&    callValue,
    TOptionData optionData,
    float *h_Random,
    int pathN
);

//Black-Scholes formula for call options
// Writes its result into `CallResult`; main() uses it as the reference value
// the GPU results are compared against.
extern "C" void BlackScholesCall(
    float& CallResult,
    TOptionData optionData
);

// Declared here, defined elsewhere; neither is called in the visible code.
extern "C" double NormalDistribution(unsigned int i, unsigned int pathN);
extern "C" double MoroInvCND(double prob);

////////////////////////////////////////////////////////////////////////////////
// GPU kernel code
////////////////////////////////////////////////////////////////////////////////

// Two parallel families of entry points, suffixed _SM10 and _SM13, defined
// elsewhere. solverThread() picks the _SM13 family when useDoublePrecision is
// nonzero and the _SM10 family otherwise. Each takes an optional stream
// argument that defaults to 0.
extern "C" void initMonteCarlo_SM10(TOptionPlan *plan);
extern "C" void closeMonteCarlo_SM10(TOptionPlan *plan);
extern "C" void MonteCarlo_SM10(TOptionPlan *plan, cudaStream_t stream=0);
extern "C" void inverseCND_SM10(float *d_Output,
    float *d_Input,
    unsigned int N,
    cudaStream_t stream=0
);

extern "C" void initMonteCarlo_SM13(TOptionPlan *plan);
extern "C" void closeMonteCarlo_SM13(TOptionPlan *plan);
extern "C" void MonteCarlo_SM13(TOptionPlan *plan, cudaStream_t stream=0);
extern "C" void inverseCND_SM13(float *d_Output,
    float *d_Input,
    unsigned int N,
    cudaStream_t stream=0
);

////////////////////////////////////////////////////////////////////////////////
// GPU-driving host thread
////////////////////////////////////////////////////////////////////////////////

// Nonzero selects the _SM13 entry points in solverThread(); zero selects _SM10.
unsigned int useDoublePrecision;

//Timer
// Timer handles. solverThread() indexes this array with plan->device and
// main() reads hTimer[0].
// NOTE(review): nothing visible here checks plan->device against
// MAX_GPU_COUNT before indexing - confirm the caller guarantees it.
const int MAX_GPU_COUNT = 8;
unsigned int hTimer[MAX_GPU_COUNT];

// Worker routine for a single TOptionPlan.
//
// Selects plan->device, allocates plan->d_Samples (plan->pathN floats), then
// runs inverseCND -> initMonteCarlo -> MonteCarlo between cutStartTimer and
// cutStopTimer on hTimer[plan->device]. Afterwards it calls closeMonteCarlo,
// frees plan->d_Samples and calls cutilDeviceReset().
//
// If useDoublePrecision is set and the device's major*10+minor is below 13,
// it prints a message and calls shrQAFinishExit() with QA_PASSED.
// NOTE(review): shrQAFinishExit presumably terminates the process; if it
// returns, execution continues into the double-precision path - confirm.
static CUT_THREADPROC solverThread(TOptionPlan *plan) {
    //Init GPU
    cutilSafeCall( cudaSetDevice(plan->device) );

    cudaDeviceProp deviceProp;
    cutilSafeCall(cudaGetDeviceProperties(&deviceProp, plan->device));
    // Compute capability folded into one integer, e.g. 1.3 -> 13.
    int version = deviceProp.major * 10 + deviceProp.minor;
    if(useDoublePrecision && version < 13){
        printf("Double precision is not supported on device %i.\n", plan->device);
        shrQAFinishExit(*pArgc, (const char **)pArgv, QA_PASSED);
    }

    //Allocate memory for normally distributed samples
    cutilSafeCall(
        cudaMalloc( (void **)&plan->d_Samples, plan->pathN * sizeof(float) )
    );

    //Start the timer
    cutilCheckError( cutStartTimer(hTimer[plan->device]) );

    //Generate normally distributed samples
    // NULL is passed as d_Input and the stream argument is left at its
    // default of 0.
    if(useDoublePrecision)
        inverseCND_SM13(plan->d_Samples, NULL, plan->pathN);
    else
        inverseCND_SM10(plan->d_Samples, NULL, plan->pathN);

    //Allocate intermediate memory for MC integrator
    if(useDoublePrecision)
        initMonteCarlo_SM13(plan);
    else
        initMonteCarlo_SM10(plan);

    //Main computations
    if(useDoublePrecision)
        MonteCarlo_SM13(plan);
    else
        MonteCarlo_SM10(plan);

    // Wait for the device before stopping the timer so the timed interval
    // includes the work launched above.
    cutilSafeCall( cutilDeviceSynchronize() );

    //Stop the timer
    cutilCheckError( cutStopTimer(hTimer[plan->device]) );

    //Shut down this GPU
    if(useDoublePrecision)
        closeMonteCarlo_SM13(plan);
    else
        closeMonteCarlo_SM10(plan);

    cutilSafeCall( cudaFree(plan->d_Samples) );

    // NOTE(review): this return value is ignored, unlike the neighbouring
    // runtime calls that are wrapped in cutilSafeCall.
    cudaStreamSynchronize(0);

    printf("Resetting device %d\n", plan->device );
    cutilDeviceReset();
    CUT_THREADEND;
}

// Streamed solver over `nPlans` plans.
//
// Only the prologue survives in this copy: it allocates one stream handle
// and one event handle per plan and creates each after selecting that plan's
// device with cudaSetDevice().
// NOTE(review): the two malloc() results are not checked before use, and the
// code that would release streams/events is not visible here.
static void multiSolver(TOptionPlan *plan, int nPlans ){

    // allocate and initialize an array of stream handles
    cudaStream_t *streams = (cudaStream_t*) malloc(nPlans * sizeof(cudaStream_t));
    cudaEvent_t *events = (cudaEvent_t*)malloc(nPlans * sizeof(cudaEvent_t));
    for(int i = 0; i < nPlans; i++) {
        cutilSafeCall( cudaSetDevice(plan[i].device) );
        cutilSafeCall( cudaStreamCreate(&(streams[i])) );
        cutilSafeCall( cudaEventCreate(&(events[i])) );
    }

    //Init Each GPU
    // In CUDA 4.0 we can call cudaSetDevice multiple times to target each device
    // Set the device desired, then perform initializations on that device
    // NOTE(review): source text is missing from here on. The loop header below
    // breaks off after `i`, and what follows is the tail of a comparison loop
    // that uses main()'s variables. The rest of multiSolver() and the start of
    // main() (everything between a '<' and a later '>') were lost in
    // extraction. Recover them from the original sample; the fragment is kept
    // verbatim so no content is silently dropped.
    for( int i=0 ; i 1e-6) sumReserve += callValueGPU[i].Confidence / delta;
#ifdef PRINT_RESULTS
            printf("BS: %f; delta: %E\n", callValueBS[i], delta);
#endif
        }
        sumReserve /= OPT_N;
    }

    // Streamed path: taken when threads are not requested or when bqatest is
    // set.
    if( !use_threads || bqatest ) {
        multiSolver( optionSolver, GPU_N );

        printf("main(): GPU statistics, streamed\n");
        for(i = 0; i < GPU_N; i++){
            printf("GPU #%i\n", optionSolver[i].device);
            printf("Options : %i\n", optionSolver[i].optionCount);
            printf("Simulation paths: %i\n", optionSolver[i].pathN);
        }

        // Only timer 0 is read for the streamed run; the value is printed as
        // milliseconds, hence the 0.001 factor for the per-second rate.
        time = cutGetTimerValue(hTimer[0]);
        printf("\nTotal time (ms.): %f\n", time);
        printf("\tNote: This is elapsed time for all to compute.\n");
        printf("Options per sec.: %f\n", OPT_N / (time * 0.001));

        printf("main(): comparing Monte Carlo and Black-Scholes results...\n");
        sumDelta   = 0;
        sumRef     = 0;
        sumReserve = 0;
        for(i = 0; i < OPT_N; i++){
            BlackScholesCall( callValueBS[i], optionData[i] );
            delta     = fabs(callValueBS[i] - callValueGPU[i].Expected);
            ref       = callValueBS[i];
            sumDelta += delta;
            sumRef   += fabs(ref);
            // Options with delta <= 1e-6 are skipped here (avoids dividing by
            // a near-zero delta) but are still counted in the OPT_N divisor
            // applied after the loop.
            if(delta > 1e-6) sumReserve += callValueGPU[i].Confidence / delta;
#ifdef PRINT_RESULTS
            printf("BS: %f; delta: %E\n", callValueBS[i], delta);
#endif
        }
        sumReserve /= OPT_N;
    }

#ifdef DO_CPU
    // Optional CPU cross-check: compares MonteCarloCPU() against the GPU
    // results per option and prints sumDelta / sumRef as the "L1 norm".
    printf("main(): running CPU MonteCarlo...\n");
    TOptionValue callValueCPU;
    sumDelta = 0;
    sumRef   = 0;
    for(i = 0; i < OPT_N;
        i++){
        MonteCarloCPU(
            callValueCPU,
            optionData[i],
            NULL,
            PATH_N
        );
        delta     = fabs(callValueCPU.Expected - callValueGPU[i].Expected);
        ref       = callValueCPU.Expected;
        sumDelta += delta;
        sumRef   += fabs(ref);
        printf("Exp : %f | %f\t", callValueCPU.Expected, callValueGPU[i].Expected);
        printf("Conf: %f | %f\n", callValueCPU.Confidence, callValueGPU[i].Confidence);
    }
    printf("L1 norm: %E\n", sumDelta / sumRef);
#endif

    printf("Shutting down...\n");
    // NOTE(review): source text is missing here as well. The loop header
    // breaks off after `i` and resumes at the end of the final pass/fail
    // expression, which maps a "> 1.0f" comparison to QA_PASSED / QA_FAILED.
    // The fragment is kept verbatim; restore the lost lines from the original
    // sample.
    for( int i=0; i 1.0f) ? QA_PASSED : QA_FAILED);
}