I am fairly new to this engine, and while exploring the sample optixTriangle program I made the following changes to optixTriangle.cpp:
- I changed some camera properties.
- I changed the triangle build input, increasing the number of triangles from 1 to 10,000 randomly generated triangles, all of which lie roughly within the range (-10,-10,-10) to (10,10,10).
- I changed the miss record data to { 0.7f, 0.8f, 1.0f } so that the background is sky blue, and in
extern "C" __global__ void __closesthit__ch()
in optixTriangle.cu I set the payload to a constant value with setPayload( make_float3(0.98f, 0.360f, 0.360f) )
so that all of my rendered triangles have a uniform color.
I am attaching the optixTriangle.cpp code for reference; other than the changes above, I have not changed anything else in the sample project.
#include <optix.h>
#include <optix_function_table_definition.h>
#include <optix_stack_size.h>
#include <optix_stubs.h>

#include <cuda_runtime.h>

#include <sampleConfig.h>

#include <sutil/CUDAOutputBuffer.h>
#include <sutil/Exception.h>
#include <sutil/sutil.h>

#include "optixTriangle.h"

#include <array>
#include <cstdlib>
#include <iomanip>
#include <iostream>
#include <string>
#include <vector>

#include <sutil/Camera.h>
#include <sutil/Trackball.h>
// One shader-binding-table record: an opaque header followed by a
// program-specific payload. main() fills the header with
// optixSbtRecordPackHeader() and uploads the whole record to the device.
template <typename Payload>
struct SbtRecord
{
    // Header storage, aligned to OPTIX_SBT_RECORD_ALIGNMENT.
    __align__( OPTIX_SBT_RECORD_ALIGNMENT ) char header[OPTIX_SBT_RECORD_HEADER_SIZE];

    // Per-record user data (e.g. main() assigns the miss record's data).
    Payload data;
};

// Concrete record types for the three program kinds used by this sample.
using RayGenSbtRecord   = SbtRecord<RayGenData>;
using MissSbtRecord     = SbtRecord<MissData>;
using HitGroupSbtRecord = SbtRecord<HitGroupData>;
// Places the camera at (-5, 0, -25) looking at the origin with +Y up and a
// 65 degree vertical field of view, and sets the aspect ratio to
// width / height.
//
//   cam    - camera to configure (modified in place)
//   width  - image width in pixels
//   height - image height in pixels
void configureCamera( sutil::Camera& cam, const uint32_t width, const uint32_t height )
{
    const float3 eye_position = make_float3( -5.0f, 0.0f, -25.0f );
    const float3 look_target  = make_float3( 0.0f, 0.0f, 0.0f );
    const float3 up_direction = make_float3( 0.0f, 1.0f, 0.0f );
    const float  aspect_ratio = static_cast<float>( width ) / static_cast<float>( height );

    cam.setEye( eye_position );
    cam.setLookat( look_target );
    cam.setUp( up_direction );
    cam.setFovY( 65.0f );
    cam.setAspectRatio( aspect_ratio );
}
// Prints the command-line usage text to stderr and terminates the process
// with exit status 1. Never returns.
//
//   argv0 - program name shown in the usage line
void printUsageAndExit( const char* argv0 )
{
    std::cerr << "Usage : " << argv0 << " [options]\n";
    std::cerr << "Options: --file | -f <filename> Specify file for image output\n";
    std::cerr << " --help | -h Print this usage message\n";
    // The advertised default must match the width/height initial values in main().
    std::cerr << " --dim=<width>x<height> Set image dimensions; defaults to 1024x768\n";
    exit( 1 );
}
// Log callback registered on the OptiX device context in main().
// Writes one line to stderr of the form "[<level>][<tag>]: <message>",
// with the level right-aligned in 2 columns and the tag in 12 columns.
// The trailing user-data pointer is unused.
static void context_log_cb( unsigned int level, const char* tag, const char* message, void* /*cbdata */)
{
    std::ostream& sink = std::cerr;

    sink << "[" << std::setw( 2 ) << level;
    sink << "][" << std::setw( 12 ) << tag;
    sink << "]: " << message << "\n";
}
int main( int argc, char* argv[] )
{
std::string outfile;
int width = 1024;
int height = 768;
for( int i = 1; i < argc; ++i )
{
const std::string arg( argv[i] );
if( arg == "--help" || arg == "-h" )
{
printUsageAndExit( argv[0] );
}
else if( arg == "--file" || arg == "-f" )
{
if( i < argc - 1 )
{
outfile = argv[++i];
}
else
{
printUsageAndExit( argv[0] );
}
}
else if( arg.substr( 0, 6 ) == "--dim=" )
{
const std::string dims_arg = arg.substr( 6 );
sutil::parseDimensions( dims_arg.c_str(), width, height );
}
else
{
std::cerr << "Unknown option '" << arg << "'\n";
printUsageAndExit( argv[0] );
}
}
try
{
//
// Initialize CUDA and create OptiX context
//
OptixDeviceContext context = nullptr;
{
// Initialize CUDA
CUDA_CHECK( cudaFree( 0 ) );
// Initialize the OptiX API, loading all API entry points
OPTIX_CHECK( optixInit() );
// Specify context options
OptixDeviceContextOptions options = {};
options.logCallbackFunction = &context_log_cb;
options.logCallbackLevel = 4;
// Associate a CUDA context (and therefore a specific GPU) with this
// device context
CUcontext cuCtx = 0; // zero means take the current context
OPTIX_CHECK( optixDeviceContextCreate( cuCtx, &options, &context ) );
}
//
// accel handling
//
OptixTraversableHandle gas_handle;
CUdeviceptr d_gas_output_buffer;
{
// Use default options for simplicity. In a real use case we would want to
// enable compaction, etc
OptixAccelBuildOptions accel_options = {};
accel_options.buildFlags = OPTIX_BUILD_FLAG_NONE;
accel_options.operation = OPTIX_BUILD_OPERATION_BUILD;
//accel_options.acceleratorDescriptor.type = OPTIX_ACCELERATION_LBVH;
// Triangle build input: simple list of three vertices
std::vector<float3> vertices;
for (int i = 0; i < 10000; i++)
{
float3 rand_point = make_float3(static_cast<float>(rand()) / RAND_MAX * 20.0f - 10.0f,
static_cast<float>(rand()) / RAND_MAX * 20.0f - 10.0f,
static_cast<float>(rand()) / RAND_MAX * 20.0f - 10.0f);
float3 v1 = rand_point - make_float3(static_cast<float>(rand()) / RAND_MAX - 0.5f,
static_cast<float>(rand()) / RAND_MAX - 0.5f,
static_cast<float>(rand()) / RAND_MAX - 0.5f);
float3 v2 = rand_point - make_float3(static_cast<float>(rand()) / RAND_MAX - 0.5f,
static_cast<float>(rand()) / RAND_MAX - 0.5f,
static_cast<float>(rand()) / RAND_MAX - 0.5f);
float3 v3 = rand_point - make_float3(static_cast<float>(rand()) / RAND_MAX - 0.5f,
static_cast<float>(rand()) / RAND_MAX - 0.5f,
static_cast<float>(rand()) / RAND_MAX - 0.5f);
vertices.push_back(v1);
vertices.push_back(v2);
vertices.push_back(v3);
}
const size_t vertices_size = sizeof( float3 )*vertices.size();
CUdeviceptr d_vertices=0;
CUDA_CHECK( cudaMalloc( reinterpret_cast<void**>( &d_vertices ), vertices_size ) );
CUDA_CHECK( cudaMemcpy(
reinterpret_cast<void*>( d_vertices ),
vertices.data(),
vertices_size,
cudaMemcpyHostToDevice
) );
// Our build input is a simple list of non-indexed triangle vertices
const uint32_t triangle_input_flags[1] = { OPTIX_GEOMETRY_FLAG_NONE };
OptixBuildInput triangle_input = {};
triangle_input.type = OPTIX_BUILD_INPUT_TYPE_TRIANGLES;
triangle_input.triangleArray.vertexFormat = OPTIX_VERTEX_FORMAT_FLOAT3;
triangle_input.triangleArray.numVertices = static_cast<uint32_t>( vertices.size() );
triangle_input.triangleArray.vertexBuffers = &d_vertices;
triangle_input.triangleArray.flags = triangle_input_flags;
triangle_input.triangleArray.numSbtRecords = 1;
OptixAccelBufferSizes gas_buffer_sizes;
OPTIX_CHECK( optixAccelComputeMemoryUsage(
context,
&accel_options,
&triangle_input,
1, // Number of build inputs
&gas_buffer_sizes
) );
CUdeviceptr d_temp_buffer_gas;
CUDA_CHECK( cudaMalloc(
reinterpret_cast<void**>( &d_temp_buffer_gas ),
gas_buffer_sizes.tempSizeInBytes
) );
CUDA_CHECK( cudaMalloc(
reinterpret_cast<void**>( &d_gas_output_buffer ),
gas_buffer_sizes.outputSizeInBytes
) );
OPTIX_CHECK( optixAccelBuild(
context,
0, // CUDA stream
&accel_options,
&triangle_input,
1, // num build inputs
d_temp_buffer_gas,
gas_buffer_sizes.tempSizeInBytes,
d_gas_output_buffer,
gas_buffer_sizes.outputSizeInBytes,
&gas_handle,
nullptr, // emitted property list
0 // num emitted properties
) );
// We can now free the scratch space buffer used during build and the vertex
// inputs, since they are not needed by our trivial shading method
CUDA_CHECK( cudaFree( reinterpret_cast<void*>( d_temp_buffer_gas ) ) );
CUDA_CHECK( cudaFree( reinterpret_cast<void*>( d_vertices ) ) );
}
//
// Create module
//
OptixModule module = nullptr;
OptixPipelineCompileOptions pipeline_compile_options = {};
{
OptixModuleCompileOptions module_compile_options = {};
#if !defined( NDEBUG )
module_compile_options.optLevel = OPTIX_COMPILE_OPTIMIZATION_LEVEL_0;
module_compile_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
#endif
pipeline_compile_options.usesMotionBlur = false;
pipeline_compile_options.traversableGraphFlags = OPTIX_TRAVERSABLE_GRAPH_FLAG_ALLOW_SINGLE_GAS;
pipeline_compile_options.numPayloadValues = 3;
pipeline_compile_options.numAttributeValues = 3;
#ifdef DEBUG // Enables debug exceptions during optix launches. This may incur significant performance cost and should only be done during development.
pipeline_compile_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_DEBUG | OPTIX_EXCEPTION_FLAG_TRACE_DEPTH | OPTIX_EXCEPTION_FLAG_STACK_OVERFLOW;
#else
pipeline_compile_options.exceptionFlags = OPTIX_EXCEPTION_FLAG_NONE;
#endif
pipeline_compile_options.pipelineLaunchParamsVariableName = "params";
pipeline_compile_options.usesPrimitiveTypeFlags = OPTIX_PRIMITIVE_TYPE_FLAGS_TRIANGLE;
size_t inputSize = 0;
const char* input = sutil::getInputData( OPTIX_SAMPLE_NAME, OPTIX_SAMPLE_DIR, "optixTriangle.cu", inputSize );
OPTIX_CHECK_LOG( optixModuleCreateFromPTX(
context,
&module_compile_options,
&pipeline_compile_options,
input,
inputSize,
LOG, &LOG_SIZE,
&module
) );
}
//
// Create program groups
//
OptixProgramGroup raygen_prog_group = nullptr;
OptixProgramGroup miss_prog_group = nullptr;
OptixProgramGroup hitgroup_prog_group = nullptr;
{
OptixProgramGroupOptions program_group_options = {}; // Initialize to zeros
OptixProgramGroupDesc raygen_prog_group_desc = {}; //
raygen_prog_group_desc.kind = OPTIX_PROGRAM_GROUP_KIND_RAYGEN;
raygen_prog_group_desc.raygen.module = module;
raygen_prog_group_desc.raygen.entryFunctionName = "__raygen__rg";
OPTIX_CHECK_LOG( optixProgramGroupCreate(
context,
&raygen_prog_group_desc,
1, // num program groups
&program_group_options,
LOG, &LOG_SIZE,
&raygen_prog_group
) );
OptixProgramGroupDesc miss_prog_group_desc = {};
miss_prog_group_desc.kind = OPTIX_PROGRAM_GROUP_KIND_MISS;
miss_prog_group_desc.miss.module = module;
miss_prog_group_desc.miss.entryFunctionName = "__miss__ms";
OPTIX_CHECK_LOG( optixProgramGroupCreate(
context,
&miss_prog_group_desc,
1, // num program groups
&program_group_options,
LOG, &LOG_SIZE,
&miss_prog_group
) );
OptixProgramGroupDesc hitgroup_prog_group_desc = {};
hitgroup_prog_group_desc.kind = OPTIX_PROGRAM_GROUP_KIND_HITGROUP;
hitgroup_prog_group_desc.hitgroup.moduleCH = module;
hitgroup_prog_group_desc.hitgroup.entryFunctionNameCH = "__closesthit__ch";
OPTIX_CHECK_LOG( optixProgramGroupCreate(
context,
&hitgroup_prog_group_desc,
1, // num program groups
&program_group_options,
LOG, &LOG_SIZE,
&hitgroup_prog_group
) );
}
//
// Link pipeline
//
OptixPipeline pipeline = nullptr;
{
const uint32_t max_trace_depth = 1;
OptixProgramGroup program_groups[] = { raygen_prog_group, miss_prog_group, hitgroup_prog_group };
OptixPipelineLinkOptions pipeline_link_options = {};
pipeline_link_options.maxTraceDepth = max_trace_depth;
pipeline_link_options.debugLevel = OPTIX_COMPILE_DEBUG_LEVEL_FULL;
OPTIX_CHECK_LOG( optixPipelineCreate(
context,
&pipeline_compile_options,
&pipeline_link_options,
program_groups,
sizeof( program_groups ) / sizeof( program_groups[0] ),
LOG, &LOG_SIZE,
&pipeline
) );
OptixStackSizes stack_sizes = {};
for( auto& prog_group : program_groups )
{
OPTIX_CHECK( optixUtilAccumulateStackSizes( prog_group, &stack_sizes ) );
}
uint32_t direct_callable_stack_size_from_traversal;
uint32_t direct_callable_stack_size_from_state;
uint32_t continuation_stack_size;
OPTIX_CHECK( optixUtilComputeStackSizes( &stack_sizes, max_trace_depth,
0, // maxCCDepth
0, // maxDCDEpth
&direct_callable_stack_size_from_traversal,
&direct_callable_stack_size_from_state, &continuation_stack_size ) );
OPTIX_CHECK( optixPipelineSetStackSize( pipeline, direct_callable_stack_size_from_traversal,
direct_callable_stack_size_from_state, continuation_stack_size,
1 // maxTraversableDepth
) );
}
//
// Set up shader binding table
//
OptixShaderBindingTable sbt = {};
{
CUdeviceptr raygen_record;
const size_t raygen_record_size = sizeof( RayGenSbtRecord );
CUDA_CHECK( cudaMalloc( reinterpret_cast<void**>( &raygen_record ), raygen_record_size ) );
RayGenSbtRecord rg_sbt;
OPTIX_CHECK( optixSbtRecordPackHeader( raygen_prog_group, &rg_sbt ) );
CUDA_CHECK( cudaMemcpy(
reinterpret_cast<void*>( raygen_record ),
&rg_sbt,
raygen_record_size,
cudaMemcpyHostToDevice
) );
CUdeviceptr miss_record;
size_t miss_record_size = sizeof( MissSbtRecord );
CUDA_CHECK( cudaMalloc( reinterpret_cast<void**>( &miss_record ), miss_record_size ) );
MissSbtRecord ms_sbt;
ms_sbt.data = { 0.7f, 0.8f, 1.0f };
OPTIX_CHECK( optixSbtRecordPackHeader( miss_prog_group, &ms_sbt ) );
CUDA_CHECK( cudaMemcpy(
reinterpret_cast<void*>( miss_record ),
&ms_sbt,
miss_record_size,
cudaMemcpyHostToDevice
) );
CUdeviceptr hitgroup_record;
size_t hitgroup_record_size = sizeof( HitGroupSbtRecord );
CUDA_CHECK( cudaMalloc( reinterpret_cast<void**>( &hitgroup_record ), hitgroup_record_size ) );
HitGroupSbtRecord hg_sbt;
OPTIX_CHECK( optixSbtRecordPackHeader( hitgroup_prog_group, &hg_sbt ) );
CUDA_CHECK( cudaMemcpy(
reinterpret_cast<void*>( hitgroup_record ),
&hg_sbt,
hitgroup_record_size,
cudaMemcpyHostToDevice
) );
sbt.raygenRecord = raygen_record;
sbt.missRecordBase = miss_record;
sbt.missRecordStrideInBytes = sizeof( MissSbtRecord );
sbt.missRecordCount = 1;
sbt.hitgroupRecordBase = hitgroup_record;
sbt.hitgroupRecordStrideInBytes = sizeof( HitGroupSbtRecord );
sbt.hitgroupRecordCount = 1;
}
sutil::CUDAOutputBuffer<uchar4> output_buffer( sutil::CUDAOutputBufferType::CUDA_DEVICE, width, height );
//
// launch
//
{
CUstream stream;
CUDA_CHECK( cudaStreamCreate( &stream ) );
sutil::Camera cam;
configureCamera( cam, width, height );
Params params;
params.image = output_buffer.map();
params.image_width = width;
params.image_height = height;
params.handle = gas_handle;
params.cam_eye = cam.eye();
cam.UVWFrame( params.cam_u, params.cam_v, params.cam_w );
CUdeviceptr d_param;
CUDA_CHECK( cudaMalloc( reinterpret_cast<void**>( &d_param ), sizeof( Params ) ) );
CUDA_CHECK( cudaMemcpy(
reinterpret_cast<void*>( d_param ),
¶ms, sizeof( params ),
cudaMemcpyHostToDevice
) );
OPTIX_CHECK( optixLaunch( pipeline, stream, d_param, sizeof( Params ), &sbt, width, height, /*depth=*/1 ) );
CUDA_SYNC_CHECK();
output_buffer.unmap();
CUDA_CHECK( cudaFree( reinterpret_cast<void*>( d_param ) ) );
}
//
// Display results
//
{
sutil::ImageBuffer buffer;
buffer.data = output_buffer.getHostPointer();
buffer.width = width;
buffer.height = height;
buffer.pixel_format = sutil::BufferImageFormat::UNSIGNED_BYTE4;
if( outfile.empty() )
sutil::displayBufferWindow( argv[0], buffer );
else
sutil::saveImage( outfile.c_str(), buffer, false );
}
//
// Cleanup
//
{
CUDA_CHECK( cudaFree( reinterpret_cast<void*>( sbt.raygenRecord ) ) );
CUDA_CHECK( cudaFree( reinterpret_cast<void*>( sbt.missRecordBase ) ) );
CUDA_CHECK( cudaFree( reinterpret_cast<void*>( sbt.hitgroupRecordBase ) ) );
CUDA_CHECK( cudaFree( reinterpret_cast<void*>( d_gas_output_buffer ) ) );
OPTIX_CHECK( optixPipelineDestroy( pipeline ) );
OPTIX_CHECK( optixProgramGroupDestroy( hitgroup_prog_group ) );
OPTIX_CHECK( optixProgramGroupDestroy( miss_prog_group ) );
OPTIX_CHECK( optixProgramGroupDestroy( raygen_prog_group ) );
OPTIX_CHECK( optixModuleDestroy( module ) );
OPTIX_CHECK( optixDeviceContextDestroy( context ) );
}
}
catch( std::exception& e )
{
std::cerr << "Caught exception: " << e.what() << "\n";
return 1;
}
return 0;
}
I got the following output -
Now I wanted to make some more changes but have no idea how to proceed further -
- I am not sure which acceleration structure this program is using. I checked the accel options but didn't find anything concrete. I want to use a BVH structure, so what changes should I make in the code to guarantee that I am using a BVH acceleration structure?
- I want to change the samples per pixel. From the output I assume that the default samples per pixel for the program is very low, and I want to set it to 64.
- Even after I change the max trace depth to a number higher than 1, I get no visible changes. I was expecting shadows/depth, but I get the same output every time.
```cpp
//
// Link pipeline
//
OptixPipeline pipeline = nullptr;
{
    const uint32_t max_trace_depth = 10;
    OptixProgramGroup program_groups[] = { raygen_prog_group, miss_prog_group, hitgroup_prog_group };
```
I had built a custom ray tracer a while back and with the above variables I was expecting an output similar to this -
But in the output that I got from my OptiX program, I see no depth and no shadows. I know I might be missing a lot of points, but as I said, I am fairly new to this engine and am finding it very difficult to navigate.