Here is my kernel code and host code. The context “rtc” is declared as a class member.
I updated my driver today to 347.09 but that didn’t help.
I changed the output bugger to uchar. I must be missing something very obvious. I don’t think I am using too much memory.
I have 2GB of memory for each GPU.
Thanks.
Kernel:
#include "Helper.cuh"
#include <optix.h>
#include <optix_world.h>
// Payload for ray type 0: visible rays
struct RadiancePL
{
float3 color;
int recursion_depth;
float importance;
};
// Payload for ray type 1: shadow rays
struct ShadowPL
{
float3 attenuation;
};
// Optix semantic variables
rtDeclareVariable(uint2, launch_index, rtLaunchIndex, "GPU thread index");
rtDeclareVariable(optix::Ray, cur_Ray, rtCurrentRay, "Ray in execution");
rtDeclareVariable(ShadowPL, shadow_PL, rtPayload, "Shadow Ray Payload");
rtDeclareVariable(RadiancePL, radiance_PL, rtPayload, "Radiance Ray Payload");
rtDeclareVariable(float, intersect_dist, rtIntersectionDistance, "Parametric intersection distance");
// Host variables
rtDeclareVariable(rtObject, top_object, , "Root node where Ray-Tracing context is launched");
rtDeclareVariable(rtObject, top_shadower, , "Root node where Ray-Tracing context is launched");
rtDeclareVariable(float, epsilon, , "Scene Epsilon");
rtDeclareVariable(int, shader, , "Shading mode");
rtDeclareVariable(int, max_depth, , "Max. recursion depth");
rtDeclareVariable(float3, bg_color, , "Background Color");
rtDeclareVariable(float3, bad_color, , "Error Color");
rtDeclareVariable(unsigned int, radianceRay, , "Radiance Ray type");
rtDeclareVariable(unsigned int, shadowRay, , "Shadow Ray type");
rtDeclareVariable(float3, cam_source, , "Camera source");
rtDeclareVariable(float3, cam_target, , "Camera target");
rtDeclareVariable(float, cam_fovX, , "Camera Horizontal Field of View");
rtDeclareVariable(float, cam_fovY, , "Camera Vertical Field of View");
rtDeclareVariable(float3, cam_up, , "Camera up");
rtDeclareVariable(int2, img_dim, , "Image Dimensions");
rtDeclareVariable(float3, Ka, , "Ambient");
rtDeclareVariable(float3, Kd, , "Diffuse");
rtDeclareVariable(float3, Ks, , "Specular");
rtDeclareVariable(float3, Ke, , "Emissive");
rtDeclareVariable(float, Sh, , "Shininess");
rtDeclareVariable(float, Rg, , "Roughness");
rtDeclareVariable(float, Re, , "Refraction Index");
// Attributes
rtDeclareVariable(float3, normal, attribute normalVec, "Geometry Normal");
rtDeclareVariable(float3, texcoord, attribute textureIdx, "Texture Coordinates");
// Textures
//rtTextureSampler<float4, 2> transmissive_map;
// Buffers
rtBuffer<uchar4, 2> output_buffer;
rtBuffer<erLight> lights;
rtBuffer<float4> vertex_buffer;
rtBuffer<int3> index_buffer;
rtBuffer<float3> uv;
RT_PROGRAM void RayGen_Pinhole()
{
RadiancePL payload;
payload.color = make_float3(0.0f);
payload.importance = 1.0f;
payload.recursion_depth = 0; // initialize recursion depth
// Create ray
const float width = img_dim.x;
const float height = img_dim.y;
const float i = launch_index.x;
const float j = launch_index.y;
const float halfX = tanf((M_PIf / 180.0f) * cam_fovX / 2.0f);
const float halfY = tanf((M_PIf / 180.0f) * cam_fovY / 2.0f);
const float dx = 2.0f * halfX / width;
const float dy = 2.0f * halfY / height;
const float normalized_i = dx * (i - width / 2.0f);
const float normalized_j = dy * (j - height / 2.0f);
const float3 camera_direction = optix::normalize(cam_target - cam_source);
const float3 camera_right = -optix::normalize(optix::cross(camera_direction, cam_up));
const float3 dir = optix::normalize(camera_direction + normalized_i * camera_right + normalized_j * cam_up);
optix::Ray ray = optix::make_Ray(cam_source, dir, radianceRay, 0.0f, RT_DEFAULT_MAX);
// Trace ray
rtTrace(top_object, ray, payload);
// Write result to output buffer
output_buffer[launch_index] = make_uchar4(255 * payload.color.x, 255 * payload.color.y, 255 * payload.color.z, 0);
}
RT_PROGRAM void ClosestHit_Radiance()
{
const float3 surfacePos = cur_Ray.origin + intersect_dist * cur_Ray.direction;
// Global ambient contribution
float3 result = make_float3(0.02f);
// Direct lighting
const int nLights = lights.size();
for(int i = 0; i < nLights; i++)
{
if(!lights[i].turnedOn)
continue;
float3 surfaceToLight = make_float3(lights[i].lightPos) - surfacePos;
const float NdotS = optix::dot(normal, surfaceToLight);
// cast shadow ray
float3 light_attenuation = make_float3(static_cast<float>(NdotS > 0.0f));
if (NdotS > 0.0f)
{
ShadowPL payload;
payload.attenuation = make_float3(1.0f);
optix::Ray shadow_ray = optix::make_Ray(surfacePos, optix::normalize(surfaceToLight), shadowRay, epsilon, optix::length(surfaceToLight));
rtTrace(top_shadower, shadow_ray, payload);
light_attenuation = payload.attenuation;
}
// If not completely shadowed, light the hit point
if(optix::fmaxf(light_attenuation) > 0.0f)
{
// Ambient
result += lights[i].ambientCoefficient * Ka * lights[i].color + Ke;
// Color after attenuation
float3 lightCol = lights[i].color * light_attenuation;
// Diffuse
result += Kd * NdotS * lightCol;
// Specular
const float NdotH = optix::dot(normal, optix::normalize(optix::normalize(surfaceToLight) - cur_Ray.direction));
const float RdotV = optix::dot(-cur_Ray.direction, optix::reflect(-surfaceToLight, normal));
if(NdotH > 0)
{
float specularCoefficient;
switch(shader)
{
case 1: // Phong
specularCoefficient = pow(RdotV, Sh);
break;
case 2: // Blinn-Phong
specularCoefficient = pow(NdotH, Sh);
break;
case 3: // Beckmann
specularCoefficient = pow(M_Ef, (pow(NdotH, 2.0f) - 1) / pow(NdotH * Rg, 2.0f)) /
(M_PIf * pow(Rg, 2.0f) * pow(NdotH, 4));
break;
case 4: // Gaussian
specularCoefficient = pow(M_Ef, -pow(NdotH / Rg, 2.0f));
break;
}
result += Ks * specularCoefficient * lightCol;
}
}
}
// Attenuate incoming light and add the contribution to the current radiance ray’s payload
radiance_PL.color = result;
}
RT_PROGRAM void Miss_Radiance()
{
radiance_PL.color = bg_color;
}
RT_PROGRAM void AnyHit_Shadow()
{
float fresnel_lo = 0.1f;
float fresnel_hi = 0.5f;
float fresnel = fresnel_lo + (fresnel_hi - fresnel_lo) * pow(1.0f - optix::dot(normal, -cur_Ray.direction), 5);
float3 Kt = make_float3(0);//make_float3(tex2D(transmissive_map, texcoord.x, texcoord.y));
shadow_PL.attenuation = shadow_PL.attenuation * Kt * (1.0f - fresnel);
if (optix::fmaxf(shadow_PL.attenuation) < 0.001f)
rtTerminateRay();
else
rtIgnoreIntersection();
}
RT_PROGRAM void Intersect(int primIdx)
{
const erVertex v0 = vertex_buffer[index_buffer[primIdx].x];
const erVertex v1 = vertex_buffer[index_buffer[primIdx].y];
const erVertex v2 = vertex_buffer[index_buffer[primIdx].z];
const float3 e0 = make_float3(v1 - v0);
const float3 e1 = make_float3(v2 - v1);
const float3 n = optix::normalize(optix::cross(e0, e1));
const float3 secondPointOnRay = cur_Ray.origin + 5.0f * cur_Ray.direction;
const float3 raySeg = secondPointOnRay - cur_Ray.origin;
// Check if the line is parallel to plane
if(optix::dot(n, raySeg) != 0.0f)
{
// Compute plane-line intersection (http:\mathworld.wolfram.com/Line-PlaneIntersection.html)
optix::Matrix4x4 num;
num.setCol(0, make_float4(1.0f, make_float3(v0)));
num.setCol(1, make_float4(1.0f, make_float3(v1)));
num.setCol(2, make_float4(1.0f, make_float3(v2)));
num.setCol(3, make_float4(1.0f, cur_Ray.origin));
optix::Matrix4x4 den;
den.setCol(0, make_float4(1.0f, make_float3(v0)));
den.setCol(1, make_float4(1.0f, make_float3(v1)));
den.setCol(2, make_float4(1.0f, make_float3(v2)));
den.setCol(3, make_float4(0.0f, raySeg));
float t = -(num.det() / den.det());
const float3 p = optix::lerp(cur_Ray.origin, secondPointOnRay, t);
// Check if point lies in primitive (barycenter approach)
const float area2 = optix::dot(e0, e1);
const float u = optix::dot(make_float3(v1) - p, make_float3(v2) - p) / area2;
const float v = optix::dot(make_float3(v2) - p, make_float3(v0) - p) / area2;
if(u > 0.0f && v > 0.0f && u + v <= 1.0f && t > 0.0f)
{
const float hit_distance = optix::length(p - cur_Ray.origin);
if(rtPotentialIntersection(hit_distance))
{
normal = n;
/*const float a1 = uv[index_buffer[primIdx].x].x * u + uv[index_buffer[primIdx].y].x * v + uv[index_buffer[primIdx].z].x * (1 - u - v);
const float a2 = uv[index_buffer[primIdx].x].y * u + uv[index_buffer[primIdx].y].y * v + uv[index_buffer[primIdx].z].y * (1 - u - v);
const float a3 = uv[index_buffer[primIdx].x].z * u + uv[index_buffer[primIdx].y].z * v + uv[index_buffer[primIdx].z].z * (1 - u - v);
texcoord = make_float3(a1, a2, a3);*/
rtReportIntersection(0);
}
}
}
}
RT_PROGRAM void Bounds(int primIdx, float result[6])
{
const erVertex v0 = vertex_buffer[index_buffer[primIdx].x];
const erVertex v1 = vertex_buffer[index_buffer[primIdx].y];
const erVertex v2 = vertex_buffer[index_buffer[primIdx].z];
result[0] = optix::fminf(optix::fminf(v0.x, v1.x), v2.x);
result[1] = optix::fminf(optix::fminf(v0.y, v1.y), v2.y);
result[2] = optix::fminf(optix::fminf(v0.z, v1.z), v2.z);
result[3] = optix::fmaxf(optix::fmaxf(v0.x, v1.x), v2.x);
result[4] = optix::fmaxf(optix::fmaxf(v0.y, v1.y), v2.y);
result[5] = optix::fmaxf(optix::fmaxf(v0.z, v1.z), v2.z);
}
RT_PROGRAM void Exception()
{
//rtPrintf("Caught exception 0x%X at launch index (%d,%d)\n", rtGetExceptionCode(), launch_index.x, launch_index.y);
output_buffer[launch_index] = make_uchar4(255 * bad_color.x, 255 * bad_color.y, 255 * bad_color.z, 0);
}
Host code:
void loadRTEngine()
{
std::cout<<"Loading EasyRay...\n";
// Global states
RTvariable max_depth, radianceRay, shadowRay, epsilon, bg_color, bad_color, shader;
Optix_error(rtContextSetRayTypeCount(rtc, 2));
Optix_error(rtContextSetEntryPointCount(rtc, 1));
Optix_error(rtContextSetStackSize(rtc, 1520));
int devices[] = {0, 1};
Optix_error(rtContextSetDevices(rtc, 2, devices));
//rtContextSetAttribute(rtc, RT_CONTEXT_ATTRIBUTE_GPU_PAGING_FORCED_OFF, sizeof(int), 0);
Optix_error(rtContextDeclareVariable(rtc, "max_depth", &max_depth));
Optix_error(rtContextDeclareVariable(rtc, "radianceRay", &radianceRay));
Optix_error(rtContextDeclareVariable(rtc, "shadowRay", &shadowRay));
Optix_error(rtContextDeclareVariable(rtc, "epsilon", &epsilon));
Optix_error(rtContextDeclareVariable(rtc, "bg_color", &bg_color));
Optix_error(rtContextDeclareVariable(rtc, "bad_color", &bad_color));
Optix_error(rtContextDeclareVariable(rtc, "shader", &shader));
Optix_error(rtVariableSet1i(max_depth, 2));
Optix_error(rtVariableSet1ui(radianceRay, 0));
Optix_error(rtVariableSet1ui(shadowRay, 1));
Optix_error(rtVariableSet1f(epsilon, 1.e-3f));
Optix_error(rtVariableSet3f(bg_color, 0.0f, 0.0f, 0.0f));
Optix_error(rtVariableSet3f(bad_color, 1.0f, 0.0f, 0.0f));
Optix_error(rtVariableSet1i(shader, 1));
// Context Programs
RTprogram raygen, miss, exception;
Optix_error(rtProgramCreateFromPTXFile(rtc, engine.c_str(), "RayGen_Pinhole", &raygen));
Optix_error(rtProgramCreateFromPTXFile(rtc, engine.c_str(), "Miss_Radiance", &miss));
Optix_error(rtProgramCreateFromPTXFile(rtc, engine.c_str(), "Exception", &exception));
Optix_error(rtContextSetRayGenerationProgram(rtc, 0, raygen));
Optix_error(rtContextSetExceptionProgram(rtc, 0, exception));
Optix_error(rtContextSetMissProgram(rtc, 0, miss));
std::cout<<"Committing EasyRay...\n";
RTprogram ch, ah, inter, bbox;
// Material programs
Optix_error(rtProgramCreateFromPTXFile(rtc, engine.c_str(), "ClosestHit_Radiance", &ch));
Optix_error(rtProgramCreateFromPTXFile(rtc, engine.c_str(), "AnyHit_Shadow", &ah));
// Geometry programs
Optix_error(rtProgramCreateFromPTXFile(rtc, engine.c_str(), "Intersect", &inter));
Optix_error(rtProgramCreateFromPTXFile(rtc, engine.c_str(), "Bounds", &bbox));
RTgeometrygroup geomgrp;
Optix_error(rtGeometryGroupCreate(rtc, &geomgrp));
Optix_error(rtGeometryGroupSetChildCount(geomgrp, scene->geomInstances.size()));
int offset = 0;
for(int inst = 0; inst < scene->geomInstances.size(); inst++)
{
// Geometry node
RTgeometry geo;
Optix_error(rtGeometryCreate(rtc, &geo));
Optix_error(rtGeometrySetPrimitiveCount(geo, scene->geomInstances[inst]->geom->nTriangles));
Optix_error(rtGeometrySetPrimitiveIndexOffset(geo, offset));
Optix_error(rtGeometrySetBoundingBoxProgram(geo, bbox));
Optix_error(rtGeometrySetIntersectionProgram(geo, inter));
offset += scene->geomInstances[inst]->geom->nTriangles;
// Material node
RTmaterial mate;
Optix_error(rtMaterialCreate(rtc, &mate));
Optix_error(rtMaterialSetAnyHitProgram(mate, 1, ah));
Optix_error(rtMaterialSetClosestHitProgram(mate, 0, ch));
const erMaterial *mat = scene->geomInstances[inst]->mat;
RTvariable Ka, Kd, Ks, Ke, Sh, Rg, Re;
Optix_error(rtMaterialDeclareVariable(mate, "Ka", &Ka));
Optix_error(rtVariableSet3f(Ka, mat->ambtColor.x, mat->ambtColor.y, mat->ambtColor.z));
Optix_error(rtMaterialDeclareVariable(mate, "Kd", &Kd));
Optix_error(rtVariableSet3f(Kd, mat->diffColor.x, mat->diffColor.y, mat->diffColor.z));
Optix_error(rtMaterialDeclareVariable(mate, "Ks", &Ks));
Optix_error(rtVariableSet3f(Ks, mat->specColor.x, mat->specColor.y, mat->specColor.z));
Optix_error(rtMaterialDeclareVariable(mate, "Ke", &Ke));
Optix_error(rtVariableSet3f(Ke, mat->emsvColor.x, mat->emsvColor.y, mat->emsvColor.z));
Optix_error(rtMaterialDeclareVariable(mate, "Sh", &Sh));
Optix_error(rtVariableSet1f(Sh, mat->shininess));
Optix_error(rtMaterialDeclareVariable(mate, "Rg", &Rg));
Optix_error(rtVariableSet1f(Rg, mat->roughness));
Optix_error(rtMaterialDeclareVariable(mate, "Re", &Re));
Optix_error(rtVariableSet1f(Re, mat->refractionIndex));
// Geometry Instance node
RTgeometryinstance instance;
Optix_error(rtGeometryInstanceCreate(rtc, &instance));
Optix_error(rtGeometryInstanceSetGeometry(instance, geo));
Optix_error(rtGeometryInstanceSetMaterialCount(instance, 1));
Optix_error(rtGeometryInstanceSetMaterial(instance, 0, mate));
// Add to heirarchy
Optix_error(rtGeometryGroupSetChild(geomgrp, inst, instance));
}
// Acceleration node
RTacceleration acc;
Optix_error(rtAccelerationCreate(rtc, &acc));
Optix_error(rtAccelerationSetBuilder(acc, "NoAccel"));
Optix_error(rtAccelerationSetTraverser(acc, "NoAccel"));
Optix_error(rtGeometryGroupSetAcceleration(geomgrp, acc));
RTvariable topobj, topshad;
Optix_error(rtContextDeclareVariable(rtc, "top_object", &topobj));
Optix_error(rtContextDeclareVariable(rtc, "top_shadower", &topshad));
Optix_error(rtVariableSetObject(topobj, geomgrp));
Optix_error(rtVariableSetObject(topshad, geomgrp));
//RT buffers
void* data;
RTvariable var;
RTbuffer ligh;
Optix_error(rtBufferCreate(rtc, RT_BUFFER_INPUT, &ligh));
Optix_error(rtBufferSetFormat(ligh, RT_FORMAT_USER));
Optix_error(rtBufferSetElementSize(ligh, sizeof(erLight)));
Optix_error(rtBufferSetSize1D(ligh, nLights));
Optix_error(rtBufferMap(ligh, &data));
cudaMemcpy(data, lights, nLights * sizeof(erLight), cudaMemcpyDefault);
Optix_error(rtBufferUnmap(ligh));
Optix_error(rtContextDeclareVariable(rtc, "lights", &var));
Optix_error(rtVariableSetObject(var, ligh));
RTbuffer verts;
Optix_error(rtBufferCreate(rtc, RT_BUFFER_INPUT, &verts));
Optix_error(rtBufferSetFormat(verts, RT_FORMAT_FLOAT4));
Optix_error(rtBufferSetSize1D(verts, scene->nVertices));
Optix_error(rtBufferMap(verts, &data));
cudaMemcpy(data, scene->vertices, scene->nVertices * sizeof(erVertex), cudaMemcpyDefault);
Optix_error(rtBufferUnmap(verts));
Optix_error(rtContextDeclareVariable(rtc, "vertex_buffer", &var));
Optix_error(rtVariableSetObject(var, verts));
RTbuffer indi;
Optix_error(rtBufferCreate(rtc, RT_BUFFER_INPUT, &indi));
Optix_error(rtBufferSetFormat(indi, RT_FORMAT_INT3));
Optix_error(rtBufferSetSize1D(indi, scene->nTriangles));
Optix_error(rtBufferMap(indi, &data));
cudaMemcpy(data, scene->indices, scene->nTriangles * sizeof(erTriangle), cudaMemcpyDefault);
Optix_error(rtBufferUnmap(indi));
Optix_error(rtContextDeclareVariable(rtc, "index_buffer", &var));
Optix_error(rtVariableSetObject(var, indi));
RTbuffer uvs;
Optix_error(rtBufferCreate(rtc, RT_BUFFER_INPUT, &uvs));
Optix_error(rtBufferSetFormat(uvs, RT_FORMAT_FLOAT3));
Optix_error(rtBufferSetSize1D(uvs, scene->uv.size()));
Optix_error(rtBufferMap(uvs, &data));
cudaMemcpy(data, scene->DEVuv, scene->uv.size() * sizeof(float3), cudaMemcpyDefault);
Optix_error(rtBufferUnmap(uvs));
Optix_error(rtContextDeclareVariable(rtc, "uv", &var));
Optix_error(rtVariableSetObject(var, uvs));
}
RTbuffer draw()
{
loadRTEngine();
std::cout<<"Rendering...\n";
// Camera info
RTvariable cams, camt, camX, camY, camu, dims, var;
Optix_error(rtContextDeclareVariable(rtc, "cam_source", &cams));
Optix_error(rtContextDeclareVariable(rtc, "cam_target", &camt));
Optix_error(rtContextDeclareVariable(rtc, "cam_fovX", &camX));
Optix_error(rtContextDeclareVariable(rtc, "cam_fovY", &camY));
Optix_error(rtContextDeclareVariable(rtc, "cam_up", &camu));
Optix_error(rtContextDeclareVariable(rtc, "img_dim", &dims));
Optix_error(rtVariableSet3f(cams, cam.source.x, cam.source.y, cam.source.z));
Optix_error(rtVariableSet3f(camt, cam.target.x, cam.target.y, cam.target.z));
Optix_error(rtVariableSet1f(camX, cam.fovX));
Optix_error(rtVariableSet1f(camY, cam.fovY));
Optix_error(rtVariableSet3f(camu, cam.up.x, cam.up.y, cam.up.z));
Optix_error(rtVariableSet2i(dims, dimensions.x, dimensions.y));
Optix_error(rtContextValidate(rtc));
RTbuffer output;
Optix_error(rtBufferCreate(rtc, RT_BUFFER_OUTPUT, &output));
Optix_error(rtBufferSetFormat(output, RT_FORMAT_UNSIGNED_BYTE4));
Optix_error(rtBufferSetSize2D(output, dimensions.x, dimensions.y));
Optix_error(rtContextDeclareVariable(rtc, "output_buffer", &var));
Optix_error(rtVariableSetObject(var, output));
Optix_error(rtContextLaunch2D(rtc, 0, (RTsize)dimensions.x, (RTsize)dimensions.y));
return output;
}