OptiX device code returning ptxas error

I am trying to compile the CUDA (v6.0) code below, but the build fails with the following error. Any help is greatly appreciated:

error MSB3721: The command ""E:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.0\bin\nvcc.exe" -gencode=arch=compute_20,code="sm_20,compute_20" --use-local-env --cl-version 2012 -ccbin "C:\Program Files (x86)\Microsoft Visual Studio 11.0\VC\bin" -I"E:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.0\include" -I./ -I../../common/inc -I"E:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.0\include" -I"E:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v6.0\include" -G --keep-dir Debug -maxrregcount=0 --machine 32 --compile -cudart static -g -DWIN32 -D_DEBUG -D_CONSOLE -D_CRT_SECURE_NO_WARNINGS -D"-D_SCL_SECURE_NO_WARNINGS" -DNOMINMAX -D_MBCS -Xcompiler "/EHsc /W3 /nologo /Od /Zi /RTC1 /MDd " -o Debug\SubGP.cu.obj "C:\SubGP.cu"" exited with code -1.

Here is the code:
#include <optix.h>
#include <optix_world.h>

using namespace optix;

// Payload for ray type 0: gamma rays
struct GammaPL
{
    // Counter carried along the ray; CountHits zeroes it before rtTrace and
    // stores its value into HitCount afterwards.
    int recursion_depth;
};
// Payload for ray type 1: visible rays
struct VisiblePL
{
    // Index of the primitive recorded for the ray; closest_hit_program writes
    // it and Visibility compares it with the triangle it aimed at.
    int primID;
};

// --- Variables bound to OptiX semantics -------------------------------------
// 2D launch index: Visibility uses .x as a triangle index and .y as an
// InteriorGuards index, and the full value indexes the Intersection buffer.
rtDeclareVariable(uint2, launch_index2, rtLaunchIndex, );
// 1D launch index used by CountHits and ExceptionInteriorCheck.
rtDeclareVariable(uint, launch_index1, rtLaunchIndex, );
// Ray currently being processed; read by intersection_program.
rtDeclareVariable(Ray, curRay, rtCurrentRay, );
// Payload views, one per payload struct. Both are bound to rtPayload.
// NOTE(review): gammaPL is not referenced by any program visible in this listing.
rtDeclareVariable(GammaPL, gammaPL, rtPayload, );
rtDeclareVariable(VisiblePL, visiblePL, rtPayload, );
// Scene root handed to rtTrace; no semantic, so presumably set by the host - confirm.
rtDeclareVariable(rtObject, top_object, , );
// Attribute written by intersection_program and read by closest_hit_program.
rtDeclareVariable(int, hitPrimID, attribute hitPrimID, );

// --- Buffers ------------------------------------------------------------------
// 2D result grid; programs store the character markers 'x' and '-' in it.
// NOTE(review): UCHAR is not declared anywhere in this listing - confirm it is
// defined for device code, or use `unsigned char`.
rtBuffer<UCHAR, 2> Intersection;
// Per-launch-index counter written by CountHits (-1 is written by ExceptionInteriorCheck).
rtBuffer<int, 1> HitCount;
// Ray origins for CountHits.
rtBuffer<float3, 1> Guards;
// Ray origins for Visibility.
rtBuffer<float3, 1> InteriorGuards;
// Triangle mesh: vertex positions and per-triangle vertex indices (x, y, z).
rtBuffer<float3, 1> OptVertices;
rtBuffer<uint3, 1> OptTriangles;

// Ray-generation program for the 1D counting pass.
// Traces one ray of type 0 from Guards[launch_index1] along the fixed direction
// (1, 1, 1) and stores the payload's recursion_depth, as left by the programs
// invoked during rtTrace, into HitCount[launch_index1].
RT_PROGRAM void CountHits()
{
    float3 origin = Guards[launch_index1];
    float3 direction = make_float3(1, 1, 1);
    Ray ray = make_Ray(origin, direction, 0, 0.0001f, RT_DEFAULT_MAX);

    GammaPL payload;
    payload.recursion_depth = 0;  // start from zero before tracing
    rtTrace(top_object, ray, payload);

    HitCount[launch_index1] = payload.recursion_depth;
}

// Ray-generation program for the 2D visibility pass.
// launch_index2.x selects a triangle in OptTriangles and launch_index2.y selects
// an origin in InteriorGuards. A ray of type 1 is traced from that origin towards
// the triangle's centroid; if the payload comes back holding this triangle's
// index, Intersection[launch_index2] is set to 'x'.
RT_PROGRAM void Visibility()
{
    float3 origin = InteriorGuards[launch_index2.y];
    float3 v0 = OptVertices[OptTriangles[launch_index2.x].x];
    float3 v1 = OptVertices[OptTriangles[launch_index2.x].y];
    float3 v2 = OptVertices[OptTriangles[launch_index2.x].z];
    float3 centroid = (v0 + v1 + v2) / 3;
    float3 direction = centroid - origin;
    Ray ray = make_Ray(origin, direction, 1, 0.0001f, RT_DEFAULT_MAX);

    VisiblePL payload;
    // Use -1 as the "nothing hit" sentinel: a character code such as 'o' (111)
    // is also a valid triangle index and would produce a false match.
    payload.primID = -1;
    rtTrace(top_object, ray, payload);

    // Compare as unsigned only after ruling out the negative sentinel.
    if (payload.primID >= 0 &&
        static_cast<unsigned int>(payload.primID) == launch_index2.x)
    {
        Intersection[launch_index2] = 'x';
    }
}

// Exception program for the 2D pass: marks the current launch cell with '-'
// in the Intersection buffer.
RT_PROGRAM void ExceptionIntersectionCheck()
{
    // Fetched but not used in this program.
    const unsigned int code = rtGetExceptionCode();
    Intersection[launch_index2] = '-';
}

// Exception program for the 1D pass: stores -1 in HitCount for the current
// launch index.
RT_PROGRAM void ExceptionInteriorCheck()
{
    // Fetched but not used in this program.
    const unsigned int code = rtGetExceptionCode();
    HitCount[launch_index1] = -1;
}

// Intersection program for triangle `prim_index` of the OptVertices/OptTriangles mesh.
// Intersects curRay with the triangle's plane using a determinant formulation and,
// when the in-triangle test passes, writes prim_index to the hitPrimID attribute.
// NOTE(review): the braces are missing from this listing; the indentation suggests
// that everything after the `if(dot(...) != 0)` test belongs to that branch - confirm.
RT_PROGRAM void intersection_program(int prim_index)
float3 v0 = OptVertices[OptTriangles[prim_index].x];
float3 v1 = OptVertices[OptTriangles[prim_index].y];
float3 v2 = OptVertices[OptTriangles[prim_index].z];

// Two triangle edges from v0 and the (unnormalized) plane normal.
float3 v0v1 = v1 - v0;
float3 v0v2 = v2 - v0;
float3 normal = cross(v0v1, v0v2);
// Second point on the ray, 5 direction-lengths from the origin.
// NOTE(review): `5.0` is a double literal, not a float literal (`5.0f`).
float3 secondPointOnRay = curRay.origin + 5.0 * curRay.direction;
float3 raySeg = secondPointOnRay - curRay.origin;

// Check if the line is parallel to plane
if(dot(normal, raySeg) != 0)
	// Compute plane-line intersection (http://mathworld.wolfram.com/Line-PlaneIntersection.html)
	// NOTE(review): both matrices are allocated with `new` and no matching
	// `delete` appears in this block.
	Matrix4x4* num = new Matrix4x4();
	num->setCol(0, make_float4(1, v0));
	num->setCol(1, make_float4(1, v1));
	num->setCol(2, make_float4(1, v2));
	num->setCol(3, make_float4(1, curRay.origin));

	Matrix4x4* den = new Matrix4x4();
	den->setCol(0, make_float4(1, v0));
	den->setCol(1, make_float4(1, v1));
	den->setCol(2, make_float4(1, v2));
	den->setCol(3, make_float4(0, raySeg));

	// t is the interpolation parameter between curRay.origin (t = 0) and
	// secondPointOnRay (t = 1), as consumed by lerp below.
	float t = -(num->det() / den->det());
	float3 intersection = lerp(curRay.origin, secondPointOnRay, t);

	// Check if point lies in primitive (barycenter approach)
	// NOTE(review): area2, u and v are computed from dot products; the `0.0` and
	// `1.0` literals in the test below are doubles - verify the formula.
	float area2 = dot(v0v1, v0v2);
	float u = dot(v1 - intersection, v2 - intersection) / area2;
	float v = dot(v2 - intersection, v0 - intersection) / area2;
	if(u > 0.0 && v > 0.0 && u + v <= 1.0 && t > 0.0)
		// NOTE(review): hit_distance is not used in the visible lines, and no
		// rtPotentialIntersection / rtReportIntersection call is shown; the extra
		// indentation of the next statement suggests lines were lost - confirm.
		float hit_distance = length(intersection - curRay.origin);
			hitPrimID = prim_index;


// Bounding-box program for triangle `prim_index`.
// Writes the axis-aligned bounds of the triangle's three vertices into `result`:
// result[0..2] receive the per-axis minimum (x, y, z) and result[3..5] the
// per-axis maximum (x, y, z).
RT_PROGRAM void bounding_box_program(int prim_index, float result[6])
{
    float3 v0 = OptVertices[OptTriangles[prim_index].x];
    float3 v1 = OptVertices[OptTriangles[prim_index].y];
    float3 v2 = OptVertices[OptTriangles[prim_index].z];

    // Minimum corner.
    result[0] = fminf(fminf(v0.x, v1.x), v2.x);
    result[1] = fminf(fminf(v0.y, v1.y), v2.y);
    result[2] = fminf(fminf(v0.z, v1.z), v2.z);

    // Maximum corner.
    result[3] = fmaxf(fmaxf(v0.x, v1.x), v2.x);
    result[4] = fmaxf(fmaxf(v0.y, v1.y), v2.y);
    result[5] = fmaxf(fmaxf(v0.z, v1.z), v2.z);
}


// Closest-hit program: copies the hitPrimID attribute into the VisiblePL
// payload so the program that traced the ray can read which primitive was hit.
RT_PROGRAM void closest_hit_program()
{
    visiblePL.primID = hitPrimID;
}

// Any-hit program.
// NOTE(review): no body is shown for this program in the listing - confirm
// against the original source what it is meant to do.
RT_PROGRAM void any_hit_program()

You’re compiling it to a CUDA obj file, but you must compile it to a PTX file.
That means instead of "-o Debug\SubGP.cu.obj" there should be something like "--ptx -o <your_ptx_path_used_inside_your_app>\SubGP.ptx" on the nvcc command line.

You should also not compile to PTX code with debug information enabled; OptiX doesn’t handle that.
That is, remove the -g and -G options.

Thanks for the hints. I can now create the PTX file successfully, but there is a new problem: rtProgramCreateFromPTXFile(context, ptx_filename, "intersection_program", &intersection) crashes in the host code with an access violation. You can find the definition of "intersection_program" in my post above.


Not enough information.

What is your exact command line to compile the PTX code now?
You’re missing the --use_fast_math option, and your code contains double-precision immediate values.
Every floating-point immediate value should have an 'f' suffix (for example, 5.0f instead of 5.0).

Does fetching the other program entry points work?

Are you running inside the debugger with a debug executable?
Have you successfully removed the debug flags from the nvcc command line?
In which module does the host code crash?
That is, are all the parameters of your rtProgramCreateFromPTXFile(context, ptx_filename, "intersection_program", &intersection) call correct?

Since you’re using the OptiX C-API directly, have you checked that all OptiX calls before that succeeded?

In addition to the CUDA version (you’re using 6.0), please provide the following information when reporting issues:
OS version, OS bitness, installed GPU(s), display driver version, OptiX version, application bitness.

This problem is solved now.