Data of individual ray gets modified unintentionally

Sorry in advance if this post is inappropriate for this topic, or if the information is not enough. I’ve just started getting my hands on programming/ray tracing, and I just want to get as much help as I can. I’ll delete/edit this post if it’s inappropriate in any way.

#include <optix.h>

#include "friskSphereandPlane.h"
#include <cuda/helpers.h>

#include <sutil/vec_math.h>

#include "sphere.h"

// Expands a float3-like expression `u` into three comma-separated arguments,
// each component passed through float_as_int(). The argument is parenthesised
// so that a compound expression (e.g. `a + b`) expands correctly.
// NOTE(review): float_as_int() is not defined in this listing; it is expected
// to come from one of the included headers.
#define float3_as_ints( u ) float_as_int( ( u ).x ), float_as_int( ( u ).y ), float_as_int( ( u ).z )

// Launch parameters, declared in constant memory with C linkage.
// The Params type is declared outside this listing (presumably in
// friskSphereandPlane.h -- confirm). The programs below read params.handle
// and params.result from it.
extern "C" {
__constant__ Params params;
}

// Converts the pointer to an unsigned long long, then stores its upper 32 bits
// in i0 and its lower 32 bits in i1, so the pointer can be carried in two
// 32-bit values.
//   ptr : pointer to split
//   i0  : receives bits 63..32
//   i1  : receives bits 31..0
static __forceinline__ __device__ void packPointer( void* ptr, unsigned int& i0, unsigned int& i1 )
{
    const unsigned long long uptr = reinterpret_cast<unsigned long long>( ptr );
    i0 = static_cast<unsigned int>( uptr >> 32 );
    i1 = static_cast<unsigned int>( uptr & 0x00000000ffffffffull );
}

// Inverse of packPointer(): rebuilds a 64-bit pointer from its upper half
// (i0) and lower half (i1).
//   i0 : bits 63..32 of the pointer
//   i1 : bits 31..0 of the pointer
// Returns the reassembled pointer.
static __forceinline__ __device__ void* unpackPointer( unsigned int i0, unsigned int i1 )
{
    const unsigned long long uptr = ( static_cast<unsigned long long>( i0 ) << 32 ) | i1;
    return reinterpret_cast<void*>( uptr );
}

// Per-ray data. __raygen__rg creates one instance per ray and the hit programs
// reach it through the pointer returned by getPayload2().
struct Payload {
  unsigned int ray_id; // unique id of the ray
  float tpath;         // total length of the path with multiple bounces
  float ref_idx;       // running sum of Rv, accumulated in __closesthit__triangle
  float receive;       // value written by __closesthit__sphere
};

// Traces one ray and hands `prd` to the invoked programs by splitting the
// pointer into two 32-bit payload values (see packPointer / getPayload2).
//   handle        : traversable to trace against
//   ray_origin    : ray origin
//   ray_direction : ray direction
//   tmin, tmax    : ray interval
//   prd           : per-ray data; must stay valid for the duration of the trace
//   offset        : SBT offset
//   stride        : SBT stride
//   miss          : miss SBT index
static __forceinline__ __device__ void trace2(
        OptixTraversableHandle handle,
        float3                 ray_origin,
        float3                 ray_direction,
        float                  tmin,
        float                  tmax,
        Payload*               prd,
        int                    offset,
        int                    stride,
        int                    miss
        )
{
    unsigned int p0, p1;
    packPointer( prd, p0, p1 );
    optixTrace(
            handle,
            ray_origin,
            ray_direction,
            tmin,
            tmax,
            0.0f,                // rayTime
            OptixVisibilityMask( 1 ),
            // NOTE(review): the ray-flags argument is absent from the listing;
            // OPTIX_RAY_FLAG_NONE is assumed here -- confirm against the original.
            OPTIX_RAY_FLAG_NONE,
            offset,                   // SBT offset
            stride,                   // SBT stride(obj_count - 1)
            miss,                     // missSBTIndex
            p0, p1 );
}

// Reads payload values 0 and 1 of the current trace and reassembles them into
// the Payload pointer that trace2() packed.
// Returns the per-ray Payload of the ray being processed.
static __forceinline__ __device__ Payload* getPayload2()
{
    const unsigned int p0 = optixGetPayload_0();
    const unsigned int p1 = optixGetPayload_1();
    return static_cast<Payload*>( unpackPointer( p0, p1 ) );
}

// Builds the ray for launch index `idx` of a launch with dimensions `dim`.
//   idx.x / dim.x is scaled by pi   to the polar angle `ele`,
//   idx.y / dim.y is scaled by 2*pi to the azimuth `azi`,
// and `direction` is set to
//   ( sin(ele)cos(azi), sin(ele)sin(azi), cos(ele) ).
// `origin` is set to a hard-coded point; the commented-out lines are the
// alternatives for other scenes. idx.z and dim.z are not used.
// Note that every index with idx.x == 0 yields the same direction (0, 0, 1),
// because sin(0) == 0.
static __forceinline__ __device__ void computeRay( uint3 idx, uint3 dim, float3& origin, float3& direction )
{
    const float theta = static_cast<float>( idx.x ) / static_cast<float>( dim.x );
    const float phi   = static_cast<float>( idx.y ) / static_cast<float>( dim.y );
    const float ele   = M_PIf * theta;
    const float azi   = 2.0f * M_PIf * phi;
    //origin    = params.cam_eye;
    //origin	= make_float3(1.0f, 1.0f, 0.0f);              //for sphere
    //origin	= make_float3(-1.0f, 0.0f, 0.0f);              //for planes1  
    //origin	= make_float3(0.0f, 1.0f, 0.0f);              //for planes0	
    //origin	= make_float3(0.1f, 0.0f, 0.0f);              //for cow
    origin = make_float3(0.0f, -1.0f, 0.0f); // make_float3(0.0f, 0.0f, 2.0f); //make_float3(1.0f, 0.0f, 1.0f);              //for SphereandPlane
    //origin      = make_float3(-18161.0f, -93727.0f, 150.0f);  //for meidaiITC
    direction = make_float3(sinf(ele)*cosf(azi), sinf(ele)*sinf(azi), cosf(ele));
}

// TODO: to improve performance, pre-compute and pack the normals.
// but here we compute them while tracing
//
// Fetches the three vertices of triangle `triId` from the GAS of the current
// hit (SBT GAS index 0 and time 0 are hard-coded) and returns the cross
// product (v1 - v0) x (v2 - v0). The result is NOT normalised.
// NOTE(review): newer OptiX releases changed the vertex-fetch API; verify
// this call against the SDK version in use.
__device__ __forceinline__ float3 getnormal(const unsigned int triId) {

    float3 vertex[3];
    OptixTraversableHandle gas_handle = optixGetGASTraversableHandle();
    optixGetTriangleVertexData(gas_handle, triId, 0, 0, vertex);

    float3 normal = cross((vertex[1] - vertex[0]), (vertex[2] - vertex[0]));

    return normal;
}

// Ray generation program (OptiX entry point).
// For each launch index it builds a ray with computeRay(), initialises a local
// Payload (ray_id = idx.x + dim.x * idx.y, all other fields zero) and takes its
// address for the trace call whose trailing arguments appear at the end.
// NOTE(review): this listing has lost lines -- the function's braces and the
// head of the trace call (everything before `tmin, tmax, ... 0 );`, including
// the SBT offset and stride arguments) are not visible.
extern "C" __global__ void __raygen__rg()

    // get payload id
    // OptixPayloadTypeID payloadTypeID;
//  printf("payload id of raygen: %d\n", payloadTypeID);

    // Lookup our location within the launch grid
    const uint3 idx = optixGetLaunchIndex();
    const uint3 dim = optixGetLaunchDimensions();
    // Map our launch idx to a screen location and create a ray from the camera
    // location through the screen

    // dx and dy are not referenced again in the visible code.
    float dx = static_cast<float>( idx.x ) /  static_cast<float>( dim.x );
    float dy = static_cast<float>( idx.y ) /  static_cast<float>( dim.y );
    //create a ray sphere for rwpl
    float3 ray_origin;
    float3 ray_direction;
    computeRay( idx, dim, ray_origin, ray_direction );
    //printf("ray_direction: (%f, %f, %f)\n", ray_direction.x, ray_direction.y, ray_direction.z);
    //printf("ray_length = %f\n", dot(ray_direction, ray_direction));
    // setting the per ray data (payload)
    // pld is a local variable of this program; the hit programs receive its
    // address through getPayload2() and modify it in place.
    Payload pld;
    pld.tpath = 0.0f;
    pld.ray_id = idx.x + dim.x * idx.y;
    pld.ref_idx = 0.0f;
    pld.receive = 0.0f;
    Payload *pldptr = &pld;
    //printf("ray id: %u, %d, %d, %d, %d\n", pldptr->ray_id, idx.x, idx.y, dim.x, dim.y);

    // Ray interval handed to the trace call.
    float tmin = 1e-10f;
    float tmax = 20000.0f;

//printf("line: %d\n", __LINE__);
//printf("handle: %llu\n", params.handle);

            // Tail of the trace call's argument list; the final 0 is presumably
            // the miss SBT index (last parameter of trace2) -- confirm.
            tmin,  // tmin
            tmax,  // tmax
            0              );

//printf("line: %d\n", __LINE__);    

// Miss program (OptiX entry point).
// Intentionally empty: the original body only unpacked the payload pointer and
// re-packed it into two unused locals, which has no observable effect. The
// Payload therefore keeps whatever the ray generation / hit programs wrote.
extern "C" __global__ void __miss__ms()
{
}

// Closest-hit program for triangle geometry (OptiX entry point).
// Visible steps: adds the hit distance (optixGetRayTmax()) to Payload::tpath,
// derives a face normal from the triangle's vertices, reflects the incoming
// direction about it, adds a reflection coefficient Rv to Payload::ref_idx,
// and ends with the argument tail of a further trace call.
// NOTE(review): this listing has lost lines -- the function's braces and the
// head of the final trace call are not visible, and `out_normal` and
// `edge_cross` are each defined more than once, which suggests alternative
// versions whose comment markers were lost.
// NOTE(review): no recursion-depth check is visible before the trace call at
// the end; confirm that the pipeline's maximum trace depth and stack size
// cover the longest bounce chain this program can produce.
extern "C" __global__ void __closesthit__triangle()
//printf("line: %d\n", __LINE__); 

    unsigned int tri_id = optixGetPrimitiveIndex();
    unsigned int sbt_id = optixGetSbtGASIndex();
    float time = optixGetRayTime();
    //printf("tri[%d] = sbt[%d]\n", tri_id, sbt_id);

    float3 ray_dir = optixGetWorldRayDirection();
    float3 ray_ori = optixGetWorldRayOrigin();
    //printf("dir = (%f, %f, %f)\n", ray_dir.x, ray_dir.y, ray_dir.z);
    //printf("ori = (%f, %f, %f)\n", ray_ori.x, ray_ori.y, ray_ori.z);
    // Variant 1 of out_normal: read from hit attributes 0..2 (the enclosing
    // call/constructor is not visible in this listing).
    const float3 out_normal =
                int_as_float( optixGetAttribute_0() ),
                int_as_float( optixGetAttribute_1() ),
                int_as_float( optixGetAttribute_2() )
    // Variant 2 of out_normal: cross product of two triangle edges, with the
    // vertices fetched from the GAS.
    float3 vertex[3];
    OptixTraversableHandle gas_handle = optixGetGASTraversableHandle();
    optixGetTriangleVertexData(gas_handle, tri_id, sbt_id, time, vertex);
    float3 out_normal = cross((vertex[1] - vertex[0]), (vertex[2] - vertex[0]));
    // printf("prim[%d] = vec(%f, %f, %f)\n", sbt_id, out_normal.x, out_normal.y, out_normal.z);
    // We defined our geometry as a triangle geometry. In this case the
    // We add the t value of the intersection
    float ray_tmax = optixGetRayTmax();

    // Accumulate this segment's length into the per-ray path length.
    Payload *pldptr = getPayload2();    
    float total_path_length = ray_tmax + pldptr->tpath;
    pldptr->tpath = total_path_length;

    //printf("total path length: %f\n", pldptr->tpath);
    //printf("ray_id: %d\n", pldptr->ray_id);
    // report individual bounces
    //printf("line: %d, ray_id = %d, pathlen = %f\n", __LINE__, pldptr->ray_id, total_path_length);
    //printf("line: %d, pldptr: %d, %f, %f, %f\n", __LINE__, pldptr->ray_id, pldptr->tpath, pldptr->ref_idx, pldptr->receive);

    //get vertice data from SBT and compute normal
    // Variant 3 of the normal: vertices looked up through the SBT record's
    // shape_data (treated as MeshData) using the primitive's index triple.
    HitGroupData *data = (HitGroupData*)optixGetSbtDataPointer();
    const MeshData* mesh_data = (MeshData*)data->shape_data;
    const uint3 index = mesh_data->indices[tri_id];

//printf("line: %d, ray_id = %d, pathlen = %f\n", __LINE__, pldptr->ray_id, total_path_length);

    const float3 v0 = mesh_data->vertices[ index.x ];
    const float3 v1 = mesh_data->vertices[ index.y ];
    const float3 v2 = mesh_data->vertices[ index.z ];

printf("line: %d, ray_id = %d, pathlen = %f\n", __LINE__, pldptr->ray_id, total_path_length);

    float3 edge1 = v1 - v0;
    float3 edge2 = v2 - v0;
    // The hand-written cross product and the cross() call below compute the
    // same quantity; both definitions of edge_cross appear in the listing.
    float3 edge_cross = {edge1.y*edge2.z - edge1.z*edge2.y,
                         edge1.z*edge2.x - edge1.x*edge2.z,
                         edge1.x*edge2.y - edge1.y*edge2.x};
    float3 edge_cross = cross(edge1, edge2);

printf("line: %d, ray_id = %d, pathlen = %f\n", __LINE__, pldptr->ray_id, total_path_length);

    //float invLen = (1.0f / sqrtf(edge_cross.x*edge_cross.x + edge_cross.y*edge_cross.y + edge_cross.z*edge_cross.z));

printf("line: %d, pldptr: %d, %f, %f, %f\n", __LINE__, pldptr->ray_id, pldptr->tpath, pldptr->ref_idx, pldptr->receive);

    //const float3 out_normal = edge_cross * invLen;
    const float3 out_normal = normalize(edge_cross);

printf("line: %d, pldptr: %d, %f, %f, %f\n", __LINE__, pldptr->ray_id, pldptr->tpath, pldptr->ref_idx, pldptr->receive);

    // Hit position: origin advanced along the direction by the hit distance.
    float3 hit_point = ray_ori + ray_tmax * ray_dir;

printf("line: %d, pldptr: %d, %f, %f, %f\n", __LINE__, pldptr->ray_id, pldptr->tpath, pldptr->ref_idx, pldptr->receive);

    float3 reflect_dir = reflect(ray_dir, out_normal);
    //float3 reflect_dir = ray_dir - 2.0f * out_normal * (ray_dir.x*out_normal.x + ray_dir.y*out_normal.y + ray_dir.z*out_normal.z);

    //printf("triangle vertex =(%f,%f,%f), (%f,%f,%f), (%f,%f,%f)\n", v0.x, v0.y, v0.z, v1.x, v1.y, v1.z, v2.x, v2.y, v2.z);
    //printf("vec(%f, %f, %f)\n", reflect_dir.x, reflect_dir.y, reflect_dir.z);
//printf("line: %d, pldptr: %d, %f, %f, %f\n", __LINE__, pldptr->ray_id, pldptr->tpath, pldptr->ref_idx, pldptr->receive);
    // cos1
    // Negated cosine of the angle between the ray direction and the normal.
    float cos1 = -1.0f * dot(ray_dir, out_normal) / (length(ray_dir) * length(out_normal));

    // n_ij is hard-coded to 1.5; cos2 = sqrt(n_ij^2 - sin^2(angle1)) / n_ij.
    float n_ij = 1.5f;
    float cos2 = sqrtf((n_ij * n_ij) - (1.0f - cos1 * cos1)) / n_ij;
    //printf("cos2 = %f\n", cos2);

    // u1 and u2 are hard-coded constants of the Rv formula below.
    float u1 = 1.0f;
    float u2 = 2.0f;

    //float Rp = (u1 * n_ij * cos1 - u2 * cos2)/(u1 * n_ij * n_ij * cos1 + u2 * cos2);
    float Rv = (u2 * n_ij * cos1 - u1 * cos2) / (u2 * n_ij * n_ij * cos1 + u1 * cos2);

    // Rv is added to (not multiplied into) the running ref_idx value.
    float Rv_total = pldptr->ref_idx + Rv;
    pldptr->ref_idx = Rv_total;
//printf("line: %d, pldptr: %d, %f, %f, %f\n", __LINE__, pldptr->ray_id, pldptr->tpath, pldptr->ref_idx, pldptr->receive);   
//printf("Rv: %f\n", Rv);    
    // Minimal distance the ray has to travel to report next hit
    float tmin = 1e-5;
    float tmax = 20000.0f;

//printf("line: %d\n", __LINE__); 

//printf("hit_point: %f, %f, %f\n", hit_point.x, hit_point.y, hit_point.z);
//printf("reflect_dir: %f, %f, %f\n", reflect_dir.x, reflect_dir.y, reflect_dir.z);
//printf("handle: %llu\n", params.handle);
//printf("pldptr: %d, %f, %f, %f\n", pldptr->ray_id, pldptr->tpath, pldptr->ref_idx, pldptr->receive);
            // Tail of the next trace call's argument list (presumably the
            // reflected ray from hit_point along reflect_dir -- the head of the
            // call is not visible in this listing).
            tmin,  // tmin
            tmax,  // tmax
            0               );
//printf("line: %d, ray_id: %f\n", __LINE__, pldptr->ray_id);

    // printf("Ray = %d, pathlen = %f\n", pldptr->ray_id, pldptr->tpath);

// Closest-hit program for sphere primitives (OptiX entry point).
// Visible steps: adds the hit distance to the accumulated path length, replaces
// a ref_idx of exactly 0 with 1, computes
//   result = 1 / (16 * pi^2) * ref_idx / total_path_length
// and stores the path length and result back into the Payload. No further ray
// is traced from here in the visible code.
// NOTE(review): this listing has lost lines -- the function's braces and the
// closing braces of both `if` blocks are not visible. The final debug block
// reads `payload.x`, but no `payload` variable is declared in the visible code
// (only a commented-out getPayload() call).
extern "C" __global__ void __closesthit__sphere()
    Payload* pldptr = getPayload2();
    unsigned int sphe_id = optixGetPrimitiveIndex();

//printf("line: %d\n", __LINE__); 
    // We defined our geometry as a triangle geometry. In this case the
    // We add the t value of the intersection
    float ray_tmax = optixGetRayTmax();

    //float4 payload = getPayload();
    float total_path_length = ray_tmax + pldptr->tpath;
    //float total_path_length = ray_tmax + payload.y;
    // ref_idx == 0 means no triangle hit has added to it yet (it is initialised
    // to 0 in __raygen__rg); use 1 in that case.
    if (pldptr->ref_idx == 0.0f){
      pldptr->ref_idx = 1.0f; 

    //float result = ((1.0 * 1.0 * 1.0) / (16 * M_PIf * M_PIf)) * (payload.z/total_path_length);
    float result = ((1.0f * 1.0f * 1.0f) / (16.0f * M_PIf * M_PIf)) * (pldptr->ref_idx/total_path_length);
    pldptr->tpath = total_path_length;
    pldptr->receive = result;

    //float* output = params.result;
    //atomicAdd(output + sphe_id, result);
    //params.result[prdptr->ray_id] = result;

    printf("Sphe[%d], result = %f\n", sphe_id, pldptr->receive);
    //printf("Sphe[%d], result = %f\n", sphe_id, params.result[sphe_id]);
    //printf("Sphe[%d], result = %f\n", sphe_id, total_path_length);
    //printf("ray[%d], result = %f\n", pldptr->ray_id, pldptr->receive);
    //printf("%d\n", pldptr->ray_id);
    //printf("%f\n", pldptr->receive);
    // checking for debug
    if (result < 0.005f) {
      printf("Ray = %d, pathlen = %f\n", __float_as_uint(payload.x), result);
      printf("Sphe[%d], result = %f\n", sphe_id, params.result[sphe_id]);

// Custom intersection program for spheres (OptiX entry point).
// Solves the ray/sphere quadratic in object space using the half-b form,
// chooses a root inside [tmin, tmax] (near root first, then far root), and
// reports the hit with five attributes: the surface normal (x, y, z) and a
// (phi / 2pi, theta / pi) texture coordinate.
// NOTE(review): this listing has lost lines -- the function's braces, the
// initialiser of `center` (presumably a field of sphere_data -- confirm), the
// braces and early `return` of the nested root checks, and the closing `);` of
// optixReportIntersection are not visible.
extern "C" __global__ void __intersection__sphere()
    // Per-primitive sphere parameters, read from the SBT record's shape_data.
    HitGroupData* data = (HitGroupData*)optixGetSbtDataPointer();
    const int prim_idx = optixGetPrimitiveIndex();
    const SphereData sphere_data = ((SphereData*)data->shape_data)[prim_idx];

//printf("line: %d\n", __LINE__); 

    const float3 center =;
    const float radius = sphere_data.radius;

    const float3 origin = optixGetObjectRayOrigin();
    const float3 direction = optixGetObjectRayDirection();
    // Get the ray's minimum and maximum distance
    const float tmin = optixGetRayTmin();
    const float tmax = optixGetRayTmax();

    // Quadratic a*t^2 + 2*half_b*t + c = 0 for |origin + t*direction - center| = radius.
    const float3 oc = origin - center;
    const float a = dot(direction, direction);
    const float half_b = dot(oc, direction);
    const float c = dot(oc, oc) - radius * radius;

    // Negative discriminant: the ray misses the sphere.
    const float discriminant = half_b * half_b - a * c;
    if (discriminant < 0) return;
    const float sqrtd = sqrtf(discriminant);

    // Try the nearer root first; fall back to the farther one if it lies
    // outside the ray interval.
    float root = (-half_b - sqrtd) / a;
    if (root < tmin || tmax < root)
        root = (-half_b + sqrtd) / a;
        if (root < tmin || tmax < root)

    // Hit point and outward unit normal in object space.
    const float3 P = origin + root * direction;
    const float3 normal = (P - center) / radius;

    // Spherical coordinates of the normal, mapped to [0, 1] texture coordinates.
    float phi = atan2(normal.y, normal.x);
    if (phi < 0) phi += 2.0f * M_PIf;
    const float theta = acosf(normal.z);
    const float2 texcoord = make_float2(phi / (2.0f * M_PIf), theta / M_PIf);

    // Hit kind 0; attributes carry the float bit patterns as ints.
    optixReportIntersection(root, 0, 
        __float_as_int(normal.x), __float_as_int(normal.y), __float_as_int(normal.z),
        __float_as_int(texcoord.x), __float_as_int(texcoord.y)

    printf("line: %d\n", __LINE__); 

In this CUDA code (which is part of an OptiX project), I tried to attach a struct named Payload to every single ray generated; it has 4 values (ray_id, tpath, ref_idx, and receive). Then, in __closesthit__triangle(), when the ray hits an .obj 3D model (which is made up of triangles, obviously), it should be reflected in a certain direction, and the calculation has nothing to do with the values of Payload except tpath, as far as I can tell.
However, executing a completely irrelevant calculation somehow corrupts the values inside Payload, which leads to an illegal memory access error. Here is a screenshot of the error:

You can see that from line 287, the values inside pldptr change into weird ones. At first I suspected that memory corruption had occurred, but as far as I know, there is nothing I can do about it, because OptiX is supposed to automatically allocate the necessary memory for you? Feel free to correct me if I’m wrong.

Also, this error only occurs on Windows 10 / RTX A4000, and doesn’t occur on Linux / RTX A6000. Those two are the only environments I can test my code on for the time being, so I’m not sure whether the problem is the OS or the GPU.

Any insights would be much appreciated.


I would say you’re running into incorrect recursion and have exceeded the maxTraceDepth value used in the stack space calculation.

  • Your __closesthit__triangle() calls optixTrace recursively (with the same ray type as in the ray generation program) without checking for any recursion limit.
  • The time variable is not used.
  • The __miss__ms() program does nothing useful.

Did you enable the validation mode during the debugging to see if OptiX complains about anything?

For performance reasons, I would not calculate the sphere normal and texture coordinates inside the custom intersection program but defer that to the hit programs.
Also there are built-in sphere primitives inside OptiX SDK since version 7.5.0.
Use a newer OptiX SDK version if you can.

1 Like

Thank you so much for the reply.

This is old and very messy code that used to “work” on OptiX 7.3.0, and I’m trying to make it work in the latest OptiX. But I’ve never done any programming with OptiX (I’m not even used to C++, and have just a little experience with Python) and didn’t really have anyone who could teach me anything about ray tracing, so I didn’t even know where to start.

I’ll definitely check out the problems you have pointed out. Thank you.

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.