Issue with precision, floats and doubles in hit program


I am new to CUDA, GPU programming, and OptiX in general, and I have a precision problem in my calculations. I have tried converting the float variables in my hit program to doubles, as I need higher precision, and this compiles into .ptx files fine. However, I get errors when running my program (with doubles; floats work fine), and I do not understand why or what I should do to fix it. Any suggestions on what I could do?

Please let me know if information is missing and I can post that as well.

Errors in console

[DISKCACHE] Cache hit for key: ptx-10207-keyf7a090663539551078321e3d07663c28-sm_89-rtc1-drv538.08

[DISKCACHE] Cache miss for key: ptx-83571-key9d0c3f38176841f99a2ef91e90e50538-sm_89-rtc1-drv538.08

[COMPILER] Info: Pipeline parameter "params" size is 112 bytes
Warning: Program is doing double precision computations. No source location available. The input PTX may not contain debug information (nvcc option: -lineinfo), OptixModuleCompileOptions::debugLevel set to OPTIX_COMPILE_DEBUG_LEVEL_NONE, or no useful information is present for the current block.
Warning: Program is doing double precision computations. No source location available. The input PTX may not contain debug information (nvcc option: -lineinfo), OptixModuleCompileOptions::debugLevel set to OPTIX_COMPILE_DEBUG_LEVEL_NONE, or no useful information is present for the current block.
Warning: Program is doing double precision computations. No source location available. The input PTX may not contain debug information (nvcc option: -lineinfo), OptixModuleCompileOptions::debugLevel set to OPTIX_COMPILE_DEBUG_LEVEL_NONE, or no useful information is present for the current block.

Hit program with floats

#include <optix.h>
#include "../src/CommonStructs.h"
#include "../sutil/vec_math.h"
#include <cuComplex.h>

extern "C" {
// Launch parameters read by the device programs below. The OptiX compiler log
// quoted above reports this pipeline parameter under the name "params".
__constant__ LaunchParams params;
}

// Writes the 3x3 outer product of a and b into c: c[i][j] = a_i * b_j.
// All nine entries of c are overwritten.
__device__ void calculateOuterProduct(const float3& a, const float3& b, float c[3][3]) {
    const float lhs[3] = {a.x, a.y, a.z};
    const float rhs[3] = {b.x, b.y, b.z};
    for (int row = 0; row < 3; ++row) {
        for (int col = 0; col < 3; ++col) {
            c[row][col] = lhs[row] * rhs[col];
        }
    }
}

// Single-precision complex exponential: exp(re + i*im) = exp(re) * (cos(im) + i*sin(im)).
__device__ cuFloatComplex cuCexpf(cuFloatComplex z) {
    const float magnitude = expf(cuCrealf(z));
    // Reduce the angle into (-2*pi, 2*pi) before evaluating cos/sin.
    const float angle = fmodf(cuCimagf(z), 2.0f * M_PIf);
    return make_cuFloatComplex(magnitude * cosf(angle), magnitude * sinf(angle));
}

// Single-precision complex exponential without explicit angle reduction:
// returns exp(z.x) * (cos(z.y) + i*sin(z.y)).
__device__ __forceinline__ cuFloatComplex my_cexpf (cuFloatComplex z) {
    const float magnitude = expf(z.x);
    float sinPart;
    float cosPart;
    // Explicit float variant so the call cannot silently resolve to the
    // double-precision sincos.
    sincosf(z.y, &sinPart, &cosPart);
    return make_cuFloatComplex(magnitude * cosPart, magnitude * sinPart);
}

extern "C" {

// NOTE(review): this global is not referenced by the code shown here;
// __closesthit__ch reads params.vertexBuffer instead.
__device__ float3* vertexBuffer;

// Closest-hit program (single precision).
//
// Reads the hit triangle from params.indexBuffer / params.vertexBuffer, builds
// the reflection dyad R from the incident/reflected polarization bases, applies
// the phase factor exp(i*k*|origin - hit|) to the complex field carried in
// payload registers 9..14, and prints the intermediate values.
//
// Payload written: 0 (float bits of the origin-to-hit distance), 7 (bounce
// counter + 1), 8 (rewritten unchanged).
// NOTE(review): the reflected field E_r is only printed; it is never written
// back to a payload register in this listing.
// NOTE(review): printf promotes every float argument to double (C varargs
// rule), so the debug prints introduce double-precision conversions.
__global__ void __closesthit__ch() {
    const unsigned int primIdx = optixGetPrimitiveIndex();

    // Fetch the three vertex indices of the current triangle.
    const int idx0 = params.indexBuffer[primIdx * 3];
    const int idx1 = params.indexBuffer[primIdx * 3 + 1];
    const int idx2 = params.indexBuffer[primIdx * 3 + 2];

    // Fetch vertex positions using the indices.
    const float3 v0 = params.vertexBuffer[idx0];
    const float3 v1 = params.vertexBuffer[idx1];
    const float3 v2 = params.vertexBuffer[idx2];

    const float3 rayOrigin = optixGetWorldRayOrigin();
    const float3 rayDir = optixGetWorldRayDirection();

    // Geometric normal, flipped so that it opposes the incoming ray.
    float3 normal = normalize(cross(v1 - v0, v2 - v0));
    if (dot(normal, rayDir) > 0.0f) normal = -normal;

    const float3 hitPoint = rayOrigin + rayDir * optixGetRayTmax();
    const float3 k_i = rayDir;

    // @@@@@@@@@@@@@@@@@@@ REMEMBER THIS MAY CAUSE ISSUES @@@@@@@@@@@@@@@@@@@
    // At exactly normal incidence cross(k_i, normal) is the zero vector, so the
    // normal is replaced by the x axis. The replacement also feeds the
    // reflection of k_i below. The test is an exact floating-point comparison.
    if (k_i.x == -normal.x && k_i.y == -normal.y && k_i.z == -normal.z) {
        normal = make_float3(1.0f, 0.0f, 0.0f);
    }

    // Reflection matrix I - 2 * n n^T.
    float nn[3][3];
    calculateOuterProduct(normal, normal, nn);

    float I_minus_2_nn[3][3];
    for (int i = 0; i < 3; i++) {
        for (int j = 0; j < 3; j++) {
            I_minus_2_nn[i][j] = (i == j ? 1.0f : 0.0f) - 2.0f * nn[i][j];
        }
    }

    // Reflected propagation direction k_r = (I - 2 n n^T) k_i.
    float3 k_r;
    k_r.x = I_minus_2_nn[0][0] * k_i.x + I_minus_2_nn[0][1] * k_i.y + I_minus_2_nn[0][2] * k_i.z;
    k_r.y = I_minus_2_nn[1][0] * k_i.x + I_minus_2_nn[1][1] * k_i.y + I_minus_2_nn[1][2] * k_i.z;
    k_r.z = I_minus_2_nn[2][0] * k_i.x + I_minus_2_nn[2][1] * k_i.y + I_minus_2_nn[2][2] * k_i.z;

    // Perpendicular / parallel unit vectors for the incident and reflected rays.
    const float3 e_i_perp = normalize(cross(k_i, normal));
    const float3 e_i_par = cross(e_i_perp, k_i);
    const float3 e_r_perp = normalize(cross(k_r, normal));
    const float3 e_r_par = cross(e_r_perp, k_r);

    // Reflection coefficients for the two polarizations.
    const float R_perp = -1.0f;
    const float R_par = -1.0f;

    float ee1[3][3];
    calculateOuterProduct(e_i_perp, e_r_perp, ee1);

    float ee2[3][3];
    calculateOuterProduct(e_i_par, e_r_par, ee2);

    // R = R_perp * (e_i_perp e_r_perp^T) + R_par * (e_i_par e_r_par^T).
    float R[3][3];
    for (int i = 0; i < 3; i++) {
        for (int j = 0; j < 3; j++) {
            R[i][j] = ee1[i][j] * R_perp + ee2[i][j] * R_par;
        }
    }

    // Incident electric field (real and imaginary parts) from the payload.
    const float3 E_i = make_float3(__uint_as_float(optixGetPayload_9()), __uint_as_float(optixGetPayload_10()), __uint_as_float(optixGetPayload_11()));
    const float3 E_i_imaginary = make_float3(__uint_as_float(optixGetPayload_12()), __uint_as_float(optixGetPayload_13()), __uint_as_float(optixGetPayload_14()));

    // Phase term exp(i * k * |origin - hit|).
    // NOTE(review): presumably frequency is in Hz and params.antenna.c is the
    // propagation speed - confirm against CommonStructs.h.
    const float frequency = 10e9f;
    const cuFloatComplex I = make_cuFloatComplex(0.0f, 1.0f);
    const float k = 2.0f * M_PIf * frequency / params.antenna.c;
    printf("k: %f\n", k);

    const float normpq = length(rayOrigin - hitPoint);
    printf("normpq: %f\n", normpq);

    const float k_normpq = k * normpq;
    printf("k_normpq: %f\n", k_normpq);
    printf("cos(k_normpq): %f\n", cosf(k_normpq));
    printf("sin(k_normpq): %f\n", sinf(k_normpq));

    // The exponent is the same scalar for the x, y and z components, so it is
    // computed once and reported three times.
    const cuFloatComplex exponent = cuCmulf(I, make_cuFloatComplex(k_normpq, 0.0f));
    printf("exponent_x: [%f, %f]\n", cuCrealf(exponent), cuCimagf(exponent));
    printf("exponent_y: [%f, %f]\n", cuCrealf(exponent), cuCimagf(exponent));
    printf("exponent_z: [%f, %f]\n", cuCrealf(exponent), cuCimagf(exponent));

    const cuFloatComplex phase = my_cexpf(exponent); //my_cexpf, cuCexpf
    printf("exp_term_real: [%f, %f, %f]\n", cuCrealf(phase), cuCrealf(phase), cuCrealf(phase));
    printf("exp_term_imag: [%f, %f, %f]\n", cuCimagf(phase), cuCimagf(phase), cuCimagf(phase));

    // Apply the phase term to the electric field.
    cuFloatComplex E_in[3];
    E_in[0] = cuCmulf(make_cuFloatComplex(E_i.x, E_i_imaginary.x), phase);
    E_in[1] = cuCmulf(make_cuFloatComplex(E_i.y, E_i_imaginary.y), phase);
    E_in[2] = cuCmulf(make_cuFloatComplex(E_i.z, E_i_imaginary.z), phase);
    printf("E_i_complex_real*phasefactor: [%f, %f, %f]\n", cuCrealf(E_in[0]), cuCrealf(E_in[1]), cuCrealf(E_in[2]));
    printf("E_i_complex_imag*phasefactor: [%f, %f, %f]\n\n\n", cuCimagf(E_in[0]), cuCimagf(E_in[1]), cuCimagf(E_in[2]));

    // Calculate E_r = R * E_i (real matrix times complex vector).
    cuFloatComplex E_r[3];
    for (int i = 0; i < 3; i++) {
        E_r[i] = cuCaddf(
            cuCaddf(cuCmulf(make_cuFloatComplex(R[i][0], 0.0f), E_in[0]),
                    cuCmulf(make_cuFloatComplex(R[i][1], 0.0f), E_in[1])),
            cuCmulf(make_cuFloatComplex(R[i][2], 0.0f), E_in[2]));
    }

    // Every component stays float; the original declared the z imaginary part
    // as double by mistake.
    printf("E_r_x_real: [%f, %f, %f]\n", cuCrealf(E_r[0]), cuCrealf(E_r[1]), cuCrealf(E_r[2]));
    printf("E_r_x_imag: [%f, %f, %f]\n", cuCimagf(E_r[0]), cuCimagf(E_r[1]), cuCimagf(E_r[2]));

    optixSetPayload_0(__float_as_uint(length(hitPoint - rayOrigin)));
    optixSetPayload_7(optixGetPayload_7() + 1); // Increment bounce counter
    optixSetPayload_8(optixGetPayload_8()); // Preserve previous intensity or other data
}

} // extern "C"

Hit program with doubles

#include <optix.h>
#include "../src/CommonStructs.h"
#include "../sutil/vec_math.h"
#include <cuComplex.h>

extern "C" {
// Launch parameters read by the device programs below. The OptiX compiler log
// quoted above reports this pipeline parameter under the name "params".
__constant__ LaunchParams params;
}

// Writes the 3x3 outer product of a and b into c: c[i][j] = a_i * b_j.
// All nine entries of c are overwritten.
__device__ void calculateOuterProduct(const double3& a, const double3& b, double c[3][3]) {
    const double lhs[3] = {a.x, a.y, a.z};
    const double rhs[3] = {b.x, b.y, b.z};
    for (int row = 0; row < 3; ++row) {
        for (int col = 0; col < 3; ++col) {
            c[row][col] = lhs[row] * rhs[col];
        }
    }
}

// Widens each component of a float3 to double. The values keep the float's
// precision; no additional digits are recovered.
__device__ double3 make_double3_from_float3(const float3& f) {
    const double x = static_cast<double>(f.x);
    const double y = static_cast<double>(f.y);
    const double z = static_cast<double>(f.z);
    return make_double3(x, y, z);
}

// Component-wise difference a - b for double3.
__device__ double3 operator-(const double3& a, const double3& b) {
    double3 diff;
    diff.x = a.x - b.x;
    diff.y = a.y - b.y;
    diff.z = a.z - b.z;
    return diff;
}

// Component-wise sum a + b for double3.
__device__ double3 operator+(const double3& a, const double3& b) {
    double3 sum;
    sum.x = a.x + b.x;
    sum.y = a.y + b.y;
    sum.z = a.z + b.z;
    return sum;
}

// Scales every component of a by the scalar b (vector on the left only).
__device__ double3 operator*(const double3& a, double b) {
    double3 scaled;
    scaled.x = a.x * b;
    scaled.y = a.y * b;
    scaled.z = a.z * b;
    return scaled;
}

// Cross product a x b for double3.
__device__ double3 cross(const double3& a, const double3& b) {
    const double cx = a.y * b.z - a.z * b.y;
    const double cy = a.z * b.x - a.x * b.z;
    const double cz = a.x * b.y - a.y * b.x;
    return make_double3(cx, cy, cz);
}

// Dot product of two double3 vectors.
__device__ double dot(const double3& a, const double3& b) {
    const double xx = a.x * b.x;
    const double yy = a.y * b.y;
    const double zz = a.z * b.z;
    return xx + yy + zz;
}

// Returns v scaled to unit length. A zero-length v divides by zero and yields
// non-finite components; callers must avoid that case.
__device__ double3 normalize(const double3& v) {
    const double squaredLength = dot(v, v);
    const double inverseLength = 1.0 / sqrt(squaredLength);
    return v * inverseLength;
}

// Reflects the incident vector I about the normal N: I - 2 * dot(N, I) * N.
__device__ double3 reflect(const double3& I, const double3& N) {
    const double projection = dot(N, I);
    const double3 twiceNormal = N * 2.0;
    return I - twiceNormal * projection;
}

// Euclidean length of a double3 vector.
__device__ double length(const double3& v) {
    const double squaredLength = dot(v, v);
    return sqrt(squaredLength);
}

// Double-precision complex exponential: exp(re + i*im) = exp(re) * (cos(im) + i*sin(im)).
__device__ cuDoubleComplex cuCexp(cuDoubleComplex z) {
    // Full double-precision 2*pi. Reducing modulo the float constant M_PIf
    // would shift the angle by roughly 1.7e-7 rad for every period removed.
    const double twoPi = 2.0 * 3.14159265358979323846;
    const double magnitude = exp(cuCreal(z));
    const double angle = fmod(cuCimag(z), twoPi); // Reduce the angle
    return make_cuDoubleComplex(magnitude * cos(angle), magnitude * sin(angle));
}

// Double-precision complex exponential without explicit angle reduction:
// returns exp(z.x) * (cos(z.y) + i*sin(z.y)).
__device__ __forceinline__ cuDoubleComplex my_cexp(cuDoubleComplex z) {
    const double magnitude = exp(z.x);
    double sinPart;
    double cosPart;
    sincos(z.y, &sinPart, &cosPart);
    return make_cuDoubleComplex(magnitude * cosPart, magnitude * sinPart);
}

extern "C" {

// NOTE(review): this global is not referenced by the code shown here;
// __closesthit__ch reads params.vertexBuffer (converted from float3) instead.
__device__ double3* vertexBuffer;

// Closest-hit program (double precision).
//
// Same computation as the float variant, but with the intermediate math in
// double: builds the reflection dyad R, applies the phase factor
// exp(i*k*|origin - hit|) to the complex field from payload registers 9..14,
// and forms the reflected field E_r.
//
// Payload written: 0 (float bits of the origin-to-hit distance), 7 (bounce
// counter + 1), 8 (rewritten unchanged).
// NOTE(review): every input (vertices, ray origin/direction, t_max, payload
// field) arrives as float and is only widened here, so the results are limited
// by the float inputs.
// NOTE(review): E_r is computed but never written to a payload register or
// printed in this listing.
__global__ void __closesthit__ch() {
    // Double-precision pi. The float constant M_PIf would cap the wavenumber at
    // float accuracy and defeat the purpose of the double computation.
    const double kPi = 3.14159265358979323846;

    const unsigned int primIdx = optixGetPrimitiveIndex();

    // Fetch the three vertex indices of the current triangle.
    const int idx0 = params.indexBuffer[primIdx * 3];
    const int idx1 = params.indexBuffer[primIdx * 3 + 1];
    const int idx2 = params.indexBuffer[primIdx * 3 + 2];

    // Fetch vertex positions using the indices and widen them to double.
    const double3 v0 = make_double3_from_float3(params.vertexBuffer[idx0]);
    const double3 v1 = make_double3_from_float3(params.vertexBuffer[idx1]);
    const double3 v2 = make_double3_from_float3(params.vertexBuffer[idx2]);

    const double3 rayOrigin = make_double3_from_float3(optixGetWorldRayOrigin());
    const double3 rayDir = make_double3_from_float3(optixGetWorldRayDirection());

    // Geometric normal, flipped so that it opposes the incoming ray.
    double3 normal = normalize(cross(v1 - v0, v2 - v0));
    if (dot(normal, rayDir) > 0.0) normal = normal * (-1.0);

    const double3 hitPoint = rayOrigin + rayDir * optixGetRayTmax();
    const double3 k_i = rayDir;

    // At exactly normal incidence cross(k_i, normal) is the zero vector, so the
    // normal is replaced by the x axis. The replacement also feeds the
    // reflection of k_i below. The test is an exact floating-point comparison.
    if (k_i.x == -normal.x && k_i.y == -normal.y && k_i.z == -normal.z) {
        normal = make_double3(1.0, 0.0, 0.0);
    }

    // Reflection matrix I - 2 * n n^T.
    double nn[3][3];
    calculateOuterProduct(normal, normal, nn);

    double I_minus_2_nn[3][3];
    for (int i = 0; i < 3; i++) {
        for (int j = 0; j < 3; j++) {
            I_minus_2_nn[i][j] = (i == j ? 1.0 : 0.0) - 2.0 * nn[i][j];
        }
    }

    // Reflected propagation direction k_r = (I - 2 n n^T) k_i.
    double3 k_r;
    k_r.x = I_minus_2_nn[0][0] * k_i.x + I_minus_2_nn[0][1] * k_i.y + I_minus_2_nn[0][2] * k_i.z;
    k_r.y = I_minus_2_nn[1][0] * k_i.x + I_minus_2_nn[1][1] * k_i.y + I_minus_2_nn[1][2] * k_i.z;
    k_r.z = I_minus_2_nn[2][0] * k_i.x + I_minus_2_nn[2][1] * k_i.y + I_minus_2_nn[2][2] * k_i.z;

    // Perpendicular / parallel unit vectors for the incident and reflected rays.
    const double3 e_i_perp = normalize(cross(k_i, normal));
    const double3 e_i_par = cross(e_i_perp, k_i);
    const double3 e_r_perp = normalize(cross(k_r, normal));
    const double3 e_r_par = cross(e_r_perp, k_r);

    // Reflection coefficients for the two polarizations.
    const double R_perp = -1.0;
    const double R_par = -1.0;

    double ee1[3][3];
    calculateOuterProduct(e_i_perp, e_r_perp, ee1);

    double ee2[3][3];
    calculateOuterProduct(e_i_par, e_r_par, ee2);

    // R = R_perp * (e_i_perp e_r_perp^T) + R_par * (e_i_par e_r_par^T).
    double R[3][3];
    for (int i = 0; i < 3; i++) {
        for (int j = 0; j < 3; j++) {
            R[i][j] = ee1[i][j] * R_perp + ee2[i][j] * R_par;
        }
    }

    // Incident electric field (real and imaginary parts). The payload registers
    // hold 32-bit floats, so the field enters with float precision.
    const double3 E_i = make_double3(__uint_as_float(optixGetPayload_9()), __uint_as_float(optixGetPayload_10()), __uint_as_float(optixGetPayload_11()));
    const double3 E_i_imaginary = make_double3(__uint_as_float(optixGetPayload_12()), __uint_as_float(optixGetPayload_13()), __uint_as_float(optixGetPayload_14()));

    // Phase term exp(i * k * |origin - hit|).
    // NOTE(review): presumably frequency is in Hz and params.antenna.c is the
    // propagation speed - confirm against CommonStructs.h.
    const double frequency = 10e9;
    const cuDoubleComplex I = make_cuDoubleComplex(0.0, 1.0);
    const double k = 2.0 * kPi * frequency / params.antenna.c;

    const double normpq = length(rayOrigin - hitPoint);

    // The exponent is the same scalar for the x, y and z components.
    const cuDoubleComplex exponent = cuCmul(I, make_cuDoubleComplex(k * normpq, 0.0));
    const cuDoubleComplex phase = my_cexp(exponent);

    // Apply the phase term to the electric field.
    cuDoubleComplex E_in[3];
    E_in[0] = cuCmul(make_cuDoubleComplex(E_i.x, E_i_imaginary.x), phase);
    E_in[1] = cuCmul(make_cuDoubleComplex(E_i.y, E_i_imaginary.y), phase);
    E_in[2] = cuCmul(make_cuDoubleComplex(E_i.z, E_i_imaginary.z), phase);

    // Calculate E_r = R * E_i (real matrix times complex vector).
    cuDoubleComplex E_r[3];
    for (int i = 0; i < 3; i++) {
        E_r[i] = cuCadd(
            cuCadd(cuCmul(make_cuDoubleComplex(R[i][0], 0.0), E_in[0]),
                   cuCmul(make_cuDoubleComplex(R[i][1], 0.0), E_in[1])),
            cuCmul(make_cuDoubleComplex(R[i][2], 0.0), E_in[2]));
    }
    (void)E_r; // see the NOTE(review) above: the result is not stored anywhere

    optixSetPayload_0(__float_as_uint(static_cast<float>(length(hitPoint - rayOrigin))));
    optixSetPayload_7(optixGetPayload_7() + 1); // Increment bounce counter
    optixSetPayload_8(optixGetPayload_8()); // Preserve previous intensity or other data
}

} // extern "C"

Warning: Program is doing double precision computations.

Those are not errors; they are warnings from the OptiX validation telling you that you are using potentially slow double-precision calculations inside your device code.
If these go away when disabling the validation mode or when reducing the logCallbackLevel, then everything works as intended.

No source location available. The input PTX may not contain debug information (nvcc option: -lineinfo), OptixModuleCompileOptions::debugLevel set to OPTIX_COMPILE_DEBUG_LEVEL_NONE, or no useful information is present for the current block.

This warning says you either have not compiled the module input code with line information, or your OptixModuleCompileOptions::debugLevel is set to not retain debug information.
This is also not a problem, unless you want to profile or debug your device code.

Code comments:

The device local vertexBuffer is unused and wrong.
You need to change your LaunchParams.vertexBuffer type to get rid of the float to double conversions.

const double3 v0 = make_double3_from_float3(params.vertexBuffer[idx0]);
const double3 v1 = make_double3_from_float3(params.vertexBuffer[idx1]);
const double3 v2 = make_double3_from_float3(params.vertexBuffer[idx2]);

That cannot be the input to the optixAccelBuild then! That needs float3.

Also, if you want to track the full double precision inside your ray payload, you would need to split each double into two uints and use twice the number of payload registers. If the available registers are insufficient, I would recommend defining a payload structure inside the ray generation program and tracking that struct as a 64-bit pointer split across two payload registers. That makes handling doubles a lot simpler.

Please read at least these two threads about double precision in OptiX:
From this search: