Let’s consider my case.
---- From iso351pp.cpp
// Host-side setup: create the two callable programs from their PTX files and
// bind each one to a context variable.
// NOTE(review): ptxPath() is a project helper not shown here; presumably it maps
// a .cu file name to the PTX file compiled from it - confirm.

// Program "solve_4x6" (from the PTX for exp_ch4.cu) is bound to the context
// variable "callable_solve_4x6".
ptx_path = ptxPath( "exp_ch4.cu" );
Program solve_4x6 = m_context->createProgramFromPTXFile(ptx_path, "solve_4x6");
m_context["callable_solve_4x6"]->set(solve_4x6);
// Program "back_ray" (from the PTX for exp_ch.cu) is bound to the context
// variable "back_ray".
ptx_path = ptxPath( "exp_ch.cu" );
Program exp_ch = m_context->createProgramFromPTXFile( ptx_path, "back_ray" );
m_context["back_ray"]->set(exp_ch);
// Record passed by value to, and returned by value from, the solve_4x6
// callable program. With 4-byte int and float the members occupy 240 bytes
// in total; the offsets below are running byte offsets.
struct ExpData4
{
    int   res;        // offset   0
    int   col_b;      // offset   4
    int   n_cols;     // offset   8
    float f;          // offset  12
    float jj[4];      // offset  16
    float Im_jj[4];   // offset  32
    float aa[24];     // offset  48
    float aim[24];    // offset 144, ends at 240
};
// Record passed by value to, and returned by value from, the back_ray
// callable program.
// NOTE(review): the running totals originally written here (108 / 128 / 160
// bytes) do not match the members shown. Assuming CUDA's float3 is three
// 4-byte floats (12 bytes), the members below add up to 136 bytes. Either the
// totals were miscounted or this excerpt omits some members - confirm.
struct PerRayData_radiance
{
float3 result;
float3 dir_from;
float3 dir;
float3 u, v, q, g; // 7 x float3 so far = 84 bytes (assuming 12-byte float3)
int from_reflected;
int depth;
int break_depth;
float ni1, ni2; // + 20 = 104 bytes
float jj[4], Im_jj[4]; // + 32 = 136 bytes
};
---- file clos_h.cu
// Declares the callable-program variable "back_ray": it takes one
// PerRayData_radiance argument and returns a PerRayData_radiance.
rtCallableProgram(PerRayData_radiance, back_ray, (PerRayData_radiance));
// Closest-hit program; most of its body is elided ("...") in this excerpt.
RT_PROGRAM void closest_hit_radiance()
{ ...
PerRayData_radiance prd_ref;
...
// Pass prd_ref by value to the back_ray callable program and overwrite it
// with the returned copy.
prd_ref = back_ray(prd_ref);
}
---- file exp_ch.cu
// Declares the callable-program variable "callable_solve_4x6": it takes one
// ExpData4 argument and returns an ExpData4. The host code binds a program to
// the context variable of this same name.
rtCallableProgram(ExpData4, callable_solve_4x6, (ExpData4));
// Callable program: copies its by-value argument into a local record, does
// work that is elided ("...") in this excerpt, and returns the local record.
RT_CALLABLE_PROGRAM PerRayData_radiance back_ray(PerRayData_radiance prd_in)
{
PerRayData_radiance prd;
// Byte-wise copy of the incoming record into the local that is returned.
memcpy((void *)&prd, (void *)&prd_in, sizeof(PerRayData_radiance));
...
// Nested call into a second callable program; this is the line the
// "large return size" build warning points at.
// NOTE(review): prd4 is presumably an ExpData4 declared in the elided code.
prd4 = callable_solve_4x6(prd4); // this is line 42
...
return prd;
}
Info from CustomBuild:
1>D:/CUDA_OPTIX/OptiX SDK 3.5.1/iso351pp/iso351pp/exp_ch.cu(42):
warning : Function _Z18callable_solve_4x68ExpData4 has a large return size,
so overriding noinline attribute.
The function may be inlined when called.
---- file exp_ch4.cu
// Callable program: copies its by-value ExpData4 argument into a local record,
// does work that is elided ("...") in this excerpt, and returns the local
// record by value. The "large return size" build warning points at this
// definition.
RT_CALLABLE_PROGRAM ExpData4 solve_4x6(ExpData4 prd4_in) // this is line 25
{
ExpData4 prd4;
// Byte-wise copy of the incoming record into the local that is returned.
memcpy((void *)&prd4, (void *)&prd4_in, sizeof(ExpData4));
...
return prd4;
}
Info from CustomBuild:
1>D:/CUDA_OPTIX/OptiX SDK 3.5.1/iso351pp/iso351pp/exp_ch4.cu(25):
warning : Function _Z9solve_4x68ExpData4 has a large return size,
so overriding noinline attribute.
The function may be inlined when called.
Description. Closest_hit() calls the callable program back_ray(), which in turn calls the callable program callable_solve_4x6(). According to the rtPrintf output, the line “prd_ref = back_ray(prd_ref);” works. The line “prd4 = callable_solve_4x6(prd4);” does not work: none of the rtPrintf calls inside it produce any output.
The warnings emitted by the build mention “a large return size” and the “noinline attribute”.
Also, what does “The function may be inlined when called” mean?
A size of 160 bytes seems to be allowed, while a size of 240 bytes is not. Where are these limits documented?
By the way, I thought that callable programs run not as separate threads but as a part of the closest_hit() code.
I have probably made a mistake somewhere, but where? Please help.