Hello,
I have a stack overflow (CUDA error = 719 at C:\Users\flarive\Documents\Visual Studio 2022\Projects\RTC2\core\src\gpu.cu:581 ‘cudaDeviceSynchronize()’ unspecified launch Failure) in my raytracer implementation in CUDA.
I have a render kernel to calculate each pixel of the image in a ray_color device method and i think the problem is that i create too many pointers in my loop.
hittable_pdf* hpdf = new hittable_pdf(_lights, rec.hit_point);
mixture_pdf *mpdf = new mixture_pdf(hpdf, srec.pdf_ptr);
Even if i free this pointers after the pixerl color is calculated i have a stackoverflow or a address is out of bounds errors (it depends).
How could i create quite a lot new pointers in a device loop (and free them) without having such memory error ?
I suppose it’s more a heap overflow problem because pointers are stored in the heap and not in the stack, but i have both when trying to fix.
Thanks a lot for your help !
Extract from compute-sanitizer :
ray_color returns 33/16 2 0.000000 0.000000 0.000000
ray_color returns 36/16 2 0.000000 0.000000 0.000000
ray_color returns 38/16 2 0.000000 0.000000 0.000000
ray_color returns 41/16 2 0.000000 0.000000 0.000000
ray_color returns 45/16 2 0.000000 0.000000 0.000000
ray_color returns 37/17 2 0.000000 0.000000 0.000000
ray_color returns 38/17 2 0.000000 0.000000 0.000000
ray_color returns 42/17 2 0.000000 0.000000 0.000000
ray_color returns 47/17 2 0.000000 0.000000 0.000000
ray_color returns 224/34 2 0.000000 0.000000 0.000000
ray_color returns 225/34 2 0.000000 0.000000 0.000000
ray_color returns 237/34 2 0.000000 0.000000 0.000000
ray_color returns 238/34 2 0.000000 0.000000 0.000000
ray_color returns 226/35 2 0.000000 0.000000 0.000000
ray_color returns 232/35 2 0.000000 0.000000 0.000000
ray_color returns 36/27 2 0.000000 0.000000 0.000000
ray_color returns 40/27 2 0.000000 0.000000 0.000000
ray_color returns 375/28 2 0.000000 0.000000 0.000000
ray_color returns 382/28 2 0.000000 0.000000 0.000000
ray_color returns 371/29 2 0.000000 0.000000 0.000000
ray_color returns 375/29 2 0.000000 0.000000 0.000000
ray_color returns 379/29 2 0.000000 0.000000 0.000000
ray_color returns 381/29 2 0.000000 0.000000 0.000000
ray_color returns 383/29 2 0.000000 0.000000 0.000000
ray_color returns 380/26 2 0.000000 0.000000 0.000000
ray_color returns 378/27 2 0.000000 0.000000 0.000000
ray_color returns 382/20 2 0.000000 0.000000 0.000000
ray_color returns 120/88 2 0.000000 0.000000 0.000000
ray_color returns 125/89 2 0.000000 0.000000 0.000000
ray_color returns 127/89 2 nan nan nan
ray_color returns 373/22 2 0.000000 0.000000 0.000000
ray_color returns 379/22 2 0.000000 0.000000 0.000000
ray_color returns 383/23 2 0.000000 0.000000 0.000000
ray_color returns 160/14 2 0.000000 0.000000 0.000000
ray_color returns 165/15 2 0.000000 0.000000 0.000000
ray_color returns 171/10 2 0.000000 0.000000 0.000000
ray_color returns 173/11 2 0.000000 0.000000 0.000000
ray_color returns 174/11 2 0.000000 0.000000 0.000000
ray_color returns 376/16 2 0.000000 0.000000 0.000000
ray_color returns 373/17 2 0.000000 0.000000 0.000000
ray_color returns 176/13 2 0.000000 0.000000 0.000000
ray_color returns 179/13 2 0.000000 0.000000 0.000000
ray_color returns 37/37 2 0.000000 0.000000 0.000000
ray_color returns 185/11 2 0.000000 0.000000 0.000000
ray_color returns 167/12 2 0.000000 0.000000 0.000000
ray_color returns 160/13 2 0.000000 0.000000 0.000000
ray_color returns 173/13 2 0.000000 0.000000 0.000000
ray_color returns 175/13 2 0.000000 0.000000 0.000000
ray_color returns 112/94 2 0.000000 0.000000 0.000000
ray_color returns 121/94 2 0.000000 0.000000 0.000000
ray_color returns 123/94 2 0.000000 0.000000 0.000000
ray_color returns 124/94 2 0.000000 0.000000 0.000000
ray_color returns 126/94 2 0.000000 0.000000 0.000000
ray_color returns 127/94 2 0.000000 0.000000 0.000000
ray_color returns 113/95 2 0.000000 0.000000 0.000000
ray_color returns 116/95 2 0.000000 0.000000 0.000000
ray_color returns 117/95 2 0.000000 0.000000 0.000000
ray_color returns 120/95 2 0.000000 0.000000 0.000000
ray_color returns 121/95 2 0.000000 0.000000 0.000000
ray_color returns 123/95 2 0.000000 0.000000 0.000000
ray_color returns 126/95 2 0.000000 0.000000 0.000000
ray_color returns 375/11 2 0.000000 0.000000 0.000000
ray_color returns 380/11 2 0.000000 0.000000 0.000000
ray_color returns 376/18 2 0.000000 0.000000 0.000000
ray_color returns 371/19 2 0.000000 0.000000 0.000000
ray_color returns 383/19 2 0.000000 0.000000 0.000000
ray_color returns 120/28 2 0.000000 0.000000 0.000000
ray_color returns 121/28 2 0.000000 0.000000 0.000000
ray_color returns 119/29 2 0.000000 0.000000 0.000000
ray_color returns 358/12 2 0.000000 0.000000 0.000000
ray_color returns 377/24 2 0.000000 0.000000 0.000000
ray_color returns 381/24 2 0.000000 0.000000 0.000000
ray_color returns 378/25 2 0.000000 0.000000 0.000000
ray_color returns 381/25 2 0.000000 0.000000 0.000000
ray_color returns 58/45 2 0.000000 0.000000 0.000000
ray_color returns 123/87 2 0.000000 0.000000 0.000000
ray_color returns 38/41 2 0.000000 0.000000 0.000000
ray_color returns 230/32 2 0.000000 0.000000 0.000000
ray_color returns 225/33 2 0.000000 0.000000 0.000000
ray_color returns 230/33 2 0.000000 0.000000 0.000000
ray_color returns 136/80 2 0.000000 0.000000 0.000000
ray_color returns 142/81 2 0.000000 0.000000 0.000000
ray_color returns 128/88 2 0.000000 0.000000 0.000000
ray_color returns 132/88 2 0.000000 0.000000 0.000000
ray_color returns 133/88 2 0.000000 0.000000 0.000000
ray_color returns 134/88 2 0.000000 0.000000 0.000000
ray_color returns 138/88 2 0.000000 0.000000 0.000000
ray_color returns 143/88 2 nan nan nan
ray_color returns 129/89 2 0.000000 0.000000 0.000000
ray_color returns 130/89 2 0.000000 0.000000 0.000000
ray_color returns 133/89 2 0.000000 0.000000 0.000000
ray_color returns 136/89 2 0.000000 0.000000 0.000000
ray_color returns 137/89 2 0.000000 0.000000 0.000000
ray_color returns 142/89 2 0.000000 0.000000 0.000000
ray_color returns 113/83 2 0.000000 0.000000 0.000000
ray_color returns 124/83 2 0.000000 0.000000 0.000000
ray_color returns 40/18 2 0.000000 0.000000 0.000000
ray_color returns 138/87 2 0.000000 0.000000 0.000000
ray_color returns 54/34 2 0.000000 0.000000 0.000000
ray_color returns 374/30 2 0.000000 0.000000 0.000000
ray_color returns 375/30 2 0.000000 0.000000 0.000000
ray_color returns 382/30 2 0.000000 0.000000 0.000000
ray_color returns 383/30 2 0.000000 0.000000 0.000000
ray_color returns 373/31 2 0.000000 0.000000 0.000000
ray_color returns 377/31 2 0.000000 0.000000 0.000000
ray_color returns 380/31 2 0.000000 0.000000 0.000000
ray_color returns 381/31 2 0.000000 0.000000 0.000000
ray_color returns 44/39 2 0.000000 0.000000 0.000000
ray_color returns 47/39 2 0.000000 0.000000 0.000000
ray_color returns 276/26 2 0.000000 0.000000 0.000000
ray_color returns 280/26 2 0.000000 0.000000 0.000000
ray_color returns 273/27 2 0.000000 0.000000 0.000000
ray_color returns 274/27 2 0.000000 0.000000 0.000000
ray_color returns 50/33 2 0.000000 0.000000 0.000000
ray_color returns 58/33 2 0.000000 0.000000 0.000000
ray_color returns 135/83 2 0.000000 0.000000 0.000000
ray_color returns 57/42 2 0.000000 0.000000 0.000000
ray_color returns 58/42 2 0.000000 0.000000 0.000000
ray_color returns 63/42 2 0.000000 0.000000 0.000000
ray_color returns 49/43 2 0.000000 0.000000 0.000000
ray_color returns 133/90 2 0.000000 0.000000 0.000000
ray_color returns 134/90 2 0.000000 0.000000 0.000000
ray_color returns 139/90 2 0.000000 0.000000 0.000000
ray_color returns 141/90 2 0.000000 0.000000 0.000000
ray_color returns 130/91 2 0.000000 0.000000 0.000000
ray_color returns 135/91 2 0.000000 0.000000 0.000000
ray_color returns 143/91 2 0.000000 0.000000 0.000000
ray_color returns 179/14 2 0.000000 0.000000 0.000000
ray_color returns 182/14 2 0.000000 0.000000 0.000000
ray_color returns 184/14 2 0.000000 0.000000 0.000000
ray_color returns 176/15 2 0.000000 0.000000 0.000000
ray_color returns 181/15 2 0.000000 0.000000 0.000000
ray_color returns 382/84 2 0.000000 0.000000 0.000000
ray_color returns 243/32 2 0.000000 0.000000 0.000000
ray_color returns 246/32 2 0.000000 0.000000 0.000000
ray_color returns 249/32 2 0.000000 0.000000 0.000000
ray_color returns 251/32 2 0.000000 0.000000 0.000000
ray_color returns 244/33 2 0.000000 0.000000 0.000000
ray_color returns 255/33 2 0.000000 0.000000 0.000000
ray_color returns 240/34 2 0.000000 0.000000 0.000000
ray_color returns 242/35 2 0.000000 0.000000 0.000000
ray_color returns 248/35 2 0.000000 0.000000 0.000000
ray_color returns 277/16 2 0.000000 0.000000 0.000000
ray_color returns 283/16 2 0.000000 0.000000 0.000000
ray_color returns 323/44 2 0.000000 0.000000 0.000000
ray_color returns 329/44 2 0.000000 0.000000 0.000000
ray_color returns 331/44 2 0.000000 0.000000 0.000000
ray_color returns 323/45 2 0.000000 0.000000 0.000000
ray_color returns 74/44 2 0.000000 0.000000 0.000000
ray_color returns 67/45 2 0.000000 0.000000 0.000000
ray_color returns 75/45 2 0.000000 0.000000 0.000000
ray_color returns 369/34 2 0.000000 0.000000 0.000000
ray_color returns 375/35 2 0.000000 0.000000 0.000000
ray_color returns 383/35 2 0.000000 0.000000 0.000000
ray_color returns 51/46 2 0.000000 0.000000 0.000000
ray_color returns 61/46 2 0.000000 0.000000 0.000000
ray_color returns 51/36 2 0.000000 0.000000 0.000000
ray_color returns 57/36 2 0.000000 0.000000 0.000000
ray_color returns 63/37 2 0.000000 0.000000 0.000000
ray_color returns 52/40 2 0.000000 0.000000 0.000000
ray_color returns 57/40 2 0.000000 0.000000 0.000000
ray_color returns 50/41 2 0.000000 0.000000 0.000000
ray_color returns 56/41 2 0.000000 0.000000 0.000000
ray_color returns 42/28 2 0.000000 0.000000 0.000000
ray_color returns 272/28 2 0.000000 0.000000 0.000000
ray_color returns 273/28 2 0.000000 0.000000 0.000000
ray_color returns 279/28 2 0.000000 0.000000 0.000000
ray_color returns 285/28 2 0.000000 0.000000 0.000000
ray_color returns 278/29 2 0.000000 0.000000 0.000000
ray_color returns 378/85 2 1.300000 0.100000 0.100000
ray_color returns 340/18 2 0.000000 0.000000 0.000000
ray_color returns 341/19 2 0.000000 0.000000 0.000000
ray_color returns 343/19 2 0.000000 0.000000 0.000000
ray_color returns 347/19 2 0.000000 0.000000 0.000000
ray_color returns 349/19 2 0.000000 0.000000 0.000000
ray_color returns 147/16 2 0.000000 0.000000 0.000000
ray_color returns 152/17 2 0.000000 0.000000 0.000000
ray_color returns 156/17 2 0.000000 0.000000 0.000000
ray_color returns 120/24 2 0.000000 0.000000 0.000000
ray_color returns 121/24 2 0.000000 0.000000 0.000000
ray_color returns 115/25 2 0.000000 0.000000 0.000000
ray_color returns 119/25 2 0.000000 0.000000 0.000000
ray_color returns 122/25 2 0.000000 0.000000 0.000000
ray_color returns 126/25 2 0.000000 0.000000 0.000000
ray_color returns 382/14 2 0.000000 0.000000 0.000000
ray_color returns 381/15 2 0.000000 0.000000 0.000000
ray_color returns 179/27 2 0.000000 0.000000 0.000000
ray_color returns 122/16 2 0.000000 0.000000 0.000000
ray_color returns 128/22 2 0.000000 0.000000 0.000000
ray_color returns 370/86 2 0.000000 0.000000 0.000000
ray_color returns 371/86 2 0.000000 0.000000 0.000000
ray_color returns 373/86 2 0.000000 0.000000 0.000000
ray_color returns 377/86 2 0.000000 0.000000 0.000000
ray_color returns 378/86 2 0.000000 0.000000 0.000000
ray_color returns 380/86 2 0.000000 0.000000 0.000000
ray_color returns 369/87 2 0.000000 0.000000 0.000000
ray_color returns 370/87 2 0.000000 0.000000 0.000000
ray_color returns 371/87 2 0.000000 0.000000 0.000000
ray_color returns 375/87 2 0.000000 0.000000 0.000000
ray_color returns 376/87 2 0.000000 0.000000 0.000000
ray_color returns 380/87 2 0.000000 0.000000 0.000000
ray_color returns 381/87 2 0.000000 0.000000 0.000000
ray_color returns 198/10 2 0.000000 0.000000 0.000000
ray_color returns 207/11 2 0.000000 0.000000 0.000000
ray_color returns 156/27 2 0.000000 0.000000 0.000000
ray_color returns 337/44 2 0.000000 0.000000 0.000000
========= Stack overflow
========= at glm::vec<(int)3, T1, T2> glm::operator *<float, (glm::qualifier)0>(const glm::vec<(int)3, T1, T2> &, const glm::vec<(int)3, T1, T2> &)+0x50 in C:/Users/flarive/Documents/Visual Studio 2022/Projects/RTC2/libs/glm/detail/type_vec3.inl:708
========= by thread (0,0,0) in block (2,1,0)
========= Device Frame:glm::detail::compute_dot<glm::vec<(int)3, float, (glm::qualifier)0>, float, (bool)0>::call(const glm::vec<(int)3, float, (glm::qualifier)0> &, const glm::vec<(int)3, float, (glm::qualifier)0> &)+0x2f0 in C:/Users/flarive/Documents/Visual Studio 2022/Projects/RTC2/libs/glm/detail/func_geometric.inl:52
========= Device Frame:T2 glm::dot<(int)3, float, (glm::qualifier)0>(const glm::vec<T1, T2, T3> &, const glm::vec<T1, T2, T3> &)+0x1d0 in C:/Users/flarive/Documents/Visual Studio 2022/Projects/RTC2/libs/glm/detail/func_geometric.inl:170
========= Device Frame:omni_light::hit(const ray &, interval, hit_record &, int, curandStateXORWOW *) const+0xdd0 in C:/Users/flarive/Documents/Visual Studio 2022/Projects/RTC2/core/src/lights/omni_light.cuh:59
========= Device Frame:omni_light::pdf_value(const glm::vec<(int)3, float, (glm::qualifier)0> &, const glm::vec<(int)3, float, (glm::qualifier)0> &, curandStateXORWOW *) const+0x960 in C:/Users/flarive/Documents/Visual Studio 2022/Projects/RTC2/core/src/lights/omni_light.cuh:113
========= Device Frame:hittable_list::pdf_value(const glm::vec<(int)3, float, (glm::qualifier)0> &, const glm::vec<(int)3, float, (glm::qualifier)0> &, curandStateXORWOW *) const+0xb90 in C:/Users/flarive/Documents/Visual Studio 2022/Projects/RTC2/core/src/primitives/hittable_list.cuh:254
========= Device Frame:hittable_pdf::value(const glm::vec<(int)3, float, (glm::qualifier)0> &, curandStateXORWOW *) const+0x470 in C:/Users/flarive/Documents/Visual Studio 2022/Projects/RTC2/core/src/pdf/hittable_pdf.cuh:32
========= Device Frame:mixture_pdf::value(const glm::vec<(int)3, float, (glm::qualifier)0> &, curandStateXORWOW *) const+0x4c0 in C:/Users/flarive/Documents/Visual Studio 2022/Projects/RTC2/core/src/pdf/mixture_pdf.cuh:30
========= Device Frame:ray_color(const ray &, int, hittable_list &, hittable_list &, curandStateXORWOW *)+0x35b0 in C:/Users/flarive/Documents/Visual Studio 2022/Projects/RTC2/core/src/gpu.cu:198
========= Device Frame:ray_color(const ray &, int, hittable_list &, hittable_list &, curandStateXORWOW *)+0x3990 in C:/Users/flarive/Documents/Visual Studio 2022/Projects/RTC2/core/src/gpu.cu:263
========= Device Frame:render(color *, int, int, int, int, int, hittable_list **, hittable_list **, camera **, curandStateXORWOW *)+0x1ca0 in C:/Users/flarive/Documents/Visual Studio 2022/Projects/RTC2/core/src/gpu.cu:418
gpu.cu :
__device__ color ray_color(const ray& r, int depth, hittable_list& _world, hittable_list& _lights, curandState* local_rand_state)
{
// If we've exceeded the ray bounce limit, no more light is gathered.
if (depth <= 0)
{
// return background solid color
return color::red();// background_color;
}
hit_record rec;
vector3 unit_dir = unit_vector(r.direction());
// If the ray hits nothing, return the background color.
// 0.001 is to fix shadow acne interval
if (!_world.hit(r, interval(SHADOW_ACNE_FIX, INFINITY), rec, depth, local_rand_state))
{
return color::black();
}
// ray hit a world object
scatter_record srec;
color color_from_emission = rec.mat->emitted(r, rec, rec.u, rec.v, rec.hit_point, local_rand_state);
if (!rec.mat->scatter(r, _lights, rec, srec, local_rand_state))
{
return color_from_emission;
}
if (_lights.object_count == 0)
{
// no lights = no importance sampling
return srec.attenuation * ray_color(srec.skip_pdf_ray, depth - 1, _world, _lights, local_rand_state);
}
// no importance sampling
if (srec.skip_pdf)
{
return srec.attenuation * ray_color(srec.skip_pdf_ray, depth - 1, _world, _lights, local_rand_state);
}
// i have problems with this 2 pointers i think
hittable_pdf* hpdf = new hittable_pdf(_lights, rec.hit_point);
mixture_pdf *mpdf = new mixture_pdf(hpdf, srec.pdf_ptr);
ray scattered = ray(rec.hit_point, mpdf->generate(srec, local_rand_state), r.time());
float pdf_val = mpdf->value(scattered.direction(), local_rand_state);
float scattering_pdf = rec.mat->scattering_pdf(r, rec, scattered);
color sample_color = ray_color(scattered, depth - 1, _world, _lights, local_rand_state);
color color_from_scatter = (srec.attenuation * scattering_pdf * sample_color) / pdf_val;
// render opaque object
color final_color = color_from_emission + color_from_scatter;
delete(mpdf);
delete(hpdf);
return final_color;
}
__global__ void render(color* fb, int width, int height, int spp, int sqrt_spp, int max_depth, hittable_list **world, hittable_list **lights, camera** cam, curandState *randState)
{
int i = threadIdx.x + blockIdx.x * blockDim.x;
int j = threadIdx.y + blockIdx.y * blockDim.y;
if((i >= width) || (j >= height)) return;
int pixel_index = j* width + i;
curandState local_rand_state = randState[pixel_index];
color pixel_color(0, 0, 0);
color background(0, 0, 0);
// new
for (int s_j = 0; s_j < sqrt_spp; ++s_j)
{
for (int s_i = 0; s_i < sqrt_spp; ++s_i)
{
ray r = (*cam)->get_ray(i, j, s_i, s_j, nullptr, &local_rand_state);
pixel_color += ray_color(r, max_depth, **world, **lights, &local_rand_state);
}
}
randState[pixel_index] = local_rand_state;
pixel_color /= float(spp);
pixel_color[0] = sqrt(pixel_color[0]);
pixel_color[1] = sqrt(pixel_color[1]);
pixel_color[2] = sqrt(pixel_color[2]);
fb[pixel_index] = pixel_color;
}
void renderGPU(int width, int height, int spp, int max_depth, int tx, int ty, const char* filepath)
{
std::cout << "Rendering " << width << "x" << height << " " << spp << " samples > " << filepath << std::endl;
size_t stackSize;
// Get the current stack size limit
cudaError_t result1 = cudaDeviceGetLimit(&stackSize, cudaLimitStackSize);
if (result1 != cudaSuccess) {
std::cerr << "Failed to get stack size: " << cudaGetErrorString(result1) << std::endl;
return;
}
std::cout << "Current stack size limit: " << stackSize << " bytes" << std::endl;
size_t newStackSize = 2048; // Set the stack size to 1MB per thread
cudaError_t result2 = cudaDeviceSetLimit(cudaLimitStackSize, newStackSize);
if (result2 != cudaSuccess) {
std::cerr << "Failed to set stack size: " << cudaGetErrorString(result2) << std::endl;
return;
}
std::cout << "New stack size limit: " << newStackSize << " bytes" << std::endl;
int sqrt_spp = static_cast<int>(sqrt(spp));
// Values
int num_pixels = width * height;
int tex_x, tex_y, tex_n;
unsigned char *tex_data_host = stbi_load("C:\\earth_diffuse.jpg", &tex_x, &tex_y, &tex_n, 0);
if (!tex_data_host) {
std::cerr << "Failed to load texture." << std::endl;
return;
}
unsigned char *tex_data;
checkCudaErrors(cudaMallocManaged(&tex_data, tex_x * tex_y * tex_n * sizeof(unsigned char)));
checkCudaErrors(cudaMemcpy(tex_data, tex_data_host, tex_x * tex_y * tex_n * sizeof(unsigned char), cudaMemcpyHostToDevice));
image_texture**texture;
checkCudaErrors(cudaMalloc((void **)&texture, sizeof(image_texture*)));
texture_init<<<1, 1>>>(tex_data, tex_x, tex_y, tex_n, texture);
// Allocating CUDA memory
color* image;
checkCudaErrors(cudaMallocManaged((void**)&image, width * height * sizeof(color)));
// Allocate random state
curandState *d_rand_state;
checkCudaErrors(cudaMalloc((void **)&d_rand_state, num_pixels * sizeof(curandState)));
curandState *d_rand_state2;
checkCudaErrors(cudaMalloc((void **)&d_rand_state2, 1 * sizeof(curandState)));
// Allocate 2nd random state to be initialized for the world creation
rand_init<<<1,1>>>(d_rand_state2);
checkCudaErrors(cudaGetLastError());
checkCudaErrors(cudaDeviceSynchronize());
// Building the world
hittable_list **elist;
checkCudaErrors(cudaMalloc((void**)&elist, sizeof(hittable_list*)));
hittable_list **elights;
checkCudaErrors(cudaMalloc((void**)&elights, sizeof(hittable_list*)));
camera** cam;
checkCudaErrors(cudaMalloc((void**)&cam, sizeof(camera*)));
create_cornell_box<<<1, 1>>>(elist, elights, cam, width, height, spp, sqrt_spp, texture, d_rand_state2);
checkCudaErrors(cudaGetLastError());
checkCudaErrors(cudaDeviceSynchronize());
dim3 blocks(width / tx+1, height / ty+1);
dim3 threads(tx, ty);
render_init<<<blocks, threads>>>(width, height, d_rand_state);
checkCudaErrors(cudaGetLastError());
checkCudaErrors(cudaDeviceSynchronize());
render<<<blocks, threads>>>(image, width, height, spp, sqrt_spp, max_depth, elist, elights, cam, d_rand_state);
checkCudaErrors(cudaGetLastError());
checkCudaErrors(cudaDeviceSynchronize());
uint8_t* imageHost = new uint8_t[width * height * 3 * sizeof(uint8_t)];
for (int j = height - 1; j >= 0; j--) {
for (int i = 0; i < width; i++) {
size_t pixel_index = j * width + i;
imageHost[(height - j - 1) * width * 3 + i * 3] = 255.99f * image[pixel_index].r();
imageHost[(height - j - 1) * width * 3 + i * 3 + 1] = 255.99f * image[pixel_index].g();
imageHost[(height - j - 1) * width * 3 + i * 3 + 2] = 255.99f * image[pixel_index].b();
}
}
stbi_write_png(filepath, width, height, 3, imageHost, width * 3);
// Clean up
checkCudaErrors(cudaDeviceSynchronize());
checkCudaErrors(cudaGetLastError());
checkCudaErrors(cudaFree(cam));
checkCudaErrors(cudaFree(elights));
checkCudaErrors(cudaFree(elist));
//checkCudaErrors(cudaFree(myscene));
checkCudaErrors(cudaFree(d_rand_state));
checkCudaErrors(cudaFree(image));
}
pdf.cuh
#pragma once
#include "../misc/vector3.cuh"
#include "../textures/image_texture.cuh"
#include "../misc/onb.cuh"
#include "../primitives/hittable.cuh"
// avoid circular dependency
struct scatter_record;
__host__ __device__ enum class pdfTypeID {
pdfBaseType = 0,
pdfCosine = 1,
pdfHittable = 2,
pdfImage = 3,
pdfMixture = 4,
pdfSphere = 5,
pdfAnisotropicPhong = 6
};
/// <summary>
/// Probability Distribution Function (henceforth PDF).
/// In short, a PDF is a continuous function that can be integrated over to determine how likely a result is over an integral.
/// Remember that the PDF is a probability function.
/// </summary>
class pdf
{
public:
__device__ virtual ~pdf() {}
__device__ virtual float value(const vector3& direction, curandState* local_rand_state) const = 0;
__device__ virtual vector3 generate(scatter_record& rec, curandState* local_rand_state) = 0;
};
cosine_pdf.cuh :
#pragma once
#include "pdf.cuh"
#include "../misc/constants.cuh"
#include "../misc/gpu_randomizer.cuh"
#include <cmath>
class cosine_pdf : public pdf
{
public:
__device__ cosine_pdf(const vector3& w)
{
m_uvw.build_from_w(w);
}
__device__ ~cosine_pdf() = default;
__device__ float value(const vector3& direction, curandState* local_rand_state) const override;
__device__ vector3 generate(scatter_record& rec, curandState* local_rand_state) override;
__host__ __device__ virtual pdfTypeID getTypeID() const { return pdfTypeID::pdfCosine; }
private:
onb m_uvw;
};
__device__ inline float cosine_pdf::value(const vector3& direction, curandState* local_rand_state) const
{
float cosine_theta = glm::dot(unit_vector(direction), m_uvw.w());
return ffmax(0.0f, cosine_theta / M_PI);
}
__device__ inline vector3 cosine_pdf::generate(scatter_record& rec, curandState* local_rand_state)
{
return m_uvw.local(random_cosine_direction(local_rand_state));
}
mixture_pdf.cuh :
#pragma once
#include "pdf.cuh"
#include "../misc/constants.cuh"
#include "../misc/gpu_randomizer.cuh"
// https://github.com/Drummersbrother/raytracing-in-one-weekend/blob/90b1d3d7ce7f6f9244bcb925c77baed4e9d51705/pdf.h
class mixture_pdf : public pdf
{
public:
__device__ mixture_pdf() : proportion(0.5f) { p0 = nullptr; p1 = nullptr; }
__device__ mixture_pdf(pdf* _p0, pdf* _p1) : proportion(0.5f) { p0 = _p0; p1 = _p1; }
__device__ mixture_pdf(pdf* _p0, pdf* _p1, float _prop) : proportion(_prop) { p0 = _p0; p1 = _p1; }
__device__ ~mixture_pdf();
__device__ float value(const vector3& direction, curandState* local_rand_state) const override;
__device__ vector3 generate(scatter_record& rec, curandState* local_rand_state) override;
__host__ __device__ virtual pdfTypeID getTypeID() const { return pdfTypeID::pdfMixture; }
private:
float proportion = 0.0f;
pdf* p0 = nullptr;
pdf* p1 = nullptr;
};
__device__ inline float mixture_pdf::value(const vector3& direction, curandState* local_rand_state) const
{
return proportion*(p0->value(direction, local_rand_state)) + (1.0f - proportion) * (p1->value(direction, local_rand_state));
}
__device__ inline vector3 mixture_pdf::generate(scatter_record& rec, curandState* local_rand_state)
{
if (get_real(local_rand_state) < proportion)
{
auto v0 = p0->generate(rec, local_rand_state);
printf("mixture_pdf::return p[0]->generate %f %f %f\n", v0.x, v0.y, v0.z);
return v0;
}
else
{
auto v1 = p1->generate(rec, local_rand_state);
printf("mixture_pdf::return p[0]->generate %f %f %f\n", v1.x, v1.y, v1.z);
return v1;
}
}
__device__ inline mixture_pdf::~mixture_pdf()
{
if (p0) {
delete p0;
p0 = nullptr;
}
if (p1) {
delete p1;
p1 = nullptr;
}
}