The following code starts to build kd-tree, but on the lines marked by exclimation symbol (on either of the two - try to choose by #if 0/#if 1) execution failed with an error message:
CUDA error 98 [C:\Program Files\NVIDIA GPU Computing Toolkit\CUDA\v10.1\include\thrust/system/cuda/detail/parallel_for.h, 143]: invalid device function
Also message box said “abort() called”.
The code itself:
__forceinline__ __device__
float min(float a, float b, float c)
{
return fminf(fminf(a, b), c);
}
__forceinline__ __device__
float max(float a, float b, float c)
{
return fmaxf(fmaxf(a, b), c);
}
using Vertex = float3;
struct Triangle { Vector A, B, C; };
struct AABB { Vertex min, max; };
void KdTreeBuilderPrivate::build(const Triangle * t, size_t triangleCount)
{
using U = unsigned int;
//auto p = thrust::cuda::par.on(stream);
auto p = thrust::device;
thrust::device_vector< Triangle > triangles{t + 0, t + triangleCount};
thrust::device_vector< Vertex > aabb{triangleCount + triangleCount};
{
auto even = thrust::make_transform_iterator(thrust::counting_iterator< U >(0), [] __device__ (U i) -> U { return i + i; });
{
auto minbb = [] __device__ (const Triangle & t) -> Vertex { return {min(t.A.x, t.B.x, t.C.x), min(t.A.y, t.B.y, t.C.y), min(t.A.z, t.B.z, t.C.z)}; };
auto dest = thrust::make_permutation_iterator(aabb.begin(), even);
thrust::transform(p, triangles.cbegin(), triangles.cend(), dest, minbb);
}
{
auto maxbb = [] __device__ (const Triangle & t) -> Vertex { return {max(t.A.x, t.B.x, t.C.x), max(t.A.y, t.B.y, t.C.y), max(t.A.z, t.B.z, t.C.z)}; };
auto dest = thrust::make_permutation_iterator(thrust::next(aabb.begin()), even); // odd
thrust::transform(p, triangles.cbegin(), triangles.cend(), dest, maxbb);
}
}
thrust::device_vector< U > X{aabb.size()};
{
auto halves = [] __device__ (U i) { return i / 2; };
auto bb = thrust::make_transform_iterator(thrust::make_counting_iterator< U >(0), halves);
#if 1
thrust::copy_n(p, bb, X.size(), X.begin()); // !
#else
X.assign(bb, thrust::next(bb, X.size())); // !
#endif
}
auto Y = X, Z = Y;
{
auto xless = [] __device__ (const Vertex & l, const Vertex & r) -> bool { return l.x < r.x; };
auto aabbCopy = aabb;
thrust::stable_sort_by_key(p, aabbCopy.begin(), aabbCopy.end(), X.begin(), xless);
auto yless = [] __device__ (const Vertex & l, const Vertex & r) -> bool { return l.y < r.y; };
aabbCopy = aabb;
thrust::stable_sort_by_key(p, aabbCopy.begin(), aabbCopy.end(), Y.begin(), yless);
auto zless = [] __device__ (const Vertex & l, const Vertex & r) -> bool { return l.z < r.z; };
aabbCopy = aabb;
thrust::stable_sort_by_key(p, aabbCopy.begin(), aabbCopy.end(), Z.begin(), zless);
}
// ...
}
My guess is that the code, generated for bb iterator, is somehow broken.
How to fix the error?