Hello,
I have an octree which I store in an std::vector on the cpu. I store the tree in an std::vector because building a const struct on the cpu would cause problems. I would like to copy the tree to the gpu to a struct with fixed N in which N is the maximum number of elements in the tree nodes.
I tryed some approaches but nothing seemed to work.
Thanks in advance,
Rafael S.
Dynamic CPU Struct:
struct OctreeNode {int64_t index; Point Center; double HalfWidth[3]; int64_t ChildIndex[8]; int Level; int TriangleCount; double* p1x; double* p1y; double* p1z;
double* p2x; double* p2y; double* p2z; double* p3x; double* p3y;
double* p3z; int* Material; int* Body; int64_t* triangle_id;}
Const GPU Struct:
const int N = 680235;
struct OctreeNodeGPUStatic {int64_t index; Point Center; double HalfWidth[3]; int64_t ChildIndex[8]; int Level;
int TriangleCount; double p1x[N]; double p1y[N]; double p1z[N];
double p2x[N]; double p2y[N]; double p2z[N]; double p3x[N]; double p3y[N]; double p3z[N]; int Material[N]; int Body[N]; int64_t triangle_id[N];};
Code for copying the octree to GPU:
__host__ void AllocateAndLaunchOctreeStatic(OctreeNodeGPUStatic* &d_nodes, std::vector<OctreeNode>& h_nodes) {
// Allocate memory for the entire OctreeNode array on the device
int numNodes = h_nodes.size();
CUDA_CHECK(cudaMalloc(&d_nodes, numNodes * sizeof(OctreeNodeGPUStatic)));
for (int j = 0; j < numNodes; ++j) {
// Copy arrays like p1x, p1y, p1z only if TriangleCount > 0
if (h_nodes[j].TriangleCount > 0) {
for (int i = 0; i < h_nodes[j].TriangleCount; ++i) {
CUDA_CHECK(cudaMemcpy(&d_nodes[j].p1x[i], &h_nodes[j].p1x[i], sizeof(double), cudaMemcpyHostToDevice));
CUDA_CHECK(cudaMemcpy(&d_nodes[j].p1y[i], &h_nodes[j].p1y[i], sizeof(double), cudaMemcpyHostToDevice));
CUDA_CHECK(cudaMemcpy(&d_nodes[j].p1z[i], &h_nodes[j].p1z[i], sizeof(double), cudaMemcpyHostToDevice));
CUDA_CHECK(cudaMemcpy(&d_nodes[j].p2x[i], &h_nodes[j].p2x[i], sizeof(double), cudaMemcpyHostToDevice));
CUDA_CHECK(cudaMemcpy(&d_nodes[j].p2y[i], &h_nodes[j].p2y[i], sizeof(double), cudaMemcpyHostToDevice));
CUDA_CHECK(cudaMemcpy(&d_nodes[j].p2z[i], &h_nodes[j].p2z[i], sizeof(double), cudaMemcpyHostToDevice));
CUDA_CHECK(cudaMemcpy(&d_nodes[j].p3x[i], &h_nodes[j].p3x[i], sizeof(double), cudaMemcpyHostToDevice));
CUDA_CHECK(cudaMemcpy(&d_nodes[j].p3y[i], &h_nodes[j].p3y[i], sizeof(double), cudaMemcpyHostToDevice));
CUDA_CHECK(cudaMemcpy(&d_nodes[j].p3z[i], &h_nodes[j].p3z[i], sizeof(double), cudaMemcpyHostToDevice));
}
}
}}
I also tryed:
__host__ void AllocateAndLaunchOctreeStatic(OctreeNodeGPUStatic*
&d_nodes, std::vector<OctreeNode>& h_nodes) {
int numNodes = h_nodes.size();
for (int j = 0; j < numNodes; ++j) {
if (h_nodes[j].TriangleCount > 0) {
for (int i = 0; i < h_nodes[j].TriangleCount; ++i) {
std::cerr << "Triangle Copy" << std::endl;
d_nodes[j].p1x[i] = h_nodes[j].p1x[i];
d_nodes[j].p1y[i] = h_nodes[j].p1y[i];
d_nodes[j].p1z[i] = h_nodes[j].p1z[i];
d_nodes[j].p2x[i] = h_nodes[j].p2x[i];
d_nodes[j].p2y[i] = h_nodes[j].p2y[i];
d_nodes[j].p2z[i] = h_nodes[j].p2z[i];
d_nodes[j].p3x[i] = h_nodes[j].p3x[i];
d_nodes[j].p3y[i] = h_nodes[j].p3y[i];
d_nodes[j].p3z[i] = h_nodes[j].p3z[i];
d_nodes[j].Material[i] = h_nodes[j].Material[i];
d_nodes[j].Body[i] = h_nodes[j].Body[i];
d_nodes[j].triangle_id[i] = h_nodes[j].triangle_id[i];}}}