Hi,
For some reason when I run the attached code in my jetson AGX xavier, I get the following error:
GPUassert: an illegal memory access was encountered /home/folder/test.cu 55
Can someone please help me figure out what is wrong with my memory access pattern? The size of my pointcloud is 14316 and it is initialized and filled before I enter this function.
Here is the code:
#include <iostream>
#include <math.h>
#define PCL_NO_PRECOMPILE
#include <pcl/point_cloud.h>
#include <pcl/point_types.h>
#define gpuErrchk(ans) { gpuAssert((ans), __FILE__, __LINE__); }
inline void gpuAssert(cudaError_t code, const char *file, int line, bool abort=true)
{
if (code != cudaSuccess)
{
fprintf(stderr,"GPUassert: %s %s %d\n", cudaGetErrorString(code), file, line);
if (abort) exit(code);
}
}
__global__
void pointCloud2Float3(const int nfloat3, const float* point, float3* point3)
{
int idx = blockIdx.x * blockDim.x + threadIdx.x;
if(idx < nfloat3)
{
point3[idx] = make_float3(point[4*idx], point[4*idx+1], point[4*idx+2]);
}
__syncthreads();
}
void test(pcl::PointCloud<pcl::PointXYZ>::Ptr target_cloud)
{
int max_iterations_ = 100;
int nr_samples_ = 4;
// Allocate Unified Memory – accessible from CPU or GPU
float* points = NULL;
gpuErrchk( cudaMallocManaged(&points, target_cloud->points.size()*sizeof(float)* 4) );
points = (float *)target_cloud->points.data();
float3* points3 = NULL;
gpuErrchk( cudaMallocManaged(&points3, target_cloud->points.size()*sizeof(float3)) );
// Define kernel parameters
const int gridSize = 1024;
const int blocks = ceil(static_cast<float>(target_cloud->points.size())/static_cast<float>(gridSize));
const int nfloat3 = static_cast<int>(target_cloud->points.size());
pointCloud2Float3<<<gridSize, blocks>>>(nfloat3, points, points3);
gpuErrchk( cudaPeekAtLastError() );
// Wait for GPU to finish before accessing on host
gpuErrchk( cudaDeviceSynchronize() );
cudaFree(points);
cudaFree(points3);
}