Two problems with PxParticleFluid

I am trying to render water with this algorithm.

However, I have two problems.

The first problem is with CUDA Graphics Interop. Here is the code:

/**
 * Gathers particle positions from a strided source buffer into a tightly
 * packed float3 array.
 *
 * Launch layout: 1D grid of 1D blocks, one thread per particle; any block
 * size works because threads past numParticles exit immediately. Uses no
 * shared memory.
 *
 * @param particlesD3D   Destination array; must hold at least numParticles float3 entries.
 * @param particlesPhysX Address of the first source position.
 * @param stride         Distance in bytes between consecutive source positions.
 * @param numParticles   Number of positions to copy. The default leaves the copy
 *                       bounded only by the launch size, which keeps existing
 *                       three-argument launches compiling; such callers must
 *                       launch exactly one thread per particle.
 */
__global__ void copyParticlesPos(float3* particlesD3D, float3* particlesPhysX, int stride, int numParticles = 0x7fffffff)
{
	const int particle = blockIdx.x * blockDim.x + threadIdx.x;
	if(particle >= numParticles)
	{
		return; // Tail threads of the last block have nothing to copy.
	}

	// The source is addressed in bytes because its stride need not equal sizeof(float3).
	const char* source = reinterpret_cast<const char*>(particlesPhysX) + static_cast<size_t>(particle) * stride;
	particlesD3D[particle] = *reinterpret_cast<const float3*>(source);
}

// Writes a failed CUDA runtime call to the debugger output; returns true when err is cudaSuccess.
static bool cudaSucceeded(cudaError err, const wchar_t* what)
{
	if(err == cudaSuccess)
	{
		return true;
	}

	WCHAR message[128];
	swprintf(message, 128, L"%ls failed, CUDA error code: %d\n", what, static_cast<int>(err));
	OutputDebugStringW(message);
	return false;
}

/**
 * Advances the simulation by one time step and copies the fluid particle
 * positions into the given graphics resource.
 *
 * Every CUDA call is checked. On failure the copy for this frame is skipped,
 * but a resource that was mapped is still unmapped and the particle data that
 * was locked is still unlocked.
 *
 * @param particles Registered CUDA graphics resource receiving packed float3 positions.
 */
void PhysXSimulator::Update(cudaGraphicsResource* particles)
{
	mScene->simulate(simulationTimeStep);
	mScene->fetchResults(true);

	auto physxdata = mFluid->lockParticleFluidReadData(physx::PxDataAccessFlag::eDEVICE);
	if(physxdata == nullptr)
	{
		OutputDebugStringW(L"lockParticleFluidReadData returned no data\n");
		return;
	}

	if(cudaSucceeded(cudaGraphicsMapResources(1, &particles), L"cudaGraphicsMapResources"))
	{
		float3* mappedRes = nullptr;
		size_t mappedSize = 0;

		if(cudaSucceeded(cudaGraphicsResourceGetMappedPointer(reinterpret_cast<void**>(&mappedRes), &mappedSize, particles), L"cudaGraphicsResourceGetMappedPointer"))
		{
			// mappedSize is in bytes; convert it to a float3 capacity and never copy
			// more entries than the simulation reports or the destination can hold.
			const size_t capacity  = mappedSize / sizeof(float3);
			const size_t available = physxdata->validParticleRange;
			const int numParticles = static_cast<int>(available < capacity ? available : capacity);

			if(numParticles > 0)
			{
				const int threadsPerBlock = 256;
				const int numBlocks = (numParticles + threadsPerBlock - 1) / threadsPerBlock; // ceil-div

				// NOTE(review): the CUDA-MEMCHECK report quoted later in this file faults on the
				// very first read of thread 0, so the PhysX pointer itself appears unusable from
				// this kernel. Presumably it belongs to a CUDA context owned by PhysX rather than
				// the one the runtime API uses here - confirm against the PxCudaContextManager setup.
				copyParticlesPos<<<numBlocks, threadsPerBlock>>>(mappedRes, (float3*)physxdata->positionBuffer.ptr(), physxdata->positionBuffer.stride(), numParticles);

				// A launch reports configuration errors through cudaGetLastError(); faults raised
				// inside the kernel only surface at the next synchronizing call.
				if(cudaSucceeded(cudaGetLastError(), L"copyParticlesPos launch"))
				{
					// Wait for the copy to finish before the particle data is unlocked below.
					cudaSucceeded(cudaDeviceSynchronize(), L"copyParticlesPos execution");
				}
			}
		}

		cudaSucceeded(cudaGraphicsUnmapResources(1, &particles), L"cudaGraphicsUnmapResources");
	}

	physxdata->unlock();
}

The first time Update() runs, everything is OK. But on the second and every subsequent call, cudaGraphicsMapResources() (the call right after lockParticleFluidReadData()) returns cudaErrorLaunchFailure (error code 4) in the debug build and cudaErrorInvalidResourceHandle (error code 33) in the release build. It looks like the resource is not being unmapped properly.

The second problem is strange fluid behavior. Here is the fluid parameters:

mFluid->setRestOffset(0.45f);
mFluid->setContactOffset(0.9f);
mFluid->setMaxMotionDistance(1.0f);
mFluid->setRestitution(0.3f);
mFluid->setDynamicFriction(0.001f);
mFluid->setStaticFriction(0.0f);
mFluid->setDamping(0.0f);
mFluid->setExternalAcceleration(physx::PxVec3(0.0f, -9.8f, 0.0f));
mFluid->setParticleBaseFlag(physx::PxParticleBaseFlag::eENABLED, true);
mFluid->setParticleBaseFlag(physx::PxParticleBaseFlag::eGPU, true);
mFluid->setStiffness(45.0f);
mFluid->setViscosity(150.0f);

The fluid does not flow and the pool does not fill with water; the particles just flock to the walls. Here is the view in PVD (4096 particles).

Khm, I still don’t get it.

Okay, now I understand that the problem is in the data returned by lockParticleFluidReadData(): I can’t access that memory. But why? This is the output of CUDA-MEMCHECK:

========= Invalid global read of size 4
========= at 0x00000118 in C:/Users/Sixshaman/All/Projects/PhysX/PhysXWater/PhysXWaterGPU/CudaParticles.cu:20:copyParticlesPos(float3*, float3*, int)
========= by thread (0,0,0) in block (0,0,0)
========= Address 0x40920000 is out of bounds
========= Saved host backtrace up to driver entry point at kernel launch time
========= Host Frame:C:\WINDOWS\system32\nvcuda.dll (cuLaunchKernel + 0x13b) [0x1817b]
========= Host Frame:C:\Users\Sixshaman\All\Projects\PhysX\PhysXWater\Debug\cudart32_70.dll [0x2e26]
========= Host Frame:C:\Users\Sixshaman\All\Projects\PhysX\PhysXWater\Debug\cudart32_70.dll [0x2e7e]
========= Host Frame:C:\Users\Sixshaman\All\Projects\PhysX\PhysXWater\Debug\cudart32_70.dll (cudaLaunch + 0xd8) [0x213d8]
========= Host Frame:C:\Users\Sixshaman\All\Projects\PhysX\PhysXWater\Debug\PhysXWaterGPU.exe (cudaLaunch + 0xc) [0x5aedc]
========= Host Frame:C:\Users\Sixshaman\All\Projects\PhysX\PhysXWater\Debug\PhysXWaterGPU.exe (__device_stub__Z16copyParticlesPosP6float3S0_i + 0x50) [0x5ae40]
========= Host Frame:C:\Users\Sixshaman\All\Projects\PhysX\PhysXWater\Debug\PhysXWaterGPU.exe (copyParticlesPos + 0x14) [0x5add4]
========= Host Frame:C:\Users\Sixshaman\All\Projects\PhysX\PhysXWater\Debug\PhysXWaterGPU.exe (PhysXSimulator::Update + 0x179) [0x5acf9]
========= Host Frame:C:\Users\Sixshaman\All\Projects\PhysX\PhysXWater\Debug\PhysXWaterGPU.exe (PhysXWater::UpdateScene + 0x169) [0xcc1a9]
========= Host Frame:C:\Users\Sixshaman\All\Projects\PhysX\PhysXWater\Debug\PhysXWaterGPU.exe (D3DApplication::Run + 0xb8) [0xc54e8]
========= Host Frame:C:\Users\Sixshaman\All\Projects\PhysX\PhysXWater\Debug\PhysXWaterGPU.exe (WinMain + 0x7c) [0xc561c]
========= Host Frame:C:\Users\Sixshaman\All\Projects\PhysX\PhysXWater\Debug\PhysXWaterGPU.exe (__tmainCRTStartup + 0x23f) [0xe138f]
========= Host Frame:C:\Users\Sixshaman\All\Projects\PhysX\PhysXWater\Debug\PhysXWaterGPU.exe (WinMainCRTStartup + 0xd) [0xe113d]
========= Host Frame:C:\WINDOWS\SYSTEM32\KERNEL32.DLL (BaseThreadInitThunk + 0xe) [0x1919f]
========= Host Frame:C:\WINDOWS\SYSTEM32\ntdll.dll (RtlInitializeExceptionChain + 0x8f) [0x5b54f]
========= Host Frame:C:\WINDOWS\SYSTEM32\ntdll.dll (RtlInitializeExceptionChain + 0x5a) [0x5b51a]

That happens even if I try to run only one thread in only one block. Maybe I didn’t set some flag?

My GPU is GTX970.

Okay.

I’ve set this flag:
// Sets the ePOSITION_BUFFER read-data flag on the fluid to true.
mFluid->setParticleReadDataFlag(physx::PxParticleReadDataFlag::ePOSITION_BUFFER, true);

But it still doesn’t work. I don’t get it. I can’t even find examples of GPU fluid simulation on PhysX.