Textures giving corrupt results


I am trying to use textures in my neighbor search algorithm.

At the moment, the textures are giving meaningless results. The standard algorithm looks like:

Attributes(Global) Subroutine BucketSearch & (x,hsml,CellList,nCell,CellIndex,NodesInCell,Neib,nTotal,maxIndex)

	Implicit None
	Integer, Shared:: i, Id, idx, idy, idz, j1, j2, j3, IdNeibCell
	Integer, Device:: j_neib, j4, k, d
	Integer, Value:: nTotal, maxIndex
	Integer, Device, Intent(IN):: CellList(maxIndex,NumCLRows), nCell(3), NodesInCell(maxIndex), CellIndex(nTotal,NumCIRows)  
	Integer, Device, Intent(OUT):: Neib(nTotal,nNeib_max)
	Real(fp_kind), Device:: dx(Dim), r, hsml_ave, d_influence, xi1, xi2, xi3, hsmli
	Real(fp_kind), Device, Intent(IN):: x(nTotal,Dim), hsml(nTotal)
	i = (blockIdx%x-1)*blockDim%x + threadIdx%x
	If (i >= 1 .and. i <= nTotal) Then
		Neib(i,2) = 0	
		Id = CellIndex(i,2)
		idx = CellIndex(i,3)
		idy = CellIndex(i,4)
		idz = CellIndex(i,5)
		xi1 = x(i,1)
		xi2 = x(i,2)
		xi3 = x(i,3)
		hsmli = hsml(i)
		Do j1 = (idx-1), (idx+1)		
			Do j2 = (idy-1), (idy+1)
				Do j3 = (idz-1), (idz+1)
					IdNeibCell = (j1-1) + (j2-1)*nCell(1) + (j3-1)*nCell(1)*nCell(2) + 1
					Do j4 = 2, NodesInCell(IdNeibCell) + 1
						j_neib = CellList(IdNeibCell,j4)
						hsml_ave = 0.5*(hsmli + hsml(j_neib))
						d_influence = 2.0*hsml_ave		
						dx(1) = xi1 - x(j_neib,1)
						dx(2) = xi2 - x(j_neib,2)
						dx(3) = xi3 - x(j_neib,3)
						r = sqrt( (dx(1)*dx(1)) + (dx(2)*dx(2)) + (dx(3)*dx(3)) )
						If (i .ne. j_neib) Then		
							If (r <= d_influence) Then
								Neib(i,1) = i
								Neib(i,2) = Neib(i,2) + 1
								k = Neib(i,2) + 2
								Neib(i,k) = j_neib				
							End If
						End If
					End Do
				End Do
			End Do
		End Do	
	End If
End Subroutine BucketSearch

Access to the hsml and x arrays at j_neib is especially not coalesced. So I thought that I could use textures for these:

Attributes(Global) Subroutine BucketSearch &

	Implicit None
	Integer, Shared:: i, Id, idx, idy, idz, j1, j2, j3, IdNeibCell
	Integer, Device:: j_act, j_neib, j4, k, d
	Integer, Value:: nTotal, maxIndex
	Integer, Device, Intent(IN):: CellList(maxIndex,NumCLRows), nCell(3), NodesInCell(maxIndex), CellIndex(nTotal,NumCIRows)  
	Integer, Device, Intent(OUT):: Neib(nTotal,nNeib_max)
	Real(fp_kind), Device:: dx(Dim), r, hsml_ave, d_influence
	Real(fp_kind), Device, Intent(IN):: x(nTotal,Dim), hsml(nTotal)
	i = (blockIdx%x-1)*blockDim%x + threadIdx%x
	If (i >= 1 .and. i <= nTotal) Then
		Neib(i,2) = 0	
		Id = CellIndex(i,2)
		idx = CellIndex(i,3)
		idy = CellIndex(i,4)
		idz = CellIndex(i,5)
		Do j1 = (idx-1), (idx+1)		
			Do j2 = (idy-1), (idy+1)
				Do j3 = (idz-1), (idz+1)
					IdNeibCell = (j1-1) + (j2-1)*nCell(1) + (j3-1)*nCell(1)*nCell(2) + 1
					Do j4 = 2, NodesInCell(IdNeibCell) + 1
						j_neib = CellList(IdNeibCell,j4)
						hsml_ave = 0.5*(hsml(i) + hsml_t(j_neib))
						d_influence = 2.0*hsml_ave		
						dx(1) = x(i,1) - x_t(j_neib,1)
						dx(2) = x(i,2) - x_t(j_neib,2)
						dx(3) = x(i,3) - x_t(j_neib,3)
						r = sqrt( (dx(1)*dx(1)) + (dx(2)*dx(2)) + (dx(3)*dx(3)) )
						If (i .ne. j_neib) Then		
							If (r <= d_influence) Then
								Neib(i,1) = i
								Neib(i,2) = Neib(i,2) + 1
								k = Neib(i,2) + 2
								Neib(i,k) = j_neib				
							End If
						End If
					End Do
				End Do
			End Do
		End Do	
	End If
End Subroutine BucketSearch

Can you see a reason why this doesn’t work. The standard algorithm without textures works perfectly fine. I have prepared a (as simple as possible) reproducing case for the standard and texture approaches. Let me know if you want me to send them to you.

I have tried simple texture test cases and they work fine. There is something about this search algorithm that is giving me trouble.

Thank you for any help,


By the way, x_t is the texture version of x and hsml_t is the texture version of hsml.


Hi Kirk,

Please do send me the example since I don’t see anything obvious.


Hi Mat,

I have emailed you the projects.

Have a good one,
