Hi,
I am trying to use textures in my neighbor search algorithm.
At the moment, the textures are giving meaningless results. The standard algorithm looks like:
!> Cell-list neighbour search kernel; each thread handles one particle i.
!!
!! Thread i visits the 27 cells whose cell indices differ by -1, 0 or +1 from
!! those of particle i's cell and records every particle j /= i whose distance
!! to i is <= 2 * 0.5 * (hsml(i) + hsml(j)).
!!
!! Arguments
!!   x(nTotal,Dim)          coordinates; columns 1..3 are read (so Dim >= 3).
!!   hsml(nTotal)           per-particle length used for the cutoff
!!                          (presumably the smoothing length - confirm).
!!   CellList(c,2:n+1)      particle indices stored in cell c, n = NodesInCell(c).
!!   nCell(1:2)             cell counts used to linearise the 3-D cell index.
!!   CellIndex(i,3:5)       the three cell indices of particle i.
!!   NodesInCell(c)         number of particles in cell c.
!!   Neib(i,1)              set to i once at least one neighbour is found.
!!   Neib(i,2)              number of neighbours found for particle i.
!!   Neib(i,3:)             neighbour indices, in discovery order.
!!   nTotal, maxIndex       array extents (passed by value).
!!
!! NOTE(review): j1/j2/j3 are not range-checked, so the cell grid is assumed to
!! have a layer of cells around every occupied cell - TODO confirm.
!! NOTE(review): the neighbour slot is not checked against nNeib_max; the
!! caller is assumed to size Neib generously enough - TODO confirm.
Attributes(Global) Subroutine BucketSearch &
(x,hsml,CellList,nCell,CellIndex,NodesInCell,Neib,nTotal,maxIndex)
  Implicit None
  Integer, Value:: nTotal, maxIndex
  Integer, Device, Intent(IN):: CellList(maxIndex,NumCLRows), nCell(3)
  Integer, Device, Intent(IN):: NodesInCell(maxIndex), CellIndex(nTotal,NumCIRows)
  Integer, Device, Intent(OUT):: Neib(nTotal,nNeib_max)
  Real(fp_kind), Device, Intent(IN):: x(nTotal,Dim), hsml(nTotal)

  ! All work variables are per-thread locals. They must NOT carry the Shared
  ! attribute: a Shared scalar is a single copy per thread block, so every
  ! thread would overwrite the same particle index and loop counters.
  Integer:: i, idx, idy, idz, j1, j2, j3, j4
  Integer:: IdNeibCell, j_neib, nFound, strideY, strideZ
  Real(fp_kind):: xi1, xi2, xi3, hsmli
  Real(fp_kind):: dx1, dx2, dx3, r, d_influence

  i = (blockIdx%x-1)*blockDim%x + threadIdx%x

  If (i >= 1 .and. i <= nTotal) Then
    ! Cache everything that depends only on particle i.
    idx = CellIndex(i,3)
    idy = CellIndex(i,4)
    idz = CellIndex(i,5)
    xi1 = x(i,1)
    xi2 = x(i,2)
    xi3 = x(i,3)
    hsmli = hsml(i)

    ! Strides of the linearised cell index.
    strideY = nCell(1)
    strideZ = nCell(1)*nCell(2)

    ! The count is kept in a local and stored once at the end.
    nFound = 0

    Do j1 = idx-1, idx+1
      Do j2 = idy-1, idy+1
        Do j3 = idz-1, idz+1
          IdNeibCell = (j1-1) + (j2-1)*strideY + (j3-1)*strideZ + 1
          ! Column 1 of CellList is skipped; members start at column 2.
          Do j4 = 2, NodesInCell(IdNeibCell) + 1
            j_neib = CellList(IdNeibCell,j4)
            If (i .ne. j_neib) Then
              d_influence = 2.0_fp_kind*(0.5_fp_kind*(hsmli + hsml(j_neib)))
              dx1 = xi1 - x(j_neib,1)
              dx2 = xi2 - x(j_neib,2)
              dx3 = xi3 - x(j_neib,3)
              r = sqrt( (dx1*dx1) + (dx2*dx2) + (dx3*dx3) )
              If (r <= d_influence) Then
                nFound = nFound + 1
                Neib(i,1) = i
                ! Neighbour n is stored in column n + 2.
                Neib(i,nFound+2) = j_neib
              End If
            End If
          End Do
        End Do
      End Do
    End Do

    Neib(i,2) = nFound
  End If
End Subroutine BucketSearch
Accesses to the hsml and x arrays at index j_neib are not coalesced, so I thought I could use textures for them:
!> Cell-list neighbour search kernel (texture variant); each thread handles
!! one particle i.
!!
!! Thread i visits the 27 cells whose cell indices differ by -1, 0 or +1 from
!! those of particle i's cell and records every particle j /= i whose distance
!! to i is <= 2 * 0.5 * (hsml(i) + hsml_t(j)). Data for particle i is read from
!! the dummy arrays x and hsml; data for candidate j is read through x_t and
!! hsml_t.
!!
!! Arguments
!!   x(nTotal,Dim)          coordinates; columns 1..3 are read (so Dim >= 3).
!!   hsml(nTotal)           per-particle length used for the cutoff
!!                          (presumably the smoothing length - confirm).
!!   CellList(c,2:n+1)      particle indices stored in cell c, n = NodesInCell(c).
!!   nCell(1:2)             cell counts used to linearise the 3-D cell index.
!!   CellIndex(i,3:5)       the three cell indices of particle i.
!!   NodesInCell(c)         number of particles in cell c.
!!   Neib(i,1)              set to i once at least one neighbour is found.
!!   Neib(i,2)              number of neighbours found for particle i.
!!   Neib(i,3:)             neighbour indices, in discovery order.
!!   nTotal, maxIndex       array extents (passed by value).
!!
!! NOTE(review): hsml_t and x_t are not declared in this subroutine; presumably
!! they are module-level texture pointers that the host associates with hsml
!! and x before the launch. Verify that association, and that the compiler's
!! texture support covers their kind (fp_kind) and the rank-2 shape of x_t.
!! NOTE(review): j1/j2/j3 are not range-checked, so the cell grid is assumed to
!! have a layer of cells around every occupied cell - TODO confirm.
!! NOTE(review): the neighbour slot is not checked against nNeib_max; the
!! caller is assumed to size Neib generously enough - TODO confirm.
Attributes(Global) Subroutine BucketSearch &
(x,hsml,CellList,nCell,CellIndex,NodesInCell,Neib,nTotal,maxIndex)
  Implicit None
  Integer, Value:: nTotal, maxIndex
  Integer, Device, Intent(IN):: CellList(maxIndex,NumCLRows), nCell(3)
  Integer, Device, Intent(IN):: NodesInCell(maxIndex), CellIndex(nTotal,NumCIRows)
  Integer, Device, Intent(OUT):: Neib(nTotal,nNeib_max)
  Real(fp_kind), Device, Intent(IN):: x(nTotal,Dim), hsml(nTotal)

  ! All work variables are per-thread locals. They must NOT carry the Shared
  ! attribute: a Shared scalar is a single copy per thread block, so every
  ! thread would overwrite the same particle index and loop counters.
  Integer:: i, idx, idy, idz, j1, j2, j3, j4
  Integer:: IdNeibCell, j_neib, nFound, strideY, strideZ
  Real(fp_kind):: xi1, xi2, xi3, hsmli
  Real(fp_kind):: dx1, dx2, dx3, r, d_influence

  i = (blockIdx%x-1)*blockDim%x + threadIdx%x

  If (i >= 1 .and. i <= nTotal) Then
    ! Cache everything that depends only on particle i, so the inner loop
    ! does not re-read x(i,:) and hsml(i) for every candidate.
    idx = CellIndex(i,3)
    idy = CellIndex(i,4)
    idz = CellIndex(i,5)
    xi1 = x(i,1)
    xi2 = x(i,2)
    xi3 = x(i,3)
    hsmli = hsml(i)

    ! Strides of the linearised cell index.
    strideY = nCell(1)
    strideZ = nCell(1)*nCell(2)

    ! The count is kept in a local and stored once at the end.
    nFound = 0

    Do j1 = idx-1, idx+1
      Do j2 = idy-1, idy+1
        Do j3 = idz-1, idz+1
          IdNeibCell = (j1-1) + (j2-1)*strideY + (j3-1)*strideZ + 1
          ! Column 1 of CellList is skipped; members start at column 2.
          Do j4 = 2, NodesInCell(IdNeibCell) + 1
            j_neib = CellList(IdNeibCell,j4)
            If (i .ne. j_neib) Then
              ! Candidate data is fetched through hsml_t / x_t.
              d_influence = 2.0_fp_kind*(0.5_fp_kind*(hsmli + hsml_t(j_neib)))
              dx1 = xi1 - x_t(j_neib,1)
              dx2 = xi2 - x_t(j_neib,2)
              dx3 = xi3 - x_t(j_neib,3)
              r = sqrt( (dx1*dx1) + (dx2*dx2) + (dx3*dx3) )
              If (r <= d_influence) Then
                nFound = nFound + 1
                Neib(i,1) = i
                ! Neighbour n is stored in column n + 2.
                Neib(i,nFound+2) = j_neib
              End If
            End If
          End Do
        End Do
      End Do
    End Do

    Neib(i,2) = nFound
  End If
End Subroutine BucketSearch
Can you see a reason why this doesn’t work? The standard algorithm without textures works perfectly fine. I have prepared minimal reproducing cases for both the standard and the texture approaches. Let me know if you want me to send them to you.
I have tried simple texture test cases and they work fine. There is something about this search algorithm that is giving me trouble.
Thank you for any help,
Kirk