Hi,
I have written a rotation for 3D float volumes ranging from 64×64×64 up to 512×512×512. My problem now is that the interpolation takes "hours". It is a simple tri-linear interpolation, and I think the problem is the massive number of reads from GPU memory that it requires. So my question is: is there a better way to do an interpolation like this?
/**
 * Tri-linear interpolation of a 3-D float volume at a fractional position.
 *
 * The volume is addressed as image[x + y * sx + z * sx * sy].
 *
 * @param image  pointer to the first voxel of the volume
 * @param sx     volume extent along x (fastest-varying index)
 * @param sy     volume extent along y
 * @param sz     volume extent along z
 * @param r_x    fractional x coordinate of the sample
 * @param r_y    fractional y coordinate of the sample
 * @param r_z    fractional z coordinate of the sample
 * @return the interpolated value; 0 when the sample lies outside
 *         [0, sx) x [0, sy) x [0, sz). Neighbours that would fall beyond the
 *         last voxel along an axis are treated as 0 (zero padding).
 *
 * NOTE(review): the previous revision referenced undeclared `sxy[...]` /
 * `sxy2[...]` arrays. This version assumes they meant the row/slice strides
 * (sx, sx * sy) and the last valid index per axis (sx-1, sy-1, sz-1) --
 * confirm against the caller.
 *
 * NOTE(review): if the 8 reads per sample dominate the run time, binding the
 * volume to a 3-D texture with linear filtering lets the hardware do this
 * interpolation -- worth evaluating, but it uses reduced-precision weights.
 */
__device__ real InterpolatedValueL(float *image, long sx, long sy, long sz, real r_x, real r_y, real r_z)
{
    // Reject samples outside the volume (the negated form also rejects NaN),
    // so that the unconditional read of the base voxel below is always valid.
    if (!(r_x >= 0 && r_x < sx && r_y >= 0 && r_y < sy && r_z >= 0 && r_z < sz))
    {
        return 0;
    }

    // Coordinates are non-negative here, so truncation equals floor().
    const long ix = static_cast<long>(r_x);
    const long iy = static_cast<long>(r_y);
    const long iz = static_cast<long>(r_z);

    // Fractional offsets and their complements, kept in single precision
    // (a `1.0` literal would silently promote the arithmetic to double).
    const float fx = static_cast<float>(r_x - ix);
    const float fy = static_cast<float>(r_y - iy);
    const float fz = static_cast<float>(r_z - iz);
    const float gx = 1.0f - fx;
    const float gy = 1.0f - fy;
    const float gz = 1.0f - fz;

    // Whether the "+1" neighbour along each axis is still inside the volume.
    const bool hasX = ix < sx - 1;
    const bool hasY = iy < sy - 1;
    const bool hasZ = iz < sz - 1;

    // Integer index arithmetic: a float cannot represent indices above 2^24
    // exactly, which a 512^3 volume exceeds.
    const long sxyL = sx * sy;
    const long base = ix + iy * sx + iz * sxyL;

    // Fetch the eight cell corners (named v<x><y><z>); missing ones read as 0.
    const float v000 = image[base];
    const float v100 = hasX ? image[base + 1] : 0.0f;
    const float v010 = hasY ? image[base + sx] : 0.0f;
    const float v110 = (hasX && hasY) ? image[base + sx + 1] : 0.0f;
    const float v001 = hasZ ? image[base + sxyL] : 0.0f;
    const float v101 = (hasX && hasZ) ? image[base + sxyL + 1] : 0.0f;
    const float v011 = (hasY && hasZ) ? image[base + sx + sxyL] : 0.0f;
    const float v111 = (hasX && hasY && hasZ) ? image[base + sx + sxyL + 1] : 0.0f;

    // Collapse x, then y, then z.
    const float c00 = v000 * gx + v100 * fx;
    const float c10 = v010 * gx + v110 * fx;
    const float c01 = v001 * gx + v101 * fx;
    const float c11 = v011 * gx + v111 * fx;

    return (c00 * gy + c10 * fy) * gz + (c01 * gy + c11 * fy) * fz;
}
Thanks for any ideas.
cheers