Help with indexing to calc the AVG sum of a 3d array

I’ve originally wrote this demo to simulate the changes in a 3D volume (as crude as it is) in FreeBasic as it was quick and dirty.

I’ve been looking at converting it to CUDA in Visual C++.

The problem is I’m not referencing the 3D data correctly and not sure if it’s the way your supposed to used the blockIdx and threadIdx,

If someone could have a quick look over this piece of code and maybe point out where my indexing is failing.

To note: each point in the 3D array simply averages the values of the imitate surrounding points (26 of them) and then simply adjusts the central point by half the average of the surrounding points.

It’s not complicated it’s just I’m going around in circles.

Any help is appricated

[codebox]// example1.cpp : Defines the entry point for the console application.

//

#include “stdafx.h”

#include <stdio.h>

#include <cuda.h>

global void calc_avg(float *datain, float *dataout,int N, int sN)

{

int idx = blockIdx.x * blockDim.x + threadIdx.x;

float avg;

int x,y,z,bdx;

//

x=0; y=0; z=0; bdx=((sN*(y-1))+(x-1))+((z-1)*sN); if(idx+bdx<0) bdx=N+bdx; if(idx+bdx>N) bdx=(idx+bdx)-N; avg = datain[idx+bdx];

x=1; y=0; z=0; bdx=((sN*(y-1))+(x-1))+((z-1)*sN); if(idx+bdx<0) bdx=N+bdx; if(idx+bdx>N) bdx=(idx+bdx)-N; avg += datain[idx+bdx];

x=2; y=0; z=0; bdx=((sN*(y-1))+(x-1))+((z-1)*sN); if(idx+bdx<0) bdx=N+bdx; if(idx+bdx>N) bdx=(idx+bdx)-N; avg += datain[idx+bdx];

x=0; y=1; z=0; bdx=((sN*(y-1))+(x-1))+((z-1)*sN); if(idx+bdx<0) bdx=N+bdx; if(idx+bdx>N) bdx=(idx+bdx)-N; avg += datain[idx+bdx];

x=1; y=1; z=0; bdx=((sN*(y-1))+(x-1))+((z-1)*sN); if(idx+bdx<0) bdx=N+bdx; if(idx+bdx>N) bdx=(idx+bdx)-N; avg += datain[idx+bdx];

x=2; y=1; z=0; bdx=((sN*(y-1))+(x-1))+((z-1)*sN); if(idx+bdx<0) bdx=N+bdx; if(idx+bdx>N) bdx=(idx+bdx)-N; avg += datain[idx+bdx];

x=0; y=2; z=0; bdx=((sN*(y-1))+(x-1))+((z-1)*sN); if(idx+bdx<0) bdx=N+bdx; if(idx+bdx>N) bdx=(idx+bdx)-N; avg += datain[idx+bdx];

x=1; y=2; z=0; bdx=((sN*(y-1))+(x-1))+((z-1)*sN); if(idx+bdx<0) bdx=N+bdx; if(idx+bdx>N) bdx=(idx+bdx)-N; avg += datain[idx+bdx];

x=2; y=2; z=0; bdx=((sN*(y-1))+(x-1))+((z-1)*sN); if(idx+bdx<0) bdx=N+bdx; if(idx+bdx>N) bdx=(idx+bdx)-N; avg += datain[idx+bdx];

x=0; y=0; z=1; bdx=((sN*(y-1))+(x-1))+((z-1)*sN); if(idx+bdx<0) bdx=N+bdx; if(idx+bdx>N) bdx=(idx+bdx)-N; avg += datain[idx+bdx];

x=1; y=0; z=1; bdx=((sN*(y-1))+(x-1))+((z-1)*sN); if(idx+bdx<0) bdx=N+bdx; if(idx+bdx>N) bdx=(idx+bdx)-N; avg += datain[idx+bdx];

x=2; y=0; z=1; bdx=((sN*(y-1))+(x-1))+((z-1)*sN); if(idx+bdx<0) bdx=N+bdx; if(idx+bdx>N) bdx=(idx+bdx)-N; avg += datain[idx+bdx];

x=0; y=1; z=1; bdx=((sN*(y-1))+(x-1))+((z-1)*sN); if(idx+bdx<0) bdx=N+bdx; if(idx+bdx>N) bdx=(idx+bdx)-N; avg += datain[idx+bdx];

//x=1; y=1; z=1; bdx=((sN*(y-1))+(x-1))+((z-1)*sN); if(idx+bdx<0) bdx=N+bdx; if(idx+bdx>N) bdx=(idx+bdx)-N; avg += datain[idx+bdx];

x=2; y=1; z=1; bdx=((sN*(y-1))+(x-1))+((z-1)*sN); if(idx+bdx<0) bdx=N+bdx; if(idx+bdx>N) bdx=(idx+bdx)-N; avg += datain[idx+bdx];

x=0; y=2; z=1; bdx=((sN*(y-1))+(x-1))+((z-1)*sN); if(idx+bdx<0) bdx=N+bdx; if(idx+bdx>N) bdx=(idx+bdx)-N; avg += datain[idx+bdx];

x=1; y=2; z=1; bdx=((sN*(y-1))+(x-1))+((z-1)*sN); if(idx+bdx<0) bdx=N+bdx; if(idx+bdx>N) bdx=(idx+bdx)-N; avg += datain[idx+bdx];

x=2; y=2; z=1; bdx=((sN*(y-1))+(x-1))+((z-1)*sN); if(idx+bdx<0) bdx=N+bdx; if(idx+bdx>N) bdx=(idx+bdx)-N; avg += datain[idx+bdx];

x=0; y=0; z=2; bdx=((sN*(y-1))+(x-1))+((z-1)*sN); if(idx+bdx<0) bdx=N+bdx; if(idx+bdx>N) bdx=(idx+bdx)-N; avg += datain[idx+bdx];

x=1; y=0; z=2; bdx=((sN*(y-1))+(x-1))+((z-1)*sN); if(idx+bdx<0) bdx=N+bdx; if(idx+bdx>N) bdx=(idx+bdx)-N; avg += datain[idx+bdx];

x=2; y=0; z=2; bdx=((sN*(y-1))+(x-1))+((z-1)*sN); if(idx+bdx<0) bdx=N+bdx; if(idx+bdx>N) bdx=(idx+bdx)-N; avg += datain[idx+bdx];

x=0; y=1; z=2; bdx=((sN*(y-1))+(x-1))+((z-1)*sN); if(idx+bdx<0) bdx=N+bdx; if(idx+bdx>N) bdx=(idx+bdx)-N; avg += datain[idx+bdx];

x=1; y=1; z=2; bdx=((sN*(y-1))+(x-1))+((z-1)*sN); if(idx+bdx<0) bdx=N+bdx; if(idx+bdx>N) bdx=(idx+bdx)-N; avg += datain[idx+bdx];

x=2; y=1; z=2; bdx=((sN*(y-1))+(x-1))+((z-1)*sN); if(idx+bdx<0) bdx=N+bdx; if(idx+bdx>N) bdx=(idx+bdx)-N; avg += datain[idx+bdx];

x=0; y=2; z=2; bdx=((sN*(y-1))+(x-1))+((z-1)*sN); if(idx+bdx<0) bdx=N+bdx; if(idx+bdx>N) bdx=(idx+bdx)-N; avg += datain[idx+bdx];

x=1; y=2; z=2; bdx=((sN*(y-1))+(x-1))+((z-1)*sN); if(idx+bdx<0) bdx=N+bdx; if(idx+bdx>N) bdx=(idx+bdx)-N; avg += datain[idx+bdx];

x=2; y=2; z=2; bdx=((sN*(y-1))+(x-1))+((z-1)*sN); if(idx+bdx<0) bdx=N+bdx; if(idx+bdx>N) bdx=(idx+bdx)-N; avg += datain[idx+bdx];

dataout[idx]=datain[idx]-((avg/26)/2);

}

// main routine that executes on the host

int main(void)

{

const int dimsize = 64;

float *a_h, *a_d, *b_d;  // Pointer to host & device arrays

size_t N = (dimsize*dimsize*dimsize) * sizeof(float);



a_h = (float *)malloc(N);        // Allocate array on host

cudaMalloc((void **) &a_d, N);   // Allocate array on device

cudaMalloc((void **) &b_d, N);   // Allocate array on device

// Initialize host array and copy it to CUDA device

for (int z=0; z<dimsize; z++) 

	for (int y=0; y<dimsize; y++) 

		for (int x=0; x<dimsize; x++) 

			a_h[(z*(dimsize*2))+(((y*dimsize)*x)+x)] = ((float)rand() / ( (float)(RAND_MAX)+(float)(1) ));

			

cudaMemcpy(a_d, a_h, N, cudaMemcpyHostToDevice);



// Do calculation on device:

int block_size = 4;

int n_blocks = N/block_size + (N%block_size == 0 ? 0:1);

calc_avg <<< n_blocks, block_size >>> (a_d, b_d, N, dimsize);

// Retrieve result from device and store it in host array

cudaMemcpy(a_h, b_d, N, cudaMemcpyDeviceToHost);



// Print results

for (int i=0; i<10; i++) printf("%d %f\n", i, a_h[i]);

getchar();



// Cleanup

free(a_h); cudaFree(a_d); cudaFree(b_d); 

}[/codebox]