How to implement mean and standard deviation Image processing

Hi all,

I need to calculate the mean and standard deviation of the lines of a matrix (image) of bytes (0 - 255 values).

columns = x

lines = y

The code to calculate the mean is here:

[codebox]/*
 * Host driver for the per-line reduction.
 *
 * Copies numLines rows of pixelsPerLine CFPTYPE values from f_d_lines into the
 * scratch buffer f_d_threasholdslocal (device to device, honouring both
 * pitches), then launches CalcThreasholdsGPUVR once per pass with
 * stride = 2, 4, 8, ... while stride/2 < pixelsPerLine.  The scratch copy is
 * needed because the kernel accumulates in place.
 *
 * When pixelsPerLine <= 1 the loop body never runs, so no kernel is launched.
 *
 * f_d_lines, f_d_threasholdslocal, f_d_threasholds_SumQ, f_d_threasholds,
 * pitchLines, pitchThreasholds, dimBlocksLines and dimGridLines are defined
 * elsewhere in the file.
 *
 * Returns 0 (cudaSuccess) on success, otherwise the cudaError_t value of the
 * first CUDA call that failed.
 */
extern "C" int CalcThreasholds(unsigned int numLines, unsigned int pixelsPerLine)
{
	cudaError_t cerror;

	cerror = cudaMemcpy2D(f_d_threasholdslocal, pitchThreasholds, f_d_lines, pitchLines,
	                      pixelsPerLine*sizeof(CFPTYPE), numLines, cudaMemcpyDeviceToDevice);
	if (cerror != cudaSuccess)
		return cerror;

	for (unsigned int stride=2; (stride/2)<pixelsPerLine; stride*=2)
	{
		/* One thread per surviving partial sum in x, one per line in y. */
		CalcDims(ceil(pixelsPerLine/(float)stride), numLines, &dimBlocksLines, &dimGridLines);

		CalcThreasholdsGPUVR<<<dimGridLines, dimBlocksLines>>>(stride, pixelsPerLine, pitchThreasholds, f_d_threasholdslocal, f_d_threasholds_SumQ, f_d_threasholds);

		/* Launch-configuration errors are reported here ... */
		cerror = cudaGetLastError();
		if (cerror != cudaSuccess)
			return cerror;

		/* ... and errors raised while the kernel ran are reported by the sync. */
		cerror = cudaDeviceSynchronize();
		if (cerror != cudaSuccess)
			return cerror;
	}

	return cudaSuccess;
}[/codebox]

[codebox]/*
 * One pass of an in-place pairwise sum along each line of a pitched 2D buffer.
 *
 * Each thread handles element x = (global x thread index) * stride of line y
 * and adds element x + stride/2 into it when that element lies inside the
 * line.  Launched with stride = 2, 4, 8, ... the total of a line ends up in
 * its element 0.  On the pass where stride >= pixelsPerLine (the last pass of
 * a doubling loop that runs while stride/2 < pixelsPerLine) the thread that
 * owns element 0 stores total / pixelsPerLine in f_d_threasholds[y].
 *
 * Parameters:
 *   stride                - distance between the elements owned by adjacent
 *                           threads; the partner element is stride/2 away
 *   pixelsPerLine         - number of valid elements per line
 *   pitchTField           - byte pitch of f_d_threasholdsField
 *   f_d_threasholdsField  - buffer that is summed in place (contents destroyed)
 *   f_d_threasholds_SumQ  - not used by this kernel
 *   f_d_threasholds       - output, one mean per line
 *
 * NOTE(review): y is not bounds-checked because the line count is not passed
 * in; the launch grid must not produce y values beyond the last line.
 */
__global__
void CalcThreasholdsGPUVR(unsigned int stride, unsigned int pixelsPerLine, size_t pitchTField, CFPTYPE *f_d_threasholdsField, CFPTYPE *f_d_threasholds_SumQ, CFPTYPE *f_d_threasholds)
{
	/* Computed in size_t so that surplus threads of an over-sized grid cannot
	   wrap around and alias a valid element. */
	const size_t x = ((size_t)blockIdx.x*blockDim.x + threadIdx.x) * stride;
	const unsigned int y = blockIdx.y*blockDim.y + threadIdx.y;

	CFPTYPE *row = (CFPTYPE*)((char*)f_d_threasholdsField + y*pitchTField);

	/* x is a multiple of stride by construction, so only the range check is needed. */
	if ((x + stride/2) < pixelsPerLine)
	{
		row[x] += row[x + stride/2];
	}

	/* Final pass: only the thread that just updated row[0] publishes the mean,
	   so no barrier is required and no other thread can read a stale total.
	   ">=" (not ">") so that power-of-two widths, whose last pass has
	   stride == pixelsPerLine, also get their mean written. */
	if ((x == 0) && (stride >= pixelsPerLine))
	{
		f_d_threasholds[y] = row[0] / (CFPTYPE)pixelsPerLine;
	}
}[/codebox]

It works fine, but I also need to calculate the standard deviation of each line. The equation is:

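$$\mathrm{StdDev} = \sqrt{\frac{\mathrm{SumQ} - \mathrm{Sum}^2 / N}{N - 1}}$$ (originally posted as an external image)

where N = pixelsPerLine, Sum = mean * N is the sum of the pixel values of line y, and SumQ is the sum of their squares: SumQ += p*p for every pixel p of line y.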

Any idea? Thanks