Scalar Product (dot product) with Atomic Operations: How to implement a dot product entirely in the kernel

Here is a MATLAB version; it is the preconditioned version:

[codebox]

function [relErrAr,dv] = SolvePCG(smtA,smtP,smtPinv,b,iMaxIter)
% SOLVEPCG  Preconditioned conjugate-gradient iteration for smtA*dv = b.
%
%   [relErrAr,dv] = SolvePCG(smtA,smtP,smtPinv,b,iMaxIter)
%
%   Inputs
%     smtA     - square system matrix (sparse or full).
%                NOTE(review): CG presumably requires it to be symmetric
%                positive definite; this is not checked here.
%     smtP     - preconditioner matrix. Not used by the iteration; kept
%                only so existing callers keep working.
%     smtPinv  - matrix applied to the residual as the preconditioner
%                solve (t = smtPinv*r).
%     b        - right-hand-side column vector.
%     iMaxIter - maximum number of iterations.
%
%   Outputs
%     relErrAr - iMaxIter-by-1 vector; entry k holds the relative error
%                sqrt(deltaN/dot(b,b)) after iteration k. Entries for
%                iterations that were never completed stay 0.
%     dv       - solution estimate, starting from the zero vector.
%
%   The loop stops when the relative error drops to 1e-5 or below, when
%   iMaxIter iterations have run, or when dot(r,smtPinv*r) turns negative.

    % size(smtA,1) works in both MATLAB and Octave; the chained form
    % size(smtA)(1,1) is Octave-only syntax.
    n = size(smtA,1);
    dv = zeros(n,1);
    relErrAr = zeros(iMaxIter,1);

    % With the initial guess dv = 0 the residual b - smtA*dv is just b.
    r = full(b);
    bnorm = dot(r,r);

    % A zero right-hand side is solved exactly by dv = 0; returning here
    % avoids the 0/0 that would otherwise poison alpha and dv with NaN.
    if (bnorm == 0)
        return;
    end

    c = full(smtPinv*r);      % first search direction
    deltaN = dot(r,c);

    iIter = 0;
    relErr = 1;

    while (relErr > 1e-5 && iIter < iMaxIter)
        q = full(smtA*c);
        alpha = deltaN/dot(c,q);   % step length along c

        dv = dv + c*alpha;
        r = r - q*alpha;

        t1 = full(smtPinv*r);      % preconditioned residual

        deltaO = deltaN;
        deltaN = dot(r,t1);

        % A negative value makes the sqrt below complex; stop iterating.
        % NOTE(review): presumably indicates smtPinv is not positive
        % definite or a numerical breakdown - confirm.
        if (deltaN < 0)
            break;
        end

        c = c*(deltaN/deltaO) + t1;   % next search direction

        iIter = iIter + 1;

        relErr = sqrt(deltaN/bnorm);
        relErrAr(iIter) = relErr;     % record the convergence history
    end
end

[/codebox]