How to use atomicCAS() to implement atomicAdd(short)? Trouble adapting programming guide example

Great idea, Sylvain! And it works for signed short, too:

__device__ short atomicAddShort(short* address, short val)

{

    unsigned int *base_address = (unsigned int *)((size_t)address & ~2);

    unsigned int long_val = ((size_t)address & 2) ? ((unsigned int)val << 16) : (unsigned short)val;

unsigned int long_old = atomicAdd(base_address, long_val);

    if((size_t)address & 2) {

        return (short)(long_old >> 16);

    } else {

        unsigned int overflow = ((long_old & 0xffff) + long_val) & 0xffff0000;

        if (overflow)

            atomicSub(base_address, overflow);

        return (short)(long_old & 0xffff);

    }

}