Right/left shift operations for int4

Hello,

I was looking for ready-to-use functions that implement right/left bit shifts for CUDA's `int4` type. As far as I can see, there are no such functions in the SDK, so I had to implement them myself. Please take a look at my right-shift implementation:

/**
 * Logical right shift of a 128-bit value stored in an int4.
 *
 * The four components are treated as one unsigned 128-bit integer with
 * `x` as the most significant 32-bit word and `w` as the least significant
 * one (the same layout the word-level moves below rely on).
 *
 * @param a_value  128-bit value to shift (x = bits 127..96, w = bits 31..0).
 * @param a_num    Number of bit positions to shift right.
 * @return         a_value shifted right by a_num bits, with zeros shifted in
 *                 from the top. Shifting by 128 or more yields all zeros.
 *
 * Notes:
 *  - All arithmetic is done on unsigned words so that `>>` never
 *    sign-extends into bits that must receive the carry from the next
 *    higher word, and `<<` never operates on a negative signed value.
 *  - A sub-word shift of 0 is handled separately because the carry term
 *    would otherwise be `word << 32`, which is undefined for 32-bit types.
 *  - The conditionals only guard plain register assignments, so the
 *    compiler is expected to lower them to selects/predication rather than
 *    divergent branches (not verified here; inspect the SASS to confirm).
 */
__host__ __device__ inline int4 right_shift_int4(int4 a_value, unsigned int a_num)
{
    // Every bit is shifted out.
    if (a_num >= 128u) {
        return make_int4(0, 0, 0, 0);
    }

    const unsigned int word_shift = a_num >> 5;   // whole 32-bit words: 0..3
    const unsigned int bit_shift  = a_num & 31u;  // remaining bits:     0..31

    unsigned int x = static_cast<unsigned int>(a_value.x);
    unsigned int y = static_cast<unsigned int>(a_value.y);
    unsigned int z = static_cast<unsigned int>(a_value.z);
    unsigned int w = static_cast<unsigned int>(a_value.w);

    // Stage 1: move by 64 bits if requested (x,y,z,w) -> (0,0,x,y).
    if (word_shift & 2u) {
        w = y;
        z = x;
        y = 0u;
        x = 0u;
    }

    // Stage 2: move by 32 bits if requested (x,y,z,w) -> (0,x,y,z).
    if (word_shift & 1u) {
        w = z;
        z = y;
        y = x;
        x = 0u;
    }

    // Stage 3: sub-word shift. Process from the least significant word
    // upwards so each word still sees the unmodified word above it.
    if (bit_shift != 0u) {
        const unsigned int carry_shift = 32u - bit_shift;  // 1..31, always valid
        w = (w >> bit_shift) | (z << carry_shift);
        z = (z >> bit_shift) | (y << carry_shift);
        y = (y >> bit_shift) | (x << carry_shift);
        x >>= bit_shift;
    }

    return make_int4(static_cast<int>(x),
                     static_cast<int>(y),
                     static_cast<int>(z),
                     static_cast<int>(w));
}

My question: is this implementation efficient, or is there a better way to implement these shifts for `int4` (for example, one that avoids thread divergence)? Thank you.