Hello All,
I have an issue regrading Shuffle instruction in Kepler. I have used __shfl_down(var,1,32) and i am getting wrong value at laneID=30. It should return a correct value but it always returns value=0. I have also implement same functionality with __shfl(var,(laneid+1)) but i got same results.
(1) Here is my kernel with __shfl() instruction.
__global__ void SHUFFLE(unsigned char* input, unsigned char* output)
{
int pos = threadIdx.x + blockIdx.x * blockDim.x;
int x = pos % WIDTH;
int y = pos / HEIGHT;
int input0 = 255;
int input1 = 1;
// laneID is Thread's index within a warp
int laneID = threadIdx.x % 32;
input0 = input[pos];
if(laneID == 31)
{
input1 = input[pos+1];
}
else
{
input1 = __shfl(input0,(laneID+1));
if (input1 == 1)
{
printf("___GOT NO VALUE__ THREAD EXITED...!!!\n");
input1 = input[pos+1];
}
}
if( x < WIDTH && y < HEIGHT)
{
if ( (laneID <= 31) && (blockIdx.x == 0) && (threadIdx.x <= 31) )
printf("LANEID = %d AT pos=%d input0=%d input1=%d \n",laneID,pos,input0,input1);
if (pos < 33)
printf("pos=%d, Value=%d \n",pos,input[pos]);
}
}
(2) Here is my kernel with __shfl_down() instruction.
__global__ void shffle_kernel(unsigned char* input, unsigned char* output)
{
int pos = threadIdx.x + blockIdx.x * blockDim.x;
int x = pos % WIDTH;
int y = pos / HEIGHT;
int input0 = 255;
int input1 = 1;
// laneID is Thread's index within a warp
int laneID = threadIdx.x % 32;
input0 = input[pos];
if(laneID == 31)
{
input1 = input[pos+1];
}
else
{
input1 = __shfl_down(input0,1,32);
if (input1 == 1)
{
printf("___GOT NO VALUE__ THREAD EXITED...!!!\n");
input1 = input[pos+1];
}
}
if( x < WIDTH && y < HEIGHT)
{
if ( (laneID <= 31) && (blockIdx.x == 0) && (threadIdx.x <= 31) )
printf("LANEID = %d AT pos=%d input0=%d input1=%d \n",laneID,pos,input0,input1);
if (pos < 33)
printf("pos=%d, Value=%d \n",pos,input[pos]);
}
}
Here is output of 2nd kernel which is __shfl_down()
LANEID = 0 AT pos=0 input0=156 input1=155
LANEID = 1 AT pos=1 input0=155 input1=154
LANEID = 2 AT pos=2 input0=154 input1=153
LANEID = 3 AT pos=3 input0=153 input1=156
LANEID = 4 AT pos=4 input0=156 input1=150
LANEID = 5 AT pos=5 input0=150 input1=156
LANEID = 6 AT pos=6 input0=156 input1=155
LANEID = 7 AT pos=7 input0=155 input1=158
LANEID = 8 AT pos=8 input0=158 input1=154
LANEID = 9 AT pos=9 input0=154 input1=156
LANEID = 10 AT pos=10 input0=156 input1=154
LANEID = 11 AT pos=11 input0=154 input1=148
LANEID = 12 AT pos=12 input0=148 input1=158
LANEID = 13 AT pos=13 input0=158 input1=152
LANEID = 14 AT pos=14 input0=152 input1=148
LANEID = 15 AT pos=15 input0=148 input1=152
LANEID = 16 AT pos=16 input0=152 input1=147
LANEID = 17 AT pos=17 input0=147 input1=156
LANEID = 18 AT pos=18 input0=156 input1=154
LANEID = 19 AT pos=19 input0=154 input1=147
LANEID = 20 AT pos=20 input0=147 input1=151
LANEID = 21 AT pos=21 input0=151 input1=151
LANEID = 22 AT pos=22 input0=151 input1=151
LANEID = 23 AT pos=23 input0=151 input1=149
LANEID = 24 AT pos=24 input0=149 input1=151
LANEID = 25 AT pos=25 input0=151 input1=149
LANEID = 26 AT pos=26 input0=149 input1=147
LANEID = 27 AT pos=27 input0=147 input1=149
LANEID = 28 AT pos=28 input0=149 input1=150
LANEID = 29 AT pos=29 input0=150 input1=149
LANEID = 30 AT pos=30 input0=149 input1=0
LANEID = 31 AT pos=31 input0=148 input1=147
If somebody has answer kindly post it. This is my first post so if somethings not clear then inform me. Thanks in advance.