This is my code:
/*
 * Stores the value c into one int16_t element of addr per thread.
 *
 * Element offset written by each thread:
 *     (blockIdx.y / L) * plane_stride
 *   + (blockIdx.y % L) * stride * stride2
 *   +  blockIdx.x      * stride2
 *   +  threadIdx.y     * stride
 *   +  threadIdx.x
 *
 * Preconditions (not checked by the kernel):
 *   - L must be non-zero; it is used as a divisor.
 *   - There is no bounds guard, so the launch configuration and the stride
 *     arguments must keep every computed offset inside the addr buffer.
 *
 * c is narrowed from int to int16_t when stored.
 */
__global__ void CUDA_set_memory_plane2 (int16_t *addr,int c,uint32_t stride,uint32_t stride2,uint32_t plane_stride,uint32_t L)
{
    // blockIdx.y encodes two indices: the quotient by L selects the plane,
    // the remainder selects the row group inside that plane.
    const unsigned int plane = blockIdx.y / L;
    const unsigned int by    = blockIdx.y % L;

    // Accumulate in size_t: the original 32-bit products (by*stride*stride2,
    // plane*plane_stride) wrap silently once the offset exceeds 2^32 elements.
    const size_t offset = static_cast<size_t>(plane) * plane_stride
                        + static_cast<size_t>(by) * stride * stride2
                        + static_cast<size_t>(blockIdx.x) * stride2
                        + static_cast<size_t>(threadIdx.y) * stride
                        + threadIdx.x;

    addr[offset] = static_cast<int16_t>(c);
}
The Visual Profiler reports branch = 76 and divergent branch = 3.
This is strange. Is this right?
This is my code:
/*
 * Stores the value c into one int16_t element of addr per thread.
 *
 * Element offset written by each thread:
 *     (blockIdx.y / L) * plane_stride
 *   + (blockIdx.y % L) * stride * stride2
 *   +  blockIdx.x      * stride2
 *   +  threadIdx.y     * stride
 *   +  threadIdx.x
 *
 * Preconditions (not checked by the kernel):
 *   - L must be non-zero; it is used as a divisor.
 *   - There is no bounds guard, so the launch configuration and the stride
 *     arguments must keep every computed offset inside the addr buffer.
 *
 * c is narrowed from int to int16_t when stored.
 */
__global__ void CUDA_set_memory_plane2 (int16_t *addr,int c,uint32_t stride,uint32_t stride2,uint32_t plane_stride,uint32_t L)
{
    // blockIdx.y encodes two indices: the quotient by L selects the plane,
    // the remainder selects the row group inside that plane.
    const unsigned int plane = blockIdx.y / L;
    const unsigned int by    = blockIdx.y % L;

    // Accumulate in size_t: the original 32-bit products (by*stride*stride2,
    // plane*plane_stride) wrap silently once the offset exceeds 2^32 elements.
    const size_t offset = static_cast<size_t>(plane) * plane_stride
                        + static_cast<size_t>(by) * stride * stride2
                        + static_cast<size_t>(blockIdx.x) * stride2
                        + static_cast<size_t>(threadIdx.y) * stride
                        + threadIdx.x;

    addr[offset] = static_cast<int16_t>(c);
}
The Visual Profiler reports branch = 76 and divergent branch = 3.
This is strange. Is this right?
tera
3
They probably result from the integer division and the modulo operation.
tera
4
They probably result from the integer division and the modulo operation.