Driver API - Normalized coordinate 2D texture Fetch returns invalid values

The texture is declared as follows and is 4 x 4 texels in size:

// 2D texture reference of uint8_t texels, read with cudaReadModeElementType.
// The host code looks it up by the name "refTex" via cuModuleGetTexRef.
texture<uint8_t, 2, cudaReadModeElementType> refTex;

I set the following texture parameters

// Configure the module-level texture reference "refTex" and attach it to the kernel.
    CUtexref cu_texref;
    errorCode = cuModuleGetTexRef(&cu_texref, cuModule, "refTex");
    cuErr(errorCode, "Fetching reference to global texture failed");

    // Bind the CUDA array that holds the texel data to the texture reference.
    errorCode = cuTexRefSetArray(cu_texref, cu_array, CU_TRSA_OVERRIDE_FORMAT);
    cuErr(errorCode, "Set CUDA array format failed");

    // Wrap addressing in both dimensions (0 = x, 1 = y) so the texture tiles.
    errorCode = cuTexRefSetAddressMode(cu_texref, 0, CU_TR_ADDRESS_MODE_WRAP);
    cuErr(errorCode, "Set CUDA addressing mode failed");
    errorCode = cuTexRefSetAddressMode(cu_texref, 1, CU_TR_ADDRESS_MODE_WRAP);
    cuErr(errorCode, "Set CUDA addressing mode failed");

    // Point sampling; CU_TR_FILTER_MODE_LINEAR is the interpolating alternative.
    errorCode = cuTexRefSetFilterMode(cu_texref, CU_TR_FILTER_MODE_POINT);
    cuErr(errorCode, "Set CUDA filtering mode failed");

    // cuTexRefSetFlags replaces the whole flag word on every call, so all
    // flags must be OR-ed into ONE call. Setting READ_AS_INTEGER and then
    // NORMALIZED_COORDINATES in two separate calls leaves only the latter
    // active, and the fetch then no longer returns raw integer texels.
    errorCode = cuTexRefSetFlags(cu_texref,
                                 CU_TRSF_READ_AS_INTEGER | CU_TRSF_NORMALIZED_COORDINATES);
    cuErr(errorCode, "Set CUDA normalized coordinate addressing mode failed");

    // Single-channel unsigned 8-bit texels.
    errorCode = cuTexRefSetFormat(cu_texref, CU_AD_FORMAT_UNSIGNED_INT8, 1);
    cuErr(errorCode, "Set CUDA texture data type failed");

    // Look up the kernel and make the texture reference available to it.
    CUfunction hfunc;
    errorCode = cuModuleGetFunction(&hfunc, cuModule, "Png_kernel");
    cuErr(errorCode, "Get module functionPNG_TEXTURE failed");
    errorCode = cuParamSetTexRef(hfunc, CU_PARAM_TR_DEFAULT, cu_texref);
    cuErr(errorCode, "Bind CUDA array totexture failed");

When I use normalized coordinates, the texture fetch returns invalid values:

// Fetches one texel of refTex using normalized texture coordinates.
//
// x, y          integer pixel position in the output image.
// ditherFactor  divisor used to scale the pixel position into texture space.
//               Must be non-zero (it is used as a divisor).
//               NOTE(review): the result only tiles the texture texel-by-texel
//               if ditherFactor equals the texture width/height and the
//               texture reference uses wrap addressing - confirm at call site.
//
// Returns the value produced by tex2D at the computed (u, v).
__device__
uint8_t ditherTex ( uint32_t x, uint32_t y,uint8_t ditherFactor)
{
  // Float literals keep the arithmetic in single precision on the device.
  const float spacing  = 1.0f / (float)ditherFactor;
  const float midpoint = 0.5f;  // sample at the texel centre

  const float u = ((float)x + midpoint) * spacing;
  const float v = ((float)y + midpoint) * spacing;

  return tex2D(refTex, u, v);
}

But when I turn off normalized coordinates, the texture fetch works properly.

When I use unnormalized coordinates, the kernel reads the texture properly:

// Fetches one texel of refTex using unnormalized (texel-space) coordinates.
//
// x, y          integer pixel position in the output image.
// ditherFactor  tile period; the position is reduced modulo this value to
//               repeat the texture manually. Must be non-zero.
//
// Returns the value produced by tex2D at the centre of texel (u, v).
__device__
uint8_t ditherTex ( uint32_t x, uint32_t y,uint8_t ditherFactor)
{
  // u and v were previously used without a declaration.
  const uint32_t u = x % ditherFactor;
  const uint32_t v = y % ditherFactor;

  // +0.5f addresses the texel centre; the float literal avoids double math.
  return tex2D(refTex, u + 0.5f, v + 0.5f);
}

My goal is to tile the small 4 x 4 texture across a 2000 x 2000 image. With unnormalized coordinates I use the modulo operator to imitate WRAP mode, which works just fine. But when I switch to normalized coordinates, the texture fetch returns invalid values.

Thanks for your help
nicolas