Is __float2ll_rz the default behavior?

Quick question (at least, for me, this is a short question). If I write

float x = (... some result ...);
long long int fp_x = x;

is the default behavior to compile that in a CUDA kernel as

long long int fp_x = __float2ll_rz(x);

I know there are __float2ll_rd, __float2ll_ru, and __float2ll_rn options, of which I actually prefer __float2ll_rn for its ability to suppress the effects of truncation when accumulating lots of floating-point contributions in a fixed-precision, deterministic process. (If you’re truncating every contribution to its whole part, your overall process is being muffled just a little bit, whereas a conversion that can round 1021583.56 up to 1021584 rather than down to 1021583 can convey more accuracy.) I know that the C++ static_cast<long long int>(x) behaves like __float2ll_rz(x).

# cat t70.cu
#include <math.h>
// f1: plain implicit float -> long long conversion, exactly as written in the question.
// In the SASS listing below (_Z2f1f) this becomes F2I.S64.F32.TRUNC, the same instruction as f2.
__device__ long long int f1(float x) {long long int fp_x = x; return fp_x;}
// f2: explicit __float2ll_rz (round toward zero).
// In the SASS listing below (_Z2f2f) this becomes F2I.S64.F32.TRUNC.
__device__ long long int f2(float x) {long long int fp_x = __float2ll_rz(x); return fp_x;}
// f3: explicit __float2ll_rd (round down).
// In the SASS listing below (_Z2f3f) this becomes F2I.S64.F32.FLOOR.
__device__ long long int f3(float x) {long long int fp_x = __float2ll_rd(x); return fp_x;}
// f4: explicit __float2ll_ru (round up).
// In the SASS listing below (_Z2f4f) this becomes F2I.S64.F32.CEIL.
__device__ long long int f4(float x) {long long int fp_x = __float2ll_ru(x); return fp_x;}
// f5: explicit __float2ll_rn (round to nearest).
// In the SASS listing below (_Z2f5f) this becomes F2I.S64.F32 with no rounding-mode suffix.
__device__ long long int f5(float x) {long long int fp_x = __float2ll_rn(x); return fp_x;}
# nvcc -dc t70.cu
# cuobjdump -sass ./t70.o

Fatbin elf code:
================
arch = sm_52
code version = [1,7]
host = linux
compile_size = 64bit
compressed

        code for sm_52
                Function : _Z2f5f
        .headerflags    @"EF_CUDA_TEXMODE_UNIFIED EF_CUDA_64BIT_ADDRESS EF_CUDA_SM52 EF_CUDA_VIRTUAL_SM(EF_CUDA_SM52)"
                                                               /* 0x001ffc01ffe0071f */
        /*0008*/                   F2I.S64.F32 R4, R4 ;        /* 0x5cb0000000471b04 */
        /*0010*/                   RET ;                       /* 0xe32000000007000f */
        /*0018*/                   BRA 0x18 ;                  /* 0xe2400fffff87000f */
                                                               /* 0x001f8000fc0007e0 */
        /*0028*/                   NOP;                        /* 0x50b0000000070f00 */
        /*0030*/                   NOP;                        /* 0x50b0000000070f00 */
        /*0038*/                   NOP;                        /* 0x50b0000000070f00 */
                ..........


                Function : _Z2f4f
        .headerflags    @"EF_CUDA_TEXMODE_UNIFIED EF_CUDA_64BIT_ADDRESS EF_CUDA_SM52 EF_CUDA_VIRTUAL_SM(EF_CUDA_SM52)"
                                                               /* 0x001ffc01ffe0071f */
        /*0008*/                   F2I.S64.F32.CEIL R4, R4 ;   /* 0x5cb0010000471b04 */
        /*0010*/                   RET ;                       /* 0xe32000000007000f */
        /*0018*/                   BRA 0x18 ;                  /* 0xe2400fffff87000f */
                                                               /* 0x001f8000fc0007e0 */
        /*0028*/                   NOP;                        /* 0x50b0000000070f00 */
        /*0030*/                   NOP;                        /* 0x50b0000000070f00 */
        /*0038*/                   NOP;                        /* 0x50b0000000070f00 */
                ..........


                Function : _Z2f3f
        .headerflags    @"EF_CUDA_TEXMODE_UNIFIED EF_CUDA_64BIT_ADDRESS EF_CUDA_SM52 EF_CUDA_VIRTUAL_SM(EF_CUDA_SM52)"
                                                               /* 0x001ffc01ffe0071f */
        /*0008*/                   F2I.S64.F32.FLOOR R4, R4 ;  /* 0x5cb0008000471b04 */
        /*0010*/                   RET ;                       /* 0xe32000000007000f */
        /*0018*/                   BRA 0x18 ;                  /* 0xe2400fffff87000f */
                                                               /* 0x001f8000fc0007e0 */
        /*0028*/                   NOP;                        /* 0x50b0000000070f00 */
        /*0030*/                   NOP;                        /* 0x50b0000000070f00 */
        /*0038*/                   NOP;                        /* 0x50b0000000070f00 */
                ..........


                Function : _Z2f2f
        .headerflags    @"EF_CUDA_TEXMODE_UNIFIED EF_CUDA_64BIT_ADDRESS EF_CUDA_SM52 EF_CUDA_VIRTUAL_SM(EF_CUDA_SM52)"
                                                               /* 0x001ffc01ffe0071f */
        /*0008*/                   F2I.S64.F32.TRUNC R4, R4 ;  /* 0x5cb0018000471b04 */
        /*0010*/                   RET ;                       /* 0xe32000000007000f */
        /*0018*/                   BRA 0x18 ;                  /* 0xe2400fffff87000f */
                                                               /* 0x001f8000fc0007e0 */
        /*0028*/                   NOP;                        /* 0x50b0000000070f00 */
        /*0030*/                   NOP;                        /* 0x50b0000000070f00 */
        /*0038*/                   NOP;                        /* 0x50b0000000070f00 */
                ..........


                Function : _Z2f1f
        .headerflags    @"EF_CUDA_TEXMODE_UNIFIED EF_CUDA_64BIT_ADDRESS EF_CUDA_SM52 EF_CUDA_VIRTUAL_SM(EF_CUDA_SM52)"
                                                               /* 0x001ffc01ffe0071f */
        /*0008*/                   F2I.S64.F32.TRUNC R4, R4 ;  /* 0x5cb0018000471b04 */
        /*0010*/                   RET ;                       /* 0xe32000000007000f */
        /*0018*/                   BRA 0x18 ;                  /* 0xe2400fffff87000f */
                                                               /* 0x001f8000fc0007e0 */
        /*0028*/                   NOP;                        /* 0x50b0000000070f00 */
        /*0030*/                   NOP;                        /* 0x50b0000000070f00 */
        /*0038*/                   NOP;                        /* 0x50b0000000070f00 */
                ..........



Fatbin ptx code:
================
arch = sm_52
code version = [8,2]
host = linux
compile_size = 64bit
compressed
ptxasOptions = --compile-only
#

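It looks like it: f1 (the plain assignment) compiles to the same F2I.S64.F32.TRUNC instruction as f2 (__float2ll_rz).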
(nvcc 12.2.128)

1 Like

See, I don’t know how to do stuff like that :)

But I know you guys are very generous to help us understand. Thanks, Robert!

From the 2011 version of the ISO-C++ standard:

4.9 Floating-integral conversions [conv.fpint]
1 A prvalue of a floating point type can be converted to a prvalue of an integer type. The conversion truncates; that is, the fractional part is discarded. The behavior is undefined if the truncated value cannot be represented in the destination type.

1 Like

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.