What is the CUDA equivalent of the lrint call?

https://en.cppreference.com/w/cpp/numeric/math/rint

Can I choose the rounding method when converting from double to int ?

This is for RGB to XYZ colour space conversion, btw.

This may be of interest. It doesn’t allow you to choose the rounding mode. However, an example of “choice” could be had by using e.g. llround as an alternative. It doesn’t have the same behavior of obeying a preset rounding preference, however.

Thanks! That should be good enough - I think round to nearest even is default behaviour on CPU.

GPUs do not support a dynamically configurable rounding mode. All standard C++ math library functions that assume the existence of dynamic rounding, such as rint(), lrint(), and llrint(), are therefore hardwired in CUDA C++ to use the rounding mode “to nearest or even”, which is typically the default for C++ runtime environments.

CUDA offers an extensive set of device function intrinsics for double to integer conversion with compile-time selected rounding mode, which are demonstrated in the small sample program below.

#include <stdio.h>
#include <stdlib.h>

// Demonstrates the 32-bit double-to-integer conversion intrinsics, one call
// per compile-time rounding mode.
//
// The unsigned variants convert `a` itself; the signed variants convert its
// negation, so the effect of each rounding mode on a negative operand is
// visible as well. Every thread that executes this kernel prints two lines:
// first the unsigned results, then the signed results.
//
// Rounding-mode suffixes:
//   _rz  toward zero (truncate)
//   _rn  to nearest, ties to even
//   _rd  down (toward -INF)
//   _ru  up (toward +INF)
__global__ void kernel1 (double a)
{
    const double negated = 0.0 - a;

    // double -> unsigned int, applied to the argument as given
    const unsigned int uTrunc = __double2uint_rz (a);
    const unsigned int uNear  = __double2uint_rn (a);
    const unsigned int uFloor = __double2uint_rd (a);
    const unsigned int uCeil  = __double2uint_ru (a);

    // double -> int, applied to the negated argument
    const int sTrunc = __double2int_rz (negated);
    const int sNear  = __double2int_rn (negated);
    const int sFloor = __double2int_rd (negated);
    const int sCeil  = __double2int_ru (negated);

    printf ("uz=%2u un=%2u ud=%2u uu=%2u\n", uTrunc, uNear, uFloor, uCeil);
    printf ("sz=%2d sn=%2d sd=%2d su=%2d\n", sTrunc, sNear, sFloor, sCeil);
}
// Demonstrates the 64-bit double-to-integer conversion intrinsics, one call
// per compile-time rounding mode.
//
// The unsigned variants convert `a` itself; the signed variants convert its
// negation, so the effect of each rounding mode on a negative operand is
// visible as well. Every thread that executes this kernel prints two lines:
// first the unsigned results, then the signed results.
//
// Rounding-mode suffixes:
//   _rz  toward zero (truncate)
//   _rn  to nearest, ties to even
//   _rd  down (toward -INF)
//   _ru  up (toward +INF)
__global__ void kernel2 (double a)
{
    const double negated = 0.0 - a;

    // double -> unsigned long long, applied to the argument as given
    const unsigned long long int uTrunc = __double2ull_rz (a);
    const unsigned long long int uNear  = __double2ull_rn (a);
    const unsigned long long int uFloor = __double2ull_rd (a);
    const unsigned long long int uCeil  = __double2ull_ru (a);

    // double -> long long, applied to the negated argument
    const long long int sTrunc = __double2ll_rz (negated);
    const long long int sNear  = __double2ll_rn (negated);
    const long long int sFloor = __double2ll_rd (negated);
    const long long int sCeil  = __double2ll_ru (negated);

    printf ("uz=%13llu un=%13llu ud=%13llu uu=%13llu\n", uTrunc, uNear, uFloor, uCeil);
    printf ("sz=%13lld sn=%13lld sd=%13lld su=%13lld\n", sTrunc, sNear, sFloor, sCeil);
}

// Runs both demonstration kernels with a single thread each and waits for
// them to finish so that their device-side printf output is emitted.
//
// Returns EXIT_SUCCESS when both launches and the final synchronization
// succeed; otherwise prints the CUDA error string to stderr and returns
// EXIT_FAILURE.
int main (void)
{
    cudaError_t status;

    kernel1<<<1,1>>>(5.5);
    // A kernel launch returns nothing; launch failures (bad configuration,
    // no usable device, ...) are only visible through cudaGetLastError().
    status = cudaGetLastError();
    if (status != cudaSuccess) {
        fprintf (stderr, "kernel1 launch failed: %s\n",
                 cudaGetErrorString (status));
        return EXIT_FAILURE;
    }

    kernel2<<<1,1>>>(123456789012.5);
    status = cudaGetLastError();
    if (status != cudaSuccess) {
        fprintf (stderr, "kernel2 launch failed: %s\n",
                 cudaGetErrorString (status));
        return EXIT_FAILURE;
    }

    // Synchronizing waits for both kernels, flushes the device printf
    // buffer, and reports any error raised while the kernels executed.
    status = cudaDeviceSynchronize();
    if (status != cudaSuccess) {
        fprintf (stderr, "kernel execution failed: %s\n",
                 cudaGetErrorString (status));
        return EXIT_FAILURE;
    }

    return EXIT_SUCCESS;
}

The output of the above program should look similar to this:

uz= 5 un= 6 ud= 5 uu= 6
sz=-5 sn=-6 sd=-6 su=-5
uz= 123456789012 un= 123456789012 ud= 123456789012 uu= 123456789013
sz=-123456789012 sn=-123456789012 sd=-123456789013 su=-123456789012
2 Likes

This topic was automatically closed 14 days after the last reply. New replies are no longer allowed.