Parser error when `__host__` is added

I have the following code example, which gives an error when compiled as host code but compiles without error as device code.

#define GPU
#include <cstdint>

// Empty class template parameterized on a uint32_t non-type value.
// It carries no data; it only serves as the argument type of In::get.
template<uint32_t T>
struct A{
};

// Non-template type that exposes a member function template `get`.
struct In
{
    // Callable from both host and device code. Takes an A<T> by reference;
    // the body is intentionally empty (only the call syntax matters here).
    template <uint32_t T>
    __host__ __device__ void get(A<T>& ret) const
    {
    }
};

// Function template callable from host and device. Creates an A<F> and calls
// the member template get<F> on `in`; the `out` parameter is unused.
template <typename T, uint32_t F>
__host__ __device__ void  test(uint8_t out[], T in)
{
    A<F> a;
    // NOTE: `in` has the dependent type T, so `get` is a dependent name.
    // Standard C++ requires the disambiguator here: `in.template get<F>(a);`.
    // Without it, `<` is parsed as a less-than operator. This is the line
    // that is rejected in the host build.
    in.get<F>(a);
}

// Kernel entry point: instantiates test<In, 4> from device code, passing a
// null output pointer and a value-initialized In.
__global__ void test_run() {
    test<In, 4>(nullptr, In{}) ;
}

// Selects which instantiation path is exercised, based on the GPU macro.
int main() {
#ifdef GPU
// Device path: launch the kernel with one block of one thread.
test_run<<<1,1>>>();
#else
// Host path: call test<In, 4> directly from host code.
test<In, 4>(nullptr, In{});
#endif
}

run on godbolt
(comment out `#define GPU` for the host version)

Is this a parser error? I can see that it does not compile with g++ either. But in that case, I would expect it to fail in the device version as well.

Update:
I found that adding the `template` keyword to the member function call on the object whose type is a template parameter (i.e. `in.template get<F>(a);`) fixes the compile error on the host. Still, I wonder why it is parsed differently in the device version.