NVCC bug related to GCC 6 <tuple> header?

My CUDA version is 9.1 and my GCC version is 6.4.1, on a Fedora 25 system.

The following code is a small example that I came up with to demonstrate the problem:

#include <iostream>
#include <tuple>

template<typename... Args> using MyArgs = std::tuple<Args*...>;  // tuple holding one pointer per type in Args

template<typename... Args>  // Args: types of the objects passed by reference
MyArgs<Args...> MakeMyArgs(Args&... args)  // returns a std::tuple<Args*...> built from the address of each argument
{
        return MyArgs<Args...>(&args...);  // stores &arg for every argument; pointers refer to the caller's objects
}

template<typename T, class Derived>  // CRTP-style base: Derived must provide DefArgs() and dump()
class KernelBase
{
public:
        template<typename... Args>
        void operator()(Args&&... args)  // stores each argument via SetArgs, then calls Derived::dump()
        {
                SetArgs<0>(args...);  // start at tuple index 0; args are passed on as lvalues (no std::forward)

                Derived* d = reinterpret_cast<Derived*>(this);  // treats this object as a Derived
                d->dump();
        }

protected:
        template<int I, typename Q, typename... Args>
        void SetArgs(Q&& arg1, Args&& ... args)  // assigns arg1 through the I-th pointer from DefArgs(), then handles the rest
        {
                Derived* ptr = reinterpret_cast<Derived*>(this);
                auto myargs = ptr->DefArgs();  // tuple returned by Derived::DefArgs(); reported as foo.cu:30 in the nvcc diagnostics
                *std::get<I>(myargs) = arg1;  // write arg1 through the I-th element of the tuple
                SetArgs<I+1>(args...);  // continue with the next index and the remaining arguments
        }
        template<int I>
        void SetArgs() {}  // end of recursion: no arguments left
};

template<typename T>
class KernelDerived : public KernelBase<T, KernelDerived<T>>  // passes itself to KernelBase as the Derived parameter
{
public:
        float f1;  // three value slots written through the pointers from DefArgs(); declared float, not T
        float f2;
        float f3;

        MyArgs<T, T, T> DefArgs()  // returns pointers to f1, f2 and f3
        {
                return MakeMyArgs(f1, f2, f3);  // MakeMyArgs yields std::tuple<float*, float*, float*> here, converted to the return type
        }

        void dump(void)  // prints f1, f2 and f3 to std::cout, one per line
        {
                std::cout << f1 << std::endl;
                std::cout << f2 << std::endl;
                std::cout << f3 << std::endl;
        }
};

template<typename T>
void KernelDerivedTest(T f1, T f2, T f3)  // invokes a temporary KernelDerived<T> with the three values
{
        KernelDerived<T>()(f1, f2, f3);  // calls the inherited KernelBase::operator(); reported as foo.cu:62 in the nvcc diagnostics
}

template  // explicit instantiation for T = float (the "required from here" point, foo.cu:65)
void KernelDerivedTest<float>(float f1, float f2, float f3);

int main(void)  // drives the example with three float values
{
        float f1 = 2.0;  // double literals converted to float
        float f2 = 2.1;
        float f3 = 2.2;
        
        KernelDerivedTest<float>(f1, f2, f3);  // ends in KernelDerived::dump(), which prints the stored values
}

If the code is saved into a file foo.cu and compilation is attempted with nvcc:

nvcc -std=c++11 -c foo.cu

it dumps out a lengthy error message, reported from deep inside the <tuple> header:

/usr/include/c++/6.4.1/tuple: In instantiation of ‘static constexpr bool std::_TC<<anonymous>, _Elements>::_MoveConstructibleTuple() [with _UElements = {std::tuple<float*, float*, float*>}; bool <anonymous> = true; _Elements = {float*, float*, float*}]’:
/usr/include/c++/6.4.1/tuple:626:248:   required by substitution of ‘template<class ... _UElements, typename std::enable_if<(((std::_TC<(sizeof... (_UElements) == 1), float*, float*, float*>::_NotSameTuple<_UElements ...>() && std::_TC<(1ul == sizeof... (_UElements)), float*, float*, float*>::_MoveConstructibleTuple<_UElements ...>()) && std::_TC<(1ul == sizeof... (_UElements)), float*, float*, float*>::_ImplicitlyMoveConvertibleTuple<_UElements ...>()) && (3ul >= 1)), bool>::type <anonymous> > constexpr std::tuple< <template-parameter-1-1> >::tuple(_UElements&& ...) [with _UElements = {std::tuple<float*, float*, float*>}; typename std::enable_if<(((std::_TC<(sizeof... (_UElements) == 1), float*, float*, float*>::_NotSameTuple<_UElements ...>() && std::_TC<(1ul == sizeof... (_UElements)), float*, float*, float*>::_MoveConstructibleTuple<_UElements ...>()) && std::_TC<(1ul == sizeof... (_UElements)), float*, float*, float*>::_ImplicitlyMoveConvertibleTuple<_UElements ...>()) && (3ul >= 1)), bool>::type <anonymous> = <missing>]’
foo.cu:30:30:   required from ‘void KernelBase<T, Derived>::SetArgs(Q&&, Args&& ...) [with int I = 0; Q = float&; Args = {float&, float&}; T = float; Derived = KernelDerived<float>]’
foo.cu:19:13:   required from ‘void KernelBase<T, Derived>::operator()(Args&& ...) [with Args = {float&, float&, float&}; T = float; Derived = KernelDerived<float>]’
foo.cu:62:18:   required from ‘void KernelDerivedTest(T, T, T) [with T = float]’
foo.cu:65:67:   required from here
/usr/include/c++/6.4.1/tuple:483:67: error: mismatched argument pack lengths while expanding ‘std::is_constructible<_Elements, _UElements&&>’
       return __and_<is_constructible<_Elements, _UElements&&>...>::value;
                                                                   ^~~~~
/usr/include/c++/6.4.1/tuple:484:1: error: body of constexpr function ‘static constexpr bool std::_TC<<anonymous>, _Elements>::_MoveConstructibleTuple() [with _UElements = {std::tuple<float*, float*, float*>}; bool <anonymous> = true; _Elements = {float*, float*, float*}]’ not a return-statement
     }
 ^
/usr/include/c++/6.4.1/tuple: In instantiation of ‘static constexpr bool std::_TC<<anonymous>, _Elements>::_ImplicitlyMoveConvertibleTuple() [with _UElements = {std::tuple<float*, float*, float*>}; bool <anonymous> = true; _Elements = {float*, float*, float*}]’:
/usr/include/c++/6.4.1/tuple:626:362:   required by substitution of ‘template<class ... _UElements, typename std::enable_if<(((std::_TC<(sizeof... (_UElements) == 1), float*, float*, float*>::_NotSameTuple<_UElements ...>() && std::_TC<(1ul == sizeof... (_UElements)), float*, float*, float*>::_MoveConstructibleTuple<_UElements ...>()) && std::_TC<(1ul == sizeof... (_UElements)), float*, float*, float*>::_ImplicitlyMoveConvertibleTuple<_UElements ...>()) && (3ul >= 1)), bool>::type <anonymous> > constexpr std::tuple< <template-parameter-1-1> >::tuple(_UElements&& ...) [with _UElements = {std::tuple<float*, float*, float*>}; typename std::enable_if<(((std::_TC<(sizeof... (_UElements) == 1), float*, float*, float*>::_NotSameTuple<_UElements ...>() && std::_TC<(1ul == sizeof... (_UElements)), float*, float*, float*>::_MoveConstructibleTuple<_UElements ...>()) && std::_TC<(1ul == sizeof... (_UElements)), float*, float*, float*>::_ImplicitlyMoveConvertibleTuple<_UElements ...>()) && (3ul >= 1)), bool>::type <anonymous> = <missing>]’
foo.cu:30:30:   required from ‘void KernelBase<T, Derived>::SetArgs(Q&&, Args&& ...) [with int I = 0; Q = float&; Args = {float&, float&}; T = float; Derived = KernelDerived<float>]’
foo.cu:19:13:   required from ‘void KernelBase<T, Derived>::operator()(Args&& ...) [with Args = {float&, float&, float&}; T = float; Derived = KernelDerived<float>]’
foo.cu:62:18:   required from ‘void KernelDerivedTest(T, T, T) [with T = float]’
foo.cu:65:67:   required from here
/usr/include/c++/6.4.1/tuple:489:65: error: mismatched argument pack lengths while expanding ‘std::is_convertible<_UElements&&, _Elements>’
       return __and_<is_convertible<_UElements&&, _Elements>...>::value;
                                                                 ^~~~~
/usr/include/c++/6.4.1/tuple:490:1: error: body of constexpr function ‘static constexpr bool std::_TC<<anonymous>, _Elements>::_ImplicitlyMoveConvertibleTuple() [with _UElements = {std::tuple<float*, float*, float*>}; bool <anonymous> = true; _Elements = {float*, float*, float*}]’ not a return-statement
     }
 ^
/usr/include/c++/6.4.1/tuple: In instantiation of ‘static constexpr bool std::_TC<<anonymous>, _Elements>::_NonNestedTuple() [with _SrcTuple = const std::tuple<float*, float*, float*>&; bool <anonymous> = true; _Elements = {float*, float*, float*}]’:
/usr/include/c++/6.4.1/tuple:662:419:   required by substitution of ‘template<class ... _UElements, class _Dummy, typename std::enable_if<((std::_TC<(1ul == sizeof... (_UElements)), float*, float*, float*>::_ConstructibleTuple<_UElements ...>() && std::_TC<(1ul == sizeof... (_UElements)), float*, float*, float*>::_ImplicitlyConvertibleTuple<_UElements ...>()) && std::_TC<(std::is_same<_Dummy, void>::value && (1ul == 1)), float*, float*, float*>::_NonNestedTuple<const tuple<_Elements ...>&>()), bool>::type <anonymous> > constexpr std::tuple< <template-parameter-1-1> >::tuple(const std::tuple<_Args1 ...>&) [with _UElements = {float*, float*, float*}; _Dummy = void; typename std::enable_if<((std::_TC<(1ul == sizeof... (_UElements)), float*, float*, float*>::_ConstructibleTuple<_UElements ...>() && std::_TC<(1ul == sizeof... (_UElements)), float*, float*, float*>::_ImplicitlyConvertibleTuple<_UElements ...>()) && std::_TC<(std::is_same<_Dummy, void>::value && (1ul == 1)), float*, float*, float*>::_NonNestedTuple<const tuple<_Elements ...>&>()), bool>::type <anonymous> = <missing>]’
foo.cu:30:30:   required from ‘void KernelBase<T, Derived>::SetArgs(Q&&, Args&& ...) [with int I = 0; Q = float&; Args = {float&, float&}; T = float; Derived = KernelDerived<float>]’
foo.cu:19:13:   required from ‘void KernelBase<T, Derived>::operator()(Args&& ...) [with Args = {float&, float&, float&}; T = float; Derived = KernelDerived<float>]’
foo.cu:62:18:   required from ‘void KernelDerivedTest(T, T, T) [with T = float]’
foo.cu:65:67:   required from here
/usr/include/c++/6.4.1/tuple:495:244: error: wrong number of template arguments (4, should be 2)
       return  __and_<__not_<is_same<tuple<_Elements...>,
                                                                                                                                                                                                                                                    ^    
/usr/include/c++/6.4.1/type_traits:1558:8: note: provided for ‘template<class _From, class _To> struct std::is_convertible’
     struct is_convertible
        ^~~~~~~~~~~~~~
/usr/include/c++/6.4.1/tuple:502:1: error: body of constexpr function ‘static constexpr bool std::_TC<<anonymous>, _Elements>::_NonNestedTuple() [with _SrcTuple = const std::tuple<float*, float*, float*>&; bool <anonymous> = true; _Elements = {float*, float*, float*}]’ not a return-statement
     }
 ^
/usr/include/c++/6.4.1/tuple: In instantiation of ‘static constexpr bool std::_TC<<anonymous>, _Elements>::_NonNestedTuple() [with _SrcTuple = std::tuple<float*, float*, float*>&&; bool <anonymous> = true; _Elements = {float*, float*, float*}]’:
/usr/include/c++/6.4.1/tuple:686:422:   required by substitution of ‘template<class ... _UElements, class _Dummy, typename std::enable_if<((std::_TC<(1ul == sizeof... (_UElements)), float*, float*, float*>::_MoveConstructibleTuple<_UElements ...>() && std::_TC<(1ul == sizeof... (_UElements)), float*, float*, float*>::_ImplicitlyMoveConvertibleTuple<_UElements ...>()) && std::_TC<(std::is_same<_Dummy, void>::value && (1ul == 1)), float*, float*, float*>::_NonNestedTuple<tuple<_Elements ...>&&>()), bool>::type <anonymous> > constexpr std::tuple< <template-parameter-1-1> >::tuple(std::tuple<_Args1 ...>&&) [with _UElements = {float*, float*, float*}; _Dummy = void; typename std::enable_if<((std::_TC<(1ul == sizeof... (_UElements)), float*, float*, float*>::_MoveConstructibleTuple<_UElements ...>() && std::_TC<(1ul == sizeof... (_UElements)), float*, float*, float*>::_ImplicitlyMoveConvertibleTuple<_UElements ...>()) && std::_TC<(std::is_same<_Dummy, void>::value && (1ul == 1)), float*, float*, float*>::_NonNestedTuple<tuple<_Elements ...>&&>()), bool>::type <anonymous> = <missing>]’
foo.cu:30:30:   required from ‘void KernelBase<T, Derived>::SetArgs(Q&&, Args&& ...) [with int I = 0; Q = float&; Args = {float&, float&}; T = float; Derived = KernelDerived<float>]’
foo.cu:19:13:   required from ‘void KernelBase<T, Derived>::operator()(Args&& ...) [with Args = {float&, float&, float&}; T = float; Derived = KernelDerived<float>]’
foo.cu:62:18:   required from ‘void KernelDerivedTest(T, T, T) [with T = float]’
foo.cu:65:67:   required from here
/usr/include/c++/6.4.1/tuple:495:244: error: wrong number of template arguments (4, should be 2)
       return  __and_<__not_<is_same<tuple<_Elements...>,
                                                                                                                                                                                                                                                    ^    
/usr/include/c++/6.4.1/type_traits:1558:8: note: provided for ‘template<class _From, class _To> struct std::is_convertible’
     struct is_convertible
        ^~~~~~~~~~~~~~
/usr/include/c++/6.4.1/tuple:502:1: error: body of constexpr function ‘static constexpr bool std::_TC<<anonymous>, _Elements>::_NonNestedTuple() [with _SrcTuple = std::tuple<float*, float*, float*>&&; bool <anonymous> = true; _Elements = {float*, float*, float*}]’ not a return-statement
     }
 ^

However, if the file is renamed to foo.cpp and compiled using GCC:

g++ -std=c++11 -c foo.cpp

then it would compile fine.

The code in question is actually trying to define a base class for a CUDA kernel (the KernelBase class) from which the classes implementing different kernels would then derive (like the KernelDerived class). Kernels would be run through the base class operator() (as I removed the CUDA code, the kernel arguments are just printed to std::cout here), and the whole variadic templates/functions mumbo-jumbo is there to make it possible for CUDA kernels implemented by derived classes to be run with different numbers of arguments (so the kernel “implemented” by the KernelDerived class in the code above has three arguments).

If the code is changed so that two arguments are used instead of three, it compiles and runs properly. The code also builds and runs properly on a different installation, with CUDA 8 and GCC 4.9 installed.

The latest gcc version supported with CUDA 9.1 is 6.3.0; it looks like 6.2.1 is the version tested with Fedora 25:

http://docs.nvidia.com/cuda/cuda-installation-guide-linux/index.html#system-requirements

gcc 6.4.1 is not supported with CUDA 9.1

The same error happens with gcc 6.3.0 on an Ubuntu 17.04 installation, which should be supported according to the system requirements table that you linked.

I don’t have a convenient setup to try to duplicate this at the moment. Probably best just to file a bug at http://developer.nvidia.com

When I fill in the bug report form and press the “Submit a bug” button, I get the following:

An AJAX HTTP error occurred.
HTTP Result Code: 403
Debugging information follows.
Path: /system/ajax
StatusText: Forbidden
ResponseText:
403 - Forbidden
403 - Forbidden

Please do the following:

  1. Provide here in this thread, the exact text of the fields you filled out in the bug reporting form to reproduce this error. - I need that to try and get this bug reporting issue fixed.

  2. Attempt to file a bug report with little or no information in it.

  3. Give me the bug report number that is returned by the system.

  4. I will update the bug report with the necessary information.

The bug reporting form has an aggressive filter on it to detect rogue content. Something you have submitted in text form in one of the fields is triggering that filter, and the 403 error is the result. By eliminating nearly everything that you would have submitted, I hope to avoid the 403 error, then I will update the bug for you.

Here is what I put in the corresponding fields on the bug report form:

Summary: nvcc issue related to STL tuple header, code compiles fine with g++
Relevant Area: Other
(Next field): CUDA SDK
Description: please see below
NVIDIA GPU or System: any GPU
Operating System: Linux
Operating System Details: Ubuntu 17.04

And here is what I put initially into the Description field:

Using the CUDA SDK 9.1 and the GCC 6.3.0 compiler on an Ubuntu 17.04 machine, save the following to a file foo.cu:

// -------------------------------------------
#include <iostream>
#include <tuple>

template<typename... Args> using MyArgs = std::tuple<Args*...>;  // tuple holding one pointer per type in Args

template<typename... Args>  // Args: types of the objects passed by reference
MyArgs<Args...> MakeMyArgs(Args&... args)  // returns a std::tuple<Args*...> built from the address of each argument
{
        return MyArgs<Args...>(&args...);  // stores &arg for every argument; pointers refer to the caller's objects
}

template<typename T, class Derived>  // CRTP-style base: Derived must provide DefArgs() and dump()
class KernelBase
{
public:
        template<typename... Args>
        void operator()(Args&&... args)  // stores each argument via SetArgs, then calls Derived::dump()
        {
                SetArgs<0>(args...);  // start at tuple index 0; args are passed on as lvalues (no std::forward)

                Derived* d = reinterpret_cast<Derived*>(this);  // treats this object as a Derived
                d->dump();
        }

protected:
        template<int I, typename Q, typename... Args>
        void SetArgs(Q&& arg1, Args&& ... args)  // assigns arg1 through the I-th pointer from DefArgs(), then handles the rest
        {
                Derived* ptr = reinterpret_cast<Derived*>(this);
                auto myargs = ptr->DefArgs();  // tuple returned by Derived::DefArgs()
                *std::get<I>(myargs) = arg1;  // write arg1 through the I-th element of the tuple
                SetArgs<I+1>(args...);  // continue with the next index and the remaining arguments
        }
        template<int I>
        void SetArgs() {}  // end of recursion: no arguments left
};

template<typename T>
class KernelDerived : public KernelBase<T, KernelDerived<T>>  // passes itself to KernelBase as the Derived parameter
{
public:
        float f1;  // three value slots written through the pointers from DefArgs(); declared float, not T
        float f2;
        float f3;

        MyArgs<T, T, T> DefArgs()  // returns pointers to f1, f2 and f3
        {
                return MakeMyArgs(f1, f2, f3);  // MakeMyArgs yields std::tuple<float*, float*, float*> here, converted to the return type
        }

        void dump(void)  // prints f1, f2 and f3 to std::cout, one per line
        {
                std::cout << f1 << std::endl;
                std::cout << f2 << std::endl;
                std::cout << f3 << std::endl;
        }
};

template<typename T>
void KernelDerivedTest(T f1, T f2, T f3)  // invokes a temporary KernelDerived<T> with the three values
{
        KernelDerived<T>()(f1, f2, f3);  // calls the inherited KernelBase::operator()
}

template  // explicit instantiation for T = float
void KernelDerivedTest<float>(float f1, float f2, float f3);

int main(void)  // drives the example with three float values
{
        float f1 = 2.0;  // double literals converted to float
        float f2 = 2.1;
        float f3 = 2.2;
        
        KernelDerivedTest<float>(f1, f2, f3);  // ends in KernelDerived::dump(), which prints the stored values
}
// -------------------------------------------

Then, run "nvcc -std=c++11 -c foo.cu".  Errors related to the <tuple> header will be reported.  On the other hand, after renaming the file to foo.cpp and then running "g++ -std=c++11 -c foo.cpp", compilation completes fine.  Note that all CUDA kernel code has actually been removed from the code above, in order to keep the example small.  The idea of the code is to have classes like KernelDerived represent actual CUDA kernels, with the kernel arguments as class member variables.  Kernels are run by calling the KernelBase::operator() method, as demonstrated by the KernelDerivedTest function.

Now I’ve replaced this with “TBD”, and the bug got accepted. The bug ID is 2042017.

Thanks, I have updated the bug with enough information for it to move forward. You have probably received an email requesting additional information. I have provided that information. However if you receive further emails, you should respond to those.

Thanks for your patience.

Would it be possible for you to re-check the bug description? It seems that using some sort of code block is possible in the description field; however, the code that I supplied is not placed in such a block, so the description is rather unreadable.

Thanks for all your help so far.

I think it should be OK. I edited the description field. You had placed TBD. there (which is fine). I changed it. Yes I did use a code block, and the code you supplied is in that code block. The full description field now looks like this:

From:

https://devtalk.nvidia.com/default/topic/1028112/cuda-setup-and-installation/nvcc-bug-related-to-gcc-6-lt-tuple-gt-header-/

Test code:
 
#include <iostream>
#include <tuple>

template<typename... Args> using MyArgs = std::tuple<Args*...>;  // tuple holding one pointer per type in Args

template<typename... Args>  // Args: types of the objects passed by reference
MyArgs<Args...> MakeMyArgs(Args&... args)  // returns a std::tuple<Args*...> built from the address of each argument
{
        return MyArgs<Args...>(&args...);  // stores &arg for every argument; pointers refer to the caller's objects
}

template<typename T, class Derived>  // CRTP-style base: Derived must provide DefArgs() and dump()
class KernelBase
{
public:
        template<typename... Args>
        void operator()(Args&&... args)  // stores each argument via SetArgs, then calls Derived::dump()
        {
                SetArgs<0>(args...);  // start at tuple index 0; args are passed on as lvalues (no std::forward)

                Derived* d = reinterpret_cast<Derived*>(this);  // treats this object as a Derived
                d->dump();
        }

protected:
        template<int I, typename Q, typename... Args>
        void SetArgs(Q&& arg1, Args&& ... args)  // assigns arg1 through the I-th pointer from DefArgs(), then handles the rest
        {
                Derived* ptr = reinterpret_cast<Derived*>(this);
                auto myargs = ptr->DefArgs();  // tuple returned by Derived::DefArgs()
                *std::get<I>(myargs) = arg1;  // write arg1 through the I-th element of the tuple
                SetArgs<I+1>(args...);  // continue with the next index and the remaining arguments
        }
        template<int I>
        void SetArgs() {}  // end of recursion: no arguments left
};

template<typename T>
class KernelDerived : public KernelBase<T, KernelDerived<T>>  // passes itself to KernelBase as the Derived parameter
{
public:
        float f1;  // three value slots written through the pointers from DefArgs(); declared float, not T
        float f2;
        float f3;

        MyArgs<T, T, T> DefArgs()  // returns pointers to f1, f2 and f3
        {
                return MakeMyArgs(f1, f2, f3);  // MakeMyArgs yields std::tuple<float*, float*, float*> here, converted to the return type
        }

        void dump(void)  // prints f1, f2 and f3 to std::cout, one per line
        {
                std::cout << f1 << std::endl;
                std::cout << f2 << std::endl;
                std::cout << f3 << std::endl;
        }
};

template<typename T>
void KernelDerivedTest(T f1, T f2, T f3)  // invokes a temporary KernelDerived<T> with the three values
{
        KernelDerived<T>()(f1, f2, f3);  // calls the inherited KernelBase::operator()
}

template  // explicit instantiation for T = float
void KernelDerivedTest<float>(float f1, float f2, float f3);

int main(void)  // drives the example with three float values
{
        float f1 = 2.0;  // double literals converted to float
        float f2 = 2.1;
        float f3 = 2.2;
        
        KernelDerivedTest<float>(f1, f2, f3);  // ends in KernelDerived::dump(), which prints the stored values
}

If the code is saved into a file foo.cu and compilation is attempted with nvcc:
 
nvcc -std=c++11 -c foo.cu


it will dump out a lengthy error message.

However, if the file is renamed to foo.cpp and compiled using GCC:
 
g++ -std=c++11 -c foo.cpp


then it would compile fine.

CUDA 9.1, Ubuntu 17.04, gcc 6.3.0

Here is how this field looks in my browser (Firefox 57.0.3): https://imagebin.ca/v/3mpP37gFHbTs

Sorry, I would imagine that my use of the code block probably involves formatting that strips end-of-line characters from the text, making it all run together when viewed from the portal you are looking at.

Any update on this issue? Having similar problems trying to compile tensorflow against CUDA 9.1 with gcc 6.4.1
I’ve compiled against cuda 9.1 before but haven’t been able to in the last couple weeks.

The issue is recorded internally and has been reviewed by the developer team. A fix is in progress.

Since this affects the compiler, the earliest possible date for a publicly available fix would be in the next CUDA release. I won’t be able to give any information about when that may be.

I won’t be able to give any further updates on the issue until after the next CUDA release.

Just upgraded to CUDA 9.2, and I can confirm that this issue is fixed.