cudaErrorInvalidDeviceFunction when running google typed test with CUDA kernel

matomic · October 1, 2021, 9:29pm

I have the following skeleton code for testing a CUDA kernel:

#include <cuda.h>
#include <array>
#include <exception>
#include "gtest/gtest.h"

// Evaluates `expr` exactly once and compares the result against cudaSuccess.
// For any other value it prints "<error name>: <error string>" to stderr and
// then calls std::terminate().
//
// The status lives in a deliberately unusual local name. With a local called
// `err`, a call such as CHECK_CUDA(err) would expand to `auto err = (err);`,
// which refers to the freshly declared variable inside its own initializer
// instead of the caller's `err`.
//
// The do { ... } while (false) wrapper makes the expansion a single statement,
// so the macro is safe inside an unbraced if/else.
#define CHECK_CUDA(expr)                                            \
  do {                                                              \
    const auto check_cuda_status_ = (expr);                         \
    if (check_cuda_status_ != cudaSuccess) {                        \
      fprintf(                                                      \
          stderr,                                                   \
          "%s: %s\n",                                               \
          cudaGetErrorName(check_cuda_status_),                     \
          cudaGetErrorString(check_cuda_status_));                  \
      std::terminate();                                             \
    }                                                               \
  } while (false)

// Kernel template with an empty body. The pointer parameter is unnamed and
// never read; the kernel exists only so that it can be launched for a given
// element type T.
template <class T>
__global__ void emptyKernel(T* /*unused*/) {
}

// Plain aggregate wrapping a fixed array of three ints.
struct Int3 { int data_[3]; };

// Plain aggregate wrapping an array of ints whose length is fixed by the
// template argument.
template <int Length>
struct IntArray { int data_[Length]; };

namespace ns {

template <typename T>
class TypedUnitTest : public testing::Test {};
using TestTypes = ::testing::Types<
    int,        // okay
    Int3,       // cudaErrorInvalidDeviceFunction if inside namespace ns
    IntArray<3> // cudaErrorInvalidDeviceFunction
    >;

TYPED_TEST_CASE(TypedUnitTest, TestTypes);

TYPED_TEST(TypedUnitTest, SimpleTest) {
  TypeParam* ptr{nullptr};
  emptyKernel << <1, 1, 0, 0>>> (ptr);
  CHECK_CUDA(cudaGetLastError());
}
} // namespace ns

The runtime behavior for the three types is as shown in the comments: cudaErrorInvalidDeviceFunction is returned for Int3 if it is defined inside the namespace, and unconditionally for the template struct IntArray. Granted, the Google Test macros are complicated, but I am wondering if someone can explain this runtime behavior.