I have the following skeleton code for testing a CUDA kernel:
#include <cuda.h>
#include <array>
#include <cstdio>
#include <exception>
#include "gtest/gtest.h"
// Evaluates `expr` exactly once and compares the result against cudaSuccess.
// On mismatch, prints "<error name>: <error string>" to stderr and calls
// std::terminate(); on success it has no other effect.
//
// The result is held in a local with a macro-specific, trailing-underscore
// name so that identifiers appearing in `expr` are never captured by the
// macro's own variable (e.g. CHECK_CUDA(err) with a caller-side `err`).
// The do { ... } while (0) wrapper makes the macro usable as a single
// statement, including inside an unbraced if/else.
#define CHECK_CUDA(expr)                                      \
  do {                                                        \
    const auto check_cuda_status_ = (expr);                   \
    if (check_cuda_status_ != cudaSuccess) {                  \
      std::fprintf(stderr, "%s: %s\n",                        \
                   cudaGetErrorName(check_cuda_status_),      \
                   cudaGetErrorString(check_cuda_status_));   \
      std::terminate();                                       \
    }                                                         \
  } while (0)
// Kernel with an empty body, templated on the pointee type T so that a launch
// can be issued for any element type. The pointer argument is unnamed and is
// never read or written.
template <typename T>
__global__ void emptyKernel(T* /*unused*/) {
}
// Plain aggregate holding three ints; one of the element types the typed test
// is instantiated with.
struct Int3 {
  int data_[3];  // the three stored values
};
// Plain aggregate holding `Count` ints, with the element count supplied as a
// compile-time template argument.
template <int Count>
struct IntArray {
  int data_[Count];  // the stored values
};
namespace ns {

// Fixture for the typed test below. It is parameterized on the element type
// and adds nothing on top of testing::Test.
template <typename T>
class TypedUnitTest : public testing::Test {};

// Element types the typed test is instantiated with. The trailing comments
// record the launch result reported for each type.
using TestTypes = ::testing::Types<
    int,         // okay
    Int3,        // cudaErrorInvalidDeviceFunction if inside namespace ns
    IntArray<3>  // cudaErrorInvalidDeviceFunction
    >;
TYPED_TEST_CASE(TypedUnitTest, TestTypes);

// Launches emptyKernel on a null TypeParam pointer with a <<<1, 1, 0, 0>>>
// execution configuration, then passes cudaGetLastError() to CHECK_CUDA, which
// terminates the process if the result is not cudaSuccess.
TYPED_TEST(TypedUnitTest, SimpleTest) {
  TypeParam* ptr = nullptr;
  emptyKernel<<<1, 1, 0, 0>>>(ptr);
  CHECK_CUDA(cudaGetLastError());
}

}  // namespace ns
The runtime behavior for the three types is as shown in the comments: `cudaErrorInvalidDeviceFunction` is returned for `Int3` if it is defined inside the namespace, and unconditionally for the template struct `IntArray`. Granted, the Google Test macros are complicated, but I am wondering if someone can explain this runtime behavior.