I am using a custom-built Linux kernel. When I run a CUDA hello-world app on it, the app crashes:
#include <stdio.h>
#include <unistd.h>
/*
 * A simple introduction to programming in CUDA. This program prints
 * "Hello World from GPU!" from 10 CUDA threads running on the GPU.
 */
/*
 * Kernel: each thread that executes it prints one greeting line using
 * device-side printf. It takes no arguments and reads no thread or block
 * indices, so the number of lines printed equals the number of threads
 * launched.
 */
__global__ void helloFromGPU()
{
    printf("Hello World from GPU!\n");
}
/*
 * Host entry point.
 *
 * Prints a greeting from the CPU, launches helloFromGPU on 1 block of
 * 10 threads, waits for the device to finish, and then blocks in pause()
 * so the process stays alive (e.g. for inspection from another shell)
 * until a signal arrives.
 *
 * Returns 0 when the launch and the synchronization both succeed, and 1
 * when either of them reports a CUDA error; the error text is written to
 * stderr. argc and argv are unused.
 */
int main(int argc, char **argv)
{
    (void)argc;
    (void)argv;

    int status = 0;

    printf("Hello World from CPU!\n");

    helloFromGPU<<<1, 10>>>();

    /* A kernel launch returns nothing; launch failures are read back here. */
    cudaError_t err = cudaGetLastError();
    if (err != cudaSuccess) {
        fprintf(stderr, "kernel launch failed: %s\n", cudaGetErrorString(err));
        status = 1;
    }

    /* Errors raised while the kernel runs surface at the next sync point. */
    err = cudaDeviceSynchronize();
    if (err != cudaSuccess) {
        fprintf(stderr, "cudaDeviceSynchronize failed: %s\n",
                cudaGetErrorString(err));
        status = 1;
    }

    /* Kept on every path so the process can still be examined after a failure. */
    pause();
    return status;
}
backtrace:
Thread 1 "t728" received signal SIGSEGV, Segmentation fault.
0x0000007fb747c628 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
(gdb) bt
#0 0x0000007fb747c628 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#1 0x0000007fb74620c8 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#2 0x0000007fb74628fc in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#3 0x0000007fb746c210 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#4 0x0000007fb746c700 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#5 0x0000007fb73a0930 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#6 0x0000007fb73a09dc in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#7 0x0000007fb73acd60 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#8 0x0000007fb73b6cc0 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#9 0x0000007fb7384adc in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#10 0x0000007fb738630c in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#11 0x0000007fb72c3118 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#12 0x0000007fb73e6748 in cuDevicePrimaryCtxRetain () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#13 0x0000000000432608 in cudart::contextStateManager::initPrimaryContext(cudart::device*) ()
#14 0x0000000000432908 in cudart::contextStateManager::initDriverContext() ()
#15 0x00000000004332c0 in cudart::contextStateManager::getRuntimeContextState(cudart::contextState**, bool) ()
#16 0x0000000000404098 in cudart::cudaApiLaunchCommon(void const*, bool) ()
However, everything works fine when I use the officially built Linux kernel: Linux version 4.4.38-tegra (buildbrain@mobile-u64-553) (gcc version 4.8.5 (GCC) ) #1 SMP PREEMPT Thu Jul 20 00:49:07 PDT 2017
I also used strace to compare the demo running on the two kernels, and found that the following ioctl succeeds on the official kernel but fails on mine:
official kernel:
ioctl(5, _IOC(_IOC_READ|_IOC_WRITE, 0x41, 0x0b, 0x20), 0x7ff558f3e0) = 0
my kernel:
ioctl(5, _IOC(_IOC_READ|_IOC_WRITE, 0x41, 0x0b, 0x20), 0x7fd16bef30) = -1 EINVAL (Invalid argument)
and file descriptor 5 refers to:
lrwx------ 1 root root 64 Mar 20 02:48 5 -> anon_inode:nvhost-17000000.gp10b-fd5