Hi! I’m having a strange problem that occurs on one Jetson Nano device but not on another one with the same settings, the same neural network, and the same JetPack version.
I’m loading a serialized network and cuModuleLoadData never finishes (see frame #14):
#0 ioctl () at ../sysdeps/unix/sysv/linux/aarch64/ioctl.S:25
#1 0x0000007fae960758 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libnvrm_graphics.so
#2 0x0000007fae963d68 in NvRmHost1xSyncpointWait () from /usr/lib/aarch64-linux-gnu/tegra/libnvrm_graphics.so
#3 0x0000007faf18c8f4 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#4 0x0000007faf00dd78 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#5 0x0000007faf151bd0 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#6 0x0000007faf07e274 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#7 0x0000007faf07f1dc in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#8 0x0000007faf0653f8 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#9 0x0000007faf06546c in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#10 0x0000007faf066198 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#11 0x0000007faf062888 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#12 0x0000007faf084be8 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#13 0x0000007faeffec78 in ?? () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#14 0x0000007faf0e2944 in cuModuleLoadData () from /usr/lib/aarch64-linux-gnu/tegra/libcuda.so.1
#15 0x0000005558372fdc in cuPointwiseV2::generateKernelFromTactic(cuPointwiseV2::TacticConfig const&, nvinfer1::builder::Tensor const&, pwgen::Expression const&, int, std::bitset<8ul>) ()
#16 0x0000005558247590 in nvinfer1::builder::PointWiseV2Builder::setTactic(nvinfer1::rt::Tactic) ()
#17 0x0000005557c52238 in nvinfer1::cudnn::getMaxPersistentMem(nvinfer1::builder::EngineBuildContext const&, std::unique_ptr<nvinfer1::builder::RunnerBuilder, std::default_delete<nvinfer1::builder::RunnerBuilder> > const&) ()
#18 0x0000005557c52384 in nvinfer1::cudnn::getMaxPersistentMem(nvinfer1::builder::EngineBuildContext const&, std::vector<std::unique_ptr<nvinfer1::builder::RunnerBuilder, std::default_delete<nvinfer1::builder::RunnerBuilder> >, std::allocator<std::unique_ptr<nvinfer1::builder::RunnerBuilder, std::default_delete<nvinfer1::builder::RunnerBuilder> > > >&) ()
#19 0x0000005557c56418 in nvinfer1::cudnn::selectFastestLayer(nvinfer1::builder::EngineBuildContext&, std::vector<std::unique_ptr<nvinfer1::builder::RunnerBuilder, std::default_delete<nvinfer1::builder::RunnerBuilder> >, std::allocator<std::unique_ptr<nvinfer1::builder::RunnerBuilder, std::default_delete<nvinfer1::builder::RunnerBuilder> > > >&, nvinfer1::builder::AutoDeletingVector<nvinfer1::builder::Algorithm>*) ()
#20 0x0000005557c9c508 in nvinfer1::builder::(anonymous namespace)::CandidatesHolder::select(nvinfer1::builder::EngineBuildContext&, nvinfer1::builder::AutoDeletingVector<nvinfer1::builder::Algorithm>*) ()
#21 0x0000005557ca17fc in nvinfer1::builder::buildSingleLayer(nvinfer1::builder::EngineBuildContext&, nvinfer1::builder::Node&, std::unordered_map<std::string, std::unique_ptr<nvinfer1::rt::Region, std::default_delete<nvinfer1::rt::Region> >, std::hash<std::string>, std::equal_to<std::string>, std::allocator<std::pair<std::string const, std::unique_ptr<nvinfer1::rt::Region, std::default_delete<nvinfer1::rt::Region> > > > > const&, nvinfer1::builder::SlotEncoder&, std::unordered_map<std::string, std::vector<nvinfer1::builder::DynamicRangeSymbol, std::allocator<nvinfer1::builder::DynamicRangeSymbol> >, std::hash<std::string>, std::equal_to<std::string>, std::allocator<std::pair<std::string const, std::vector<nvinfer1::builder::DynamicRangeSymbol, std::allocator<nvinfer1::builder::DynamicRangeSymbol> > > > >*, bool, std::unordered_map<nvinfer1::builder::Tensor const*, nvinfer1::builder::Tensor const*, std::hash<nvinfer1::builder::Tensor const*>, std::equal_to<nvinfer1::builder::Tensor const*>, std::allocator<std::pair<nvinfer1::builder::Tensor const* const, nvinfer1::builder::Tensor const*> > >&, int, nvinfer1::builder::AutoDeletingVector<nvinfer1::builder::Algorithm>*) ()
#22 0x0000005557ca65a4 in nvinfer1::builder::EngineTacticSupply::getBestTactic(nvinfer1::builder::Node&, nvinfer1::query::Ports<nvinfer1::builder::SymbolicFormat> const&, bool, nvinfer1::builder::AutoDeletingVector<nvinfer1::builder::Algorithm>*) ()
#23 0x00000055581db7f4 in nvinfer1::builder::(anonymous namespace)::LeafCNode::computeCosts(nvinfer1::builder::TacticSupply&, std::unordered_map<std::string, std::vector<nvinfer1::builder::DynamicRangeSymbol, std::allocator<nvinfer1::builder::DynamicRangeSymbol> >, std::hash<std::string>, std::equal_to<std::string>, std::allocator<std::pair<std::string const, std::vector<nvinfer1::builder::DynamicRangeSymbol, std::allocator<nvinfer1::builder::DynamicRangeSymbol> > > > >*, bool, nvinfer1::IAlgorithmSelector*) ()
#24 0x00000055581d72d4 in nvinfer1::builder::chooseFormatsAndTactics(nvinfer1::builder::Graph&, nvinfer1::builder::TacticSupply&, std::unordered_map<std::string, std::vector<nvinfer1::builder::DynamicRangeSymbol, std::allocator<nvinfer1::builder::DynamicRangeSymbol> >, std::hash<std::string>, std::equal_to<std::string>, std::allocator<std::pair<std::string const, std::vector<nvinfer1::builder::DynamicRangeSymbol, std::allocator<nvinfer1::builder::DynamicRangeSymbol> > > > >*, nvinfer1::NetworkBuildConfig const&) ()
#25 0x0000005557cab4b8 in nvinfer1::builder::(anonymous namespace)::makeEngineFromGraph(nvinfer1::Network const&, nvinfer1::NetworkBuildConfig const&, nvinfer1::NetworkQuantizationConfig const&, nvinfer1::builder::EngineBuildContext const&, nvinfer1::builder::Graph&, std::unordered_map<std::string, std::vector<nvinfer1::builder::DynamicRangeSymbol, std::allocator<nvinfer1::builder::DynamicRangeSymbol> >, std::hash<std::string>, std::equal_to<std::string>, std::allocator<std::pair<std::string const, std::vector<nvinfer1::builder::DynamicRangeSymbol, std::allocator<nvinfer1::builder::DynamicRangeSymbol> > > > >*, int, bool, bool) ()
#26 0x0000005557cb0224 in nvinfer1::builder::buildEngine(nvinfer1::NetworkBuildConfig&, nvinfer1::NetworkQuantizationConfig const&, nvinfer1::builder::EngineBuildContext const&, nvinfer1::Network const&) ()
By “never finishes” I mean that I waited for a few hours, although the call should complete within seconds or minutes.
What could be causing this? Thanks!
Details:
jetson:~$ jetson_release -v
- NVIDIA Jetson Nano (Developer Kit Version)
* Jetpack 4.4.1 [L4T 32.4.4]
* NV Power Mode: MAXN - Type: 0
* jetson_stats.service: active
- Board info:
* Type: Nano (Developer Kit Version)
* SOC Family: tegra210 - ID:33
* Module: P3448-0000 - Board: P3449-0000
* Code Name: porg
* Boardids: 3448
* CUDA GPU architecture (ARCH_BIN): 5.3
* Serial Number: 1421121071189
- Libraries:
* CUDA: 10.2.89
* cuDNN: 8.0.0.180
* TensorRT: 7.1.3.0
* Visionworks: 1.6.0.501
* OpenCV: 4.1.1 compiled CUDA: NO
* VPI: 0.4.4
* Vulkan: 1.2.70