&&&& RUNNING TensorRT.trtexec # /home/ubuntu/klass_fr/tensorrt-5.1.6.0/aarch64-linux-gnu/samples/trtexec/build/trtexec --uff=./libs/klass_fr/tensor_rt/data/ssrnet_nchw.uff --uffInput=input_1,3,64,64 --output=ssr_function/mul_6 --useSpinWait [I] uff: ./libs/klass_fr/tensor_rt/data/ssrnet_nchw.uff [I] uffInput: input_1,3,64,64 [I] output: ssr_function/mul_6 [I] useSpinWait y3.Conv2D.1/Conv2D 0.101ms y3.tanh.1/Tanh 0.153ms y3.MaxPool2D.1/MaxPool 0.074ms y3.Conv2D.2/Conv2D 0.090ms y3.tanh.2/Tanh 0.142ms y3.Conv2D.3/Conv2D 0.076ms y3.tanh.3/Tanh 0.159ms y3.MaxPool2D.2/MaxPool 0.075ms y2.Conv2D.1/Conv2D 0.074ms y2.tanh.1/Tanh 0.150ms y2.Conv2D.2/Conv2D 0.077ms y2.tanh.2/Tanh 0.151ms y2.MaxPool2D.1/MaxPool 0.075ms y1.Conv2D.1/Conv2D 0.084ms y1.tanh.1/Tanh 0.139ms y1.Conv2D.2/Conv2D 0.087ms y1.tanh.2/Tanh 0.135ms s1.fusion.y.reshape.1/Conv2D 0.084ms s1.fusion.y.reshape.1/Tanh 0.153ms s1.fusion.y.maxpool2D.1/MaxPool 0.073ms x3.Conv2D.1/Conv2D + x3.relu.1/Relu 0.092ms x3.AvePool2D.1/AvgPool 0.055ms x3.Conv2D.2/Conv2D + x3.relu.2/Relu 0.077ms x3.Conv2D.3/Conv2D + x3.relu.3/Relu 0.067ms x3.AvePool2D.2/AvgPool 0.072ms x2.Conv2D.1/Conv2D + x2.relu.1/Relu 0.071ms x2.Conv2D.2/Conv2D + x2.relu.2/Relu 0.088ms x2.AvePool2D.1/AvgPool 0.057ms x1.Conv2D.1/Conv2D + x1.relu.1/Relu 0.081ms x1.Conv2D.2/Conv2D + x1.relu.2/Relu 0.078ms s1.fusion.x.conv2d.1/Conv2D + s1.fusion. 0.061ms s1.x.fusion.avepool2D.1/AvgPool 0.074ms s1.fusion.mul/mul 0.072ms s1.fusion.nchw.reshape1 + (Unnamed Layer 0.081ms s1.fusion.offset.dense.1/MatMul 0.073ms s1.fusion.offset.dense.1/BiasAdd + s1.fu 0.078ms s1.fusion.offset.dense.2/MatMul 0.070ms s1.fusion.offset.dense.2/BiasAdd + s1.fu 0.073ms ssr_function/add 0.072ms s2.fusion.y.reshape.1/Conv2D 0.074ms s2.fusion.y.reshape.1/Tanh 0.152ms s2.fusion.y.maxpool2D.1/MaxPool 0.075ms s2.fusion.x.conv2d.1/Conv2D + s2.fusion. 0.073ms s2.x.fusion.avepool2D.1/AvgPool 0.074ms s2.fusion.mul/mul 0.072ms s2.fusion.nchw.reshape1 + (Unnamed Layer 0.076ms s2.fusion.offset.dense.1/MatMul 0.070ms s2.fusion.offset.dense.1/BiasAdd + s2.fu 0.075ms s2.fusion.offset.dense.2/MatMul 0.070ms s2.fusion.offset.dense.2/BiasAdd + s2.fu 0.072ms ssr_function/add_1 0.071ms ssr_function/add copy 0.080ms s1.fusion.offset.dense.2/Tanh copy 0.080ms ssr_function/add_1 copy 0.079ms s1.fusion.pred.dense.1/MatMul 0.070ms s1.fusion.pred.dense.1/BiasAdd + s1.fusi 0.074ms s1.fusion.offset.pred.reshape.1/Reshape 0.089ms ssr_function/ssrf.s1.mul.1 0.070ms ssr_function/ssrf.s1.reshape.1 0.078ms ssr_function/ssrf.s1.matmul.1 0.075ms s1.fusion.delta.dense.1/MatMul 0.069ms s1.fusion.delta.dense.1/BiasAdd + s1.fus 0.072ms s1.fusion.delta.dense.2/MatMul 0.069ms s1.fusion.delta.dense.2/BiasAdd + s1.fus 0.073ms ssr_function/add_6 0.073ms ssr_function/mul 0.071ms ssr_function/truediv 0.070ms ssr_function/add_2 0.072ms ssr_function/add_3 0.074ms ssr_function/add_2 copy 0.078ms s2.fusion.offset.dense.2/Tanh copy 0.083ms ssr_function/add_3 copy 0.078ms s2.fusion.pred.dense.1/MatMul 0.071ms s2.fusion.pred.dense.1/BiasAdd + s2.fusi 0.072ms s2.fusion.offset.pred.reshape.1/Reshape 0.082ms ssr_function/ssrf.s2.mul.1 0.071ms ssr_function/ssrf.s2.reshape.1 0.079ms ssr_function/ssrf.s2.matmul.1 0.071ms s2.fusion.delta.dense.1/MatMul 0.069ms s2.fusion.delta.dense.1/BiasAdd + s2.fus 0.072ms s2.fusion.delta.dense.2/MatMul 0.069ms s2.fusion.delta.dense.2/BiasAdd + s2.fus 0.073ms ssr_function/add_7 0.072ms ssr_function/mul_1 0.071ms ssr_function/mul_3 0.072ms ssr_function/truediv_1 0.070ms ssr_function/add_9 0.069ms s3.fusion.y.reshape.1/Conv2D 0.079ms s3.fusion.y.reshape.1/Tanh 0.156ms s3.fusion.y.maxpool2D.1/MaxPool 0.073ms s3.fusion.x.conv2d.1/Conv2D + s3.fusion. 0.078ms s3.x.fusion.avepool2D.1/AvgPool 0.071ms s3.fusion.mul/mul 0.070ms s3.fusion.nchw.reshape1 + (Unnamed Layer 0.077ms s3.fusion.offset.dense.1/MatMul 0.081ms s3.fusion.offset.dense.1/BiasAdd + s3.fu 0.062ms s3.fusion.offset.dense.2/MatMul 0.085ms s3.fusion.offset.dense.2/BiasAdd + s3.fu 0.075ms ssr_function/add_4 0.070ms ssr_function/add_5 0.071ms ssr_function/add_4 copy 0.084ms s3.fusion.offset.dense.2/Tanh copy 0.076ms ssr_function/add_5 copy 0.074ms s3.fusion.pred.dense.1/MatMul 0.081ms s3.fusion.pred.dense.1/BiasAdd + s3.fusi 0.060ms s3.fusion.offset.pred.reshape.1/Reshape 0.083ms ssr_function/ssrf.s3.mul.1 0.073ms ssr_function/ssrf.s3.reshape.1 0.078ms ssr_function/ssrf.s3.matmul.1 0.071ms ssr_function/mul_4 0.070ms s3.fusion.delta.dense.1/MatMul 0.080ms s3.fusion.delta.dense.1/BiasAdd + s3.fus 0.060ms s3.fusion.delta.dense.2/MatMul 0.069ms s3.fusion.delta.dense.2/BiasAdd + s3.fus 0.075ms ssr_function/add_8 0.070ms ssr_function/mul_2 0.071ms ssr_function/mul_5 0.069ms ssr_function/truediv_2 0.069ms ssr_function/add_10 0.068ms ssr_function/mul_6 0.070ms Time over all layers: 9.655 [I] Average over 10 runs is 11.1786 ms (host walltime is 11.2108 ms, 99% percentile time is 11.843). y3.Conv2D.1/Conv2D 0.198ms y3.tanh.1/Tanh 0.283ms y3.MaxPool2D.1/MaxPool 0.143ms y3.Conv2D.2/Conv2D 0.173ms y3.tanh.2/Tanh 0.286ms y3.Conv2D.3/Conv2D 0.149ms y3.tanh.3/Tanh 0.303ms y3.MaxPool2D.2/MaxPool 0.144ms y2.Conv2D.1/Conv2D 0.145ms y2.tanh.1/Tanh 0.296ms y2.Conv2D.2/Conv2D 0.148ms y2.tanh.2/Tanh 0.296ms y2.MaxPool2D.1/MaxPool 0.146ms y1.Conv2D.1/Conv2D 0.164ms y1.tanh.1/Tanh 0.273ms y1.Conv2D.2/Conv2D 0.171ms y1.tanh.2/Tanh 0.268ms s1.fusion.y.reshape.1/Conv2D 0.158ms s1.fusion.y.reshape.1/Tanh 0.301ms s1.fusion.y.maxpool2D.1/MaxPool 0.143ms x3.Conv2D.1/Conv2D + x3.relu.1/Relu 0.182ms x3.AvePool2D.1/AvgPool 0.104ms x3.Conv2D.2/Conv2D + x3.relu.2/Relu 0.152ms x3.Conv2D.3/Conv2D + x3.relu.3/Relu 0.129ms x3.AvePool2D.2/AvgPool 0.138ms x2.Conv2D.1/Conv2D + x2.relu.1/Relu 0.139ms x2.Conv2D.2/Conv2D + x2.relu.2/Relu 0.175ms x2.AvePool2D.1/AvgPool 0.110ms x1.Conv2D.1/Conv2D + x1.relu.1/Relu 0.160ms x1.Conv2D.2/Conv2D + x1.relu.2/Relu 0.150ms s1.fusion.x.conv2d.1/Conv2D + s1.fusion. 0.114ms s1.x.fusion.avepool2D.1/AvgPool 0.142ms s1.fusion.mul/mul 0.139ms s1.fusion.nchw.reshape1 + (Unnamed Layer 0.154ms s1.fusion.offset.dense.1/MatMul 0.145ms s1.fusion.offset.dense.1/BiasAdd + s1.fu 0.152ms s1.fusion.offset.dense.2/MatMul 0.137ms s1.fusion.offset.dense.2/BiasAdd + s1.fu 0.146ms ssr_function/add 0.144ms s2.fusion.y.reshape.1/Conv2D 0.144ms s2.fusion.y.reshape.1/Tanh 0.300ms s2.fusion.y.maxpool2D.1/MaxPool 0.145ms s2.fusion.x.conv2d.1/Conv2D + s2.fusion. 0.142ms s2.x.fusion.avepool2D.1/AvgPool 0.141ms s2.fusion.mul/mul 0.140ms s2.fusion.nchw.reshape1 + (Unnamed Layer 0.148ms s2.fusion.offset.dense.1/MatMul 0.138ms s2.fusion.offset.dense.1/BiasAdd + s2.fu 0.145ms s2.fusion.offset.dense.2/MatMul 0.139ms s2.fusion.offset.dense.2/BiasAdd + s2.fu 0.140ms ssr_function/add_1 0.140ms ssr_function/add copy 0.154ms s1.fusion.offset.dense.2/Tanh copy 0.151ms ssr_function/add_1 copy 0.159ms s1.fusion.pred.dense.1/MatMul 0.139ms s1.fusion.pred.dense.1/BiasAdd + s1.fusi 0.143ms s1.fusion.offset.pred.reshape.1/Reshape 0.171ms ssr_function/ssrf.s1.mul.1 0.138ms ssr_function/ssrf.s1.reshape.1 0.153ms ssr_function/ssrf.s1.matmul.1 0.143ms s1.fusion.delta.dense.1/MatMul 0.135ms s1.fusion.delta.dense.1/BiasAdd + s1.fus 0.141ms s1.fusion.delta.dense.2/MatMul 0.136ms s1.fusion.delta.dense.2/BiasAdd + s1.fus 0.142ms ssr_function/add_6 0.142ms ssr_function/mul 0.138ms ssr_function/truediv 0.136ms ssr_function/add_2 0.140ms ssr_function/add_3 0.142ms ssr_function/add_2 copy 0.153ms s2.fusion.offset.dense.2/Tanh copy 0.155ms ssr_function/add_3 copy 0.154ms s2.fusion.pred.dense.1/MatMul 0.138ms s2.fusion.pred.dense.1/BiasAdd + s2.fusi 0.141ms s2.fusion.offset.pred.reshape.1/Reshape 0.162ms ssr_function/ssrf.s2.mul.1 0.138ms ssr_function/ssrf.s2.reshape.1 0.155ms ssr_function/ssrf.s2.matmul.1 0.139ms s2.fusion.delta.dense.1/MatMul 0.135ms s2.fusion.delta.dense.1/BiasAdd + s2.fus 0.142ms s2.fusion.delta.dense.2/MatMul 0.136ms s2.fusion.delta.dense.2/BiasAdd + s2.fus 0.142ms ssr_function/add_7 0.141ms ssr_function/mul_1 0.139ms ssr_function/mul_3 0.139ms ssr_function/truediv_1 0.136ms ssr_function/add_9 0.136ms s3.fusion.y.reshape.1/Conv2D 0.154ms s3.fusion.y.reshape.1/Tanh 0.303ms s3.fusion.y.maxpool2D.1/MaxPool 0.148ms s3.fusion.x.conv2d.1/Conv2D + s3.fusion. 0.150ms s3.x.fusion.avepool2D.1/AvgPool 0.143ms s3.fusion.mul/mul 0.141ms s3.fusion.nchw.reshape1 + (Unnamed Layer 0.149ms s3.fusion.offset.dense.1/MatMul 0.163ms s3.fusion.offset.dense.1/BiasAdd + s3.fu 0.120ms s3.fusion.offset.dense.2/MatMul 0.154ms s3.fusion.offset.dense.2/BiasAdd + s3.fu 0.145ms ssr_function/add_4 0.139ms ssr_function/add_5 0.139ms ssr_function/add_4 copy 0.158ms s3.fusion.offset.dense.2/Tanh copy 0.149ms ssr_function/add_5 copy 0.146ms s3.fusion.pred.dense.1/MatMul 0.161ms s3.fusion.pred.dense.1/BiasAdd + s3.fusi 0.117ms s3.fusion.offset.pred.reshape.1/Reshape 0.163ms ssr_function/ssrf.s3.mul.1 0.142ms ssr_function/ssrf.s3.reshape.1 0.154ms ssr_function/ssrf.s3.matmul.1 0.144ms ssr_function/mul_4 0.140ms s3.fusion.delta.dense.1/MatMul 0.160ms s3.fusion.delta.dense.1/BiasAdd + s3.fus 0.118ms s3.fusion.delta.dense.2/MatMul 0.136ms s3.fusion.delta.dense.2/BiasAdd + s3.fus 0.145ms ssr_function/add_8 0.138ms ssr_function/mul_2 0.140ms ssr_function/mul_5 0.136ms ssr_function/truediv_2 0.136ms ssr_function/add_10 0.135ms ssr_function/mul_6 0.140ms Time over all layers: 18.852 [I] Average over 10 runs is 10.6261 ms (host walltime is 10.6551 ms, 99% percentile time is 10.8362). y3.Conv2D.1/Conv2D 0.291ms y3.tanh.1/Tanh 0.418ms y3.MaxPool2D.1/MaxPool 0.217ms y3.Conv2D.2/Conv2D 0.256ms y3.tanh.2/Tanh 0.423ms y3.Conv2D.3/Conv2D 0.222ms y3.tanh.3/Tanh 0.447ms y3.MaxPool2D.2/MaxPool 0.213ms y2.Conv2D.1/Conv2D 0.216ms y2.tanh.1/Tanh 0.442ms y2.Conv2D.2/Conv2D 0.219ms y2.tanh.2/Tanh 0.441ms y2.MaxPool2D.1/MaxPool 0.216ms y1.Conv2D.1/Conv2D 0.246ms y1.tanh.1/Tanh 0.412ms y1.Conv2D.2/Conv2D 0.258ms y1.tanh.2/Tanh 0.398ms s1.fusion.y.reshape.1/Conv2D 0.229ms s1.fusion.y.reshape.1/Tanh 0.447ms s1.fusion.y.maxpool2D.1/MaxPool 0.214ms x3.Conv2D.1/Conv2D + x3.relu.1/Relu 0.271ms x3.AvePool2D.1/AvgPool 0.154ms x3.Conv2D.2/Conv2D + x3.relu.2/Relu 0.226ms x3.Conv2D.3/Conv2D + x3.relu.3/Relu 0.191ms x3.AvePool2D.2/AvgPool 0.206ms x2.Conv2D.1/Conv2D + x2.relu.1/Relu 0.208ms x2.Conv2D.2/Conv2D + x2.relu.2/Relu 0.260ms x2.AvePool2D.1/AvgPool 0.164ms x1.Conv2D.1/Conv2D + x1.relu.1/Relu 0.239ms x1.Conv2D.2/Conv2D + x1.relu.2/Relu 0.222ms s1.fusion.x.conv2d.1/Conv2D + s1.fusion. 0.168ms s1.x.fusion.avepool2D.1/AvgPool 0.216ms s1.fusion.mul/mul 0.209ms s1.fusion.nchw.reshape1 + (Unnamed Layer 0.228ms s1.fusion.offset.dense.1/MatMul 0.214ms s1.fusion.offset.dense.1/BiasAdd + s1.fu 0.222ms s1.fusion.offset.dense.2/MatMul 0.205ms s1.fusion.offset.dense.2/BiasAdd + s1.fu 0.215ms ssr_function/add 0.214ms s2.fusion.y.reshape.1/Conv2D 0.214ms s2.fusion.y.reshape.1/Tanh 0.448ms s2.fusion.y.maxpool2D.1/MaxPool 0.215ms s2.fusion.x.conv2d.1/Conv2D + s2.fusion. 0.211ms s2.x.fusion.avepool2D.1/AvgPool 0.210ms s2.fusion.mul/mul 0.206ms s2.fusion.nchw.reshape1 + (Unnamed Layer 0.221ms s2.fusion.offset.dense.1/MatMul 0.205ms s2.fusion.offset.dense.1/BiasAdd + s2.fu 0.215ms s2.fusion.offset.dense.2/MatMul 0.208ms s2.fusion.offset.dense.2/BiasAdd + s2.fu 0.219ms ssr_function/add_1 0.209ms ssr_function/add copy 0.233ms s1.fusion.offset.dense.2/Tanh copy 0.224ms ssr_function/add_1 copy 0.231ms s1.fusion.pred.dense.1/MatMul 0.206ms s1.fusion.pred.dense.1/BiasAdd + s1.fusi 0.213ms s1.fusion.offset.pred.reshape.1/Reshape 0.251ms ssr_function/ssrf.s1.mul.1 0.206ms ssr_function/ssrf.s1.reshape.1 0.230ms ssr_function/ssrf.s1.matmul.1 0.212ms s1.fusion.delta.dense.1/MatMul 0.201ms s1.fusion.delta.dense.1/BiasAdd + s1.fus 0.211ms s1.fusion.delta.dense.2/MatMul 0.203ms s1.fusion.delta.dense.2/BiasAdd + s1.fus 0.213ms ssr_function/add_6 0.212ms ssr_function/mul 0.206ms ssr_function/truediv 0.203ms ssr_function/add_2 0.214ms ssr_function/add_3 0.211ms ssr_function/add_2 copy 0.230ms s2.fusion.offset.dense.2/Tanh copy 0.227ms ssr_function/add_3 copy 0.227ms s2.fusion.pred.dense.1/MatMul 0.206ms s2.fusion.pred.dense.1/BiasAdd + s2.fusi 0.211ms s2.fusion.offset.pred.reshape.1/Reshape 0.239ms ssr_function/ssrf.s2.mul.1 0.207ms ssr_function/ssrf.s2.reshape.1 0.230ms ssr_function/ssrf.s2.matmul.1 0.208ms s2.fusion.delta.dense.1/MatMul 0.202ms s2.fusion.delta.dense.1/BiasAdd + s2.fus 0.212ms s2.fusion.delta.dense.2/MatMul 0.203ms s2.fusion.delta.dense.2/BiasAdd + s2.fus 0.213ms ssr_function/add_7 0.210ms ssr_function/mul_1 0.207ms ssr_function/mul_3 0.206ms ssr_function/truediv_1 0.203ms ssr_function/add_9 0.203ms s3.fusion.y.reshape.1/Conv2D 0.233ms s3.fusion.y.reshape.1/Tanh 0.455ms s3.fusion.y.maxpool2D.1/MaxPool 0.218ms s3.fusion.x.conv2d.1/Conv2D + s3.fusion. 0.221ms s3.x.fusion.avepool2D.1/AvgPool 0.212ms s3.fusion.mul/mul 0.208ms s3.fusion.nchw.reshape1 + (Unnamed Layer 0.222ms s3.fusion.offset.dense.1/MatMul 0.242ms s3.fusion.offset.dense.1/BiasAdd + s3.fu 0.178ms s3.fusion.offset.dense.2/MatMul 0.222ms s3.fusion.offset.dense.2/BiasAdd + s3.fu 0.214ms ssr_function/add_4 0.207ms ssr_function/add_5 0.208ms ssr_function/add_4 copy 0.233ms s3.fusion.offset.dense.2/Tanh copy 0.220ms ssr_function/add_5 copy 0.219ms s3.fusion.pred.dense.1/MatMul 0.241ms s3.fusion.pred.dense.1/BiasAdd + s3.fusi 0.179ms s3.fusion.offset.pred.reshape.1/Reshape 0.243ms ssr_function/ssrf.s3.mul.1 0.209ms ssr_function/ssrf.s3.reshape.1 0.232ms ssr_function/ssrf.s3.matmul.1 0.212ms ssr_function/mul_4 0.207ms s3.fusion.delta.dense.1/MatMul 0.239ms s3.fusion.delta.dense.1/BiasAdd + s3.fus 0.176ms s3.fusion.delta.dense.2/MatMul 0.203ms s3.fusion.delta.dense.2/BiasAdd + s3.fus 0.215ms ssr_function/add_8 0.207ms ssr_function/mul_2 0.210ms ssr_function/mul_5 0.203ms ssr_function/truediv_2 0.202ms ssr_function/add_10 0.201ms ssr_function/mul_6 0.208ms Time over all layers: 28.058 [I] Average over 10 runs is 10.6193 ms (host walltime is 10.6477 ms, 99% percentile time is 10.7692). y3.Conv2D.1/Conv2D 0.386ms y3.tanh.1/Tanh 0.548ms y3.MaxPool2D.1/MaxPool 0.287ms y3.Conv2D.2/Conv2D 0.338ms y3.tanh.2/Tanh 0.558ms y3.Conv2D.3/Conv2D 0.295ms y3.tanh.3/Tanh 0.591ms y3.MaxPool2D.2/MaxPool 0.283ms y2.Conv2D.1/Conv2D 0.287ms y2.tanh.1/Tanh 0.589ms y2.Conv2D.2/Conv2D 0.291ms y2.tanh.2/Tanh 0.592ms y2.MaxPool2D.1/MaxPool 0.289ms y1.Conv2D.1/Conv2D 0.327ms y1.tanh.1/Tanh 0.545ms y1.Conv2D.2/Conv2D 0.342ms y1.tanh.2/Tanh 0.529ms s1.fusion.y.reshape.1/Conv2D 0.299ms s1.fusion.y.reshape.1/Tanh 0.593ms s1.fusion.y.maxpool2D.1/MaxPool 0.283ms x3.Conv2D.1/Conv2D + x3.relu.1/Relu 0.361ms x3.AvePool2D.1/AvgPool 0.203ms x3.Conv2D.2/Conv2D + x3.relu.2/Relu 0.302ms x3.Conv2D.3/Conv2D + x3.relu.3/Relu 0.254ms x3.AvePool2D.2/AvgPool 0.273ms x2.Conv2D.1/Conv2D + x2.relu.1/Relu 0.279ms x2.Conv2D.2/Conv2D + x2.relu.2/Relu 0.348ms x2.AvePool2D.1/AvgPool 0.218ms x1.Conv2D.1/Conv2D + x1.relu.1/Relu 0.320ms x1.Conv2D.2/Conv2D + x1.relu.2/Relu 0.294ms s1.fusion.x.conv2d.1/Conv2D + s1.fusion. 0.222ms s1.x.fusion.avepool2D.1/AvgPool 0.284ms s1.fusion.mul/mul 0.277ms s1.fusion.nchw.reshape1 + (Unnamed Layer 0.301ms s1.fusion.offset.dense.1/MatMul 0.282ms s1.fusion.offset.dense.1/BiasAdd + s1.fu 0.292ms s1.fusion.offset.dense.2/MatMul 0.272ms s1.fusion.offset.dense.2/BiasAdd + s1.fu 0.285ms ssr_function/add 0.283ms s2.fusion.y.reshape.1/Conv2D 0.284ms s2.fusion.y.reshape.1/Tanh 0.595ms s2.fusion.y.maxpool2D.1/MaxPool 0.284ms s2.fusion.x.conv2d.1/Conv2D + s2.fusion. 0.281ms s2.x.fusion.avepool2D.1/AvgPool 0.277ms s2.fusion.mul/mul 0.272ms s2.fusion.nchw.reshape1 + (Unnamed Layer 0.296ms s2.fusion.offset.dense.1/MatMul 0.275ms s2.fusion.offset.dense.1/BiasAdd + s2.fu 0.285ms s2.fusion.offset.dense.2/MatMul 0.280ms s2.fusion.offset.dense.2/BiasAdd + s2.fu 0.288ms ssr_function/add_1 0.278ms ssr_function/add copy 0.307ms s1.fusion.offset.dense.2/Tanh copy 0.296ms ssr_function/add_1 copy 0.307ms s1.fusion.pred.dense.1/MatMul 0.273ms s1.fusion.pred.dense.1/BiasAdd + s1.fusi 0.282ms s1.fusion.offset.pred.reshape.1/Reshape 0.332ms ssr_function/ssrf.s1.mul.1 0.273ms ssr_function/ssrf.s1.reshape.1 0.306ms ssr_function/ssrf.s1.matmul.1 0.280ms s1.fusion.delta.dense.1/MatMul 0.267ms s1.fusion.delta.dense.1/BiasAdd + s1.fus 0.283ms s1.fusion.delta.dense.2/MatMul 0.272ms s1.fusion.delta.dense.2/BiasAdd + s1.fus 0.283ms ssr_function/add_6 0.286ms ssr_function/mul 0.275ms ssr_function/truediv 0.269ms ssr_function/add_2 0.282ms ssr_function/add_3 0.281ms ssr_function/add_2 copy 0.304ms s2.fusion.offset.dense.2/Tanh copy 0.300ms ssr_function/add_3 copy 0.298ms s2.fusion.pred.dense.1/MatMul 0.273ms s2.fusion.pred.dense.1/BiasAdd + s2.fusi 0.281ms s2.fusion.offset.pred.reshape.1/Reshape 0.317ms ssr_function/ssrf.s2.mul.1 0.275ms ssr_function/ssrf.s2.reshape.1 0.306ms ssr_function/ssrf.s2.matmul.1 0.277ms s2.fusion.delta.dense.1/MatMul 0.268ms s2.fusion.delta.dense.1/BiasAdd + s2.fus 0.282ms s2.fusion.delta.dense.2/MatMul 0.270ms s2.fusion.delta.dense.2/BiasAdd + s2.fus 0.285ms ssr_function/add_7 0.281ms ssr_function/mul_1 0.276ms ssr_function/mul_3 0.275ms ssr_function/truediv_1 0.269ms ssr_function/add_9 0.270ms s3.fusion.y.reshape.1/Conv2D 0.307ms s3.fusion.y.reshape.1/Tanh 0.603ms s3.fusion.y.maxpool2D.1/MaxPool 0.289ms s3.fusion.x.conv2d.1/Conv2D + s3.fusion. 0.293ms s3.x.fusion.avepool2D.1/AvgPool 0.281ms s3.fusion.mul/mul 0.276ms s3.fusion.nchw.reshape1 + (Unnamed Layer 0.294ms s3.fusion.offset.dense.1/MatMul 0.321ms s3.fusion.offset.dense.1/BiasAdd + s3.fu 0.237ms s3.fusion.offset.dense.2/MatMul 0.291ms s3.fusion.offset.dense.2/BiasAdd + s3.fu 0.287ms ssr_function/add_4 0.277ms ssr_function/add_5 0.278ms ssr_function/add_4 copy 0.306ms s3.fusion.offset.dense.2/Tanh copy 0.295ms ssr_function/add_5 copy 0.292ms s3.fusion.pred.dense.1/MatMul 0.324ms s3.fusion.pred.dense.1/BiasAdd + s3.fusi 0.237ms s3.fusion.offset.pred.reshape.1/Reshape 0.322ms ssr_function/ssrf.s3.mul.1 0.277ms ssr_function/ssrf.s3.reshape.1 0.308ms ssr_function/ssrf.s3.matmul.1 0.281ms ssr_function/mul_4 0.274ms s3.fusion.delta.dense.1/MatMul 0.318ms s3.fusion.delta.dense.1/BiasAdd + s3.fus 0.234ms s3.fusion.delta.dense.2/MatMul 0.270ms s3.fusion.delta.dense.2/BiasAdd + s3.fus 0.284ms ssr_function/add_8 0.275ms ssr_function/mul_2 0.278ms ssr_function/mul_5 0.269ms ssr_function/truediv_2 0.270ms ssr_function/add_10 0.270ms ssr_function/mul_6 0.280ms Time over all layers: 37.258 [I] Average over 10 runs is 10.6179 ms (host walltime is 10.6462 ms, 99% percentile time is 10.7702). y3.Conv2D.1/Conv2D 0.479ms y3.tanh.1/Tanh 0.676ms y3.MaxPool2D.1/MaxPool 0.357ms y3.Conv2D.2/Conv2D 0.419ms y3.tanh.2/Tanh 0.694ms y3.Conv2D.3/Conv2D 0.367ms y3.tanh.3/Tanh 0.736ms y3.MaxPool2D.2/MaxPool 0.352ms y2.Conv2D.1/Conv2D 0.358ms y2.tanh.1/Tanh 0.738ms y2.Conv2D.2/Conv2D 0.361ms y2.tanh.2/Tanh 0.738ms y2.MaxPool2D.1/MaxPool 0.361ms y1.Conv2D.1/Conv2D 0.408ms y1.tanh.1/Tanh 0.678ms y1.Conv2D.2/Conv2D 0.426ms y1.tanh.2/Tanh 0.658ms s1.fusion.y.reshape.1/Conv2D 0.369ms s1.fusion.y.reshape.1/Tanh 0.739ms s1.fusion.y.maxpool2D.1/MaxPool 0.353ms x3.Conv2D.1/Conv2D + x3.relu.1/Relu 0.450ms x3.AvePool2D.1/AvgPool 0.255ms x3.Conv2D.2/Conv2D + x3.relu.2/Relu 0.379ms x3.Conv2D.3/Conv2D + x3.relu.3/Relu 0.316ms x3.AvePool2D.2/AvgPool 0.340ms x2.Conv2D.1/Conv2D + x2.relu.1/Relu 0.348ms x2.Conv2D.2/Conv2D + x2.relu.2/Relu 0.433ms x2.AvePool2D.1/AvgPool 0.276ms x1.Conv2D.1/Conv2D + x1.relu.1/Relu 0.399ms x1.Conv2D.2/Conv2D + x1.relu.2/Relu 0.368ms s1.fusion.x.conv2d.1/Conv2D + s1.fusion. 0.275ms s1.x.fusion.avepool2D.1/AvgPool 0.353ms s1.fusion.mul/mul 0.345ms s1.fusion.nchw.reshape1 + (Unnamed Layer 0.374ms s1.fusion.offset.dense.1/MatMul 0.351ms s1.fusion.offset.dense.1/BiasAdd + s1.fu 0.363ms s1.fusion.offset.dense.2/MatMul 0.339ms s1.fusion.offset.dense.2/BiasAdd + s1.fu 0.354ms ssr_function/add 0.352ms s2.fusion.y.reshape.1/Conv2D 0.353ms s2.fusion.y.reshape.1/Tanh 0.746ms s2.fusion.y.maxpool2D.1/MaxPool 0.354ms s2.fusion.x.conv2d.1/Conv2D + s2.fusion. 0.351ms s2.x.fusion.avepool2D.1/AvgPool 0.344ms s2.fusion.mul/mul 0.341ms s2.fusion.nchw.reshape1 + (Unnamed Layer 0.374ms s2.fusion.offset.dense.1/MatMul 0.343ms s2.fusion.offset.dense.1/BiasAdd + s2.fu 0.355ms s2.fusion.offset.dense.2/MatMul 0.348ms s2.fusion.offset.dense.2/BiasAdd + s2.fu 0.359ms ssr_function/add_1 0.347ms ssr_function/add copy 0.383ms s1.fusion.offset.dense.2/Tanh copy 0.369ms ssr_function/add_1 copy 0.379ms s1.fusion.pred.dense.1/MatMul 0.340ms s1.fusion.pred.dense.1/BiasAdd + s1.fusi 0.351ms s1.fusion.offset.pred.reshape.1/Reshape 0.417ms ssr_function/ssrf.s1.mul.1 0.342ms ssr_function/ssrf.s1.reshape.1 0.382ms ssr_function/ssrf.s1.matmul.1 0.348ms s1.fusion.delta.dense.1/MatMul 0.336ms s1.fusion.delta.dense.1/BiasAdd + s1.fus 0.353ms s1.fusion.delta.dense.2/MatMul 0.340ms s1.fusion.delta.dense.2/BiasAdd + s1.fus 0.352ms ssr_function/add_6 0.357ms ssr_function/mul 0.342ms ssr_function/truediv 0.336ms ssr_function/add_2 0.352ms ssr_function/add_3 0.349ms ssr_function/add_2 copy 0.378ms s2.fusion.offset.dense.2/Tanh copy 0.372ms ssr_function/add_3 copy 0.370ms s2.fusion.pred.dense.1/MatMul 0.340ms s2.fusion.pred.dense.1/BiasAdd + s2.fusi 0.350ms s2.fusion.offset.pred.reshape.1/Reshape 0.394ms ssr_function/ssrf.s2.mul.1 0.342ms ssr_function/ssrf.s2.reshape.1 0.384ms ssr_function/ssrf.s2.matmul.1 0.346ms s2.fusion.delta.dense.1/MatMul 0.337ms s2.fusion.delta.dense.1/BiasAdd + s2.fus 0.351ms s2.fusion.delta.dense.2/MatMul 0.338ms s2.fusion.delta.dense.2/BiasAdd + s2.fus 0.355ms ssr_function/add_7 0.351ms ssr_function/mul_1 0.344ms ssr_function/mul_3 0.342ms ssr_function/truediv_1 0.337ms ssr_function/add_9 0.338ms s3.fusion.y.reshape.1/Conv2D 0.382ms s3.fusion.y.reshape.1/Tanh 0.751ms s3.fusion.y.maxpool2D.1/MaxPool 0.359ms s3.fusion.x.conv2d.1/Conv2D + s3.fusion. 0.364ms s3.x.fusion.avepool2D.1/AvgPool 0.350ms s3.fusion.mul/mul 0.342ms s3.fusion.nchw.reshape1 + (Unnamed Layer 0.367ms s3.fusion.offset.dense.1/MatMul 0.402ms s3.fusion.offset.dense.1/BiasAdd + s3.fu 0.294ms s3.fusion.offset.dense.2/MatMul 0.359ms s3.fusion.offset.dense.2/BiasAdd + s3.fu 0.359ms ssr_function/add_4 0.345ms ssr_function/add_5 0.347ms ssr_function/add_4 copy 0.383ms s3.fusion.offset.dense.2/Tanh copy 0.367ms ssr_function/add_5 copy 0.364ms s3.fusion.pred.dense.1/MatMul 0.404ms s3.fusion.pred.dense.1/BiasAdd + s3.fusi 0.292ms s3.fusion.offset.pred.reshape.1/Reshape 0.400ms ssr_function/ssrf.s3.mul.1 0.346ms ssr_function/ssrf.s3.reshape.1 0.383ms ssr_function/ssrf.s3.matmul.1 0.349ms ssr_function/mul_4 0.340ms s3.fusion.delta.dense.1/MatMul 0.396ms s3.fusion.delta.dense.1/BiasAdd + s3.fus 0.291ms s3.fusion.delta.dense.2/MatMul 0.338ms s3.fusion.delta.dense.2/BiasAdd + s3.fus 0.354ms ssr_function/add_8 0.343ms ssr_function/mul_2 0.347ms ssr_function/mul_5 0.336ms ssr_function/truediv_2 0.336ms ssr_function/add_10 0.337ms ssr_function/mul_6 0.349ms Time over all layers: 46.423 [I] Average over 10 runs is 10.5735 ms (host walltime is 10.6016 ms, 99% percentile time is 10.7082). y3.Conv2D.1/Conv2D 0.573ms y3.tanh.1/Tanh 0.806ms y3.MaxPool2D.1/MaxPool 0.426ms y3.Conv2D.2/Conv2D 0.502ms y3.tanh.2/Tanh 0.829ms y3.Conv2D.3/Conv2D 0.440ms y3.tanh.3/Tanh 0.880ms y3.MaxPool2D.2/MaxPool 0.423ms y2.Conv2D.1/Conv2D 0.432ms y2.tanh.1/Tanh 0.885ms y2.Conv2D.2/Conv2D 0.433ms y2.tanh.2/Tanh 0.883ms y2.MaxPool2D.1/MaxPool 0.431ms y1.Conv2D.1/Conv2D 0.489ms y1.tanh.1/Tanh 0.811ms y1.Conv2D.2/Conv2D 0.509ms y1.tanh.2/Tanh 0.788ms s1.fusion.y.reshape.1/Conv2D 0.440ms s1.fusion.y.reshape.1/Tanh 0.884ms s1.fusion.y.maxpool2D.1/MaxPool 0.423ms x3.Conv2D.1/Conv2D + x3.relu.1/Relu 0.541ms x3.AvePool2D.1/AvgPool 0.305ms x3.Conv2D.2/Conv2D + x3.relu.2/Relu 0.456ms x3.Conv2D.3/Conv2D + x3.relu.3/Relu 0.379ms x3.AvePool2D.2/AvgPool 0.407ms x2.Conv2D.1/Conv2D + x2.relu.1/Relu 0.417ms x2.Conv2D.2/Conv2D + x2.relu.2/Relu 0.519ms x2.AvePool2D.1/AvgPool 0.329ms x1.Conv2D.1/Conv2D + x1.relu.1/Relu 0.478ms x1.Conv2D.2/Conv2D + x1.relu.2/Relu 0.440ms s1.fusion.x.conv2d.1/Conv2D + s1.fusion. 0.329ms s1.x.fusion.avepool2D.1/AvgPool 0.422ms s1.fusion.mul/mul 0.412ms s1.fusion.nchw.reshape1 + (Unnamed Layer 0.449ms s1.fusion.offset.dense.1/MatMul 0.420ms s1.fusion.offset.dense.1/BiasAdd + s1.fu 0.433ms s1.fusion.offset.dense.2/MatMul 0.407ms s1.fusion.offset.dense.2/BiasAdd + s1.fu 0.423ms ssr_function/add 0.422ms s2.fusion.y.reshape.1/Conv2D 0.423ms s2.fusion.y.reshape.1/Tanh 0.897ms s2.fusion.y.maxpool2D.1/MaxPool 0.427ms s2.fusion.x.conv2d.1/Conv2D + s2.fusion. 0.420ms s2.x.fusion.avepool2D.1/AvgPool 0.413ms s2.fusion.mul/mul 0.408ms s2.fusion.nchw.reshape1 + (Unnamed Layer 0.452ms s2.fusion.offset.dense.1/MatMul 0.411ms s2.fusion.offset.dense.1/BiasAdd + s2.fu 0.425ms s2.fusion.offset.dense.2/MatMul 0.416ms s2.fusion.offset.dense.2/BiasAdd + s2.fu 0.428ms ssr_function/add_1 0.416ms ssr_function/add copy 0.458ms s1.fusion.offset.dense.2/Tanh copy 0.442ms ssr_function/add_1 copy 0.451ms s1.fusion.pred.dense.1/MatMul 0.407ms s1.fusion.pred.dense.1/BiasAdd + s1.fusi 0.421ms s1.fusion.offset.pred.reshape.1/Reshape 0.497ms ssr_function/ssrf.s1.mul.1 0.412ms ssr_function/ssrf.s1.reshape.1 0.461ms ssr_function/ssrf.s1.matmul.1 0.417ms s1.fusion.delta.dense.1/MatMul 0.404ms s1.fusion.delta.dense.1/BiasAdd + s1.fus 0.424ms s1.fusion.delta.dense.2/MatMul 0.407ms s1.fusion.delta.dense.2/BiasAdd + s1.fus 0.421ms ssr_function/add_6 0.426ms ssr_function/mul 0.410ms ssr_function/truediv 0.402ms ssr_function/add_2 0.420ms ssr_function/add_3 0.417ms ssr_function/add_2 copy 0.452ms s2.fusion.offset.dense.2/Tanh copy 0.444ms ssr_function/add_3 copy 0.441ms s2.fusion.pred.dense.1/MatMul 0.409ms s2.fusion.pred.dense.1/BiasAdd + s2.fusi 0.419ms s2.fusion.offset.pred.reshape.1/Reshape 0.472ms ssr_function/ssrf.s2.mul.1 0.410ms ssr_function/ssrf.s2.reshape.1 0.463ms ssr_function/ssrf.s2.matmul.1 0.414ms s2.fusion.delta.dense.1/MatMul 0.405ms s2.fusion.delta.dense.1/BiasAdd + s2.fus 0.421ms s2.fusion.delta.dense.2/MatMul 0.405ms s2.fusion.delta.dense.2/BiasAdd + s2.fus 0.426ms ssr_function/add_7 0.421ms ssr_function/mul_1 0.412ms ssr_function/mul_3 0.408ms ssr_function/truediv_1 0.404ms ssr_function/add_9 0.404ms s3.fusion.y.reshape.1/Conv2D 0.457ms s3.fusion.y.reshape.1/Tanh 0.900ms s3.fusion.y.maxpool2D.1/MaxPool 0.430ms s3.fusion.x.conv2d.1/Conv2D + s3.fusion. 0.436ms s3.x.fusion.avepool2D.1/AvgPool 0.419ms s3.fusion.mul/mul 0.410ms s3.fusion.nchw.reshape1 + (Unnamed Layer 0.439ms s3.fusion.offset.dense.1/MatMul 0.482ms s3.fusion.offset.dense.1/BiasAdd + s3.fu 0.357ms s3.fusion.offset.dense.2/MatMul 0.430ms s3.fusion.offset.dense.2/BiasAdd + s3.fu 0.429ms ssr_function/add_4 0.414ms ssr_function/add_5 0.415ms ssr_function/add_4 copy 0.457ms s3.fusion.offset.dense.2/Tanh copy 0.442ms ssr_function/add_5 copy 0.437ms s3.fusion.pred.dense.1/MatMul 0.484ms s3.fusion.pred.dense.1/BiasAdd + s3.fusi 0.349ms s3.fusion.offset.pred.reshape.1/Reshape 0.480ms ssr_function/ssrf.s3.mul.1 0.413ms ssr_function/ssrf.s3.reshape.1 0.459ms ssr_function/ssrf.s3.matmul.1 0.418ms ssr_function/mul_4 0.408ms s3.fusion.delta.dense.1/MatMul 0.475ms s3.fusion.delta.dense.1/BiasAdd + s3.fus 0.348ms s3.fusion.delta.dense.2/MatMul 0.405ms s3.fusion.delta.dense.2/BiasAdd + s3.fus 0.427ms ssr_function/add_8 0.412ms ssr_function/mul_2 0.415ms ssr_function/mul_5 0.403ms ssr_function/truediv_2 0.405ms ssr_function/add_10 0.404ms ssr_function/mul_6 0.418ms Time over all layers: 55.607 [I] Average over 10 runs is 10.6017 ms (host walltime is 10.6299 ms, 99% percentile time is 10.7904). y3.Conv2D.1/Conv2D 0.666ms y3.tanh.1/Tanh 0.937ms y3.MaxPool2D.1/MaxPool 0.495ms y3.Conv2D.2/Conv2D 0.584ms y3.tanh.2/Tanh 0.966ms y3.Conv2D.3/Conv2D 0.512ms y3.tanh.3/Tanh 1.028ms y3.MaxPool2D.2/MaxPool 0.492ms y2.Conv2D.1/Conv2D 0.506ms y2.tanh.1/Tanh 1.031ms y2.Conv2D.2/Conv2D 0.506ms y2.tanh.2/Tanh 1.028ms y2.MaxPool2D.1/MaxPool 0.502ms y1.Conv2D.1/Conv2D 0.570ms y1.tanh.1/Tanh 0.944ms y1.Conv2D.2/Conv2D 0.593ms y1.tanh.2/Tanh 0.918ms s1.fusion.y.reshape.1/Conv2D 0.510ms s1.fusion.y.reshape.1/Tanh 1.030ms s1.fusion.y.maxpool2D.1/MaxPool 0.495ms x3.Conv2D.1/Conv2D + x3.relu.1/Relu 0.631ms x3.AvePool2D.1/AvgPool 0.358ms x3.Conv2D.2/Conv2D + x3.relu.2/Relu 0.531ms x3.Conv2D.3/Conv2D + x3.relu.3/Relu 0.441ms x3.AvePool2D.2/AvgPool 0.475ms x2.Conv2D.1/Conv2D + x2.relu.1/Relu 0.485ms x2.Conv2D.2/Conv2D + x2.relu.2/Relu 0.605ms x2.AvePool2D.1/AvgPool 0.384ms x1.Conv2D.1/Conv2D + x1.relu.1/Relu 0.556ms x1.Conv2D.2/Conv2D + x1.relu.2/Relu 0.512ms s1.fusion.x.conv2d.1/Conv2D + s1.fusion. 0.382ms s1.x.fusion.avepool2D.1/AvgPool 0.491ms s1.fusion.mul/mul 0.480ms s1.fusion.nchw.reshape1 + (Unnamed Layer 0.522ms s1.fusion.offset.dense.1/MatMul 0.489ms s1.fusion.offset.dense.1/BiasAdd + s1.fu 0.503ms s1.fusion.offset.dense.2/MatMul 0.475ms s1.fusion.offset.dense.2/BiasAdd + s1.fu 0.493ms ssr_function/add 0.491ms s2.fusion.y.reshape.1/Conv2D 0.495ms s2.fusion.y.reshape.1/Tanh 1.047ms s2.fusion.y.maxpool2D.1/MaxPool 0.500ms s2.fusion.x.conv2d.1/Conv2D + s2.fusion. 0.489ms s2.x.fusion.avepool2D.1/AvgPool 0.481ms s2.fusion.mul/mul 0.475ms s2.fusion.nchw.reshape1 + (Unnamed Layer 0.525ms s2.fusion.offset.dense.1/MatMul 0.478ms s2.fusion.offset.dense.1/BiasAdd + s2.fu 0.496ms s2.fusion.offset.dense.2/MatMul 0.484ms s2.fusion.offset.dense.2/BiasAdd + s2.fu 0.497ms ssr_function/add_1 0.486ms ssr_function/add copy 0.533ms s1.fusion.offset.dense.2/Tanh copy 0.513ms ssr_function/add_1 copy 0.523ms s1.fusion.pred.dense.1/MatMul 0.474ms s1.fusion.pred.dense.1/BiasAdd + s1.fusi 0.490ms s1.fusion.offset.pred.reshape.1/Reshape 0.578ms ssr_function/ssrf.s1.mul.1 0.488ms ssr_function/ssrf.s1.reshape.1 0.538ms ssr_function/ssrf.s1.matmul.1 0.486ms s1.fusion.delta.dense.1/MatMul 0.470ms s1.fusion.delta.dense.1/BiasAdd + s1.fus 0.496ms s1.fusion.delta.dense.2/MatMul 0.475ms s1.fusion.delta.dense.2/BiasAdd + s1.fus 0.490ms ssr_function/add_6 0.494ms ssr_function/mul 0.479ms ssr_function/truediv 0.468ms ssr_function/add_2 0.489ms ssr_function/add_3 0.486ms ssr_function/add_2 copy 0.526ms s2.fusion.offset.dense.2/Tanh copy 0.517ms ssr_function/add_3 copy 0.513ms s2.fusion.pred.dense.1/MatMul 0.476ms s2.fusion.pred.dense.1/BiasAdd + s2.fusi 0.489ms s2.fusion.offset.pred.reshape.1/Reshape 0.552ms ssr_function/ssrf.s2.mul.1 0.477ms ssr_function/ssrf.s2.reshape.1 0.541ms ssr_function/ssrf.s2.matmul.1 0.485ms s2.fusion.delta.dense.1/MatMul 0.472ms s2.fusion.delta.dense.1/BiasAdd + s2.fus 0.491ms s2.fusion.delta.dense.2/MatMul 0.473ms s2.fusion.delta.dense.2/BiasAdd + s2.fus 0.496ms ssr_function/add_7 0.490ms ssr_function/mul_1 0.480ms ssr_function/mul_3 0.475ms ssr_function/truediv_1 0.470ms ssr_function/add_9 0.472ms s3.fusion.y.reshape.1/Conv2D 0.533ms s3.fusion.y.reshape.1/Tanh 1.048ms s3.fusion.y.maxpool2D.1/MaxPool 0.501ms s3.fusion.x.conv2d.1/Conv2D + s3.fusion. 0.508ms s3.x.fusion.avepool2D.1/AvgPool 0.488ms s3.fusion.mul/mul 0.477ms s3.fusion.nchw.reshape1 + (Unnamed Layer 0.512ms s3.fusion.offset.dense.1/MatMul 0.566ms s3.fusion.offset.dense.1/BiasAdd + s3.fu 0.415ms s3.fusion.offset.dense.2/MatMul 0.498ms s3.fusion.offset.dense.2/BiasAdd + s3.fu 0.499ms ssr_function/add_4 0.485ms ssr_function/add_5 0.484ms ssr_function/add_4 copy 0.531ms s3.fusion.offset.dense.2/Tanh copy 0.514ms ssr_function/add_5 copy 0.510ms s3.fusion.pred.dense.1/MatMul 0.564ms s3.fusion.pred.dense.1/BiasAdd + s3.fusi 0.405ms s3.fusion.offset.pred.reshape.1/Reshape 0.559ms ssr_function/ssrf.s3.mul.1 0.481ms ssr_function/ssrf.s3.reshape.1 0.534ms ssr_function/ssrf.s3.matmul.1 0.487ms ssr_function/mul_4 0.475ms s3.fusion.delta.dense.1/MatMul 0.553ms s3.fusion.delta.dense.1/BiasAdd + s3.fus 0.406ms s3.fusion.delta.dense.2/MatMul 0.476ms s3.fusion.delta.dense.2/BiasAdd + s3.fus 0.500ms ssr_function/add_8 0.482ms ssr_function/mul_2 0.483ms ssr_function/mul_5 0.472ms ssr_function/truediv_2 0.472ms ssr_function/add_10 0.469ms ssr_function/mul_6 0.487ms Time over all layers: 64.807 [I] Average over 10 runs is 10.6194 ms (host walltime is 10.6474 ms, 99% percentile time is 10.8366). y3.Conv2D.1/Conv2D 0.759ms y3.tanh.1/Tanh 1.068ms y3.MaxPool2D.1/MaxPool 0.567ms y3.Conv2D.2/Conv2D 0.672ms y3.tanh.2/Tanh 1.102ms y3.Conv2D.3/Conv2D 0.585ms y3.tanh.3/Tanh 1.173ms y3.MaxPool2D.2/MaxPool 0.563ms y2.Conv2D.1/Conv2D 0.577ms y2.tanh.1/Tanh 1.177ms y2.Conv2D.2/Conv2D 0.577ms y2.tanh.2/Tanh 1.174ms y2.MaxPool2D.1/MaxPool 0.572ms y1.Conv2D.1/Conv2D 0.651ms y1.tanh.1/Tanh 1.079ms y1.Conv2D.2/Conv2D 0.678ms y1.tanh.2/Tanh 1.056ms s1.fusion.y.reshape.1/Conv2D 0.583ms s1.fusion.y.reshape.1/Tanh 1.181ms s1.fusion.y.maxpool2D.1/MaxPool 0.565ms x3.Conv2D.1/Conv2D + x3.relu.1/Relu 0.721ms x3.AvePool2D.1/AvgPool 0.408ms x3.Conv2D.2/Conv2D + x3.relu.2/Relu 0.606ms x3.Conv2D.3/Conv2D + x3.relu.3/Relu 0.503ms x3.AvePool2D.2/AvgPool 0.542ms x2.Conv2D.1/Conv2D + x2.relu.1/Relu 0.554ms x2.Conv2D.2/Conv2D + x2.relu.2/Relu 0.691ms x2.AvePool2D.1/AvgPool 0.438ms x1.Conv2D.1/Conv2D + x1.relu.1/Relu 0.636ms x1.Conv2D.2/Conv2D + x1.relu.2/Relu 0.586ms s1.fusion.x.conv2d.1/Conv2D + s1.fusion. 0.435ms s1.x.fusion.avepool2D.1/AvgPool 0.561ms s1.fusion.mul/mul 0.547ms s1.fusion.nchw.reshape1 + (Unnamed Layer 0.604ms s1.fusion.offset.dense.1/MatMul 0.558ms s1.fusion.offset.dense.1/BiasAdd + s1.fu 0.574ms s1.fusion.offset.dense.2/MatMul 0.543ms s1.fusion.offset.dense.2/BiasAdd + s1.fu 0.563ms ssr_function/add 0.560ms s2.fusion.y.reshape.1/Conv2D 0.564ms s2.fusion.y.reshape.1/Tanh 1.194ms s2.fusion.y.maxpool2D.1/MaxPool 0.569ms s2.fusion.x.conv2d.1/Conv2D + s2.fusion. 0.559ms s2.x.fusion.avepool2D.1/AvgPool 0.548ms s2.fusion.mul/mul 0.542ms s2.fusion.nchw.reshape1 + (Unnamed Layer 0.598ms s2.fusion.offset.dense.1/MatMul 0.545ms s2.fusion.offset.dense.1/BiasAdd + s2.fu 0.565ms s2.fusion.offset.dense.2/MatMul 0.551ms s2.fusion.offset.dense.2/BiasAdd + s2.fu 0.567ms ssr_function/add_1 0.556ms ssr_function/add copy 0.613ms s1.fusion.offset.dense.2/Tanh copy 0.586ms ssr_function/add_1 copy 0.594ms s1.fusion.pred.dense.1/MatMul 0.542ms s1.fusion.pred.dense.1/BiasAdd + s1.fusi 0.560ms s1.fusion.offset.pred.reshape.1/Reshape 0.661ms ssr_function/ssrf.s1.mul.1 0.556ms ssr_function/ssrf.s1.reshape.1 0.614ms ssr_function/ssrf.s1.matmul.1 0.554ms s1.fusion.delta.dense.1/MatMul 0.537ms s1.fusion.delta.dense.1/BiasAdd + s1.fus 0.566ms s1.fusion.delta.dense.2/MatMul 0.542ms s1.fusion.delta.dense.2/BiasAdd + s1.fus 0.561ms ssr_function/add_6 0.564ms ssr_function/mul 0.547ms ssr_function/truediv 0.535ms ssr_function/add_2 0.559ms ssr_function/add_3 0.554ms ssr_function/add_2 copy 0.602ms s2.fusion.offset.dense.2/Tanh copy 0.592ms ssr_function/add_3 copy 0.588ms s2.fusion.pred.dense.1/MatMul 0.545ms s2.fusion.pred.dense.1/BiasAdd + s2.fusi 0.558ms s2.fusion.offset.pred.reshape.1/Reshape 0.630ms ssr_function/ssrf.s2.mul.1 0.547ms ssr_function/ssrf.s2.reshape.1 0.617ms ssr_function/ssrf.s2.matmul.1 0.553ms s2.fusion.delta.dense.1/MatMul 0.538ms s2.fusion.delta.dense.1/BiasAdd + s2.fus 0.561ms s2.fusion.delta.dense.2/MatMul 0.539ms s2.fusion.delta.dense.2/BiasAdd + s2.fus 0.565ms ssr_function/add_7 0.559ms ssr_function/mul_1 0.548ms ssr_function/mul_3 0.542ms ssr_function/truediv_1 0.537ms ssr_function/add_9 0.539ms s3.fusion.y.reshape.1/Conv2D 0.608ms s3.fusion.y.reshape.1/Tanh 1.204ms s3.fusion.y.maxpool2D.1/MaxPool 0.573ms s3.fusion.x.conv2d.1/Conv2D + s3.fusion. 0.580ms s3.x.fusion.avepool2D.1/AvgPool 0.556ms s3.fusion.mul/mul 0.544ms s3.fusion.nchw.reshape1 + (Unnamed Layer 0.585ms s3.fusion.offset.dense.1/MatMul 0.645ms s3.fusion.offset.dense.1/BiasAdd + s3.fu 0.473ms s3.fusion.offset.dense.2/MatMul 0.565ms s3.fusion.offset.dense.2/BiasAdd + s3.fu 0.569ms ssr_function/add_4 0.554ms ssr_function/add_5 0.554ms ssr_function/add_4 copy 0.606ms s3.fusion.offset.dense.2/Tanh copy 0.586ms ssr_function/add_5 copy 0.582ms s3.fusion.pred.dense.1/MatMul 0.644ms s3.fusion.pred.dense.1/BiasAdd + s3.fusi 0.462ms s3.fusion.offset.pred.reshape.1/Reshape 0.640ms ssr_function/ssrf.s3.mul.1 0.552ms ssr_function/ssrf.s3.reshape.1 0.614ms ssr_function/ssrf.s3.matmul.1 0.555ms ssr_function/mul_4 0.543ms s3.fusion.delta.dense.1/MatMul 0.633ms s3.fusion.delta.dense.1/BiasAdd + s3.fus 0.464ms s3.fusion.delta.dense.2/MatMul 0.543ms s3.fusion.delta.dense.2/BiasAdd + s3.fus 0.569ms ssr_function/add_8 0.550ms ssr_function/mul_2 0.552ms ssr_function/mul_5 0.538ms ssr_function/truediv_2 0.538ms ssr_function/add_10 0.536ms ssr_function/mul_6 0.557ms Time over all layers: 74.024 [I] Average over 10 runs is 10.6442 ms (host walltime is 10.6727 ms, 99% percentile time is 10.9376). y3.Conv2D.1/Conv2D 0.854ms y3.tanh.1/Tanh 1.202ms y3.MaxPool2D.1/MaxPool 0.636ms y3.Conv2D.2/Conv2D 0.754ms y3.tanh.2/Tanh 1.239ms y3.Conv2D.3/Conv2D 0.657ms y3.tanh.3/Tanh 1.316ms y3.MaxPool2D.2/MaxPool 0.633ms y2.Conv2D.1/Conv2D 0.647ms y2.tanh.1/Tanh 1.323ms y2.Conv2D.2/Conv2D 0.649ms y2.tanh.2/Tanh 1.320ms y2.MaxPool2D.1/MaxPool 0.643ms y1.Conv2D.1/Conv2D 0.732ms y1.tanh.1/Tanh 1.217ms y1.Conv2D.2/Conv2D 0.762ms y1.tanh.2/Tanh 1.190ms s1.fusion.y.reshape.1/Conv2D 0.654ms s1.fusion.y.reshape.1/Tanh 1.327ms s1.fusion.y.maxpool2D.1/MaxPool 0.635ms x3.Conv2D.1/Conv2D + x3.relu.1/Relu 0.810ms x3.AvePool2D.1/AvgPool 0.458ms x3.Conv2D.2/Conv2D + x3.relu.2/Relu 0.681ms x3.Conv2D.3/Conv2D + x3.relu.3/Relu 0.565ms x3.AvePool2D.2/AvgPool 0.609ms x2.Conv2D.1/Conv2D + x2.relu.1/Relu 0.622ms x2.Conv2D.2/Conv2D + x2.relu.2/Relu 0.776ms x2.AvePool2D.1/AvgPool 0.492ms x1.Conv2D.1/Conv2D + x1.relu.1/Relu 0.715ms x1.Conv2D.2/Conv2D + x1.relu.2/Relu 0.658ms s1.fusion.x.conv2d.1/Conv2D + s1.fusion. 0.495ms s1.x.fusion.avepool2D.1/AvgPool 0.629ms s1.fusion.mul/mul 0.619ms s1.fusion.nchw.reshape1 + (Unnamed Layer 0.678ms s1.fusion.offset.dense.1/MatMul 0.627ms s1.fusion.offset.dense.1/BiasAdd + s1.fu 0.645ms s1.fusion.offset.dense.2/MatMul 0.610ms s1.fusion.offset.dense.2/BiasAdd + s1.fu 0.632ms ssr_function/add 0.629ms s2.fusion.y.reshape.1/Conv2D 0.634ms s2.fusion.y.reshape.1/Tanh 1.342ms s2.fusion.y.maxpool2D.1/MaxPool 0.640ms s2.fusion.x.conv2d.1/Conv2D + s2.fusion. 0.627ms s2.x.fusion.avepool2D.1/AvgPool 0.616ms s2.fusion.mul/mul 0.609ms s2.fusion.nchw.reshape1 + (Unnamed Layer 0.670ms s2.fusion.offset.dense.1/MatMul 0.612ms s2.fusion.offset.dense.1/BiasAdd + s2.fu 0.635ms s2.fusion.offset.dense.2/MatMul 0.622ms s2.fusion.offset.dense.2/BiasAdd + s2.fu 0.641ms ssr_function/add_1 0.627ms ssr_function/add copy 0.689ms s1.fusion.offset.dense.2/Tanh copy 0.658ms ssr_function/add_1 copy 0.667ms s1.fusion.pred.dense.1/MatMul 0.609ms s1.fusion.pred.dense.1/BiasAdd + s1.fusi 0.629ms s1.fusion.offset.pred.reshape.1/Reshape 0.742ms ssr_function/ssrf.s1.mul.1 0.624ms ssr_function/ssrf.s1.reshape.1 0.691ms ssr_function/ssrf.s1.matmul.1 0.623ms s1.fusion.delta.dense.1/MatMul 0.603ms s1.fusion.delta.dense.1/BiasAdd + s1.fus 0.635ms s1.fusion.delta.dense.2/MatMul 0.609ms s1.fusion.delta.dense.2/BiasAdd + s1.fus 0.630ms ssr_function/add_6 0.634ms ssr_function/mul 0.615ms ssr_function/truediv 0.603ms ssr_function/add_2 0.628ms ssr_function/add_3 0.623ms ssr_function/add_2 copy 0.681ms s2.fusion.offset.dense.2/Tanh copy 0.665ms ssr_function/add_3 copy 0.660ms s2.fusion.pred.dense.1/MatMul 0.612ms s2.fusion.pred.dense.1/BiasAdd + s2.fusi 0.628ms s2.fusion.offset.pred.reshape.1/Reshape 0.708ms ssr_function/ssrf.s2.mul.1 0.615ms ssr_function/ssrf.s2.reshape.1 0.692ms ssr_function/ssrf.s2.matmul.1 0.623ms s2.fusion.delta.dense.1/MatMul 0.605ms s2.fusion.delta.dense.1/BiasAdd + s2.fus 0.631ms s2.fusion.delta.dense.2/MatMul 0.606ms s2.fusion.delta.dense.2/BiasAdd + s2.fus 0.635ms ssr_function/add_7 0.628ms ssr_function/mul_1 0.617ms ssr_function/mul_3 0.609ms ssr_function/truediv_1 0.606ms ssr_function/add_9 0.608ms s3.fusion.y.reshape.1/Conv2D 0.687ms s3.fusion.y.reshape.1/Tanh 1.354ms s3.fusion.y.maxpool2D.1/MaxPool 0.644ms s3.fusion.x.conv2d.1/Conv2D + s3.fusion. 0.651ms s3.x.fusion.avepool2D.1/AvgPool 0.626ms s3.fusion.mul/mul 0.612ms s3.fusion.nchw.reshape1 + (Unnamed Layer 0.659ms s3.fusion.offset.dense.1/MatMul 0.724ms s3.fusion.offset.dense.1/BiasAdd + s3.fu 0.531ms s3.fusion.offset.dense.2/MatMul 0.633ms s3.fusion.offset.dense.2/BiasAdd + s3.fu 0.638ms ssr_function/add_4 0.623ms ssr_function/add_5 0.623ms ssr_function/add_4 copy 0.680ms s3.fusion.offset.dense.2/Tanh copy 0.658ms ssr_function/add_5 copy 0.655ms s3.fusion.pred.dense.1/MatMul 0.725ms s3.fusion.pred.dense.1/BiasAdd + s3.fusi 0.524ms s3.fusion.offset.pred.reshape.1/Reshape 0.719ms ssr_function/ssrf.s3.mul.1 0.621ms ssr_function/ssrf.s3.reshape.1 0.690ms ssr_function/ssrf.s3.matmul.1 0.624ms ssr_function/mul_4 0.610ms s3.fusion.delta.dense.1/MatMul 0.711ms s3.fusion.delta.dense.1/BiasAdd + s3.fus 0.522ms s3.fusion.delta.dense.2/MatMul 0.610ms s3.fusion.delta.dense.2/BiasAdd + s3.fus 0.639ms ssr_function/add_8 0.619ms ssr_function/mul_2 0.620ms ssr_function/mul_5 0.605ms ssr_function/truediv_2 0.604ms ssr_function/add_10 0.602ms ssr_function/mul_6 0.625ms Time over all layers: 83.225 [I] Average over 10 runs is 10.6203 ms (host walltime is 10.6485 ms, 99% percentile time is 10.7078). y3.Conv2D.1/Conv2D 0.947ms y3.tanh.1/Tanh 1.336ms y3.MaxPool2D.1/MaxPool 0.707ms y3.Conv2D.2/Conv2D 0.837ms y3.tanh.2/Tanh 1.376ms y3.Conv2D.3/Conv2D 0.730ms y3.tanh.3/Tanh 1.459ms y3.MaxPool2D.2/MaxPool 0.703ms y2.Conv2D.1/Conv2D 0.717ms y2.tanh.1/Tanh 1.469ms y2.Conv2D.2/Conv2D 0.719ms y2.tanh.2/Tanh 1.468ms y2.MaxPool2D.1/MaxPool 0.713ms y1.Conv2D.1/Conv2D 0.816ms y1.tanh.1/Tanh 1.351ms y1.Conv2D.2/Conv2D 0.846ms y1.tanh.2/Tanh 1.323ms s1.fusion.y.reshape.1/Conv2D 0.725ms s1.fusion.y.reshape.1/Tanh 1.473ms s1.fusion.y.maxpool2D.1/MaxPool 0.705ms x3.Conv2D.1/Conv2D + x3.relu.1/Relu 0.900ms x3.AvePool2D.1/AvgPool 0.507ms x3.Conv2D.2/Conv2D + x3.relu.2/Relu 0.756ms x3.Conv2D.3/Conv2D + x3.relu.3/Relu 0.627ms x3.AvePool2D.2/AvgPool 0.676ms x2.Conv2D.1/Conv2D + x2.relu.1/Relu 0.690ms x2.Conv2D.2/Conv2D + x2.relu.2/Relu 0.862ms x2.AvePool2D.1/AvgPool 0.548ms x1.Conv2D.1/Conv2D + x1.relu.1/Relu 0.794ms x1.Conv2D.2/Conv2D + x1.relu.2/Relu 0.733ms s1.fusion.x.conv2d.1/Conv2D + s1.fusion. 0.549ms s1.x.fusion.avepool2D.1/AvgPool 0.698ms s1.fusion.mul/mul 0.689ms s1.fusion.nchw.reshape1 + (Unnamed Layer 0.750ms s1.fusion.offset.dense.1/MatMul 0.696ms s1.fusion.offset.dense.1/BiasAdd + s1.fu 0.716ms s1.fusion.offset.dense.2/MatMul 0.677ms s1.fusion.offset.dense.2/BiasAdd + s1.fu 0.703ms ssr_function/add 0.698ms s2.fusion.y.reshape.1/Conv2D 0.703ms s2.fusion.y.reshape.1/Tanh 1.490ms s2.fusion.y.maxpool2D.1/MaxPool 0.709ms s2.fusion.x.conv2d.1/Conv2D + s2.fusion. 0.696ms s2.x.fusion.avepool2D.1/AvgPool 0.688ms s2.fusion.mul/mul 0.676ms s2.fusion.nchw.reshape1 + (Unnamed Layer 0.743ms s2.fusion.offset.dense.1/MatMul 0.679ms s2.fusion.offset.dense.1/BiasAdd + s2.fu 0.707ms s2.fusion.offset.dense.2/MatMul 0.690ms s2.fusion.offset.dense.2/BiasAdd + s2.fu 0.712ms ssr_function/add_1 0.698ms ssr_function/add copy 0.764ms s1.fusion.offset.dense.2/Tanh copy 0.730ms ssr_function/add_1 copy 0.739ms s1.fusion.pred.dense.1/MatMul 0.677ms s1.fusion.pred.dense.1/BiasAdd + s1.fusi 0.698ms s1.fusion.offset.pred.reshape.1/Reshape 0.823ms ssr_function/ssrf.s1.mul.1 0.692ms ssr_function/ssrf.s1.reshape.1 0.768ms ssr_function/ssrf.s1.matmul.1 0.691ms s1.fusion.delta.dense.1/MatMul 0.669ms s1.fusion.delta.dense.1/BiasAdd + s1.fus 0.705ms s1.fusion.delta.dense.2/MatMul 0.676ms s1.fusion.delta.dense.2/BiasAdd + s1.fus 0.701ms ssr_function/add_6 0.703ms ssr_function/mul 0.685ms ssr_function/truediv 0.670ms ssr_function/add_2 0.696ms ssr_function/add_3 0.691ms ssr_function/add_2 copy 0.757ms s2.fusion.offset.dense.2/Tanh copy 0.737ms ssr_function/add_3 copy 0.731ms s2.fusion.pred.dense.1/MatMul 0.680ms s2.fusion.pred.dense.1/BiasAdd + s2.fusi 0.698ms s2.fusion.offset.pred.reshape.1/Reshape 0.785ms ssr_function/ssrf.s2.mul.1 0.682ms ssr_function/ssrf.s2.reshape.1 0.768ms ssr_function/ssrf.s2.matmul.1 0.691ms s2.fusion.delta.dense.1/MatMul 0.671ms s2.fusion.delta.dense.1/BiasAdd + s2.fus 0.701ms s2.fusion.delta.dense.2/MatMul 0.674ms s2.fusion.delta.dense.2/BiasAdd + s2.fus 0.704ms ssr_function/add_7 0.696ms ssr_function/mul_1 0.684ms ssr_function/mul_3 0.678ms ssr_function/truediv_1 0.674ms ssr_function/add_9 0.675ms s3.fusion.y.reshape.1/Conv2D 0.762ms s3.fusion.y.reshape.1/Tanh 1.503ms s3.fusion.y.maxpool2D.1/MaxPool 0.714ms s3.fusion.x.conv2d.1/Conv2D + s3.fusion. 0.722ms s3.x.fusion.avepool2D.1/AvgPool 0.694ms s3.fusion.mul/mul 0.679ms s3.fusion.nchw.reshape1 + (Unnamed Layer 0.732ms s3.fusion.offset.dense.1/MatMul 0.803ms s3.fusion.offset.dense.1/BiasAdd + s3.fu 0.589ms s3.fusion.offset.dense.2/MatMul 0.700ms s3.fusion.offset.dense.2/BiasAdd + s3.fu 0.707ms ssr_function/add_4 0.691ms ssr_function/add_5 0.694ms ssr_function/add_4 copy 0.754ms s3.fusion.offset.dense.2/Tanh copy 0.731ms ssr_function/add_5 copy 0.730ms s3.fusion.pred.dense.1/MatMul 0.805ms s3.fusion.pred.dense.1/BiasAdd + s3.fusi 0.581ms s3.fusion.offset.pred.reshape.1/Reshape 0.800ms ssr_function/ssrf.s3.mul.1 0.690ms ssr_function/ssrf.s3.reshape.1 0.768ms ssr_function/ssrf.s3.matmul.1 0.692ms ssr_function/mul_4 0.677ms s3.fusion.delta.dense.1/MatMul 0.790ms s3.fusion.delta.dense.1/BiasAdd + s3.fus 0.579ms s3.fusion.delta.dense.2/MatMul 0.676ms s3.fusion.delta.dense.2/BiasAdd + s3.fus 0.708ms ssr_function/add_8 0.687ms ssr_function/mul_2 0.689ms ssr_function/mul_5 0.671ms ssr_function/truediv_2 0.670ms ssr_function/add_10 0.667ms ssr_function/mul_6 0.697ms Time over all layers: 92.393 [I] Average over 10 runs is 10.5719 ms (host walltime is 10.5998 ms, 99% percentile time is 10.6857). &&&& PASSED TensorRT.trtexec # /home/ubuntu/klass_fr/tensorrt-5.1.6.0/aarch64-linux-gnu/samples/trtexec/build/trtexec --uff=./libs/klass_fr/tensor_rt/data/ssrnet_nchw.uff --uffInput=input_1,3,64,64 --output=ssr_function/mul_6 --useSpinWait