enqueue error

penggr2015 · September 19, 2019, 10:45pm

Hi, I met a bug in trtexec.cpp.

void doInference(ICudaEngine& engine)
{

    IExecutionContext* context = engine.createExecutionContext();
    std::cout<<"context created "<<std::endl;
    SimpleProfiler profiler = SimpleProfiler("Profiler");

    if (gParams.benchmarkLayers){

        context->setProfiler(&profiler);

    }

    // input and output buffer pointers that we pass to the engine - the engine requires exactly IEngine::getNbBindings(),
    // of these, but in this case we know that there is exactly one input and one output.
    
    std::vector<void*> buffers(gParams.inputs.size() + gParams.outputs.size());
    for (size_t i = 0; i < gParams.inputs.size(); i++)
        createMemory(engine, buffers, gParams.inputs[i]);

    for (size_t i = 0; i < gParams.outputs.size(); i++)
        createMemory(engine, buffers, gParams.outputs[i]);
    
    //void* buffers[gParams.inputs.size() + gParams.outputs.size()];

    std::cout<<"memory create is completed"<<std::endl;
    cudaStream_t stream;
    CHECK(cudaStreamCreate(&stream));
    cudaEvent_t start, end;
    unsigned int cudaEventFlags = gParams.useSpinWait ? cudaEventDefault : cudaEventBlockingSync;
    CHECK(cudaEventCreateWithFlags(&start, cudaEventFlags));
    CHECK(cudaEventCreateWithFlags(&end, cudaEventFlags));

    std::ofstream file;
    initiateFile(file);

    std::vector<float> timesGpu(gParams.avgRuns);
    std::vector<float> timesHost(gParams.avgRuns);
    for (int j = 0; j < gParams.iterations; j++)
    {
        float totalGpu{0}, totalHost{0}; // GPU and Host timers
        for (int i = 0; i < gParams.avgRuns; i++)
        {
            auto tStart = std::chrono::high_resolution_clock::now();
            cudaEventRecord(start, stream);
            std::cout<<"enqueue starts"<<std::endl;
            //context->enqueue(gParams.batchSize, &buffers[0], stream, nullptr);
            //context->enqueue(gParams.batchSize, &buffers[0],&buffer[1]);
            context->execute(gParams.batchSize, &buffers[0]);
            //mTrtContext->execute(batchSize, &mTrtCudaBuffer[inputIndex]);
            std::cout<<"enqueue ended here"<<std::endl;
            cudaEventRecord(end, stream);
            cudaEventSynchronize(end);

            auto tEnd = std::chrono::high_resolution_clock::now();
            timesHost[i] = std::chrono::duration<float, std::milli>(tEnd - tStart).count();
            totalHost += timesHost[i];
	    float ms;
            cudaEventElapsedTime(&ms, start, end);
	    timesGpu[i] = ms;
            totalGpu += ms;
        }
        totalGpu /= gParams.avgRuns;
        totalHost /= gParams.avgRuns;
        std::cout << "Average over " << gParams.avgRuns << " runs is " << totalGpu << " ms (host walltime is " << totalHost
                  << " ms, " << static_cast<int>(gParams.pct) << "\% percentile time is " << percentile(gParams.pct, timesGpu) << ")." << std::endl;

        // calculate standard deviation
        float stdGpu = stDev (timesGpu, totalGpu);
        float stdHost = stDev (timesHost, totalHost);

        file << totalGpu << "\t" << stdGpu << "\t" << totalHost << "\t" << stdHost << "\t" << percentile(gParams.pct, timesGpu) << "\n"; 
    }
    file << "\n";

    if (gParams.benchmarkLayers)
    {
    	file << profiler;
        std::cout << profiler;
    }

    file.close();
    cudaStreamDestroy(stream);
    cudaEventDestroy(start);
    cudaEventDestroy(end);

    context->destroy();
}

I kept getting error as:

enqueue starts
engine.cpp (713) - Cuda Error in execute: 700 (an illegal memory access was encountered)
engine.cpp (713) - Cuda Error in execute: 700 (an illegal memory access was encountered)
enqueue ended here
enqueue starts
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
enqueue ended here
enqueue starts
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
enqueue ended here
enqueue starts
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
enqueue ended here
enqueue starts
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
enqueue ended here
enqueue starts
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
enqueue ended here
Average over 6 runs is 171.985 ms (host walltime is 191.573 ms, 99% percentile time is 1031.91).
enqueue starts
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
enqueue ended here
enqueue starts
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
enqueue ended here
enqueue starts
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
enqueue ended here
enqueue starts
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
enqueue ended here
enqueue starts
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
enqueue ended here
enqueue starts
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
enqueue ended here
Average over 6 runs is 4.2514e-41 ms (host walltime is 0.0303392 ms, 99% percentile time is 7.94116e-41).
enqueue starts
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
enqueue ended here
enqueue starts
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
enqueue ended here
enqueue starts
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
enqueue ended here
enqueue starts
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
enqueue ended here
enqueue starts
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
enqueue ended here
enqueue starts
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
cuda/genericReformat.cu (1259) - Cuda Error in executeMemcpy: 700 (an illegal memory access was encountered)
enqueue ended here
Average over 6 runs is 2.39664e-41 ms (host walltime is 0.0171033 ms, 99% percentile time is 4.52213e-41).
Parameter check failed at: engine.cpp::terminateCommonContext::192, condition: cudaEventDestroy(context.start) failure.
Parameter check failed at: engine.cpp::terminateCommonContext::197, condition: cudaEventDestroy(context.stop) failure.
runtime.cpp (31) - Cuda Error in free: 700 (an illegal memory access was encountered)
terminate called after throwing an instance of 'nvinfer1::CudaError'
  what():  std::exception
Abgebrochen (Speicherabzug geschrieben)

Is there anyone who can give me some hints?
Thanks in advance!

SunilJB · November 27, 2019, 7:01am

Hi,
Can you provide the following information so we can better help?
Provide details on the platforms you are using:
o Linux distro and version
o GPU type
o Nvidia driver version
o CUDA version
o CUDNN version
o Python version [if using python]
o Tensorflow version
o TensorRT version
o If Jetson, OS, hw versions

Also, if possible please share the script & model file to reproduce the issue.

Thanks

penggr2015 · November 27, 2019, 9:21am

Hi, I have solved this problem. But I met new problem using plugin layer.

--------------- Timing upsample(19)
Tactic 0 is the only option, timing skipped
Formats and tactics selection completed in 11.8396 seconds.
After reformat layers: 6 layers
Block size 3145728000
Block size 213811200
Block size 21483008
Block size 13363200
Total Activation Memory: 3394385408
Detected 1 input and 1 output network tensors.
supportsFormat=== type:0format0
type 0format 0
configureWithFormat:30 240 464
Data initialization and engine generation completed in 0.804268 seconds.
Calculating Maxima
Calibrating with batch 0
Tensor segmentation_type_1_30/BiasAdd is uniformly zero; network calibration failed.
Calibration completed in 20.7948 seconds.
[INT8 Quantization] INT8 Inference Tensor Scales: input_1 [0.00787594]
[INT8 Quantization] INT8 Inference Tensor Scales: (Unnamed Layer* 1) [Padding]_output [0.00787594]
[INT8 Quantization] INT8 Inference Tensor Scales: activation_1/Relu [0.000114801]
[INT8 Quantization] INT8 Inference Tensor Scales: max_pooling2d_1/MaxPool [0.000118674]
[INT8 Quantization] INT8 Inference Tensor Scales: segmentation_type_1_30/BiasAdd [9.38656e-10]
[INT8 Quantization] INT8 Inference Tensor Scales: upsample_HL_1804289383 [0.000114801]
[INT8 Quantization] INT8 Inference Tensor Scales: upsample [0.000114801]
Original: 20 layers
After dead-layer removal: 9 layers
Fusing convolution weights from conv0/convolution with scale conv0_bn/FusedBatchNorm_1
Fusing convolution weights from segmentation_type_1_30/convolution with scale segmentation_type_1_30/BiasAdd
After scale fusion: 7 layers
Fusing conv0/convolution with activation_1/Relu
After vertical fusions: 6 layers
After swap: 6 layers
After final dead-layer removal: 6 layers
After tensor merging: 6 layers
After concat removal: 6 layers
[INT8 Quantization] Writing Calibration Cache for calibrator: TRT-5101-EntropyCalibration
Configuring builder for Int8 Mode completed in 20.8078 seconds.
Graph construction and optimization completed in 20.8104 seconds.
supportsFormat=== type:0format0
supportsFormat=== type:1format0
supportsFormat=== type:1format1
supportsFormat=== type:1format2
supportsFormat=== type:3format0
supportsFormat=== type:0format0
supportsFormat=== type:1format0
supportsFormat=== type:1format1
supportsFormat=== type:1format2
supportsFormat=== type:3format0

--------------- Timing <reformat>(9)
../builder/cudnnBuilderUtils.cpp (253) - Cuda Error in findFastestTactic: 9 (invalid configuration argument)
../builder/cudnnBuilderUtils.cpp (253) - Cuda Error in findFastestTactic: 9 (invalid configuration argument)
could not build engine
Engine could not be created
Engine could not be created

penggr2015 · November 27, 2019, 9:33am

@SunilJB

Linux: Ubuntu 18.04
Platform: Nvidia Tegra Xavier
CUDA: 10.1
CUDNN: 7.5
python: 2.7
Tensorflow: 1.11
TensorRT: 5.15

I have uploaded the code and the model, please have a look.

Hope to get your feedback!
Thanks in advance.

penggr2015 · November 27, 2019, 1:43pm

Forgot to mention, that this error occurs when using int8 quantization…

SunilJB · November 29, 2019, 4:04am

Hi,

Code and model seems to be working with TRT 6.0 on desktop GPU.
Could you please try to run with TRT 6.0 version?

Thanks

penggr2015 · November 29, 2019, 8:41am

Hi, thank you for your reply.
Becasuse of some limitations, TensorRT6 is not available on my platform.
Since TensorRT6 supports ResizeBilinear operation, my plugin implementation makes no sense on TensorRT6.
Could you please check my code in TensorRT5 on your desktop GPU?
I would appreciate any feedback from you.
Thanks in advance!

Bests.

SunilJB · December 2, 2019, 4:02pm

Hi,

UFF parser doesn’t support ResizeBilinear op in TRT 6.
You have to create either a custom plugin or use ONNX parser for TRT optimization.

Also, JetPack 4.3 Developer Preview, which is packaged with TRT6 is available as a beta. https://developer.nvidia.com/jetpack-4_3_DP
Stay tuned for official production release of Jetpack 4.3.

Thanks

penggr2015 · December 3, 2019, 7:53pm

Hi,

I think you misunderstand my problem.

Custome plugin layer is finished and can recognize ResizeBilinear operation in model.

The thing is I can’t implement quantization using plugin layer, witout quantization it works properly, at least no error message.

And also TRT 6 is not available for my situation…

Topic		Replies	Views
getPluginCreator could not find plugin: EfficientNMS_TRT version: 1 error with C++ API but works fine with Python API TensorRT tensorrt	14	2940	January 11, 2024
Post-Training Quantization (PTQ) for semantic segmentation model running on Jetson Orin NX Jetson Orin NX tensorrt	24	283	March 26, 2025
Calibration failed: INTERNAL: Failed to build TensorRT engine (INT8 precision mode) in Jetson Xavier NX (16GB) Jetson Xavier NX tensorrt	9	758	April 12, 2023
TF-TRT issue Jetson TX2	26	3840	October 18, 2021
Unable to build model engine for INT8 yolov8m quantized using tensorrt model optimizer TensorRT jetson , deepstream	5	437	September 24, 2024
TensorFlow 1.11.0 wheel with JetPack 3.3 Jetson TX2	103	45426	November 13, 2019
The inference result based on transferred mode from tensorrt 4 with fcn8s is very terrible TensorRT	16	1745	October 12, 2021
TensorRT fails to build FasterRCNN GIE model with using INT8 TensorRT	28	9227	May 3, 2018
Runtime Error with NVIDIA TensorRT (Deep Learning) Jetson TX1	20	6790	July 27, 2017
[ERROR] Model has dynamic shape but no optimization profile specified. Aborted (core dumped) TAO Toolkit	30	2052	December 13, 2021

enqueue error

Related topics