Hi,
I found that using TensorRT for inference takes more time than using TensorFlow directly on the GPU for a big model.
When I use TensorRT to infer a 720p video, it takes 600 ms per frame. The memory usage is 5185 MiB / 24478 MiB.
When I use TensorFlow directly on the GPU to infer the same 720p video, it takes 236 ms per frame. The memory usage is 23370 MiB / 24478 MiB.
Does this mean TensorRT is not making full use of the GPU memory?
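From what I understand, TensorFlow by default grabs almost all of the GPU memory, while TensorRT only reserves what the engine actually needs, so the low number alone may be normal. The engine's own requirement can be queried, for example (a small sketch using the same engine member as in the code below):

    // Sketch: activation memory the engine reserves for one execution context.
    size_t ctx_mem_bytes = engine->getDeviceMemorySize();
    printf("engine device memory: %.1f MiB\n", ctx_mem_bytes / (1024.0 * 1024.0));

Here is my inference code: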
#include "tveModelSR.h"

#include <chrono>
#include <cstdio>

namespace TVE {

tveModelSR::tveModelSR(const std::string& tve_name, nvinfer1::DataType type,
                       int n, int c, int h, int w, int s)
    : tveModelBase(tve_name, type, n, c, h, w), scale(s) {
    use_times = 0;
    n_frames = 0;
}

tveModelSR::~tveModelSR() {
    if (n_frames > 0) {
        printf("sr_model: times:%.2fms\n", use_times / n_frames);
    }
}

int tveModelSR::execute(int batch_n, void* input_cuda_buffer, void* output_cuda_buffer, cudaStream_t stream) {
    if (!is_init) {
        log_ctx.append("ctx is not inited,");
        return -1;
    }
    // set the device buffers by binding index
    void* buffers[2];
    int y_input_index = engine->getBindingIndex(info.input_name.c_str());
    int y_output_index = engine->getBindingIndex(info.output_name.c_str());
    buffers[y_input_index] = input_cuda_buffer;
    buffers[y_output_index] = output_cuda_buffer;
    // start inference
    auto time_start = std::chrono::high_resolution_clock::now();
    if (!context->enqueue(batch_n, buffers, stream, nullptr)) {
        log_ctx.append("execute context fail,");
        return -5;
    }
    cudaStreamSynchronize(stream);
    // accumulate per-frame time
    use_times += std::chrono::duration<float, std::milli>(
        std::chrono::high_resolution_clock::now() - time_start).count();
    ++n_frames;
    return 0;
}

} /* namespace TVE */
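To see where the 600 ms per frame actually goes, I could attach a per-layer profiler to the execution context. A minimal sketch (it reuses the context/buffers names from the code above; note that per-layer profiling in older TensorRT releases only reports through the synchronous execute() path, not enqueue()):

    #include "NvInfer.h"
    #include <cstdio>

    // Sketch: print the time TensorRT spends in every layer.
    struct LayerProfiler : public nvinfer1::IProfiler {
        void reportLayerTime(const char* layerName, float ms) noexcept override {
            printf("%-40s %.3f ms\n", layerName, ms);
        }
    };

    // Usage, e.g. inside execute() for a few debug frames:
    //   LayerProfiler profiler;
    //   context->setProfiler(&profiler);
    //   context->execute(batch_n, buffers);   // synchronous call, prints per-layer times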
The context is initialized outside this class, and the buffers and the stream are public variables that also live outside.
I don’t know how to solve this problem.
PS: This model is a super-resolution model with 20 conv layers.
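One thing I have not ruled out is the maximum workspace size used when building the engine; if it is too small, TensorRT may have to fall back to slower convolution tactics. A minimal sketch of raising it at build time, assuming an IBuilder/IBuilderConfig based build (the builder/network/config names here are just illustrative):

    // Sketch: give TensorRT more scratch memory for tactic selection at build time.
    // builder / network / config are whatever objects the existing build path uses.
    nvinfer1::IBuilderConfig* config = builder->createBuilderConfig();
    config->setMaxWorkspaceSize(1ULL << 32);              // e.g. 4 GiB of workspace
    // config->setFlag(nvinfer1::BuilderFlag::kFP16);     // optional, if the GPU supports it
    nvinfer1::ICudaEngine* engine =
        builder->buildEngineWithConfig(*network, *config);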