I have already deployed my TensorRT model on TRTIS (TensorRT Inference Server). According to localhost:8000/api/status, the model status is READY. However, errors occur whether I use the officially provided image_client or my custom client. According to the TRTIS logs, the detailed errors are as follows.
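(For reference, the status check I mention is just a query against the server's HTTP status endpoint, e.g.

curl localhost:8000/api/status

which lists rec_white_nbi as ready before I start any client.)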
I0102 04:50:27.117956 1 plan_bundle.cc:209] Creating instance rec_white_nbi_0_0_gpu0 on GPU 0 (6.1) using model.p
I0102 04:50:27.118370 1 logging.cc:49] Glob Size is 14264520 bytes.
I0102 04:50:27.126893 1 logging.cc:49] Added linear block of size 516710400
I0102 04:50:27.126914 1 logging.cc:49] Added linear block of size 499046400
I0102 04:50:27.126919 1 logging.cc:49] Added linear block of size 374284800
I0102 04:50:27.126925 1 logging.cc:49] Added linear block of size 311904256
I0102 04:50:27.126931 1 logging.cc:49] Added linear block of size 249523200
I0102 04:50:27.126936 1 logging.cc:49] Added linear block of size 249523200
I0102 04:50:27.126941 1 logging.cc:49] Added linear block of size 187142656
I0102 04:50:27.126946 1 logging.cc:49] Added linear block of size 786432
I0102 04:50:27.133022 1 logging.cc:49] Deserialize required 14981 microseconds.
I0102 04:50:27.138303 1 plan_bundle.cc:273] Created instance rec_white_nbi_0_0_gpu0 on GPU 0 (6.1) with stream pr
I0102 04:50:27.138639 1 plan_bundle.cc:209] Creating instance rec_white_nbi_0_1_gpu0 on GPU 0 (6.1) using model.p
I0102 04:50:27.138961 1 logging.cc:49] Glob Size is 14264520 bytes.
I0102 04:50:27.142679 1 logging.cc:49] Added linear block of size 516710400
I0102 04:50:27.142693 1 logging.cc:49] Added linear block of size 499046400
I0102 04:50:27.142698 1 logging.cc:49] Added linear block of size 374284800
I0102 04:50:27.142703 1 logging.cc:49] Added linear block of size 311904256
I0102 04:50:27.142709 1 logging.cc:49] Added linear block of size 249523200
I0102 04:50:27.142714 1 logging.cc:49] Added linear block of size 249523200
I0102 04:50:27.142720 1 logging.cc:49] Added linear block of size 187142656
I0102 04:50:27.142725 1 logging.cc:49] Added linear block of size 786432
I0102 04:50:27.145389 1 logging.cc:49] Deserialize required 6730 microseconds.
I0102 04:50:27.149259 1 plan_bundle.cc:273] Created instance rec_white_nbi_0_1_gpu0 on GPU 0 (6.1) with stream pr
I0102 04:50:27.149620 1 plan_bundle.cc:209] Creating instance rec_white_nbi_0_2_gpu0 on GPU 0 (6.1) using model.p
I0102 04:50:27.149962 1 logging.cc:49] Glob Size is 14264520 bytes.
I0102 04:50:27.153747 1 logging.cc:49] Added linear block of size 516710400
I0102 04:50:27.153764 1 logging.cc:49] Added linear block of size 499046400
I0102 04:50:27.153769 1 logging.cc:49] Added linear block of size 374284800
I0102 04:50:27.153776 1 logging.cc:49] Added linear block of size 311904256
I0102 04:50:27.153782 1 logging.cc:49] Added linear block of size 249523200
I0102 04:50:27.153790 1 logging.cc:49] Added linear block of size 249523200
I0102 04:50:27.153796 1 logging.cc:49] Added linear block of size 187142656
I0102 04:50:27.153802 1 logging.cc:49] Added linear block of size 786432
I0102 04:50:27.156526 1 logging.cc:49] Deserialize required 6887 microseconds.
I0102 04:50:27.160389 1 plan_bundle.cc:273] Created instance rec_white_nbi_0_2_gpu0 on GPU 0 (6.1) with stream pr
I0102 04:50:27.160721 1 plan_bundle.cc:209] Creating instance rec_white_nbi_0_3_gpu0 on GPU 0 (6.1) using model.p
I0102 04:50:27.161039 1 logging.cc:49] Glob Size is 14264520 bytes.
I0102 04:50:27.164722 1 logging.cc:49] Added linear block of size 516710400
I0102 04:50:27.164735 1 logging.cc:49] Added linear block of size 499046400
I0102 04:50:27.164743 1 logging.cc:49] Added linear block of size 374284800
I0102 04:50:27.164749 1 logging.cc:49] Added linear block of size 311904256
I0102 04:50:27.164756 1 logging.cc:49] Added linear block of size 249523200
I0102 04:50:27.164762 1 logging.cc:49] Added linear block of size 249523200
I0102 04:50:27.164767 1 logging.cc:49] Added linear block of size 187142656
I0102 04:50:27.164773 1 logging.cc:49] Added linear block of size 786432
I0102 04:50:27.167512 1 logging.cc:49] Deserialize required 6774 microseconds.
I0102 04:50:27.171345 1 plan_bundle.cc:273] Created instance rec_white_nbi_0_3_gpu0 on GPU 0 (6.1) with stream pr
I0102 04:50:27.171469 1 infer.cc:788] Starting runner thread 0 at nice 5…
I0102 04:50:27.171942 1 infer.cc:788] Starting runner thread 1 at nice 5…
I0102 04:50:27.172577 1 infer.cc:788] Starting runner thread 2 at nice 5…
I0102 04:50:27.172632 1 loader_harness.cc:86] Successfully loaded servable version {name: rec_white_nbi version:
I0102 04:50:27.17263
E0102 04:52:14.937376 1 logging.cc:43] cuda/cudaConvolutionLayer.cpp (163) - Cudnn Error in execute: 3
E0102 04:52:14.937579 1 logging.cc:43] cuda/cudaConvolutionLayer.cpp (163) - Cudnn Error in execute: 3
E0102 04:52:45.577469 1 logging.cc:43] cuda/cudaConvolutionLayer.cpp (163) - Cudnn Error in execute: 3
E0102 04:52:45.577533 1 logging.cc:43] cuda/cudaConvolutionLayer.cpp (163) - Cudnn Error in execute: 3
The following code is my custom client. When I invoke ctx->Run(&(results->back())) or ctx->GetAsyncRunResults(&(results->back()), request, true) to get the inference results, the Cudnn Error in execute: 3 error occurs (the synchronous Run call I tried is sketched again after the listing).
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <iostream>
#include <memory>
#include <string>
#include <vector>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#include "request.h"
#include "model_config.pb.h"
namespace ni = nvidia::inferenceserver;
namespace nic = nvidia::inferenceserver::client;
enum ProtocolType
{
HTTP = 0,
GRPC = 1
};
void Preprocess(cv::Mat img, std::vector<uint8_t>* input_data)
{
cv::resize(img, img, cv::Size(227, 227), 0, 0);
img.convertTo(img, CV_32FC3);
size_t img_byte_size = img.total() * img.elemSize();
size_t pos = 0;
input_data->resize(img_byte_size);
if(img.isContinuous())
{
memcpy(&((*input_data)[0]), img.datastart, img_byte_size);
pos = img_byte_size;
}
else
{
size_t row_byte_size = img.cols * img.elemSize();
for(int r = 0; r < img.rows; ++r)
{
memcpy(&((*input_data)[pos]), img.ptr<uint8_t>(r), row_byte_size);
pos += row_byte_size;
}
}
if(pos != img_byte_size)
{
std::cerr
<< "unexpected total size of channels " << pos
<< ", expecting " << img_byte_size << std::endl;
exit(1);
}
}
void Infer(std::unique_ptr<nic::InferContext>& ctx, const size_t topk, const std::vector<std::vector<uint8_t> >& inputs_data, std::vector<std::vector<std::unique_ptr<nic::InferContext::Result> > >* results, const bool verbose = false)
{
const size_t batch_size = inputs_data.size();
nic::Error err(ni::RequestStatusCode::SUCCESS);
const auto& input = ctx->inputs()[0];
std::unique_ptr<nic::InferContext::Options> options;
err = nic::InferContext::Options::Create(&options);
if(!err.IsOk())
{
std::cerr << "failed initializing infer options: " << err << std::endl;
exit(1);
}
options->SetBatchSize(batch_size);
options->AddClassResult(ctx->outputs()[0], topk);
err = ctx->SetRunOptions(*options);
if(!err.IsOk())
{
std::cerr << "failed initializing batch size: " << err << std::endl;
exit(1);
}
std::vector<std::shared_ptr<nic::InferContext::Request> > requests;
for(size_t idx = 0; idx < batch_size; idx++)
{
if(idx % batch_size == 0)
{
err = input->Reset();
if(!err.IsOk())
{
std::cerr << "failed resetting input: " << err << std::endl;
exit(1);
}
}
nic::Error err = input->SetRaw(inputs_data[idx]);
if(!err.IsOk())
{
std::cerr << "failed setting input: " << err << std::endl;
exit(1);
}
if((idx + 1) % batch_size == 0)
{
std::shared_ptr<nic::InferContext::Request> req;
err = ctx->AsyncRun(&req);
if(!err.IsOk())
{
std::cerr << "failed sending infer request: " << err << std::endl;
exit(1);
}
requests.emplace_back(std::move(req));
}
}
for(auto& request : requests)
{
results->emplace_back();
err = ctx->GetAsyncRunResults(&(results->back()), request, true);
if(!err.IsOk())
{
std::cerr << "failed receiving infer response: " << err << std::endl;
exit(1);
}
}
}
void FileToInputData(const std::string& filename, std::vector<uint8_t>* input_data)
{
std::ifstream file(filename);
std::vector<char> data;
file >> std::noskipws;
std::copy(
std::istream_iterator<char>(file), std::istream_iterator<char>(),
std::back_inserter(data));
if(data.empty())
{
std::cerr << "error: unable to read image file " << filename << std::endl;
exit(1);
}
cv::Mat img = cv::imdecode(cv::Mat(data), 1);
if(img.empty())
{
std::cerr << "error: unable to decode image " << filename << std::endl;
exit(1);
}
Preprocess(img, input_data);
}
void Postprocess(std::vector<std::unique_ptr<nic::InferContext::Result> >& results, const size_t batch_size)
{
if(results.size() != 1)
{
std::cerr << "expected 1 result, got " << results.size() MM std::endl;
exit(1);
}
const std::unique_ptr<nic::InferContext::Result>& result = results[0];
//std::vector<std::pair<size_t, std::string> > predictions;
for(size_t b = 0; b < batch_size; ++b)
{
size_t cnt = 0;
nic::Error err = result->GetClassCount(b, &cnt);
if(!err.IsOk())
{
std::cerr
<< "failed reading class count for batch "
<< b << ": " << err << std::endl;
exit(1);
}
for(size_t c = 0; c < cnt; ++c)
{
nic::InferContext::Result::ClassResult cls;
nic::Error err = result->GetClassAtCursor(b, &cls);
if(!err.IsOk())
{
std::cerr
<< "failed reading class for batch "
<< b << ": " << err << std::endl;
exit(1);
}
std::cout
<< cls.idx << " (\""
<< cls.label << "\") = " << cls.value << std::endl;
}
}
}
int main(int argc, char** argv)
{
bool verbose = false;
size_t topk = 2;
size_t batch_size = 1;
std::string model_name = "rec_white_nbi";
int model_version = -1;
std::string url("localhost:8001");
ProtocolType protocol = ProtocolType::GRPC;
std::unique_ptr<nic::InferContext> ctx;
nic::Error err;
if(protocol == ProtocolType::HTTP)
{
err = nic::InferHttpContext::Create(&ctx, url, model_name, model_version, verbose);
}
else
{
err = nic::InferGrpcContext::Create(&ctx, url, model_name, model_version, verbose);
}
if(!err.IsOk())
{
std::cerr << "error: unable to create inference context: " << err << std::endl;
exit(1);
}
std::vector<std::vector<std::string> > batched_filenames;
std::vector<std::vector<uint8_t> > inputs_data;
inputs_data.emplace_back();
batched_filenames.emplace_back();
std::string filename = "colon/1.jpg";
FileToInputData(filename, &(inputs_data[0]));
batched_filenames.back().push_back(filename);
std::vector<std::vector<std::unique_ptr<nic::InferContext::Result> > > results;
Infer(ctx, topk, inputs_data, &results, verbose);
for(size_t idx = 0; idx < results.size(); ++idx)
{
Postprocess(results[idx], batch_size);
}
return 0;
}
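For completeness, the synchronous call I mention above fails in exactly the same way. In Infer() I just replace the AsyncRun/GetAsyncRunResults pair with a blocking Run (a minimal sketch, assuming the std::vector<std::unique_ptr<Result>> overload of Run from this version of request.h):

// replaces the AsyncRun(...) / GetAsyncRunResults(...) pair inside Infer()
results->emplace_back();
nic::Error run_err = ctx->Run(&(results->back()));  // blocking inference call
if(!run_err.IsOk())
{
std::cerr << "failed sending infer request: " << run_err << std::endl;
exit(1);
}

Either way the client gets an error back and the server log shows the Cudnn Error in execute: 3 lines above.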