Here is the TF-to-UFF conversion script:
import tensorflow as tf
import sys
from tensorflow.python.platform import gfile
from tensorflow.core.protobuf import saved_model_pb2
from tensorflow.python.util import compat
import uff

UFF_OUTPUT_FILENAME = 'model_tensorrt.uff'
OUTPUT_NAMES = ["activation_8/Sigmoid", "dense_1/BiasAdd"]

with tf.Session() as persisted_sess:
    print("load graph")
    with gfile.FastGFile("../rpg_public_dronet/model/model_tensorflow.pb", 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    persisted_sess.graph.as_default()
    tf.import_graph_def(graph_def, name='')
    # Dump the graph so it can be inspected in TensorBoard
    writer = tf.summary.FileWriter("./tf_summary", graph=persisted_sess.graph)
    # Print all operation names (useful for finding the output node names):
    #for op in persisted_sess.graph.get_operations():
    #    print(op)

import tensorrt as trt
from tensorrt.parsers import uffparser

G_LOGGER = trt.infer.ConsoleLogger(trt.infer.LogSeverity.INFO)

# Load the newly created TensorFlow frozen model and convert it to UFF
uff_model = uff.from_tensorflow_frozen_model("../rpg_public_dronet/model/model_tensorflow.pb",
                                             OUTPUT_NAMES,
                                             output_filename=UFF_OUTPUT_FILENAME)

# Alternatively, convert an in-memory GraphDef and also emit a human-readable
# text version of the UFF graph:
#uff.from_tensorflow(graphdef=graph_def,
#                    output_filename=UFF_OUTPUT_FILENAME,
#                    output_nodes=OUTPUT_NAMES,
#                    text=True)

# Create a UFF parser to parse the UFF file created from the frozen model
#parser = uffparser.create_uff_parser()
#parser.register_input("input_1", (200, 200, 1), 0)
#parser.register_output("activation_8/Sigmoid")
And here are the engine creation and inference functions:
int inference(void* p_engine, void* p_context, float *input_img, float output_arr[NUM_OF_OUTPUTS])
{
    /*
     * Take an image buffer that is ready for inference and run the NN on it.
     * The image is expected to be AFTER all preprocessing steps -
     * cropping, resizing, rescaling and normalization (unless this is done by batchnorm).
     */
    LOG("TRTLib: clearing output array\n");
    memset(output_arr, 0, (sizeof(float) * NUM_OF_OUTPUTS));

    LOG("TRTLib: assigning from input pointers\n");
    ICudaEngine &engine = *((ICudaEngine*)p_engine);
    IExecutionContext* context = (IExecutionContext*)p_context;

    LOG("TRTLib: getting bindings from engine\n");
    int batchSize = 1;
    int nbBindings = engine.getNbBindings();
    assert(nbBindings == TOTAL_BINDINGS);

    std::vector<void*> buffers(nbBindings);
    auto buffersSizes = calculateBindingBufferSizes(engine, nbBindings, batchSize);

    /* Allocate a device buffer for every output binding */
    int bindingIdxInput = 0;
    for (int i = 0; i < nbBindings; ++i)
    {
        if (engine.bindingIsInput(i))
        {
            bindingIdxInput = i;
        }
        else
        {
            auto bufferSizesOutput = buffersSizes[i];
            buffers[i] = safeCudaMalloc(bufferSizesOutput.first *
                                        elementSizeTrt(bufferSizesOutput.second));
        }
    }

    auto bufferSizesInput = buffersSizes[bindingIdxInput];
    LOG("TRTLib: creating buffer for input\n");
    buffers[bindingIdxInput] = createImageCudaBuffer(bufferSizesInput.first,
                                                     bufferSizesInput.second, input_img);

    LOG("TRTLib: executing inference\n");
    /* Run the network synchronously on the bound buffers */
    context->execute(batchSize, &buffers[0]);

    LOG("TRTLib: moving output from GPU to host\n");
    int output_idx = 0;
    for (int bindingIdx = 0; bindingIdx < nbBindings; ++bindingIdx)
    {
        if (engine.bindingIsInput(bindingIdx))
            continue;
        auto bufferSizesOutput = buffersSizes[bindingIdx];
        float output = getOutputs(bufferSizesOutput.first, bufferSizesOutput.second,
                                  buffers[bindingIdx], bindingIdx);
        LOG("assigning output %f in array slot %d\n", output, output_idx);
        output_arr[output_idx++] = output;
    }

    LOG("TRTLib: clean GPU mem\n");
    CHECK(cudaFree(buffers[bindingIdxInput]));
    for (int bindingIdx = 0; bindingIdx < nbBindings; ++bindingIdx)
        if (!engine.bindingIsInput(bindingIdx))
            CHECK(cudaFree(buffers[bindingIdx]));

    LOG("TRTLib: DONE\n");
    return 0;
}
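The helpers used above (CHECK, safeCudaMalloc, elementSizeTrt, calculateBindingBufferSizes, createImageCudaBuffer, getOutputs) are part of the full source that isn't posted yet. Until it is up, here is a minimal sketch of the buffer-management trio, modeled on the helpers in NVIDIA's sampleUffMNIST; the signatures are inferred from the call sites above, and the exact definitions in my code may differ:

#include <cstdint>
#include <cstdlib>
#include <iostream>
#include <utility>
#include <vector>
#include <cuda_runtime_api.h>
#include <NvInfer.h>

using namespace nvinfer1;

/* Assumed definition: abort on any CUDA runtime error */
#ifndef CHECK
#define CHECK(status)                                           \
    do {                                                        \
        auto ret = (status);                                    \
        if (ret != 0) {                                         \
            std::cerr << "CUDA failure: " << ret << std::endl;  \
            abort();                                            \
        }                                                       \
    } while (0)
#endif

/* cudaMalloc that never returns NULL */
inline void* safeCudaMalloc(size_t memSize)
{
    void* deviceMem;
    CHECK(cudaMalloc(&deviceMem, memSize));
    if (deviceMem == nullptr)
    {
        std::cerr << "Out of GPU memory" << std::endl;
        abort();
    }
    return deviceMem;
}

/* Size in bytes of one element of a TensorRT data type */
inline unsigned int elementSizeTrt(DataType t)
{
    switch (t)
    {
    case DataType::kFLOAT: return 4;
    case DataType::kHALF:  return 2;
    case DataType::kINT8:  return 1;
    default:               return 0;
    }
}

/* Number of elements described by a binding's dimensions */
inline int64_t volumeTrt(const Dims& d)
{
    int64_t v = 1;
    for (int i = 0; i < d.nbDims; ++i)
        v *= d.d[i];
    return v;
}

/* For every binding: (element count for the whole batch, data type) */
std::vector<std::pair<int64_t, DataType>>
calculateBindingBufferSizes(const ICudaEngine& engine, int nbBindings, int batchSize)
{
    std::vector<std::pair<int64_t, DataType>> sizes;
    for (int i = 0; i < nbBindings; ++i)
    {
        Dims dims = engine.getBindingDimensions(i);
        DataType dtype = engine.getBindingDataType(i);
        sizes.push_back(std::make_pair(volumeTrt(dims) * batchSize, dtype));
    }
    return sizes;
}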
int build_engine(std::string uff_path, uint8_t input_shape[2], void** out_engine, void** out_context)
{
    /*
     * Prepare a TensorRT engine, ready for inference jobs.
     * It should be called only once per NN.
     *
     * @uff_path    : Full path to the .uff model file.
     *                Note that this is not completely flexible, as input/output
     *                sizes/names are hardcoded in the 'trtinference.h' file.
     * @input_shape : Integer array for the input image size; should be [Height, Width].
     *                Only grayscale images (single channel) are supported for now.
     *                (Note that uint8_t also caps each dimension at 255.)
     */
    *out_engine = NULL;
    *out_context = NULL;

    LOG("TRTlib: %s\n", uff_path.c_str());
    LOG("TRTlib: %u,%u\n", input_shape[0], input_shape[1]);

    int maxBatchSize = 1;
    auto parser = createUffParser();

    INPUT_H = input_shape[0];
    INPUT_W = input_shape[1];

    /* Register the TensorFlow input */
    parser->registerInput(INPUT_BINDING_NAME,
                          Dims3(INPUT_C, INPUT_H, INPUT_W),
                          UffInputOrder::kNCHW);
    parser->registerOutput(OUTPUT_1_BINDING_NAME);
    parser->registerOutput(OUTPUT_2_BINDING_NAME);

    ICudaEngine* engine = loadModelAndCreateEngine(uff_path.c_str(), maxBatchSize, parser);
    if (!engine) {
        std::cout << "Failed to create engine" << std::endl;
        return -1;
    }

    /* We don't need to keep the memory created by the parser */
    parser->destroy();

    IExecutionContext* context = engine->createExecutionContext();
    *out_engine = (void*)engine;
    *out_context = (void*)context;
    return 0;
}
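To make the calling convention concrete, here is a hypothetical usage sketch of the two functions together. The file name, image size, and input contents are illustrative only; NUM_OF_OUTPUTS and the binding names come from 'trtinference.h':

#include <vector>

int main()
{
    void* engine = NULL;
    void* context = NULL;
    uint8_t shape[2] = {200, 200};  /* [Height, Width], single channel */

    if (build_engine("model_tensorrt.uff", shape, &engine, &context) != 0)
        return -1;

    /* A real caller would fill this with a cropped/resized/normalized image */
    std::vector<float> img(200 * 200, 0.0f);
    float outputs[NUM_OF_OUTPUTS];
    inference(engine, context, img.data(), outputs);

    /* The void* handles keep TensorRT types out of the public header, so the
       caller casts back to release them (TensorRT 4-era API) */
    ((IExecutionContext*)context)->destroy();
    ((ICudaEngine*)engine)->destroy();
    return 0;
}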
I will soon have the full source code on GitHub and will update here when it is up.
Can you see any issue with what I put here?
BR
Sagiv