I developed two distinct models, for two use cases, to analyze vibration patterns: one for when the system is turned on and a second for when the system is shut down (so no vibration is detected).
The entire training process uses TensorFlow 2.7.0 (an autoencoder in Python) to create .h5 models, which are converted to .onnx model files and then to .engine files for the Jetson platform (Jetson AGX Xavier, CUDA).
Jetson AGX Xavier specs:
cuda: 11.4.315
cuDNN: 8.6.0
tensorRT: 8.5.2.2
jetpack: 5.1.3
python3 -c "import tensorflow as tf; print('TensorFlow version:', tf.__version__)"
TensorFlow version: 2.11.0
Autoencoder training script in Python (sample):
# Autoencoder over inputs of shape (2000, lines): three Conv1D/MaxPooling1D
# stages shrink the length axis to 125, a Dense bottleneck encodes the
# flattened features, and three UpSampling1D/Conv1D stages restore (2000, lines).
input_img = tf.keras.layers.Input(shape=(2000, lines))
x = input_img

# Encoder stages as (filters, kernel_size, pool_size).
# Length axis: 2000 -> 500 -> 250 -> 125.
encoder_stages = ((12, 128, 4), (12, 64, 2), (12, 16, 2))
for n_filters, kernel_size, pool_size in encoder_stages:
    x = tf.keras.layers.Conv1D(n_filters, kernel_size, padding='same')(x)
    x = tf.keras.layers.MaxPooling1D(pool_size)(x)

# Bottleneck: flatten the (125, 12) feature map and project it to the
# configured encoded-state size.
x = tf.keras.layers.Flatten()(x)
encoded_state_size = self.__config['MODEL']['ENCODED_STATE_SIZE']
x = tf.keras.layers.Dense(encoded_state_size)(x)

# Decoder: expand back to the last encoder feature size before upsampling.
x = tf.keras.layers.Dense(125 * 12)(x)
x = tf.keras.layers.Reshape((125, 12))(x)

# Decoder stages as (upsampling_factor, filters, kernel_size).
# Length axis: 125 -> 250 -> 500 -> 2000; the final convolution emits
# `lines` channels so the output shape matches the input.
decoder_stages = ((2, 12, 16), (2, 12, 64), (4, lines, 128))
for up_factor, n_filters, kernel_size in decoder_stages:
    x = tf.keras.layers.UpSampling1D(up_factor)(x)
    x = tf.keras.layers.Conv1D(n_filters, kernel_size, padding='same')(x)

# Model definition
self.__model = tf.keras.models.Model(input_img, x)
It doesn’t matter which model I use, inference result values are the SAME, exactly the same values, as if the neural network learned nothing…
You can see below 2 comparative charts with the inference values
Don’t assume that the data might be corrupted, I have collected enough data to train for both cases and I’ve checked their validity
The confusing part is that inference works in Python, using TensorFlow 2.7.0 with GPU on Ubuntu Focal x86_64… I mean, there I saw different values between the two charts.
On the Jetson I wrote a Python script to convert the .h5 model file into .onnx and then into .engine format:
import tf2onnx
import tensorflow as tf
import argparse
import subprocess
def convert_h5_to_onnx(h5_model_path, onnx_model_path):
    """Load a Keras .h5 model and save it as a serialized ONNX file.

    Args:
        h5_model_path: Path of the Keras model file to load.
        onnx_model_path: Path the ONNX model is written to (binary mode).
    """
    print("Converting .h5 model to ONNX...")
    keras_model = tf.keras.models.load_model(h5_model_path)

    # from_keras returns a pair; only its first element (the model proto)
    # is needed here.
    onnx_proto, _unused = tf2onnx.convert.from_keras(keras_model, opset=13)

    with open(onnx_model_path, "wb") as onnx_file:
        serialized_model = onnx_proto.SerializeToString()
        onnx_file.write(serialized_model)

    print(f"ONNX model saved at {onnx_model_path}")
def convert_onnx_to_trt(onnx_model_path, engine_model_path, trt_precision_mode):
    """Build a TensorRT engine from an ONNX model by running ``trtexec``.

    The command is passed to ``subprocess.run`` as an argument list (no
    shell), so paths containing spaces or shell metacharacters are handed to
    ``trtexec`` verbatim instead of being interpreted by a shell.

    Args:
        onnx_model_path: Path of the ONNX model to convert.
        engine_model_path: Path the serialized engine is saved to.
        trt_precision_mode: ``'FP16'`` (case-insensitive) adds ``--fp16``;
            any other value builds without a precision flag.

    Returns:
        None. Success or failure is reported on standard output.
    """
    print("Converting ONNX model to TensorRT Engine...")
    trtexec_path = "/usr/src/tensorrt/bin/trtexec"
    command = [
        trtexec_path,
        f"--onnx={onnx_model_path}",
        f"--saveEngine={engine_model_path}",
    ]
    if trt_precision_mode.upper() == 'FP16':
        command.append('--fp16')

    try:
        process = subprocess.run(
            command, stdout=subprocess.PIPE, stderr=subprocess.PIPE
        )
    except OSError as exc:
        # Without a shell, a missing or non-executable trtexec raises here
        # instead of yielding a non-zero exit code; report it the same way.
        print(f"Error in converting to TensorRT engine:\n{exc}")
        return

    if process.returncode != 0:
        # errors='replace' keeps the report printable even if trtexec emits
        # bytes that are not valid UTF-8.
        error_text = process.stderr.decode('utf-8', errors='replace')
        print(f"Error in converting to TensorRT engine:\n{error_text}")
    else:
        print(f"TensorRT engine saved at {engine_model_path}")
# Main: parse the command-line options, then run both conversion stages
# (.h5 -> ONNX, ONNX -> TensorRT engine) in order.
if __name__ == "__main__":
    cli = argparse.ArgumentParser(description="Convert a .h5 model to ONNX and TensorRT engine format")

    # The three path options share the same shape: required string arguments.
    path_options = (
        ("--h5_model_path", "Path to the .h5 model file"),
        ("--onnx_model_path", "Path to save the converted ONNX model"),
        ("--engine_model_path", "Path to save the converted TensorRT engine"),
    )
    for option_name, option_help in path_options:
        cli.add_argument(option_name, type=str, required=True, help=option_help)

    cli.add_argument(
        "--trt_precision_mode",
        type=str,
        choices=['FP32', 'FP16'],
        default="FP16",
        help="Precision mode for TensorRT engine (FP32 or FP16)",
    )

    options = cli.parse_args()
    convert_h5_to_onnx(options.h5_model_path, options.onnx_model_path)
    convert_onnx_to_trt(
        options.onnx_model_path,
        options.engine_model_path,
        options.trt_precision_mode,
    )
"RunInference" is my C/C++ inference function using TensorRT (as input data, I used the FFTs of the raw values):
void RunInference(ICudaEngine* engine, IExecutionContext* context, int input_index, int output_index, kiss_fft_cpx* x_fft, kiss_fft_cpx* y_fft, kiss_fft_cpx* z_fft, float* predicted_output, int g_code, const char* clientName) {
int batchSize = 1;
int input_size = batchSize * 2000 * 3 * sizeof(float); // [1, 2000, 3]
int output_size = batchSize * 3 * sizeof(float); // [1, 3]
// Prepare normalized input data and set DC component to zero
float input_data[2000 * 3];
const int MN = 4000;
for (int i = 0; i < 2000; i++) {
input_data[i * 3 + 0] = sqrt(x_fft[i].r * x_fft[i].r + x_fft[i].i * x_fft[i].i) / MN;
input_data[i * 3 + 1] = sqrt(y_fft[i].r * y_fft[i].r + y_fft[i].i * y_fft[i].i) / MN;
input_data[i * 3 + 2] = sqrt(z_fft[i].r * z_fft[i].r + z_fft[i].i * z_fft[i].i) / MN;
}
// Set DC component to zero
input_data[0] = 0; // X-axis
input_data[1] = 0; // Y-axis
input_data[2] = 0; // Z-axis
////Allocate GPU buffers for input and output
void* buffers[2];
write_log(LOG_DEBUG, "RunInference for '%s' - input_index = %d, output_index = %d", clientName, input_index, output_index);
if (cudaMalloc(&buffers[input_index], input_size) != cudaSuccess) {
write_log(LOG_ERROR, "RunInference for '%s' - Failed to allocate memory for input buffer", clientName);
return;
}
if (cudaMalloc(&buffers[output_index], output_size) != cudaSuccess) {
write_log(LOG_ERROR, "RunInference for '%s' - Failed to allocate memory for output buffer", clientName);
cudaFree(buffers[input_index]);
return;
}
if (cudaMemset(buffers[input_index], 0, input_size) != cudaSuccess) {
write_log(LOG_ERROR, "RunInference for '%s' - Failed to memset input buffer to zero", clientName);
return;
}
if (cudaMemset(buffers[output_index], 0, output_size) != cudaSuccess) {
write_log(LOG_ERROR, "RunInference for '%s' - Failed to memset output buffer to zero", clientName);
return;
}
///////////////////
// Copy the input data to the GPU
cudaMemcpy(buffers[input_index], input_data, input_size, cudaMemcpyHostToDevice);
// Launch inference
cudaStream_t stream;
cudaStreamCreate(&stream);
context->enqueueV2(buffers, stream, nullptr);
cudaStreamSynchronize(stream);
// Copy the output data from GPU to CPU
cudaMemcpy(predicted_output, buffers[output_index], output_size, cudaMemcpyDeviceToHost);
// Free GPU memory
cudaFree(buffers[input_index]);
cudaFree(buffers[output_index]);
cudaStreamDestroy(stream);
}
This is how I load one model in the app and how I call the inference function:
// Load the client's serialized engine, run one inference with it, and tear
// everything down again. Every failure path releases the TensorRT objects
// created so far before returning (void*)-1.
IRuntime* runtime = createInferRuntime(gLogger);
if (!runtime) {
    write_log(LOG_ERROR, "client_handler: Failed to create runtime for client %s", client.ClientName);
    return (void*)-1;
}

std::vector<char> engine_data = loadEngine(client.ModelPath, client.ClientName);
ICudaEngine* engine = runtime->deserializeCudaEngine(engine_data.data(), engine_data.size(), nullptr);
if (!engine) {
    write_log(LOG_ERROR, "client_handler: Failed to create engine for thread %s", client.ClientName);
    runtime->destroy();
    return (void*)-1;
}

IExecutionContext* context = engine->createExecutionContext();
if (!context) {
    write_log(LOG_ERROR, "client_handler: Failed to create execution context for thread %s", client.ClientName);
    engine->destroy();
    runtime->destroy();
    return (void*)-1;
}

// Binding names come from the config file; getBindingIndex returns -1 when a
// name does not match any binding of the engine.
int input_index = engine->getBindingIndex(client.ModelInputBindingName);
int output_index = engine->getBindingIndex(client.ModelOutputBindingName);
if (input_index < 0 || output_index < 0) {
    write_log(LOG_ERROR, "client_handler: Binding name not found in engine for thread %s (input_index = %d, output_index = %d)", client.ClientName, input_index, output_index);
    context->destroy();
    engine->destroy();
    runtime->destroy();
    return (void*)-1;
}

RunInference(engine, context, input_index, output_index, x_fft, y_fft, z_fft, predicted_output, client.G_code, client.ClientName);

// Synchronize the GPU to ensure all operations are completed, and check both
// the synchronization result and any error recorded before it.
cudaError_t err = cudaDeviceSynchronize();
if (err == cudaSuccess) {
    err = cudaGetLastError();
}
if (err != cudaSuccess) {
    write_log(LOG_ERROR, "CUDA error after synchronization in thread '%s': %s", client.ClientName, cudaGetErrorString(err));
} else {
    write_log(LOG_INFO, "GPU synchronized successfully for thread '%s'", client.ClientName);
}

// Destroy in reverse order of creation.
context->destroy();
engine->destroy();
runtime->destroy();
I want to point out that the vibrations are detected by the application, but I don’t understand why the range of values doesn’t change depending on the trained model from the two scenarios. I suspect the problem might be with the model conversion or the inference process / function in TensorRT using C/C++.
Do you have any suggestions?
