Thanks!
Based on the above understanding, I have made the changes in the attached file. Could you please quickly review them? The code has not been compiled yet, but I need your blessing that the flow is correct.
I am integrating VisionWorks with the imageNet code taken from GitHub - dusty-nv/jetson-inference (Hello AI World guide to deploying deep-learning inference networks and deep vision primitives with TensorRT and NVIDIA Jetson). I am only using the inference part, i.e. imageNet.cpp and ./utils/cuda/.
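For context, here is roughly how I understand the standalone inference path works (a minimal sketch from my reading of imageNet.h; the float4 RGBA buffer in CUDA memory is the input format I am assuming Classify() expects):

#include <cstdio>
#include <cuda_runtime.h>
#include "imageNet.h"

int main(int argc, char** argv)
{
    const uint32_t width = 1280, height = 720;    // same resolution as the camera config below
    imageNet* net = imageNet::Create(argc, argv); // loads the network / TensorRT engine
    if (!net)
        return 1;
    // Classify() takes a packed float4 RGBA image in CUDA device memory
    float4* imgRGBA = NULL;
    cudaMalloc((void**)&imgRGBA, width * height * sizeof(float4));
    float confidence = 0.0f;
    const int img_class = net->Classify((float*)imgRGBA, width, height, &confidence);
    if (img_class >= 0)
        printf("class #%i (%s) %.2f%%\n", img_class, net->GetClassDesc(img_class), confidence * 100.0f);
    cudaFree(imgRGBA);
    delete net;
    return 0;
}

The attached file follows: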
#include <iostream>
#include <sstream>
#include <iomanip>
#include <memory>
#include <VX/vx.h>
#include <NVX/nvx_timer.hpp>
#include "OVX/FrameSourceOVX.hpp"
#include "OVX/RenderOVX.hpp"
#include "NVX/Application.hpp"
#include "OVX/UtilityOVX.hpp"
#include "imageNet.h"
/*
* DEFINES
*/
#define KEY_ESC 27 // Graceful exit (names starting with "__" are reserved in C++, so renamed).
#define KEY_SPACEBAR 32 // Pause
#define ERROR_CHECK_STATUS(s) { \
vx_status status_ = (s); \
if(status_ != VX_SUCCESS) { \
std::cout << "ERROR: failed with status = (" << status_ << ") at " << __FILE__ << " #" << __LINE__ << std::endl; \
exit(1); \
} \
}
/*
* GLOBALS
*/
imageNet* net = NULL;
struct EventData
{
EventData(): alive(true), pause(false) {}
bool alive;
bool pause;
};
static void keyboardEventCallback(void* context, vx_char key, vx_uint32 /*x*/, vx_uint32 /*y*/)
{
EventData* eventData = static_cast<EventData*>(context);
if (key == KEY_ESC) // escape to exit
{
eventData->alive = false;
}
else if (key == KEY_SPACEBAR) // spacebar to pause
{
eventData->pause = !eventData->pause;
}
}
static void parseResolution(const std::string & resolution, ovxio::FrameSource::Parameters & config)
{
std::istringstream stream(resolution);
std::string item;
vx_uint32 * frameParams[] = { &config.frameWidth, &config.frameHeight };
vx_uint32 index = 0;
while (index < 2 && std::getline(stream, item, 'x'))
{
std::stringstream ss(item);
ss >> *frameParams[index++];
}
}
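// e.g. parseResolution("1280x720", config) sets config.frameWidth = 1280 and config.frameHeight = 720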
// Custom Node for Deep Vision Inference APIs
/*
* 1. Processing callback function
*/
vx_status imageClassification_kernel(vx_node node, const vx_reference *parameters, vx_uint32 num)
{
if (num != 4)
return VX_FAILURE;
vx_image frame = (vx_image)parameters[0];
// Scalar parameters arrive as vx_scalar references; read their values out
vx_uint32 width = 0;
vx_uint32 height = 0;
vx_float32 confidence = 0.0f;
vxCopyScalar((vx_scalar)parameters[1], &width, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
vxCopyScalar((vx_scalar)parameters[2], &height, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
// Get valid region
vx_rectangle_t rect;
vxGetValidRegionImage(frame, &rect);
// Map VisionWorks data object into CUDA device memory (read-only access is enough here)
vx_map_id imgRGB_map_id;
vx_uint8* imgRGB_ptr;
vx_imagepatch_addressing_t imgRGB_addr;
vxMapImagePatch(frame, &rect, 0, &imgRGB_map_id, &imgRGB_addr, (void **)&imgRGB_ptr, VX_READ_ONLY, NVX_MEMORY_TYPE_CUDA, 0);
const int img_class = net->Classify((float*)imgRGB_ptr, width, height, &confidence);
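// NOTE (my assumption, not verified): Classify() wants float4 RGBA in CUDA memory,
// while imgRGB_ptr is vx_uint8 RGB here. A conversion along these lines, using
// cudaRGBToRGBAf() from ./utils/cuda/cudaRGB.h, is probably needed before the call above:
//
//   float4* imgRGBA = NULL;
//   cudaMalloc((void**)&imgRGBA, width * height * sizeof(float4));
//   cudaRGBToRGBAf((uchar3*)imgRGB_ptr, imgRGBA, width, height);
//   const int img_class = net->Classify((float*)imgRGBA, width, height, &confidence);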
if (img_class >= 0)
{
vxAddLogEntry((vx_reference)node, VX_SUCCESS,
"[image_classification] %2.2f%% class #%d (%s)",
confidence * 100.0f, img_class, net->GetClassDesc(img_class));
// FIXME: display something here, at least for debugging.
}
// Write the result back to the output scalar and unmap the image patch
vxCopyScalar((vx_scalar)parameters[3], &confidence, VX_WRITE_ONLY, VX_MEMORY_TYPE_HOST);
vxUnmapImagePatch(frame, imgRGB_map_id);
return VX_SUCCESS;
}
/*
* 2. Validation callback function
*/
vx_status imageClassification_validate(vx_node node, const vx_reference parameters[],
vx_uint32 num, vx_meta_format metas[])
{
if ( num != 4 ) return VX_ERROR_INVALID_PARAMETERS;
vx_status status = VX_SUCCESS;
{
vx_image frame = (vx_image)parameters[0];
// Expected width/height arrive as vx_scalar references; read their values out
vx_uint32 expWidth = 0;
vx_uint32 expHeight = 0;
vxCopyScalar((vx_scalar)parameters[1], &expWidth, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
vxCopyScalar((vx_scalar)parameters[2], &expHeight, VX_READ_ONLY, VX_MEMORY_TYPE_HOST);
vx_df_image format = 0;
vx_uint32 width = 0;
vx_uint32 height = 0;
vxQueryImage(frame, VX_IMAGE_ATTRIBUTE_FORMAT, &format, sizeof(format));
if (format != VX_DF_IMAGE_RGB)
{
status = VX_ERROR_INVALID_FORMAT;
vxAddLogEntry((vx_reference)node, status,
"[image_classification] Invalid format for frame, it should be VX_DF_IMAGE_RGB");
}
vxQueryImage(frame, VX_IMAGE_ATTRIBUTE_WIDTH, &width, sizeof(width));
if (width != expWidth)
{
status = VX_ERROR_INVALID_DIMENSION;
vxAddLogEntry((vx_reference)node, status,
"[image_classification] Invalid width for frame, it should be %u", expWidth);
}
vxQueryImage(frame, VX_IMAGE_ATTRIBUTE_HEIGHT, &height, sizeof(height));
if (height != expHeight)
{
status = VX_ERROR_INVALID_DIMENSION;
vxAddLogEntry((vx_reference)node, status,
"[image_classification] Invalid height for frame, it should be %u", expHeight);
}
// Declare the meta format of the output scalar (confidence); without this
// vxVerifyGraph() will reject the node.
vx_enum scalar_type = VX_TYPE_FLOAT32;
vxSetMetaFormatAttribute(metas[3], VX_SCALAR_ATTRIBUTE_TYPE, &scalar_type, sizeof(scalar_type));
}
return status;
}
/*
* 3. Kernel registration
*/
enum {
// Library ID
USER_LIBRARY = 0x1,
// Kernel ID
USER_KERNEL_IMAGE_CLASSIFIER = VX_KERNEL_BASE(VX_ID_DEFAULT, USER_LIBRARY) + 0x0,
};
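// VX_KERNEL_BASE() builds the base kernel enum from the vendor ID and the library ID.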
vx_char image_classification_name[] = "user.kernel.image_classification";
vx_status registerImageClassificationKernel(vx_context context)
{
vx_status status = VX_SUCCESS;
vx_uint32 num_params = 4;
vx_kernel kernel = vxAddUserKernel(context, image_classification_name, USER_KERNEL_IMAGE_CLASSIFIER,
imageClassification_kernel,
num_params,
imageClassification_validate,
NULL, // init
NULL // deinit
);
status = vxGetStatus((vx_reference)kernel);
if (status != VX_SUCCESS)
{
vxAddLogEntry((vx_reference)context, status, "Failed to create Image Classification Kernel");
return status;
}
status |= vxAddParameterToKernel(kernel, 0, VX_INPUT , VX_TYPE_IMAGE , VX_PARAMETER_STATE_REQUIRED); // frame
status |= vxAddParameterToKernel(kernel, 1, VX_INPUT , VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED); // width
status |= vxAddParameterToKernel(kernel, 2, VX_INPUT , VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED); // height
status |= vxAddParameterToKernel(kernel, 3, VX_OUTPUT, VX_TYPE_SCALAR, VX_PARAMETER_STATE_REQUIRED); // confidence
if (status != VX_SUCCESS)
{
vxReleaseKernel(&kernel);
vxAddLogEntry((vx_reference)context, status, "Failed to initialize Image Classification Kernel parameters");
return VX_FAILURE;
}
status = vxFinalizeKernel(kernel);
if (status != VX_SUCCESS)
{
vxReleaseKernel(&kernel);
vxAddLogEntry((vx_reference)context, status, "Failed to finalize Image Classification Kernel");
return VX_FAILURE;
}
return status;
}
/*
* 4. User custom node factory
*/
vx_node imageClassifierNode(vx_graph graph, vx_image frame, vx_scalar width, vx_scalar height, vx_scalar confidence)
{
vx_node node = NULL;
vx_context context = vxGetContext((vx_reference)graph);
vx_kernel kernel = vxGetKernelByEnum(context, USER_KERNEL_IMAGE_CLASSIFIER);
if (vxGetStatus((vx_reference)kernel) == VX_SUCCESS)
{
node = vxCreateGenericNode(graph, kernel);
vxReleaseKernel(&kernel);
if (vxGetStatus((vx_reference)node) == VX_SUCCESS)
{
vxSetParameterByIndex(node, 0, (vx_reference)frame);
vxSetParameterByIndex(node, 1, (vx_reference)width);
vxSetParameterByIndex(node, 2, (vx_reference)height);
vxSetParameterByIndex(node, 3, (vx_reference)confidence);
}
}
return node;
}
/*
* main - Application entry point
*/
int main(int argc, char** argv)
{
/*
* Set the application configuration parameters.
*/
vx_size max_keypoint_count = 1000; // maximum number of keypoints to track FIXME
vx_float32 strength_threshold = 0.0005f; // minimum corner strength to keep a corner
vx_float32 harris_min_distance = 5.0f; // radial L2 distance for non-max suppression
vx_float32 harris_sensitivity = 0.04f; // multiplier k in det(A) - k * trace(A)^2
vx_int32 harris_gradient_size = 3; // window size for gradient computation
vx_int32 harris_block_size = 3; // block window size for Harris corner score
vx_bool fast_nonmax_suppression = vx_true_e; // Avoid multiple interest points at adjacent locations.
vx_uint32 lk_pyramid_levels = 4; // number of pyramid levels for optical flow
vx_float32 lk_pyramid_scale = VX_SCALE_PYRAMID_HALF; // pyramid levels scale by factor of two
vx_enum lk_termination = VX_TERM_CRITERIA_BOTH; // iteration termination criteria (eps & iterations)
vx_float32 lk_epsilon = 0.01f; // convergence criterion
vx_uint32 lk_num_iterations = 5; // maximum number of iterations
vx_bool lk_use_initial_estimate = vx_false_e; // don't use initial estimate
vx_uint32 lk_window_dimension = 10; // window size for evaluation
vx_float32 confidence = 0.0f;
nvxio::Application &app = nvxio::Application::get();
ovxio::printVersionInfo();
ovxio::FrameSource::Parameters config;
/*
* Default camera resolution, which TensorRT also accepts.
*/
config.frameWidth = 1280;
config.frameHeight = 720;
/*
* Parse command line arguments
*
* The default camera is the onboard one; use the following method to give the index of another CSI camera:
* https://devtalk.nvidia.com/default/topic/995818/convert-nvbuffer-to-vx_image/
*/
std::string resolution = "1280x720", input = "device:///nvcamera";
app.setDescription("This application captures frames from NVIDIA GStreamer camera");
app.addOption('r', "resolution", "Input frame resolution", nvxio::OptionHandler::oneOf(&resolution,
{ "2592x1944", "2592x1458", "1280x720", "1920x1080" }));
app.addOption('f', "fps", "Frames per second", nvxio::OptionHandler::unsignedInteger(&config.fps,
nvxio::ranges::atLeast(10u) & nvxio::ranges::atMost(120u)));
app.init(argc, argv);
parseResolution(resolution, config);
/*
* Create ImageNet
*/
net = imageNet::Create(argc, argv);
if (!net) {
std::cout << "imagenet: failed to initialize imageNet" << std::endl;
return nvxio::Application::APP_EXIT_CODE_NO_RESOURCE;
}
/*
* Create OpenVX context
*/
ovxio::ContextGuard context;
/*
* Messages generated by the OpenVX framework will be processed by ovxio::stdoutLogCallback
*/
vxRegisterLogCallback(context, &ovxio::stdoutLogCallback, vx_false_e);
vxAddLogEntry((vx_reference) context, VX_SUCCESS, "Hello MelloVision -- Queues Eliminator\n");
/*
* Create a Frame Source
*/
std::unique_ptr<ovxio::FrameSource> source(ovxio::createDefaultFrameSource(context, input));
if (!source)
{
std::cout << "Error: cannot open source!" << std::endl;
return nvxio::Application::APP_EXIT_CODE_NO_RESOURCE;
}
if (!source->setConfiguration(config))
{
std::cout << "Error: cannot setup configuration the framesource!" << std::endl;
return nvxio::Application::APP_EXIT_CODE_INVALID_VALUE;
}
if (!source->open())
{
std::cout << "Error: cannot open source!" << std::endl;
return nvxio::Application::APP_EXIT_CODE_NO_RESOURCE;
}
config = source->getConfiguration();
/*
* Create a Render
*/
std::unique_ptr<ovxio::Render> render(ovxio::createDefaultRender(
context, "NVIDIA GStreamer Camera Capture Sample", config.frameWidth, config.frameHeight));
if (!render)
{
std::cout << "Error: Cannot open default render!" << std::endl;
return nvxio::Application::APP_EXIT_CODE_NO_RENDER;
}
EventData eventData;
render->setOnKeyboardEventCallback(keyboardEventCallback, &eventData);
vx_image inputRGB = vxCreateImage(context, config.frameWidth,
config.frameHeight, config.format);
NVXIO_CHECK_REFERENCE(inputRGB);
ovxio::Render::TextBoxStyle style = {{255,255,255,255}, {0,0,0,127}, {10,10}};
nvx::Timer totalTimer;
totalTimer.tic();
/*
* User Custom node
*/
ERROR_CHECK_STATUS(registerImageClassificationKernel(context));
/*
* Tracking: Data Objects
* ----------------------
*/
vx_image inputGray = vxCreateImage(context, config.frameWidth,
config.frameHeight, VX_DF_IMAGE_U8);
NVXIO_CHECK_REFERENCE(inputGray);
// Image pyramids for two successive frames (2 slots delay object)
vx_pyramid pyramid_exemplar = vxCreatePyramid(context, lk_pyramid_levels,
lk_pyramid_scale, config.frameWidth, config.frameHeight,
VX_DF_IMAGE_U8);
NVXIO_CHECK_REFERENCE(pyramid_exemplar);
vx_delay pyramid_delay = vxCreateDelay(context, (vx_reference)pyramid_exemplar, 2);
NVXIO_CHECK_REFERENCE(pyramid_delay);
vxReleasePyramid(&pyramid_exemplar);
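// The 2-slot delay keeps the current frame's object at index 0 and the previous
// frame's object at index -1; vxAgeDelay() in the main loop rotates the slots.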
// Tracked points need to be stored for two successive frames (2 slot delay object)
vx_array keypoint_exemplar = vxCreateArray(context, VX_TYPE_KEYPOINT, max_keypoint_count);
NVXIO_CHECK_REFERENCE(keypoint_exemplar);
vx_delay keypoint_delay = vxCreateDelay(context, (vx_reference)keypoint_exemplar, 2);
NVXIO_CHECK_REFERENCE(keypoint_delay);
vxReleaseArray(&keypoint_exemplar);
/*
* 3. Tracking: Node Creation
* --------------------------
*/
//Create the graph (necessary for creating virtual objects)
vx_graph graph = vxCreateGraph(context);
NVXIO_CHECK_REFERENCE(graph);
// RGB to Y conversion nodes
NVXIO_CHECK_REFERENCE(vxColorConvertNode(graph, inputRGB, inputGray));
// Pyramid node
vx_node pyr_node = vxGaussianPyramidNode(graph, inputGray, (vx_pyramid)vxGetReferenceFromDelay(pyramid_delay, 0));
NVXIO_CHECK_REFERENCE(pyr_node);
// Wrap the Lucas-Kanade parameters in scalars, as the node API requires
vx_scalar s_lk_epsilon = vxCreateScalar(context, VX_TYPE_FLOAT32, &lk_epsilon);
vx_scalar s_lk_num_iterations = vxCreateScalar(context, VX_TYPE_UINT32, &lk_num_iterations);
vx_scalar s_lk_use_initial_estimate = vxCreateScalar(context, VX_TYPE_BOOL, &lk_use_initial_estimate);
NVXIO_CHECK_REFERENCE(s_lk_epsilon);
NVXIO_CHECK_REFERENCE(s_lk_num_iterations);
NVXIO_CHECK_REFERENCE(s_lk_use_initial_estimate);
// Lucas-Kanade optical flow node, previous keypoints as 'new points estimates'
NVXIO_CHECK_REFERENCE(vxOpticalFlowPyrLKNode(graph,
(vx_pyramid)vxGetReferenceFromDelay(pyramid_delay, -1), // Previous pyramid
(vx_pyramid)vxGetReferenceFromDelay(pyramid_delay, 0), // Current pyramid
(vx_array)vxGetReferenceFromDelay(keypoint_delay, -1), // Previous keypoints
(vx_array)vxGetReferenceFromDelay(keypoint_delay, -1), // New keypoints estimate
(vx_array)vxGetReferenceFromDelay(keypoint_delay, 0), // New keypoints
lk_termination, s_lk_epsilon, s_lk_num_iterations,
s_lk_use_initial_estimate, lk_window_dimension));
// Image Classifier node: width, height, and confidence are passed as vx_scalar objects
vx_scalar s_width = vxCreateScalar(context, VX_TYPE_UINT32, &config.frameWidth);
vx_scalar s_height = vxCreateScalar(context, VX_TYPE_UINT32, &config.frameHeight);
vx_scalar s_confidence = vxCreateScalar(context, VX_TYPE_FLOAT32, &confidence);
NVXIO_CHECK_REFERENCE(s_width);
NVXIO_CHECK_REFERENCE(s_height);
NVXIO_CHECK_REFERENCE(s_confidence);
vx_node classifier = imageClassifierNode(graph, inputRGB, s_width, s_height, s_confidence);
NVXIO_CHECK_REFERENCE(classifier);
// Graph verification
vx_status status = vxVerifyGraph(graph);
if (status != VX_SUCCESS)
{
vxAddLogEntry((vx_reference)context, status, "Failed to verify graph of object tracking");
exit(1);
}
/*
* 4. Detection: First frame processing
* ------------------------------------
*/
// RGB to Y conversion
ERROR_CHECK_STATUS(vxuColorConvert(context, inputRGB, inputGray));
#define REALTIME_DETECT 1
// The scalars are created at function scope so they can be released at the end
vx_scalar strength_thresh = vxCreateScalar(context, VX_TYPE_FLOAT32, &strength_threshold);
vx_scalar min_distance = vxCreateScalar(context, VX_TYPE_FLOAT32, &harris_min_distance);
vx_scalar sensitivity = vxCreateScalar(context, VX_TYPE_FLOAT32, &harris_sensitivity);
NVXIO_CHECK_REFERENCE(strength_thresh);
NVXIO_CHECK_REFERENCE(min_distance);
NVXIO_CHECK_REFERENCE(sensitivity);
if (REALTIME_DETECT)
{
/*
* Keypoint detection: FAST corner
*
* fast_nonmax_suppression: for details see http://docs.opencv.org/3.0-beta/doc/py_tutorials/py_feature2d/py_fast/py_fast.html
*/
vxuFastCorners(context, inputGray, strength_thresh, fast_nonmax_suppression,
(vx_array)vxGetReferenceFromDelay(keypoint_delay, -1), 0);
}
else
{
// Keypoint detection: Harris corner
vxuHarrisCorners(context, inputGray, strength_thresh, min_distance,
sensitivity, harris_gradient_size, harris_block_size,
(vx_array)vxGetReferenceFromDelay(keypoint_delay, -1), 0);
}
// Create the first pyramid needed for optical flow
vxuGaussianPyramid(context, inputGray, (vx_pyramid)vxGetReferenceFromDelay(pyramid_delay, -1));
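// The -1 delay slots must hold valid data before the first vxProcessGraph() call,
// which is why the first pyramid and keypoints are computed here with vxu calls.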
/*
* Processing Loop
*/
while (eventData.alive)
{
ovxio::FrameSource::FrameStatus status = ovxio::FrameSource::OK;
if (!eventData.pause)
{
status = source->fetch(inputRGB);
}
switch(status)
{
case ovxio::FrameSource::OK:
{
void *ptr = NULL; // Null means 'map'
vx_imagepatch_addressing_t addr; // The memory layout will be returned here
vx_rectangle_t rect = {0, 0, config.frameWidth, config.frameHeight};
double total_ms = totalTimer.toc();
totalTimer.tic();
std::ostringstream txt;
txt << std::fixed << std::setprecision(1);
txt << "Camera mode: " << config.frameWidth << 'x' << config.frameHeight << ' ' << config.fps << " FPS" << std::endl;
txt << "Algorithm: " << "No Processing" << std::endl;
txt << "Display: " << total_ms << " ms / " << 1000.0 / total_ms << " FPS" << std::endl;
txt << std::setprecision(6);
txt.unsetf(std::ios_base::floatfield);
txt << "FRAME RATE IS NOT CONSTRAINED" << std::endl;
//std::cout << txt.str();
txt << "Space - pause/resume" << std::endl;
txt << "Esc - close the demo";
// Access/commit pair; currently a placeholder for host-side access to the frame
ERROR_CHECK_STATUS(vxAccessImagePatch(inputRGB, &rect, 0, &addr, &ptr, VX_READ_AND_WRITE));
ERROR_CHECK_STATUS(vxCommitImagePatch(inputRGB, &rect, 0, &addr, ptr));
// Process graph
vxProcessGraph(graph);
// 'Age' delay object for the next frame processing
vxAgeDelay(pyramid_delay);
vxAgeDelay(keypoint_delay);
render->putImage(inputRGB);
//render->putTextViewport(txt.str(), style);
if (!render->flush())
eventData.alive = false;
}
break;
case ovxio::FrameSource::TIMEOUT:
{
// Do nothing
}
break;
case ovxio::FrameSource::CLOSED:
eventData.alive = false;
break;
}
}
//
// Release all objects
//
ERROR_CHECK_STATUS(vxReleaseImage(&inputRGB));
ERROR_CHECK_STATUS(vxReleaseImage(&inputGray));
ERROR_CHECK_STATUS(vxReleaseGraph(&graph));
ERROR_CHECK_STATUS(vxReleaseDelay(&pyramid_delay));
ERROR_CHECK_STATUS(vxReleaseDelay(&keypoint_delay));
ERROR_CHECK_STATUS(vxReleaseScalar(&strength_thresh));
ERROR_CHECK_STATUS(vxReleaseScalar(&min_distance));
ERROR_CHECK_STATUS(vxReleaseScalar(&sensitivity));
ERROR_CHECK_STATUS(vxReleaseScalar(&s_lk_epsilon));
ERROR_CHECK_STATUS(vxReleaseScalar(&s_lk_num_iterations));
ERROR_CHECK_STATUS(vxReleaseScalar(&s_lk_use_initial_estimate));
ERROR_CHECK_STATUS(vxReleaseScalar(&s_width));
ERROR_CHECK_STATUS(vxReleaseScalar(&s_height));
ERROR_CHECK_STATUS(vxReleaseScalar(&s_confidence));
delete net;
// ovxio::ContextGuard releases the OpenVX context automatically in its destructor
return nvxio::Application::APP_EXIT_CODE_SUCCESS;
}