I am working on a face recognition project by setting this pipeline:
face detection → face alignment → face recognition
Currently, I am facing issues related to face alignment, which requires landmarks objected from the face detection model (yolo)
How can we transfer the landmarks to user_meta in cpp/cu and get it in Python binding?
Please suggest some solution on how to add this feature.
Thank you,
PS: This is the code I have to extract landmarks from the Yolo-face model.
static std::vector<NvDsInferParseObjectInfo> decodeTensorYoloUM(const float* detection, const uint& outputSize, const uint& netW, const uint& netH,
const std::vector<float>& preclusterThreshold, NvDsUserMetaList *obj_user_meta_list)
{
std::vector<NvDsInferParseObjectInfo> binfo;
// adding User-meta information
// 'Something to see if obj user meta list exists';
// NvDsUserMetaList *obj_user_meta_list = g_new0(NvDsUserMetaList, 1);
// for face landmarks
std::vector<std::array<float, 10>> blmk;
for (uint b = 0; b < outputSize; ++b) {
float maxProb = detection[b * ARRAY_SIZE + 4];
int maxIndex = (int) detection[b * ARRAY_SIZE + 15];
if (maxProb < preclusterThreshold[maxIndex])
continue;
float bxc = detection[b * ARRAY_SIZE + 0];
float byc = detection[b * ARRAY_SIZE + 1];
float bw = detection[b * ARRAY_SIZE + 2];
float bh = detection[b * ARRAY_SIZE + 3];
float bx1 = bxc - bw / 2;
float by1 = byc - bh / 2;
float bx2 = bx1 + bw;
float by2 = by1 + bh;
addBBoxProposal(bx1, by1, bx2, by2, netW, netH, maxIndex, maxProb, binfo);
std::array<float, 10> face_landmarks;
face_landmarks[0] = detection[b * ARRAY_SIZE + 5];
face_landmarks[1] = detection[b * ARRAY_SIZE + 6];
face_landmarks[2] = detection[b * ARRAY_SIZE + 7];
face_landmarks[3] = detection[b * ARRAY_SIZE + 8];
face_landmarks[4] = detection[b * ARRAY_SIZE + 9];
face_landmarks[5] = detection[b * ARRAY_SIZE + 10];
face_landmarks[6] = detection[b * ARRAY_SIZE + 11];
face_landmarks[7] = detection[b * ARRAY_SIZE + 12];
face_landmarks[8] = detection[b * ARRAY_SIZE + 13];
face_landmarks[9] = detection[b * ARRAY_SIZE + 14];
blmk.push_back(face_landmarks);
}
// std::cout << "number of face with landmarks : " << blmk.size() << "/" << binfo.size() << std::endl;
// assert( binfo.size() == blmk.size());
for (uint m = 0; m < blmk.size(); ++m)
{
NvDsInferParseObjectInfo item = binfo[m];
std::array<float, 10> lmks = blmk[m];
NvDsUserMeta* user_meta = g_new0(NvDsUserMeta, 1);
user_meta->user_meta_data = set_metadata_ptr(lmks);
/*
std::cout << "BBox Cords: " << item.left << " " << item.top << " " << (item.left+item.width) << " " << (item.top+item.height) << " " << item.detectionConfidence << std::endl;
std::cout << "LNMs Cords: ";
for(auto i = 0; i < 10; ++i)
{
std::cout << lmks[i] << " ";
}
std::cout << std::endl;
*/
obj_user_meta_list = g_list_append(obj_user_meta_list, user_meta);
}
return binfo;
}
static bool
NvDsInferParseCustomYoloUM(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList)
{
if (outputLayersInfo.empty()) {
std::cerr << "ERROR: Could not find output layer in bbox parsing" << std::endl;
return false;
}
std::vector<NvDsInferParseObjectInfo> objects;
const NvDsInferLayerInfo& layer = outputLayersInfo[0];
const uint outputSize = layer.inferDims.d[0];
NvDsUserMetaList *obj_user_meta_list = g_new0(NvDsUserMetaList, 1);
std::vector<NvDsInferParseObjectInfo> outObjs =
decodeTensorYoloUM((const float*) (layer.buffer), outputSize, networkInfo.width,
networkInfo.height, detectionParams.perClassPreclusterThreshold, obj_user_meta_list);
objects.insert(objects.end(), outObjs.begin(), outObjs.end());
objectList = objects;
// related to usermeta
// Now add user metadata
// for (auto & obj : objectList) {
// Assuming landmarks are available for each object and are retrieved somehow.
// Example placeholder code for setting landmarks data:
// NvDsInferFaceLandmarks* landmarksData = new NvDsInferFaceLandmarks;
// NvDsUserMeta* userMeta = nvds_acquire_user_meta_from_pool(outObjs);
// if (userMeta) {
// userMeta->user_meta_data = (void*)landmarksData;
// userMeta->base_meta.meta_type = NVDSINFER_TLT_PGIE_USER_META;
// nvds_add_user_meta_to_obj(objMeta, userMeta);
// } else {
// delete landmarksData;
// }
// }
return true;
}
extern "C" bool
NvDsInferParseYoloUM(std::vector<NvDsInferLayerInfo> const& outputLayersInfo, NvDsInferNetworkInfo const& networkInfo,
NvDsInferParseDetectionParams const& detectionParams, std::vector<NvDsInferParseObjectInfo>& objectList)
{
return NvDsInferParseCustomYoloUM(outputLayersInfo, networkInfo, detectionParams, objectList);
}
CHECK_CUSTOM_PARSE_FUNC_PROTOTYPE(NvDsInferParseYoloUM);
Please provide complete information as applicable to your setup.
• Hardware Platform (Jetson / GPU) Jetson
• DeepStream Version 6.2
• JetPack Version (valid for Jetson only) 5.1.2
• TensorRT Version 8.2