Please provide the following information when requesting support.
• Hardware
NVIDIA GPU (RTX 3060 Ti, 12 GB)
• Network Type
Mask-RCNN
• TLT Version
[docker image] nvcr.io/nvidia/tlt-streamanalytics v3.0-dp-py3
• Training spec file
seed: 123
use_amp: False
warmup_steps: 1000
checkpoint: "/workspace/tlt-experiments/maskrcnn_copecPureoPtz/pretrained_resnet50/tlt_instance_segmentation_vresnet50/resnet50.hdf5"
learning_rate_steps: "[10000, 15000, 20000]"
learning_rate_decay_levels: "[0.1, 0.02, 0.01]"
total_steps: 25000
train_batch_size: 2
eval_batch_size: 4
num_steps_per_eval: 5000
momentum: 0.9
l2_weight_decay: 0.0001
warmup_learning_rate: 0.0001
init_learning_rate: 0.01
data_config{
image_size: "(1920,1024)"
augment_input_data: True
eval_samples: 20
training_file_pattern: "/workspace/tlt-experiments/data_copecPureoPtz/train*.tfrecord"
validation_file_pattern: "/workspace/tlt-experiments/data_copecPureoPtz/val*.tfrecord"
val_json_file: "/workspace/tlt-experiments/data_copecPureoPtz/raw-data/eac55e46-95e5-4c00-8ea1-7e13c57ccdbc_validation_coco.json"
# dataset specific parameters
num_classes: 6
skip_crowd_during_training: True
}
maskrcnn_config {
nlayers: 50
arch: "resnet"
freeze_bn: True
freeze_blocks: "[0,1]"
gt_mask_size: 112
# Region Proposal Network
rpn_positive_overlap: 0.7
rpn_negative_overlap: 0.3
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_min_size: 0.
# Proposal layer.
batch_size_per_im: 512
fg_fraction: 0.25
fg_thresh: 0.5
bg_thresh_hi: 0.5
bg_thresh_lo: 0.
# Faster-RCNN heads.
fast_rcnn_mlp_head_dim: 1024
bbox_reg_weights: "(10., 10., 5., 5.)"
# Mask-RCNN heads.
include_mask: True
mrcnn_resolution: 28
# training
train_rpn_pre_nms_topn: 2000
train_rpn_post_nms_topn: 1000
train_rpn_nms_threshold: 0.7
# evaluation
test_detections_per_image: 100
test_nms: 0.5
test_rpn_pre_nms_topn: 1000
test_rpn_post_nms_topn: 1000
test_rpn_nms_thresh: 0.7
# model architecture
min_level: 2
max_level: 6
num_scales: 1
aspect_ratios: "[(1.0, 1.0), (1.4, 0.7), (0.7, 1.4)]"
anchor_scale: 8
# localization loss
rpn_box_loss_weight: 1.0
fast_rcnn_box_loss_weight: 1.0
mrcnn_weight_loss_mask: 1.0
}
• Model
!wget -O $USER_EXPERIMENT_DIR/pretrained_resnet50/tlt_instance_segmentation_vresnet50/resnet50.hdf5 'https://api.ngc.nvidia.com/v2/models/nvidia/tao/pretrained_instance_segmentation/versions/resnet50/files/resnet50.hdf5'
• Dataset Info
The dataset is a custom dataset of 100 images.
It is split into 3 subsets:
training (70 imgs)
testing (20 imgs)
validation (10 imgs)
All images are JPEG files (3x1920x1080).
The dataset is annotated in the COCO segmentation format.
cocolabels.txt:
truck-tank-cap
truck-front
truck-tank-cover
station-arm
truck-tank
• TFrecords script
The script is almost the same as the original, but it ignores the captions.
# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Convert raw COCO dataset to TFRecord for object_detection.
Example usage:
python create_coco_tf_record.py --logtostderr \
--train_image_dir="${TRAIN_IMAGE_DIR}" \
--val_image_dir="${VAL_IMAGE_DIR}" \
--test_image_dir="${TEST_IMAGE_DIR}" \
--train_object_annotations_file="${TRAIN_ANNOTATIONS_FILE}" \
--val_object_annotations_file="${VAL_ANNOTATIONS_FILE}" \
--testdev_annotations_file="${TESTDEV_ANNOTATIONS_FILE}" \
--output_dir="${OUTPUT_DIR}"
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import hashlib
import io
import json
import multiprocessing
import os
from absl import app
from absl import flags
import numpy as np
import PIL.Image
from pycocotools import mask
from research.object_detection.utils import dataset_util
from research.object_detection.utils import label_map_util
import tensorflow as tf
# Command-line flags consumed by main().
flags.DEFINE_boolean(
    name='include_masks',
    default=False,
    help=('Whether to include instance segmentations masks (PNG encoded) '
          'in the result. default: False.'))

# Image directories, one per dataset split.
flags.DEFINE_string(
    name='train_image_dir', default='', help='Training image directory.')
flags.DEFINE_string(
    name='val_image_dir', default='', help='Validation image directory.')
flags.DEFINE_string(
    name='test_image_dir', default='', help='Test image directory.')

# COCO object (bbox / segmentation) annotation JSON files.
flags.DEFINE_string(
    name='train_object_annotations_file', default='', help='')
flags.DEFINE_string(
    name='val_object_annotations_file', default='', help='')

# Caption annotation JSON files. They are still forwarded by main(), but the
# call that would load them is commented out in this variant of the script.
flags.DEFINE_string(
    name='train_caption_annotations_file', default='', help='')
flags.DEFINE_string(
    name='val_caption_annotations_file', default='', help='')

flags.DEFINE_string(
    name='testdev_annotations_file',
    default='',
    help='Test-dev annotations JSON file.')
flags.DEFINE_string(
    name='output_dir', default='/tmp/', help='Output data directory.')

FLAGS = flags.FLAGS

# Emit INFO-level progress messages while converting.
tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
def create_tf_example(image,
                      bbox_annotations,
                      image_dir,
                      category_index,
                      include_masks=False):
    """Converts image and annotations to a tf.Example proto.

    Caption annotations are not handled by this variant of the script; the
    'image/caption' feature is always written as an empty list.

    Args:
      image: dict with keys:
        [u'license', u'file_name', u'coco_url', u'height', u'width',
        u'date_captured', u'flickr_url', u'id']
        Only 'height', 'width', 'file_name' and 'id' are read here.
      bbox_annotations:
        list of dicts with keys:
        [u'segmentation', u'area', u'iscrowd', u'image_id',
        u'bbox', u'category_id', u'id']
        Notice that bounding box coordinates in the official COCO dataset are
        given as [x, y, width, height] tuples using absolute coordinates where
        x, y represent the top-left (0-indexed) corner. This function converts
        them to coordinates normalized relative to the image size, stored as
        separate xmin / xmax / ymin / ymax lists.
      image_dir: directory containing the image files.
      category_index: a dict containing COCO category information keyed
        by the 'id' field of each category. See the
        label_map_util.create_category_index function.
      include_masks: Whether to include instance segmentations masks
        (PNG encoded) in the result. default: False.

    Returns:
      key: hex SHA-256 digest of the encoded image bytes.
      example: The converted tf.Example
      num_annotations_skipped: Number of (invalid) annotations that were
        ignored, i.e. boxes with a non-positive size or boxes extending past
        the image border.

    Raises:
      KeyError: if an annotation refers to a category_id that has no named
        entry in category_index.
    """
    image_height = image['height']
    image_width = image['width']
    filename = image['file_name']
    image_id = image['id']

    full_path = os.path.join(image_dir, filename)
    with tf.io.gfile.GFile(full_path, 'rb') as fid:
        encoded_jpg = fid.read()
    # The decoded image is not used; the file is only opened with PIL, as the
    # original code did. NOTE(review): presumably this makes unreadable image
    # files fail early -- confirm against the PIL documentation.
    PIL.Image.open(io.BytesIO(encoded_jpg))
    key = hashlib.sha256(encoded_jpg).hexdigest()

    xmin = []
    xmax = []
    ymin = []
    ymax = []
    is_crowd = []
    category_names = []
    category_ids = []
    area = []
    encoded_mask_png = []
    num_annotations_skipped = 0
    for object_annotations in bbox_annotations:
        (x, y, width, height) = tuple(object_annotations['bbox'])
        if width <= 0 or height <= 0:
            num_annotations_skipped += 1
            continue
        if x + width > image_width or y + height > image_height:
            num_annotations_skipped += 1
            continue

        # Resolve the category before touching any output list so that a bad
        # category id can never leave the per-object lists with different
        # lengths.
        category_id = int(object_annotations['category_id'])
        category = category_index.get(category_id)
        if category is None or 'name' not in category:
            raise KeyError(
                'Annotation %r of image %r uses category_id %r, which has no '
                'named entry in the category index (known ids: %r).' %
                (object_annotations.get('id'), filename, category_id,
                 sorted(category_index)))

        xmin.append(float(x) / image_width)
        xmax.append(float(x + width) / image_width)
        ymin.append(float(y) / image_height)
        ymax.append(float(y + height) / image_height)
        is_crowd.append(object_annotations['iscrowd'])
        category_ids.append(category_id)
        category_names.append(category['name'].encode('utf8'))
        area.append(object_annotations['area'])

        if include_masks:
            run_len_encoding = mask.frPyObjects(
                object_annotations['segmentation'], image_height, image_width)
            binary_mask = mask.decode(run_len_encoding)
            # A polygon list decodes to one (H, W) plane per polygon, stacked
            # along axis 2; merge them into a single mask. Checking the rank
            # instead of 'iscrowd' also handles non-crowd objects stored as
            # RLE, which already decode to a single 2-D mask.
            if binary_mask.ndim == 3:
                binary_mask = np.amax(binary_mask, axis=2)
            pil_image = PIL.Image.fromarray(binary_mask)
            output_io = io.BytesIO()
            pil_image.save(output_io, format='PNG')
            encoded_mask_png.append(output_io.getvalue())

    # Captions are intentionally left empty (see docstring).
    captions = []

    feature_dict = {
        'image/height':
            dataset_util.int64_feature(image_height),
        'image/width':
            dataset_util.int64_feature(image_width),
        'image/filename':
            dataset_util.bytes_feature(filename.encode('utf8')),
        'image/source_id':
            dataset_util.bytes_feature(str(image_id).encode('utf8')),
        'image/key/sha256':
            dataset_util.bytes_feature(key.encode('utf8')),
        'image/encoded':
            dataset_util.bytes_feature(encoded_jpg),
        'image/caption':
            dataset_util.bytes_list_feature(captions),
        'image/format':
            dataset_util.bytes_feature('jpeg'.encode('utf8')),
        'image/object/bbox/xmin':
            dataset_util.float_list_feature(xmin),
        'image/object/bbox/xmax':
            dataset_util.float_list_feature(xmax),
        'image/object/bbox/ymin':
            dataset_util.float_list_feature(ymin),
        'image/object/bbox/ymax':
            dataset_util.float_list_feature(ymax),
        'image/object/class/text':
            dataset_util.bytes_list_feature(category_names),
        'image/object/class/label':
            dataset_util.int64_list_feature(category_ids),
        'image/object/is_crowd':
            dataset_util.int64_list_feature(is_crowd),
        'image/object/area':
            dataset_util.float_list_feature(area),
    }
    if include_masks:
        feature_dict['image/object/mask'] = (
            dataset_util.bytes_list_feature(encoded_mask_png))
    example = tf.train.Example(features=tf.train.Features(feature=feature_dict))
    return key, example, num_annotations_skipped
def _pool_create_tf_example(args):
    """Single-argument adapter around create_tf_example.

    Args:
      args: iterable of the positional arguments for create_tf_example, in
        the order that function declares them.

    Returns:
      Whatever create_tf_example returns for those arguments.
    """
    positional_args = tuple(args)
    return create_tf_example(*positional_args)
def _load_object_annotations(object_annotations_file):
    """Reads a COCO object annotation file and indexes it by image.

    Args:
      object_annotations_file: path to the JSON file; it is read for its
        'images', 'categories' and 'annotations' entries.

    Returns:
      images: the 'images' list from the file.
      img_to_obj_annotation: defaultdict(list) mapping an image id to the
        annotations whose 'image_id' equals it.
      category_index: result of label_map_util.create_category_index applied
        to the 'categories' entry.
    """
    with tf.io.gfile.GFile(object_annotations_file, 'r') as json_file:
        coco = json.load(json_file)

    images = coco['images']
    category_index = label_map_util.create_category_index(coco['categories'])

    tf.compat.v1.logging.info('Building bounding box index.')
    img_to_obj_annotation = collections.defaultdict(list)
    for entry in coco['annotations']:
        img_to_obj_annotation[entry['image_id']].append(entry)

    # Membership tests do not insert keys into the defaultdict, so counting
    # leaves the index untouched.
    missing_annotation_count = sum(
        1 for img in images if img['id'] not in img_to_obj_annotation)
    tf.compat.v1.logging.info('%d images are missing bboxes.', missing_annotation_count)

    return images, img_to_obj_annotation, category_index
def _load_caption_annotations(caption_annotations_file):
    """Reads a COCO caption annotation file and indexes it by image.

    Args:
      caption_annotations_file: path to the JSON file; it is read for its
        'annotations' and 'images' entries.

    Returns:
      defaultdict(list) mapping an image id to the caption annotations whose
      'image_id' equals it.
    """
    with tf.io.gfile.GFile(caption_annotations_file, 'r') as json_file:
        coco_captions = json.load(json_file)

    tf.compat.v1.logging.info('Building caption index.')
    img_to_caption_annotation = collections.defaultdict(list)
    for entry in coco_captions['annotations']:
        img_to_caption_annotation[entry['image_id']].append(entry)

    # Membership tests do not insert keys into the defaultdict, so counting
    # leaves the index untouched.
    missing_annotation_count = sum(
        1 for img in coco_captions['images']
        if img['id'] not in img_to_caption_annotation)
    tf.compat.v1.logging.info('%d images are missing captions.', missing_annotation_count)

    return img_to_caption_annotation
def _create_tf_record_from_coco_annotations(
        object_annotations_file,
        caption_annotations_file,
        image_dir, output_path, include_masks, num_shards):
    """Loads COCO annotation json files and converts to tf.Record format.

    Examples are built in a multiprocessing pool and written round-robin
    (example index modulo num_shards) to files named
    '<output_path>-NNNNN-of-MMMMM.tfrecord'.

    Args:
      object_annotations_file: JSON file containing bounding box annotations.
      caption_annotations_file: JSON file containing caption annotations.
        Accepted for interface compatibility but currently ignored: this
        variant of the script does not load captions.
      image_dir: Directory containing the image files.
      output_path: Path prefix of the output tf.Record files.
      include_masks: Whether to include instance segmentations masks
        (PNG encoded) in the result. default: False.
      num_shards: Number of output files to create.
    """
    tf.compat.v1.logging.info('writing to output path: %s', output_path)
    writers = [
        tf.io.TFRecordWriter(
            output_path + '-%05d-of-%05d.tfrecord' % (i, num_shards))
        for i in range(num_shards)
    ]
    total_num_annotations_skipped = 0
    try:
        images, img_to_obj_annotation, category_index = (
            _load_object_annotations(object_annotations_file))
        example_args = [
            (image,
             img_to_obj_annotation[image['id']],
             image_dir,
             category_index,
             include_masks)
            for image in images
        ]

        pool = multiprocessing.Pool()
        try:
            for idx, (_, tf_example, num_annotations_skipped) in enumerate(
                    pool.imap(_pool_create_tf_example, example_args)):
                if idx % 100 == 0:
                    tf.compat.v1.logging.info(
                        'On image %d of %d', idx, len(images))
                total_num_annotations_skipped += num_annotations_skipped
                writers[idx % num_shards].write(tf_example.SerializeToString())
            pool.close()
            pool.join()
        finally:
            # After a clean close()/join() this has nothing left to stop; if
            # a worker raised, it stops the remaining workers instead of
            # leaving them running.
            pool.terminate()
    finally:
        # Always close the shard files, even when the conversion fails.
        for writer in writers:
            writer.close()

    tf.compat.v1.logging.info('Finished writing, skipped %d annotations.',
                              total_num_annotations_skipped)
def main(_):
    """Converts the training and validation splits to sharded TFRecords.

    Reads its configuration from the command-line flags, creates the output
    directory when needed and writes '<output_dir>/train-*' and
    '<output_dir>/val-*' record files.

    Raises:
      app.UsageError: if a required flag is empty.
    """
    # Explicit checks instead of `assert`, which is stripped when Python runs
    # with -O. `test_image_dir` stays required for command-line compatibility
    # although no test records are written here.
    required_flags = (
        ('train_image_dir', FLAGS.train_image_dir),
        ('val_image_dir', FLAGS.val_image_dir),
        ('test_image_dir', FLAGS.test_image_dir),
        ('train_object_annotations_file', FLAGS.train_object_annotations_file),
        ('val_object_annotations_file', FLAGS.val_object_annotations_file),
    )
    for flag_name, flag_value in required_flags:
        if not flag_value:
            raise app.UsageError('`%s` missing.' % flag_name)

    if not tf.io.gfile.isdir(FLAGS.output_dir):
        tf.io.gfile.makedirs(FLAGS.output_dir)
    train_output_path = os.path.join(FLAGS.output_dir, 'train')
    val_output_path = os.path.join(FLAGS.output_dir, 'val')

    # The shard counts are hard-coded; adjust them when the number of images
    # per split changes.
    _create_tf_record_from_coco_annotations(
        FLAGS.train_object_annotations_file,
        FLAGS.train_caption_annotations_file,
        FLAGS.train_image_dir,
        train_output_path,
        FLAGS.include_masks,
        num_shards=70)
    _create_tf_record_from_coco_annotations(
        FLAGS.val_object_annotations_file,
        FLAGS.val_caption_annotations_file,
        FLAGS.val_image_dir,
        val_output_path,
        FLAGS.include_masks,
        num_shards=9)


if __name__ == '__main__':
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
    app.run(main)
• Problem Description
Training fails with the following exception:
ValueError: Total size of new array must be unchanged for box_head/class-predict/kernel lh_shape: [(1024, 91)], rh_shape: [(1024, 6)]
• Full error log
For multi-GPU, change --gpus based on your machine.
Using TensorFlow backend.
WARNING:tensorflow:Deprecation warnings have been disabled. Set TF_ENABLE_DEPRECATION_WARNINGS=1 to re-enable them.
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/horovod/tensorflow/__init__.py:117: The name tf.global_variables is deprecated. Please use tf.compat.v1.global_variables instead.
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/horovod/tensorflow/__init__.py:143: The name tf.get_default_graph is deprecated. Please use tf.compat.v1.get_default_graph instead.
Using TensorFlow backend.
[MaskRCNN] INFO : Loading weights from /workspace/tlt-experiments/maskrcnn_copecPureoPtz/experiment_dir_unpruned/model.step-0.tlt
[MaskRCNN] INFO : Loading weights from /workspace/tlt-experiments/maskrcnn_copecPureoPtz/experiment_dir_unpruned/model.step-0.tlt
[MaskRCNN] INFO : Create EncryptCheckpointSaverHook.
[MaskRCNN] INFO : =================================
[MaskRCNN] INFO : Start training cycle 01
[MaskRCNN] INFO : =================================
WARNING:tensorflow:Entity <function InputReader.__call__.<locals>._prefetch_dataset at 0x7fe447c22268> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Unable to locate the source code of <function InputReader.__call__.<locals>._prefetch_dataset at 0x7fe447c22268>. Note that functions defined in certain environments, like the interactive Python shell do not expose their source code. If that is the case, you should to define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.do_not_convert. Original error: could not get source code
WARNING:tensorflow:From /usr/local/lib/python3.6/dist-packages/tensorflow_core/python/autograph/converters/directives.py:119: The name tf.set_random_seed is deprecated. Please use tf.compat.v1.set_random_seed instead.
WARNING:tensorflow:Entity <function dataset_parser at 0x7fe42400b9d8> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Unable to locate the source code of <function dataset_parser at 0x7fe42400b9d8>. Note that functions defined in certain environments, like the interactive Python shell do not expose their source code. If that is the case, you should to define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.do_not_convert. Original error: could not get source code
WARNING:tensorflow:The operation `tf.image.convert_image_dtype` will be skipped since the input and output dtypes are identical.
WARNING:tensorflow:The operation `tf.image.convert_image_dtype` will be skipped since the input and output dtypes are identical.
WARNING:tensorflow:The operation `tf.image.convert_image_dtype` will be skipped since the input and output dtypes are identical.
WARNING:tensorflow:The operation `tf.image.convert_image_dtype` will be skipped since the input and output dtypes are identical.
WARNING:tensorflow:Entity <bound method FPNNetwork.call of <iva.mask_rcnn.models.fpn.FPNNetwork object at 0x7fe3d0e5d0b8>> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Unable to locate the source code of <bound method FPNNetwork.call of <iva.mask_rcnn.models.fpn.FPNNetwork object at 0x7fe3d0e5d0b8>>. Note that functions defined in certain environments, like the interactive Python shell do not expose their source code. If that is the case, you should to define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.do_not_convert. Original error: could not get source code
WARNING:tensorflow:Entity <bound method RPN_Head_Model.call of <iva.mask_rcnn.models.heads.RPN_Head_Model object at 0x7fe3d0dcde10>> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Unable to locate the source code of <bound method RPN_Head_Model.call of <iva.mask_rcnn.models.heads.RPN_Head_Model object at 0x7fe3d0dcde10>>. Note that functions defined in certain environments, like the interactive Python shell do not expose their source code. If that is the case, you should to define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.do_not_convert. Original error: could not get source code
WARNING:tensorflow:Entity <bound method RPN_Head_Model.call of <iva.mask_rcnn.models.heads.RPN_Head_Model object at 0x7fe3d0dcde10>> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Unable to locate the source code of <bound method RPN_Head_Model.call of <iva.mask_rcnn.models.heads.RPN_Head_Model object at 0x7fe3d0dcde10>>. Note that functions defined in certain environments, like the interactive Python shell do not expose their source code. If that is the case, you should to define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.do_not_convert. Original error: could not get source code
WARNING:tensorflow:Entity <bound method RPN_Head_Model.call of <iva.mask_rcnn.models.heads.RPN_Head_Model object at 0x7fe3d0dcde10>> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Unable to locate the source code of <bound method RPN_Head_Model.call of <iva.mask_rcnn.models.heads.RPN_Head_Model object at 0x7fe3d0dcde10>>. Note that functions defined in certain environments, like the interactive Python shell do not expose their source code. If that is the case, you should to define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.do_not_convert. Original error: could not get source code
WARNING:tensorflow:Entity <bound method RPN_Head_Model.call of <iva.mask_rcnn.models.heads.RPN_Head_Model object at 0x7fe3d0dcde10>> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Unable to locate the source code of <bound method RPN_Head_Model.call of <iva.mask_rcnn.models.heads.RPN_Head_Model object at 0x7fe3d0dcde10>>. Note that functions defined in certain environments, like the interactive Python shell do not expose their source code. If that is the case, you should to define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.do_not_convert. Original error: could not get source code
WARNING:tensorflow:Entity <bound method RPN_Head_Model.call of <iva.mask_rcnn.models.heads.RPN_Head_Model object at 0x7fe3d0dcde10>> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Unable to locate the source code of <bound method RPN_Head_Model.call of <iva.mask_rcnn.models.heads.RPN_Head_Model object at 0x7fe3d0dcde10>>. Note that functions defined in certain environments, like the interactive Python shell do not expose their source code. If that is the case, you should to define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.do_not_convert. Original error: could not get source code
[MaskRCNN] INFO : [ROI OPs] Using Batched NMS... Scope: multilevel_propose_rois/level_2/
[MaskRCNN] INFO : [ROI OPs] Using Batched NMS... Scope: multilevel_propose_rois/level_3/
[MaskRCNN] INFO : [ROI OPs] Using Batched NMS... Scope: multilevel_propose_rois/level_4/
[MaskRCNN] INFO : [ROI OPs] Using Batched NMS... Scope: multilevel_propose_rois/level_5/
[MaskRCNN] INFO : [ROI OPs] Using Batched NMS... Scope: multilevel_propose_rois/level_6/
WARNING:tensorflow:Entity <bound method Box_Head_Model.call of <iva.mask_rcnn.models.heads.Box_Head_Model object at 0x7fe3d0ba4f98>> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Unable to locate the source code of <bound method Box_Head_Model.call of <iva.mask_rcnn.models.heads.Box_Head_Model object at 0x7fe3d0ba4f98>>. Note that functions defined in certain environments, like the interactive Python shell do not expose their source code. If that is the case, you should to define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.do_not_convert. Original error: could not get source code
WARNING:tensorflow:Entity <bound method Mask_Head_Model.call of <iva.mask_rcnn.models.heads.Mask_Head_Model object at 0x7fe3d09779b0>> could not be transformed and will be executed as-is. Please report this to the AutoGraph team. When filing the bug, set the verbosity to 10 (on Linux, `export AUTOGRAPH_VERBOSITY=10`) and attach the full output. Cause: Unable to locate the source code of <bound method Mask_Head_Model.call of <iva.mask_rcnn.models.heads.Mask_Head_Model object at 0x7fe3d09779b0>>. Note that functions defined in certain environments, like the interactive Python shell do not expose their source code. If that is the case, you should to define them in a .py source file. If you are certain the code is graph-compatible, wrap the call using @tf.autograph.do_not_convert. Original error: could not get source code
4 ops no flops stats due to incomplete shapes.
Parsing Inputs...
[MaskRCNN] INFO : [Training Compute Statistics] 852.6 GFLOPS/image
Traceback (most recent call last):
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/mask_rcnn/scripts/train.py", line 196, in <module>
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/mask_rcnn/scripts/train.py", line 192, in main
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/mask_rcnn/scripts/train.py", line 91, in run_executer
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/mask_rcnn/executer/distributed_executer.py", line 394, in train_and_eval
File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 370, in train
loss = self._train_model(input_fn, hooks, saving_listeners)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1161, in _train_model
return self._train_model_default(input_fn, hooks, saving_listeners)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1195, in _train_model_default
saving_listeners)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_estimator/python/estimator/estimator.py", line 1490, in _train_with_estimator_spec
log_step_count_steps=log_step_count_steps) as mon_sess:
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/monitored_session.py", line 584, in MonitoredTrainingSession
stop_grace_period_secs=stop_grace_period_secs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/monitored_session.py", line 1014, in __init__
stop_grace_period_secs=stop_grace_period_secs)
File "/usr/local/lib/python3.6/dist-packages/tensorflow_core/python/training/monitored_session.py", line 713, in __init__
h.begin()
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/mask_rcnn/hooks/pretrained_restore_hook.py", line 209, in begin
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/mask_rcnn/hooks/pretrained_restore_hook.py", line 113, in assign_from_checkpoint
ValueError: Total size of new array must be unchanged for box_head/class-predict/kernel lh_shape: [(1024, 91)], rh_shape: [(1024, 6)]
[MaskRCNN] ERROR : Job finished with an uncaught exception: `FAILURE`
Traceback (most recent call last):
File "/usr/local/bin/mask_rcnn", line 8, in <module>
sys.exit(main())
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/mask_rcnn/entrypoint/mask_rcnn.py", line 12, in main
File "/home/vpraveen/.cache/dazel/_dazel_vpraveen/216c8b41e526c3295d3b802489ac2034/execroot/ai_infra/bazel-out/k8-fastbuild/bin/magnet/packages/iva/build_wheel.runfiles/ai_infra/iva/common/entrypoint/entrypoint.py", line 296, in launch_job
AssertionError: Process run failed.
• **Additional info**
The original example works; the problem only appears when I use my custom data.