research/object_detection/dataset_tools/seq_example_util_test.py from tensorflow/models

research/object_detection/dataset_tools/seq_example_util_test.py
Summary

Maintainability

6 days
Test Coverage

Issues
# Copyright 2020 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Tests for object_detection.dataset_tools.seq_example_util."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import numpy as np
import six
import tensorflow.compat.v1 as tf

from object_detection.dataset_tools import seq_example_util
from object_detection.utils import tf_version


class SeqExampleUtilTest(tf.test.TestCase):
  """Tests for `seq_example_util.make_sequence_example`.

  Every test JPEG-encodes a handful of random frames, builds a sequence
  example from them and then checks the context features and the per-frame
  feature lists of the result.
  """

  _BOX_COMPONENTS = ('ymin', 'xmin', 'ymax', 'xmax')

  def materialize_tensors(self, list_of_tensors):
    """Evaluates a list of tensors to concrete values.

    Args:
      list_of_tensors: A list of tensors to evaluate.

    Returns:
      A list with one evaluated value per tensor: `tensor.numpy()` when
      running under TF2, otherwise the result of running the whole list in a
      cached session.
    """
    if tf_version.is_tf2():
      return [tensor.numpy() for tensor in list_of_tensors]
    with self.cached_session() as sess:
      return sess.run(list_of_tensors)

  def _make_encoded_images(self, num_frames, image_height, image_width):
    """Returns `num_frames` materialized JPEG encodings of random RGB images.

    Args:
      num_frames: Number of images to generate.
      image_height: Height of each image in pixels.
      image_width: Width of each image in pixels.

    Returns:
      A list of `num_frames` evaluated `tf.io.encode_jpeg` outputs.
    """
    images = tf.cast(
        tf.random.uniform(
            [num_frames, image_height, image_width, 3],
            maxval=256,
            dtype=tf.int32),
        dtype=tf.uint8)
    encoded_images = [
        tf.io.encode_jpeg(image) for image in tf.unstack(images, axis=0)]
    return self.materialize_tensors(encoded_images)

  def _assert_clip_context(self, context_feature_dict, dataset_name,
                           start_timestamp, end_timestamp, num_frames):
    """Checks the dataset name and clip-level context features.

    Args:
      context_feature_dict: The `context.feature` map of the sequence example.
      dataset_name: Expected value of 'example/dataset_name'.
      start_timestamp: Expected value of 'clip/start/timestamp'.
      end_timestamp: Expected value of 'clip/end/timestamp'.
      num_frames: Expected value of 'clip/frames'.
    """
    self.assertEqual(
        dataset_name,
        context_feature_dict['example/dataset_name'].bytes_list.value[0])
    self.assertEqual(
        start_timestamp,
        context_feature_dict['clip/start/timestamp'].int64_list.value[0])
    self.assertEqual(
        end_timestamp,
        context_feature_dict['clip/end/timestamp'].int64_list.value[0])
    self.assertEqual(
        num_frames,
        context_feature_dict['clip/frames'].int64_list.value[0])

  def _assert_frame_sequence(self, seq_feature_dict, num_frames,
                             expected_timestamps):
    """Checks the number of encoded frames and the per-frame timestamps.

    Args:
      seq_feature_dict: The `feature_lists.feature_list` map of the example.
      num_frames: Expected number of entries under 'image/encoded'.
      expected_timestamps: Expected 'image/timestamp' value of every frame.
    """
    self.assertLen(
        seq_feature_dict['image/encoded'].feature[:],
        num_frames)
    actual_timestamps = [
        feature.int64_list.value[0] for feature
        in seq_feature_dict['image/timestamp'].feature]
    self.assertAllEqual(expected_timestamps, actual_timestamps)

  def _assert_frame_boxes(self, seq_feature_dict, frame_index, expected_boxes,
                          key_prefix='region/bbox'):
    """Checks the four box-coordinate feature lists of a single frame.

    Args:
      seq_feature_dict: The `feature_lists.feature_list` map of the example.
      frame_index: Index of the frame to inspect.
      expected_boxes: List of `[ymin, xmin, ymax, xmax]` rows expected for the
        frame; an empty list when the frame should hold no boxes.
      key_prefix: Feature-key prefix to which '/ymin', '/xmin', '/ymax' and
        '/xmax' are appended.
    """
    for column, component in enumerate(self._BOX_COMPONENTS):
      feature_list = seq_feature_dict[key_prefix + '/' + component]
      self.assertAllClose(
          [box[column] for box in expected_boxes],
          feature_list.feature[frame_index].float_list.value[:])

  def _assert_frame_groundtruth(self, seq_feature_dict, frame_index,
                                expected_is_annotated, expected_boxes,
                                expected_labels):
    """Checks annotation flag, groundtruth boxes and labels of one frame.

    Args:
      seq_feature_dict: The `feature_lists.feature_list` map of the example.
      frame_index: Index of the frame to inspect.
      expected_is_annotated: Expected 'region/is_annotated' value.
      expected_boxes: List of `[ymin, xmin, ymax, xmax]` rows expected under
        'region/bbox/*'.
      expected_labels: Expected list of byte strings under
        'region/label/string'.
    """
    is_annotated_list = seq_feature_dict['region/is_annotated']
    self.assertEqual(
        expected_is_annotated,
        is_annotated_list.feature[frame_index].int64_list.value[0])
    self._assert_frame_boxes(seq_feature_dict, frame_index, expected_boxes)
    label_list = seq_feature_dict['region/label/string']
    self.assertAllEqual(
        expected_labels,
        label_list.feature[frame_index].bytes_list.value[:])

  def test_make_unlabeled_example(self):
    """Images plus source ids only; timestamps are the frame indices."""
    num_frames = 5
    image_height = 100
    image_width = 200
    dataset_name = b'unlabeled_dataset'
    video_id = b'video_000'
    image_source_ids = [str(idx) for idx in range(num_frames)]
    encoded_images = self._make_encoded_images(
        num_frames, image_height, image_width)

    seq_example = seq_example_util.make_sequence_example(
        dataset_name=dataset_name,
        video_id=video_id,
        encoded_images=encoded_images,
        image_height=image_height,
        image_width=image_width,
        image_format='JPEG',
        image_source_ids=image_source_ids)

    context_feature_dict = seq_example.context.feature
    self._assert_clip_context(
        context_feature_dict, dataset_name,
        start_timestamp=0,
        end_timestamp=num_frames - 1,
        num_frames=num_frames)
    self.assertEqual(
        3,
        context_feature_dict['image/channels'].int64_list.value[0])
    self.assertEqual(
        b'JPEG',
        context_feature_dict['image/format'].bytes_list.value[0])
    self.assertEqual(
        image_height,
        context_feature_dict['image/height'].int64_list.value[0])
    self.assertEqual(
        image_width,
        context_feature_dict['image/width'].int64_list.value[0])
    self.assertEqual(
        video_id,
        context_feature_dict['clip/media_id'].bytes_list.value[0])

    seq_feature_dict = seq_example.feature_lists.feature_list
    self._assert_frame_sequence(
        seq_feature_dict, num_frames, list(range(num_frames)))
    source_ids = [
        feature.bytes_list.value[0] for feature
        in seq_feature_dict['image/source_id'].feature]
    self.assertAllEqual(
        [six.ensure_binary(source_id) for source_id in image_source_ids],
        source_ids)

  def test_make_labeled_example(self):
    """Explicit timestamps, annotation flags, boxes and label strings."""
    num_frames = 3
    image_height = 100
    image_width = 200
    dataset_name = b'unlabeled_dataset'
    video_id = b'video_000'
    labels = [b'dog', b'cat', b'wolf']
    encoded_images = self._make_encoded_images(
        num_frames, image_height, image_width)
    timestamps = [100000, 110000, 120000]
    is_annotated = [1, 0, 1]
    # The last two frames exercise both spellings of "no boxes": an empty
    # [0, 4] array and a rank-1 empty array.
    bboxes = [
        np.array([[0., 0., 0., 0.],
                  [0., 0., 1., 1.]], dtype=np.float32),
        np.zeros([0, 4], dtype=np.float32),
        np.array([], dtype=np.float32)
    ]
    label_strings = [
        np.array(labels),
        np.array([]),
        np.array([])
    ]

    seq_example = seq_example_util.make_sequence_example(
        dataset_name=dataset_name,
        video_id=video_id,
        encoded_images=encoded_images,
        image_height=image_height,
        image_width=image_width,
        timestamps=timestamps,
        is_annotated=is_annotated,
        bboxes=bboxes,
        label_strings=label_strings)

    self._assert_clip_context(
        seq_example.context.feature, dataset_name,
        start_timestamp=timestamps[0],
        end_timestamp=timestamps[-1],
        num_frames=num_frames)

    seq_feature_dict = seq_example.feature_lists.feature_list
    self._assert_frame_sequence(seq_feature_dict, num_frames, timestamps)
    # Frame 0.
    self._assert_frame_groundtruth(
        seq_feature_dict, 0,
        expected_is_annotated=is_annotated[0],
        expected_boxes=[[0., 0., 0., 0.], [0., 0., 1., 1.]],
        expected_labels=labels)
    # Frame 1.
    self._assert_frame_groundtruth(
        seq_feature_dict, 1,
        expected_is_annotated=is_annotated[1],
        expected_boxes=[],
        expected_labels=[])
    # Frame 2: annotated, but built from a rank-1 empty box array.
    self._assert_frame_groundtruth(
        seq_feature_dict, 2,
        expected_is_annotated=is_annotated[2],
        expected_boxes=[],
        expected_labels=[])

  def test_make_labeled_example_with_context_features(self):
    """Labeled example that additionally carries context features."""
    num_frames = 2
    image_height = 100
    image_width = 200
    dataset_name = b'unlabeled_dataset'
    video_id = b'video_000'
    labels = [b'dog', b'cat']
    encoded_images = self._make_encoded_images(
        num_frames, image_height, image_width)
    timestamps = [100000, 110000]
    is_annotated = [1, 0]
    bboxes = [
        np.array([[0., 0., 0., 0.],
                  [0., 0., 1., 1.]], dtype=np.float32),
        np.zeros([0, 4], dtype=np.float32)
    ]
    label_strings = [
        np.array(labels),
        np.array([])
    ]
    context_features = [0.0, 0.1, 0.2, 0.3, 0.4, 0.5]
    context_feature_length = [3]
    context_features_image_id_list = [b'im_1', b'im_2']

    seq_example = seq_example_util.make_sequence_example(
        dataset_name=dataset_name,
        video_id=video_id,
        encoded_images=encoded_images,
        image_height=image_height,
        image_width=image_width,
        timestamps=timestamps,
        is_annotated=is_annotated,
        bboxes=bboxes,
        label_strings=label_strings,
        context_features=context_features,
        context_feature_length=context_feature_length,
        context_features_image_id_list=context_features_image_id_list)

    context_feature_dict = seq_example.context.feature
    self._assert_clip_context(
        context_feature_dict, dataset_name,
        start_timestamp=timestamps[0],
        end_timestamp=timestamps[-1],
        num_frames=num_frames)
    self.assertAllClose(
        context_features,
        context_feature_dict['image/context_features'].float_list.value[:])
    self.assertEqual(
        context_feature_length[0],
        context_feature_dict[
            'image/context_feature_length'].int64_list.value[0])
    self.assertEqual(
        context_features_image_id_list,
        context_feature_dict[
            'image/context_features_image_id_list'].bytes_list.value[:])

    seq_feature_dict = seq_example.feature_lists.feature_list
    self._assert_frame_sequence(seq_feature_dict, num_frames, timestamps)
    # Frame 0.
    self._assert_frame_groundtruth(
        seq_feature_dict, 0,
        expected_is_annotated=is_annotated[0],
        expected_boxes=[[0., 0., 0., 0.], [0., 0., 1., 1.]],
        expected_labels=labels)
    # Frame 1.
    self._assert_frame_groundtruth(
        seq_feature_dict, 1,
        expected_is_annotated=is_annotated[1],
        expected_boxes=[],
        expected_labels=[])

  def test_make_labeled_example_with_predictions(self):
    """Groundtruth plus detections; timestamps and flags are not passed."""
    num_frames = 2
    image_height = 100
    image_width = 200
    dataset_name = b'unlabeled_dataset'
    video_id = b'video_000'
    encoded_images = self._make_encoded_images(
        num_frames, image_height, image_width)
    bboxes = [
        np.array([[0., 0., 0.75, 0.75],
                  [0., 0., 1., 1.]], dtype=np.float32),
        np.array([[0., 0.25, 0.5, 0.75]], dtype=np.float32)
    ]
    label_strings = [
        np.array(['cat', 'frog']),
        np.array(['cat'])
    ]
    detection_bboxes = [
        np.array([[0., 0., 0.75, 0.75]], dtype=np.float32),
        np.zeros([0, 4], dtype=np.float32)
    ]
    detection_classes = [
        np.array([5], dtype=np.int64),
        np.array([], dtype=np.int64)
    ]
    detection_scores = [
        np.array([0.9], dtype=np.float32),
        np.array([], dtype=np.float32)
    ]

    seq_example = seq_example_util.make_sequence_example(
        dataset_name=dataset_name,
        video_id=video_id,
        encoded_images=encoded_images,
        image_height=image_height,
        image_width=image_width,
        bboxes=bboxes,
        label_strings=label_strings,
        detection_bboxes=detection_bboxes,
        detection_classes=detection_classes,
        detection_scores=detection_scores)

    self._assert_clip_context(
        seq_example.context.feature, dataset_name,
        start_timestamp=0,
        end_timestamp=1,
        num_frames=num_frames)

    seq_feature_dict = seq_example.feature_lists.feature_list
    self._assert_frame_sequence(seq_feature_dict, num_frames, [0, 1])
    predicted_classes = seq_feature_dict['predicted/region/label/index']
    predicted_scores = seq_feature_dict['predicted/region/label/confidence']

    # Frame 0.
    self._assert_frame_groundtruth(
        seq_feature_dict, 0,
        expected_is_annotated=1,
        expected_boxes=[[0., 0., 0.75, 0.75], [0., 0., 1., 1.]],
        expected_labels=[b'cat', b'frog'])
    self._assert_frame_boxes(
        seq_feature_dict, 0, [[0., 0., 0.75, 0.75]],
        key_prefix='predicted/region/bbox')
    self.assertAllEqual(
        [5],
        predicted_classes.feature[0].int64_list.value[:])
    self.assertAllClose(
        [0.9],
        predicted_scores.feature[0].float_list.value[:])

    # Frame 1.
    self._assert_frame_groundtruth(
        seq_feature_dict, 1,
        expected_is_annotated=1,
        expected_boxes=[[0.0, 0.25, 0.5, 0.75]],
        expected_labels=[b'cat'])
    self._assert_frame_boxes(
        seq_feature_dict, 1, [],
        key_prefix='predicted/region/bbox')
    self.assertAllEqual(
        [],
        predicted_classes.feature[1].int64_list.value[:])
    self.assertAllClose(
        [],
        predicted_scores.feature[1].float_list.value[:])


# Run every test case in this module when the file is executed as a script.
if __name__ == '__main__':
  tf.test.main()