tensorflow/models

View on GitHub
official/vision/evaluation/panoptic_quality_evaluator.py

Summary

Maintainability
D
2 days
Test Coverage
# Copyright 2024 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""The panoptic quality evaluator.

The following snippet demonstrates the use of interfaces:

  evaluator = PanopticQualityEvaluator(...)
  for _ in range(num_evals):
    for _ in range(num_batches_per_eval):
      predictions, groundtruths = predictor.predict(...)  # pop a batch.
      evaluator.update_state(groundtruths, predictions)
    evaluator.result()  # finish one full eval and reset states.

See also: https://github.com/cocodataset/cocoapi/
"""

import numpy as np
import tensorflow as tf, tf_keras

from official.vision.evaluation import panoptic_quality


def _crop_padding(mask, image_info):
  """Removes preprocessing padding from a mask.

  Args:
    mask: a padded mask tensor of shape [height, width].
    image_info: a tensor that holds information about original and preprocessed
      images; row 0 holds the original [height, width].

  Returns:
    The mask cropped to the original image shape, with a leading batch
    dimension of 1 added: a tf.Tensor of shape [1, orig_height, orig_width].
  """
  original_shape = tf.cast(image_info[0, :], tf.int32)
  # crop_to_bounding_box expects a channel dimension, so add one temporarily.
  mask_with_channel = tf.expand_dims(mask, axis=-1)
  cropped = tf.image.crop_to_bounding_box(
      mask_with_channel, 0, 0, original_shape[0], original_shape[1])
  # Drop the channel dimension again and prepend a batch dimension.
  return tf.expand_dims(cropped[:, :, 0], axis=0)


class PanopticQualityEvaluator:
  """Panoptic Quality metric class."""

  def __init__(self, num_categories, ignored_label, max_instances_per_category,
               offset, is_thing=None, rescale_predictions=False):
    """Constructs Panoptic Quality evaluation class.

    The class provides the interface to Panoptic Quality metrics_fn.

    Args:
      num_categories: The number of segmentation categories (or "classes" in the
        dataset).
      ignored_label: A category id that is ignored in evaluation, e.g. the void
        label as defined in COCO panoptic segmentation dataset.
      max_instances_per_category: The maximum number of instances for each
        category. Used in ensuring unique instance labels.
      offset: The maximum number of unique labels. This is used, by multiplying
        the ground-truth labels, to generate unique ids for individual regions
        of overlap between ground-truth and predicted segments.
      is_thing: A boolean array of length `num_categories`. The entry
        `is_thing[category_id]` is True iff that category is a "thing" category
        instead of "stuff." Default to `None`, and it means categories are not
        classified into these two categories.
      rescale_predictions: `bool`, whether to scale back prediction to original
        image sizes. If True, groundtruths['image_info'] is used to rescale
        predictions.
    """
    self._pq_metric_module = panoptic_quality.PanopticQuality(
        num_categories, ignored_label, max_instances_per_category, offset)
    self._is_thing = is_thing
    self._rescale_predictions = rescale_predictions
    self._required_prediction_fields = ['category_mask', 'instance_mask']
    self._required_groundtruth_fields = ['category_mask', 'instance_mask']
    self.reset_states()

  @property
  def name(self):
    """Name of the metric, used as the key for reported results."""
    return 'panoptic_quality'

  def reset_states(self):
    """Resets internal states for a fresh run."""
    self._pq_metric_module.reset()

  def result(self):
    """Evaluates detection results, and reset_states.

    Returns:
      The accumulated Panoptic Quality results, as produced by
      `panoptic_quality.PanopticQuality.result`.
    """
    results = self._pq_metric_module.result(self._is_thing)
    # The underlying module accumulates across calls; clear it so the next
    # eval round starts from a clean slate.
    self.reset_states()
    return results

  def _convert_to_numpy(self, groundtruths, predictions):
    """Converts tensors to numpy arrays.

    Args:
      groundtruths: a dict mapping keys to tensors (or tuples of tensors), or
        an empty/None value, in which case it is returned unchanged.
      predictions: same structure as `groundtruths`.

    Returns:
      A `(numpy_groundtruths, numpy_predictions)` tuple mirroring the inputs,
      with each tensor converted to a numpy array and each tuple of tensors
      concatenated along the leading axis.
    """

    def _to_numpy(tensor_dict):
      # Empty/None inputs pass through untouched, matching callers that may
      # supply only one of the two dicts.
      if not tensor_dict:
        return tensor_dict
      arrays = tf.nest.map_structure(lambda x: x.numpy(), tensor_dict)
      numpy_dict = {}
      for key, val in arrays.items():
        # Tuples (e.g. per-replica outputs gathered into a tuple — TODO
        # confirm against callers) are merged into one batched array.
        if isinstance(val, tuple):
          val = np.concatenate(val)
        numpy_dict[key] = val
      return numpy_dict

    return _to_numpy(groundtruths), _to_numpy(predictions)

  def update_state(self, groundtruths, predictions):
    """Update and aggregate detection results and ground-truth data.

    Args:
      groundtruths: a dictionary of Tensors including the fields below. See also
        different parsers under `../dataloader` for more details.
        Required fields:
          - category_mask: a numpy array of uint16 of shape [batch_size, H, W].
          - instance_mask: a numpy array of uint16 of shape [batch_size, H, W].
          - image_info: [batch, 4, 2], a tensor that holds information about
          original and preprocessed images. Each entry is in the format of
          [[original_height, original_width], [input_height, input_width],
          [y_scale, x_scale], [y_offset, x_offset]], where [desired_height,
          desired_width] is the actual scaled image size, and [y_scale, x_scale]
          is the scaling factor, which is the ratio of scaled dimension /
          original dimension. Only required when `rescale_predictions` is True.
      predictions: a dictionary of tensors including the fields below. See
        different parsers under `../dataloader` for more details.
        Required fields:
          - category_mask: a numpy array of uint16 of shape [batch_size, H, W].
          - instance_mask: a numpy array of uint16 of shape [batch_size, H, W].

    Raises:
      ValueError: if the required prediction or ground-truth fields are not
        present in the incoming `predictions` or `groundtruths`.
    """
    groundtruths, predictions = self._convert_to_numpy(groundtruths,
                                                       predictions)
    for k in self._required_prediction_fields:
      if k not in predictions:
        raise ValueError(
            'Missing the required key `{}` in predictions!'.format(k))

    for k in self._required_groundtruth_fields:
      if k not in groundtruths:
        raise ValueError(
            'Missing the required key `{}` in groundtruths!'.format(k))

    batch_size = len(groundtruths['category_mask'])
    if self._rescale_predictions:
      for idx in range(batch_size):
        # Crop away preprocessing padding so masks are compared at the
        # original image resolution.
        image_info = groundtruths['image_info'][idx]
        groundtruths_ = {
            'category_mask':
                _crop_padding(groundtruths['category_mask'][idx], image_info),
            'instance_mask':
                _crop_padding(groundtruths['instance_mask'][idx], image_info),
        }
        predictions_ = {
            'category_mask':
                _crop_padding(predictions['category_mask'][idx], image_info),
            'instance_mask':
                _crop_padding(predictions['instance_mask'][idx], image_info),
        }
        # _crop_padding returns tf.Tensors; convert back to numpy before
        # accumulating.
        groundtruths_, predictions_ = self._convert_to_numpy(
            groundtruths_, predictions_)
        self._pq_metric_module.compare_and_accumulate(
            groundtruths_, predictions_)
    else:
      # No rescaling: accumulate each example's masks directly.
      for idx in range(batch_size):
        groundtruths_ = {
            'category_mask': groundtruths['category_mask'][idx],
            'instance_mask': groundtruths['instance_mask'][idx]
        }
        predictions_ = {
            'category_mask': predictions['category_mask'][idx],
            'instance_mask': predictions['instance_mask'][idx]
        }
        self._pq_metric_module.compare_and_accumulate(groundtruths_,
                                                      predictions_)