research/object_detection/metrics/oid_challenge_evaluation_utils.py from tensorflow/models

research/object_detection/metrics/oid_challenge_evaluation_utils.py
Summary

Maintainability

2 hrs
Test Coverage

Issues
# Copyright 2018 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
r"""Converts data from CSV to the OpenImagesDetectionChallengeEvaluator format."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import base64
import zlib

import numpy as np
import pandas as pd
from pycocotools import mask as coco_mask

from object_detection.core import standard_fields


def _to_normalized_box(mask_np):
  """Computes the normalized axis-parallel enclosing box of a single mask.

  Args:
    mask_np: 2D np.ndarray holding one mask. The first axis is treated as Y
      (rows) and the second axis as X (columns); every non-zero entry counts
      as part of the mask.

  Returns:
    a np.ndarray with 4 elements [YMin, XMin, YMax, XMax]. Each coordinate is
    divided by the matching mask dimension (Y by shape[0], X by shape[1]).
    The max coordinates are exclusive, i.e. one past the last non-zero row or
    column. A mask without non-zero entries yields [0.0, 0.0, 0.0, 0.0].
  """
  rows, cols = np.nonzero(mask_np)
  if rows.size == 0:
    return np.array([0.0, 0.0, 0.0, 0.0])

  height = mask_np.shape[0]
  width = mask_np.shape[1]

  # ndarray.min()/max() reduce in native code; the builtin min()/max() would
  # walk the index arrays element by element in Python.
  ymin = float(rows.min()) / height
  ymax = float(rows.max() + 1) / height
  xmin = float(cols.min()) / width
  xmax = float(cols.max() + 1) / width

  return np.array([ymin, xmin, ymax, xmax])


def _decode_raw_data_into_masks_and_boxes(segments, image_widths,
                                          image_heights):
  """Decodes encoded segmentation masks into np.arrays and enclosing boxes.

  Args:
    segments: pandas Series object containing either None entries, or strings
      with base64, zlib compressed, COCO RLE-encoded binary masks. All masks are
      expected to be the same size.
    image_widths: pandas Series of mask widths.
    image_heights: pandas Series of mask heights.

  Returns:
    A tuple (masks, boxes):
      masks: np.ndarray of the size NxHxW, where H and W come from the
        image_heights / image_widths entries; for the None values, zero uint8
        arrays are created using the height and width of the first non-null
        segment. If input contains only None values, H=1, W=1.
      boxes: np.ndarray of the size Nx4 with one [YMin, XMin, YMax, XMax] row
        per segment as computed by _to_normalized_box; rows for None values
        are all zeros. Note that the all-None case returns float32 zeros,
        while the general case yields whatever dtype the concatenation of the
        individual boxes produces.
  """
  first_valid = segments.first_valid_index()
  if first_valid is None:
    # It does not matter which size we pick since no masks will ever be
    # evaluated.
    num_segments = segments.shape[0]
    return (np.zeros((num_segments, 1, 1), dtype=np.uint8),
            np.zeros((num_segments, 4), dtype=np.float32))

  # first_valid_index() returns an index label, so look the size up by label
  # rather than relying on the label/position fallback of plain [] indexing.
  placeholder_size = [
      int(image_heights.loc[first_valid]),
      int(image_widths.loc[first_valid])
  ]

  segment_masks = []
  segment_boxes = []
  for segment, im_width, im_height in zip(segments, image_widths,
                                          image_heights):
    if pd.isnull(segment):
      # Rows without a mask get an empty mask and a degenerate box so that the
      # outputs stay aligned with the input rows.
      segment_masks.append(
          np.zeros([1, placeholder_size[0], placeholder_size[1]],
                   dtype=np.uint8))
      segment_boxes.append(np.expand_dims(np.array([0.0, 0.0, 0.0, 0.0]), 0))
      continue

    compressed_mask = base64.b64decode(segment)
    rle_encoded_mask = zlib.decompress(compressed_mask)
    decoding_dict = {
        # Cast to int, matching the placeholder size above: pandas hands back
        # floats when the column also holds NaN (e.g. after an outer merge).
        'size': [int(im_height), int(im_width)],
        'counts': rle_encoded_mask
    }
    mask_tensor = coco_mask.decode(decoding_dict)

    segment_masks.append(np.expand_dims(mask_tensor, 0))
    segment_boxes.append(np.expand_dims(_to_normalized_box(mask_tensor), 0))

  return (np.concatenate(segment_masks, axis=0),
          np.concatenate(segment_boxes, axis=0))


def merge_boxes_and_masks(box_data, mask_data):
  """Outer-joins box annotations with mask annotations.

  Rows are matched on label, image id, box coordinates and the group-of flag;
  rows present in only one of the inputs are kept, with the columns coming
  from the other input left empty.

  Args:
    box_data: pandas DataFrame with box annotations.
    mask_data: pandas DataFrame with mask annotations.

  Returns:
    pandas DataFrame containing the outer join of both inputs.
  """
  join_columns = [
      'LabelName', 'ImageID', 'XMin', 'XMax', 'YMin', 'YMax', 'IsGroupOf'
  ]
  merged = pd.merge(left=box_data, right=mask_data, how='outer',
                    on=join_columns)
  return merged


def build_groundtruth_dictionary(data, class_label_map):
  """Converts the groundtruth rows of one image into an evaluator dictionary.

  Rows with a non-null XMin are treated as located (box-level) annotations;
  rows with a non-null ConfidenceImageLabel are treated as image-level labels.

  Args:
    data: Pandas DataFrame with the groundtruth data for a single image.
    class_label_map: Class labelmap from string label name to an integer.

  Returns:
    A dictionary with keys suitable for passing to
    OpenImagesDetectionChallengeEvaluator.add_single_ground_truth_image_info:
        standard_fields.InputDataFields.groundtruth_boxes: float numpy array
          of shape [num_boxes, 4] with the [YMin, XMin, YMax, XMax] columns of
          the located rows, taken from the input as-is.
        standard_fields.InputDataFields.groundtruth_classes: numpy array of
          shape [num_boxes] with the class_label_map value of each located
          row's LabelName.
        standard_fields.InputDataFields.groundtruth_group_of: integer numpy
          array of shape [num_boxes] holding the IsGroupOf column of the
          located rows.
        standard_fields.InputDataFields.groundtruth_image_classes: numpy array
          with the class_label_map value of every image-level label row.
        standard_fields.InputDataFields.groundtruth_instance_masks: only set
          when the data has a 'Mask' column; the masks decoded from the
          located rows by _decode_raw_data_into_masks_and_boxes.
  """
  fields = standard_fields.InputDataFields

  def _class_id(label_name):
    return class_label_map[label_name]

  located_rows = data[data.XMin.notnull()]
  image_label_rows = data[data.ConfidenceImageLabel.notnull()]

  groundtruth = {}
  box_columns = located_rows[['YMin', 'XMin', 'YMax', 'XMax']]
  groundtruth[fields.groundtruth_boxes] = box_columns.to_numpy().astype(float)
  groundtruth[fields.groundtruth_classes] = (
      located_rows['LabelName'].map(_class_id).to_numpy())
  groundtruth[fields.groundtruth_group_of] = (
      located_rows['IsGroupOf'].to_numpy().astype(int))
  groundtruth[fields.groundtruth_image_classes] = (
      image_label_rows['LabelName'].map(_class_id).to_numpy())

  if 'Mask' not in located_rows:
    return groundtruth

  # Only the masks are needed here; the boxes come from the CSV columns above.
  instance_masks, _ = _decode_raw_data_into_masks_and_boxes(
      located_rows['Mask'], located_rows['ImageWidth'],
      located_rows['ImageHeight'])
  groundtruth[fields.groundtruth_instance_masks] = instance_masks
  return groundtruth


def build_predictions_dictionary(data, class_label_map):
  """Converts the prediction rows of one image into an evaluator dictionary.

  Args:
    data: Pandas DataFrame with the predictions data for a single image.
    class_label_map: Class labelmap from string label name to an integer.

  Returns:
    Dictionary with keys suitable for passing to
    OpenImagesDetectionChallengeEvaluator.add_single_detected_image_info:
        standard_fields.DetectionResultFields.detection_classes: numpy array
          of shape [num_boxes] with the class_label_map value of each row's
          LabelName.
        standard_fields.DetectionResultFields.detection_scores: float numpy
          array of shape [num_boxes] holding the Score column.
        standard_fields.DetectionResultFields.detection_boxes: numpy array of
          shape [num_boxes, 4] in [ymin, xmin, ymax, xmax] order. When the
          data has a 'Mask' column the boxes are the ones returned by
          _decode_raw_data_into_masks_and_boxes; otherwise they are read from
          the YMin, XMin, YMax, XMax columns.
        standard_fields.DetectionResultFields.detection_masks: only set when
          the data has a 'Mask' column; the decoded masks.

  """
  fields = standard_fields.DetectionResultFields

  def _class_id(label_name):
    return class_label_map[label_name]

  predictions = {}
  predictions[fields.detection_classes] = (
      data['LabelName'].map(_class_id).to_numpy())
  predictions[fields.detection_scores] = data['Score'].to_numpy().astype(float)

  if 'Mask' not in data:
    box_columns = data[['YMin', 'XMin', 'YMax', 'XMax']]
    predictions[fields.detection_boxes] = box_columns.to_numpy().astype(float)
    return predictions

  # With masks available, the boxes are derived from the masks rather than
  # read from the box columns.
  masks, mask_boxes = _decode_raw_data_into_masks_and_boxes(
      data['Mask'], data['ImageWidth'], data['ImageHeight'])
  predictions[fields.detection_masks] = masks
  predictions[fields.detection_boxes] = mask_boxes
  return predictions