official/projects/maskconver/modeling/maskconver_model.py from tensorflow/models

official/projects/maskconver/modeling/maskconver_model.py
Summary

Maintainability

3 hrs
Test Coverage

Issues
# Copyright 2024 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

"""Panoptic Segmentation model."""

from typing import Mapping, Union, Any, Dict, Optional, List

import tensorflow as tf, tf_keras

layers = tf_keras.layers


@tf_keras.utils.register_keras_serializable(package='Vision')
class MaskConverModel(tf_keras.Model):
  """A MaskConver class model.

  The model runs a backbone (and optionally a decoder), then three heads on
  the `(backbone_features, decoder_features)` pair: a class heatmap head, a
  dense mask-embedding head and a per-pixel embedding head. Per-proposal mask
  embeddings are gathered from the dense embeddings, modulated by a learned
  class embedding, passed through an MLP, and finally correlated with the
  per-pixel embeddings to produce one mask logit map per proposal.
  """

  def __init__(
      self,
      backbone: tf_keras.Model,
      decoder: Optional[tf_keras.Model],
      class_head: tf_keras.layers.Layer,
      embedding_head: tf_keras.layers.Layer,
      per_pixel_embeddings_head: tf_keras.layers.Layer,
      mlp_embedding_head: tf_keras.layers.Layer,
      proposal_generator: tf_keras.layers.Layer,
      panoptic_generator: Optional[tf_keras.layers.Layer] = None,
      level: int = 3,
      padded_output_size: Optional[List[int]] = None,
      score_threshold: float = 0.1,
      l2_regularizer: Optional[Any] = None,
      embedding_size: int = 256,
      num_classes: int = 201,
      **kwargs):
    """MaskConver initialization function.

    Args:
      backbone: a backbone network.
      decoder: a decoder network, e.g. FPN. May be `None`, in which case the
        backbone features are fed to the heads in place of decoder features.
      class_head: class head.
      embedding_head: embedding head.
      per_pixel_embeddings_head: per_pixel_embeddings_head.
      mlp_embedding_head: mlp embedding head.
      proposal_generator: proposal generator, called on the class heatmaps
        when not training.
      panoptic_generator: optional panoptic generator, applied to the
        inference outputs when provided.
      level: int.
      padded_output_size: padded output size. GPU or CPU only. Used as the
        target size when resizing the mask logits at inference time if no
        `panoptic_generator` is given.
      score_threshold: score threshold, used for filtering.
      l2_regularizer: regularizer applied to the class embedding table.
      embedding_size: `int`, embedding size.
      num_classes: `int`, the total number of classes.
      **kwargs: keyword arguments to be passed.
    """
    super().__init__(**kwargs)
    # Every constructor argument is recorded so that
    # `from_config(get_config())` rebuilds an equivalent model instead of
    # silently falling back to defaults for the omitted ones.
    self._config_dict = {
        'backbone': backbone,
        'decoder': decoder,
        'class_head': class_head,
        'embedding_head': embedding_head,
        'per_pixel_embeddings_head': per_pixel_embeddings_head,
        'mlp_embedding_head': mlp_embedding_head,
        'proposal_generator': proposal_generator,
        'panoptic_generator': panoptic_generator,
        'level': level,
        'padded_output_size': padded_output_size,
        'score_threshold': score_threshold,
        'l2_regularizer': l2_regularizer,
        'embedding_size': embedding_size,
        'num_classes': num_classes,
    }
    self.backbone = backbone
    self.decoder = decoder
    self.class_head = class_head
    self.embedding_head = embedding_head
    self.embedding_size = embedding_size
    self.mlp = mlp_embedding_head
    self.proposal_generator = proposal_generator
    self.panoptic_generator = panoptic_generator
    self.num_classes = num_classes

    self.level = level
    self.padded_output_size = padded_output_size
    self.score_threshold = score_threshold
    self.per_pixel_embeddings_head = per_pixel_embeddings_head
    # One learned vector per class id, multiplied into the gathered mask
    # embeddings in `call`.
    self.class_embeddings = tf_keras.layers.Embedding(
        num_classes,
        self.embedding_size,
        embeddings_regularizer=l2_regularizer)

  def call(self, inputs: tf.Tensor,
           image_info: Optional[tf.Tensor] = None,
           box_indices: Optional[tf.Tensor] = None,
           classes: Optional[tf.Tensor] = None,
           training: Optional[bool] = None
           ) -> Dict[str, Optional[Any]]:
    """Runs the model forward pass.

    Args:
      inputs: input tensor fed to the backbone.
      image_info: forwarded to the panoptic generator as `images_info`; only
        used when not training and a panoptic generator is set.
      box_indices: indices used to gather per-proposal embeddings from the
        dense mask embeddings. Required when training; when not training it
        is replaced by the proposal generator's `embedding_indices`.
      classes: class ids of the proposals. Required when training; when not
        training it is replaced by the proposal generator's `classes`.
        Negative ids are clamped to 0 before the embedding lookup.
      training: whether the model runs in training mode. `None` is treated
        like `False`.

    Returns:
      A dict. When training: `class_heatmaps` and `mask_proposal_logits`.
      Otherwise: `classes`, `confidence`, `mask_proposal_logits`,
      `class_heatmaps`, plus `panoptic_outputs` when a panoptic generator is
      set. Without a panoptic generator, `mask_proposal_logits` is bilinearly
      resized to `padded_output_size`.

    Raises:
      ValueError: if training without `box_indices` or `classes`, or if
        running inference with neither a panoptic generator nor a
        `padded_output_size`.
    """
    backbone_features = self.backbone(inputs)

    # `is not None` matches the check used in `checkpoint_items`.
    if self.decoder is not None:
      decoder_features = self.decoder(backbone_features)
    else:
      decoder_features = backbone_features

    head_inputs = (backbone_features, decoder_features)
    class_heatmaps = self.class_head(head_inputs, training=training)
    dense_mask_embeddings = self.embedding_head(head_inputs, training=training)
    per_pixel_embeddings = self.per_pixel_embeddings_head(
        head_inputs, training=training)

    confidence = None
    if not training:
      # At inference the proposals come from the predicted heatmaps, not from
      # the caller-provided arguments.
      proposals = self.proposal_generator(class_heatmaps)
      classes = proposals['classes']
      confidence = proposals['confidence']
      box_indices = proposals['embedding_indices']
    elif box_indices is None or classes is None:
      raise ValueError(
          '`box_indices` and `classes` must be provided when training.')

    # Pick one embedding per proposal out of the dense embedding map.
    mask_embeddings = tf.gather_nd(
        dense_mask_embeddings, box_indices, batch_dims=1)
    # Clamp negative class ids to 0 so the lookup index is always valid.
    class_embeddings = self.class_embeddings(tf.maximum(classes, 0))
    mask_embeddings = mask_embeddings * tf.cast(
        class_embeddings, mask_embeddings.dtype)
    mask_embeddings = self.mlp(mask_embeddings)

    # Dot product over the channel axis between every proposal embedding
    # (b, q, c) and every pixel embedding (b, h, w, c) -> (b, h, w, q).
    mask_proposal_logits = tf.einsum('bqc,bhwc->bhwq',
                                     mask_embeddings,
                                     per_pixel_embeddings)
    mask_proposal_logits = tf.cast(mask_proposal_logits, tf.float32)

    if training:
      return {'class_heatmaps': class_heatmaps,
              'mask_proposal_logits': mask_proposal_logits}

    outputs = {'classes': classes,
               'confidence': confidence,
               'mask_proposal_logits': mask_proposal_logits,
               'class_heatmaps': class_heatmaps}
    if self.panoptic_generator is not None:
      panoptic_outputs = self.panoptic_generator(
          outputs, images_info=image_info)
      outputs.update({'panoptic_outputs': panoptic_outputs})
    else:
      if self.padded_output_size is None:
        raise ValueError(
            '`padded_output_size` must be set when no `panoptic_generator` '
            'is provided.')
      outputs['mask_proposal_logits'] = tf.image.resize(
          mask_proposal_logits, self.padded_output_size, 'bilinear')
    return outputs

  @property
  def checkpoint_items(
      self) -> Mapping[str, Union[tf_keras.Model, tf_keras.layers.Layer]]:
    """Returns a dictionary of items to be additionally checkpointed."""
    items = dict(backbone=self.backbone,
                 class_head=self.class_head,
                 embedding_head=self.embedding_head,
                 per_pixel_embeddings_head=self.per_pixel_embeddings_head)
    if self.decoder is not None:
      items.update(decoder=self.decoder)
    return items

  def get_config(self) -> Mapping[str, Any]:
    """Returns the constructor arguments needed to rebuild this model."""
    # Shallow copy so callers cannot mutate the model's stored config.
    return dict(self._config_dict)

  @classmethod
  def from_config(cls, config, custom_objects=None):
    """Builds a model from a config produced by `get_config`."""
    return cls(**config)