tensorflow/tensorflow

tensorflow/lite/tools/optimize/debugging/python/debugger.py

# Copyright 2021 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Python TF-Lite QuantizationDebugger."""
import collections
import csv
import re
from typing import (Any, Callable, Dict, IO, Iterable, List, Mapping, Optional,
                    Sequence, Tuple)

import numpy as np

from tensorflow.lite.python import convert
from tensorflow.lite.python import interpreter as _interpreter
from tensorflow.lite.python.metrics import metrics as metrics_stub  # type: ignore
from tensorflow.python.util import tf_export


# TODO(b/198099651): move converter implementation out of lite.py
TFLiteConverter = Any  # importing tf.lite creates circular dependency

# Default layer debug metrics, computed on the value difference between
# float and dequantized op results.
_DEFAULT_LAYER_DEBUG_METRICS = {
    'num_elements': lambda diffs: diffs.size,
    'stddev': np.std,
    'mean_error': np.average,
    'max_abs_error': lambda diffs: np.max(np.abs(diffs)),
    'mean_squared_error': lambda diffs: np.average(diffs**2),
}

_NUMERIC_VERIFY_OP_NAME = 'NumericVerify'


def _get_quant_params(
    tensor_detail: Mapping[str, Any]) -> Optional[Tuple[float, int]]:
  """Returns first scale and zero point from tensor detail, if present."""
  quant_params = tensor_detail['quantization_parameters']
  if not quant_params:
    return None
  if quant_params['scales'] and quant_params['zero_points']:
    return (quant_params['scales'][0], quant_params['zero_points'][0])
  return None


@tf_export.tf_export('lite.experimental.QuantizationDebugOptions')
class QuantizationDebugOptions:
  """Debug options to set up a given QuantizationDebugger."""

  def __init__(self,
               layer_debug_metrics: Optional[Mapping[str,
                                                     Callable[[np.ndarray],
                                                              float]]] = None,
               model_debug_metrics: Optional[Mapping[
                   str, Callable[[Sequence[np.ndarray], Sequence[np.ndarray]],
                                 float]]] = None,
               layer_direct_compare_metrics: Optional[Mapping[str, Callable[
                   [Sequence[np.ndarray], Sequence[np.ndarray], float, int],
                   float]]] = None,
               denylisted_ops: Optional[List[str]] = None,
               denylisted_nodes: Optional[List[str]] = None,
               fully_quantize: bool = False) -> None:
    """Initializes debugger options.

    Args:
      layer_debug_metrics: a dict to specify layer debug functions
        {function_name_str: function} where the function accepts the result of
          the NumericVerify op, which is the value difference between float and
          dequantized op results. The function returns a single scalar value.
      model_debug_metrics: a dict to specify model debug functions
        {function_name_str: function} where the function accepts outputs from
          the two models and returns a single scalar value for a metric (e.g.
          accuracy, IoU).
      layer_direct_compare_metrics: a dict to specify layer debug functions
        {function_name_str: function}. The signature is different from that of
          `layer_debug_metrics`: this one is passed (original float value,
          original quantized value, scale, zero point). The function's
          implementation is responsible for correctly dequantizing the
          quantized value before comparing. Use this one when comparing the
          diff alone is not enough. (Note) the quantized value is passed as
          int8, so a cast to int32 is needed.
      denylisted_ops: a list of op names to be excluded from quantization.
      denylisted_nodes: a list of op output tensor names to be excluded from
        quantization.
      fully_quantize: Bool indicating whether to fully quantize the model.
        Besides the model body, the inputs/outputs will be quantized as well.
        Corresponds to mlir_quantize's fully_quantize parameter.

    Raises:
      ValueError: when there are duplicate keys.
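
    Example (a minimal sketch; the metric names and lambdas below are
    illustrative, not defaults):

      debug_options = tf.lite.experimental.QuantizationDebugOptions(
          layer_debug_metrics={
              'mean_abs_error': lambda diffs: np.mean(np.abs(diffs)),
          },
          layer_direct_compare_metrics={
              # Dequantize the int8 values before comparing, per the note
              # above.
              'correlation': lambda f, q, s, zp: np.corrcoef(
                  f.flatten(),
                  (q.astype(np.int32).flatten() - zp) * s)[0, 1],
          },
          model_debug_metrics={
              # f and q are lists of model outputs; compare the first output.
              'argmax_match': lambda f, q: float(
                  np.argmax(f[0]) == np.argmax(q[0])),
          })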
    """
    self.layer_debug_metrics = layer_debug_metrics
    self.model_debug_metrics = model_debug_metrics
    self.layer_direct_compare_metrics = layer_direct_compare_metrics

    keys = []
    for metrics in [
        layer_debug_metrics, model_debug_metrics, layer_direct_compare_metrics
    ]:
      if metrics is not None:
        keys.extend(metrics.keys())
    if len(keys) != len(set(keys)):
      raise ValueError('Provided metrics have duplicate keys.')

    self.denylisted_ops = denylisted_ops
    self.denylisted_nodes = denylisted_nodes
    self.fully_quantize = fully_quantize


@tf_export.tf_export('lite.experimental.QuantizationDebugger')
class QuantizationDebugger:
  """Debugger for Quantized TensorFlow Lite debug mode models.

  This can run the TensorFlow Lite converted models equipped with debug ops and
  collect debug information. This debugger calculates statistics from
  user-defined post-processing functions as well as default ones.
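
  Example usage (a minimal sketch; `converter` is assumed to be a
  tf.lite.TFLiteConverter already configured with
  `optimizations=[tf.lite.Optimize.DEFAULT]` and a representative dataset,
  and `representative_dataset_gen` is a placeholder debug data generator):

    debugger = tf.lite.experimental.QuantizationDebugger(
        converter=converter, debug_dataset=representative_dataset_gen)
    debugger.run()

    with open('layer_stats.csv', 'w') as f:
      debugger.layer_statistics_dump(f)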
  """

  def __init__(self,
               quant_debug_model_path: Optional[str] = None,
               quant_debug_model_content: Optional[bytes] = None,
               float_model_path: Optional[str] = None,
               float_model_content: Optional[bytes] = None,
               debug_dataset: Optional[Callable[
                   [], Iterable[Sequence[np.ndarray]]]] = None,
               debug_options: Optional[QuantizationDebugOptions] = None,
               converter: Optional[TFLiteConverter] = None) -> None:
    """Runs the TFLite debugging model with given debug options.

    Args:
      quant_debug_model_path: Path to the quantized debug TFLite model file.
      quant_debug_model_content: Content of the quantized debug TFLite model.
      float_model_path: Path to float TFLite model file.
      float_model_content: Content of the float TFLite model.
      debug_dataset: a factory function that returns a dataset generator which
        is used to generate input samples (list of np.ndarray) for the model.
        The generated elements must have the same types and shapes as the
        inputs to the model.
      debug_options: Debug options to debug the given model.
      converter: Optional, use the converter instead of the quantized model.

    Raises:
      ValueError: If the debugger was unable to be created.

    Attributes:
      layer_statistics: results of error metrics for each NumericVerify op's
        results, in {layer_name: {metric_name: metric}} format.
      model_statistics: results of error metrics for the difference between the
        float and quantized models, in {metric_name: metric} format.
    """
    self._data_gen = debug_dataset
    self._debug_options = debug_options or QuantizationDebugOptions()
    self.converter = None
    self.calibrated_model = None
    self.float_model = None
    self._float_interpreter = None
    if converter is not None:
      if self._debug_options.model_debug_metrics:
        old_optimizations = converter.optimizations
        self.converter = self._set_converter_options_for_float(converter)
        self.float_model = self.converter.convert()
        converter.optimizations = old_optimizations

      self.converter = self._set_converter_options_for_calibration(converter)
      self.calibrated_model = self.converter.convert()
      # The converter should already be set up with all options
      self._init_from_converter(
          self._debug_options,
          self.converter,
          self.calibrated_model,
          float_model=self.float_model)
    else:
      self._quant_interpreter = _interpreter.Interpreter(
          quant_debug_model_path,
          quant_debug_model_content,
          experimental_preserve_all_tensors=(
              self._debug_options.layer_direct_compare_metrics is not None))
      if self._debug_options.model_debug_metrics:
        self._float_interpreter = _interpreter.Interpreter(
            float_model_path, float_model_content)
    self._initialize_stats()

  @property
  def options(self) -> QuantizationDebugOptions:
    return self._debug_options

  @options.setter
  def options(self, options: QuantizationDebugOptions) -> None:
    self._debug_options = options
    if not self.converter or not self.calibrated_model:
      return
    self._init_from_converter(
        self._debug_options,
        self.converter,
        self.calibrated_model,
        float_model=self.float_model)
    self._initialize_stats()

  def _initialize_stats(self):
    """Helper function initializes stats."""
    # TODO(b/177749613): Fix the dependency on tf.lite._get_ops_details()
    # The following code is needed to get an op's name from its output tensor
    # index, since the NumericVerify op only provides its quantized input
    # tensor index.
    self._defining_op = dict()
    for op_info in self._quant_interpreter._get_ops_details():  # pylint: disable=protected-access
      self._defining_op.update(
          {tensor_idx: op_info['index'] for tensor_idx in op_info['outputs']})

    self._numeric_verify_tensor_details = None
    self._numeric_verify_op_details = None
    if not self._get_numeric_verify_tensor_details():
      raise ValueError('Please check if the quantized model is in debug mode')

    self._layer_debug_metrics = _DEFAULT_LAYER_DEBUG_METRICS.copy()
    if self._debug_options.layer_debug_metrics:
      self._layer_debug_metrics.update(self._debug_options.layer_debug_metrics)

    self.layer_statistics = None
    self.model_statistics = None

    self._metrics = metrics_stub.TFLiteMetrics()
    self._metrics.increase_counter_debugger_creation()

  def _get_quantized_model(self, is_debug: bool) -> bytes:
    if not self.converter:
      raise ValueError('No converter found, use this function with the '
                       'converter option in the constructor.')

    return convert.mlir_quantize(
        self.calibrated_model,
        disable_per_channel=self.converter._experimental_disable_per_channel,  # pylint: disable=protected-access
        fully_quantize=self._debug_options.fully_quantize,
        enable_numeric_verify=is_debug,
        denylisted_ops=self._debug_options.denylisted_ops,
        denylisted_nodes=self._debug_options.denylisted_nodes)

  def get_nondebug_quantized_model(self) -> bytes:
    """Returns a non-instrumented quantized model.

    Converts the calibrated model with the initialized converter and
    returns bytes for the non-debug model. The model will not be instrumented
    with numeric verification operations.

    Returns:
      Model bytes corresponding to the model.
    Raises:
      ValueError: if converter is not passed to the debugger.
    """
    return self._get_quantized_model(is_debug=False)

  def get_debug_quantized_model(self) -> bytes:
    """Returns an instrumented quantized model.

    Converts the calibrated model with the initialized converter and
    returns bytes for the model. The model will be instrumented with numeric
    verification operations and should only be used for debugging.

    Returns:
      Model bytes corresponding to the model.
    Raises:
      ValueError: if converter is not passed to the debugger.
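
    Example (illustrative; assumes this debugger was created with a converter):

      debug_model = debugger.get_debug_quantized_model()
      with open('debug_model.tflite', 'wb') as f:
        f.write(debug_model)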
    """
    return self._get_quantized_model(is_debug=True)

  def _init_from_converter(self,
                           options: QuantizationDebugOptions,
                           converter: TFLiteConverter,
                           calibrated_model: Optional[bytes] = None,
                           float_model: Optional[bytes] = None) -> None:
    """Convert the model and apply options.

    Converts the quantized model and initializes a quantized model interpreter
    with the quantized model. Returns a float model interpreter if float model
    is provided.

    Args:
      options: a QuantizationDebugOptions object.
      converter: an initialized tf.lite.TFLiteConverter.
      calibrated_model: Calibrated model bytes.
      float_model: Float model bytes.
    """
    self.quant_model = convert.mlir_quantize(
        calibrated_model,
        disable_per_channel=converter._experimental_disable_per_channel,  # pylint: disable=protected-access
        fully_quantize=options.fully_quantize,
        enable_numeric_verify=True,
        denylisted_ops=options.denylisted_ops,
        denylisted_nodes=options.denylisted_nodes)
    self._quant_interpreter = _interpreter.Interpreter(
        model_content=self.quant_model)
    self._float_interpreter = None
    if float_model is not None:
      self._float_interpreter = _interpreter.Interpreter(
          model_content=float_model)

  def _set_converter_options_for_float(
      self, converter: TFLiteConverter) -> TFLiteConverter:
    """Verify converter options and set required experimental options."""
    if converter.optimizations:
      converter.optimizations = []
    return converter

  def _set_converter_options_for_calibration(
      self, converter: TFLiteConverter) -> TFLiteConverter:
    """Verify converter options and set required experimental options."""
    if not converter.optimizations:
      raise ValueError(
          'converter object must set optimizations to lite.Optimize.DEFAULT')
    if not converter.representative_dataset:
      raise ValueError('converter object must set representative_dataset')

    converter.experimental_mlir_quantizer = True
    converter._experimental_calibrate_only = True  # pylint: disable=protected-access
    return converter

  def run(self) -> None:
    """Runs models and gets metrics."""
    self.layer_statistics = self._collect_layer_statistics()
    if self._debug_options.model_debug_metrics:
      self.model_statistics = self._collect_model_statistics()

  def _collect_layer_statistics(self) -> Dict[str, Dict[str, float]]:
    """Collects layer statistics by applying layer debug metrics.

    For all data from the given RepresentativeDataset, collect statistics per
    example by getting the NumericVerify op results in _quant_interpreter
    and calculating layer debug metrics on the results.

    Returns:
      aggregated per-layer statistics of NumericVerify results.
      {layer_name: {metric_name: metric}}
    """
    layer_statistics = collections.defaultdict(
        lambda: collections.defaultdict(list))

    initialize = True
    for tensor_data in self._data_gen():
      self._set_input_tensors(self._quant_interpreter, tensor_data, initialize)
      initialize = False

      # Run the model.
      self._quant_interpreter.invoke()

      # Collect the statistics of this invoke result.
      for tensor_detail in self._get_numeric_verify_tensor_details():
        tensor_name = tensor_detail['name']  # pytype: disable=unsupported-operands  # dynamic-method-lookup
        diffs = self._quant_interpreter.get_tensor(tensor_detail['index'])  # pytype: disable=unsupported-operands  # dynamic-method-lookup
        for metric_name, metric_fn in self._layer_debug_metrics.items():
          layer_statistics[tensor_name][metric_name].append(metric_fn(diffs))

      if self._debug_options.layer_direct_compare_metrics is not None:
        for tensor_detail in self._get_numeric_verify_tensor_details():
          tensor_name = tensor_detail['name']  # pytype: disable=unsupported-operands  # dynamic-method-lookup
          op_idx = self._defining_op[tensor_detail['index']]  # pytype: disable=unsupported-operands  # dynamic-method-lookup
          op_detail = self._quant_interpreter._get_op_details(op_idx)  # pylint: disable=protected-access
          q_idx, f_idx = op_detail['inputs']
          quant_input_detail = self._quant_interpreter._get_tensor_details(  # pylint: disable=protected-access
              q_idx, subgraph_index=0)
          for (metric_name, metric_fn
              ) in self._debug_options.layer_direct_compare_metrics.items():
            layer_statistics[tensor_name][metric_name].append(
                metric_fn(
                    self._quant_interpreter.get_tensor(f_idx),
                    self._quant_interpreter.get_tensor(q_idx),
                    quant_input_detail['quantization_parameters']['scales'][0],
                    quant_input_detail['quantization_parameters']['zero_points']
                    [0]))

    # Calculate final aggregated metrics for each layer.
    for metrics in layer_statistics.values():
      for metric_name in metrics:
        metrics[metric_name] = np.nanmean(metrics[metric_name])

    return layer_statistics

  def _collect_model_statistics(self) -> Dict[str, float]:
    """Collects model output metrics.

    For all data from the given RepresentativeDataset, collects all model
    output results from the float model and the quantized debug model, and
    calculates metrics with the model output functions. As a result,
    self.model_statistics is filled, where
    self.model_statistics[model_output_function_name] = `aggregated model
    output function value` (a scalar).

    Returns:
      aggregated per-model output discrepancy metrics.
      {metric_name: aggregated_metric}
    """

    model_statistics = collections.defaultdict(list)

    initialize = True
    for tensor_data in self._data_gen():
      # Run quantized debug model and collect output results.
      self._set_input_tensors(self._quant_interpreter, tensor_data, initialize)
      self._quant_interpreter.invoke()
      quant_tensor_data = self._get_output_tensors(self._quant_interpreter)

      # Run float model if it's initialized.
      float_tensor_data = []
      if self._float_interpreter:
        self._set_input_tensors(
            self._float_interpreter, tensor_data, initialize)
        self._float_interpreter.invoke()
        float_tensor_data = self._get_output_tensors(self._float_interpreter)

      initialize = False

      # Calculate the metrics.
      for (metric_name,
           metric_fn) in self._debug_options.model_debug_metrics.items():
        model_statistics[metric_name].append(
            metric_fn(float_tensor_data, quant_tensor_data))

    # Calculate final aggregated metrics for each output.
    return {
        metric_name: np.mean(metric)
        for metric_name, metric in model_statistics.items()
    }

  def _set_input_tensors(self, interpreter: _interpreter.Interpreter,
                         tensor_data: Sequence[np.ndarray],
                         initialize: bool) -> None:
    """Sets input tensors into TFLite model Interpreter.

    Args:
      interpreter: a tf.lite.Interpreter object with allocated tensors.
      tensor_data: a list of Numpy array data.
      initialize: set to true when input is first set for the interpreter, to
        set input shapes and allocate tensors.

    Raises:
      ValueError: when inputs can't be set, or the number of provided inputs
      does not match the number of model inputs.
    """
    input_details = interpreter.get_input_details()
    if len(input_details) != len(tensor_data):
      raise ValueError(
          'Number of inputs provided ({}) does not match number of inputs to '
          'the model ({})'.format(len(tensor_data), len(input_details)))

    if initialize:
      for input_detail, tensor in zip(input_details, tensor_data):
        interpreter.resize_tensor_input(input_detail['index'], tensor.shape)
      interpreter.allocate_tensors()

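    # Quantize float32 input data when the model expects int8 inputs, using
    # the input tensor's (scale, zero_point) quantization parameters.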
    for input_detail, tensor in zip(input_details, tensor_data):
      if tensor.dtype == np.float32 and input_detail['dtype'] == np.int8:
        quant_params = _get_quant_params(input_detail)
        if quant_params:
          scale, zero_point = quant_params
          tensor = np.round((tensor / scale) + zero_point).astype(np.int8)
      interpreter.set_tensor(input_detail['index'], tensor)

  def _get_output_tensors(
      self, interpreter: _interpreter.Interpreter) -> List[np.ndarray]:
    """Returns output tensors of given TFLite model Interpreter.

    Args:
      interpreter: a tf.lite.Interpreter object with allocated tensors.

    Returns:
      a list of numpy arrays representing output tensor results.
    """

    outputs = []
    for output_detail in interpreter.get_output_details():
      tensor = interpreter.get_tensor(output_detail['index'])
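      # Dequantize int8 outputs back to float32 using the output tensor's
      # (scale, zero_point) so they can be compared with float model outputs.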
      if output_detail['dtype'] == np.int8:
        quant_params = _get_quant_params(output_detail)
        if quant_params:
          scale, zero_point = quant_params
          tensor = ((tensor.astype(np.float32) - zero_point) * scale).astype(
              np.float32)
      outputs.append(tensor)

    return outputs

  def _get_numeric_verify_tensor_details(self) -> List[Mapping[str, Any]]:
    """Returns tensor details for the outputs of all NumericVerify ops."""
    # pylint: disable=protected-access
    if not self._numeric_verify_tensor_details:
      self._numeric_verify_tensor_details = []
      self._numeric_verify_op_details = {}
      for op_info in self._quant_interpreter._get_ops_details():
        if op_info['op_name'] == _NUMERIC_VERIFY_OP_NAME:
          self._numeric_verify_tensor_details.append(
              self._quant_interpreter._get_tensor_details(
                  op_info['outputs'][0], subgraph_index=0))
          tensor_name = self._numeric_verify_tensor_details[-1]['name']
          self._numeric_verify_op_details[tensor_name] = op_info
    # pylint: enable=protected-access
    return self._numeric_verify_tensor_details

  def _get_operand_name_and_index(self,
                                  numeric_verify_name: str) -> Tuple[str, int]:
    """Gets the index and name of NumericVerify Op's quantized input tensor.

    Args:
      numeric_verify_name: name of the NumericVerify op's output tensor. It has
        format of `NumericVerify/{quantized_tensor_name}:{quantized_tensor_idx}`

    Returns:
      Tuple of (tensor_name, tensor_idx) for quantized op's output tensor.
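      For example (illustrative), a name 'NumericVerify/conv2d/Relu:7' yields
      ('conv2d/Relu', 7).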
    """
    tensor_name, tensor_idx = numeric_verify_name.rsplit(':', 1)
    float_tensor_name = tensor_name[len(_NUMERIC_VERIFY_OP_NAME) + 1:]
    if re.match(r'\d', float_tensor_name[-1]):
      float_tensor_name = float_tensor_name[:-1]

    return (float_tensor_name, int(tensor_idx))

  def layer_statistics_dump(self, file: IO[str]) -> None:
    """Dumps layer statistics into file, in csv format.

    Args:
      file: file, or file-like object to write to.
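
    Example (illustrative; assumes `debugger.run()` has already been called):

      with open('layer_stats.csv', 'w') as f:
        debugger.layer_statistics_dump(f)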
    """
    # The order of `fields` determines the column order in the CSV.
    fields = ['op_name', 'tensor_idx'] + list(self._layer_debug_metrics.keys())
    if self._debug_options.layer_direct_compare_metrics is not None:
      fields += list(self._debug_options.layer_direct_compare_metrics.keys())
    fields += ['scale', 'zero_point', 'tensor_name']
    writer = csv.DictWriter(file, fields)
    writer.writeheader()
    if self.layer_statistics:
      for name, metrics in self.layer_statistics.items():
        data = metrics.copy()
        (data['tensor_name'], _) = self._get_operand_name_and_index(name)
        data['tensor_idx'] = self._numeric_verify_op_details[name]['inputs'][0]
        data['op_name'] = self._quant_interpreter._get_op_details(  # pylint: disable=protected-access
            self._defining_op[data['tensor_idx']])['op_name']
        details = self._quant_interpreter._get_tensor_details(  # pylint: disable=protected-access
            data['tensor_idx'], subgraph_index=0)
        data['scale'], data['zero_point'] = (
            details['quantization_parameters']['scales'][0],
            details['quantization_parameters']['zero_points'][0])
        writer.writerow(data)