research/efficient-hrl/utils/eval_utils.py

# Copyright 2018 The TensorFlow Authors All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Evaluation utility functions.
"""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import time

import numpy as np
import tensorflow as tf
import gin.tf

logging = tf.logging


@gin.configurable
def evaluate_checkpoint_repeatedly(checkpoint_dir,
                                   evaluate_checkpoint_fn,
                                   eval_interval_secs=600,
                                   max_number_of_evaluations=None,
                                   checkpoint_timeout=None,
                                   timeout_fn=None):
  """Evaluates a checkpointed model at a set interval."""
  if max_number_of_evaluations is not None and max_number_of_evaluations <= 0:
    raise ValueError(
        '`max_number_of_evaluations` must be either None or a positive number.')

  number_of_evaluations = 0
  for checkpoint_path in tf.contrib.training.checkpoints_iterator(
      checkpoint_dir,
      min_interval_secs=eval_interval_secs,
      timeout=checkpoint_timeout,
      timeout_fn=timeout_fn):
    retries = 3
    should_stop = False
    for _ in range(retries):
      try:
        should_stop = evaluate_checkpoint_fn(checkpoint_path)
        break
      except tf.errors.DataLossError:
        logging.warn(
            'Encountered a DataLossError while evaluating a checkpoint. This '
            'can happen when reading a checkpoint before it is fully written. '
            'Retrying...')
        time.sleep(2.0)
    number_of_evaluations += 1
    if should_stop or (max_number_of_evaluations is not None and
                       number_of_evaluations >= max_number_of_evaluations):
      logging.info('Stopping checkpoint evaluation after %d evaluation(s).',
                   number_of_evaluations)
      return

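
# Illustrative sketch (not part of the original module) of an
# `evaluate_checkpoint_fn` compatible with evaluate_checkpoint_repeatedly.
# The `saver` and `eval_op` arguments are hypothetical placeholders for
# whatever graph and metrics the caller's agent actually builds.
def make_example_evaluate_fn(saver, eval_op):
  """Builds a minimal `evaluate_checkpoint_fn` closure for illustration."""
  def evaluate_checkpoint_fn(checkpoint_path):
    with tf.Session() as sess:
      # Restore the checkpointed variables and run one evaluation pass.
      saver.restore(sess, checkpoint_path)
      result = sess.run(eval_op)
      logging.info('Example evaluation result: %s', result)
    return False  # Returning True signals evaluate_checkpoint_repeatedly to stop.
  return evaluate_checkpoint_fn
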

def compute_model_loss(sess, model_rollout_fn, states, actions):
  """Computes model loss."""
  preds, losses = [], []
  preds.append(states[0])
  losses.append(0)
  # Roll the model forward open-loop from the first state, feeding each
  # prediction back in, and score it against the observed state with an
  # L2 distance.
  for state, action in zip(states[1:], actions[1:]):
    pred = model_rollout_fn(sess, preds[-1], action)
    loss = np.sqrt(np.sum((state - pred) ** 2))
    preds.append(pred)
    losses.append(loss)
  return preds, losses

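
# Illustrative sketch (assumed, not from the original file): a trivial
# `model_rollout_fn` whose predicted next state is the previous state plus
# the action, showing the call signature compute_model_loss expects.
def example_model_rollout_fn(unused_sess, state, action):
  """Toy dynamics model: next state = state + action."""
  return np.asarray(state) + np.asarray(action)


# Usage sketch on a short synthetic trajectory; the toy model reproduces the
# observed states exactly, so every loss entry is 0.
# preds, losses = compute_model_loss(
#     None, example_model_rollout_fn,
#     states=[np.zeros(2), np.ones(2), 2 * np.ones(2)],
#     actions=[np.zeros(2), np.ones(2), np.ones(2)])
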

def compute_average_reward(sess, env_base, step_fn, gamma, num_steps,
                           num_episodes):
  """Computes the discounted reward for a given number of steps.

  Args:
    sess: The tensorflow session.
    env_base: A python environment.
    step_fn: A function that takes in `sess` and returns a list of
      [state, action, reward, discount, transition_type] values.
    gamma: discounting factor to apply to the reward.
    num_steps: number of steps to compute the reward over.
    num_episodes: number of episodes to average the reward over.
  Returns:
    average_reward: a scalar of discounted reward.
    last_reward: last reward received.
  """
  average_reward = 0
  average_last_reward = 0
  average_meta_reward = 0
  average_last_meta_reward = 0
  average_success = 0.
  states, actions = None, None
  for i in range(num_episodes):
    env_base.end_episode()
    env_base.begin_episode()
    (reward, last_reward, meta_reward, last_meta_reward,
     states, actions) = compute_reward(
        sess, step_fn, gamma, num_steps)
    s_reward = last_meta_reward  # Final meta-reward is the navigation score.
    success = (s_reward > -5.0)  # Success threshold used when diff=False.
    logging.info('Episode = %d, reward = %s, meta_reward = %f, '
                 'last_reward = %s, last meta_reward = %f, success = %s',
                 i, reward, meta_reward, last_reward, last_meta_reward,
                 success)
    average_reward += reward
    average_last_reward += last_reward
    average_meta_reward += meta_reward
    average_last_meta_reward += last_meta_reward
    average_success += success
  average_reward /= num_episodes
  average_last_reward /= num_episodes
  average_meta_reward /= num_episodes
  average_last_meta_reward /= num_episodes
  average_success /= num_episodes
  return (average_reward, average_last_reward,
          average_meta_reward, average_last_meta_reward,
          average_success,
          states, actions)

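
# Sketch (assumed, not part of the original file) of the minimal interface
# compute_average_reward requires from `env_base`: only `begin_episode` and
# `end_episode` are invoked above; all stepping is delegated to `step_fn`.
class ExampleEnvBase(object):
  """No-op environment stub exposing the two methods used above."""

  def begin_episode(self):
    pass

  def end_episode(self):
    pass


# Usage sketch (relies on example_step_fn, defined after compute_reward below):
# results = compute_average_reward(
#     None, ExampleEnvBase(), example_step_fn, gamma=0.99,
#     num_steps=10, num_episodes=2)
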

def compute_reward(sess, step_fn, gamma, num_steps):
  """Computes the discounted reward for a given number of steps.

  Args:
    sess: The tensorflow session.
    step_fn: A function that takes in `sess` and returns a list of
      [state, action, reward, discount, transition_type] values.
    gamma: discounting factor to apply to the reward.
    num_steps: number of steps to compute the reward over.
  Returns:
    reward: cumulative discounted reward.
    last_reward: reward received at final step.
  """

  total_reward = 0
  total_meta_reward = 0
  gamma_step = 1
  states = []
  actions = []
  for _ in range(num_steps):
    (state, action, transition_type, reward, meta_reward,
     discount, _, _) = step_fn(sess)
    total_reward += reward * gamma_step * discount
    total_meta_reward += meta_reward * gamma_step * discount
    gamma_step *= gamma
    states.append(state)
    actions.append(action)
  return (total_reward, reward, total_meta_reward, meta_reward,
          states, actions)
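

# Hypothetical `step_fn` showing the tuple layout compute_reward unpacks:
# (state, action, transition_type, reward, meta_reward, discount, ...).
# The last two slots are ignored by compute_reward, so zeros are returned.
def example_step_fn(unused_sess):
  """Returns a fixed transition tuple matching the expected layout."""
  state = np.zeros(2)
  action = np.zeros(2)
  transition_type = 0
  reward = 1.0
  meta_reward = 0.5
  discount = 1.0
  return state, action, transition_type, reward, meta_reward, discount, 0., 0.


# Usage sketch: with gamma=0.99 and a constant reward of 1.0, total_reward is
# the finite geometric sum over 10 steps, sum_{t=0}^{9} 0.99**t.
# (total_reward, last_reward, total_meta_reward, last_meta_reward,
#  states, actions) = compute_reward(None, example_step_fn, 0.99, 10)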