# Source: tensorflow/models (GitHub)
# Path: research/efficient-hrl/context/configs/ant_maze.gin
#-*-Python-*-
# Environment selection: bind the env_name argument of create_maze_env.
create_maze_env.env_name = "AntMaze"
# Macro pairing the two context bounds. CONTEXT_RANGE_MIN / CONTEXT_RANGE_MAX
# are not defined in the visible part of this config and must come from
# another config or binding.
context_range = (%CONTEXT_RANGE_MIN, %CONTEXT_RANGE_MAX)
# Macro reused below for meta/Context.context_ranges and
# train/RandomSampler.context_range.
# NOTE(review): presumably laid out as ((min_0, min_1), (max_0, max_1)) -- confirm.
meta_context_range = ((-4, -4), (20, 20))

# Macros shared by the reset-related bindings below, so the values are
# defined in exactly one place.
RESET_EPISODE_PERIOD = 500
RESET_ENV_PERIOD = 1
# End episode every N steps
# (N is RESET_EPISODE_PERIOD; the same macro is also bound to
# train_uvf.max_steps_per_episode so the two stay in sync.)
UvfAgent.reset_episode_cond_fn = @every_n_steps
every_n_steps.n = %RESET_EPISODE_PERIOD
train_uvf.max_steps_per_episode = %RESET_EPISODE_PERIOD
# Do a manual reset every N episodes
# (N is RESET_ENV_PERIOD; every_n_episodes also receives the episode length
# in steps via steps_per_episode.)
UvfAgent.reset_env_cond_fn = @every_n_episodes
every_n_episodes.n = %RESET_ENV_PERIOD
every_n_episodes.steps_per_episode = %RESET_EPISODE_PERIOD

## Config defaults
# Evaluation mode names. Each name has a matching key in
# meta/Context.samplers and a matching <mode>/ConstantSampler.value binding
# further down in this file.
EVAL_MODES = ["eval1", "eval2", "eval3"]

## Config agent
# Macros that reference the two scoped Context configurables set up below.
# NOTE(review): presumably consumed as %CONTEXT / %META_CONTEXT by agent
# bindings in another config -- they are not referenced in this file.
CONTEXT = @agent/Context
META_CONTEXT = @meta/Context

## Config agent context
# Bindings for Context under the "agent" scope.
# SUBGOAL_DIM is not defined in the visible part of this config and must be
# supplied elsewhere.
agent/Context.context_ranges = [%context_range]
agent/Context.context_shapes = [%SUBGOAL_DIM]
# NOTE(review): presumably the number of steps between meta-level actions --
# confirm against the Context implementation.
agent/Context.meta_action_every_n = 10
# Both modes use the same train-scoped DirectionSampler.
# NOTE(review): unlike meta/Context.samplers, no "eval1"/"eval2"/"eval3"
# entries are defined here -- confirm this is intended.
agent/Context.samplers = {
    "train": [@train/DirectionSampler],
    "explore": [@train/DirectionSampler],
}

# Transition functions; their `k` parameter is bound to %SUBGOAL_DIM in the
# samplers section of this file.
agent/Context.context_transition_fn = @relative_context_transition_fn
agent/Context.context_multi_transition_fn = @relative_context_multi_transition_fn

# Uses the "uvf"-scoped negative_distance; its parameters are not bound in
# the visible part of this config.
agent/Context.reward_fn = @uvf/negative_distance

## Config meta context
# Bindings for Context under the "meta" scope.
meta/Context.context_ranges = [%meta_context_range]
# A single context of size 2, matching the 2-element values given to the
# eval ConstantSamplers in this file.
meta/Context.context_shapes = [2]
# One sampler list per mode: "train" and "explore" share the train-scoped
# RandomSampler; each name in EVAL_MODES gets its own scoped ConstantSampler.
meta/Context.samplers = {
    "train": [@train/RandomSampler],
    "explore": [@train/RandomSampler],
    "eval1": [@eval1/ConstantSampler],
    "eval2": [@eval2/ConstantSampler],
    "eval3": [@eval3/ConstantSampler],
}
# Uses the "task"-scoped negative_distance configured in the rewards section.
meta/Context.reward_fn = @task/negative_distance

## Config rewards
# Parameters for negative_distance under the "task" scope, which is the
# reward_fn bound to meta/Context above.
# NOTE(review): state_indices [0, 1] presumably selects the first two state
# dimensions (e.g. an x/y position) to compare against the 2-element meta
# context -- confirm against the negative_distance implementation.
task/negative_distance.state_indices = [0, 1]
task/negative_distance.relative_context = False
task/negative_distance.diff = False
task/negative_distance.offset = 0.0

## Config samplers
# Train-scoped samplers draw from the same range macros used for the
# corresponding Context.context_ranges bindings.
train/RandomSampler.context_range = %meta_context_range
train/DirectionSampler.context_range = %context_range
# Every `k` parameter below is tied to the same SUBGOAL_DIM macro (defined
# outside the visible part of this config) so they cannot drift apart.
train/DirectionSampler.k = %SUBGOAL_DIM
relative_context_transition_fn.k = %SUBGOAL_DIM
relative_context_multi_transition_fn.k = %SUBGOAL_DIM
MetaAgent.k = %SUBGOAL_DIM

# Fixed 2-element values, one per evaluation mode listed in EVAL_MODES.
# NOTE(review): presumably goal coordinates in the maze; every component lies
# between -4 and 20, the bounds appearing in meta_context_range -- confirm.
eval1/ConstantSampler.value = [16, 0]
eval2/ConstantSampler.value = [16, 16]
eval3/ConstantSampler.value = [0, 16]