takuseno/d3rlpy

View on GitHub
scripts/create_pendulum_dataset

Summary

Maintainability
Test Coverage
#!/usr/bin/env python

import gym
import d3rlpy

# Seed d3rlpy first, before any environment or algorithm is constructed.
d3rlpy.seed(100)

# Two separately constructed Pendulum environments: `env` is the one the
# agent interacts with during training, `eval_env` is handed to fit_online
# as the evaluation environment.
env, eval_env = gym.make('Pendulum-v0'), gym.make('Pendulum-v0')

# SAC agent configured with the 'min_max' action scaler.
sac = d3rlpy.algos.SAC(action_scaler='min_max')

# Replay buffer bound to the training environment; it is passed to
# fit_online below and later exported as the dataset.
buffer = d3rlpy.online.buffers.ReplayBuffer(env=env, maxlen=1000000)

# Run online training against `env` for 100000 steps, using `buffer` as the
# replay buffer.
sac.fit_online(
    env,
    n_steps=100000,
    eval_env=eval_env,
    buffer=buffer,
)

# Convert the replay buffer contents into an MDPDataset and write it to
# 'pendulum.h5' (path is relative to the current working directory).
dataset = buffer.to_mdp_dataset()
dataset.dump('pendulum.h5')