# scripts/create_pendulum_random_dataset
#!/usr/bin/env python
import gym
import d3rlpy
# Script settings, gathered in one place so they are easy to spot and tweak.
SEED = 100
ENV_ID = 'Pendulum-v0'
BUFFER_MAXLEN = 1000000
COLLECT_STEPS = 100000
OUTPUT_PATH = 'pendulum_random.h5'

# Fix the seed through d3rlpy before anything else is constructed.
d3rlpy.seed(SEED)

# Environment the random policy will interact with.
env = gym.make(ENV_ID)

# Random policy configured with a 'normal' distribution and the 'min_max'
# action scaler.
policy = d3rlpy.algos.RandomPolicy(distribution='normal', action_scaler='min_max')

# Replay buffer that receives the collected experience.
buffer = d3rlpy.online.buffers.ReplayBuffer(env=env, maxlen=BUFFER_MAXLEN)

# Collect data only -- no training happens here: the policy is run in the
# environment for COLLECT_STEPS steps and the results go into the buffer.
policy.collect(env, buffer=buffer, n_steps=COLLECT_STEPS)

# Convert the buffer contents into an MDPDataset and write it to disk.
dataset = buffer.to_mdp_dataset()
dataset.dump(OUTPUT_PATH)