self.step_obs_ph = tf.placeholder(
                tf.float32, [params.num_envs] + list(params.state_shape),