self.train_obs_ph = tf.placeholder(
                tf.float32, [params.batch_size] + list(params.state_shape),