raw_policy_t = deterministic_policy_function(
                params.fcs, self.obs_t_ph, params.num_actions, tf.nn.tanh,