# Largest entry of the `next_state` row of `self.old_q`, used only when
# `absorbing` is falsy; a truthy `absorbing` fixes the value at 0 and the
# table is not read at all.
# NOTE(review): presumably `old_q` holds the previous Q estimates and
# `absorbing` marks a terminal transition — confirm against the caller.
if absorbing:
    max_q_old = 0.
else:
    max_q_old = np.max(self.old_q[next_state, :])