if (self.attention_dropout_rate > 0.0 and
        self.parameters.mode == base_layers.TRAIN):
      # Attention dropout: applied only when the configured rate is positive
      # and the layer's mode is TRAIN. attn_mask is multiplied by the result
      # of random_drop_to_zero(attn_mask, attention_dropout_rate).
      # NOTE(review): random_drop_to_zero is not visible here; presumably it
      # returns a tensor shaped like attn_mask with dropped positions set to
      # zero -- confirm, including whether kept entries are rescaled.
      attn_mask *= self.random_drop_to_zero(attn_mask,
                                            self.attention_dropout_rate)