# Apply attention dropout only when a positive dropout rate is configured and
# the layer's parameters report TRAIN mode; otherwise attn_mask is unchanged.
if (self.attention_dropout_rate > 0.0 and
          self.parameters.mode == base_layers.TRAIN):
        # Multiply attn_mask by whatever random_drop_to_zero returns for this
        # mask and dropout rate.
        # NOTE(review): presumably the helper returns a random 0/1 keep-mask
        # compatible with attn_mask's shape, and no rescaling of the kept
        # entries is visible here -- the helper is defined outside this view,
        # so confirm both points against its implementation.
        attn_mask *= self.random_drop_to_zero(attn_mask,
                                              self.attention_dropout_rate)