Cog-Creators/Red-DiscordBot

View on GitHub
redbot/core/utils/common_filters.py

Summary

Maintainability
A
0 mins
Test Coverage
import re

__all__ = [
    "URL_RE",
    "INVITE_URL_RE",
    "MASS_MENTION_RE",
    "filter_urls",
    "filter_invites",
    "filter_mass_mentions",
    "filter_various_mentions",
    "normalize_smartquotes",
    "escape_spoilers",
    "escape_spoilers_and_mass_mentions",
]

# regexes
URL_RE = re.compile(r"(https?|s?ftp)://(\S+)", re.I)

INVITE_URL_RE = re.compile(r"(discord\.(?:gg|io|me|li)|discord(?:app)?\.com\/invite)\/(\S+)", re.I)

MASS_MENTION_RE = re.compile(r"(@)(?=everyone|here)")  # This only matches the @ for sanitizing

OTHER_MENTION_RE = re.compile(r"(<)(@[!&]?|#)(\d+>)")

SMART_QUOTE_REPLACEMENT_DICT = {
    "\u2018": "'",  # Left single quote
    "\u2019": "'",  # Right single quote
    "\u201C": '"',  # Left double quote
    "\u201D": '"',  # Right double quote
}

SMART_QUOTE_REPLACE_RE = re.compile("|".join(SMART_QUOTE_REPLACEMENT_DICT.keys()))

SPOILER_CONTENT_RE = re.compile(
    r"(?s)(?<!\\)(?P<OPEN>\|{2})(?P<SPOILERED>.*?)(?<!\\)(?P<CLOSE>\|{2})"
)


# convenience wrappers
def filter_urls(to_filter: str) -> str:
    """Get a string with URLs sanitized.

    This will match any URLs starting with these protocols:

     - ``http://``
     - ``https://``
     - ``ftp://``
     - ``sftp://``

    Parameters
    ----------
    to_filter : str
        The string to filter.

    Returns
    -------
    str
        The sanitized string.

    """
    return URL_RE.sub("[SANITIZED URL]", to_filter)


def filter_invites(to_filter: str) -> str:
    """Get a string with discord invites sanitized.

    Will match any discord.gg, discordapp.com/invite, discord.com/invite, discord.me, or discord.io/discord.li
    invite URL.

    Parameters
    ----------
    to_filter : str
        The string to filter.

    Returns
    -------
    str
        The sanitized string.

    """
    return INVITE_URL_RE.sub("[SANITIZED INVITE]", to_filter)


def filter_mass_mentions(to_filter: str) -> str:
    """Get a string with mass mentions sanitized.

    Will match any *here* and/or *everyone* mentions.

    Parameters
    ----------
    to_filter : str
        The string to filter.

    Returns
    -------
    str
        The sanitized string.

    """
    return MASS_MENTION_RE.sub("@\u200b", to_filter)


def filter_various_mentions(to_filter: str) -> str:
    """
    Get a string with role, user, and channel mentions sanitized.

    This is mainly for use on user display names, not message content,
    and should be applied sparingly.

    Parameters
    ----------
    to_filter : str
        The string to filter.

    Returns
    -------
    str
        The sanitized string.
    """
    return OTHER_MENTION_RE.sub(r"\1\\\2\3", to_filter)


def normalize_smartquotes(to_normalize: str) -> str:
    """
    Get a string with smart quotes replaced with normal ones

    Parameters
    ----------
    to_normalize : str
        The string to normalize.

    Returns
    -------
    str
        The normalized string.
    """

    def replacement_for(obj):
        return SMART_QUOTE_REPLACEMENT_DICT.get(obj.group(0), "")

    return SMART_QUOTE_REPLACE_RE.sub(replacement_for, to_normalize)


def escape_spoilers(content: str) -> str:
    """
    Get a string with spoiler syntax escaped.

    Parameters
    ----------
    content : str
        The string to escape.

    Returns
    -------
    str
        The escaped string.
    """
    return SPOILER_CONTENT_RE.sub(r"\\\g<OPEN>\g<SPOILERED>\\\g<CLOSE>", content)


def escape_spoilers_and_mass_mentions(content: str) -> str:
    """
    Get a string with spoiler syntax and mass mentions escaped

    Parameters
    ----------
    content : str
        The string to escape.

    Returns
    -------
    str
        The escaped string.
    """
    return escape_spoilers(filter_mass_mentions(content))