digitalfabrik/integreat-cms

View on GitHub
integreat_cms/cms/utils/internal_link_utils.py

Summary

Maintainability
A
0 mins
Test Coverage
B
83%
"""
This file contains utility functions for recognizing and modifying internal links
"""

from __future__ import annotations

import logging
from typing import TYPE_CHECKING
from urllib.parse import unquote, urlparse

from django.conf import settings

from ..constants import status
from ..models import (
    EventTranslation,
    ImprintPageTranslation,
    PageTranslation,
    POITranslation,
)

if TYPE_CHECKING:
    from typing import Optional

    from lxml.html import Element

    from ..models.abstract_content_translation import AbstractContentTranslation

logger = logging.getLogger(__name__)


# pylint: disable=compare-to-zero
def update_link(
    link: Element, target_language_slug: str
) -> tuple[str, Element | str | None] | None:
    """
    Fixes the internal link, if it is broken.
    This includes:

    - Changing the link language to `target_language_slug`
    - Fixing the link path if any part of it points to an outdated version of a content translation

    Returns a tuple of the translated url and the (potentially) new inner html of the link.
    For example, with a link pointing to 'https://integreat.app/augsburg/de/willkommen/' and
    target_language_slug = 'en', a possible return value could be
    ('https://integreat.app/augsburg/en/welcome/', 'Welcome').
    Note that the resulting link might refer to a fallback language and not the actual target language.

    The second entry of the returned tuple is:

    - the new url, if the link has no child elements and its text was identical to the old url
    - the link title of the target translation, if the link has the attribute ``data-integreat-auto-update="true"``
    - ``None`` otherwise (no replacement content is proposed)

    :param link: The link to be updated
    :param target_language_slug: The language slug for the target translation
    :returns: a tuple of (url, innerHtml) of the target translation, or None if the link has no href,
              does not resolve to a public internal translation, or has no public target translation
    """
    if not (current_url := link.get("href")):
        return None

    if not (source_translation := get_public_translation_for_link(current_url)):
        return None

    target_translation = source_translation.foreign_object.get_public_translation(
        target_language_slug
    )
    if not target_translation:
        return None

    # Always use the full url, even if the url was previously a short url
    fixed_link = target_translation.full_url

    # Update the title if it was previously the url, otherwise use the new title
    # (only for links that opted into automatic updates)
    link_html = None
    # len() is used on purpose: it counts the child elements of the link
    if len(link) == 0 and link.text and current_url.strip() == link.text.strip():
        link_html = fixed_link
    elif link.get("data-integreat-auto-update") == "true":
        link_html = target_translation.link_title

    return fixed_link, link_html


# Network locations (the ``netloc`` url component) of the configured webapp and short link base urls.
# They are parsed once at import time and compared against the netloc of a link
# to decide whether it is an internal webapp link or an internal short link.
WEBAPP_NETLOC: str = urlparse(settings.WEBAPP_URL).netloc
SHORT_LINKS_NETLOC: str = urlparse(settings.SHORT_LINKS_URL).netloc


def get_public_translation_for_link(url: str) -> AbstractContentTranslation | None:
    """
    Resolves an internal url to the public content translation it points to.
    Both webapp links and short urls are supported, they are told apart by the network location of the url.
    If the url is neither a webapp link nor a short url, or if it does not refer to any object, None is returned.

    :param url: The url
    :returns: The latest corresponding content translation
    """
    location = urlparse(url)
    host = location.netloc

    # The webapp check comes first, so it wins if both netlocs happen to be identical
    if host == WEBAPP_NETLOC:
        resolver = get_public_translation_for_webapp_link
    elif host == SHORT_LINKS_NETLOC:
        resolver = get_public_translation_for_short_link
    else:
        # Not an internal link
        return None

    return resolver(location.path)


def get_public_translation_for_webapp_link(
    path: str,
) -> AbstractContentTranslation | None:
    """
    Splits a webapp url path into region slug, language slug and the remaining path parts
    and returns the latest public translation of the content object they refer to.

    :param path: The url path, for example given the url 'https://integreat.app/augsburg/de/willkommen/' it would be '/augsburg/de/willkommen/'
    :returns: The latest corresponding content translation, or None if the path has fewer than three parts
    """
    segments = unquote(path).strip("/").split("/")

    if len(segments) >= 3:
        region_slug, language_slug, *remaining_parts = segments
        return get_public_translation_for_webapp_link_parts(
            region_slug, language_slug, remaining_parts
        )

    # Without region, language and at least one more part this is not a relevant internal url
    return None


def get_public_translation_for_webapp_link_parts(
    region_slug: str, language_slug: str, path_parts: list[str]
) -> AbstractContentTranslation | None:
    """
    Calculates the content translation that corresponds to the given path, region slug and language slug.

    The first path part selects the kind of content:
    ``events`` for events, ``locations`` for POIs, ``disclaimer`` for the imprint and anything else for pages.
    The last path part is used as the slug of the content object, except for the imprint,
    which is looked up by region and language only.

    :param region_slug: The slug of the region of the translation
    :param language_slug: The slug of the language of the translation
    :param path_parts: A list of the path parts of the translations url. For example ['events', 'test-event']
    :returns: The public translation with the highest version that matches the given parts,
              or None if there is none or if ``path_parts`` is empty
    """
    if not path_parts:
        # Without any path part there is no content object to look up
        return None

    path = path_parts[0]
    object_slug = path_parts[-1]

    # Map the first path part to the translation model and the name of its relation to the content object
    object_type, foreign_object = {
        "events": (EventTranslation, "event"),
        "locations": (POITranslation, "poi"),
        "disclaimer": (ImprintPageTranslation, "page"),
    }.get(path, (PageTranslation, "page"))
    filter_args = {
        f"{foreign_object}__region__slug": region_slug,
        "language__slug": language_slug,
        "status": status.PUBLIC,
    }
    if object_type != ImprintPageTranslation:
        # The slug is matched against the translations of the content object via the
        # ``translations`` relation, not only against the returned translation itself
        filter_args[f"{foreign_object}__translations__slug"] = object_slug

    instances = (
        object_type.objects.filter(**filter_args)
        .select_related("language", f"{foreign_object}__region")
        .order_by("-version")
    )

    # Highest version first, so this is the latest public translation (or None)
    return instances.first()


def get_public_translation_for_short_link(
    path: str,
) -> AbstractContentTranslation | None:
    """
    Calculates the content object that corresponds to the short url path and returns its latest public translation.

    A short url path has the form ``/s/<type>/<id>/``, where ``<type>`` is ``p`` for page translations
    or ``i`` for imprint translations and ``<id>`` is the numeric id of the translation.

    :param path: The url path, for example given the url 'http://localhost:8000/s/p/124/' it would be '/s/p/124/'
    :returns: The latest corresponding content translation, or None if the path is not a valid short url path
              or no translation with the given id exists
    """
    parts: list[str] = unquote(path).strip("/").split("/")
    if len(parts) != 3 or parts[0] != "s":
        # Not a relevant internal url
        return None

    if parts[1] == "p":
        object_type = PageTranslation
    elif parts[1] == "i":
        object_type = ImprintPageTranslation
    else:
        return None

    try:
        object_id = int(parts[2])
    except ValueError:
        return None

    # Use filter().first() instead of get(): get() raises DoesNotExist for unknown ids,
    # whereas a link to a missing object should simply not be resolved
    instance = object_type.objects.filter(id=object_id).first()
    if instance is None:
        return None

    return instance.public_version