django/django

View on GitHub
django/contrib/sitemaps/views.py

Summary

Maintainability
B
6 hrs
Test Coverage
import datetime
from dataclasses import dataclass
from functools import wraps

from django.contrib.sites.shortcuts import get_current_site
from django.core.paginator import EmptyPage, PageNotAnInteger
from django.http import Http404
from django.template.response import TemplateResponse
from django.urls import reverse
from django.utils import timezone
from django.utils.http import http_date


@dataclass
class SitemapIndexItem:
    location: str
    last_mod: bool = None


def x_robots_tag(func):
    @wraps(func)
    def inner(request, *args, **kwargs):
        response = func(request, *args, **kwargs)
        response.headers["X-Robots-Tag"] = "noindex, noodp, noarchive"
        return response

    return inner


def _get_latest_lastmod(current_lastmod, new_lastmod):
    """
    Returns the latest `lastmod` where `lastmod` can be either a date or a
    datetime.
    """
    if not isinstance(new_lastmod, datetime.datetime):
        new_lastmod = datetime.datetime.combine(new_lastmod, datetime.time.min)
    if timezone.is_naive(new_lastmod):
        new_lastmod = timezone.make_aware(new_lastmod, datetime.timezone.utc)
    return new_lastmod if current_lastmod is None else max(current_lastmod, new_lastmod)


@x_robots_tag
def index(
    request,
    sitemaps,
    template_name="sitemap_index.xml",
    content_type="application/xml",
    sitemap_url_name="django.contrib.sitemaps.views.sitemap",
):
    req_protocol = request.scheme
    req_site = get_current_site(request)

    sites = []  # all sections' sitemap URLs
    all_indexes_lastmod = True
    latest_lastmod = None
    for section, site in sitemaps.items():
        # For each section label, add links of all pages of its sitemap
        # (usually generated by the `sitemap` view).
        if callable(site):
            site = site()
        protocol = req_protocol if site.protocol is None else site.protocol
        sitemap_url = reverse(sitemap_url_name, kwargs={"section": section})
        absolute_url = "%s://%s%s" % (protocol, req_site.domain, sitemap_url)
        site_lastmod = site.get_latest_lastmod()
        if all_indexes_lastmod:
            if site_lastmod is not None:
                latest_lastmod = _get_latest_lastmod(latest_lastmod, site_lastmod)
            else:
                all_indexes_lastmod = False
        sites.append(SitemapIndexItem(absolute_url, site_lastmod))
        # Add links to all pages of the sitemap.
        for page in range(2, site.paginator.num_pages + 1):
            sites.append(
                SitemapIndexItem("%s?p=%s" % (absolute_url, page), site_lastmod)
            )
    # If lastmod is defined for all sites, set header so as
    # ConditionalGetMiddleware is able to send 304 NOT MODIFIED
    if all_indexes_lastmod and latest_lastmod:
        headers = {"Last-Modified": http_date(latest_lastmod.timestamp())}
    else:
        headers = None
    return TemplateResponse(
        request,
        template_name,
        {"sitemaps": sites},
        content_type=content_type,
        headers=headers,
    )


@x_robots_tag
def sitemap(
    request,
    sitemaps,
    section=None,
    template_name="sitemap.xml",
    content_type="application/xml",
):
    req_protocol = request.scheme
    req_site = get_current_site(request)

    if section is not None:
        if section not in sitemaps:
            raise Http404("No sitemap available for section: %r" % section)
        maps = [sitemaps[section]]
    else:
        maps = sitemaps.values()
    page = request.GET.get("p", 1)

    lastmod = None
    all_sites_lastmod = True
    urls = []
    for site in maps:
        try:
            if callable(site):
                site = site()
            urls.extend(site.get_urls(page=page, site=req_site, protocol=req_protocol))
            if all_sites_lastmod:
                site_lastmod = getattr(site, "latest_lastmod", None)
                if site_lastmod is not None:
                    lastmod = _get_latest_lastmod(lastmod, site_lastmod)
                else:
                    all_sites_lastmod = False
        except EmptyPage:
            raise Http404("Page %s empty" % page)
        except PageNotAnInteger:
            raise Http404("No page '%s'" % page)
    # If lastmod is defined for all sites, set header so as
    # ConditionalGetMiddleware is able to send 304 NOT MODIFIED
    if all_sites_lastmod:
        headers = {"Last-Modified": http_date(lastmod.timestamp())} if lastmod else None
    else:
        headers = None
    return TemplateResponse(
        request,
        template_name,
        {"urlset": urls},
        content_type=content_type,
        headers=headers,
    )