trac/dist.py from edgewall/trac

trac/dist.py
Summary

Maintainability

5 days
Test Coverage

Issues
# -*- coding: utf-8 -*-
#
# Copyright (C) 2011-2023 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at https://trac.edgewall.org/wiki/TracLicense.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at https://trac.edgewall.org/log/.

"""Extra commands for setup.py.

We provide a few extra command classes for localization tasks, made
available through the `get_l10n_cmdclass` family of functions.  We
also subclass the standard `distutils.command.build` and
`setuptools.command.install_lib` commands in order to call the l10n
commands for compiling catalogs at the right time during install.

"""

from html.parser import HTMLParser
import io
import os
import pkg_resources
import re
from tokenize import generate_tokens, COMMENT, NAME, OP, STRING

import jinja2
from jinja2.ext import babel_extract as jinja2_extractor

from distutils import log as distlog
from distutils.cmd import Command
from distutils.command.build import build as _build
from distutils.errors import DistutilsOptionError
from setuptools.command.install_lib import install_lib as _install_lib


# True when the installed Jinja2 is older than version 3.  `extract_html`
# uses this flag to decide whether 'jinja2.ext.with_' is added to the
# default list of extensions passed to the Jinja2 extractor.
_jinja2_ext_with = pkg_resources.parse_version(jinja2.__version__) < \
                   pkg_resources.parse_version('3')


def simplify_message(message):
    """Collapse every run of white-space in an extracted message.

    `message` is either a string or a tuple.  For a string, the
    normalized string is returned.  For a non-empty tuple, only the
    first item is normalized and kept; every other position is replaced
    by `None`, so the length of the tuple is preserved.

    """
    from_tuple = isinstance(message, tuple) and len(message) > 0
    text = message[0] if from_tuple else message
    # str.split() without argument drops leading/trailing white-space and
    # splits on any white-space run, so joining gives single spaces only.
    text = ' '.join(text.split())
    if not from_tuple:
        return text
    return (text,) + (None,) * (len(message) - 1)


class ScriptExtractor(HTMLParser):
    """HTML parser copying the content found inside <script> elements.

    Everything reported by the parser while `in_javascript` is set gets
    written to `out` (any object with a `write` method accepting `str`);
    all other content is dropped.
    """

    def __init__(self, out):
        super().__init__()
        self.out = out
        self.in_javascript = False

    def _emit(self, text):
        """Write `text` to the output, but only while inside a script."""
        if self.in_javascript:
            self.out.write(text)

    def handle_starttag(self, tag, attrs):
        # Only an opening <script> switches the copy mode on; other start
        # tags leave the current mode untouched.
        if tag == 'script':
            self.in_javascript = True

    def handle_startendtag(self, tag, attrs):
        # A self-closed tag has no content to copy.
        self.in_javascript = False

    def handle_endtag(self, tag):
        # Any end tag switches the copy mode off.
        self.in_javascript = False

    def handle_data(self, data):
        self._emit(data)

    def handle_charref(self, name):
        # Re-create the numeric character reference as it was written.
        self._emit('&#%s;' % name)

    def handle_entityref(self, name):
        # Re-create the named entity reference as it was written.
        self._emit('&%s;' % name)

    def no_op(*args, **kwargs):
        pass

    # Comments, declarations and processing instructions are ignored.
    handle_comment = handle_decl = handle_pi = no_op


try:
    from babel.messages.catalog import TranslationError
    from babel.messages.extract import extract_javascript
    from babel.messages.frontend import extract_messages, init_catalog, \
                                        compile_catalog, update_catalog
    from babel.messages.pofile import read_po
    from babel.support import Translations
    from babel.util import parse_encoding

    # Default `kwargs_maps` for `extract_python`: for each function name,
    # maps the name of a keyword argument to the 1-based position in the
    # extracted messages at which the value of that keyword argument is
    # stored (`extract_python` assigns it to `messages[index - 1]`).
    _DEFAULT_KWARGS_MAPS = {
        'Option': {'doc': 4},
        'BoolOption': {'doc': 4},
        'IntOption': {'doc': 4},
        'FloatOption': {'doc': 4},
        'ListOption': {'doc': 6},
        'ChoiceOption': {'doc': 4},
        'PathOption': {'doc': 4},
        'ExtensionOption': {'doc': 5},
        'OrderedExtensionsOption': {'doc': 6},
    }

    # Default `cleandoc_keywords` for `extract_python`: the messages
    # extracted from calls to these names are passed through `cleandoc`.
    _DEFAULT_CLEANDOC_KEYWORDS = (
        'ConfigSection', 'Option', 'BoolOption', 'IntOption', 'FloatOption',
        'ListOption', 'ChoiceOption', 'PathOption', 'ExtensionOption',
        'OrderedExtensionsOption', 'cleandoc_',
    )

    def extract_python(fileobj, keywords, comment_tags, options):
        """Extract messages from Python source code.

        This is a patched version of `extract_python` from Babel which
        adds support for mapping keyword arguments to message positions.

        `fileobj` is a file-like object whose `readline()` returns bytes,
        `keywords` contains the names of the functions whose calls are
        extracted, `comment_tags` lists the prefixes which mark a comment
        as a translator comment and `options` is a dict of extractor
        options.

        This is a generator yielding one `(lineno, funcname, messages,
        comments)` tuple per extracted call, where `messages` is a single
        value when the call has one argument and a tuple otherwise;
        arguments which are not string literals are reported as `None`.

        `encoding` option: encoding used for decoding the source when
        `parse_encoding` finds none in the file (default 'iso-8859-1').

        `kwargs_maps` option: maps the names of keyword arguments to the
        1-based index in the messages array, per function name.  It is
        merged into `_DEFAULT_KWARGS_MAPS`.

        `cleandoc_keywords` option: a list of keywords to clean up the
        extracted messages with `cleandoc`.  It is added to
        `_DEFAULT_CLEANDOC_KEYWORDS`.
        """
        from trac.util.text import cleandoc

        funcname = lineno = message_lineno = None
        kwargs_maps = func_kwargs_map = None
        # -1: not inside a call of a keyword function; 0: directly inside
        # the argument list of such a call; > 0: inside nested parentheses
        call_stack = -1
        # Pieces of the string literals making up the current argument
        buf = []
        # Positional arguments collected for the current call
        messages = []
        # Arguments given by keyword and present in `func_kwargs_map`
        messages_kwargs = {}
        # (lineno, text) pairs of the pending translator comments
        translator_comments = []
        in_def = in_translator_comments = False
        comment_tag = None

        encoding = str(parse_encoding(fileobj) or
                       options.get('encoding', 'iso-8859-1'))
        kwargs_maps = _DEFAULT_KWARGS_MAPS.copy()
        if 'kwargs_maps' in options:
            kwargs_maps.update(options['kwargs_maps'])
        cleandoc_keywords = set(_DEFAULT_CLEANDOC_KEYWORDS)
        if 'cleandoc_keywords' in options:
            cleandoc_keywords.update(options['cleandoc_keywords'])

        # `fileobj` yields bytes whereas `generate_tokens` works on text
        tokens = generate_tokens(lambda: fileobj.readline().decode(encoding))
        tok = value = None
        for _ in tokens:
            # Remember the previous token in order to recognize `name=`
            prev_tok, prev_value = tok, value
            # Only the token type, its text and its starting line are used
            tok, value, (lineno, _), _, _ = _
            if call_stack == -1 and tok == NAME and value in ('def', 'class'):
                in_def = True
            elif tok == OP and value == '(':
                if in_def:
                    # Avoid false positives for declarations such as:
                    # def gettext(arg='message'):
                    in_def = False
                    continue
                if funcname:
                    message_lineno = lineno
                    call_stack += 1
                kwarg_name = None
            elif in_def and tok == OP and value == ':':
                # End of a class definition without parens
                in_def = False
                continue
            elif call_stack == -1 and tok == COMMENT:
                # Strip the comment token from the line
                value = value[1:].strip()
                if in_translator_comments and \
                        translator_comments[-1][0] == lineno - 1:
                    # We're already inside a translator comment, continue
                    # appending
                    translator_comments.append((lineno, value))
                    continue
                # If execution reaches this point, let's see if comment line
                # starts with one of the comment tags
                for comment_tag in comment_tags:
                    if value.startswith(comment_tag):
                        in_translator_comments = True
                        translator_comments.append((lineno, value))
                        break
            elif funcname and call_stack == 0:
                if tok == OP and value == ')':
                    # End of the call: flush the last argument, if any
                    if buf:
                        message = ''.join(buf)
                        if kwarg_name in func_kwargs_map:
                            messages_kwargs[kwarg_name] = message
                        else:
                            messages.append(message)
                        del buf[:]
                    else:
                        messages.append(None)

                    # Move the mapped keyword arguments to their position
                    for name, message in messages_kwargs.items():
                        if name not in func_kwargs_map:
                            continue
                        index = func_kwargs_map[name]
                        # NOTE(review): this pads `messages` up to
                        # `index + 1` entries, which is one more than
                        # `messages[index - 1]` requires -- confirm that
                        # the trailing `None` is intended.
                        while index >= len(messages):
                            messages.append(None)
                        messages[index - 1] = message

                    if funcname in cleandoc_keywords:
                        messages = [m and cleandoc(m) for m in messages]
                    if len(messages) > 1:
                        messages = tuple(messages)
                    else:
                        messages = messages[0]
                    # Comments don't apply unless they immediately precede the
                    # message
                    if translator_comments and \
                            translator_comments[-1][0] < message_lineno - 1:
                        translator_comments = []

                    yield (message_lineno, funcname, messages,
                           [comment[1] for comment in translator_comments])

                    # Reset the state for the next call
                    funcname = lineno = message_lineno = None
                    kwarg_name = func_kwargs_map = None
                    call_stack = -1
                    messages = []
                    messages_kwargs = {}
                    translator_comments = []
                    in_translator_comments = False
                elif tok == STRING:
                    # Unwrap quotes in a safe manner, maintaining the string's
                    # encoding
                    # https://sourceforge.net/tracker/?func=detail&atid=355470&
                    # aid=617979&group_id=5470
                    # NOTE(review): the token text is evaluated with empty
                    # builtins; this presumes the scanned sources are
                    # trusted.
                    value = eval('# coding=%s\n%s' % (encoding, value),
                                 {'__builtins__':{}}, {})
                    if isinstance(value, bytes):
                        value = value.decode(encoding)
                    buf.append(value)
                elif tok == OP and value == '=' and prev_tok == NAME:
                    # The following argument is given as `prev_value=...`
                    kwarg_name = prev_value
                elif tok == OP and value == ',':
                    # End of an argument: flush it like on closing paren
                    if buf:
                        message = ''.join(buf)
                        if kwarg_name in func_kwargs_map:
                            messages_kwargs[kwarg_name] = message
                        else:
                            messages.append(message)
                        del buf[:]
                    else:
                        messages.append(None)
                    kwarg_name = None
                    if translator_comments:
                        # We have translator comments, and since we're on a
                        # comma(,) user is allowed to break into a new line
                        # Let's increase the last comment's lineno in order
                        # for the comment to still be a valid one
                        old_lineno, old_comment = translator_comments.pop()
                        translator_comments.append((old_lineno+1, old_comment))
            elif call_stack > 0 and tok == OP and value == ')':
                # Closing of a nested parenthesis inside the call
                call_stack -= 1
            elif funcname and call_stack == -1:
                # The keyword was not followed by an opening parenthesis
                funcname = func_kwargs_map = kwarg_name = None
            elif tok == NAME and value in keywords:
                # Potential start of a call to a keyword function
                funcname = value
                func_kwargs_map = kwargs_maps.get(funcname, {})
                kwarg_name = None


    def extract_javascript_script(fileobj, keywords, comment_tags, options):
        """Extract messages from Javascript embedded in <script> tags.

        The content of `fileobj` is read as UTF-8 encoded bytes and
        parsed with `ScriptExtractor`; the script content it collects is
        then handed over to `extract_javascript`.

        An empty list is returned when `fileobj` has no usable `name`.
        """
        if not fileobj.name:
            return []
        script_text = io.StringIO()
        parser = ScriptExtractor(script_text)
        html = str(fileobj.read(), 'utf-8')
        parser.feed(html)
        parser.close()
        # `extract_javascript` expects a binary file object, so the
        # collected text has to be encoded again
        script_bytes = script_text.getvalue().encode('utf-8')
        return extract_javascript(io.BytesIO(script_bytes), keywords,
                                  comment_tags, options)


    def extract_html(fileobj, keywords, comment_tags, options):
        """Extracts translatable texts from templates.

        The extraction itself is delegated to the Jinja2 extractor.  The
        texts collected via the ``gettext`` function (which is what the
        ``trans`` directives use) get their white-space simplified,
        otherwise we would have near duplicates (e.g. admin.html,
        prefs.html).  All the other entries are passed through unchanged.

        We assume the template function ``gettext`` will do the same
        before trying to fetch the translation from the catalog.

        Nothing is extracted when `fileobj` is false.  Note that a
        default value for the 'extensions' key is stored in `options`
        when that key is missing.

        """
        if not fileobj:
            return
        if _jinja2_ext_with:
            default_extensions = 'jinja2.ext.do, jinja2.ext.with_'
        else:
            default_extensions = 'jinja2.ext.do'
        options.setdefault('extensions', default_extensions)
        fileobj.seek(0)
        for entry in jinja2_extractor(fileobj, keywords, comment_tags,
                                      options):
            # entry is (lineno, func, message, comments)
            if entry[1] in ('gettext', None):
                # Jinja2 trans
                yield entry[0], entry[1], simplify_message(entry[2]), entry[3]
            else:
                yield entry


    extract_text = extract_html


    class generate_messages_js(Command):
        """Generating message javascripts command for use ``setup.py`` scripts.

        Each compiled MO catalog is loaded and written out as a
        javascript file by `write_js`.  The catalogs are either looked
        up below `input_dir` (``<input_dir>/<locale>/LC_MESSAGES/
        <domain>.mo``, for the given `locale` or for every sub-directory
        having such a file), or given explicitly with `input_file`.
        """

        description = 'generate message javascript files from binary MO files'
        user_options = [
            ('domain=', 'D',
             "domain of PO file (default 'messages')"),
            ('input-dir=', 'I',
             'path to base directory containing the catalogs'),
            ('input-file=', 'i',
             'name of the input file'),
            ('output-dir=', 'O',
             "name of the output directory"),
            ('output-file=', 'o',
             "name of the output file (default "
             "'<output_dir>/<locale>.js')"),
            ('locale=', 'l',
             'locale of the catalog to compile'),
        ]

        def initialize_options(self):
            self.domain = 'messages'
            self.input_dir = None
            self.input_file = None
            self.output_dir = None
            self.output_file = None
            self.locale = None

        def finalize_options(self):
            """Validate the combination of options.

            Raises `DistutilsOptionError` when the options don't allow
            to determine the input catalogs or the output files.
            """
            if not self.input_file and not self.input_dir:
                raise DistutilsOptionError('you must specify either the input '
                                           'file or directory')
            if not self.output_file and not self.output_dir:
                raise DistutilsOptionError('you must specify either the '
                                           'output file or directory')
            # The output file names are derived from the output directory
            # when the catalogs are looked up in the input directory, so
            # the output file alone is not enough in that case.
            if not self.input_file and not self.output_dir:
                raise DistutilsOptionError('you must specify the output '
                                           'directory when using an input '
                                           'directory')
            # Without output file, the file name is derived from the
            # locale.
            if self.input_file and not self.output_file and not self.locale:
                raise DistutilsOptionError('you must specify either the '
                                           'output file or the locale when '
                                           'using an input file')

        def run(self):
            """Write one javascript file per selected catalog.

            Raises `DistutilsOptionError` when no catalog is found.
            """
            def mo_path(locale):
                return os.path.join(self.input_dir, locale, 'LC_MESSAGES',
                                    self.domain + '.mo')

            def js_path(dir, locale):
                return os.path.join(dir, locale + '.js')

            # (locale, mo_file, js_file) triples
            targets = []
            if self.input_file:
                js_file = self.output_file or js_path(self.output_dir,
                                                      self.locale)
                targets.append((self.locale, self.input_file, js_file))
            elif self.locale:
                targets.append((self.locale, mo_path(self.locale),
                                js_path(self.output_dir, self.locale)))
            else:
                for locale in os.listdir(self.input_dir):
                    mo_file = mo_path(locale)
                    if os.path.exists(mo_file):
                        targets.append((locale, mo_file,
                                        js_path(self.output_dir, locale)))

            if not targets:
                raise DistutilsOptionError('no compiled catalogs found')

            # `output_dir` is optional when an output file is given, so
            # it must not be passed unconditionally to the `os` functions
            if self.output_dir:
                os.makedirs(self.output_dir, exist_ok=True)

            for locale, mo_file, js_file in targets:
                distlog.info('generating messages javascript %r to %r',
                             mo_file, js_file)

                with open(mo_file, 'rb') as infile:
                    t = Translations(infile, self.domain)
                    catalog = t._catalog

                # An explicit output file may live outside of `output_dir`
                js_dir = os.path.dirname(js_file)
                if js_dir:
                    os.makedirs(js_dir, exist_ok=True)
                with open(js_file, 'w', encoding='utf-8') as outfile:
                    write_js(outfile, catalog, self.domain, locale)


    class check_catalog(Command):
        """Check message catalog command for use ``setup.py`` scripts.

        Every message of the selected PO files is verified with its own
        `check` method and with `check_markup`; the problems found are
        logged as warnings.
        """

        description = 'check message catalog files, like `msgfmt --check`'
        user_options = [
            ('domain=', 'D',
             "domain of PO file (default 'messages')"),
            ('input-dir=', 'I',
             'path to base directory containing the catalogs'),
            ('input-file=', 'i',
             'name of the input file'),
            ('locale=', 'l',
             'locale of the catalog to compile'),
        ]

        def initialize_options(self):
            self.domain = 'messages'
            self.input_dir = self.input_file = self.locale = None

        def finalize_options(self):
            if not (self.input_file or self.input_dir):
                raise DistutilsOptionError('you must specify either the input '
                                           'file or directory')

        def run(self):
            for po_file in self._get_po_files():
                distlog.info('checking catalog %s', po_file)
                with open(po_file, 'rb') as fileobj:
                    catalog = read_po(fileobj, domain=self.domain)
                for message in catalog:
                    for problem in self._check_message(catalog, message):
                        distlog.warn('%s:%d: %s', po_file, message.lineno,
                                     problem)

        def _get_po_files(self):
            """Return the list of the PO files to check.

            An explicit input file takes precedence over the locale,
            which takes precedence over scanning the input directory.
            """
            if self.input_file:
                return [self.input_file]

            def po_path(locale):
                return os.path.join(self.input_dir, locale, 'LC_MESSAGES',
                                    self.domain + '.po')

            if self.locale:
                return [po_path(self.locale)]

            candidates = (po_path(name)
                          for name in os.listdir(self.input_dir))
            return sorted(path for path in candidates
                          if os.path.exists(path))

        def _check_message(self, catalog, message):
            """Generate the errors found by all the checks for `message`."""
            yield from message.check(catalog)
            yield from check_markup(catalog, message)

    def check_markup(catalog, message):
        """Verify markups in the translation.

        Every non-empty msgid of `message` is compared by
        `_check_markup_0` with every non-empty msgstr which differs from
        it, and the errors reported by that function are generated.
        """
        def as_sequence(value):
            return value if isinstance(value, (list, tuple)) else (value,)

        msgids = as_sequence(message.id)
        msgstrs = as_sequence(message.string)
        has_plural = len(msgids) != 1
        for id_pos, msgid in enumerate(msgids):
            msgid_name = 'msgid_plural' if id_pos else 'msgid'
            for str_pos, msgstr in enumerate(msgstrs):
                # Nothing to compare for missing or identical texts
                if not msgid or not msgstr or msgid == msgstr:
                    continue
                if has_plural:
                    msgstr_name = 'msgstr[%d]' % str_pos
                else:
                    msgstr_name = 'msgstr'
                yield from _check_markup_0(msgid, msgid_name, msgstr,
                                           msgstr_name)

    def _check_markup_0(msgid, msgid_name, msgstr, msgstr_name):
        """Compare the tags used in `msgid` with those used in `msgstr`.

        Both texts are parsed as XML after being wrapped in an <html>
        element.  Nothing is reported when `msgid` can't be parsed.  A
        `TranslationError` is generated when `msgstr` can't be parsed,
        otherwise one is generated for each tag which is not used the
        same number of times in both texts.
        """
        from xml.etree import ElementTree

        def count_tags(text):
            source = io.StringIO('<html>\n' + text + '</html>')
            tally = {}
            for _event, element in ElementTree.iterparse(source):
                tally[element.tag] = tally.get(element.tag, 0) + 1
            # Don't count the wrapper element
            tally['html'] -= 1
            return tally

        try:
            expected = count_tags(msgid)
        except ElementTree.ParseError:
            return
        try:
            actual = count_tags(msgstr)
        except ElementTree.ParseError as e:
            yield TranslationError(e)
            return

        for tag in (set(expected) | set(actual)):
            in_msgid = expected.get(tag, 0)
            in_msgstr = actual.get(tag, 0)
            if in_msgid == in_msgstr:
                continue
            yield TranslationError(
                "mismatched '%s' tag between %s and %s (%d != %d)" %
                (tag, msgid_name, msgstr_name, in_msgid, in_msgstr))

    def write_js(fileobj, catalog, domain, locale):
        """Write the messages of `catalog` as a javascript file.

        `catalog` is a mapping from msgid to msgstr.  A list or tuple
        msgid is stored as a nested mapping, keyed by its first and then
        by its second item.  The msgstr of the empty msgid is read as a
        list of "name: value" headers, from which the expression of the
        'plural-forms' header is taken.

        The data is serialized with `to_json` and written to `fileobj`
        as the argument of a ``babel.Translations.load(...)`` call.
        """
        from trac.util.presentation import to_json

        payload = {'domain': domain, 'locale': locale}
        translations = {}
        for msgid, msgstr in catalog.items():
            if isinstance(msgid, (list, tuple)):
                translations.setdefault(msgid[0], {})[msgid[1]] = msgstr
                continue
            if msgid:
                translations[msgid] = msgstr
                continue
            # Empty msgid: look for the plural forms in the headers
            for header in msgstr.splitlines():
                name, colon, val = header.strip().partition(':')
                if not colon:
                    # Blank line or line which is not a header
                    continue
                if name.strip().lower() == 'plural-forms':
                    payload['plural_expr'] = pluralexpr(val)
                    break
        payload['messages'] = translations

        serialized = to_json(payload)
        if isinstance(serialized, bytes):
            serialized = serialized.decode('utf-8')

        for chunk in ('// Generated messages javascript file '
                      'from compiled MO file\n',
                      'babel.Translations.load(',
                      serialized,
                      ').install();\n'):
            fileobj.write(chunk)

    def pluralexpr(forms):
        """Return the expression assigned to ``plural`` in `forms`.

        The expression is everything following ``plural=`` up to the
        next semicolon or the end of the text.  A `ValueError` is raised
        when `forms` contains no such assignment.
        """
        found = re.search(r'\bplural\s*=\s*([^;]+)', forms)
        if found:
            return found.group(1)
        raise ValueError('Failed to parse plural_forms %r' % (forms,))


    def get_command_overriders():
        """Return new `build` and `install_lib` command classes.

        Both classes make sure that 'compile_catalog' is run.  New
        classes are created for each call, so the callers are free to
        modify them.
        """
        # 'bdist_wininst' runs a 'build', so make the latter
        # run a 'compile_catalog' before 'build_py'
        class build(_build):
            sub_commands = [('compile_catalog', None), *_build.sub_commands]

        # 'bdist_egg' isn't that nice, all it does is an 'install_lib'
        class install_lib(_install_lib): # playing setuptools' own tricks ;-)
            def l10n_run(self):
                self.run_command('compile_catalog')

            def run(self):
                self.l10n_run()

                def lists_mo_file(data_files):
                    for pkg, src_dir, build_dir, filenames in data_files:
                        for name in filenames:
                            if name.endswith('.mo'):
                                return True
                    return False

                # When bdist_egg is called on distribute 0.6.29 and later, the
                # egg file includes no *.mo and *.js files which are generated
                # in l10n_run() method.
                # We remove build_py.data_files property to re-compute in order
                # to avoid the issue (#11640).
                build_py = self.get_finalized_command('build_py')
                cached = vars(build_py)
                if 'data_files' in cached and \
                        not lists_mo_file(build_py.data_files):
                    del cached['data_files']
                _install_lib.run(self)

        return build, install_lib

    def get_l10n_cmdclass():
        """Return the command classes used for the l10n of a package.

        The mapping contains the `build` and `install_lib` classes
        created by `get_command_overriders`, and `check_catalog`.
        """
        build, install_lib = get_command_overriders()
        cmdclass = {'build': build, 'install_lib': install_lib}
        cmdclass['check_catalog'] = check_catalog
        return cmdclass

    def get_l10n_js_cmdclass():
        """Return the command classes used for a package which also has
        a catalog for its javascript messages.

        In addition to the regular catalog, `build` and `install_lib`
        compile the javascript catalog and generate the javascript
        message files.
        """
        build, base_install_lib = get_command_overriders()
        # Prepend both commands in front of 'compile_catalog'
        build.sub_commands[:0] = [('compile_catalog_js', None),
                                  ('generate_messages_js', None)]

        class install_lib(base_install_lib):
            def l10n_run(self):
                for command in ('compile_catalog_js', 'generate_messages_js',
                                'compile_catalog'):
                    self.run_command(command)

        cmdclass = {
            'build': build, 'install_lib': install_lib,
            'check_catalog': check_catalog,
        }
        # Commands dedicated to the javascript catalog
        cmdclass.update(
            extract_messages_js=extract_messages,
            init_catalog_js=init_catalog,
            compile_catalog_js=compile_catalog,
            update_catalog_js=update_catalog,
            generate_messages_js=generate_messages_js,
            check_catalog_js=check_catalog,
        )
        return cmdclass

    def get_l10n_trac_cmdclass():
        """Return the command classes used for a package which has a
        javascript catalog and a "tracini" catalog besides the regular
        one.

        `build` and `install_lib` compile the three catalogs and
        generate the javascript message files.
        """
        build, base_install_lib = get_command_overriders()
        # Prepend the commands in front of 'compile_catalog'
        build.sub_commands[:0] = [('compile_catalog_tracini', None),
                                  ('compile_catalog_js', None),
                                  ('generate_messages_js', None)]

        class install_lib(base_install_lib):
            def l10n_run(self):
                for command in ('compile_catalog_tracini',
                                'compile_catalog_js',
                                'generate_messages_js',
                                'compile_catalog'):
                    self.run_command(command)

        cmdclass = {
            'build': build, 'install_lib': install_lib,
            'check_catalog': check_catalog,
        }
        # Commands dedicated to the javascript catalog
        cmdclass.update(
            extract_messages_js=extract_messages,
            init_catalog_js=init_catalog,
            compile_catalog_js=compile_catalog,
            update_catalog_js=update_catalog,
            generate_messages_js=generate_messages_js,
            check_catalog_js=check_catalog,
        )
        # Commands dedicated to the "tracini" catalog
        cmdclass.update(
            extract_messages_tracini=extract_messages,
            init_catalog_tracini=init_catalog,
            compile_catalog_tracini=compile_catalog,
            update_catalog_tracini=update_catalog,
            check_catalog_tracini=check_catalog,
        )
        return cmdclass

except ImportError:
    def get_l10n_cmdclass():
        """Fallback for a failed import: no l10n command is available."""
        return None
    def get_l10n_js_cmdclass():
        """Fallback for a failed import: no l10n command is available."""
        return None
    def get_l10n_trac_cmdclass():
        """Fallback for a failed import: no l10n command is available."""
        return None