trac/dist.py
# -*- coding: utf-8 -*-
#
# Copyright (C) 2011-2023 Edgewall Software
# All rights reserved.
#
# This software is licensed as described in the file COPYING, which
# you should have received as part of this distribution. The terms
# are also available at https://trac.edgewall.org/wiki/TracLicense.
#
# This software consists of voluntary contributions made by many
# individuals. For the exact contribution history, see the revision
# history and logs, available at https://trac.edgewall.org/log/.
"""Extra commands for setup.py.
We provide a few extra command classes in `l10n_cmdclass` for
localization tasks. We also override the standard
`distutils.command.build` and `setuptools.command.install_lib` command
classes in order to call the l10n commands for compiling catalogs at
the right time during install.
"""
from html.parser import HTMLParser
import io
import os
import pkg_resources
import re
from tokenize import generate_tokens, COMMENT, NAME, OP, STRING
import jinja2
from jinja2.ext import babel_extract as jinja2_extractor
from distutils import log as distlog
from distutils.cmd import Command
from distutils.command.build import build as _build
from distutils.errors import DistutilsOptionError
from setuptools.command.install_lib import install_lib as _install_lib
# True when the installed Jinja2 is older than version 3. `extract_html`
# reads this flag to decide whether 'jinja2.ext.with_' is added to the
# default list of Jinja2 extensions used for extraction.
_jinja2_ext_with = pkg_resources.parse_version(jinja2.__version__) < \
                   pkg_resources.parse_version('3')
def simplify_message(message):
    """Collapse runs of white-space in an extracted message.

    :param message: either a string, or a tuple whose first item is the
                    string to simplify
    :return: for a string, the same text with every run of white-space
             replaced by a single space (leading and trailing
             white-space removed); for a tuple, a tuple of the same
             length holding the simplified first item followed by
             `None` for every remaining position
    """
    def squeeze(text):
        return ' '.join(text.split())

    if isinstance(message, tuple) and message:
        padding = (None,) * (len(message) - 1)
        return (squeeze(message[0]),) + padding
    return squeeze(message)
class ScriptExtractor(HTMLParser):
    """HTML parser which copies the content of ``<script>`` elements.

    Text, character references and entity references met while the
    parser is inside a ``<script>`` element are written to `out`;
    everything else is dropped.
    """

    def __init__(self, out):
        super().__init__()
        self.out = out
        self.in_javascript = False

    def _emit(self, text):
        # Only forward content while a <script> element is open
        if self.in_javascript:
            self.out.write(text)

    # -- tag events: only an opening <script> switches copying on, any
    #    closing or self-closing tag switches it off

    def handle_starttag(self, tag, attrs):
        if tag == 'script':
            self.in_javascript = True

    def handle_startendtag(self, tag, attrs):
        self.in_javascript = False

    def handle_endtag(self, tag):
        self.in_javascript = False

    # -- content events

    def handle_charref(self, name):
        self._emit('&#%s;' % name)

    def handle_entityref(self, name):
        self._emit('&%s;' % name)

    def handle_data(self, data):
        self._emit(data)

    # -- comments, declarations and processing instructions are ignored

    def no_op(*args, **kwargs):
        pass

    handle_comment = handle_decl = handle_pi = no_op
try:
from babel.messages.catalog import TranslationError
from babel.messages.extract import extract_javascript
from babel.messages.frontend import extract_messages, init_catalog, \
compile_catalog, update_catalog
from babel.messages.pofile import read_po
from babel.support import Translations
from babel.util import parse_encoding
# Default value of the `kwargs_maps` option of `extract_python`: for each
# keyword (callable name), maps the name of a keyword argument to the
# 1-based position at which its string value is stored in the list of
# extracted messages (`extract_python` writes it at `messages[index - 1]`).
_DEFAULT_KWARGS_MAPS = {
    'Option': {'doc': 4},
    'BoolOption': {'doc': 4},
    'IntOption': {'doc': 4},
    'FloatOption': {'doc': 4},
    'ListOption': {'doc': 6},
    'ChoiceOption': {'doc': 4},
    'PathOption': {'doc': 4},
    'ExtensionOption': {'doc': 5},
    'OrderedExtensionsOption': {'doc': 6},
}
# Default value of the `cleandoc_keywords` option of `extract_python`:
# messages extracted from calls to these names are passed through
# `cleandoc` before being yielded.
_DEFAULT_CLEANDOC_KEYWORDS = (
    'ConfigSection', 'Option', 'BoolOption', 'IntOption', 'FloatOption',
    'ListOption', 'ChoiceOption', 'PathOption', 'ExtensionOption',
    'OrderedExtensionsOption', 'cleandoc_',
)
def extract_python(fileobj, keywords, comment_tags, options):
    """Extract messages from Python source code.

    This is a patched version of Babel's `extract_python` which adds
    support for mapping keyword arguments to message positions.

    :param fileobj: file-like object whose `readline()` returns bytes
                    (each line is decoded with the detected encoding)
    :param keywords: names of the callables whose calls are extracted
    :param comment_tags: prefixes marking a comment as a translator
                         comment
    :param options: dictionary of extraction options, see below
    :return: a generator of
             ``(lineno, funcname, messages, translator_comments)``
             tuples, where `messages` is a single value when only one
             argument slot was collected and a tuple otherwise

    Recognized options:

    `encoding` option: encoding used when none is declared in the file
    (defaults to ``'iso-8859-1'``).

    `kwargs_maps` option: for a given keyword, maps names of keyword
    arguments to the (1-based) index in the messages array at which
    the argument's string is stored. Merged over
    `_DEFAULT_KWARGS_MAPS`.

    `cleandoc_keywords` option: a list of keywords for which the
    extracted messages are cleaned up with `cleandoc`. Added to
    `_DEFAULT_CLEANDOC_KEYWORDS`.
    """
    from trac.util.text import cleandoc
    funcname = lineno = message_lineno = None
    kwargs_maps = func_kwargs_map = None
    # Parenthesis depth relative to the keyword call being collected:
    # -1 outside of any such call, 0 directly inside its argument list,
    # > 0 inside nested parentheses
    call_stack = -1
    # String literal fragments of the argument currently being read
    buf = []
    # Positional messages collected for the current call
    messages = []
    # Messages passed by keyword argument, for the mapped names only
    messages_kwargs = {}
    translator_comments = []
    in_def = in_translator_comments = False
    comment_tag = None

    encoding = str(parse_encoding(fileobj) or
                   options.get('encoding', 'iso-8859-1'))
    kwargs_maps = _DEFAULT_KWARGS_MAPS.copy()
    if 'kwargs_maps' in options:
        kwargs_maps.update(options['kwargs_maps'])
    cleandoc_keywords = set(_DEFAULT_CLEANDOC_KEYWORDS)
    if 'cleandoc_keywords' in options:
        cleandoc_keywords.update(options['cleandoc_keywords'])

    tokens = generate_tokens(lambda: fileobj.readline().decode(encoding))
    tok = value = None
    for _ in tokens:
        # Remember the previous token so that `name=` can be recognized
        # as the start of a keyword argument
        prev_tok, prev_value = tok, value
        tok, value, (lineno, _), _, _ = _
        if call_stack == -1 and tok == NAME and value in ('def', 'class'):
            in_def = True
        elif tok == OP and value == '(':
            if in_def:
                # Avoid false positives for declarations such as:
                # def gettext(arg='message'):
                in_def = False
                continue
            if funcname:
                message_lineno = lineno
                call_stack += 1
            kwarg_name = None
        elif in_def and tok == OP and value == ':':
            # End of a class definition without parens
            in_def = False
            continue
        elif call_stack == -1 and tok == COMMENT:
            # Strip the comment token from the line
            value = value[1:].strip()
            if in_translator_comments and \
                    translator_comments[-1][0] == lineno - 1:
                # We're already inside a translator comment, continue
                # appending
                translator_comments.append((lineno, value))
                continue
            # If execution reaches this point, let's see if comment line
            # starts with one of the comment tags
            for comment_tag in comment_tags:
                if value.startswith(comment_tag):
                    in_translator_comments = True
                    translator_comments.append((lineno, value))
                    break
        elif funcname and call_stack == 0:
            if tok == OP and value == ')':
                # End of the keyword call: flush the last argument
                if buf:
                    message = ''.join(buf)
                    if kwarg_name in func_kwargs_map:
                        messages_kwargs[kwarg_name] = message
                    else:
                        messages.append(message)
                    del buf[:]
                else:
                    messages.append(None)

                # Place the messages given by keyword argument at their
                # mapped position, padding the list with None as needed
                for name, message in messages_kwargs.items():
                    if name not in func_kwargs_map:
                        continue
                    index = func_kwargs_map[name]
                    while index >= len(messages):
                        messages.append(None)
                    messages[index - 1] = message

                # `m and cleandoc(m)` leaves None (and empty) entries
                # untouched
                if funcname in cleandoc_keywords:
                    messages = [m and cleandoc(m) for m in messages]
                if len(messages) > 1:
                    messages = tuple(messages)
                else:
                    messages = messages[0]
                # Comments don't apply unless they immediately precede the
                # message
                if translator_comments and \
                        translator_comments[-1][0] < message_lineno - 1:
                    translator_comments = []

                yield (message_lineno, funcname, messages,
                       [comment[1] for comment in translator_comments])

                # Reset the per-call state before scanning for the next
                # keyword
                funcname = lineno = message_lineno = None
                kwarg_name = func_kwargs_map = None
                call_stack = -1
                messages = []
                messages_kwargs = {}
                translator_comments = []
                in_translator_comments = False
            elif tok == STRING:
                # Unwrap quotes in a safe manner, maintaining the string's
                # encoding
                # https://sourceforge.net/tracker/?func=detail&atid=355470&
                # aid=617979&group_id=5470
                # NOTE(review): the text of the string token is evaluated
                # with empty builtins; this presumes the scanned sources
                # are trusted project files -- confirm
                value = eval('# coding=%s\n%s' % (encoding, value),
                             {'__builtins__':{}}, {})
                if isinstance(value, bytes):
                    value = value.decode(encoding)
                buf.append(value)
            elif tok == OP and value == '=' and prev_tok == NAME:
                # `name=` directly inside the call: the argument being
                # read is a keyword argument
                kwarg_name = prev_value
            elif tok == OP and value == ',':
                # End of one argument: flush it like for the closing
                # parenthesis above
                if buf:
                    message = ''.join(buf)
                    if kwarg_name in func_kwargs_map:
                        messages_kwargs[kwarg_name] = message
                    else:
                        messages.append(message)
                    del buf[:]
                else:
                    messages.append(None)
                kwarg_name = None
                if translator_comments:
                    # We have translator comments, and since we're on a
                    # comma(,) user is allowed to break into a new line
                    # Let's increase the last comment's lineno in order
                    # for the comment to still be a valid one
                    old_lineno, old_comment = translator_comments.pop()
                    translator_comments.append((old_lineno+1, old_comment))
        elif call_stack > 0 and tok == OP and value == ')':
            # Closing a nested parenthesis inside the keyword call
            call_stack -= 1
        elif funcname and call_stack == -1:
            # The keyword was not followed by an opening parenthesis,
            # so this is not a call: forget it
            funcname = func_kwargs_map = kwarg_name = None
        elif tok == NAME and value in keywords:
            funcname = value
            func_kwargs_map = kwargs_maps.get(funcname, {})
            kwarg_name = None
def extract_javascript_script(fileobj, keywords, comment_tags, options):
    """Extract messages from Javascript embedded in <script> tags.

    The content of `fileobj` is decoded as UTF-8 and parsed with
    `ScriptExtractor`, which keeps only what is found inside
    ``<script>`` elements. The collected text is then handed over to
    `extract_javascript`, whose result is returned.

    An empty list is returned when `fileobj` has an empty name.
    """
    if not fileobj.name:
        return []

    markup = str(fileobj.read(), 'utf-8')
    collected = io.StringIO()
    parser = ScriptExtractor(collected)
    parser.feed(markup)
    parser.close()

    # extract_javascript expects a binary file object
    javascript = io.BytesIO(collected.getvalue().encode('utf-8'))
    return extract_javascript(javascript, keywords, comment_tags, options)
def extract_html(fileobj, keywords, comment_tags, options):
    """Extracts translatable texts from templates.

    The extraction itself is delegated to the Jinja2 Babel extractor.
    Unless the `extensions` option is already set, it defaults to
    ``jinja2.ext.do`` (plus ``jinja2.ext.with_`` for Jinja2 versions
    before 3).

    We simplify white-space found in translatable texts collected
    via the ``gettext`` function (which is what the ``trans``
    directives use), otherwise we would have near duplicates
    (e.g. admin.html, prefs.html).

    We assume the template function ``gettext`` will do the same
    before trying to fetch the translation from the catalog.
    """
    if not fileobj:
        return

    if _jinja2_ext_with:
        default_extensions = 'jinja2.ext.do, jinja2.ext.with_'
    else:
        default_extensions = 'jinja2.ext.do'
    options.setdefault('extensions', default_extensions)

    fileobj.seek(0)
    for entry in jinja2_extractor(fileobj, keywords, comment_tags, options):
        # entry is (lineno, func, message, comments)
        func = entry[1]
        if func is not None and func != 'gettext':
            yield entry
            continue
        # Jinja2 trans
        yield entry[0], func, simplify_message(entry[2]), entry[3]


extract_text = extract_html
class generate_messages_js(Command):
    """Generating message javascripts command for use ``setup.py`` scripts.

    The command reads compiled MO catalogs and writes, for each of
    them, a javascript file produced by `write_js`.

    Two modes are supported:

     - directory mode (no ``--input-file``): the catalogs are looked up
       in ``<input_dir>/<locale>/LC_MESSAGES/<domain>.mo``, either for
       the given ``--locale`` or for every entry of ``<input_dir>``
       having such a file; the results are written to
       ``<output_dir>/<locale>.js``
     - file mode (``--input-file``): the single given MO file is
       written to ``--output-file`` or, failing that, to
       ``<output_dir>/<locale>.js``
    """

    description = 'generate message javascript files from binary MO files'
    user_options = [
        ('domain=', 'D',
         "domain of PO file (default 'messages')"),
        ('input-dir=', 'I',
         'path to base directory containing the catalogs'),
        ('input-file=', 'i',
         'name of the input file'),
        ('output-dir=', 'O',
         "name of the output directory"),
        ('output-file=', 'o',
         "name of the output file (default "
         "'<output_dir>/<locale>.js')"),
        ('locale=', 'l',
         'locale of the catalog to compile'),
    ]

    def initialize_options(self):
        """Set every option to its default value."""
        self.domain = 'messages'
        self.input_dir = None
        self.input_file = None
        self.output_dir = None
        self.output_file = None
        self.locale = None

    def finalize_options(self):
        """Validate the combination of options.

        :raises DistutilsOptionError: when the options don't allow
            determining the input catalogs or the output files
        """
        if not self.input_file and not self.input_dir:
            raise DistutilsOptionError('you must specify either the input '
                                       'file or directory')
        if not self.output_file and not self.output_dir:
            raise DistutilsOptionError('you must specify either the '
                                       'output file or directory')
        # The following combinations pass the checks above but would
        # make run() fail with a TypeError when building the paths of
        # the output files, so reject them with a proper message.
        if not self.input_file and not self.output_dir:
            raise DistutilsOptionError('you must specify the output '
                                       'directory when the catalogs are '
                                       'read from an input directory')
        if self.input_file and not self.output_file and not self.locale:
            raise DistutilsOptionError('you must specify either the '
                                       'output file or the locale when '
                                       'an input file is given')

    def run(self):
        """Generate one javascript file per selected MO catalog.

        :raises DistutilsOptionError: when no compiled catalog is found
        """
        mo_files = []
        js_files = []

        def js_path(directory, locale):
            return os.path.join(directory, locale + '.js')

        if not self.input_file:
            if self.locale:
                mo_files.append((self.locale,
                                 os.path.join(self.input_dir, self.locale,
                                              'LC_MESSAGES',
                                              self.domain + '.mo')))
                js_files.append(js_path(self.output_dir, self.locale))
            else:
                for locale in os.listdir(self.input_dir):
                    mo_file = os.path.join(self.input_dir, locale,
                                           'LC_MESSAGES',
                                           self.domain + '.mo')
                    if os.path.exists(mo_file):
                        mo_files.append((locale, mo_file))
                        js_files.append(js_path(self.output_dir, locale))
        else:
            mo_files.append((self.locale, self.input_file))
            if self.output_file:
                js_files.append(self.output_file)
            else:
                js_files.append(js_path(self.output_dir, self.locale))

        if not mo_files:
            raise DistutilsOptionError('no compiled catalogs found')

        # The output directory is optional when an output file is given,
        # only try to create it when it has been specified
        if self.output_dir and not os.path.isdir(self.output_dir):
            os.mkdir(self.output_dir)

        for (locale, mo_file), js_file in zip(mo_files, js_files):
            distlog.info('generating messages javascript %r to %r',
                         mo_file, js_file)

            with open(mo_file, 'rb') as infile:
                t = Translations(infile, self.domain)
                catalog = t._catalog

            with open(js_file, 'w', encoding='utf-8') as outfile:
                write_js(outfile, catalog, self.domain, locale)
class check_catalog(Command):
    """Check message catalog command for use ``setup.py`` scripts.

    Every message of the selected PO files is verified with its own
    `check` method and with `check_markup`; each reported error is
    logged as a warning together with the file name and the line number
    of the message.
    """

    description = 'check message catalog files, like `msgfmt --check`'
    user_options = [
        ('domain=', 'D',
         "domain of PO file (default 'messages')"),
        ('input-dir=', 'I',
         'path to base directory containing the catalogs'),
        ('input-file=', 'i',
         'name of the input file'),
        ('locale=', 'l',
         'locale of the catalog to compile'),
    ]

    def initialize_options(self):
        self.domain = 'messages'
        self.input_dir = self.input_file = self.locale = None

    def finalize_options(self):
        if not (self.input_file or self.input_dir):
            raise DistutilsOptionError('you must specify either the input '
                                       'file or directory')

    def run(self):
        for po_file in self._get_po_files():
            distlog.info('checking catalog %s', po_file)
            with open(po_file, 'rb') as stream:
                catalog = read_po(stream, domain=self.domain)
            for message in catalog:
                for problem in self._check_message(catalog, message):
                    distlog.warn('%s:%d: %s', po_file, message.lineno,
                                 problem)

    def _get_po_files(self):
        """Return the list of PO files to check.

        An explicit input file wins over a locale, which wins over
        scanning every entry of the input directory (sorted result).
        """
        def po_path(locale):
            return os.path.join(self.input_dir, locale, 'LC_MESSAGES',
                                self.domain + '.po')

        if self.input_file:
            return [self.input_file]
        if self.locale:
            return [po_path(self.locale)]
        candidates = (po_path(entry) for entry in os.listdir(self.input_dir))
        return sorted(path for path in candidates if os.path.exists(path))

    def _check_message(self, catalog, message):
        """Yield the errors found by the message's own checks, then the
        ones found by `check_markup`.
        """
        yield from message.check(catalog)
        yield from check_markup(catalog, message)
def check_markup(catalog, message):
    """Verify markups in the translation.

    Every non-empty msgid (singular and plural) of `message` is compared
    with every non-empty msgstr differing from it, and the errors found
    by `_check_markup_0` are yielded.
    """
    def as_sequence(value):
        return value if isinstance(value, (list, tuple)) else (value,)

    msgids = as_sequence(message.id)
    msgstrs = as_sequence(message.string)
    is_plural = len(msgids) != 1

    for id_pos, msgid in enumerate(msgids):
        msgid_name = 'msgid_plural' if id_pos else 'msgid'
        for str_pos, msgstr in enumerate(msgstrs):
            # Nothing to compare for missing or untranslated texts
            if not msgid or not msgstr or msgid == msgstr:
                continue
            if is_plural:
                msgstr_name = 'msgstr[%d]' % str_pos
            else:
                msgstr_name = 'msgstr'
            yield from _check_markup_0(msgid, msgid_name, msgstr,
                                       msgstr_name)
def _check_markup_0(msgid, msgid_name, msgstr, msgstr_name):
from xml.etree import ElementTree
def count_tags(text):
buf = io.StringIO()
buf.write('<html>\n')
buf.write(text)
buf.write('</html>')
buf.seek(0, 0)
counts = {}
for event in ElementTree.iterparse(buf):
tag = event[1].tag
counts.setdefault(tag, 0)
counts[tag] += 1
counts['html'] -= 1
return counts
try:
msgid_counts = count_tags(msgid)
except ElementTree.ParseError:
return
try:
msgstr_counts = count_tags(msgstr)
except ElementTree.ParseError as e:
yield TranslationError(e)
return
for tag in (set(msgid_counts) | set(msgstr_counts)):
msgid_count = msgid_counts.get(tag, 0)
msgstr_count = msgstr_counts.get(tag, 0)
if msgid_count != msgstr_count:
yield TranslationError(
"mismatched '%s' tag between %s and %s (%d != %d)" %
(tag, msgid_name, msgstr_name, msgid_count, msgstr_count))
def write_js(fileobj, catalog, domain, locale):
    """Write the javascript file loading the messages of `catalog`.

    :param fileobj: text file object receiving the javascript code
    :param catalog: mapping of msgid to msgstr; a key being a list or a
                    tuple is stored as ``messages[key[0]][key[1]]``,
                    and the value of the empty key is scanned for a
                    ``Plural-Forms`` header
    :param domain: stored as ``domain`` in the generated data
    :param locale: stored as ``locale`` in the generated data
    """
    from trac.util.presentation import to_json

    data = {'domain': domain, 'locale': locale}
    messages = {}
    for msgid, msgstr in catalog.items():
        if isinstance(msgid, (list, tuple)):
            messages.setdefault(msgid[0], {})[msgid[1]] = msgstr
            continue
        if msgid:
            messages[msgid] = msgstr
            continue
        # Empty msgid: look for the first "Plural-Forms" header line
        for header in msgstr.splitlines():
            name, colon, val = header.strip().partition(':')
            if colon and name.strip().lower() == 'plural-forms':
                data['plural_expr'] = pluralexpr(val)
                break
    data['messages'] = messages

    payload = to_json(data)
    if isinstance(payload, bytes):
        payload = str(payload, 'utf-8')

    for chunk in ('// Generated messages javascript file '
                  'from compiled MO file\n',
                  'babel.Translations.load(',
                  payload,
                  ').install();\n'):
        fileobj.write(chunk)
def pluralexpr(forms):
    """Return the expression assigned to ``plural`` in `forms`.

    `forms` is the value of a ``Plural-Forms`` header; the expression
    is the text following ``plural=`` up to the next semicolon.

    :raises ValueError: when no ``plural=`` assignment is found
    """
    found = re.search(r'\bplural\s*=\s*([^;]+)', forms)
    if found is None:
        raise ValueError('Failed to parse plural_forms %r' % (forms,))
    return found.group(1)
def get_command_overriders():
    """Create the `build` and `install_lib` command classes which take
    care of compiling the catalogs.

    :return: a ``(build, install_lib)`` tuple of newly created classes
    """

    class build(_build):
        # 'bdist_wininst' runs a 'build', so make the latter
        # run a 'compile_catalog' before 'build_py'
        sub_commands = [('compile_catalog', None)] + _build.sub_commands

    class install_lib(_install_lib): # playing setuptools' own tricks ;-)
        # 'bdist_egg' isn't that nice, all it does is an 'install_lib'

        def l10n_run(self):
            self.run_command('compile_catalog')

        def run(self):
            self.l10n_run()
            # When bdist_egg is called on distribute 0.6.29 and later, the
            # egg file includes no *.mo and *.js files which are generated
            # in l10n_run() method.
            # We remove build_py.data_files property to re-compute in order
            # to avoid the issue (#11640).
            build_py = self.get_finalized_command('build_py')
            if 'data_files' in build_py.__dict__:
                lists_mo_file = False
                for pkg, src_dir, build_dir, filenames in build_py.data_files:
                    if any(name.endswith('.mo') for name in filenames):
                        lists_mo_file = True
                        break
                if not lists_mo_file:
                    del build_py.__dict__['data_files']
            _install_lib.run(self)

    return build, install_lib
def get_l10n_cmdclass():
    """Return the `cmdclass` dictionary with the overridden `build` and
    `install_lib` commands and the `check_catalog` command.
    """
    overriders = get_command_overriders()
    cmdclass = dict(zip(('build', 'install_lib'), overriders))
    cmdclass['check_catalog'] = check_catalog
    return cmdclass
def get_l10n_js_cmdclass():
    """Return the `cmdclass` dictionary for a package which also has a
    javascript catalog.

    The `build` and `install_lib` commands additionally run the
    `compile_catalog_js` and `generate_messages_js` commands.
    """
    build, base_install_lib = get_command_overriders()
    # Run the javascript related commands first, in this order
    build.sub_commands[:0] = [
        ('compile_catalog_js', None),
        ('generate_messages_js', None),
    ]

    class install_lib(base_install_lib):
        def l10n_run(self):
            for command in ('compile_catalog_js',
                            'generate_messages_js',
                            'compile_catalog'):
                self.run_command(command)

    cmdclass = {
        'build': build, 'install_lib': install_lib,
        'check_catalog': check_catalog,
    }
    cmdclass.update([
        ('extract_messages_js', extract_messages),
        ('init_catalog_js', init_catalog),
        ('compile_catalog_js', compile_catalog),
        ('update_catalog_js', update_catalog),
        ('generate_messages_js', generate_messages_js),
        ('check_catalog_js', check_catalog),
    ])
    return cmdclass
def get_l10n_trac_cmdclass():
    """Return the `cmdclass` dictionary with the commands for the
    message, javascript and tracini catalogs.

    The `build` and `install_lib` commands additionally run the
    `compile_catalog_tracini`, `compile_catalog_js` and
    `generate_messages_js` commands.
    """
    build, base_install_lib = get_command_overriders()
    # Run the extra commands first, in this order
    build.sub_commands[:0] = [
        ('compile_catalog_tracini', None),
        ('compile_catalog_js', None),
        ('generate_messages_js', None),
    ]

    class install_lib(base_install_lib):
        def l10n_run(self):
            for command in ('compile_catalog_tracini',
                            'compile_catalog_js',
                            'generate_messages_js',
                            'compile_catalog'):
                self.run_command(command)

    cmdclass = {
        'build': build, 'install_lib': install_lib,
        'check_catalog': check_catalog,
    }
    cmdclass.update([
        ('extract_messages_js', extract_messages),
        ('init_catalog_js', init_catalog),
        ('compile_catalog_js', compile_catalog),
        ('update_catalog_js', update_catalog),
        ('generate_messages_js', generate_messages_js),
        ('check_catalog_js', check_catalog),
        ('extract_messages_tracini', extract_messages),
        ('init_catalog_tracini', init_catalog),
        ('compile_catalog_tracini', compile_catalog),
        ('update_catalog_tracini', update_catalog),
        ('check_catalog_tracini', check_catalog),
    ])
    return cmdclass
except ImportError:
def get_l10n_cmdclass():
    """Fallback used when the imports needed for the l10n commands
    failed: no command class is provided.
    """
    return None
def get_l10n_js_cmdclass():
    """Fallback used when the imports needed for the l10n commands
    failed: no command class is provided.
    """
    return None
def get_l10n_trac_cmdclass():
    """Fallback used when the imports needed for the l10n commands
    failed: no command class is provided.
    """
    return None