scripts/listpages.py
#!/usr/bin/env python3
r"""Print a list of pages, as defined by page generator parameters.
Optionally, it also prints page content to STDOUT or save it to a file
in the current directory.
These parameters are supported to specify which pages titles to print:
-format Defines the output format.
Can be a custom string according to python string.format()
notation or can be selected by a number from following list
(1 is default format):
1 - '{num:4d} {page.title}'
--> 10 PageTitle
2 - '{num:4d} [[{page.title}]]'
--> 10 [[PageTitle]]
3 - '{page.title}'
--> PageTitle
4 - '[[{page.title}]]'
--> [[PageTitle]]
5 - '{num:4d} <<lightred>>{page.loc_title:<40}<<default>>'
--> 10 localised_Namespace:PageTitle (colorised in lightred)
6 - '{num:4d} {page.loc_title:<40} {page.can_title:<40}'
--> 10 localised_Namespace:PageTitle
canonical_Namespace:PageTitle
7 - '{num:4d} {page.loc_title:<40} {page.trs_title:<40}'
--> 10 localised_Namespace:PageTitle
outputlang_Namespace:PageTitle
(*) requires "outputlang:lang" set.
num is the sequential number of the listed page.
An empty format is equal to ``-notitle`` and just shows the
total amount of pages.
-outputlang
Language for translation of namespaces.
-notitle Page title is not printed.
-get Page content is printed.
-tofile Save Page titles to a single file. File name can be set
with -tofile:filename or -tofile:dir_name/filename.
-save Save Page content to a file named as
:code:`page.title(as_filename=True)`. Directory can be set
with ``-save:dir_name``. If no dir is specified, current
directory will be used.
-encode File encoding can be specified with '-encode:name' (name
must be a valid python encoding: utf-8, etc.). If not
specified, it defaults to :code:`config.textfile_encoding`.
-put: [str] Save the list to the defined page of the wiki. By
default it does not overwrite an existing page.
-overwrite Overwrite the page if it exists. Can only by applied with
-put.
-summary: [str] The summary text when the page is written. If it's one
word just containing letters, dashes and underscores it uses
that as a translation key.
Custom format can be applied to the following items extrapolated from a
page object:
*site*
Obtained from :code:`page._link._site`.
*title*
Obtained from :code:`page._link._title`.
*loc_title*
Obtained from :code:`page._link.canonical_title()`.
*can_title*
Obtained from :code:`page._link.ns_title()`. Based either the
canonical namespace name or on the namespace name in the language
specified by the ``-trans`` param; a default value ``******`` will
be used if no *ns* is found.
*onsite*
Obtained from :code:`pywikibot.Site(outputlang, self.site.family)`.
*trs_title*
Obtained from :code:`page._link.ns_title(onsite=onsite)`. If
selected, format requires *trs_title*, ``-outputlang`` must be set.
¶ms;
"""
#
# (C) Pywikibot team, 2008-2024
#
# Distributed under the terms of the MIT license.
#
from __future__ import annotations
import os
import pywikibot
from pywikibot import config
from pywikibot.bot import AutomaticTWSummaryBot, SingleSiteBot, suggest_help
from pywikibot.exceptions import Error
from pywikibot.pagegenerators import GeneratorFactory, parameterHelp
docuReplacements = {'¶ms;': parameterHelp} # noqa: N816
class Formatter:
"""Structure with Page attributes exposed for formatting from cmd line."""
fmt_options = {
'1': '{num:4d} {page.title}',
'2': '{num:4d} [[{page.title}]]',
'3': '{page.title}',
'4': '[[{page.title}]]',
'5': '{num:4d} <<lightred>>{page.loc_title:<40}<<default>>',
'6': '{num:4d} {page.loc_title:<40} {page.can_title:<40}',
'7': '{num:4d} {page.loc_title:<40} {page.trs_title:<40}',
}
# Identify which formats need outputlang
fmt_need_lang = [k for k, v in fmt_options.items() if 'trs_title' in v]
def __init__(self, page, outputlang=None, default: str = '******') -> None:
"""Initializer.
:param page: the page to be formatted.
:type page: Page object.
:param outputlang: language code in which namespace before title should
be translated.
Page ns will be searched in Site(outputlang, page.site.family)
and, if found, its custom name will be used in page.title().
:type outputlang: str or None, if no translation is wanted.
:param default: default string to be used if no corresponding
namespace is found when outputlang is not None.
"""
self.site = page._link.site
self.title = page._link.title
self.loc_title = page._link.canonical_title()
self.can_title = page._link.ns_title()
self.outputlang = outputlang
if outputlang is not None:
# Cache onsite in case of translations.
if not hasattr(self, 'onsite'):
self.onsite = pywikibot.Site(outputlang, self.site.family)
try:
self.trs_title = page._link.ns_title(onsite=self.onsite)
# Fallback if no corresponding namespace is found in onsite.
except Error:
self.trs_title = f'{default}:{page._link.title}'
def output(self, num=None, fmt: str = '1') -> str:
"""Output formatted string."""
fmt = self.fmt_options.get(fmt, fmt)
# If selected format requires trs_title, outputlang must be set.
if (fmt in self.fmt_need_lang
or 'trs_title' in fmt
and self.outputlang is None):
raise ValueError(
"Required format code needs 'outputlang' parameter set.")
if num is None:
return fmt.format(page=self)
return fmt.format(num=num, page=self)
class ListPagesBot(AutomaticTWSummaryBot, SingleSiteBot):
"""Print a list of pages."""
summary_key = 'listpages-save-list'
available_options = {
'always': True,
'save': None,
'tofile': None,
'encode': config.textfile_encoding,
'format': '1',
'notitle': False,
'outputlang': None,
'overwrite': False,
'preloading': None,
'summary': '',
'get': False,
'put': None,
}
def treat(self, page) -> None:
"""Process one page and add it to the `output_list`."""
self.num += 1
if self.opt.tofile or not self.opt.notitle:
page_fmt = Formatter(page, self.opt.outputlang)
self.output_list.append(
page_fmt.output(num=self.num, fmt=self.opt.format))
if self.opt['get']:
try:
pywikibot.stdout(page.text)
except Error as err:
pywikibot.error(err)
if self.opt.save:
filename = os.path.join(self.opt.save,
page.title(as_filename=True))
pywikibot.info(f'Saving {page.title()} to {filename}')
with open(filename, mode='wb') as f:
f.write(page.text.encode(self.opt.encode))
self.counter['save'] += 1
if self.opt.preloading is False:
pywikibot.stdout(self.output_list[-1]
if self.opt.put else self.output_list.pop())
def setup(self) -> None:
"""Initialize `output_list` and `num` and adjust base directory."""
self.output_list = []
self.num = 0
if self.opt.save is not None:
base_dir = os.path.expanduser(self.opt.save or '.')
if not os.path.isabs(base_dir):
base_dir = os.path.normpath(os.path.join(os.getcwd(),
base_dir))
if not os.path.exists(base_dir):
pywikibot.info(f'Directory "{base_dir}" does not exist.')
choice = pywikibot.input_yn('Do you want to create it ("No" '
'to continue without saving)?')
if choice:
os.makedirs(base_dir,
mode=config.private_folder_permission)
else:
base_dir = None
elif not os.path.isdir(base_dir):
# base_dir is a file.
pywikibot.warning(
f'Not a directory: "{base_dir}"\nSkipping saving ...')
base_dir = None
self.opt.save = base_dir
def teardown(self) -> None:
"""Print list, if selected put it to wiki page or save it to a file."""
text = '\n'.join(self.output_list)
if self.opt.put:
self.current_page = self.opt.put
self.put_current(text, summary=self.opt.summary, show_diff=False)
if self.opt.tofile:
pywikibot.info(f'Writing page titles to {self.opt.tofile}')
with open(self.opt.tofile, 'w', encoding='utf-8') as f:
f.write(text)
if self.opt.preloading is True:
pywikibot.stdout(text)
def main(*args: str) -> None:
"""Process command line arguments and invoke bot.
If args is an empty list, sys.argv is used.
:param args: command line arguments
"""
options = {}
page_target = None
additional_text = ''
unknown_args = []
# Process global args and generator args
gen_factory = GeneratorFactory()
local_args = pywikibot.handle_args(args)
local_args = gen_factory.handle_args(local_args)
for arg in local_args:
option, _, value = arg.partition(':')
opt = option[1:]
if option in ('-get', '-notitle', '-overwrite'):
options[opt] = True
elif option == '-format':
if not value.strip():
options['notitle'] = True
options['format'] = value
elif option in ('-encode', '-outputlang', '-save', '-summary'):
options[opt] = value
elif option == '-put':
page_target = value
else:
unknown_args.append(arg)
site = pywikibot.Site()
if page_target:
page_target = pywikibot.Page(site, page_target)
if not options.get('overwrite') and page_target.exists():
additional_text = (f'Page {page_target} already exists.\n'
'You can use the -overwrite argument to '
'replace the content of this page.'
)
gen = gen_factory.getCombinedGenerator()
options['preloading'] = gen_factory.is_preloading
if not suggest_help(missing_generator=not gen,
unknown_parameters=unknown_args,
additional_text=additional_text):
bot = ListPagesBot(site=site, generator=gen, put=page_target,
**options)
bot.run()
if __name__ == '__main__':
main()