scripts/nowcommons.py
#!/usr/bin/env python3
r"""Script to delete files that are also present on Wikimedia Commons.
Do not run this script on Wikimedia Commons itself. It works based on
a given array of templates defined below.
Files are downloaded and compared. If the files match, it can be deleted on
the source wiki. If multiple versions of the file exist, the script will not
delete. If the SHA1 comparison is not equal, the script will not delete.
Sysop rights on the local wiki are required if you want all features of
this script to work properly.
This script understands various command-line arguments:
-always run automatically, do not ask any questions. All files
that qualify for deletion are deleted. Reduced screen
output.
-replace replace links if the files are equal and the file names
differ
-replacealways replace links if the files are equal and the file names
differ without asking for confirmation
-replaceloose Do loose replacements. This will replace all occurrences
of the name of the file (and not just explicit file
syntax). This should work to catch all instances of the
file, including where it is used as a template parameter
or in galleries. However, it can also make more mistakes.
-replaceonly Use this if you do not have local sysop rights, but do
wish to replace links from the NowCommons template.
Example
-------
python pwb.py nowcommons -replaceonly -replaceloose -replacealways -replace
.. note:: This script is a
:py:obj:`ConfigParserBot <bot.ConfigParserBot>`. All options
can be set within a settings file which is scripts.ini by default.
"""
#
# (C) Pywikibot team, 2006-2024
#
# Distributed under the terms of the MIT license.
#
from __future__ import annotations
import sys
from itertools import chain
import pywikibot
from pywikibot import i18n
from pywikibot import pagegenerators as pg
from pywikibot.bot import ConfigParserBot, CurrentPageBot
from pywikibot.exceptions import IsRedirectPageError, NoPageError
from pywikibot.tools.itertools import filter_unique
try:
from scripts.image import ImageRobot as ImageBot
except ModuleNotFoundError:
from pywikibot_scripts.image import ImageRobot as ImageBot
# NowCommons template titles per site language code.  The '_default'
# entry is used for every language that has no entry of its own.  The
# first title of a list is the one shown in the "nothing found" message.
nowcommons = {
    '_default': ['NowCommons'],
    'ar': [
        'الآن كومنز',
        'الآن كومونز',
    ],
    'ckb': [
        'لە کۆمنز بەردەستە',
        'NowCommons',
        'Ncd',
    ],
    'de': [
        'NowCommons',
        'NC',
        'Nowcommons',
        'Now Commons',
        'NowCommons/Mängel',
        'NC/M',
    ],
    'en': ['NowCommons', 'Ncd'],
    'eo': ['Nun en komunejo', 'NowCommons'],
    'fa': [
        'موجود در انبار',
        'NowCommons',
    ],
    'fr': [
        'Image sur Commons',
        'DoublonCommons',
        'Déjà sur Commons',
        'Maintenant sur commons',
        'Désormais sur Commons',
        'NC',
        'NowCommons',
        'Nowcommons',
        'Sharedupload',
        'Sur Commons',
        'Sur Commons2',
    ],
    'he': [
        'גם בוויקישיתוף',
    ],
    'hu': ['Azonnali-commons', 'NowCommons', 'Nowcommons', 'NC'],
    'ia': ['OraInCommons'],
    'it': ['NowCommons'],
    'ja': ['NowCommons'],
    'ko': [
        '공용중복',
        '공용 중복',
        'NowCommons',
        'Now Commons',
        'Nowcommons',
    ],
    'nds-nl': ['NoenCommons', 'NowCommons'],
    'nl': [
        'NuCommons',
        'Nucommons',
        'NowCommons',
        'Nowcommons',
        'NCT',
        'Nct',
    ],
    'ro': ['NowCommons'],
    'ru': [
        'NowCommons',
        'NCT',
        'Nowcommons',
        'Now Commons',
        'Db-commons',
        'Перенесено на Викисклад',
        'На Викискладе',
    ],
    'sr': [
        'NowCommons',
        'На Остави',
    ],
    'zh': ['NowCommons', 'Nowcommons', 'NCT'],
}
# Site language codes whose NowCommons template parameter is handled as
# possibly carrying a namespace prefix: everything up to the first colon
# is dropped when the file name is extracted.
namespace_in_template = [
    'en', 'ia', 'it', 'ja',
    'ko', 'lt', 'ro', 'zh',
]
class NowCommonsDeleteBot(CurrentPageBot, ConfigParserBot):

    """Bot to delete migrated files.

    The bot walks over all local file pages which transclude one of the
    NowCommons templates of the site, determines the name of the file on
    the image repository, optionally replaces usages of the local file
    name and deletes the local file if its SHA1 hash equals the one of
    the repository file.

    .. versionchanged:: 7.0
       NowCommonsDeleteBot is a ConfigParserBot
    """

    update_options = {
        'replace': False,
        'replacealways': False,
        'replaceloose': False,
        'replaceonly': False,
    }

    def __init__(self, **kwargs) -> None:
        """Initializer.

        The script is terminated via :func:`sys.exit` if the site has no
        image repository or if the site is the image repository itself.

        :param kwargs: bot options passed on to the parent classes
        """
        super().__init__(**kwargs)
        self.site = pywikibot.Site()
        if not self.site.has_image_repository:
            sys.exit('There must be a file repository to run this script')

        self.commons = self.site.image_repository()
        if self.site == self.commons:
            sys.exit(
                'You cannot run this bot on file repository like Commons.')

        # deletion summary; a link to the Commons file is appended later
        self.summary = i18n.twtranslate(self.site,
                                        'imagetransfer-nowcommons_notice')

    def nc_templates_list(self):
        """Return nowcommons templates.

        :return: list of template titles for the language of the site;
            the ``'_default'`` list if the language has no own entry
        """
        if self.site.lang in nowcommons:
            return nowcommons[self.site.lang]
        return nowcommons['_default']

    @property
    def nc_templates(self):
        """A set of now commons template Page instances.

        The set is created on first access and cached on the instance.
        """
        if not hasattr(self, '_nc_templates'):
            self._nc_templates = {pywikibot.Page(self.site, title, ns=10)
                                  for title in self.nc_templates_list()}
        return self._nc_templates

    @property
    def generator(self):
        """Generator method.

        Yield the file pages (namespace 6) transcluding any of the
        NowCommons templates; a page referenced by several templates is
        yielded only once.
        """
        gens = (t.getReferences(follow_redirects=True, namespaces=[6],
                                only_template_inclusion=True)
                for t in self.nc_templates)
        gen = chain(*gens)
        gen = filter_unique(gen, key=lambda p: '{}:{}:{}'.format(*p._cmpkey()))
        return pg.PreloadingGenerator(gen)

    def find_file_on_commons(self, local_file_page):
        """Find filename on Commons.

        The first NowCommons template found on the page determines the
        result. If the template has no usable parameter, the title of
        the local file is used.

        :param local_file_page: the local file page to inspect
        :return: file name on Commons as given by the template, or None
            if the page does not use any NowCommons template
        """
        for template_name, params in local_file_page.templatesWithParams():
            if template_name not in self.nc_templates:
                continue

            if not params:
                # template without parameter: same name on Commons
                file_on_commons = local_file_page.title(with_ns=False)
            elif self.site.lang in namespace_in_template:
                skip = False
                file_on_commons = None
                for par in params:
                    # maxsplit=1 keeps a '=' inside the value intact
                    val = par.split('=', 1)
                    if len(val) == 1 and not skip:
                        # unnamed parameter: drop a namespace prefix
                        file_on_commons = par[par.find(':') + 1:]
                        break
                    if len(val) > 1 and val[0].strip() == '1':
                        # explicit "1=..." parameter
                        name = val[1].strip()
                        file_on_commons = name[name.find(':') + 1:]
                        break
                    # another parameter was seen; later parameters
                    # without a name are ignored from now on
                    skip = True
                if not file_on_commons:
                    file_on_commons = local_file_page.title(with_ns=False)
            else:
                # only the first parameter is taken into account
                val = params[0].split('=', 1)
                if len(val) == 1:
                    file_on_commons = params[0].strip()
                else:
                    file_on_commons = val[1].strip()
            return file_on_commons

        # no NowCommons template on this page
        return None

    def init_page(self, item: pywikibot.Page) -> pywikibot.FilePage:
        """Ensure that generator retrieves FilePage objects."""
        return pywikibot.FilePage(item)

    def skip_page(self, page) -> bool:
        """Skip shared files.

        :param page: the file page to check
        :return: True if the file is shared, otherwise the result of
            the parent's ``skip_page``
        """
        if page.file_is_shared():
            pywikibot.info('File is already on Commons.')
            return True
        return super().skip_page(page)

    def treat_page(self) -> None:
        """Treat a single page.

        If the file name on Commons differs from the local one and the
        local file is still in use, the usages are replaced when the
        ``replace`` option is set and the page is left for a later run.
        Otherwise, unless ``replaceonly`` is set, the local file is
        deleted when its SHA1 hash equals the one of the Commons file,
        it has no version history and the operator confirms (or
        ``always`` is set).
        """
        local_file_page = self.current_page
        file_on_commons = self.find_file_on_commons(local_file_page)
        if not file_on_commons:
            pywikibot.info('NowCommons template not found.')
            return

        commons_file_page = pywikibot.FilePage(self.commons,
                                               'File:' + file_on_commons)
        if (local_file_page.title(with_ns=False)
                != commons_file_page.title(with_ns=False)):
            using_pages = list(local_file_page.using_pages())

            # a file page using only itself does not count as usage
            if using_pages and using_pages != [local_file_page]:
                pywikibot.info(
                    f'"<<lightred>>{local_file_page.title(with_ns=False)}'
                    f'<<default>>" is still used in {len(using_pages)} pages.'
                )

                if self.opt.replace:
                    pywikibot.info(
                        'Replacing "<<lightred>>'
                        f'{local_file_page.title(with_ns=False)}'
                        '<<default>>" by "<<lightgreen>>'
                        f'{commons_file_page.title(with_ns=False)}'
                        '<<default>>".'
                    )
                    bot = ImageBot(local_file_page.using_pages(),
                                   local_file_page.title(with_ns=False),
                                   commons_file_page.title(with_ns=False),
                                   always=self.opt.replacealways,
                                   loose=self.opt.replaceloose)
                    bot.run()

                    # If the image is used with the urlname
                    # the previous function won't work
                    if local_file_page.file_is_used and self.opt.replaceloose:
                        bot = ImageBot(local_file_page.using_pages(),
                                       local_file_page.title(with_ns=False,
                                                             as_url=True),
                                       commons_file_page.title(with_ns=False),
                                       always=self.opt.replacealways,
                                       loose=self.opt.replaceloose)
                        bot.run()
                    self.counter['replace'] += 1
                else:
                    pywikibot.info('Please change them manually.')
                return

            pywikibot.info(
                'No page is using "<<lightgreen>>'
                f'{local_file_page.title(with_ns=False)}<<default>>" anymore.'
            )

        try:
            commons_text = commons_file_page.get()
        except (NoPageError, IsRedirectPageError) as e:
            pywikibot.error(e)
            return

        if not self.opt.replaceonly:
            sha1 = local_file_page.latest_file_info.sha1
            if sha1 == commons_file_page.latest_file_info.sha1:
                pywikibot.info(
                    'The file is identical to the one on Commons.')
                if len(local_file_page.get_file_history()) > 1:
                    pywikibot.info(
                        'This file has a version history. Please '
                        'delete it manually after making sure that '
                        'the old versions are not worth keeping.')
                    return

                if self.opt.always is False:
                    # show both descriptions so that the operator is
                    # able to compare them before confirming
                    format_str = (
                        '\n\n>>>> Description on '
                        '<<<lightpurple>>{}<<default>> <<<<\n'
                    )
                    pywikibot.info(
                        format_str.format(local_file_page.title()))
                    pywikibot.info(local_file_page.get())
                    pywikibot.info(
                        format_str.format(commons_file_page.title()))
                    pywikibot.info(commons_text)

                if self.opt.always or pywikibot.input_yn(
                    'Does the description on Commons contain all required '
                    'source and license\ninformation?', default=False):
                    local_file_page.delete(
                        f'{self.summary} [[:commons:File:{file_on_commons}]]',
                        prompt=False)
                    self.counter['delete'] += 1
            else:
                pywikibot.info(
                    'The file is not identical to the one on Commons.')

    def teardown(self):
        """Show a message if no files were found."""
        if self.generator_completed and not self.counter['read']:
            pywikibot.info('No transcluded files found for '
                           f'{self.nc_templates_list()[0]}.')
def main(*args: str) -> None:
    """Parse the command line and run the NowCommons bot.

    Global options are consumed by pywikibot; of the remaining
    arguments only the flags known to this script are evaluated, all
    other arguments are ignored.

    If args is an empty list, sys.argv is used.

    :param args: command line arguments
    """
    simple_flags = {'always', 'replace', 'replaceloose', 'replaceonly'}
    options = {}

    for argument in pywikibot.handle_args(args):
        if not argument.startswith('-'):
            continue

        flag = argument[1:]
        if flag == 'replacealways':
            # replacing without confirmation implies replacing
            options['replace'] = True
            options['replacealways'] = True
        elif flag in simple_flags:
            options[flag] = True

    NowCommonsDeleteBot(**options).run()


if __name__ == '__main__':
    main()