lovelyCARDINAL/WikiBots

View on GitHub
src/Report/badFileName.js

Summary

Maintainability
A
0 mins
Test Coverage
import { MediaWikiApi } from 'wiki-saikou';
import config from '../utils/config.js';

// Client for the API at config.zh.api. Elsewhere in this file it is used to log in,
// read the exclude-prefix configuration page and save the generated report pages.
const zhapi = new MediaWikiApi(config.zh.api, {
    headers: { 'user-agent': config.useragent },
});

// Client for the API at config.cm.api. Elsewhere in this file it is used to log in
// and to enumerate file pages by title prefix.
const cmapi = new MediaWikiApi(config.cm.api, {
    headers: { 'user-agent': config.useragent },
});

/**
 * Report groups: each key is the suffix of a report page title and each value is
 * the list of single-character title prefixes whose files are collected into
 * that report.
 *
 * Every prefix must appear exactly once across the whole table: a repeated
 * prefix is queried again and its files are listed twice in the report.
 */
const MAP = {
    'A-D': ['A', 'B', 'C', 'D'],
    'E-J': ['E', 'F', 'G', 'H', 'I', 'J'],
    'K-N': ['K', 'L', 'M', 'N'],
    'O-T': ['O', 'P', 'Q', 'R', 'S', 'T'],
    'U-Z': ['U', 'V', 'W', 'X', 'Y', 'Z'],
    '0-1': ['0', '1'],
    '2-4': ['2', '3', '4'],
    '5-7': ['5', '6', '7'],
    '8-9': [
        '8', '9',
        // Punctuation and symbol prefixes share the report page of the last digit group.
        '!', '?', '.', '&', '$', '@', '~', '(', ')', '%', "'", '"', '=', '-', '+',
        // NOTE(review): a second '?' used to sit between ',' and '—' and was removed
        // because it duplicated the entry above. The neighbours are CJK punctuation,
        // so a fullwidth question mark (and comma) may have been intended — TODO confirm.
        '。', ',', '—', '“', '”', '…', '☆', '▽', '`', '^',
    ],
};

/**
 * Collects all non-redirect pages in namespace 6 whose title begins with the
 * given prefix, following `apcontinue` until the API stops returning a
 * `continue` object.
 *
 * @param {string} apprefix - Title prefix passed to `list=allpages`.
 * @returns {Promise<Array>} One `[title, pageid]` pair per page, in API order.
 */
async function queryFiles(apprefix) {
    const files = [];
    // Stays undefined for the first request; later holds the API's continuation value.
    let apcontinue;
    for (;;) {
        const response = await cmapi.post({
            list: 'allpages',
            apprefix,
            apnamespace: '6',
            apfilterredir: 'nonredirects',
            aplimit: 'max',
            apcontinue,
        }, {
            retry: 15,
        });
        const { data } = response;
        for (const page of data.query.allpages) {
            files.push([page.title, page.pageid]);
        }
        if (!data.continue) {
            // No continuation object means this was the last batch.
            return files;
        }
        apcontinue = data.continue.apcontinue;
    }
}

/**
 * Heuristically decides whether a file title looks badly named.
 *
 * The `File:` prefix and a recognised extension are stripped first; when the
 * extension is not in the list, the title is checked unchanged. A title is
 * reported when any of these holds:
 *   - what remains still ends in a dot followed by 3-4 word characters;
 *   - what remains starts with a character outside `[A-Za-z0-9_]`
 *     (`\W` is ASCII-only here, so this includes non-Latin letters);
 *   - after dropping everything except word characters, dots and spaces, the
 *     rest is longer than 15 characters, contains no space and has a digit.
 *
 * @param {string} fulltitle - Full page title including the `File:` prefix.
 * @returns {boolean} `true` when the title matches one of the heuristics.
 */
function isBadTitle(fulltitle) {
    const title = fulltitle.replace(/^File:(.+?)\.(?:ogg|ogv|oga|flac|opus|wav|webm|mp3|png|gif|jpg|jpeg|webp|svg|pdf|jp2|ttf|woff2|mp4)$/i, '$1');

    // Leftover extension-like suffix, e.g. "name.jpg" after ".png" was stripped.
    if (/\.\w{3,4}$/.test(title)) {
        return true;
    }
    // First character is not an ASCII letter, digit or underscore.
    if (/^\W/.test(title)) {
        return true;
    }

    const name = title.replace(/[^\w. ]/g, '');
    if (name.length <= 15) {
        return false;
    }
    // A space anywhere in the reduced name rules out the "long token" heuristic.
    if (!/^[\w.]+$/.test(name)) {
        return false;
    }
    return /\d/.test(name);
}

/**
 * Overwrites an existing page with new text through `zhapi` and logs the
 * API response as JSON.
 *
 * The edit is sent with `nocreate`, so it is only meant for pages that
 * already exist.
 *
 * @param {string} title - Title of the page to edit.
 * @param {string} text - Full replacement wikitext.
 * @returns {Promise<void>}
 */
async function updateData(title, text) {
    const editParams = {
        action: 'edit',
        title,
        text,
        summary: '更新数据报告',
        bot: true,
        notminor: true,
        nocreate: true,
        tags: 'Bot',
        watchlist: 'nochange',
    };
    const requestOptions = {
        retry: 50,
        noCache: true,
    };

    const { data } = await zhapi.postWithToken('csrf', editParams, requestOptions);
    console.log(JSON.stringify(data));
}

/**
 * Builds the regex that decides whether a file title may appear in a report.
 *
 * Each entry is interpolated into the pattern as-is (it is NOT escaped, so it
 * is treated as regex source) inside a negative lookahead placed right after
 * the `File:` prefix. A title therefore passes only when none of the entries
 * matches at the start of the file name.
 *
 * @param {Array} prefixes - Possibly nested array of entries; flattened before use.
 * @returns {RegExp} Regex to `test()` against full page titles.
 */
function buildExcludeRegex(prefixes) {
    const alternatives = prefixes
        .flat(Infinity)
        // Array#join renders null/undefined as '', so normalise them the same way.
        .map((entry) => (entry == null ? '' : String(entry)))
        // An empty alternative matches everywhere and would make the lookahead reject every title.
        .filter((entry) => entry !== '');
    if (alternatives.length === 0) {
        // Nothing to exclude; `(?!)` would reject every title instead of none.
        return /^File:.+?$/;
    }
    return new RegExp(`^File:(?!${alternatives.join('|')}).+?$`);
}

/**
 * Renders the wikitext of one report page.
 *
 * @param {Array} pagelist - `[title, pageid]` pairs to list, in display order.
 * @returns {string} Complete page wikitext: intro, sortable table and category.
 */
function renderReport(pagelist) {
    const header = '* 本页面为[[U:星海-interfacebot|机器人]]生成的疑似命名不当的文件名,以供维护人员检查。\n* 生成时间:{{subst:#time:Y年n月j日 (D) H:i (T)}}|{{subst:#time:Y年n月j日 (D) H:i (T)|||1}}\n\n{| class="wikitable sortable center plainlinks" style="word-break:break-all"\n|-\n! width=17%|页面ID !! 文件名 !! width=23%|操作\n';
    // The separator between the two links is written as &#124;: a bare "|" inside a
    // table cell is parsed as the attribute/content delimiter, which would discard
    // the first link.
    const rows = pagelist.map(([title, pageid]) => `|-\n| ${pageid} || [[:${title}]] || [{{canonicalurl:cm:${title}}} 查看]&#124;[[Special:链入页面/${title}|链入]]\n`);
    return `${header}${rows.join('')}|}\n\n[[Category:萌娘百科数据报告]]`;
}

// Entry point: log in to both sites, load the exclude list, then build and save
// one report page per MAP group.
(async () => {
    console.log(`Start time: ${new Date().toISOString()}`);

    await Promise.all([
        zhapi.login(
            config.zh.ibot.name,
            config.zh.ibot.password,
            undefined,
            { retry: 25, noCache: true },
        ).then((result) => console.log(result)),
        cmapi.login(
            config.cm.ibot.name,
            config.cm.ibot.password,
            undefined,
            { retry: 25, noCache: true },
        ).then((result) => console.log(result)),
    ]);

    const excludeTitle = 'User:星海子/BotConfig/excludeFilePrefix.json';
    const { data: excludeData } = await zhapi.post({
        prop: 'revisions',
        titles: excludeTitle,
        rvprop: 'content',
    }, {
        retry: 15,
    });
    const content = excludeData?.query?.pages?.[0]?.revisions?.[0]?.content;
    if (typeof content !== 'string') {
        // Fail with a readable message instead of a destructuring TypeError.
        throw new Error(`Could not read the content of ${excludeTitle}`);
    }
    const excludeList = JSON.parse(content);
    if (!Array.isArray(excludeList)) {
        throw new TypeError(`${excludeTitle} must contain a JSON array`);
    }
    const excludeRegex = buildExcludeRegex(excludeList);

    await Promise.all(Object.entries(MAP).map(async ([key, chars]) => {
        // De-duplicate prefixes so a repeated entry cannot list the same files twice.
        const uniqueChars = [...new Set(chars)];
        const batches = await Promise.all(uniqueChars.map((char) => queryFiles(char)));
        const pagelist = batches
            .flat()
            .filter(([title]) => excludeRegex.test(title) && isBadTitle(title));

        await updateData(`萌娘百科:疑似不当文件名数据/${key}`, renderReport(pagelist));
    }));

    console.log(`End time: ${new Date().toISOString()}`);
})().catch((error) => {
    // Surface the failure and make the process exit non-zero without a floating rejection.
    console.error(error);
    process.exitCode = 1;
});