wikimedia/mediawiki-extensions-UploadWizard

View on GitHub
resources/mw.Escaper.js

Summary

Maintainability
A
0 mins
Test Coverage
( function () {
    /**
     * Own-property check. Used instead of the `in` operator so that keys
     * inherited from Object.prototype (`constructor`, `toString`, ...) are
     * never mistaken for stored extracts.
     *
     * @private
     * @param {Object} obj
     * @param {string} key
     * @return {boolean}
     */
    function hasOwn( obj, key ) {
        return Object.prototype.hasOwnProperty.call( obj, key );
    }

    /**
     * Recursive worker behind mw.Escaper.restoreExtracts.
     *
     * `pending` is deliberately shared between all recursion levels and is
     * consumed (keys are deleted) as placeholders get restored; that is what
     * keeps a placeholder from being expanded more than once.
     *
     * @private
     * @param {string} wikitext
     * @param {Object} pending Placeholder => original content; mutated
     * @return {string}
     */
    function restore( wikitext, pending ) {
        const keys = Object.keys( pending );

        // Nothing (left) to restore. Without this guard we would build the
        // regex /()/g, which matches the empty string at every position.
        if ( keys.length === 0 ) {
            return wikitext;
        }

        // turn search keys into a regular expression, allowing us to match
        // all of them at once
        const searchRegex = new RegExp(
            '(' + keys.map( ( key ) => mw.util.escapeRegExp( key ) ).join( '|' ) + ')',
            'g'
        );

        return wikitext.replace( searchRegex, ( match ) => {
            const replacement = hasOwn( pending, match ) ? pending[ match ] : undefined;

            // we matched something that has no replacement (anymore): either
            // it was already restored by a nested call, or it is valid user
            // input that just happens to look like one of the placeholders
            if ( replacement === undefined ) {
                return match;
            }

            // if the replacement itself matches a search value, we don't
            // want to go recursive: nesting doesn't work like that, it's
            // just a coincidence where user input happens to look just like
            // a placeholder (e.g. `{{1}}`)
            if ( hasOwn( pending, replacement ) ) {
                return replacement;
            }

            // we must not replace this one again, to avoid getting stuck in
            // endless recursion
            delete pending[ match ];

            // go recursive, there may be more placeholders nested down there
            return restore( replacement, pending );
        } );
    }

    /**
     * @class
     */
    mw.Escaper = {
        /**
         * Escapes wikitext for use inside {{templates}}.
         *
         * Currently this only escapes pipe characters, see escapePipes.
         *
         * @param {string} wikitext
         * @return {string}
         */
        escapeForTemplate: function ( wikitext ) {
            return this.escapePipes( wikitext );
        },

        /**
         * Escapes pipe characters, which could be problematic when the content is
         * inserted in a template.
         *
         * Pipes inside http(s) URLs become `%7C`, pipes inside nested
         * {{templates}} and [[links]] are left alone, and every other pipe
         * becomes `{{!}}`.
         *
         * @param {string} wikitext
         * @return {string}
         */
        escapePipes: function ( wikitext ) {
            // Pipes (`|`) must be escaped because we'll be inserting this
            // content into a template & pipes would mess up the syntax.
            // First, urlencode pipes inside URLs:
            const urlEncoded = wikitext.replace(
                /\bhttps?:\/\/[^\s]+/g,
                ( match ) => match.replace( /\|/g, '%7C' )
            );

            // Second, pipes can be valid inside other templates or links in
            // wikitext, so we'll first extract those from the content, then
            // replace the pipes, then restore the original (extracted) content:
            const extractedTemplates = this.extractTemplates( urlEncoded );
            const extractedLinks = this.extractLinks( extractedTemplates[ 0 ] );
            const escaped = extractedLinks[ 0 ].replace( /\|/g, '{{!}}' );

            // Template placeholders are `{{n}}` and link placeholders are
            // `[[n]]`, so the two maps can be merged without key collisions.
            // Merge into a fresh object rather than into either extract map.
            return this.restoreExtracts(
                escaped,
                Object.assign( {}, extractedTemplates[ 1 ], extractedLinks[ 1 ] )
            );
        },

        /**
         * Extract all {{templates}} from wikitext, replacing them with
         * placeholder content in the form of {{0}}, {{1}}, ...
         *
         * Nested templates will safely be extracted by first replacing inner
         * templates, then moving outwards, ensuring we don't get closing
         * bracket mismatches.
         *
         * Restoring the content is as simple as feeding the returned content &
         * replacements back into this.restoreExtracts.
         *
         * Known limitation: a literal `{{<digits>}}` written by the user
         * *inside* another template cannot be told apart from a placeholder
         * by the regex below, so it stays embedded in the outer template's
         * extract and may be mistaken for a placeholder when restoring.
         *
         * @param {string} wikitext
         * @return {Array} [{string} wikitext, {Object} replacements]
         */
        extractTemplates: function ( wikitext ) {
            const extracts = {};
            // the regex explained:
            // * `[^\{]`: character can not be {
            // * `\{(?!\{)`: or if it is, it can't be followed by another {
            // this excludes template opening brackets: {{
            // * `\{\{[0-9]+\}\}`: unless it's a complete {{[0-9]+}}
            //   sequence, generated by an earlier run of this regex
            const regex = /\{\{([^{]|\{(?!\{)|\{\{[0-9]+\}\})*?\}\}/g;
            // number of placeholders handed out so far; also the next index
            let count = 0;
            // placeholders that existed before the current pass started
            let previousExtracts = {};
            let current;
            let extracted = wikitext;

            const callback = ( match ) => {
                // safeguard for not replacing already-replaced matches
                // this makes sure that when real content contains something
                // like {{1}}, it will still be replaced, while {{1}}
                // generated by this code can be recognized & ignored
                if ( hasOwn( previousExtracts, match ) ) {
                    return match;
                }

                const placeholder = '{{' + count + '}}';
                count++;
                extracts[ placeholder ] = match;
                return placeholder;
            };

            // keep peeling off the innermost templates until a pass no
            // longer changes anything
            do {
                current = extracted;
                previousExtracts = Object.assign( {}, extracts );
                extracted = current.replace( regex, callback );
            } while ( current !== extracted );

            return [ current, extracts ];
        },

        /**
         * Extract all [[links]] from wikitext, replacing them with placeholder
         * content in the form of [[0]], [[1]], ...
         *
         * Links are matched non-greedily and never across a line break.
         *
         * Restoring the content is as simple as feeding the returned content &
         * replacements back into this.restoreExtracts.
         *
         * @param {string} wikitext
         * @return {Array} [{string} wikitext, {Object} replacements]
         */
        extractLinks: function ( wikitext ) {
            const extracts = {};
            let count = 0;

            const extracted = wikitext.replace( /\[\[.*?\]\]/g, ( match ) => {
                const placeholder = '[[' + count + ']]';
                count++;
                extracts[ placeholder ] = match;
                return placeholder;
            } );

            return [ extracted, extracts ];
        },

        /**
         * Restores content that was extracted from wikitext.
         *
         * Placeholders nested inside restored content are restored as well.
         * The `replacements` object passed in is not modified.
         *
         * @param {string} wikitext
         * @param {Object} replacements Placeholder => original content, as
         *  returned by extractTemplates and/or extractLinks
         * @return {string}
         */
        restoreExtracts: function ( wikitext, replacements ) {
            // The worker consumes its map while restoring; hand it a shallow
            // copy so the caller's object stays intact.
            return restore( wikitext, Object.assign( {}, replacements ) );
        }
    };
}() );