resources/mw.Escaper.js
( function () {
	/**
	 * Own-property check that is not fooled by keys inherited from
	 * Object.prototype.
	 *
	 * @private
	 * @param {Object} obj
	 * @param {string} key
	 * @return {boolean}
	 */
	const hasOwn = ( obj, key ) => Object.prototype.hasOwnProperty.call( obj, key );

	/**
	 * Helpers for escaping wikitext so that it can be inserted as a template
	 * parameter without its pipe characters being read as parameter
	 * separators.
	 *
	 * @class
	 */
	mw.Escaper = {
		/**
		 * Escapes wikitext for use inside {{templates}}.
		 *
		 * Currently this only escapes pipes; see #escapePipes.
		 *
		 * @param {string} wikitext
		 * @return {string}
		 */
		escapeForTemplate: function ( wikitext ) {
			return this.escapePipes( wikitext );
		},

		/**
		 * Escapes pipe characters, which could be problematic when the content is
		 * inserted in a template.
		 *
		 * Pipes inside complete {{templates}} and [[links]] are left untouched.
		 * Of the remaining text, pipes inside http(s) URLs become `%7C` and all
		 * other pipes become `{{!}}`.
		 *
		 * @param {string} wikitext
		 * @return {string}
		 */
		escapePipes: function ( wikitext ) {
			// Pipes can be valid inside other templates or links in wikitext,
			// so those are swapped for placeholders first; only what is left
			// gets escaped, after which the original content is restored.
			const [ withoutTemplates, templates ] = this.extractTemplates( wikitext );
			const [ withoutLinks, links ] = this.extractLinks( withoutTemplates );

			const escaped = withoutLinks
				// URL-encode pipes inside bare URLs. This must happen after the
				// extraction: the URL pattern runs up to the next whitespace,
				// so doing it earlier would swallow (and encode) the pipes that
				// separate parameters following a URL inside a template or
				// link, e.g. `{{cite|url=https://example.com|title=x}}`.
				.replace( /\bhttps?:\/\/[^\s]+/g, ( url ) => url.replace( /\|/g, '%7C' ) )
				// Every pipe that is left would otherwise be interpreted as a
				// parameter separator of the template we're inserting into.
				.replace( /\|/g, '{{!}}' );

			// Template placeholders ({{n}}) and link placeholders ([[n]]) can
			// not collide, so both maps can be merged into one.
			return this.restoreExtracts( escaped, Object.assign( {}, templates, links ) );
		},

		/**
		 * Extract all {{templates}} from wikitext, replacing them with
		 * placeholder content in the form of {{0}}, {{1}}.
		 *
		 * Nested templates will safely be extracted by first replacing inner
		 * templates, then moving outwards, ensuring we don't get closing
		 * bracket mismatches.
		 *
		 * Restoring the content is as simple as feeding the returned content &
		 * replacements back into this.restoreExtracts.
		 *
		 * @param {string} wikitext
		 * @return {Array} [{string} wikitext, {Object} replacements]
		 */
		extractTemplates: function ( wikitext ) {
			const extracts = {};
			// the regex explained:
			// * `[^{]`: character can not be {
			// * `\{(?!\{)`: or if it is, it can't be followed by another {
			//   this excludes template opening brackets: {{
			// * `\{\{[0-9]+\}\}`: unless it's a complete {{[0-9]+}}
			//   sequence, generated by an earlier run of this regex
			const regex = /\{\{([^{]|\{(?!\{)|\{\{[0-9]+\}\})*?\}\}/g;
			// number used for the next placeholder; always equals the amount
			// of entries in `extracts`
			let nextIndex = 0;
			// snapshot of `extracts` as it was before the current pass
			let previousExtracts = {};

			const callback = ( match ) => {
				// Safeguard for not replacing already-replaced matches.
				// Comparing against the snapshot (instead of the live map)
				// makes sure that when real content contains something like
				// {{1}}, it will still be replaced, while {{1}} generated by
				// an earlier pass can be recognized & ignored.
				if ( hasOwn( previousExtracts, match ) ) {
					return match;
				}

				const replacement = '{{' + nextIndex + '}}';
				nextIndex++;
				extracts[ replacement ] = match;
				return replacement;
			};

			// Each pass replaces the innermost remaining templates; keep going
			// until a pass no longer changes anything.
			let current;
			let next = wikitext;
			do {
				current = next;
				previousExtracts = Object.assign( {}, extracts );
				next = current.replace( regex, callback );
			} while ( current !== next );

			return [ current, extracts ];
		},

		/**
		 * Extract all [[links]] from wikitext, replacing them with placeholder
		 * content in the form of [[0]], [[1]].
		 *
		 * A link is anything from `[[` up to the nearest `]]` on the same line.
		 *
		 * Restoring the content is as simple as feeding the returned content &
		 * replacements back into this.restoreExtracts.
		 *
		 * @param {string} wikitext
		 * @return {Array} [{string} wikitext, {Object} replacements]
		 */
		extractLinks: function ( wikitext ) {
			const extracts = {};
			let nextIndex = 0;

			const extracted = wikitext.replace( /\[\[.*?\]\]/g, ( match ) => {
				const replacement = '[[' + nextIndex + ']]';
				nextIndex++;
				extracts[ replacement ] = match;
				return replacement;
			} );

			return [ extracted, extracts ];
		},

		/**
		 * Restores content that was extracted from wikitext.
		 *
		 * Placeholders nested inside restored content are restored as well.
		 * The `replacements` object that is passed in is not modified.
		 *
		 * @param {string} wikitext
		 * @param {Object} replacements Map of placeholder to original content
		 * @return {string}
		 */
		restoreExtracts: function ( wikitext, replacements ) {
			// Entries are removed once they've been restored; do that on a
			// private copy so the caller's object can safely be reused.
			const pending = Object.assign( {}, replacements );

			const restore = ( text ) => {
				// turn search keys into a regular expression, allowing us to
				// match all of them at once
				const searchValues = Object.keys( pending ).map( ( key ) => mw.util.escapeRegExp( key ) );

				// Nothing left to restore. Without this, the regex would be
				// `()`, which matches the empty string at every position.
				if ( searchValues.length === 0 ) {
					return text;
				}

				const searchRegex = new RegExp( '(' + searchValues.join( '|' ) + ')', 'g' );

				return text.replace( searchRegex, ( match ) => {
					const replacement = pending[ match ];

					// we matched something that has no replacement (anymore),
					// must be valid user input that just happens to look like
					// one of the replacement values
					if ( replacement === undefined ) {
						return match;
					}

					// if we find the replacement itself matches a search value,
					// we also don't want to go recursive: nesting doesn't work
					// like that, it's just a coincidence where user input
					// happens to look just like a replacement value
					// (e.g. `{{1}}`)
					if ( hasOwn( pending, replacement ) ) {
						return replacement;
					}

					// we must not replace this one again, to avoid getting
					// stuck in endless recursion
					delete pending[ match ];

					// go recursive, there may be more replacements nested down
					// there
					return restore( replacement );
				} );
			};

			return restore( wikitext );
		}
	};
}() );