src/subParsers/makehtml/links.js
////
// makehtml/links.js
// Copyright (c) 2018 ShowdownJS
//
// Transforms MD links into `<a>` html anchors
//
// A link contains link text (the visible text), a link destination (the URI that is the link destination), and
// optionally a link title. There are two basic kinds of links in Markdown.
// In inline links the destination and title are given immediately after the link text.
// In reference links the destination and title are defined elsewhere in the document.
//
// ***Author:***
// - Estevão Soares dos Santos (Tivie) <https://github.com/tivie>
////
(function () {
/**
* Helper function: Wrapper function to pass as second replace parameter
*
* @param {RegExp} rgx
* @param {string} evtRootName
* @param {{}} options
* @param {{}} globals
* @returns {Function}
*/
function replaceAnchorTagReference (rgx, evtRootName, options, globals, emptyCase) {
emptyCase = !!emptyCase;
return function (wholeMatch, text, id, url, m5, m6, title) {
// bail we we find 2 newlines somewhere
if (/\n\n/.test(wholeMatch)) {
return wholeMatch;
}
var evt = createEvent(rgx, evtRootName + '.captureStart', wholeMatch, text, id, url, title, options, globals);
return writeAnchorTag(evt, options, globals, emptyCase);
};
}
function replaceAnchorTagBaseUrl (rgx, evtRootName, options, globals, emptyCase) {
return function (wholeMatch, text, id, url, m5, m6, title) {
url = showdown.helper.applyBaseUrl(options.relativePathBaseUrl, url);
var evt = createEvent(rgx, evtRootName + '.captureStart', wholeMatch, text, id, url, title, options, globals);
return writeAnchorTag(evt, options, globals, emptyCase);
};
}
/**
* TODO Normalize this
* Helper function: Create a capture event
* @param {RegExp} rgx
* @param {String} evtName Event name
* @param {String} wholeMatch
* @param {String} text
* @param {String} id
* @param {String} url
* @param {String} title
* @param {{}} options
* @param {{}} globals
* @returns {showdown.helper.Event|*}
*/
function createEvent (rgx, evtName, wholeMatch, text, id, url, title, options, globals) {
return globals.converter._dispatch(evtName, wholeMatch, options, globals, {
regexp: rgx,
matches: {
wholeMatch: wholeMatch,
text: text,
id: id,
url: url,
title: title
}
});
}
/**
* Helper Function: Normalize and write an anchor tag based on passed parameters
* @param evt
* @param options
* @param globals
* @param {boolean} emptyCase
* @returns {string}
*/
function writeAnchorTag (evt, options, globals, emptyCase) {
var wholeMatch = evt.getMatches().wholeMatch;
var text = evt.getMatches().text;
var id = evt.getMatches().id;
var url = evt.getMatches().url;
var title = evt.getMatches().title;
var target = '';
if (!title) {
title = '';
}
id = (id) ? id.toLowerCase() : '';
if (emptyCase) {
url = '';
} else if (!url) {
if (!id) {
// lower-case and turn embedded newlines into spaces
id = text.toLowerCase().replace(/ ?\n/g, ' ');
}
url = '#' + id;
if (!showdown.helper.isUndefined(globals.gUrls[id])) {
url = globals.gUrls[id];
if (!showdown.helper.isUndefined(globals.gTitles[id])) {
title = globals.gTitles[id];
}
} else {
return wholeMatch;
}
}
//url = showdown.helper.escapeCharacters(url, '*_:~', false); // replaced line to improve performance
url = url.replace(showdown.helper.regexes.asteriskDashTildeAndColon, showdown.helper.escapeCharactersCallback);
if (title !== '' && title !== null) {
title = title.replace(/"/g, '"');
//title = showdown.helper.escapeCharacters(title, '*_', false); // replaced line to improve performance
title = title.replace(showdown.helper.regexes.asteriskDashTildeAndColon, showdown.helper.escapeCharactersCallback);
title = ' title="' + title + '"';
}
// optionLinksInNewWindow only applies
// to external links. Hash links (#) open in same page
if (options.openLinksInNewWindow && !/^#/.test(url)) {
// escaped _
target = ' rel="noopener noreferrer" target="¨E95Eblank"';
}
// Text can be a markdown element, so we run through the appropriate parsers
text = showdown.subParser('makehtml.codeSpans')(text, options, globals);
text = showdown.subParser('makehtml.emoji')(text, options, globals);
text = showdown.subParser('makehtml.underline')(text, options, globals);
text = showdown.subParser('makehtml.italicsAndBold')(text, options, globals);
text = showdown.subParser('makehtml.strikethrough')(text, options, globals);
text = showdown.subParser('makehtml.ellipsis')(text, options, globals);
text = showdown.subParser('makehtml.hashHTMLSpans')(text, options, globals);
//evt = createEvent(rgx, evtRootName + '.captureEnd', wholeMatch, text, id, url, title, options, globals);
var result = '<a href="' + url + '"' + title + target + '>' + text + '</a>';
//evt = createEvent(rgx, evtRootName + '.beforeHash', wholeMatch, text, id, url, title, options, globals);
result = showdown.subParser('makehtml.hashHTMLSpans')(result, options, globals);
return result;
}
var evtRootName = 'makehtml.links';
/**
* Turn Markdown link shortcuts into XHTML <a> tags.
*/
showdown.subParser('makehtml.links', function (text, options, globals) {
text = globals.converter._dispatch(evtRootName + '.start', text, options, globals).getText();
// 1. Handle reference-style links: [link text] [id]
text = showdown.subParser('makehtml.links.reference')(text, options, globals);
// 2. Handle inline-style links: [link text](url "optional title")
text = showdown.subParser('makehtml.links.inline')(text, options, globals);
// 3. Handle reference-style shortcuts: [link text]
// These must come last in case there's a [link text][1] or [link text](/foo)
text = showdown.subParser('makehtml.links.referenceShortcut')(text, options, globals);
// 4. Handle angle brackets links -> `<http://example.com/>`
// Must come after links, because you can use < and > delimiters in inline links like [this](<url>).
text = showdown.subParser('makehtml.links.angleBrackets')(text, options, globals);
// 5. Handle GithubMentions (if option is enabled)
text = showdown.subParser('makehtml.links.ghMentions')(text, options, globals);
// 6. Handle <a> tags and img tags
text = text.replace(/<a\s[^>]*>[\s\S]*<\/a>/g, function (wholeMatch) {
return showdown.helper._hashHTMLSpan(wholeMatch, globals);
});
text = text.replace(/<img\s[^>]*\/?>/g, function (wholeMatch) {
return showdown.helper._hashHTMLSpan(wholeMatch, globals);
});
// 7. Handle naked links (if option is enabled)
text = showdown.subParser('makehtml.links.naked')(text, options, globals);
text = globals.converter._dispatch(evtRootName + '.end', text, options, globals).getText();
return text;
});
/**
* TODO WRITE THIS DOCUMENTATION
*/
showdown.subParser('makehtml.links.inline', function (text, options, globals) {
var evtRootName = evtRootName + '.inline';
text = globals.converter._dispatch(evtRootName + '.start', text, options, globals).getText();
// 1. Look for empty cases: []() and [empty]() and []("title")
var rgxEmpty = /\[(.*?)]()()()()\(<? ?>? ?(?:["'](.*)["'])?\)/g;
text = text.replace(rgxEmpty, replaceAnchorTagBaseUrl(rgxEmpty, evtRootName, options, globals, true));
// 2. Look for cases with crazy urls like ./image/cat1).png
var rgxCrazy = /\[((?:\[[^\]]*]|[^\[\]])*)]()\s?\([ \t]?<([^>]*)>(?:[ \t]*((["'])([^"]*?)\5))?[ \t]?\)/g;
text = text.replace(rgxCrazy, replaceAnchorTagBaseUrl(rgxCrazy, evtRootName, options, globals));
// 3. inline links with no title or titles wrapped in ' or ":
// [text](url.com) || [text](<url.com>) || [text](url.com "title") || [text](<url.com> "title")
//var rgx2 = /\[[ ]*[\s]?[ ]*([^\n\[\]]*?)[ ]*[\s]?[ ]*] ?()\(<?[ ]*[\s]?[ ]*([^\s'"]*)>?(?:[ ]*[\n]?[ ]*()(['"])(.*?)\5)?[ ]*[\s]?[ ]*\)/; // this regex is too slow!!!
var rgx2 = /\[([\S ]*?)]\s?()\( *<?([^\s'"]*?(?:\([\S]*?\)[\S]*?)?)>?\s*(?:()(['"])(.*?)\5)? *\)/g;
text = text.replace(rgx2, replaceAnchorTagBaseUrl(rgx2, evtRootName, options, globals));
// 4. inline links with titles wrapped in (): [foo](bar.com (title))
var rgx3 = /\[([\S ]*?)]\s?()\( *<?([^\s'"]*?(?:\([\S]*?\)[\S]*?)?)>?\s+()()\((.*?)\) *\)/g;
text = text.replace(rgx3, replaceAnchorTagBaseUrl(rgx3, evtRootName, options, globals));
text = globals.converter._dispatch(evtRootName + '.end', text, options, globals).getText();
return text;
});
/**
* TODO WRITE THIS DOCUMENTATION
*/
showdown.subParser('makehtml.links.reference', function (text, options, globals) {
var evtRootName = evtRootName + '.reference';
text = globals.converter._dispatch(evtRootName + '.start', text, options, globals).getText();
var rgx = /\[((?:\[[^\]]*]|[^\[\]])*)] ?(?:\n *)?\[(.*?)]()()()()/g;
text = text.replace(rgx, replaceAnchorTagReference(rgx, evtRootName, options, globals));
text = globals.converter._dispatch(evtRootName + '.end', text, options, globals).getText();
return text;
});
/**
* TODO WRITE THIS DOCUMENTATION
*/
showdown.subParser('makehtml.links.referenceShortcut', function (text, options, globals) {
var evtRootName = evtRootName + '.referenceShortcut';
text = globals.converter._dispatch(evtRootName + '.start', text, options, globals).getText();
var rgx = /\[([^\[\]]+)]()()()()()/g;
text = text.replace(rgx, replaceAnchorTagReference(rgx, evtRootName, options, globals));
text = globals.converter._dispatch(evtRootName + '.end', text, options, globals).getText();
return text;
});
/**
* TODO WRITE THIS DOCUMENTATION
*/
showdown.subParser('makehtml.links.ghMentions', function (text, options, globals) {
var evtRootName = evtRootName + 'ghMentions';
if (!options.ghMentions) {
return text;
}
text = globals.converter._dispatch(evtRootName + '.start', text, options, globals).getText();
var rgx = /(^|\s)(\\)?(@([a-z\d]+(?:[a-z\d._-]+?[a-z\d]+)*))/gi;
text = text.replace(rgx, function (wholeMatch, st, escape, mentions, username) {
// bail if the mentions was escaped
if (escape === '\\') {
return st + mentions;
}
// check if options.ghMentionsLink is a string
// TODO Validation should be done at initialization not at runtime
if (!showdown.helper.isString(options.ghMentionsLink)) {
throw new Error('ghMentionsLink option must be a string');
}
var url = options.ghMentionsLink.replace(/{u}/g, username);
var evt = createEvent(rgx, evtRootName + '.captureStart', wholeMatch, mentions, null, url, null, options, globals);
// captureEnd Event is triggered inside writeAnchorTag function
return st + writeAnchorTag(evt, options, globals);
});
text = globals.converter._dispatch(evtRootName + '.end', text, options, globals).getText();
return text;
});
/**
* TODO WRITE THIS DOCUMENTATION
*/
showdown.subParser('makehtml.links.angleBrackets', function (text, options, globals) {
var evtRootName = 'makehtml.links.angleBrackets';
text = globals.converter._dispatch(evtRootName + '.start', text, options, globals).getText();
// 1. Parse links first
var urlRgx = /<(((?:https?|ftp):\/\/|www\.)[^'">\s]+)>/gi;
text = text.replace(urlRgx, function (wholeMatch, url, urlStart) {
var text = url;
url = (urlStart === 'www.') ? 'http://' + url : url;
var evt = createEvent(urlRgx, evtRootName + '.captureStart', wholeMatch, text, null, url, null, options, globals);
return writeAnchorTag(evt, options, globals);
});
// 2. Then Mail Addresses
var mailRgx = /<(?:mailto:)?([-.\w]+@[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+)>/gi;
text = text.replace(mailRgx, function (wholeMatch, mail) {
var url = 'mailto:';
mail = showdown.subParser('makehtml.unescapeSpecialChars')(mail, options, globals);
if (options.encodeEmails) {
url = showdown.helper.encodeEmailAddress(url + mail);
mail = showdown.helper.encodeEmailAddress(mail);
} else {
url = url + mail;
}
var evt = createEvent(mailRgx, evtRootName + '.captureStart', wholeMatch, mail, null, url, null, options, globals);
return writeAnchorTag(evt, options, globals);
});
text = globals.converter._dispatch(evtRootName + '.end', text, options, globals).getText();
return text;
});
/**
* TODO MAKE THIS WORK (IT'S NOT ACTIVATED)
* TODO WRITE THIS DOCUMENTATION
*/
showdown.subParser('makehtml.links.naked', function (text, options, globals) {
if (!options.simplifiedAutoLink) {
return text;
}
var evtRootName = 'makehtml.links.naked';
text = globals.converter._dispatch(evtRootName + '.start', text, options, globals).getText();
// 2. Now we check for
// we also include leading markdown magic chars [_*~] for cases like __https://www.google.com/foobar__
var urlRgx = /([_*~]*?)(((?:https?|ftp):\/\/|www\.)[^\s<>"'`´.-][^\s<>"'`´]*?\.[a-z\d.]+[^\s<>"']*)\1/gi;
text = text.replace(urlRgx, function (wholeMatch, leadingMDChars, url, urlPrefix) {
// we now will start traversing the url from the front to back, looking for punctuation chars [_*~,;:.!?\)\]]
var len = url.length;
var suffix = '';
for (var i = len - 1; i >= 0; --i) {
var char = url.charAt(i);
if (/[_*~,;:.!?]/.test(char)) {
// it's a punctuation char
// we remove it from the url
url = url.slice(0, -1);
// and prepend it to the suffix
suffix = char + suffix;
} else if (/\)/.test(char)) {
var opPar = url.match(/\(/g) || [];
var clPar = url.match(/\)/g);
// it's a curved parenthesis so we need to check for "balance" (kinda)
if (opPar.length < clPar.length) {
// there are more closing Parenthesis than opening so chop it!!!!!
url = url.slice(0, -1);
// and prepend it to the suffix
suffix = char + suffix;
} else {
// it's (kinda) balanced so our work is done
break;
}
} else if (/]/.test(char)) {
var opPar2 = url.match(/\[/g) || [];
var clPar2 = url.match(/\]/g);
// it's a squared parenthesis so we need to check for "balance" (kinda)
if (opPar2.length < clPar2.length) {
// there are more closing Parenthesis than opening so chop it!!!!!
url = url.slice(0, -1);
// and prepend it to the suffix
suffix = char + suffix;
} else {
// it's (kinda) balanced so our work is done
break;
}
} else {
// it's not a punctuation or a parenthesis so our work is done
break;
}
}
// we copy the treated url to the text variable
var text = url;
// finally, if it's a www shortcut, we prepend http
url = (urlPrefix === 'www.') ? 'http://' + url : url;
// url part is done so let's take care of text now
// we need to escape the text (because of links such as www.example.com/foo__bar__baz)
text = text.replace(showdown.helper.regexes.asteriskDashTildeAndColon, showdown.helper.escapeCharactersCallback);
// finally we dispatch the event
var evt = createEvent(urlRgx, evtRootName + '.captureStart', wholeMatch, text, null, url, null, options, globals);
// and return the link tag, with the leadingMDChars and suffix. The leadingMDChars are added at the end too because
// we consumed those characters in the regexp
return leadingMDChars + writeAnchorTag(evt, options, globals) + suffix + leadingMDChars;
});
// 2. Then mails
var mailRgx = /(^|\s)(?:mailto:)?([A-Za-z0-9!#$%&'*+-/=?^_`{|}~.]+@[-a-z0-9]+(\.[-a-z0-9]+)*\.[a-z]+)(?=$|\s)/gmi;
text = text.replace(mailRgx, function (wholeMatch, leadingChar, mail) {
var url = 'mailto:';
mail = showdown.subParser('makehtml.unescapeSpecialChars')(mail, options, globals);
if (options.encodeEmails) {
url = showdown.helper.encodeEmailAddress(url + mail);
mail = showdown.helper.encodeEmailAddress(mail);
} else {
url = url + mail;
}
var evt = createEvent(mailRgx, evtRootName + '.captureStart', wholeMatch, mail, null, url, null, options, globals);
return leadingChar + writeAnchorTag(evt, options, globals);
});
text = globals.converter._dispatch(evtRootName + '.end', text, options, globals).getText();
return text;
});
})();