packages/hiccup-markdown/src/parse.ts
// Downloaded @ 2023-03-07T13:30:37.046Z
// Source: https://demo.thi.ng/umbrella/parse-playground/#l9oQo0ROTDE6IDxETkw-KyA9PiBkaXNjYXJkIDsKRE5MMjogPE5MPnsyLH0gOwpsYnI6ICdcXCchIDxOTD4hIDsKZXNjOiAnXFwnISAoJ18nIHwgJyonIHwgJ2AnIHwgJ34nIHwgJzonIHwgJy0nIHwgJzwnIHwgJz4nIHwgJ1snIHwgJ10nIHwgJygnIHwgJyknIHwgJ3snIHwgJ30nIHwgJyMnIHwgJy4nIHwgJ3wnIHwgJ1xcJykgOwpjZXNjOiAnXFwnISAoJ2AnfCAnXFwnKSA7CmNiZXNjOiAnXFwnISAnYCcgOwpjdWVzYzogJ1xcJyEgJzonIDsKbWVzYzogJ1xcJyEgJ30nIDsKaWRlbGltOiAoIiFbIiB8ICdbJyB8ICIqKiIgfCAnXycgfCAnYCcgfCAnPCcgfCAifn4iIHwgIiA6IiB8IDxsYnI-KSA7CmRlbGltOiAoPGlkZWxpbT4gfCA8RE5MMj4pIDsKZWJvZHk6ICg8ZXNjPnwuKSA7CmNib2R5OiAoPGNlc2M-fC4pIDsKY2Jib2R5OiAoPGNiZXNjPnwuKSA7CmN1Ym9keTogKDxjdWVzYz58LikgOwptYm9keTogKDxtZXNjPnwuKSA7CmJvZHk6IDxlYm9keT4oPy08ZGVsaW0-ISkgPT4gam9pbiA7Cmlib2R5OiA8ZWJvZHk-KD8tPGlkZWxpbT4hKSA9PiBqb2luIDsKCmNvbW1lbnQ6ICI8IS0tIiEgLig_KyItLT4iKSA9PiBkaXNjYXJkIDsKd2lraXJlZjogIltbIiEgPGVib2R5Pig_KyJdXSIhKSA9PiBqb2luIDsKZm5yZWY6ICJbXiIhIDxsYWJlbD4gOwpmbm90ZTogPExTVEFSVD4gIlteIiEgPGZubGFiZWw-IDxXUzE-IDxwYXJhPiA7CmZubGFiZWw6IDxlYm9keT4oPysiXToiISkgPT4gam9pbiA7CmxhYmVsOiA8ZWJvZHk-KD8rJ10nISkgPT4gam9pbiA7CnRhcmdldDogPGVib2R5Pig_KycpJyEpID0-IGpvaW4gOwpsaW5rOiAnWychIDxsaW5rbGFiZWw-ICcoJyEgPHRhcmdldD4gOwpsaW5rcmVmOiAnWychIDxsaW5rbGFiZWw-ICdbJyEgPGxhYmVsPiA7CmxpbmtkZWY6IDxMU1RBUlQ-ICdbJyEgPGxhYmVsPiAnOichIDxXUzE-IDxsZHRhcmdldD4gOwpsZHRhcmdldDogPGVib2R5Pig_KzxETkwxPikgPT4gam9pbiA7CmxpbmtsYWJlbDogKDxpbWc-IHwgPGJvbGQ-IHwgPGl0YWxpYz4gfCA8c3RyaWtlPiB8IDxjb2RlPiB8IDxlbW9qaT4gfCA8a2JkPiB8IDxzdWI-IHwgPHN1cD4gfCA8bGlua2JvZHk-KSogJ10nISA7CmxpbmtkZWxpbTogKCIhWyIgfCAnWycgfCAiKioiIHwgJ18nIHwgIn5-IiB8ICdgJyB8ICddJykgOwpsaW5rYm9keTogPGVib2R5Pig_LTxsaW5rZGVsaW0-ISkgPT4gam9pbiA7CmltZzogIiFbIiEgPGxhYmVsPiAnKCchIDx0YXJnZXQ-IDsKYm9sZDogIioqIiEgKDx3aWtpcmVmPiB8IDxpbWc-IHwgPGZucmVmPiB8IDxsaW5rcmVmPiB8IDxsaW5rPiB8IDxpdGFsaWM-IHwgPHN0cmlrZT4gfCA8Y29kZT4gfCA8ZW1vamk-IHwgPGtiZD4gfCA8c3ViPiB8IDxzdXA-IHwgPGlib2R5PikqICIqKiIhIDsKaXRhbGljOiAnXychICg8d2lraXJlZj4gfCA8aW1nPiB8IDxmbnJlZj4gfCA8bGlua3JlZj4gfCA8bGluaz4gfCA8Ym9sZD4gfCA8c3RyaWtlPi
B8IDxjb2RlPiB8IDxlbW9qaT4gfCA8a2JkPiB8IDxzdWI-IHwgPHN1cD4gfCA8aWJvZHk-KSogJ18nISA7CnN0cmlrZTogIn5-IiEgKDx3aWtpcmVmPiB8IDxpbWc-IHwgPGZucmVmPiB8IDxsaW5rcmVmPiB8IDxsaW5rPiB8IDxib2xkPiB8IDxpdGFsaWM-IHwgPGNvZGU-IHwgPGVtb2ppPiB8IDxrYmQ-IHwgPHN1Yj4gfCA8c3VwPiB8IDxpYm9keT4pKiAifn4iISA7CmNvZGU6ICdgJyEgPGNib2R5Pig_KydgJyEpID0-IGpvaW4gOwprYmQ6ICI8a2JkPiIhIDxlYm9keT4oPysiPC9rYmQ-IiEpID0-IGpvaW4gOwpzdWI6ICI8c3ViPiIhIDxlYm9keT4oPysiPC9zdWI-IiEpID0-IGpvaW4gOwpzdXA6ICI8c3VwPiIhIDxlYm9keT4oPysiPC9zdXA-IiEpID0-IGpvaW4gOwplbW9qaTogJyAnPyAnOichICg8QUxQSEFfTlVNPiB8ICcrJykoPysnOichKSA9PiBqb2luIDsKcGFyYTogKDx3aWtpcmVmPiB8IDxpbWc-IHwgPGZucmVmPiB8IDxsaW5rcmVmPiB8IDxsaW5rPiB8IDxib2xkPiB8IDxpdGFsaWM-IHwgPHN0cmlrZT4gfCA8Y29kZT4gfCA8a2JkPiB8IDxzdWI-IHwgPHN1cD4gfCA8ZW1vamk-IHwgPGxicj4gfCA8Y29tbWVudD4gfCA8Ym9keT4pKiA8RE5MMj4hIDsKCmhkbGV2ZWw6ICcjJysgPT4gY291bnQgOwpoZGlkOiAieyMiISAoPEFMUEhBX05VTT4gfCAnLScpKyAnfSchID0-IGpvaW4gOwpoZGRlbGltOiAoPGlkZWxpbT4gfCA8Tkw-IHwgInsjIikgOwpoZGJvZHk6IDxlYm9keT4oPy08aGRkZWxpbT4hKSA9PiBqb2luIDsKaGQ6IDxMU1RBUlQ-IDxoZGxldmVsPiA8V1MwPgogICAgKDx3aWtpcmVmPiB8IDxpbWc-IHwgPGZucmVmPiB8IDxsaW5rPiB8IDxib2xkPiB8IDxpdGFsaWM-IHwgPHN0cmlrZT4gfCA8Y29kZT4gfCA8ZW1vamk-IHwgPGtiZD4gfCA8c3ViPiB8IDxzdXA-IHwgPGhkYm9keT4pKiA8aGRpZD4_IDxETkwxPiA7CgpsaWxldmVsOiAnICcqID0-IGNvdW50IDsKb2xpZDogKDxBTFBIQT4rfDxESUdJVD4rKSA9PiBqb2luIDsKdWxpbml0OiA8RE5MPiA8V1MwPiAnLSchIDsKb2xpbml0OiA8RE5MPiA8V1MwPiA8b2xpZD4hICcuJyEgOwpsaWRlbGltOiAoPGRlbGltPiB8IDx1bGluaXQ-IHwgPG9saW5pdD4pIDsKbGlib2R5OiA8ZWJvZHk-KD8tPGxpZGVsaW0-ISkgPT4gam9pbiA7CnRvZG86ICdbJyEgWyB4WF0gJ10nISA8V1MxPiA9PiBob2lzdFIgOwp1bGl0ZW06IDxMU1RBUlQ-IDxsaWxldmVsPiAiLSAiISA8dG9kbz4_CiAgICAgICAgKDx3aWtpcmVmPiB8IDxpbWc-IHwgPGZucmVmPiB8IDxsaW5rPiB8IDxib2xkPiB8IDxpdGFsaWM-IHwgPHN0cmlrZT4gfCA8Y29kZT4gfCA8ZW1vamk-IHwgPGtiZD4gfCA8c3ViPiB8IDxzdXA-IHwgPGxpYm9keT4gKSogPEROTD4gOwpvbGl0ZW06IDxMU1RBUlQ-IDxsaWxldmVsPiA8b2xpZD4gIi4gIiEgPHRvZG8-PwogICAgICAgICg8d2lraXJlZj4gfCA8aW1nPiB8IDxmbnJlZj4gfCA8bGluaz4gfCA8Ym9sZD4gfCA8aXRhbGljPiB8IDxzdHJpa2U-IHwgPGNvZGU-IHwgPG
Vtb2ppPiB8IDxrYmQ-IHwgPHN1Yj4gfCA8c3VwPiB8IDxsaWJvZHk-ICkqIDxETkw-IDsKbGlzdDogKDx1bGl0ZW0-IHwgPG9saXRlbT4pKyA8RE5MMT4gOwoKY2JkZWxpbTogPExTVEFSVD4gImBgYCIhIDsKY29kZWJsb2NrOiA8Y2JkZWxpbT4hIDxjb2RlbWV0YT4gPGNvZGVib2R5PiA8RE5MMT4gOwpjb2RlbWV0YTogLig_KzxOTD4hKSA9PiBqb2luIDsKY29kZWJvZHk6IDxjYmJvZHk-KD8rPGNiZGVsaW0-KSA9PiBqb2luIDsKCmN1ZGVsaW06IDxMU1RBUlQ-ICI6OjoiISA7CmN1c3RvbWJsb2NrOiA8Y3VkZWxpbT4hIDxjdXN0b21tZXRhPiA8Y3VzdG9tYm9keT4gPEROTDE-IDsKY3VzdG9tbWV0YTogLig_KzxOTD4hKSA9PiBqb2luIDsKY3VzdG9tYm9keTogPGN1Ym9keT4oPys8Y3VkZWxpbT4pID0-IGpvaW4gOwoKbWV0YWJsb2NrOiA8TFNUQVJUPiAie3t7IiEgPG1ldGFib2R5PiA8RE5MMT4gOwptZXRhYm9keTogPG1ib2R5Pig_KzxtZXRhZW5kPiEpID0-IGpvaW4gOwptZXRhZW5kOiAifX19IiA8TEVORD4gOwoKYnFsZXZlbDogJz4nKyA9PiBjb3VudCA7CmJxZGVsaW06ICg8aWRlbGltPiB8IDxOTD4pIDsKYnFib2R5OiA8ZWJvZHk-KD8tPGJxZGVsaW0-ISkgPT4gam9pbiA7CmJxbGluZTogPExTVEFSVD4gPGJxbGV2ZWw-ICcgJz8hCiAgICAgICAgKDx3aWtpcmVmPiB8IDxpbWc-IHwgPGZucmVmPiB8IDxsaW5rPiB8IDxib2xkPiB8IDxpdGFsaWM-IHwgPHN0cmlrZT4gfCA8Y29kZT4gfCA8ZW1vamk-IHwgPGtiZD4gfCA8c3ViPiB8IDxzdXA-IHwgPGJxbGJyPiB8IDxicWJvZHk-KSogPEROTD4gOwpicWxicjogJ1xcJyEoPy08RE5MPikgOwpicXVvdGU6IDxicWxpbmU-KyA8RE5MMT4gOwoKdGRlbGltOiAoPGlkZWxpbT4gfCAnfCcgKSA7CnRib2R5OiA8ZWJvZHk-KD8tPHRkZWxpbT4hKSA9PiBqb2luIDsKdGNlbGw6IDxXUzA-ICg8d2lraXJlZj4gfCA8aW1nPiB8IDxmbnJlZj4gfCA8bGluaz4gfCA8Ym9sZD4gfCA8aXRhbGljPiB8IDxzdHJpa2U-IHwgPGNvZGU-IHwgPGVtb2ppPiB8IDxrYmQ-IHwgPHN1Yj4gfCA8c3VwPiB8IDx0Ym9keT4gKSogJ3wnISA7CnRyb3c6IDxMU1RBUlQ-ICd8JyEgPHRjZWxsPig_KzxETkw-KSA7CnRhYmxlOiA8dHJvdz4rIDxETkwxPiA7CgpocjogIi0tIiAnLScrIDxETkwxPiA9PiBqb2luIDsKCm1haW46IDxXUzA-ICg8aGQ-IHwgPGxpc3Q-IHwgPGJxdW90ZT4gfCA8Y29kZWJsb2NrPiB8IDxjdXN0b21ibG9jaz4gfCA8bWV0YWJsb2NrPiB8IDx0YWJsZT4gfCA8aHI-IHwgPGZub3RlPiB8IDxsaW5rZGVmPiB8IDxjb21tZW50PiB8IDxwYXJhPiApKiA7pG1haW7aAtQjIEhlbGxvIFwhXFsgOndvcmxkOjpzbWlsZTpbXmFiY10geyNoZDF9CgojIyBhYmMgeyNoZDJ9CiMjIGRlZiA8c3VwPkNvbnRyb2w8L3N1cD4KCmEgXDwgYgoKTGV0IGBleHAyKHgpID0gMioqeGAKClByZXNzIDxrYmQ-Q29udHJvbDwva2JkPiArIDxrYmQ-Ujwva2JkPiB0byA8IS0tc2tpcC0tPiByZWxvYWQuCgo6OjphbGVydA
pUZXN0IFtsYWJlbF1bcmVmcmVmXQo6OjoKCi0tLQoKe3t7IHsiaWQiOiAxMjMsICJwcm90ZWN0ZWQiOiB0cnVlIH0gfX19ClNvIFtbc21pbGVfZm9vMTJdXSDwn5iNIHdoYXQncyBbdGhpc10oLi90aGlzLm1kKVteMl0gdGhlbjoKCkEgW2xpbmsgX3dpdGggKipuZXN0ZWQgfn5mb3JtYXR-fioqIGFuZCBgcmVmZXJlbmNlYF9dW3JlZnJlZl0uLi4KCmBgYHRzIHRhbmdsZTpubyBsbjp5ZXMKY29uc3QgcmUgPSAvW1xuXHJdKy9nOwovLyB0aGUgZW5kCmBgYAoKMS4gZm9vCiAgNC4gYmFyCiAgICAgYmF6IGFuZCBzbyBvbgogICAgIG11bHRpbGluZQoyLiBbIF0gdGFzawozLiBbeF0gKipkb25lKioKCj4gYmxvY2txdW90ZQo-IG11bHRpbGluZSB0b29cCj4-IG5lc3RlZAo-Pj4gbGV2ZWwgMwo-IC0tIHNvdXJjZQoKfCBmb28gfCBiYXIgfAp8Oi06fC06fAp8IFtbcmVmIzIyXV0gfCBgb25gICoqb2ZmKiogfAoKW15hYmNdOiBGb28gX2Jhcl8gYmF6CgpbXjJdOiBUT0RPCgpbcmVmcmVmXTogaHR0cDovL3RoaS5uZy8KW2Zvb106IC4vZm9vLm1kI2JhcgoK2VA8IS0tCmlnbm9yZQptdWx0aWxpbmUKLS0-CgoxMS4gYWJjCiAgLSB4eXoKMjEuIHh4eAogIGEuIGZvbwogICAgYi4gYmFyCjMxLiB5eXkKCtlSfCBjb2wxIHwgY29sMiB8CnwgLS0tLSB8IC0tLS06fAp8IDxrYmQ-Q3RybDwva2JkPiB8IDxzdXA-Ujwvc3VwPiBgW1thYmNcfGlkXV1gIHwKCrkqKjp3YXJuOiBhYmMKZGVmIDpvazoqKgoK2Ug-IGxpbmUxCj4-ICoqbGluZSAyKipcCj4-IGxpbmUgMmEKPj4gbGluZSAyYgo-PiAKPj4gbGluZSAyYwo-Pj4gbGluZSAzCgo
import type { Fn3, IObjectOf, Maybe, Primitive } from "@thi.ng/api";
import { peek } from "@thi.ng/arrays/peek";
import { isArray } from "@thi.ng/checks/is-array";
import { isPlainObject } from "@thi.ng/checks/is-plain-object";
import { isPrimitive } from "@thi.ng/checks/is-primitive";
import { isString } from "@thi.ng/checks/is-string";
import { DEFAULT, defmulti } from "@thi.ng/defmulti";
import { EMOJI } from "@thi.ng/emoji/emoji";
import type { ILogger } from "@thi.ng/logger";
import type { ParseScope, ParseState } from "@thi.ng/parse";
import { defContext } from "@thi.ng/parse/context";
import { defGrammar } from "@thi.ng/parse/grammar";
import { escapeEntities } from "@thi.ng/strings/entities";
import { slugifyGH } from "@thi.ng/strings/slugify";
import type {
ColumnAlign,
ParseOpts,
ParseResult,
TagTransforms,
TodoAttribs,
TransformCtx,
} from "./api.js";
/**
 * Markdown grammar, compiled via `defGrammar()` from the rule definitions in
 * the template literal below. {@link parseRaw} uses the `main` rule as entry
 * point and the handlers of {@link transformScope} are keyed by the names of
 * these rules.
 *
 * @remarks
 * Rule groups, in order of appearance:
 *
 * - newline helpers (`DNL1`, `DNL2`), line break & escape sequences
 * - delimiters and body text rules for inline content
 * - inline elements: comment, wikiref, footnote ref, links, images,
 *   bold/italic/strike, code, kbd/sub/sup, emoji
 * - block elements: paragraph, heading, lists (incl. todo items), code
 *   block, custom block, meta block, blockquote, table, horizontal rule
 * - `main`: optional leading whitespace followed by zero or more block
 *   elements
 *
 * Because the grammar is embedded in a JS template literal, backticks inside
 * the rules are escaped and every backslash of the grammar source is doubled
 * (four backslashes in this file yield two in the actual grammar string).
 * Do not re-indent or reformat the literal: its contents are the grammar
 * source handed to `defGrammar()`.
 */
export const GRAMMAR = defGrammar(`
DNL1: <DNL>+ => discard ;
DNL2: <NL>{2,} ;
lbr: '\\\\'! <NL>! ;
esc: '\\\\'! ('_' | '*' | '\`' | '~' | ':' | '-' | '<' | '>' | '[' | ']' | '(' | ')' | '{' | '}' | '#' | '.' | '|' | '\\\\') ;
cesc: '\\\\'! ('\`'| '\\\\') ;
cbesc: '\\\\'! '\`' ;
cuesc: '\\\\'! ':' ;
mesc: '\\\\'! '}' ;
idelim: ("![" | '[' | "**" | '_' | '\`' | '<' | "~~" | " :" | <lbr>) ;
delim: (<idelim> | <DNL2>) ;
ebody: (<esc>|.) ;
cbody: (<cesc>|.) ;
cbbody: (<cbesc>|.) ;
cubody: (<cuesc>|.) ;
mbody: (<mesc>|.) ;
body: <ebody>(?-<delim>!) => join ;
ibody: <ebody>(?-<idelim>!) => join ;
comment: "<!--"! .(?+"-->") => discard ;
wikiref: "[["! <ebody>(?+"]]"!) => join ;
fnref: "[^"! <label> ;
fnote: <LSTART> "[^"! <fnlabel> <WS1> <para> ;
fnlabel: <ebody>(?+"]:"!) => join ;
label: <ebody>(?+']'!) => join ;
target: <ebody>(?+')'!) => join ;
link: '['! <linklabel> '('! <target> ;
linkref: '['! <linklabel> '['! <label> ;
linkdef: <LSTART> '['! <label> ':'! <WS1> <ldtarget> ;
ldtarget: <ebody>(?+<DNL1>) => join ;
linklabel: (<img> | <bold> | <italic> | <strike> | <code> | <emoji> | <kbd> | <sub> | <sup> | <linkbody>)* ']'! ;
linkdelim: ("![" | '[' | "**" | '_' | "~~" | '\`' | ']') ;
linkbody: <ebody>(?-<linkdelim>!) => join ;
img: "!["! <label> '('! <target> ;
bold: "**"! (<wikiref> | <img> | <fnref> | <linkref> | <link> | <italic> | <strike> | <code> | <emoji> | <kbd> | <sub> | <sup> | <ibody>)* "**"! ;
italic: '_'! (<wikiref> | <img> | <fnref> | <linkref> | <link> | <bold> | <strike> | <code> | <emoji> | <kbd> | <sub> | <sup> | <ibody>)* '_'! ;
strike: "~~"! (<wikiref> | <img> | <fnref> | <linkref> | <link> | <bold> | <italic> | <code> | <emoji> | <kbd> | <sub> | <sup> | <ibody>)* "~~"! ;
code: '\`'! <cbody>(?+'\`'!) => join ;
kbd: "<kbd>"! <ebody>(?+"</kbd>"!) => join ;
sub: "<sub>"! <ebody>(?+"</sub>"!) => join ;
sup: "<sup>"! <ebody>(?+"</sup>"!) => join ;
emoji: ' '? ':'! (<ALPHA_NUM> | '+')(?+':'!) => join ;
para: (<wikiref> | <img> | <fnref> | <linkref> | <link> | <bold> | <italic> | <strike> | <code> | <kbd> | <sub> | <sup> | <emoji> | <lbr> | <comment> | <body>)* <DNL2>! ;
hdlevel: '#'+ => count ;
hdid: "{#"! (<ALPHA_NUM> | '-')+ '}'! => join ;
hddelim: (<idelim> | <NL> | "{#") ;
hdbody: <ebody>(?-<hddelim>!) => join ;
hd: <LSTART> <hdlevel> <WS0>
(<wikiref> | <img> | <fnref> | <link> | <bold> | <italic> | <strike> | <code> | <emoji> | <kbd> | <sub> | <sup> | <hdbody>)* <hdid>? <DNL1> ;
lilevel: ' '* => count ;
olid: (<ALPHA>+|<DIGIT>+) => join ;
ulinit: <DNL> <WS0> '-'! ;
olinit: <DNL> <WS0> <olid>! '.'! ;
lidelim: (<delim> | <ulinit> | <olinit>) ;
libody: <ebody>(?-<lidelim>!) => join ;
todo: '['! [ xX] ']'! <WS1> => hoistR ;
ulitem: <LSTART> <lilevel> "- "! <todo>?
(<wikiref> | <img> | <fnref> | <link> | <bold> | <italic> | <strike> | <code> | <emoji> | <kbd> | <sub> | <sup> | <libody> )* <DNL> ;
olitem: <LSTART> <lilevel> <olid> ". "! <todo>?
(<wikiref> | <img> | <fnref> | <link> | <bold> | <italic> | <strike> | <code> | <emoji> | <kbd> | <sub> | <sup> | <libody> )* <DNL> ;
list: (<ulitem> | <olitem>)+ <DNL1> ;
cbdelim: <LSTART> "\`\`\`"! ;
codeblock: <cbdelim>! <codemeta> <codebody> <DNL1> ;
codemeta: .(?+<NL>!) => join ;
codebody: <cbbody>(?+<cbdelim>) => join ;
cudelim: <LSTART> ":::"! ;
customblock: <cudelim>! <custommeta> <custombody> <DNL1> ;
custommeta: .(?+<NL>!) => join ;
custombody: <cubody>(?+<cudelim>) => join ;
metablock: <LSTART> "{{{"! <metabody> <DNL1> ;
metabody: <mbody>(?+<metaend>!) => join ;
metaend: "}}}" <LEND> ;
bqlevel: '>'+ => count ;
bqdelim: (<idelim> | <NL>) ;
bqbody: <ebody>(?-<bqdelim>!) => join ;
bqline: <LSTART> <bqlevel> ' '?!
(<wikiref> | <img> | <fnref> | <link> | <bold> | <italic> | <strike> | <code> | <emoji> | <kbd> | <sub> | <sup> | <bqlbr> | <bqbody>)* <DNL> ;
bqlbr: '\\\\'!(?-<DNL>) ;
bquote: <bqline>+ <DNL1> ;
tdelim: (<idelim> | '|' ) ;
tbody: <ebody>(?-<tdelim>!) => join ;
tcell: <WS0> (<wikiref> | <img> | <fnref> | <link> | <bold> | <italic> | <strike> | <code> | <emoji> | <kbd> | <sub> | <sup> | <tbody> )* '|'! ;
trow: <LSTART> '|'! <tcell>(?+<DNL>) ;
table: <trow>+ <DNL1> ;
hr: "--" '-'+ <DNL1> => join ;
main: <WS0> (<hd> | <list> | <bquote> | <codeblock> | <customblock> | <metablock> | <table> | <hr> | <fnote> | <linkdef> | <comment> | <para> )* ;
`);
/**
 * Default {@link TagTransforms} implementations. Each handler receives the
 * transform context as first argument, followed by element specific values,
 * and returns a hiccup-style array in the form `["tagname", {...}, ...body]`
 * (or a plain value for `emoji` and `meta`).
 *
 * @remarks
 * Handlers accepting a `meta` argument attach it via {@link withMeta} as
 * `__meta` attribute (only if non-nullish). Attributes prefixed with `__`
 * (`__head`, `__length`, `__index`, `__align`) carry extra parse information
 * for downstream consumers.
 */
export const DEFAULT_TAG_TRANSFORMS: TagTransforms = {
	bold: (_, body) => ["strong", {}, ...body],
	blockquote: (_, body, meta) => ["blockquote", withMeta({}, meta), ...body],
	br: () => ["br", {}],
	code: (_, body) => ["code", {}, body],
	// `lang` is exposed as `data.lang`, remaining header words as `__head`
	codeblock: (_, lang, __head, body, meta) => [
		"pre",
		withMeta({ data: { lang }, __head }, meta),
		["code", {}, body],
	],
	custom: (_, type, __head, body, meta) => [
		"custom",
		withMeta({ type, __head }, meta),
		body,
	],
	// Resolves an emoji name to its character, falling back to the name
	// itself if unknown. Only own keys of the lookup table are considered,
	// otherwise names like `constructor` or `toString` would resolve to
	// inherited `Object.prototype` members (i.e. functions) instead of text.
	emoji: (_, id) => {
		const table: Record<string, string> = EMOJI;
		return (
			(Object.prototype.hasOwnProperty.call(table, id) && table[id]) || id
		);
	},
	// footnote body, prefixed with its ID and followed by a back-reference
	// link to the matching `footnoteRef` anchor
	footnote: (_, id, body, meta) => [
		"li",
		withMeta({ id: `fn-${id}` }, meta),
		["sup", {}, `[${id}] `],
		...body,
		" ",
		["a", { href: `#fnref-${id}` }, "↩︎"],
	],
	footnoteRef: (_, id) => [
		"sup",
		{},
		["a", { id: `fnref-${id}`, href: `#fn-${id}` }, `[${id}]`],
	],
	// wraps all collected footnotes, ordered by (string-sorted) footnote ID
	footnoteWrapper: (_, notes) => [
		"ul",
		{ id: "footnotes" },
		...Object.keys(notes)
			.sort()
			.map((id) => notes[id]),
	],
	// heading levels beyond 6 have no HTML equivalent and become paragraphs
	heading: (_, level, id, body, meta) => [
		level > 6 ? "p" : `h${level}`,
		withMeta({ id }, meta),
		...body,
	],
	hr: (_, __length, meta?) => ["hr", withMeta({ __length }, meta)],
	img: (_, alt, src, title) => ["img", { src, alt, title }],
	italic: (_, body) => ["em", {}, ...body],
	kbd: (_, key) => ["kbd", {}, key],
	link: (_, href, title, body) => ["a", { href, title }, ...body],
	// `href` & `title` are functions so that the lookup in `ctx.linkRefs`
	// happens when they are called rather than when the link is transformed
	linkRef: (ctx, refID, body) => [
		"a",
		{
			href: () => ctx.linkRefs[refID]?.[0],
			title: () => ctx.linkRefs[refID]?.[1],
		},
		...body,
	],
	meta: (_, body) => body,
	olitem: (_, attribs, index, body) => [
		"li",
		{ ...attribs, __index: index },
		...body,
	],
	ol: (_, items, meta) => ["ol", withMeta({}, meta), ...items],
	para: (_, body, meta) => ["p", withMeta({}, meta), ...body],
	strike: (_, body) => ["s", {}, ...body],
	sub: (_, body) => ["sub", {}, body],
	sup: (_, body) => ["sup", {}, body],
	table: (ctx, head, rows, meta) => [
		"table",
		withMeta({ __align: ctx.align }, meta),
		["thead", {}, head],
		["tbody", {}, ...rows],
	],
	tableCell: (_, body) => ["td", {}, ...body],
	tableHead: (_, body) => ["th", {}, ...body],
	tableRow: (_, cells) => ["tr", {}, ...cells],
	ul: (_, items, meta) => ["ul", withMeta({}, meta), ...items],
	ulitem: (_, attribs, body) => ["li", attribs, ...body],
	// whitespace in the page ID is replaced with underscores before URI
	// encoding; the ID doubles as link text if no label is given
	wikiref: (_, id, label) => [
		"a",
		{ class: "wikiref", href: encodeURI(id.replace(/\s+/g, "_")) },
		label || id,
	],
};
/**
 * Error type thrown by {@link parseRaw} if the `main` grammar rule fails.
 *
 * @remarks
 * If a parse state is given, the error message reports the line, column and
 * absolute position at which the parser stopped. The state itself is exposed
 * via the public {@link ParseError.state} property. The error's `name` is set
 * to `"ParseError"` so that it can be told apart from generic errors in logs
 * and stack traces.
 */
export class ParseError extends Error {
	/**
	 * @param state - optional parser state at the time of failure
	 */
	constructor(public state?: ParseState<string>) {
		super(
			state
				? `stopped line: ${state.l}, col: ${state.c} (pos: ${state.p})`
				: undefined
		);
		// without this, instances would report the inherited name "Error"
		this.name = "ParseError";
	}
}
/**
 * Main entry point: parses the given Markdown source string and transforms
 * the resulting parse tree using the configured {@link TagTransforms}.
 *
 * @remarks
 * All options are optional. Custom tag transforms are merged over the
 * defaults on a tag-by-tag basis. With the default transforms the result is a
 * simple hiccup tree, i.e. each element is an array in the form `["tagname",
 * {...}, ...body]`.
 *
 * See [thi.ng/hiccup](https://thi.ng/hiccup) and related packages for further
 * details.
 *
 * The returned object contains the transformed `result` array, the transform
 * context (`ctx`), a `complete` flag (derived from the parse context's `done`
 * flag) and the final parser `state`.
 *
 * @param src - Markdown source
 * @param opts - tag transforms, parse options, predefined link references
 * and logger
 */
export const parse = (
	src: string,
	{
		tags,
		opts,
		linkRefs,
		logger,
	}: Partial<{
		tags: Partial<TagTransforms>;
		opts: Partial<ParseOpts>;
		linkRefs: IObjectOf<[string, Maybe<string>?]>;
		logger: ILogger;
	}> = {}
): ParseResult => {
	// stage 1: raw parse tree
	const raw = parseRaw(src, opts?.retain);
	// stage 2: transform the tree into the result accumulator
	const ctx = defTransformContext(tags, opts, linkRefs, logger);
	const result: any[] = [];
	transformScope(raw.root, ctx, result);
	const { done, state } = raw;
	return { result, ctx, complete: !!done, state };
};
/**
 * First stage of the parsing process (without any result transformations).
 * Creates a parse context for the given source, applies the `main` rule of
 * {@link GRAMMAR} to it and returns the context, incl. the raw abstract
 * syntax tree of the parsed document. If the rule fails entirely, throws a
 * {@link ParseError} with the context's current state.
 *
 * @remarks
 * Note: Even if the function returns a result, parsing might have been only
 * partially successful (can be checked via the [`.done`
 * flag](https://docs.thi.ng/umbrella/parse/classes/ParseContext.html#done)).
 *
 * This function is only for advanced use. Mostly you'll probably want to use
 * the main {@link parse} function instead.
 *
 * @param src - Markdown source
 * @param retain - passed as `retain` option to the parse context (default:
 * false)
 */
export const parseRaw = (src: string, retain = false) => {
	// Two newlines are always appended to the input. Presumably this ensures
	// that a trailing block can still match its terminating blank line.
	const padded = src + "\n\n";
	const ctx = defContext(padded, { retain });
	const matched = GRAMMAR!.rules.main(ctx);
	if (matched) return ctx;
	throw new ParseError(ctx.state);
};
/**
 * Creates a fresh {@link TransformCtx} for use with {@link transformScope}.
 *
 * @remarks
 * User supplied options are merged over the defaults (`escape: false`,
 * `retain: false`) and user supplied tag transforms are merged over
 * {@link DEFAULT_TAG_TRANSFORMS}. All other state (footnotes, headings, table
 * alignment/row/column, pending metadata) starts out empty.
 *
 * @param tags - custom tag transforms
 * @param opts - parse options
 * @param linkRefs - predefined link references (an empty object is used if
 * omitted)
 * @param logger - optional logger
 */
export const defTransformContext = (
	tags?: Partial<TagTransforms>,
	opts?: Partial<ParseOpts>,
	linkRefs?: IObjectOf<[string, Maybe<string>?]>,
	logger?: ILogger
): TransformCtx => {
	const refs = linkRefs || {};
	const mergedOpts = { escape: false, retain: false, ...opts };
	const mergedTags = { ...DEFAULT_TAG_TRANSFORMS, ...tags };
	return {
		footnotes: {},
		headings: [],
		linkRefs: refs,
		hasFootnotes: false,
		logger,
		meta: null,
		align: [],
		column: 0,
		row: 0,
		opts: mergedOpts,
		tags: mergedTags,
	};
};
/**
 * Polymorphic & recursive parse scope/node transformation function. Takes a
 * single scope, context and accumulator array, then calls itself recursively
 * for any child scopes and passes relevant data to its user defined
 * {@link TagTransforms} handler and adds result to the accumulator array.
 *
 * @remarks
 * Dispatch is based on the scope's `id`. Some handlers do not add anything to
 * the accumulator and only update the context instead (`fnote`, `linkdef`,
 * `metablock`). Block-level handlers pass `ctx.meta` to their tag transform
 * and reset it to `null` afterwards, so metadata set by a `metablock` is
 * consumed by the next block-level element being transformed.
 *
 * Handlers access child scopes by fixed index, so they depend on the exact
 * shape of the scopes produced by the corresponding {@link GRAMMAR} rules.
 */
export const transformScope: Fn3<
ParseScope<string>,
TransformCtx,
any[],
void
> = defmulti<ParseScope<string>, TransformCtx, any[], void>(
// dispatch function: logs the scope (if a logger is configured) and selects
// the implementation via the scope's ID
(x, ctx) => {
ctx.logger && ctx.logger.debug(x);
return x.id;
},
// ID aliases: scopes with an ID on the left are handled by the
// implementation registered for the ID on the right.
// NOTE(review): `main` is aliased to "root" here, but also has its own
// implementation below — presumably the direct implementation takes
// precedence, making this alias redundant; confirm against defmulti.
{
bqbody: "body",
bqlbr: "lbr",
bqline: "repeat0",
hdbody: "body",
ibody: "body",
label: "body",
libody: "body",
linkbody: "body",
main: "root",
repeat1: "repeat0",
tbody: "body",
},
{
// fallback for scope IDs without implementation or alias
[DEFAULT]: (scope: ParseScope<string>, ctx: TransformCtx) => {
throw new Error(
`unknown ID: ${scope.id}, ctx: ${JSON.stringify(ctx)}`
);
},
// transforms the first child, then appends the footnote wrapper element
// if any footnotes have been collected in the context
root: ({ children }, ctx, acc) => {
if (!children) return;
transformScope(children![0], ctx, acc);
if (ctx.hasFootnotes) {
__collect(acc, ctx.tags.footnoteWrapper(ctx, ctx.footnotes));
}
},
main: (scope, ctx, acc) => transformScope(scope.children![0], ctx, acc),
// transforms all children (if any) into the same accumulator
repeat0: (scope, ctx, acc) =>
scope.children && __children(ctx, scope.children, acc),
// plain text, optionally entity-escaped (see `ctx.opts.escape`)
body: (scope, ctx, acc) => __collect(acc, __escape(ctx, scope.result)),
bold: (scope, ctx, acc) =>
__collect(
acc,
ctx.tags.bold(ctx, __children(ctx, scope.children!))
),
bquote: (scope, ctx, acc) => {
// stack of body accumulators, one per currently open nesting level
const stack: any[][] = [[]];
const children = scope.children![0].children!;
// closes nested levels until the stack depth equals `level`: each popped
// body is wrapped as blockquote and added to its parent body. Returns
// the body which is then on top of the stack.
const $unwind = (level: number) => {
while (level < stack.length) {
const nested = stack.pop()!;
__collect(
peek(stack),
ctx.tags.blockquote(ctx, __trimBody(nested))
);
}
return peek(stack);
};
for (let i = 0, n = children!.length - 1; i <= n; i++) {
// first child holds the nesting level (result of `bqlevel`), the second
// the line's content scope
const [{ result: level }, bqline] = children[i].children!;
let body = peek(stack);
if (level > stack.length) {
// deeper nesting: open new bodies until the depth matches
while (level > stack.length) stack.push((body = []));
} else if (level < stack.length) {
// shallower nesting: close the deeper levels first
body = $unwind(level);
} else if (body.length) {
// same level & body already has content: decide how to join this line
// with the previous one
const prev = children[i - 1].children![1];
if (!bqline.children) {
// line without content: emit two line breaks
__collect(body, ctx.tags.br(ctx));
__collect(body, ctx.tags.br(ctx));
} else if (
prev.children &&
peek(prev.children).id !== "bqlbr"
) {
// previous line did not end with an explicit line break: join with
// a single space
body.push(" ");
}
}
transformScope(bqline, ctx, body);
}
// close all remaining nested levels and emit the outermost blockquote
// (the only one receiving the pending metadata)
__collect(
acc,
ctx.tags.blockquote(ctx, __trimBody($unwind(1)), ctx.meta)
);
ctx.meta = null;
},
code: (scope, ctx, acc) =>
__collect(acc, ctx.tags.code(ctx, __escape(ctx, scope.result))),
codeblock: ({ children }, ctx, acc) => {
// header line: first word is the language, remaining words are passed
// on as additional header items
const [lang, ...head] = (<string>children![0].result).split(" ");
const body = children![1].result.trim();
__collect(
acc,
ctx.tags.codeblock(
ctx,
lang,
head,
__escape(ctx, body),
ctx.meta
)
);
ctx.meta = null;
},
customblock: ({ children }, ctx, acc) => {
// header line: first word is the block type, remaining words are passed
// on as additional header items. Unlike `codeblock`, the body is not
// escaped here.
const [type, ...head] = (<string>children![0].result).split(" ");
__collect(
acc,
ctx.tags.custom(
ctx,
type,
head,
children![1].result.trim(),
ctx.meta
)
);
ctx.meta = null;
},
emoji: ({ result }, ctx, acc) => {
// the `emoji` grammar rule may include a single leading space in the
// result: emit it separately and strip it from the emoji name
if (result[0] === " ") {
__collect(acc, " ");
result = result.substring(1);
}
__collect(acc, ctx.tags.emoji(ctx, result));
},
// footnote definition: stored in `ctx.footnotes` (keyed by footnote ID)
// instead of being added to the accumulator
fnote: ({ children }, ctx) => {
const body: any[] = [];
const id = children![0].result;
transformScope(children![1].children![0], ctx, body);
const res = ctx.tags.footnote(ctx, id, body, ctx.meta);
if (res != null) {
ctx.hasFootnotes = true;
ctx.footnotes[id] = res;
}
ctx.meta = null;
},
fnref: (scope, ctx, acc) =>
__collect(
acc,
ctx.tags.footnoteRef(ctx, scope.children![0].result)
),
hd: ({ children }, ctx, acc) => {
const body: any[] = [];
const level = children![0].result;
transformScope(children![1], ctx, body);
__trimBody(body);
// use the result of the third child (if any) as heading ID, otherwise
// derive a slug from the heading's text content
const slug =
children![2]?.result || slugifyGH(extractBody(body).join(""));
ctx.headings.push({ level, body });
__collect(acc, ctx.tags.heading(ctx, level, slug, body, ctx.meta));
ctx.meta = null;
},
hr: (scope, ctx, acc) => {
// the length of the matched string is passed on to the tag transform
__collect(acc, ctx.tags.hr(ctx, scope.result.length, ctx.meta));
ctx.meta = null;
},
img: ({ children }, ctx, acc) =>
__collect(
acc,
ctx.tags.img(
ctx,
__escape(ctx, children![0].result.trim()),
...__linkTitle(children![1].result)
)
),
italic: (scope, ctx, acc) =>
__collect(
acc,
ctx.tags.italic(ctx, __children(ctx, scope.children!))
),
kbd: (scope, ctx, acc) => __collectPrim(scope, ctx, acc, "kbd"),
lbr: (_, ctx, acc) => __collect(acc, ctx.tags.br(ctx)),
link: ({ children }, ctx, acc) =>
__collect(
acc,
ctx.tags.link(
ctx,
...__linkTitle(children![1].result),
__children(ctx, children![0].children!)
)
),
// link reference definition: stored in `ctx.linkRefs` (as `[href, title]`)
// instead of being added to the accumulator
linkdef: ({ children }, ctx) => {
ctx.linkRefs[children![0].result] = __linkTitle(
children![1].result
);
},
linkref: ({ children }, ctx, acc) =>
__collect(
acc,
ctx.tags.linkRef(
ctx,
children![1].result,
__children(ctx, children![0].children!)
)
),
list: (scope, ctx, acc) => {
// Pass 1: build an intermediate tree of the form
// `[listType, ...items]`, with items as produced by the `ulitem` /
// `olitem` handlers and nested lists appended to their parent item.
const children = scope.children![0].children!;
const stack: any[][] = [
[children[0].id === "ulitem" ? "ul" : "ol"],
];
// indentation levels of the lists currently on the stack
const levels = [0];
for (let item of children) {
const currLevel = item.children![0].result;
if (currLevel > peek(levels)) {
// deeper indentation: start a sublist and attach it to the last item
// of the current list (or to a new, otherwise empty item if the list
// has no items yet)
// NOTE(review): for ordered lists the synthetic item has no index
// slot, whereas `$item` below reads the index from position 2 —
// looks like the sublist would be passed as index; confirm.
const sublist = [item.id === "ulitem" ? "ul" : "ol"];
const parent = peek(stack);
parent.length > 1
? peek(parent).push(sublist)
: parent.push([
parent[0] === "ul" ? "ulitem" : "olitem",
{},
sublist,
]);
stack.push(sublist);
levels.push(currLevel);
} else if (currLevel < peek(levels)) {
// shallower indentation: close sublists until the level matches
while (currLevel < peek(levels)) {
stack.pop();
levels.pop();
}
}
transformScope(item, ctx, peek(stack));
}
// Pass 2: recursively convert the intermediate tree via the user's tag
// transforms. Only the root list receives the pending metadata.
const $list = (root: any[], isRoot = false) =>
ctx.tags[<"ul" | "ol">root[0]](
ctx,
root.slice(1).map($item),
isRoot ? ctx.meta : null
);
const $item = (item: any[]) => {
// a nested list (if present) is always the item's last element
let last = item[item.length - 1];
if (last[0] === "ul" || last[0] === "ol")
item[item.length - 1] = $list(last);
return item[0] === "ulitem"
? ctx.tags.ulitem(ctx, item[1], item.slice(2))
: ctx.tags.olitem(ctx, item[1], item[2], item.slice(3));
};
__collect(acc, $list(stack[0], true));
ctx.meta = null;
},
// stores the transformed (trimmed) block body as pending metadata for the
// next block-level element
metablock: ({ children }, ctx) => {
ctx.meta = ctx.tags.meta(ctx, children![0].result.trim());
},
// emits an intermediate item `["olitem", attribs, index, ...body]`, which
// is converted by the `list` handler
olitem: ({ children }, ctx, acc) => {
const body: any[] = [];
transformScope(children![3], ctx, body);
__collect(acc, [
"olitem",
__listItemAttribs(children![2]),
children![1].result,
...__trimBody(body),
]);
},
para: (scope, ctx, acc) => {
__collect(
acc,
ctx.tags.para(
ctx,
__trimBody(__children(ctx, scope.children!)),
ctx.meta
)
);
ctx.meta = null;
},
strike: (scope, ctx, acc) =>
__collect(
acc,
ctx.tags.strike(ctx, __children(ctx, scope.children!))
),
sub: (scope, ctx, acc) => __collectPrim(scope, ctx, acc, "sub"),
sup: (scope, ctx, acc) => __collectPrim(scope, ctx, acc, "sup"),
table: (scope, ctx, acc) => {
const children = scope.children![0].children!;
const head: any[] = [];
const rows: any[] = [];
ctx.row = 0;
if (children.length > 1) {
// second row defines the column alignments, all rows after it are
// body rows (numbered from 1)
ctx.align = __columnAlignments(
children[1].children![0].children!
);
for (let i = 2, n = children.length; i < n; i++) {
ctx.row = i - 1;
transformScope(children[i], ctx, rows);
}
} else {
// single row only: use "left" for every column
ctx.align = new Array(
children[0].children![0].children!.length
).fill("left");
}
// the first row is transformed last, with `ctx.row` reset to 0 so that
// its cells are emitted as header cells (see `tcell`)
ctx.row = 0;
transformScope(children[0], ctx, head);
__collect(acc, ctx.tags.table(ctx, head[0], rows, ctx.meta));
ctx.meta = null;
},
// header cell if in row 0, regular cell otherwise
tcell: ({ children }, ctx, acc) =>
__collect(
acc,
(ctx.row > 0 ? ctx.tags.tableCell : ctx.tags.tableHead)(
ctx,
__trimBody(__children(ctx, children!))
)
),
trow: (scope, ctx, acc) => {
const cells: any[] = [];
scope.children![0].children!.forEach((c, i) => {
// record the current column index in the context for each cell
ctx.column = i;
transformScope(c, ctx, cells);
});
__collect(acc, ctx.tags.tableRow(ctx, cells));
},
// emits an intermediate item `["ulitem", attribs, ...body]`, which is
// converted by the `list` handler
ulitem: ({ children }, ctx, acc) => {
const body: any[] = [];
transformScope(children![2], ctx, body);
__collect(acc, [
"ulitem",
__listItemAttribs(children![1]),
...__trimBody(body),
]);
},
wikiref: (scope, ctx, acc) => {
// format: `id` or `id|label` (both trimmed)
const [id, label] = scope.result.split("|");
__collect(acc, ctx.tags.wikiref(ctx, id.trim(), label?.trim()));
},
}
);
/**
 * Attaches optional metadata to an attributes object: if `meta` is neither
 * `null` nor `undefined`, it is stored under the `__meta` key of `target`.
 * The object is modified in place and returned in either case.
 *
 * @param target - attributes object
 * @param meta - optional metadata
 */
export const withMeta = (target: any, meta?: any) => {
	if (meta === null || meta === undefined) return target;
	target.__meta = meta;
	return target;
};
/**
 * Recursively collects all primitive body values of a hiccup tree and
 * returns them as flat array (in depth-first order).
 *
 * @remarks
 * If the second element of an array is a plain object, it is treated as the
 * element's attributes and the first two elements (tag & attributes) are
 * skipped. Otherwise all elements of the array are considered. Values which
 * are neither primitives nor arrays are ignored.
 *
 * @param body - hiccup tree / body array
 * @param acc - result accumulator (a new array is used if omitted)
 */
export const extractBody = (body: any[], acc: Primitive[] = []) => {
	const content = isPlainObject(body[1]) ? body.slice(2) : body;
	for (let i = 0; i < content.length; i++) {
		const item = content[i];
		if (isPrimitive(item)) {
			acc.push(item);
			continue;
		}
		if (isArray(item)) extractBody(item, acc);
	}
	return acc;
};
/**
 * Appends `x` to `acc`, unless it is `null` or `undefined`. Returns `false`
 * if the value was skipped, otherwise the result of `acc.push()`.
 *
 * @internal
 */
const __collect = (acc: any[], x: any) => {
	if (x == null) return false;
	return acc.push(x);
};
/**
 * Shared handler for the `kbd`, `sub` & `sup` elements: applies the tag
 * transform of the given name to the scope's result and collects the
 * outcome in `acc`.
 *
 * @internal
 */
const __collectPrim = (
	scope: ParseScope<string>,
	ctx: TransformCtx,
	acc: any[],
	tag: "kbd" | "sub" | "sup"
) => {
	const transformed = ctx.tags[tag](ctx, scope.result);
	return __collect(acc, transformed);
};
/**
 * Transforms the given child scopes (in order) via {@link transformScope},
 * collecting the results in `acc` (a new array by default), which is then
 * returned.
 *
 * @internal
 */
const __children = (
	ctx: TransformCtx,
	children: ParseScope<string>[],
	acc: any[] = []
) => {
	for (let i = 0; i < children!.length; i++) {
		transformScope(children![i], ctx, acc);
	}
	return acc;
};
/**
 * Returns `x` with entities escaped if the `escape` option is enabled in the
 * given context, otherwise returns `x` unchanged.
 *
 * @internal
 */
const __escape = (ctx: TransformCtx, x: string) => {
	if (ctx.opts.escape) return escapeEntities(x);
	return x;
};
/**
 * Computes the attributes for a list item from its (optional) `todo` scope.
 * For todo items, returns `__todo: true` and a `__done` flag indicating
 * whether the checkbox is ticked. For any other scope (or none) returns an
 * empty object.
 *
 * @remarks
 * The grammar's `todo` rule accepts a space, `x` or `X` between the
 * brackets, therefore both `[x]` and `[X]` are treated as done.
 *
 * @param scope - scope at the item's `todo` position
 *
 * @internal
 */
const __listItemAttribs = (scope?: ParseScope<string>): TodoAttribs =>
	scope?.id === "todo"
		? {
				__todo: true,
				__done: scope.result === "x" || scope.result === "X",
		  }
		: {};
/**
 * Removes superfluous whitespace from a body array (in place) and returns
 * it: if the body consists of a single string only, that string is trimmed
 * on both sides. Otherwise, a trailing whitespace-only string item is
 * removed.
 *
 * @internal
 */
const __trimBody = (body: any[]) => {
	if (body.length === 1 && isString(body[0])) {
		body[0] = body[0].trim();
		return body;
	}
	const tail = peek(body);
	const isBlankTail = isString(tail) && /^\s+$/.test(tail);
	if (isBlankTail) body.pop();
	return body;
};
/**
 * Derives the column alignments from the cells of a table's delimiter row.
 * For each cell, the (trimmed) result of its first grandchild scope is
 * checked: a `:-` prefix means "left", a `-:` suffix means "right", both
 * together "center" and neither "default".
 *
 * @internal
 */
const __columnAlignments = (children: ParseScope<string>[]) => {
	const modes = ["default", "left", "right", "center"];
	return children.map((cell) => {
		const spec = <string>cell.children![0].children![0].result.trim();
		let idx = 0;
		if (spec.startsWith(":-")) idx |= 1;
		if (spec.endsWith("-:")) idx |= 2;
		return <ColumnAlign>modes[idx];
	});
};
/**
 * Splits a link target into `[href, title]`. The title is only recognized
 * if the string ends with a double-quoted, non-empty part preceded by a
 * whitespace character (e.g. `./foo.md "Title"`). Without a title, the
 * source string is returned unchanged as href and the title is `undefined`.
 *
 * @internal
 */
const __linkTitle = (src: string): [string, Maybe<string>] => {
	const found = src.match(/\s"(.+)"$/);
	if (!found) return [src, undefined];
	const [, title] = found;
	return [src.slice(0, found.index), title];
};