// packages/strings/src/entities.ts
/**
 * Lookup table mapping single characters to the HTML entity used to represent
 * them. Characters for which no named entity exists use a decimal numeric
 * character reference instead.
 *
 * @remarks
 * Every value must be a complete entity string (`&...;`) and must be unique,
 * since the table is inverted for unescaping.
 *
 * References:
 * - https://en.wikipedia.org/wiki/List_of_XML_and_HTML_character_entity_references
 * - https://www.w3.org/TR/xml-entity-names/byalpha.html
 */
export const ENTITIES: Record<string, string> = {
  "&": "&amp;",
  "<": "&lt;",
  ">": "&gt;",
  "≤": "&le;",
  "≥": "&ge;",
  '"': "&quot;",
  "'": "&apos;",
  "—": "&mdash;",
  "–": "&ndash;",
  "…": "&hellip;",
  "⋮": "&vellip;",
  "⋯": "&ctdot;",
  "¢": "&cent;",
  "€": "&euro;",
  "£": "&pound;",
  "¥": "&yen;",
  "₹": "&#8377;",
  元: "&#20803;",
  "§": "&sect;",
  "¶": "&para;",
  "©": "&copy;",
  "®": "&reg;",
  "™": "&trade;",
  "℃": "&#8451;",
  "℉": "&#8457;",
  "\u212a": "&#8490;", // Kelvin
  "◂": "&ltrif;",
  "▸": "&rtrif;",
  "▴": "&utrif;",
  "▾": "&dtrif;",
  "←": "&larr;",
  "→": "&rarr;",
  "↑": "&uarr;",
  "↓": "&darr;",
  "↵": "&crarr;",
  "↩": "&larrhk;",
  "⇐": "&lArr;",
  "⇒": "&rArr;",
  "↺": "&olarr;",
  "↻": "&orarr;",
  "¹": "&sup1;",
  "²": "&sup2;",
  "³": "&sup3;",
  "½": "&frac12;",
  "⅓": "&frac13;",
  "⅔": "&frac23;",
  "¼": "&frac14;",
  "¾": "&frac34;",
  "⅕": "&frac15;",
  "⅙": "&frac16;",
  "⅛": "&frac18;",
  Ä: "&Auml;",
  Ë: "&Euml;",
  Ï: "&Iuml;",
  Ö: "&Ouml;",
  Ü: "&Uuml;",
  ä: "&auml;",
  ë: "&euml;",
  ï: "&iuml;",
  ö: "&ouml;",
  ü: "&uuml;",
  ß: "&szlig;",
  α: "&alpha;",
  β: "&beta;",
  γ: "&gamma;",
  δ: "&delta;",
  ε: "&epsilon;",
  θ: "&theta;",
  λ: "&lambda;",
  μ: "&mu;",
  π: "&pi;",
  σ: "&sigma;",
  τ: "&tau;",
  φ: "&phi;",
  ω: "&omega;",
  Δ: "&Delta;",
  Ω: "&Omega;",
  "∞": "&infin;",
  "±": "&plusmn;",
  "⨯": "&Cross;",
  "°": "&deg;",
  "∑": "&sum;",
  "∏": "&prod;",
  "∫": "&int;",
  "√": "&radic;",
  "∠": "&ang;",
  "∧": "&and;",
  "∨": "&or;",
  "∩": "&cap;",
  "∪": "&cup;",
  "⊂": "&sub;",
  "⊃": "&sup;",
  "∈": "&isin;",
  "∉": "&notin;",
  "∅": "&empty;",
  "⊕": "&oplus;",
  "⊖": "&ominus;",
  "⊗": "&otimes;",
  "⌖": "&target;",
  "☆": "&star;",
  "★": "&starf;",
  "✓": "&check;",
  "✗": "&cross;",
  "\xa0": "&nbsp;",
};
/**
 * Global, unicode-aware regexp matching any single character which is a key
 * of {@link ENTITIES}.
 *
 * @remarks
 * The keys are joined into one character class. The characters which carry a
 * special meaning inside `[...]` (`\`, `]`, `^`, `-`) are backslash-escaped
 * first, so that adding such a key to the table cannot silently alter the
 * class (e.g. by forming a range or closing the class early). For keys
 * without those characters the resulting pattern is unchanged.
 */
export const RE_ENTITIES = new RegExp(
  `[${Object.keys(ENTITIES)
    .map((key) => key.replace(/[\\\]^-]/g, "\\$&"))
    .join("")}]`,
  "gu"
);
/**
 * Inverse of {@link ENTITIES}: maps each entity string back to the character
 * it was registered for. Should two characters share the same entity string,
 * the one listed later in {@link ENTITIES} wins.
 */
export const ENTITIES_REV = Object.entries(ENTITIES).reduce<
  Record<string, string>
>((reverse, [char, entity]) => {
  reverse[entity] = char;
  return reverse;
}, {});
/**
 * Global regexp matching any of the entity strings which are keys of
 * {@link ENTITIES_REV}. The alternatives are wrapped in a single capture
 * group.
 */
export const RE_ENTITIES_REV = new RegExp(
  "(" + Object.keys(ENTITIES_REV).join("|") + ")",
  "g"
);
/**
 * Global, case-insensitive regexp matching numeric character references in
 * decimal (`&#8364;`) or hexadecimal (`&#x20ac;`) form.
 *
 * @remarks
 * Capture group 1 is `x`/`X` for hexadecimal references and the empty string
 * for decimal ones; capture group 2 holds the digits. The lookahead in the
 * decimal branch ensures that only `0-9` digits are accepted there, so
 * malformed input like `&#1f;` is not treated as a reference.
 */
export const RE_ENTITIES_NUM = /&#(x|(?=[0-9]+;))([0-9a-f]+);/gi;
/**
 * Substitutes every character of `src` which is a key in {@link ENTITIES}
 * with the entity string stored for it in that table.
 *
 * @remarks
 * Intended for HTML output only. When producing XML, SVG or similar formats,
 * use {@link escapeEntitiesNum} instead.
 *
 * @param src - string to escape
 */
export const escapeEntities = (src: string) => {
  const toEntity = (char: string) => ENTITIES[char];
  return src.replace(RE_ENTITIES, toEntity);
};
/**
 * Similar to {@link escapeEntities}, but only uses the table entries of
 * {@link ENTITIES} for ASCII characters (`&`, `<`, `>`, `'`, `"`) and emits
 * hexadecimal numeric references (`&#x...;`) for all other matched characters.
 *
 * @remarks
 * This function is used as default by thi.ng/hiccup `serialize()` to escape
 * characters and ensure compatibility with XML (which by default only supports
 * named entities for the above 5 characters).
 *
 * @param src - string to escape
 */
export const escapeEntitiesNum = (src: string) =>
  src.replace(RE_ENTITIES, (x) => {
    // RE_ENTITIES uses the `u` flag, i.e. each (never empty) match is a whole
    // code point. Read it as such rather than as its first UTF-16 code unit,
    // so a key outside the BMP would not be emitted as a lone surrogate.
    const code = x.codePointAt(0)!;
    return code < 128 ? ENTITIES[x] : `&#x${code.toString(16)};`;
  });
/**
 * Matches everything {@link unescapeEntities} decodes: any entity string known
 * to {@link RE_ENTITIES_REV}, or a numeric character reference in hexadecimal
 * (`&#x...;`) or decimal (`&#...;`, digits `0-9` only) form.
 */
const RE_UNESCAPE = new RegExp(
  `${RE_ENTITIES_REV.source}|&#(?:[xX][0-9a-fA-F]+|[0-9]+);`,
  "g"
);

/**
 * Replaces all known entities (keys of {@link ENTITIES_REV}) and all numeric
 * character references with their original characters. Opposite op of
 * {@link escapeEntities} and {@link escapeEntitiesNum}.
 *
 * @remarks
 * Decoding happens in a single pass, so text produced by decoding one entity
 * is never decoded again (e.g. `&amp;#65;` yields `&#65;`, not `A`). Numeric
 * references are resolved as full Unicode code points, i.e. values above
 * 0xFFFF are supported. References beyond the Unicode range (> 0x10FFFF) are
 * left untouched.
 *
 * @param src - string to unescape
 */
export const unescapeEntities = (src: string) =>
  src.replace(RE_UNESCAPE, (match) => {
    const known = ENTITIES_REV[match];
    if (known !== undefined) return known;
    // Not in the table, so the match is `&#` + optional `x` + digits + `;`
    const isHex = match.charAt(2).toLowerCase() === "x";
    const code = parseInt(match.slice(isHex ? 3 : 2, -1), isHex ? 16 : 10);
    // The comparison is false for NaN/Infinity too, so `fromCodePoint` can
    // never throw a RangeError here.
    return code <= 0x10ffff ? String.fromCodePoint(code) : match;
  });