src/components/Widgets/Markdown/serializers/index.js
import { get, isEmpty, reduce, pull } from 'lodash';
import unified from 'unified';
import u from 'unist-builder';
import markdownToRemarkPlugin from 'remark-parse';
import remarkToMarkdownPlugin from 'remark-stringify';
import remarkToRehype from 'remark-rehype';
import rehypeToHtml from 'rehype-stringify';
import htmlToRehype from 'rehype-parse';
import rehypeToRemark from 'rehype-remark';
import remarkToRehypeShortcodes from './remarkRehypeShortcodes';
import rehypePaperEmoji from './rehypePaperEmoji';
import remarkAssertParents from './remarkAssertParents';
import remarkPaddedLinks from './remarkPaddedLinks';
import remarkWrapHtml from './remarkWrapHtml';
import remarkToSlate from './remarkSlate';
import remarkSquashReferences from './remarkSquashReferences';
import remarkImagesToText from './remarkImagesToText';
import remarkShortcodes from './remarkShortcodes';
import remarkEscapeMarkdownEntities from './remarkEscapeMarkdownEntities'
import slateToRemark from './slateRemark';
import registry from '../../../../lib/registry';
/**
* This module contains all serializers for the Markdown widget.
*
* The value of a Markdown widget is transformed to various formats during
* editing, and these formats are referenced throughout serializer source
* documentation. Below is brief glossary of the formats used.
*
* - Markdown {string}
* The stringified Markdown value. The value of the field is persisted
* (stored) in this format, and the stringified value is also used when the
* editor is in "raw" Markdown mode.
*
* - MDAST {object}
* Also loosely referred to as "Remark". MDAST stands for MarkDown AST
* (Abstract Syntax Tree), and is an object representation of a Markdown
* document. Underneath, it's a Unist tree with a Markdown-specific schema.
* MDAST syntax is a part of the Unified ecosystem, and powers the Remark
* processor, so Remark plugins may be used.
*
* - HAST {object}
* Also loosely referred to as "Rehype". HAST, similar to MDAST, is an object
* representation of an HTML document. The field value takes this format
* temporarily before the document is stringified to HTML.
*
* - HTML {string}
* The field value is stringifed to HTML for preview purposes - the HTML value
* is never parsed, it is output only.
*
* - Slate Raw AST {object}
* Slate's Raw AST is a very simple and unopinionated object representation of
* a document in a Slate editor. We define our own Markdown-specific schema
* for serialization to/from Slate's Raw AST and MDAST.
*/
/**
* Deserialize a Markdown string to an MDAST.
*/
export const markdownToRemark = markdown => {
/**
* Parse the Markdown string input to an MDAST.
*/
const parsed = unified()
.use(markdownToRemarkPlugin, { fences: true, pedantic: true, commonmark: true })
.parse(markdown);
/**
* Further transform the MDAST with plugins.
*/
const result = unified()
.use(remarkSquashReferences)
.use(remarkImagesToText)
.use(remarkShortcodes, { plugins: registry.getEditorComponents() })
.runSync(parsed);
return result;
};
/**
* Serialize an MDAST to a Markdown string.
*/
export const remarkToMarkdown = obj => {
/**
* Rewrite the remark-stringify text visitor to simply return the text value,
* without encoding or escaping any characters. This means we're completely
* trusting the markdown that we receive.
*/
function remarkAllowAllText() {
const Compiler = this.Compiler;
const visitors = Compiler.prototype.visitors;
visitors.text = node => node.value;
};
/**
* Provide an empty MDAST if no value is provided.
*/
const mdast = obj || u('root', [u('paragraph', [u('text', '')])]);
const remarkToMarkdownPluginOpts = {
commonmark: true,
fences: true,
pedantic: true,
listItemIndent: '1',
// Settings to emulate the defaults from the Prosemirror editor, not
// necessarily optimal. Should eventually be configurable.
bullet: '*',
strong: '*',
rule: '-',
};
/**
* Escape markdown entities found in text and html nodes within the MDAST.
*/
const escapedMdast = unified()
.use(remarkEscapeMarkdownEntities)
.runSync(mdast);
const markdown = unified()
.use(remarkToMarkdownPlugin, remarkToMarkdownPluginOpts)
.use(remarkAllowAllText)
.stringify(escapedMdast);
return markdown;
};
/**
* Convert Markdown to HTML.
*/
export const markdownToHtml = (markdown, getAsset) => {
const mdast = markdownToRemark(markdown);
const hast = unified()
.use(remarkToRehypeShortcodes, { plugins: registry.getEditorComponents(), getAsset })
.use(remarkToRehype, { allowDangerousHTML: true })
.runSync(mdast);
const html = unified()
.use(rehypeToHtml, { allowDangerousHTML: true, allowDangerousCharacters: true })
.stringify(hast);
return html;
}
/**
* Deserialize an HTML string to Slate's Raw AST. Currently used for HTML
* pastes.
*/
export const htmlToSlate = html => {
const hast = unified()
.use(htmlToRehype, { fragment: true })
.parse(html);
const mdast = unified()
.use(rehypePaperEmoji)
.use(rehypeToRemark, { minify: false })
.runSync(hast);
const slateRaw = unified()
.use(remarkAssertParents)
.use(remarkPaddedLinks)
.use(remarkImagesToText)
.use(remarkShortcodes, { plugins: registry.getEditorComponents() })
.use(remarkWrapHtml)
.use(remarkToSlate)
.runSync(mdast);
return slateRaw;
};
/**
* Convert Markdown to Slate's Raw AST.
*/
export const markdownToSlate = markdown => {
const mdast = markdownToRemark(markdown);
const slateRaw = unified()
.use(remarkWrapHtml)
.use(remarkToSlate)
.runSync(mdast);
return slateRaw;
};
/**
* Convert a Slate Raw AST to Markdown.
*
* Requires shortcode plugins to parse shortcode nodes back to text.
*
* Note that Unified is not utilized for the conversion from Slate's Raw AST to
* MDAST. The conversion is manual because Unified can only operate on Unist
* trees.
*/
export const slateToMarkdown = raw => {
const mdast = slateToRemark(raw, { shortcodePlugins: registry.getEditorComponents() });
const markdown = remarkToMarkdown(mdast);
return markdown;
};