packages/base/src/block-text-builder.js
import {
// eslint-disable-next-line no-unused-vars
StackItem, BlockStackItem,
TableCellStackItem, TableRowStackItem, TableStackItem,
TransformerStackItem, ListStackItem, ListItemStackItem
} from './stack-item';
import { trimCharacter } from './util';
import { WhitespaceProcessor } from './whitespace-processor';
// eslint-disable-next-line import/no-unassigned-import
import './typedefs';
/**
* Helps to build text from inline and block elements.
*
* @class BlockTextBuilder
*/
class BlockTextBuilder {
/**
* Creates an instance of BlockTextBuilder.
*
* @param { Options } options HtmlToText options.
* @param { import('selderee').Picker<DomNode, TagDefinition> } picker Selectors decision tree picker.
* @param { any} [metadata] Optional metadata for HTML document, for use in formatters.
*/
constructor (options, picker, metadata = undefined) {
this.options = options;
this.picker = picker;
this.metadata = metadata;
this.whitespaceProcessor = new WhitespaceProcessor(options);
/** @type { StackItem } */
this._stackItem = new BlockStackItem(options);
/** @type { TransformerStackItem } */
this._wordTransformer = undefined;
}
/**
* Put a word-by-word transform function onto the transformations stack.
*
* Mainly used for uppercasing. Can be bypassed to add unformatted text such as URLs.
*
* Word transformations applied before wrapping.
*
* @param { (str: string) => string } wordTransform Word transformation function.
*/
pushWordTransform (wordTransform) {
this._wordTransformer = new TransformerStackItem(this._wordTransformer, wordTransform);
}
/**
* Remove a function from the word transformations stack.
*
* @returns { (str: string) => string } A function that was removed.
*/
popWordTransform () {
if (!this._wordTransformer) { return undefined; }
const transform = this._wordTransformer.transform;
this._wordTransformer = this._wordTransformer.next;
return transform;
}
/**
* Ignore wordwrap option in followup inline additions and disable automatic wrapping.
*/
startNoWrap () {
this._stackItem.isNoWrap = true;
}
/**
* Return automatic wrapping to behavior defined by options.
*/
stopNoWrap () {
this._stackItem.isNoWrap = false;
}
/** @returns { (str: string) => string } */
_getCombinedWordTransformer () {
const wt = (this._wordTransformer)
? ((str) => applyTransformer(str, this._wordTransformer))
: undefined;
const ce = this.options.encodeCharacters;
return (wt)
? ((ce) ? (str) => ce(wt(str)) : wt)
: ce;
}
_popStackItem () {
const item = this._stackItem;
this._stackItem = item.next;
return item;
}
/**
* Add a line break into currently built block.
*/
addLineBreak () {
if (!(
this._stackItem instanceof BlockStackItem
|| this._stackItem instanceof ListItemStackItem
|| this._stackItem instanceof TableCellStackItem
)) { return; }
if (this._stackItem.isPre) {
this._stackItem.rawText += '\n';
} else {
this._stackItem.inlineTextBuilder.startNewLine();
}
}
/**
* Allow to break line in case directly following text will not fit.
*/
addWordBreakOpportunity () {
if (
this._stackItem instanceof BlockStackItem
|| this._stackItem instanceof ListItemStackItem
|| this._stackItem instanceof TableCellStackItem
) {
this._stackItem.inlineTextBuilder.wordBreakOpportunity = true;
}
}
/**
* Add a node inline into the currently built block.
*
* @param { string } str
* Text content of a node to add.
*
* @param { object } [param1]
* Object holding the parameters of the operation.
*
* @param { boolean } [param1.noWordTransform]
* Ignore word transformers if there are any.
* Don't encode characters as well.
* (Use this for things like URL addresses).
*/
addInline (str, { noWordTransform = false } = {}) {
if (!(
this._stackItem instanceof BlockStackItem
|| this._stackItem instanceof ListItemStackItem
|| this._stackItem instanceof TableCellStackItem
)) { return; }
if (this._stackItem.isPre) {
this._stackItem.rawText += str;
return;
}
if (
str.length === 0 || // empty string
(
this._stackItem.stashedLineBreaks && // stashed linebreaks make whitespace irrelevant
!this.whitespaceProcessor.testContainsWords(str) // no words to add
)
) { return; }
if (this.options.preserveNewlines) {
const newlinesNumber = this.whitespaceProcessor.countNewlinesNoWords(str);
if (newlinesNumber > 0) {
this._stackItem.inlineTextBuilder.startNewLine(newlinesNumber);
// keep stashedLineBreaks unchanged
return;
}
}
if (this._stackItem.stashedLineBreaks) {
this._stackItem.inlineTextBuilder.startNewLine(this._stackItem.stashedLineBreaks);
}
this.whitespaceProcessor.shrinkWrapAdd(
str,
this._stackItem.inlineTextBuilder,
(noWordTransform) ? undefined : this._getCombinedWordTransformer(),
this._stackItem.isNoWrap
);
this._stackItem.stashedLineBreaks = 0; // inline text doesn't introduce line breaks
}
/**
* Add a string inline into the currently built block.
*
* Use this for markup elements that don't have to adhere
* to text layout rules.
*
* @param { string } str Text to add.
*/
addLiteral (str) {
if (!(
this._stackItem instanceof BlockStackItem
|| this._stackItem instanceof ListItemStackItem
|| this._stackItem instanceof TableCellStackItem
)) { return; }
if (str.length === 0) { return; }
if (this._stackItem.isPre) {
this._stackItem.rawText += str;
return;
}
if (this._stackItem.stashedLineBreaks) {
this._stackItem.inlineTextBuilder.startNewLine(this._stackItem.stashedLineBreaks);
}
this.whitespaceProcessor.addLiteral(
str,
this._stackItem.inlineTextBuilder,
this._stackItem.isNoWrap
);
this._stackItem.stashedLineBreaks = 0;
}
/**
* Start building a new block.
*
* @param { object } [param0]
* Object holding the parameters of the block.
*
* @param { number } [param0.leadingLineBreaks]
* This block should have at least this number of line breaks to separate it from any preceding block.
*
* @param { number } [param0.reservedLineLength]
* Reserve this number of characters on each line for block markup.
*
* @param { boolean } [param0.isPre]
* Should HTML whitespace be preserved inside this block.
*/
openBlock ({ leadingLineBreaks = 1, reservedLineLength = 0, isPre = false } = {}) {
const maxLineLength = Math.max(20, this._stackItem.inlineTextBuilder.maxLineLength - reservedLineLength);
this._stackItem = new BlockStackItem(
this.options,
this._stackItem,
leadingLineBreaks,
maxLineLength
);
if (isPre) { this._stackItem.isPre = true; }
}
/**
* Finalize currently built block, add it's content to the parent block.
*
* @param { object } [param0]
* Object holding the parameters of the block.
*
* @param { number } [param0.trailingLineBreaks]
* This block should have at least this number of line breaks to separate it from any following block.
*
* @param { (str: string) => string } [param0.blockTransform]
* A function to transform the block text before adding to the parent block.
* This happens after word wrap and should be used in combination with reserved line length
* in order to keep line lengths correct.
* Used for whole block markup.
*/
closeBlock ({ trailingLineBreaks = 1, blockTransform = undefined } = {}) {
const block = this._popStackItem();
const blockText = (blockTransform) ? blockTransform(getText(block)) : getText(block);
addText(this._stackItem, blockText, block.leadingLineBreaks, Math.max(block.stashedLineBreaks, trailingLineBreaks));
}
/**
* Start building a new list.
*
* @param { object } [param0]
* Object holding the parameters of the list.
*
* @param { number } [param0.maxPrefixLength]
* Length of the longest list item prefix.
* If not supplied or too small then list items won't be aligned properly.
*
* @param { 'left' | 'right' } [param0.prefixAlign]
* Specify how prefixes of different lengths have to be aligned
* within a column.
*
* @param { number } [param0.interRowLineBreaks]
* Minimum number of line breaks between list items.
*
* @param { number } [param0.leadingLineBreaks]
* This list should have at least this number of line breaks to separate it from any preceding block.
*/
openList ({ maxPrefixLength = 0, prefixAlign = 'left', interRowLineBreaks = 1, leadingLineBreaks = 2 } = {}) {
this._stackItem = new ListStackItem(this.options, this._stackItem, {
interRowLineBreaks: interRowLineBreaks,
leadingLineBreaks: leadingLineBreaks,
maxLineLength: this._stackItem.inlineTextBuilder.maxLineLength,
maxPrefixLength: maxPrefixLength,
prefixAlign: prefixAlign
});
}
/**
* Start building a new list item.
*
* @param {object} param0
* Object holding the parameters of the list item.
*
* @param { string } [param0.prefix]
* Prefix for this list item (item number, bullet point, etc).
*/
openListItem ({ prefix = '' } = {}) {
if (!(this._stackItem instanceof ListStackItem)) {
throw new Error('Can\'t add a list item to something that is not a list! Check the formatter.');
}
const list = this._stackItem;
const prefixLength = Math.max(prefix.length, list.maxPrefixLength);
const maxLineLength = Math.max(20, list.inlineTextBuilder.maxLineLength - prefixLength);
this._stackItem = new ListItemStackItem(this.options, list, {
prefix: prefix,
maxLineLength: maxLineLength,
leadingLineBreaks: list.interRowLineBreaks
});
}
/**
* Finalize currently built list item, add it's content to the parent list.
*/
closeListItem () {
const listItem = this._popStackItem();
const list = listItem.next;
const prefixLength = Math.max(listItem.prefix.length, list.maxPrefixLength);
const spacing = '\n' + ' '.repeat(prefixLength);
const prefix = (list.prefixAlign === 'right')
? listItem.prefix.padStart(prefixLength)
: listItem.prefix.padEnd(prefixLength);
const text = prefix + getText(listItem).replace(/\n/g, spacing);
addText(
list,
text,
listItem.leadingLineBreaks,
Math.max(listItem.stashedLineBreaks, list.interRowLineBreaks)
);
}
/**
* Finalize currently built list, add it's content to the parent block.
*
* @param { object } param0
* Object holding the parameters of the list.
*
* @param { number } [param0.trailingLineBreaks]
* This list should have at least this number of line breaks to separate it from any following block.
*/
closeList ({ trailingLineBreaks = 2 } = {}) {
const list = this._popStackItem();
const text = getText(list);
if (text) {
addText(this._stackItem, text, list.leadingLineBreaks, trailingLineBreaks);
}
}
/**
* Start building a table.
*/
openTable () {
this._stackItem = new TableStackItem(this._stackItem);
}
/**
* Start building a table row.
*/
openTableRow () {
if (!(this._stackItem instanceof TableStackItem)) {
throw new Error('Can\'t add a table row to something that is not a table! Check the formatter.');
}
this._stackItem = new TableRowStackItem(this._stackItem);
}
/**
* Start building a table cell.
*
* @param { object } [param0]
* Object holding the parameters of the cell.
*
* @param { number } [param0.maxColumnWidth]
* Wrap cell content to this width. Fall back to global wordwrap value if undefined.
*/
openTableCell ({ maxColumnWidth = undefined } = {}) {
if (!(this._stackItem instanceof TableRowStackItem)) {
throw new Error('Can\'t add a table cell to something that is not a table row! Check the formatter.');
}
this._stackItem = new TableCellStackItem(this.options, this._stackItem, maxColumnWidth);
}
/**
* Finalize currently built table cell and add it to parent table row's cells.
*
* @param { object } [param0]
* Object holding the parameters of the cell.
*
* @param { number } [param0.colspan] How many columns this cell should occupy.
* @param { number } [param0.rowspan] How many rows this cell should occupy.
*/
closeTableCell ({ colspan = 1, rowspan = 1 } = {}) {
const cell = this._popStackItem();
const text = trimCharacter(getText(cell), '\n');
cell.next.cells.push({ colspan: colspan, rowspan: rowspan, text: text });
}
/**
* Finalize currently built table row and add it to parent table's rows.
*/
closeTableRow () {
const row = this._popStackItem();
row.next.rows.push(row.cells);
}
/**
* Finalize currently built table and add the rendered text to the parent block.
*
* @param { object } param0
* Object holding the parameters of the table.
*
* @param { TablePrinter } param0.tableToString
* A function to convert a table of stringified cells into a complete table.
*
* @param { number } [param0.leadingLineBreaks]
* This table should have at least this number of line breaks to separate if from any preceding block.
*
* @param { number } [param0.trailingLineBreaks]
* This table should have at least this number of line breaks to separate it from any following block.
*/
closeTable ({ tableToString, leadingLineBreaks = 2, trailingLineBreaks = 2 }) {
const table = this._popStackItem();
const output = tableToString(table.rows);
if (output) {
addText(this._stackItem, output, leadingLineBreaks, trailingLineBreaks);
}
}
/**
* Return the rendered text content of this builder.
*
* @returns { string }
*/
toString () {
return getText(this._stackItem.getRoot());
// There should only be the root item if everything is closed properly.
}
}
function getText (stackItem) {
if (!(
stackItem instanceof BlockStackItem
|| stackItem instanceof ListItemStackItem
|| stackItem instanceof TableCellStackItem
)) {
throw new Error('Only blocks, list items and table cells can be requested for text contents.');
}
return (stackItem.inlineTextBuilder.isEmpty())
? stackItem.rawText
: stackItem.rawText + stackItem.inlineTextBuilder.toString();
}
function addText (stackItem, text, leadingLineBreaks, trailingLineBreaks) {
if (!(
stackItem instanceof BlockStackItem
|| stackItem instanceof ListItemStackItem
|| stackItem instanceof TableCellStackItem
)) {
throw new Error('Only blocks, list items and table cells can contain text.');
}
const parentText = getText(stackItem);
const lineBreaks = Math.max(stackItem.stashedLineBreaks, leadingLineBreaks);
stackItem.inlineTextBuilder.clear();
if (parentText) {
stackItem.rawText = parentText + '\n'.repeat(lineBreaks) + text;
} else {
stackItem.rawText = text;
stackItem.leadingLineBreaks = lineBreaks;
}
stackItem.stashedLineBreaks = trailingLineBreaks;
}
/**
* @param { string } str A string to transform.
* @param { TransformerStackItem } transformer A transformer item (with possible continuation).
* @returns { string }
*/
function applyTransformer (str, transformer) {
return ((transformer) ? applyTransformer(transformer.transform(str), transformer.next) : str);
}
export { BlockTextBuilder };