packages/base/src/whitespace-processor.js
// eslint-disable-next-line no-unused-vars
import { InlineTextBuilder } from './inline-text-builder';
// eslint-disable-next-line import/no-unassigned-import
import './typedefs';
/**
 * Turn every character of a string into a `\uXXXX` escape sequence
 * (four lowercase hex digits), suitable for embedding into a RegExp source.
 *
 * The string is walked by code points, but only the first UTF-16 code unit
 * of each one is encoded - a character outside the Basic Multilingual Plane
 * is therefore represented by its high surrogate alone.
 *
 * @param { string } str Characters to encode.
 * @returns { string } Concatenated escape sequences, empty for an empty input.
 */
function charactersToCodes (str) {
  let escaped = '';
  for (const character of str) {
    const hex = character.charCodeAt(0).toString(16);
    escaped += '\\u' + hex.padStart(4, '0');
  }
  return escaped;
}
/**
 * Helps to handle HTML whitespaces.
 *
 * Builds a set of regular expressions from the configured whitespace
 * characters and uses them to split text into words and newlines
 * that are handed over to an inline text builder.
 *
 * @class WhitespaceProcessor
 */
class WhitespaceProcessor {

  /**
   * Creates an instance of WhitespaceProcessor.
   *
   * Depending on `options.preserveNewlines`, one of two `shrinkWrapAdd`
   * implementations is attached to the instance.
   *
   * @param { Options } options HtmlToText options.
   * @memberof WhitespaceProcessor
   */
  constructor (options) {
    // When newlines are preserved, `\n` is taken out of the whitespace set
    // so it can be matched and handled on its own.
    this.whitespaceChars = (options.preserveNewlines)
      ? options.whitespaceCharacters.replace(/\n/g, '')
      : options.whitespaceCharacters;
    const whitespaceCodes = charactersToCodes(this.whitespaceChars);
    this.leadingWhitespaceRe = new RegExp(`^[${whitespaceCodes}]`);
    this.trailingWhitespaceRe = new RegExp(`[${whitespaceCodes}]$`);
    this.allWhitespaceOrEmptyRe = new RegExp(`^[${whitespaceCodes}]*$`);
    this.newlineOrNonWhitespaceRe = new RegExp(`(\\n|[^\\n${whitespaceCodes}])`, 'g');
    this.newlineOrNonNewlineStringRe = new RegExp(`(\\n|[^\\n]+)`, 'g');

    if (options.preserveNewlines) {

      const wordOrNewlineRe = new RegExp(`\\n|[^\\n${whitespaceCodes}]+`, 'gm');

      /**
       * Shrink whitespaces and wrap text, add to the builder.
       *
       * Every `\n` starts a new line in the builder. The first word is
       * concatenated (`concatWord`) unless a space was stashed in the builder
       * or the text starts with whitespace; all other words are pushed
       * (`pushWord`).
       *
       * @param { string } text Input text.
       * @param { InlineTextBuilder } inlineTextBuilder A builder to receive processed text.
       * @param { (str: string) => string } [ transform ] A transform to be applied to words.
       * @param { boolean } [noWrap] Don't wrap text even if the line is too long.
       */
      this.shrinkWrapAdd = function (text, inlineTextBuilder, transform = (str => str), noWrap = false) {
        if (!text) { return; }
        const previouslyStashedSpace = inlineTextBuilder.stashedSpace;
        let anyMatch = false;
        // The regex is global and shared between calls. An exception thrown
        // by `transform` or the builder during a previous call would leave
        // `lastIndex` in the middle of that previous text - always start over.
        wordOrNewlineRe.lastIndex = 0;
        let m = wordOrNewlineRe.exec(text);
        if (m) {
          anyMatch = true;
          if (m[0] === '\n') {
            inlineTextBuilder.startNewLine();
          } else if (previouslyStashedSpace || this.testLeadingWhitespace(text)) {
            inlineTextBuilder.pushWord(transform(m[0]), noWrap);
          } else {
            inlineTextBuilder.concatWord(transform(m[0]), noWrap);
          }
          while ((m = wordOrNewlineRe.exec(text)) !== null) {
            if (m[0] === '\n') {
              inlineTextBuilder.startNewLine();
            } else {
              inlineTextBuilder.pushWord(transform(m[0]), noWrap);
            }
          }
        }
        // Keep the earlier stashed space only when nothing was added at all.
        inlineTextBuilder.stashedSpace = (previouslyStashedSpace && !anyMatch) || (this.testTrailingWhitespace(text));
        // No need to stash a space in case last added item was a new line,
        // but that won't affect anything later anyway.
      };

    } else {

      const wordRe = new RegExp(`[^${whitespaceCodes}]+`, 'g');

      /**
       * Shrink whitespaces and wrap text, add to the builder.
       *
       * The first word is concatenated (`concatWord`) unless a space was
       * stashed in the builder or the text starts with whitespace;
       * all other words are pushed (`pushWord`).
       *
       * @param { string } text Input text.
       * @param { InlineTextBuilder } inlineTextBuilder A builder to receive processed text.
       * @param { (str: string) => string } [ transform ] A transform to be applied to words.
       * @param { boolean } [noWrap] Don't wrap text even if the line is too long.
       */
      this.shrinkWrapAdd = function (text, inlineTextBuilder, transform = (str => str), noWrap = false) {
        if (!text) { return; }
        const previouslyStashedSpace = inlineTextBuilder.stashedSpace;
        let anyMatch = false;
        // Shared global regex: discard any position left over from a call
        // that was interrupted by an exception.
        wordRe.lastIndex = 0;
        let m = wordRe.exec(text);
        if (m) {
          anyMatch = true;
          if (previouslyStashedSpace || this.testLeadingWhitespace(text)) {
            inlineTextBuilder.pushWord(transform(m[0]), noWrap);
          } else {
            inlineTextBuilder.concatWord(transform(m[0]), noWrap);
          }
          while ((m = wordRe.exec(text)) !== null) {
            inlineTextBuilder.pushWord(transform(m[0]), noWrap);
          }
        }
        // Keep the earlier stashed space only when nothing was added at all.
        inlineTextBuilder.stashedSpace = (previouslyStashedSpace && !anyMatch) || this.testTrailingWhitespace(text);
      };

    }
  }

  /**
   * Add text with only minimal processing.
   * Everything between newlines considered a single word.
   * No whitespace is trimmed.
   * Not affected by preserveNewlines option - `\n` always starts a new line.
   *
   * `noWrap` argument is `true` by default - this won't start a new line
   * even if there is not enough space left in the current line.
   *
   * @param { string } text Input text.
   * @param { InlineTextBuilder } inlineTextBuilder A builder to receive processed text.
   * @param { boolean } [noWrap] Don't wrap text even if the line is too long.
   */
  addLiteral (text, inlineTextBuilder, noWrap = true) {
    if (!text) { return; }
    const previouslyStashedSpace = inlineTextBuilder.stashedSpace;
    let anyMatch = false;
    // Shared global regex: discard any position left over from a call
    // that was interrupted by an exception thrown by the builder.
    this.newlineOrNonNewlineStringRe.lastIndex = 0;
    let m = this.newlineOrNonNewlineStringRe.exec(text);
    if (m) {
      anyMatch = true;
      if (m[0] === '\n') {
        inlineTextBuilder.startNewLine();
      } else if (previouslyStashedSpace) {
        inlineTextBuilder.pushWord(m[0], noWrap);
      } else {
        inlineTextBuilder.concatWord(m[0], noWrap);
      }
      while ((m = this.newlineOrNonNewlineStringRe.exec(text)) !== null) {
        if (m[0] === '\n') {
          inlineTextBuilder.startNewLine();
        } else {
          inlineTextBuilder.pushWord(m[0], noWrap);
        }
      }
    }
    inlineTextBuilder.stashedSpace = (previouslyStashedSpace && !anyMatch);
  }

  /**
   * Test whether the given text starts with HTML whitespace character.
   *
   * @param { string } text The string to test.
   * @returns { boolean }
   */
  testLeadingWhitespace (text) {
    return this.leadingWhitespaceRe.test(text);
  }

  /**
   * Test whether the given text ends with HTML whitespace character.
   *
   * @param { string } text The string to test.
   * @returns { boolean }
   */
  testTrailingWhitespace (text) {
    return this.trailingWhitespaceRe.test(text);
  }

  /**
   * Test whether the given text contains any non-whitespace characters.
   *
   * @param { string } text The string to test.
   * @returns { boolean }
   */
  testContainsWords (text) {
    return !this.allWhitespaceOrEmptyRe.test(text);
  }

  /**
   * Return the number of newlines if there are no words.
   *
   * If any word is found then return zero regardless of the actual number of newlines.
   *
   * @param { string } text Input string.
   * @returns { number }
   */
  countNewlinesNoWords (text) {
    // The early return below can leave the global regex mid-string,
    // so the position is reset on every call.
    this.newlineOrNonWhitespaceRe.lastIndex = 0;
    let counter = 0;
    let match;
    while ((match = this.newlineOrNonWhitespaceRe.exec(text)) !== null) {
      if (match[0] === '\n') {
        counter++;
      } else {
        return 0;
      }
    }
    return counter;
  }

}
export { WhitespaceProcessor };