// src/clean-text-spec.es6
/* global describe, it */
require('lazy-ass');
var check = require('check-more-types');
var _ = require('lodash');
import { cleanEnteredSearchText, cleanHtmlTags, cleanTickerSearchHtml } from './clean-text.es6';
// Specs for cleanEnteredSearchText (imported from ./clean-text.es6).
describe('cleanEnteredSearchText', function () {
  it('is a function', function () {
    const isExportedFunction = check.fn(cleanEnteredSearchText);
    la(isExportedFunction, 'missing clean function', cleanEnteredSearchText);
  });

  it('removes non breaking space', function () {
    // NOTE(review): the input and the expected value render identically here;
    // presumably the input carries a non-breaking space (or its entity) --
    // confirm the exact characters in the original file.
    la(cleanEnteredSearchText('foo b') === 'foo b');
  });

  it('restores &', function () {
    // Upper-case input is expected to come back lower-cased.
    const lowered = cleanEnteredSearchText('FOO b');
    la(lowered === 'foo b');

    // Text with an ampersand is expected to come back as 'foo&bar'.
    const withAmpersand = cleanEnteredSearchText('foo&bar');
    la(withAmpersand === 'foo&bar');
  });

  it('trims spaces on the ends after conversion', function () {
    // Trailing padding first, then leading padding; both must reduce to 'foo'.
    ['foo ', ' foo'].forEach(function (padded) {
      la(cleanEnteredSearchText(padded) === 'foo');
    });
  });
});
// Specs for cleanHtmlTags (imported from ./clean-text.es6). Each case feeds a
// piece of markup to the function and compares the returned string exactly.
describe('cleanHtmlTags', function () {
  // Asserts that a single-argument call returns exactly `expected`.
  function expectCleaned(html, expected) {
    la(cleanHtmlTags(html) === expected);
  }

  it('is a function', function () {
    const isExportedFunction = check.fn(cleanHtmlTags);
    la(isExportedFunction, 'has cleanHtmlTags', cleanHtmlTags);
  });

  it('passes string unchanged', function () {
    // Tag-free input must come back as-is; the actual output is the diagnostic.
    ['GOOG', 'f'].forEach(function (plain) {
      const output = cleanHtmlTags(plain);
      la(output === plain, output);
    });
  });

  it('removes b and i tags', function () {
    expectCleaned('<b>foo</b>', 'foo');
    expectCleaned('<i>foo</i>', 'foo');
  });

  it('removes mismatched tags', function () {
    // Opening and closing tag names do not have to agree.
    expectCleaned('<b>foo</i>', 'foo');
    expectCleaned('<no>foo</yes>', 'foo');
  });

  it('removes tags with dashes', function () {
    expectCleaned('<foo-bar>foo</foo-bar>', 'foo');
  });

  it('removes html tags, can leaves space', function () {
    expectCleaned('<a></a>', '');
    // With ' ' as the second argument a space is expected where each tag was.
    la(cleanHtmlTags('<a>f</a>', ' ') === ' f ');
    expectCleaned('<h3>author here</h3>', 'author here');
  });

  it('leaves words after closing tag', function () {
    expectCleaned('<a>link</a> after', 'link after');
  });

  it('leaves words before and after closing tag', function () {
    expectCleaned('before <a>link</a> after', 'before link after');
  });

  it('cleans complex tag', function () {
    const markup = 'b <a rel="noopener noreferrer" ' +
      'target="_blank">link</a> after 2';
    const wanted = 'b link after 2';
    const actual = cleanHtmlTags(markup);
    la(actual === wanted, 'could not clean', markup, 'got', actual);
  });

  it('cleans tag with href attribute correctly', function () {
    const markup = 'b <a href="http://foo" rel="noopener noreferrer" ' +
      'target="_blank">link</a> after 2';
    const wanted = 'b link after 2';
    const actual = cleanHtmlTags(markup);
    la(actual === wanted, 'could not clean', markup, 'got', actual);
  });

  it('can cleanup attributes', function () {
    expectCleaned('<a id="a">f</a>', 'f');
  });

  it('can cleanup styles', function () {
    expectCleaned('<a style="margin-bottom: 20px;">f</a>', 'f');
  });

  it('can cleanup multiple styles', function () {
    expectCleaned(
      '<a style="margin-bottom: 20px; max-width: 560px; color: rgb(68, 68, 68);">f</a>',
      'f'
    );
  });

  it('can cleanup font names', function () {
    // The style value contains quoted font names.
    expectCleaned(
      '<a style="font-family: Palatino, \'Palatino Linotype\', Georgia, serif;">f</a>',
      'f'
    );
  });

  it('can clean gradients', function () {
    // The style value contains nested parentheses and commas.
    const markup = '<h2 class="story" style="background: -webkit-gradient(linear, 0% 0%, 0% 100%, ' +
      'from(rgb(0, 102, 102)), to(rgb(0, 66, 66))) rgb(0, 66, 66);">foo</h2>';
    expectCleaned(markup, 'foo');
  });
});
// Specs for cleanTickerSearchHtml (imported from ./clean-text.es6). The inputs
// are plain tickers and fragments of pasted HTML (table cells, spans, styles);
// each case compares the cleaned text exactly or by its extracted tokens.
describe('cleanTickerSearchHtml', function () {
  // Short alias for the function under test.
  const clean = cleanTickerSearchHtml;

  it('is a function', function () {
    la(check.fn(clean), 'missing clean fn');
  });

  it('passes string unchanged', function () {
    la(clean('GOOG') === 'GOOG');
    la(clean('f') === 'f');
  });

  it('removes white space', function () {
    // NOTE(review): both inputs render identically here; presumably they used
    // different leading whitespace originally -- confirm the exact characters.
    la(clean(' GOOG') === 'GOOG');
    la(clean(' GOOG') === 'GOOG');
  });

  it('keeps white space at the end', function () {
    // NOTE(review): both assertions render identically here; presumably they
    // differed in trailing whitespace originally -- confirm.
    la(clean('GOOG ') === 'GOOG ');
    la(clean('GOOG ') === 'GOOG ');
  });

  it('replaces white space at the end with space', function () {
    // A trailing tab is expected to become a single space.
    la(clean('GOOG\t') === 'GOOG ');
  });

  it('removes html tags, leaves space', function () {
    la(clean('<a></a>') === '');
    // A trailing space is expected after the text that was inside the tag.
    la(clean('<a>f</a>') === 'f ');
  });

  it('removes html tags leaves space', function () {
    la(clean('<a>f </a>') === 'f ');
  });

  it('removes tags with spaces', function () {
    la(clean('<this is tag>f') === 'f');
    la(clean('<this is tag>f</this is tag>') === 'f ', 'has closing tag');
  });

  it('removes tags with dashes', function () {
    la(clean('<this-is-tag>f') === 'f');
    la(clean('<this-is-tag>f</this-is-tag>') === 'f ', 'has closing tag');
  });

  it('removes tags with attributes', function () {
    la(clean('<a id="a">f</a>') === 'f ');
  });

  it('removes tags with guids', function () {
    la(clean('<span id="docs-internal-guid-3f7bace0-cee6-dba1-d26d05895">f</span>') === 'f ');
  });

  it('removes tags with styles', function () {
    la(clean('<span style="font-size: 15px; color: rgb(0, 0, 0);">f</span>') === 'f ');
  });

  // NOTE(review): the attribute values in the next three cases contain bare
  // double quotes; presumably these were &quot; entities originally -- confirm.
  it('removes data tag with &', function () {
    la(clean('<td data-sheets-value="[null,2,"GOOG"]">GOOG</td>') === 'GOOG ');
  });

  it('removes data tag with ^', function () {
    la(clean('<td data-sheets-value="[null,2,"^GOOG"]">^GSPC</td>') === '^GSPC ');
  });

  it('removes data tag with $', function () {
    la(clean('<td data-sheets-value="[null,2,"$GOOG"]">$RR</td>') === '$RR ');
  });

  it('cleans pasted styled cells 1', function () {
    const text = '<td style="padding:2px 3px 2px 3px;" ' +
      'data-sheets-value="[null,2,"goog"]">goog';
    const cleaned = clean(text);
    la(cleaned === 'goog', cleaned);
  });

  it('cleans pasted styled cells 2', function () {
    const text = '<td style="padding:2px 3px 2px 3px;vertical-align:bottom;" ' +
      'data-sheets-value="[null,2,"goog"]">goog';
    const cleaned = clean(text);
    la(cleaned === 'goog', cleaned);
  });

  it('cleans pasted styled cells 3', function () {
    const text = '<td style="padding:2px 3px 2px 3px;vertical-align:bottom;background-color:#ea9999;" ' +
      'data-sheets-value="[null,2,"goog"]">goog';
    const cleaned = clean(text);
    la(cleaned === 'goog', cleaned);
  });

  it('cleans pasted styled cells with color', function () {
    const text = '<td style="background-color:#ea9999;" >goog';
    const cleaned = clean(text);
    la(cleaned === 'goog', cleaned);
  });

  it('removes data tag with .', function () {
    la(clean('<td something="[.something]">$CL</td>') === '$CL ');
  });

  it('splits tags into new lines', function () {
    // Adjacent block elements are expected to be separated by a newline.
    const result = clean('<div>F</div><div>BA</div>');
    la(result === 'F\nBA ');
  });

  it('cleans as expected', function () {
    // Exercises String#replace directly; the function under test is not called.
    // NOTE(review): with plain ASCII spaces in all three literals this cannot
    // hold ('foo bar' without spaces is 'foobar'); the literals presumably
    // contain a non-breaking space or entity -- verify the exact characters.
    const txt = 'foo bar';
    const reg = / /g;
    const cleaned = txt.replace(reg, '');
    la(cleaned === 'foo bar');
  });

  it('cleans non-breaking spaces', function () {
    // NOTE(review): input presumably contains a non-breaking space -- confirm.
    const txt = 'foo bar';
    la(clean(txt) === 'foo bar');
  });

  it('allows ampersands', function () {
    const txt = 'foo & bar';
    la(clean(txt) === 'foo & bar');
  });

  it('handles entire table with comments', function () {
    // A whole pasted table, including <!--StartFragment--> / <!--EndFragment-->
    // comments; only the four cell texts should survive.
    const txt = '\n\n\n<table border="0" cellpadding="0" cellspacing="0" width="65" style="width: 65pt;">' +
      '<!--StartFragment-->\n' +
      '<colgroup><col width="65" style="width:65pt">' +
      '</colgroup><tbody><tr height="15" style="height:15.0pt">' +
      ' <td height="15" width="65" style="height:15.0pt;width:65pt">one</td>' +
      '</tr>' +
      '<tr height="15" style="height:15.0pt">' +
      '<td height="15" style="height:15.0pt">two</td>' +
      '</tr>' +
      '<tr height="15" style="height:15.0pt">' +
      '<td height="15" style="height:15.0pt">three</td>' +
      '</tr>' +
      '<tr height="15" style="height:15.0pt">' +
      '<td height="15" style="height:15.0pt">four</td>' +
      '</tr>' +
      '<!--EndFragment-->' +
      '</tbody></table>';
    const cleaned = clean(txt);
    la(cleaned.indexOf('StartFragment') === -1,
      'could not clean start HTML comment', cleaned);
    la(cleaned.indexOf('EndFragment') === -1,
      'could not clean end HTML comment', cleaned);
    // Compare whitespace-separated tokens so the exact spacing does not matter.
    const strings = cleaned.split(/\s/).filter(check.unemptyString);
    la(_.isEqual(strings, ['one', 'two', 'three', 'four']),
      'invalid extracted strings', strings);
  });

  it('handles hairy attributes', function () {
    const txt = '<td style="padding:2px 3px 2px 3px;vertical-align:bottom;" ' +
      'data-sheets-value="{"1":2 "2":"^GSPC"}">^GSPC';
    const cleaned = clean(txt);
    la(cleaned === '^GSPC', cleaned);
  });

  it('cleans multiple hairy tags', function () {
    const txt = '<td style="padding:2px 3px 2px 3px;vertical-align:bottom;border-top:1px solid ' +
      '#000000;border-right:1px solid #000000;border-bottom:1px solid #000000;border-left:1px ' +
      'solid #000000;" data-sheets-value="{"1":2,"2":"F"}">F\n' +
      '<td style="padding:2px 3px 2px 3px;vertical-align:bottom;" ' +
      'data-sheets-value="{"1":2,"2":"^GSPC"}">^GSPC\n\n' +
      '<td style="padding:2px 3px 2px 3px;vertical-align:bottom;background-color:#ea9999;" ' +
      'data-sheets-value="{"1":2,"2":"GOOG"}">GOOG\n' +
      '<td style="padding:2px 3px 2px 3px;vertical-align:bottom;" ' +
      'data-sheets-value="{"1":2,"2":"A"}">A\n\n\n' +
      '<td style="padding:2px 3px 2px 3px;vertical-align:bottom;" ' +
      'data-sheets-value="{"1":2,"2":"AMZN"}">AMZN\n';
    const cleaned = clean(txt);
    // One ticker per non-empty line, ignoring surrounding whitespace.
    const ts = cleaned.split('\n').map(_.trim).filter(check.unemptyString);
    const expected = ['F', '^GSPC', 'GOOG', 'A', 'AMZN'];
    la(_.isEqual(ts, expected), ts);
  });

  it('removes style tag experiment', function () {
    // Sanity-checks the patterns themselves; the function under test is not
    // called. No `g` flag: a global regex keeps `lastIndex` between `.test()`
    // calls, which would make any repeated use of these patterns stateful.
    const STYLE_TAG1 = /<style><\/style>/;
    const text1 = 'foo <style></style>';
    la(STYLE_TAG1.test(text1), 'found style in text 1');

    const STYLE_TAG2 = /<style>[\w\W]*<\/style>/;
    const text2 = 'foo <style>foo</style>';
    la(STYLE_TAG2.test(text2), 'found style in text 2');
  });

  it('removes style tag', function () {
    // A <style> element (with its CSS body) followed by a paragraph holding
    // two tickers; only the tickers should remain.
    const txt = [
      '<style type="text/css">',
      'p.p1 {margin: 0.0px 0.0px 0.0px 0.0px; font: 26.0px \'Helvetica Neue\'; color: #000000}',
      '</style>',
      '<p class="p1">GOOG APPL</p>'
    ].join('\n');
    const cleaned = clean(txt);
    const ts = cleaned.split(' ').map(_.trim).filter(check.unemptyString);
    const expected = ['GOOG', 'APPL'];
    // The cleaned text is reported only on failure instead of being logged on
    // every run.
    la(_.isEqual(ts, expected), 'cleaned', cleaned, 'tokens', ts);
  });
});