src/v5/EntitiesParser.js
const ampEntity = { regex: /&(amp|#38|#x26);/g, val : "&"};
const htmlEntities = {
"space": { regex: /&(nbsp|#160);/g, val: " " },
// "lt" : { regex: /&(lt|#60);/g, val: "<" },
// "gt" : { regex: /&(gt|#62);/g, val: ">" },
// "amp" : { regex: /&(amp|#38);/g, val: "&" },
// "quot" : { regex: /&(quot|#34);/g, val: "\"" },
// "apos" : { regex: /&(apos|#39);/g, val: "'" },
"cent" : { regex: /&(cent|#162);/g, val: "¢" },
"pound" : { regex: /&(pound|#163);/g, val: "£" },
"yen" : { regex: /&(yen|#165);/g, val: "¥" },
"euro" : { regex: /&(euro|#8364);/g, val: "€" },
"copyright" : { regex: /&(copy|#169);/g, val: "©" },
"reg" : { regex: /&(reg|#174);/g, val: "®" },
"inr" : { regex: /&(inr|#8377);/g, val: "₹" },
"num_dec": { regex: /&#([0-9]{1,7});/g, val : (_, str) => String.fromCharCode(Number.parseInt(str, 10)) },
"num_hex": { regex: /&#x([0-9a-fA-F]{1,6});/g, val : (_, str) => String.fromCharCode(Number.parseInt(str, 16)) },
};
class EntitiesParser{
constructor(replaceHtmlEntities) {
this.replaceHtmlEntities = replaceHtmlEntities;
this.docTypeEntities = {};
this.lastEntities = {
"apos" : { regex: /&(apos|#39|#x27);/g, val : "'"},
"gt" : { regex: /&(gt|#62|#x3E);/g, val : ">"},
"lt" : { regex: /&(lt|#60|#x3C);/g, val : "<"},
"quot" : { regex: /&(quot|#34|#x22);/g, val : "\""},
};
}
addExternalEntities(externalEntities){
const entKeys = Object.keys(externalEntities);
for (let i = 0; i < entKeys.length; i++) {
const ent = entKeys[i];
this.addExternalEntity(ent,externalEntities[ent])
}
}
addExternalEntity(key,val){
validateEntityName(key);
if(val.indexOf("&") !== -1) {
reportWarning(`Entity ${key} is not added as '&' is found in value;`)
return;
}else{
this.lastEntities[ent] = {
regex: new RegExp("&"+key+";","g"),
val : val
}
}
}
addDocTypeEntities(entities){
const entKeys = Object.keys(entities);
for (let i = 0; i < entKeys.length; i++) {
const ent = entKeys[i];
this.docTypeEntities[ent] = {
regex: new RegExp("&"+ent+";","g"),
val : entities[ent]
}
}
}
parse(val){
return this.replaceEntitiesValue(val)
}
/**
* 1. Replace DOCTYPE entities
* 2. Replace external entities
* 3. Replace HTML entities if asked
* @param {string} val
*/
replaceEntitiesValue(val){
if(typeof val === "string" && val.length > 0){
for(let entityName in this.docTypeEntities){
const entity = this.docTypeEntities[entityName];
val = val.replace( entity.regx, entity.val);
}
for(let entityName in this.lastEntities){
const entity = this.lastEntities[entityName];
val = val.replace( entity.regex, entity.val);
}
if(this.replaceHtmlEntities){
for(let entityName in htmlEntities){
const entity = htmlEntities[entityName];
val = val.replace( entity.regex, entity.val);
}
}
val = val.replace( ampEntity.regex, ampEntity.val);
}
return val;
}
};
//an entity name should not contains special characters that may be used in regex
//Eg !?\\\/[]$%{}^&*()<>
const specialChar = "!?\\\/[]$%{}^&*()<>|+";
function validateEntityName(name){
for (let i = 0; i < specialChar.length; i++) {
const ch = specialChar[i];
if(name.indexOf(ch) !== -1) throw new Error(`Invalid character ${ch} in entity name`);
}
return name;
}
module.exports = EntitiesParser;