// Repository: NaturalIntelligence/fast-xml-parser (view on GitHub)
// File: src/v5/EntitiesParser.js
//
// Summary
//   Maintainability: F (5 days)
//   Test Coverage: (no value captured)
// Ampersand entity in its named, decimal and hexadecimal spellings.
// It is kept apart from the other tables so it can be applied on its own,
// after every other entity has been replaced.
const ampEntity = {
    regex: /&(amp|#38|#x26);/g,
    val: "&",
};
/**
 * Decode the digits of a numeric character reference into its character.
 *
 * `String.fromCodePoint` is used instead of `String.fromCharCode` so that
 * code points above U+FFFF (for example emoji) are produced as a proper
 * surrogate pair instead of being truncated to 16 bits.
 *
 * @param {string} match  the whole reference as found in the text, e.g. "&#x41;"
 * @param {string} digits the captured digits, e.g. "41"
 * @param {number} radix  10 for decimal references, 16 for hexadecimal ones
 * @returns {string} the decoded character, or `match` unchanged when the
 *                   number is above U+10FFFF and therefore not a code point
 */
function decodeNumericEntity(match, digits, radix) {
    const codePoint = Number.parseInt(digits, radix);
    // The patterns below admit values above U+10FFFF (7 decimal / 6 hex digits);
    // String.fromCodePoint would throw a RangeError for those, so leave them as-is.
    if (codePoint > 0x10FFFF) {
        return match;
    }
    return String.fromCodePoint(codePoint);
}

// Optional HTML entities. Each entry pairs a global regex with its replacement,
// which is either a string or a replacer function for String.prototype.replace.
// lt, gt, amp, quot and apos are intentionally absent from this table: they are
// covered by `ampEntity` and by the `lastEntities` of `EntitiesParser`.
const htmlEntities = {
    "space": { regex: /&(nbsp|#160);/g, val: " " },
    "cent" : { regex: /&(cent|#162);/g, val: "¢" },
    "pound" : { regex: /&(pound|#163);/g, val: "£" },
    "yen" : { regex: /&(yen|#165);/g, val: "¥" },
    "euro" : { regex: /&(euro|#8364);/g, val: "€" },
    "copyright" : { regex: /&(copy|#169);/g, val: "©" },
    "reg" : { regex: /&(reg|#174);/g, val: "®" },
    "inr" : { regex: /&(inr|#8377);/g, val: "₹" },
    // Generic numeric references; they come last so the named entries above win.
    "num_dec": { regex: /&#([0-9]{1,7});/g, val : (match, digits) => decodeNumericEntity(match, digits, 10) },
    "num_hex": { regex: /&#x([0-9a-fA-F]{1,6});/g, val : (match, digits) => decodeNumericEntity(match, digits, 16) },
};

/**
 * Escape every character that is special inside a regular expression so the
 * text can be embedded in a pattern and matched literally.
 * @param {string} text
 * @returns {string}
 */
function escapeRegExp(text) {
    return String(text).replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
}

/**
 * Build the `{ regex, val }` record used for a named entity such as `&name;`.
 * @param {string} name  entity name without the surrounding `&` and `;`
 * @param {*} value      replacement for the entity
 */
function createNamedEntity(name, value) {
    return {
        // The name is escaped so that e.g. "a.b" only matches "&a.b;" and not "&axb;".
        regex: new RegExp("&" + escapeRegExp(name) + ";", "g"),
        val: value
    };
}

/**
 * Apply a single `{ regex, val }` entity record to `text`.
 * @param {string} text
 * @param {{regex: RegExp, val: *}} entity
 * @returns {string}
 */
function applyEntity(text, entity) {
    // A plain string replacement would have "$&", "$$", "$1"... interpreted by
    // String.prototype.replace; wrapping it in a function inserts it verbatim.
    const replacement = typeof entity.val === "function" ? entity.val : () => entity.val;
    return text.replace(entity.regex, replacement);
}

/**
 * Apply every entity record of `table` (own enumerable keys, in key order) to `text`.
 * @param {string} text
 * @param {Object<string, {regex: RegExp, val: *}>} table
 * @returns {string}
 */
function applyEntityTable(text, table) {
    let result = text;
    for (const name of Object.keys(table)) {
        result = applyEntity(result, table[name]);
    }
    return result;
}

/**
 * Report that an entity was skipped. Uses `reportWarning` when such a function
 * is in scope and falls back to `console.warn` otherwise, so a rejected entity
 * never ends in a ReferenceError.
 * @param {string} message
 */
function warnEntitySkipped(message) {
    if (typeof reportWarning === "function") {
        reportWarning(message);
    } else {
        console.warn(message);
    }
}

/**
 * Replaces entity references (`&name;`) in text values.
 *
 * Three sources of entities are kept: entities declared in a DOCTYPE,
 * the built-in XML entities plus any externally registered ones
 * (`lastEntities`), and - optionally - the `htmlEntities` table.
 */
class EntitiesParser{
    /**
     * @param {boolean} replaceHtmlEntities when truthy, the `htmlEntities`
     *        table is applied as well while replacing entities
     */
    constructor(replaceHtmlEntities) {
        this.replaceHtmlEntities = replaceHtmlEntities;
        this.docTypeEntities = {};
        this.lastEntities = {
            "apos" : { regex: /&(apos|#39|#x27);/g, val : "'"},
            "gt" : { regex: /&(gt|#62|#x3E);/g, val : ">"},
            "lt" : { regex: /&(lt|#60|#x3C);/g, val : "<"},
            "quot" : { regex: /&(quot|#34|#x22);/g, val : "\""},
        };
    }

    /**
     * Register several external entities at once.
     * @param {Object<string, string>} externalEntities map of entity name to value
     */
    addExternalEntities(externalEntities){
        for (const name of Object.keys(externalEntities)) {
            this.addExternalEntity(name, externalEntities[name]);
        }
    }

    /**
     * Register one external entity. The name is checked with
     * `validateEntityName`; a value containing '&' is rejected with a warning
     * and the entity is not added.
     * @param {string} key entity name without `&` and `;`
     * @param {string} val replacement text
     */
    addExternalEntity(key,val){
        validateEntityName(key);
        if(val.indexOf("&") !== -1) {
            warnEntitySkipped(`Entity ${key} is not added as '&' is found in value;`);
            return;
        }
        this.lastEntities[key] = createNamedEntity(key, val);
    }

    /**
     * Register the entities declared in a DOCTYPE.
     * @param {Object<string, *>} entities map of entity name to replacement value
     */
    addDocTypeEntities(entities){
        for (const name of Object.keys(entities)) {
            this.docTypeEntities[name] = createNamedEntity(name, entities[name]);
        }
    }

    /**
     * Replace the entities in `val`; alias of `replaceEntitiesValue`.
     * @param {string} val
     */
    parse(val){
        return this.replaceEntitiesValue(val);
    }

    /**
     * 1. Replace DOCTYPE entities
     * 2. Replace the built-in XML entities and external entities
     * 3. Replace HTML entities if asked
     * 4. Replace the ampersand entity
     *
     * The ampersand is handled last so that text such as `&amp;lt;` becomes
     * `&lt;` and is not decoded a second time into `<`.
     *
     * @param {string} val
     * @returns {*} the text with entities replaced; anything that is not a
     *              non-empty string is returned unchanged
     */
    replaceEntitiesValue(val){
        if(typeof val !== "string" || val.length === 0){
            return val;
        }
        let text = applyEntityTable(val, this.docTypeEntities);
        text = applyEntityTable(text, this.lastEntities);
        if(this.replaceHtmlEntities){
            text = applyEntityTable(text, htmlEntities);
        }
        return applyEntity(text, ampEntity);
    }
}

// Characters that are not allowed in an entity name. The name is interpolated
// into a regular expression when an external entity is registered, so these
// (mostly regex metacharacters) must not appear in it.
const specialChar = "!?\\\/[]$%{}^&*()<>|+";

/**
 * Ensure an entity name contains none of the characters in `specialChar`.
 *
 * The characters are checked in the order they appear in `specialChar`, so the
 * error names the first listed character that occurs in `name`.
 *
 * @param {string} name entity name to check
 * @returns {string} the same `name` when it is acceptable
 * @throws {Error} when `name` contains one of the forbidden characters
 */
function validateEntityName(name){
    const offending = Array.from(specialChar).find((candidate) => name.indexOf(candidate) !== -1);
    if (offending !== undefined) {
        throw new Error(`Invalid character ${offending} in entity name`);
    }
    return name;
}

// The EntitiesParser class is the only value exported by this CommonJS module.
module.exports = EntitiesParser;