// src/v5/XmlPartReader.js
'use strict';
/**
 * Scan forward from `i` for the closing tag that pairs with an already-open
 * stop node. Processing instructions, comments and `<![` sections are jumped
 * over so that markup inside them is not counted, and nested tags with the
 * same name are tracked so the matching close is the one that is returned.
 *
 * NOTE(review): `findSubStrIndex` is not defined in the visible part of this
 * file, and `readTagExp` is invoked here as `(xmlDoc, i, '>')` and expected to
 * yield `tagName`, `tagExp` and `closeIndex`, while the `readTagExp` declared
 * in this file takes a single `parser` argument — confirm which helpers this
 * function is meant to use.
 *
 * @param {string} xmlDoc document text being scanned
 * @param {string} tagName name of the stop node whose closing tag is wanted
 * @param {number} i index at which scanning starts
 * @returns {{tagContent: string, i: number}|undefined} the text between the
 *   start index and the matching closing tag, plus the index that
 *   `findSubStrIndex` reported for that closing tag's `>`; `undefined` when
 *   the end of `xmlDoc` is reached without finding the matching close
 */
function readStopNode(xmlDoc, tagName, i){
  const contentStart = i;
  // One tag is already open when we get here, so the nesting depth starts at 1.
  let depth = 1;

  while (i < xmlDoc.length) {
    if (xmlDoc[i] !== "<") {
      i++;
      continue;
    }

    const marker = xmlDoc[i+1];
    if (marker === "/") {
      // Closing tag: only a close carrying our own name changes the depth.
      const tagEnd = findSubStrIndex(xmlDoc, ">", i, `${tagName} is not closed`);
      const closingName = xmlDoc.substring(i+2, tagEnd).trim();
      if (closingName === tagName) {
        depth -= 1;
        if (depth === 0) {
          return {
            tagContent: xmlDoc.substring(contentStart, i),
            i : tagEnd
          };
        }
      }
      i = tagEnd;
    } else if (marker === '?') {
      // Processing instruction
      i = findSubStrIndex(xmlDoc, "?>", i+1, "StopNode is not closed.");
    } else if (xmlDoc.startsWith('!--', i + 1)) {
      // Comment
      i = findSubStrIndex(xmlDoc, "-->", i+3, "StopNode is not closed.");
    } else if (xmlDoc.startsWith('![', i + 1)) {
      // `<![ ... ]]>` section; the helper's result is moved back by two
      i = findSubStrIndex(xmlDoc, "]]>", i, "StopNode is not closed.") - 2;
    } else {
      // Opening tag: a same-named, non self-closing tag adds a nesting level.
      const opened = readTagExp(xmlDoc, i, '>');
      if (opened) {
        if (opened.tagName === tagName) {
          const expression = opened.tagExp;
          if (expression[expression.length-1] !== "/") {
            depth += 1;
          }
        }
        i = opened.closeIndex;
      }
    }

    i++;
  }
}
/**
 * Read the name of a closing tag from `source`, one character at a time via
 * `readCh()`, stopping at the first `>`.
 *
 * @param {Source} source reader exposing `canRead()` and `readCh()`
 * @returns {string} the characters read before `>`, with trailing whitespace removed
 * @throws {Error} when `canRead()` turns false before a `>` has been read
 */
function readClosingTagName(source){
  let text = ""; // characters collected so far
  while (source.canRead()) {
    const ch = source.readCh();
    if (ch === ">") return text.trimEnd();
    text += ch;
  }
  // Report what was actually read. The message used to interpolate an
  // undeclared identifier, which raised a ReferenceError instead of this error.
  throw new Error(`Unexpected end of source. Reading '${text}'`);
}
/**
 * Read a tag expression from the parser's source and turn it into a tag
 * expression object.
 * Suitable for normal tags; not for comments, CDATA or DOCTYPE.
 * Eg <tag attr = ' some"' attr= ">" bool>
 *
 * The source is inspected from offset 0 with `canRead(i)` / `readChAt(i)`
 * until a `>` that is outside single and double quotes is found. The text
 * before that `>` is then fetched with `readStr(i)`, the buffer boundary is
 * moved by `i + 1`, and the text is handed to `buildTagExpObj`.
 *
 * @param {object} parser object exposing `source` (the reader) and whatever
 *   `buildTagExpObj` needs
 * @returns whatever `buildTagExpObj(exp, parser)` returns for the tag text
 * @throws {Error} when a quote is still open at the end of the readable
 *   source, or when no unquoted `>` is found
 */
function readTagExp(parser) {
  const source = parser.source;
  // Quote character currently open, or null when outside any quoted value.
  let openQuote = null;
  let foundEnd = false;
  let i = 0;

  while (source.canRead(i)) {
    const ch = source.readChAt(i);
    if (openQuote !== null) {
      // Inside a quoted value only the matching quote is significant.
      if (ch === openQuote) openQuote = null;
    } else if (ch === "'" || ch === '"') {
      openQuote = ch;
    } else if (ch === '>') {
      // If not inside quotes, stop reading at '>'
      foundEnd = true;
      break;
    }
    i++;
  }

  if (openQuote !== null) {
    throw new Error("Invalid attribute expression. Quote is not properly closed");
  }
  if (!foundEnd) {
    throw new Error("Unexpected closing of source. Waiting for '>'");
  }

  const exp = source.readStr(i);
  source.updateBufferBoundary(i + 1);
  return buildTagExpObj(exp, parser);
}
/**
 * Read a processing-instruction expression from the parser's source and turn
 * it into a tag expression object.
 *
 * The source is inspected from offset 0 until a `?` immediately followed by
 * `>` is seen outside single and double quotes. The text before that `?` is
 * fetched with `readStr(i)` and handed to `buildTagExpObj`.
 *
 * NOTE(review): the buffer boundary is moved by `i + 1`, where `i` is the
 * offset of the `?`. Whether the `>` that follows is consumed elsewhere
 * depends on the source implementation — confirm.
 *
 * @param {object} parser object exposing `source` (the reader) and
 *   `options.attributes.ignore`
 * @returns whatever `buildTagExpObj(exp, parser)` returns for the PI text
 * @throws {Error} when a quote is still open at the end of the readable
 *   source, or when no unquoted `?>` is found
 */
function readPiExp(parser) {
  const source = parser.source;
  // Quote character currently open, or null when outside any quoted value.
  let openQuote = null;
  let foundEnd = false;
  let i = 0;

  while (source.canRead(i)) {
    const ch = source.readChAt(i);
    const following = source.readChAt(i + 1);
    if (openQuote !== null) {
      // Inside a quoted value only the matching quote is significant.
      if (ch === openQuote) openQuote = null;
    } else if (ch === "'" || ch === '"') {
      openQuote = ch;
    } else if (ch === '?' && following === '>') {
      // '?>' outside quotes terminates the expression
      foundEnd = true;
      break;
    }
    i++;
  }

  if (openQuote !== null) {
    throw new Error("Invalid attribute expression. Quote is not properly closed in PI tag expression");
  }
  if (!foundEnd) {
    throw new Error("Unexpected closing of source. Waiting for '?>'");
  }

  if (!parser.options.attributes.ignore) {
    //TODO: use regex to verify attributes if not set to ignore
  }

  const exp = source.readStr(i);
  source.updateBufferBoundary(i + 1);
  return buildTagExpObj(exp, parser);
}
/**
 * Split a raw tag expression (the text between `<` and `>`) into a tag name
 * and an attribute string, and forward the attribute string to
 * `parseAttributesExp` unless attributes are ignored.
 *
 * @param {string} exp tag expression, e.g. `tag a="1"` or `tag/`
 * @param {object} parser object exposing `options.attributes.ignore`; it is
 *   passed through to `parseAttributesExp`
 * @returns {{tagName: string, selfClosing: boolean}} the tag name and whether
 *   the expression ended with `/`
 */
function buildTagExpObj(exp, parser){
  const tagExp = {
    tagName: "",
    selfClosing: false
  };

  // Strip the self-closing marker before splitting. Otherwise it ends up in
  // the tag name (`tag/`) or in the attribute string, where it would be
  // picked up as an attribute called "/".
  let body = exp;
  if (body[body.length - 1] === "/") {
    tagExp.selfClosing = true;
    body = body.substring(0, body.length - 1);
  }

  // The name ends at the first XML whitespace character (space, tab, CR, LF),
  // not only at a space: attributes are often placed on the following line.
  const nameEnd = body.search(/[ \t\r\n]/);
  let attrsExp = "";
  if (nameEnd === -1) {
    //only tag
    tagExp.tagName = body;
  } else {
    tagExp.tagName = body.substring(0, nameEnd);
    attrsExp = body.substring(nameEnd + 1);
  }
  tagExp.tagName = tagExp.tagName.trimEnd();

  if (!parser.options.attributes.ignore && attrsExp.length > 0) {
    parseAttributesExp(attrsExp, parser);
  }
  return tagExp;
}
// Matches an attribute name optionally followed by `= 'value'` or `= "value"`.
// Group 1 is the name, group 3 the quote character, group 4 the raw value.
const attrsRegx = new RegExp('([^\\s=]+)\\s*(=\\s*([\'"])([\\s\\S]*?)\\3)?', 'gm');

/**
 * Parse an attribute string and report every attribute to the parser's
 * output builder.
 *
 * Each name goes through `parser.processAttrName` and each value through
 * `parser.replaceEntities` before `parser.outputBuilder.addAttribute` is
 * called. An attribute written without a value is reported with `true`.
 *
 * @param {string} attrStr attribute part of a tag expression, e.g. `a="1" b`
 * @param {object} parser object exposing `processAttrName`, `replaceEntities`
 *   and `outputBuilder.addAttribute`
 */
function parseAttributesExp(attrStr, parser) {
  const matches = getAllMatches(attrStr, attrsRegx);
  const len = matches.length; //don't make it inline
  for (let i = 0; i < len; i++) {
    const attrName = parser.processAttrName(matches[i][1]);
    // Group 4 is undefined only when the attribute has no `=value` part. An
    // explicit empty value (`a=""`) must stay "" instead of becoming `true`.
    const rawValue = matches[i][4];
    const attrVal = parser.replaceEntities(rawValue === undefined ? true : rawValue);
    parser.outputBuilder.addAttribute(attrName, attrVal);
  }
}
/**
 * Run `regex` repeatedly over `string` and collect every match.
 *
 * Each entry is a plain array holding the full match followed by its capture
 * groups, with an extra `startIndex` property computed from `regex.lastIndex`
 * minus the length of the full match. The loop relies on `exec` advancing
 * `lastIndex`, so `regex` is expected to carry the `g` (or `y`) flag.
 *
 * @param {string} string text to search
 * @param {RegExp} regex pattern to apply
 * @returns {Array<Array<string|undefined>>} one entry per match, in order
 */
const getAllMatches = function(string, regex) {
  const collected = [];
  for (let found = regex.exec(string); found; found = regex.exec(string)) {
    // Copy into a plain array so only the groups and startIndex are exposed.
    const groups = [...found];
    groups.startIndex = regex.lastIndex - found[0].length;
    collected.push(groups);
  }
  return collected;
};
module.exports = {
readStopNode: readStopNode,
readClosingTagName: readClosingTagName,
readTagExp: readTagExp,
readPiExp: readPiExp,
}