src/model/json-ld/to-json-ld.ts
import {DOMParser} from "xmldom";
import {Abstract, RechtspraakMetadata} from "../rechtspraak_metadata";
import {isStringArray, mustHaveTextAndAttributes, throwIfNotString} from "../../util/validations";
import refineMetadata from "../convert-to-typed";
// import convertToHtml from "./to-html";
/**
 * Returns the "#text" value of a converted XML node after checking that its
 * "rdfs:label" attribute carries one of the accepted labels.
 *
 * @param obj converted node; its "@attributes" and "#text" entries are read
 * @param label the accepted label, or a list of accepted labels
 * @param keyName optional name used in the error message instead of the serialized node
 * @returns whatever is stored under "#text" on obj
 * @throws Error when obj has no "@attributes", or its "rdfs:label" equals none of the accepted labels
 */
function textValueWithCommonLabel(obj: any, label: (string | string[]), keyName?: string) {
    let mismatch: boolean;
    if (isStringArray(label)) {
        // A list of labels is satisfied as soon as one of them equals the node's label.
        mismatch = !label.some(
            (candidate: string) => !!obj["@attributes"] && candidate === obj["@attributes"]["rdfs:label"]
        );
    } else {
        const attributes = obj["@attributes"];
        mismatch = !(attributes && attributes["rdfs:label"] === label);
    }

    if (mismatch) {
        throw new Error(`Expected ${keyName ? keyName : JSON.stringify(obj)} to look different than ${JSON.stringify(obj)}`);
    }
    return obj["#text"];
}
/**
 * Replaces obj[key] by its label-checked text value.
 *
 * Does nothing when obj[key] is falsy. Otherwise the value is validated with
 * mustHaveTextAndAttributes and then overwritten with the result of
 * textValueWithCommonLabel.
 *
 * @param obj object whose property is rewritten in place
 * @param key name of the property to rewrite
 * @param label the accepted "rdfs:label" value, or a list of accepted values
 */
function setTextValueWithCommonLabel(obj: any, key: string, label: (string | string[])) {
    const current = obj[key];
    if (current) {
        mustHaveTextAndAttributes(current, true, "rdfs:label");
        obj[key] = textValueWithCommonLabel(current, label, key);
    }
}
/**
 * Wraps a value in an array unless it already is one.
 *
 * @param possibleList any value
 * @returns an empty array for null or undefined, the same array instance when
 *          possibleList is an array, and a one-element array otherwise
 */
function forceToArray(possibleList: any): any[] {
    if (possibleList === null || possibleList === undefined) {
        return [];
    }
    // Array.isArray cannot be fooled by objects that override Symbol.toStringTag,
    // unlike a check on the Object.prototype.toString tag.
    return Array.isArray(possibleList) ? possibleList : [possibleList];
}
/**
 * Ensures that a truthy property of obj holds an array.
 *
 * Falsy properties (including missing ones) are left untouched; any other
 * value is replaced by the result of forceToArray.
 *
 * @param obj object that is modified in place
 * @param propname name of the property to normalise
 */
function forcePropertyToArray(obj: any, propname: string): void {
    const current = obj[propname];
    if (!current) {
        return;
    }
    obj[propname] = forceToArray(current);
}
/**
 * Calls f for every node of a NodeList, in index order.
 *
 * The length of the list is re-read on every iteration.
 *
 * @param nodes list to iterate over
 * @param f callback invoked with each node; its return value is ignored
 * @throws Error when the list yields null for an index below its length
 */
function forEach(nodes: NodeList, f: (s: Node) => any) {
    for (let i = 0; i < nodes.length; i++) {
        const node = nodes.item(i);
        if (node === null) {
            // Say where the list was inconsistent so the failure can be diagnosed.
            throw new Error(`Expected a node at index ${i} of a node list of length ${nodes.length}, but got null`);
        }
        f(node);
    }
}
/**
 * Builds an abstract from the text content of a node.
 *
 * The text is trimmed and every run of two or more whitespace characters is
 * replaced by a single space. Texts of at most one character, and the text
 * "-", yield no abstract.
 *
 * @param item node to read the text content from
 * @returns the normalised text under "@value" together with the node's
 *          toString() output prefixed by an XML declaration under
 *          "abstractXml", or undefined when there is no usable text
 */
function extractAbstract(item: Node): (Abstract | undefined) {
    const rawText = item.textContent;
    if (rawText === null) {
        return undefined;
    }

    const value = rawText.trim().replace(/\s\s+/g, " ");
    if (value.length <= 1 || value === "-") {
        return undefined;
    }

    return {
        abstractXml: "<?xml version=\"1.0\" encoding=\"utf-8\"?>" + item.toString(),
        "@value": value
    };
}
/**
 * Unwraps the "open-rechtspraak" entry of a depth-0 object.
 *
 * Objects at any other depth, and objects without a truthy
 * "open-rechtspraak" entry, are returned unchanged.
 *
 * @param depth depth of obj in the converted tree
 * @param obj converted node
 * @returns obj["open-rechtspraak"] when unwrapping applies, otherwise obj
 * @throws Error when obj has more than two keys, or exactly two keys and a
 *         truthy "#text"; the keys are logged to console.error first
 */
function flattenOutRootElement(depth: number, obj: any) {
    if (depth !== 0 || !obj["open-rechtspraak"]) {
        return obj;
    }

    const keys = Object.keys(obj);
    const tooManyKeys = keys.length > 2;
    const textNextToRoot = keys.length === 2 && !!obj["#text"];
    if (tooManyKeys || textNextToRoot) {
        console.error(keys);
        throw new Error("Expected 1 key in 'open-rechtspraak'");
    }
    return obj["open-rechtspraak"];
}
/**
 * Recursively converts a DOM node into a plain JavaScript value.
 *
 * Element attributes are collected under "@attributes". Each child is stored
 * under its node name (text children under "#text"); when a name occurs more
 * than once the values are gathered into an array. Processing instructions are
 * skipped. A child whose name matches /uitspraak|conclusie/ is not converted:
 * if it has text content, that text (trimmed, runs of whitespace collapsed) is
 * stored under "textContent" instead. At depth 1, an "inhoudsindicatie" child
 * of "open-rechtspraak" is stored under "abstract" via extractAbstract.
 * Objects at depth 3 additionally get a fixed set of property normalisations
 * (forced arrays and label-checked text values).
 *
 * @param parent node to convert
 * @param depth node depth; defaults to 0 and is increased by 1 for every level of recursion
 * @returns an object; a string for non-blank text nodes and for objects whose
 *          only key is a non-empty "#text"; or undefined for blank text nodes
 */
export function xmlToJson(parent: Node, depth: number = 0): any {
// Create the return object
let obj: any = {}; // TODO formalize type?
if (parent.nodeType === 1) { // element Node.ELEMENT_NODE
// Copy every attribute into obj["@attributes"], keyed by attribute node name
let parentElement = parent as Element;
if (parentElement.attributes.length > 0) {
obj["@attributes"] = {};
for (let j = 0; j < parentElement.attributes.length; j++) {
const attribute = parentElement.attributes.item(j);
if (attribute) {
obj["@attributes"][attribute.nodeName] = attribute.nodeValue;
}
}
}
} else if (parent.nodeType === 3) { // Node.TEXT_NODE
if (parent.nodeValue && parent.nodeValue.trim().length > 0) {
// Non-blank text node: the result is the text itself
obj = parent.textContent;
} else {
// Blank (empty or whitespace-only) text node: no value
obj = undefined;
}
}
if (parent.hasChildNodes()) {
forEach(parent.childNodes, (item: Node) => {
if (item.nodeType !== 7) { // Processing Instruction
const childNodeName: string = item.nodeName;
//if (nodeName.match(/inhoudsindicatie/))console.log(depth, xml.nodeName);
if (depth === 1
&& childNodeName === "inhoudsindicatie"
&& parent.nodeName === "open-rechtspraak") {
// Stored as an abstract instead of being converted recursively
obj.abstract = extractAbstract(item);
} else {
if (childNodeName.match(/uitspraak|conclusie/)) {
// Not converted recursively: only the normalised text content is kept
if (!!item.textContent)
obj.textContent = item.textContent.trim().replace(/\s\s+/g, " ");
} else {
if (typeof(obj[childNodeName]) === "undefined") {
// First value seen under this name: store the converted child directly
obj[childNodeName] = xmlToJson(item, depth + 1);
} else {
// A value already exists under this name: collect the values in an array
if (typeof(obj[childNodeName].push) === "undefined") {
// The existing value has no push method, so it is not an array yet: promote it to one.
// The previous value is dropped only when it is a non-empty, whitespace-only string.
const old = obj[childNodeName];
obj[childNodeName] = [];
if (!(old && old.trim && old.trim().length <= 0)) obj[childNodeName].push(old);
}
// Skip blank text nodes; convert and append every other child
if (!(item.nodeType === 3 && (!item.nodeValue || item.nodeValue.trim().length <= 0))) {
obj[childNodeName].push(xmlToJson(item, depth + 1));
}
}
}
}
}
});
// Normalise collected text: join multiple text segments, then trim and collapse whitespace to single spaces
if (obj["#text"]) {
if (obj["#text"].push)
obj["#text"] = obj["#text"].join("");
obj["#text"] = obj["#text"].trim().replace(/\s+/g, " ").trim();
if (obj["#text"].length <= 0)
obj["#text"] = undefined;
}
}
////
// Specific interventions
////
if (obj != undefined) {
// At depth 0 this unwraps the "open-rechtspraak" entry, if present
obj = flattenOutRootElement(depth, obj);
if (depth === 3) {
// Properties that must always be arrays when present
forcePropertyToArray(obj, "psi:procedure");
forcePropertyToArray(obj, "dcterms:references");
forcePropertyToArray(obj, "dcterms:relation");
forcePropertyToArray(obj, "dcterms:isReplacedBy");
forcePropertyToArray(obj, "dcterms:replaces");
// Reduce each entry to its text value, requiring the rdfs:label "Vervangt"
if (obj["dcterms:replaces"]) obj["dcterms:replaces"] = obj["dcterms:replaces"].map(
(replaces: any) => textValueWithCommonLabel(replaces, "Vervangt")
);
// Reduce these properties to their text value, requiring the given rdfs:label
setTextValueWithCommonLabel(obj, "dcterms:issued", "Publicatiedatum");
setTextValueWithCommonLabel(obj, "dcterms:spatial", "Zittingsplaats");
setTextValueWithCommonLabel(obj, "dcterms:date", ["Uitspraakdatum", "Datum genomen"]);
forcePropertyToArray(obj, "psi:zaaknummer");
if (!!obj["psi:zaaknummer"])
obj["psi:zaaknummer"] = obj["psi:zaaknummer"].map(
(z: any) => textValueWithCommonLabel(z, "Zaaknr")
);
if (obj["dcterms:subject"]) obj["dcterms:subject"] = forceToArray(obj["dcterms:subject"]);
// if (obj['psi:zaaknummer']) {
// TODO parse in app
// obj['psi:zaaknummer'] = obj['psi:zaaknummer'].split(/[,;]|(?: en )/).map(s=>s.trim()).filter(s=>s !== '');
// obj['psi:zaaknummer'].filter(s=>!s.match(/^(?:[A-Z]+\s*)?[0-9]+\/[0-9-]+(?:\s[A-Z])?$/)).map(s =>
// console.error(obj['dcterms:identifier'] + ": Possible multiple values '" + s + "'")
// )
// }
// Replace "dcterms:hasVersion" by the list of its "rdf:li" strings when it contains an "rdf:list"
if (obj["dcterms:hasVersion"]) {
mustHaveTextAndAttributes(obj["dcterms:hasVersion"], false, "rdfs:label", "resourceIdentifier");
let hasVersionList = obj["dcterms:hasVersion"]["rdf:list"];
if (!!hasVersionList)
obj["dcterms:hasVersion"] = forceToArray(hasVersionList["rdf:li"]).map(s => throwIfNotString(s, "dcterms:hasVersion"));
}
if (obj["dcterms:publisher"])
obj["dcterms:publisher"] = forceToArray(obj["dcterms:publisher"]);
}
// An object whose only key is a non-empty "#text" collapses to that text
if (Object.keys(obj).length === 1 && !!obj["#text"]) obj = obj["#text"];
}
// Set "@attributes" to undefined when none of the attributes has a truthy value
// (the key itself stays on the object)
if (typeof obj === "object" && obj["@attributes"])
if (Object.keys(obj["@attributes"]).filter(k => !!obj["@attributes"][k]).length <= 0)
obj["@attributes"] = undefined;
return obj;
}
/**
 * Converts a parsed XML document into Rechtspraak metadata.
 *
 * The document is first turned into a plain value by xmlToJson, starting at
 * depth 0, and that value is then passed through refineMetadata.
 *
 * @param xml parsed XML document
 * @returns the result of refineMetadata for the converted document
 */
export function toJsonLd(xml: Document): RechtspraakMetadata {
    return refineMetadata(xmlToJson(xml, 0));
}
/**
 * Parses an XML string with a DOMParser and converts the resulting document
 * with toJsonLd.
 *
 * @param xmlString XML source text
 * @returns the metadata produced by toJsonLd for the parsed document
 */
export function toJsonLdFromXmlString(xmlString: string): RechtspraakMetadata {
    const parsedDocument = new DOMParser().parseFromString(xmlString);
    return toJsonLd(parsedDocument);
}

// The string-based converter is also the module's default export.
export default toJsonLdFromXmlString;