jeresig/pharos-images

View on GitHub
converters/marburg.js

Summary

Maintainability
C
1 day
Test Coverage
"use strict";

const concat = require("concat-stream");
const libxmljs = require("libxmljs");

const types = {
    "Gemälde": "painting",
    "Malerei": "painting",
    "Malerei?": "painting",
    "Zeichenkunst": "drawing",
    "Angewandte Kunst": "decorative arts",
    "Skulptur": "sculpture",
    "Architektur": "architecture",
    "Altar": "sculpture",
};

const propMap = {
    id: ["recordInfoLink", (id) => id.replace(/^.*[/]obj/, "")],
    url: "recordInfoLink",
    title: "titleSet/appellationValue[@pref='preferred']",
    objectType: [
        "classification[@type='Gattung']/term[@addedSearchTerm='yes']",
        (val, getByTagName) => {
            const result = val || getByTagName("classification/term") ||
                getByTagName("objectWorkType/term[@addedSearchTerm='yes']") ||
                getByTagName("objectWorkType/term");
            // Return undefined for types that aren't found
            return types[result];
        },
    ],
    dates: {
        every: "eventDate/displayDate",
        data: (value) => value.replace(/^um /, "ca "),
    },
    // NOTE(jeresig): There are multiple mediums represented, we don't have a
    // good way to combine them.
    medium: "termMaterialsTech//term",
    dimensions: {
        every: "measurementsSet",
        data: (value, getByTagName) => {
            return (getByTagName("measurementValue") +
                getByTagName("measurementUnit")).replace(/,/g, ".");
        },
    },
    locations: {
        every: "repositorySet",
        data: (val, getByTagName) => {
            const city = getByTagName(
                "repositoryLocation/namePlaceSet/appellationValue");
            const name = getByTagName(
                "repositoryName/legalBodyName/appellationValue") || city;
            const country = getByTagName(
                "partOfPlace[@politicalEntity='Staat']//appellationValue");
            if (name || city || country) {
                return {name, city, country};
            }
        },
    },
    artists: {
        every: "actorInRole/actor[@type='person']",
        data: (val, getByTagName) => {
            const name = getByTagName(
                "nameActorSet/appellationValue[@pref='preferred']") ||
                getByTagName("nameActorSet/appellationValue");
            const pseudonym = getByTagName(
                "nameActorSet/appellationValue[@pref='alternative']");
            const startDate = getByTagName("vitalDatesActor/earliestDate");
            const endDate = getByTagName("vitalDatesActor/latestDate");
            let dates;
            if (startDate || endDate) {
                dates = [{
                    start: parseFloat(startDate) || undefined,
                    end: parseFloat(endDate) || undefined,
                }];
            }
            if (name) {
                return {
                    name,
                    pseudonym,
                    dates,
                };
            }
        },
    },
    images: {
        every: "resourceRepresentation[@type='image_full']/linkResource",
        data: (val) => `${val.replace(/^.*[/]/, "")}.jpg`,
    },
    categories: {
        every: "subjectConcept/term[@pref='preferred']",
        data: (val) => val,
    },
};

const searchByProps = function(root, propMap) {
    const results = {};

    const getByTagName = (name) => {
        const node = root.get(`.//${name}`);
        if (node) {
            return (node.value ?
                node.value() :
                node.text());
        }
    };

    if (typeof propMap === "function") {
        return propMap(root.text(), getByTagName);
    }

    for (const propName in propMap) {
        let searchValue = propMap[propName];
        const hasFilter = Array.isArray(searchValue);

        if (hasFilter) {
            searchValue = searchValue[0];
        }

        if (typeof searchValue === "string") {
            if (searchValue === ".") {
                results[propName] = root.text();

            } else {
                results[propName] = getByTagName(searchValue);
            }

            if (hasFilter) {
                results[propName] =
                    propMap[propName][1](results[propName], getByTagName);
            }

        } else if (typeof searchValue === "object") {
            if (searchValue.every) {
                const matches = root.find(`.//${searchValue.every}`);
                results[propName] = matches.map(
                    (node) => searchByProps(node, searchValue.data))
                    .filter((val) => !!val);
            } else {
                results[propName] = searchByProps(root, searchValue);
            }
        }
    }

    return results;
};

const combine = (root, match, props) => {
    props.forEach((name) => {
        root[name] = root[name].concat(match[name]);
    });
};

module.exports = {
    files: [
        "A LIDO-formatted XML file.",
    ],

    processFiles(fileStreams, callback) {
        const byId = {};

        fileStreams[0].pipe(concat((fileData) => {
            try {
                const xmlDoc = libxmljs.parseXml(
                    fileData.toString("utf8").replace(/lido:/g, ""));
                const matches = xmlDoc.find("//lido")
                    .map((node) => searchByProps(node, propMap));

                matches.forEach((match) => {
                    if (match.id in byId) {
                        if (match.images.length > 0) {
                            combine(byId[match.id], match,
                                ["images", "dimensions", "categories"]);
                        }
                    } else {
                        Object.assign(match, {lang: "de"});
                        byId[match.id] = match;
                    }
                });

                const results = Object.keys(byId)
                    .map((id) => byId[id])
                    .filter((match) => match.images.length > 0);
                callback(null, results);
            } catch (e) {
                callback(e);
            }
        }));
    },
};