scriptotek/php-marc

View on GitHub
src/Importers/XmlImporter.php

Summary

Maintainability
A
30 mins
Test Coverage
<?php

namespace Scriptotek\Marc\Importers;

use File_MARCXML;
use Scriptotek\Marc\Collection;
use Scriptotek\Marc\Exceptions\RecordNotFound;
use Scriptotek\Marc\Exceptions\XmlException;
use Scriptotek\Marc\Factory;
use SimpleXMLElement;

class XmlImporter
{
    protected $factory;

    /* var SimpleXMLElement */
    protected $source;

    /**
     * XmlImporter constructor.
     *
     * @param string|SimpleXMLElement $data  Filename, XML string or SimpleXMLElement object
     * @param string $ns  URI or prefix of the namespace
     * @param bool $isPrefix TRUE if $ns is a prefix, FALSE if it's a URI; defaults to FALSE
     * @param string $factory  (optional) Object factory, probably no need to set this outside testing.
     */
    public function __construct($data, $ns = '', $isPrefix = false, $factory = null)
    {
        $this->factory = isset($factory) ? $factory : new Factory();

        if (is_a($data, SimpleXMLElement::class)) {
            $this->source = $data;
            return;
        }

        if (strlen($data) < 256 && file_exists($data)) {
            $data = file_get_contents($data);
        }

        // Store errors internally so that we can fetch them with libxml_get_errors() later
        libxml_use_internal_errors(true);

        $this->source = simplexml_load_string($data, 'SimpleXMLElement', 0, $ns, $isPrefix);
        if (false === $this->source) {
            throw new XmlException(libxml_get_errors());
        }
    }

    public function getMarcNamespace($namespaces)
    {
        foreach ($namespaces as $prefix => $ns) {
            if ($ns == 'info:lc/xmlns/marcxchange-v1') {
                return [$prefix, $ns];
            } elseif ($ns == 'http://www.loc.gov/MARC21/slim') {
                return [$prefix, $ns];
            }
        }

        return ['', ''];
    }

    public function getRecords()
    {
        $this->source->registerXPathNamespace('m', 'http://www.loc.gov/MARC21/slim');
        $this->source->registerXPathNamespace('x', 'info:lc/xmlns/marcxchange-v1');

        // If root node is record:
        if ($this->source->getName() == 'record') {
            return [$this->source];
        }

        $marcRecords = $this->source->xpath('.//x:record');
        if (count($marcRecords)) {
            return $marcRecords;
        }
        $marcRecords = $this->source->xpath('.//m:record');
        if (count($marcRecords)) {
            return $marcRecords;
        }
        $marcRecords = $this->source->xpath('.//record');
        if (count($marcRecords)) {
            return $marcRecords;
        }

        return [];
    }

    public function getFirstRecord()
    {
        $records = $this->getRecords();
        if (!count($records)) {
            throw new RecordNotFound();
        }

        $record = $records[0];

        list($prefix, $ns) = $this->getMarcNamespace($record->getNamespaces(true));

        $parser = $this->factory->make('File_MARCXML', $record, File_MARCXML::SOURCE_SIMPLEXMLELEMENT, $ns);

        return (new Collection($parser))->$this->getFirstRecord();
    }

    public function getCollection(): Collection
    {
        $records = $this->getRecords();
        if (!count($records)) {
            return new Collection();
        }

        list($prefix, $ns) = $this->getMarcNamespace($records[0]->getNamespaces(true));

        $pprefix = empty($prefix) ? '' : "$prefix:";

        $records = array_map(function (SimpleXMLElement $record) {
            $x = $record->asXML();

            // Strip away XML declaration.
            // Tried LIBXML_NOXMLDECL first, but didn't work,
            // https://bugs.php.net/bug.php?id=50989
            $x = trim(preg_replace('/^\<\?xml.*?\?\>/', '', $x));

            return $x;
        }, $records);

        $nsDef = '';
        if (!empty($ns)) {
            if (empty($prefix)) {
                $nsDef = " xmlns=\"$ns\"";
            } else {
                $nsDef = " xmlns:$prefix=\"$ns\"";
            }
        }
        $marcCollection = '<?xml version="1.0" encoding="UTF-8"?>' .
            '<' . $pprefix . 'collection' . $nsDef . '>' .
            implode('', $records) .
            '</' . $pprefix . 'collection>';

        $parser = $this->factory->make('File_MARCXML', $marcCollection, File_MARCXML::SOURCE_STRING, $prefix, true);

        return new Collection($parser);
    }
}