giansalex/peru-consult

View on GitHub
src/Peru/Sunat/HtmlParser.php

Summary

Maintainability
A
35 mins
Test Coverage
<?php
/**
 * Created by PhpStorm.
 * User: Giansalex
 * Date: 13/01/2018
 * Time: 21:36.
 */

namespace Peru\Sunat;

use DOMDocument;
use DOMNode;
use DOMNodeList;
use DOMXPath;
use Generator;

/**
 * Class HtmlParser.
 * @deprecated obsoleto desde version 4.4, usar en vez Peru\Sunat\Parser\HtmlRecaptchaParser
 */
final class HtmlParser implements HtmlParserInterface
{
    /**
     * Parse html to dictionary.
     *
     * @param string $html
     *
     * @return array|false
     */
    public function parse(string $html)
    {
        $xp = $this->getXpathFromHtml($html);
        $table = $xp->query('./html/body/table[1]');

        if (0 == $table->length) {
            return false;
        }

        $nodes = $table->item(0)->childNodes;

        return $this->getKeyValues($nodes, $xp);
    }

    private function getKeyValues(DOMNodeList $nodes, DOMXPath $xp): array
    {
        $dic = [];
        foreach ($nodes as $item) {
            /** @var $item DOMNode */
            if ($this->isNotElement($item)) {
                continue;
            }

            $this->setValuesFromNode($xp, $item, $dic);
        }

        return $dic;
    }

    private function setValuesFromNode(DOMXPath $xp, DOMNode $item, &$dic): void
    {
        $isTitle = true;
        $title = '';
        foreach ($item->childNodes as $item2) {
            /** @var $item2 DOMNode */
            if ($this->isNotElement($item2)) {
                continue;
            }

            if ($isTitle) {
                $title = trim($item2->textContent);
                $isTitle = false;
            } else {
                $dic[$title] = $this->getContent($xp, $item2);
                $isTitle = true;
            }
        }
    }

    public static function getXpathFromHtml($html)
    {
        $dom = new DOMDocument();
        $prevState = libxml_use_internal_errors(true);
        $dom->loadHTML($html);
        libxml_clear_errors();
        libxml_use_internal_errors($prevState);

        return new DOMXPath($dom);
    }

    private function getContent(DOMXPath $xp, DOMNode $node)
    {
        $select = $xp->query('./select', $node);
        if ($select->length > 0) {
            $options = $select->item(0)->childNodes;

            return iterator_to_array($this->getValuesFromOption($options));
        }

        return trim($node->textContent);
    }

    private function getValuesFromOption(DOMNodeList $options): Generator
    {
        foreach ($options as $opt) {
            /** @var $opt DOMNode */
            if ('option' != $opt->nodeName) {
                continue;
            }
            yield trim($opt->textContent);
        }
    }

    private function isNotElement(DOMNode $node)
    {
        return XML_ELEMENT_NODE !== $node->nodeType;
    }
}