codeformunich/Muenchen-Transparent

View on GitHub
protected/RISParser/AntragData.php

Summary

Maintainability
C
7 hrs
Test Coverage
<?php

declare(strict_types=1);

class AntragData
{
    public int $id;
    public string $antragsnummer;
    public string $status;
    public ?int $baNr = null;
    public ?int $baId = null;
    public string $title;
    public string $wahlperiode;
    public ?\DateTime $gestelltAm;
    public ?\DateTime $registriertAm;
    public ?\DateTime $bearbeitungsfrist;
    public ?\DateTime $erledigtAm;
    public ?string $art;
    public ?string $typ;
    public ?string $bearbeitungsart;
    public ?int $referatId = null;
    public ?string $referatName = null;

    /** @var string[] */
    public array $initiativeNamen;

    /** @var string[] */
    public array $gestelltVon;

    /** @var DokumentLink[] */
    public array $dokumentLinks;

    /** @var StadtratsantragErgebnis[] */
    public array $ergebnisse;

    public static function parseFromHtml(string $html, ?int $idFallback = null): ?self
    {
        if (!preg_match('/<section class="card">.*<div><h2>Betreff<\/h2><\/div>.*<div class="card-body">\s*<div[^>]*>(?<title>[^<]*)<\/div>/siuU', $html, $match)) {
            throw new ParsingException('Not found: title');
        }
        $entry = new self();
        $entry->title = html_entity_decode($match['title'], ENT_COMPAT, 'UTF-8');

        if (!preg_match('/<h1[^>]*>.*(StR|BA)-(Antrag|Anfrage|Initiative) (?<nummer>[^<]*) <span[^>]*><span>\((?<status>[^)]*)\)<\/span>/siuU', $html, $match)) {
            throw new ParsingException('Not found: antragsnummer / status');
        }
        $entry->antragsnummer = str_replace(' ', '', $match['nummer']);
        $entry->status = $match['status'];

        if (preg_match('/<a[^>]*href="\.\/(?<id>\d+)\?/siu', $html, $match)) {
            $entry->id = intval($match['id']);
        } elseif (preg_match('/<a[^>]*href="\.\/bavorgaenge\/(?<id>\d+)"/siu', $html, $match)) {
            $entry->id = intval($match['id']);
        } elseif ($idFallback) {
            $entry->id = $idFallback;
        } else {
            throw new ParsingException('Not found: id');
        }

        if (!preg_match('/<div[^>]*>Wahlperiode:<\/div>\s*<div[^>]*>(?<wahlperiode>\d+-\d+)<\/div>/siuU', $html, $match)) {
            throw new ParsingException('Not found: wahlperiode');
        }
        $entry->wahlperiode = $match['wahlperiode'];

        if (preg_match('/Bezirksausschuss(<\/span>)?:<\/div>\s*<div[^>]*>\s*<a[^>]*gremium\/detail\/(?<baId>\d+)[^\d][^>]*>(?<baNr>\d+ -)/siuU', $html, $match)) {
            $entry->baId = intval($match['baId']);
            $entry->baNr = intval($match['baNr']);
        }

        if (preg_match('/<div[^>]*>Gestellt am:<\/div>\s*<div[^>]*>(?<date>\d+\.\d+\.\d+)<\/div>/siuU', $html, $match)) {
            $entry->gestelltAm = (\DateTime::createFromFormat('d.m.Y', $match['date']))->setTime(0, 0, 0);
        } else {
            $entry->gestelltAm = null;
        }
        if (preg_match('/<div[^>]*>Registriert am:<\/div>\s*<div[^>]*>(?<date>\d+\.\d+\.\d+)<\/div>/siuU', $html, $match)) {
            $entry->registriertAm = (\DateTime::createFromFormat('d.m.Y', $match['date']))->setTime(0, 0, 0);
        } elseif (preg_match('/<div[^>]*>TO aufgenommen am:<\/div>\s*<div[^>]*>(?<date>\d+\.\d+\.\d+)<\/div>/siuU', $html, $match)) {
            $entry->registriertAm = (\DateTime::createFromFormat('d.m.Y', $match['date']))->setTime(0, 0, 0);
        } else {
            $entry->registriertAm = null;
        }
        if (preg_match('/<div[^>]*>Bearbeitungs-Frist:<\/div>\s*<div[^>]*>(?<date>\d+\.\d+\.\d+)<\/div>/siuU', $html, $match)) {
            $entry->bearbeitungsfrist = (\DateTime::createFromFormat('d.m.Y', $match['date']))->setTime(0, 0, 0);
        } else {
            $entry->bearbeitungsfrist = null;
        }
        if (preg_match('/<div[^>]*>Erledigt am:<\/div>\s*<div[^>]*>(?<date>\d+\.\d+\.\d+)<\/div>/siuU', $html, $match)) {
            $entry->erledigtAm = (\DateTime::createFromFormat('d.m.Y', $match['date']))->setTime(0, 0, 0);
        } else {
            $entry->erledigtAm = null;
        }

        if (preg_match('/<div[^>]*>Art:<\/div>\s*<div[^>]*>\s*(<img[^>]*>)?\s*<span>(?<art>[^<]*)<\/span>/siuU', $html, $match)) {
            $entry->art = $match['art'];
        } else {
            $entry->art = null;
        }
        if (preg_match('/<div[^>]*>Bearbeitungs-Art:<\/div>\s*<div[^>]*>(?<value>[^<]*)<\/div>/siuU', $html, $match)) {
            $entry->bearbeitungsart = trim($match['value']);
        } else {
            $entry->bearbeitungsart = null;
        }
        if (preg_match('/<div[^>]*>Typ:<\/div>\s*<div[^>]*>(?<value>[^<]*)<\/div>/siuU', $html, $match)) {
            $entry->typ = trim($match['value']);
            $entry->typ = str_replace('Aenderung', 'Änderung', $entry->typ);
        } else {
            $entry->typ = null;
        }


        if (preg_match('/<div[^>]*>Gestellt von:<\/div>\s*<div[^>]*>(?<value>[^<]*)<\/div>/siuU', $html, $match)) {
            $entry->gestelltVon = array_filter(array_map('trim', explode(',', $match['value'])));
        } else {
            $entry->gestelltVon = [];
        }
        if (preg_match('/<div[^>]*>Initiative:<\/div>\s*<div[^>]*>(?<value>[^<]*)<\/div>/siuU', $html, $match)) {
            $entry->initiativeNamen = array_filter(array_map('trim', explode(',', $match['value'])));
        } else {
            $entry->initiativeNamen = [];
        }

        if (preg_match('/Zuständiges Referat:<\/div>\s*<div[^>]*>\s*<a[^>]*organisationseinheit\/detail\/(?<id>\d+)"[^>]*>(?<title>[^<]*)<\/a>/siuU', $html, $match)) {
            $entry->referatId = intval($match['id']);
            $entry->referatName = $match['title'];
        }

        $entry->dokumentLinks = [];
        $htmlPart = explode('<h2>Ergebnisse</h2>', $html);
        //preg_match_all('/<input[^>]*pdfselector[^>]*pdfid="(?<id>\d+)"[^>]*pdfname="(?<name>[^"]*)"/siu', $htmlPart[0], $matches);
        preg_match_all('/<a class="downloadlink" href="\.\.\/\.\.(?<url>\/dokument\/v\/(?<id>\d+))"[^>]*>(?<filename>(?<title>[^<]*)\.[^<.]*)</siuU', $htmlPart[0], $matches);
        for ($i = 0; $i < count($matches['id']); $i++) {
            $link = new DokumentLink();
            $link->filename = $matches['filename'][$i];
            $link->title = $matches['title'][$i];
            $link->id = intval($matches['id'][$i]);
            $link->url = $matches['url'][$i];
            $entry->dokumentLinks[] = $link;
        }


        $parts = explode('<h2>Ergebnisse</h2>', $html);
        $parts = explode('</section>', $parts[1]);
        preg_match_all('/<a [^>]*href="[^"]*detail\/(?<id>\d+)"[^>]*>(?<date>\d+\.\d+\.\d+)<\/a>/siuU', $parts[0], $matches);
        $entry->ergebnisse = [];
        for ($i = 0; $i < count($matches['id']); $i++) {
            $ergebnis = new StadtratsantragErgebnis();
            $ergebnis->sitzungId = intval($matches['id'][$i]);
            $ergebnis->sitzungAm = (\DateTime::createFromFormat('d.m.Y', $matches['date'][$i]))->setTime(0, 0, 0);
            $entry->ergebnisse[] = $ergebnis;
        }

        return $entry;
    }
}