longitude-one/geo-parser

View on GitHub
lib/LongitudeOne/Geo/String/Parser.php

Summary

Maintainability
B
5 hrs
Test Coverage
A
100%
<?php

/**
 * This file is part of the LongitudeOne GeoParser project.
 *
 * PHP 8.1 | 8.2 | 8.3
 *
 * Copyright LongitudeOne - Alexandre Tranchant - Derek J. Lambert.
 * Copyright 2024.
 *
 */

namespace LongitudeOne\Geo\String;

use Doctrine\Common\Lexer\Token;
use LongitudeOne\Geo\String\Exception\RangeException;
use LongitudeOne\Geo\String\Exception\UnexpectedValueException;

/**
 * Parser for geographic coordinate strings.
 *
 * The parser consumes the token stream produced by Lexer and returns either a single signed coordinate
 * or a pair of signed coordinates. Each private method matches one part of a value
 * (sign, degrees, minutes, seconds, component symbol, cardinal direction).
 *
 * NOTE(review): several private methods are declared to return string|int and hand their result to
 * float|int contexts; this relies on the file not declaring strict_types. Confirm before enabling it.
 */
class Parser
{
    /**
     * Defaults to an empty string so that calling parse() without ever providing an input fails with a
     * syntax error instead of an "uninitialized typed property" Error.
     *
     * @var string input string being parsed
     */
    private string $input = '';

    /**
     * @var Lexer lexer used to tokenize the input
     */
    private Lexer $lexer;

    /**
     * Cardinal direction requirement for the coordinate being parsed.
     *
     * - null: nothing is known yet (start of a parse);
     * - Lexer::T_CARDINAL_LAT or Lexer::T_CARDINAL_LON: the cardinal token type that must be matched;
     * - -1: the first coordinate had no cardinal direction.
     *
     * @var int|null
     */
    private ?int $nextCardinal = null;

    /**
     * Component symbol requirement for the next symbol() call.
     *
     * - null: no requirement has been established yet;
     * - Lexer::T_COLON, Lexer::T_DEGREE, Lexer::T_APOSTROPHE or Lexer::T_QUOTE: the symbol expected next;
     * - false: symbol() was called but found no symbol to match.
     *
     * @var int|false|null
     */
    private int|false|null $nextSymbol = null;

    /**
     * Constructor.
     *
     * Creates the lexer and stores the optional input so that parse() can be called without argument.
     *
     * @param string|int|null $input value to parse later; null keeps the input empty
     */
    public function __construct(string|int|null $input = null)
    {
        $this->lexer = new Lexer();

        if (null !== $input) {
            $this->input = (string) $input;
        }
    }

    /**
     * Parse input string.
     *
     * When $input is null, the input given to the constructor (or to a previous parse() call) is parsed.
     *
     * @param string|int|null $input value to parse, or null to reuse the stored input
     *
     * @return float|int|array<int|float> a single coordinate, or an array of two coordinates
     *
     * @throws RangeException           when cardinal(), minutes() or seconds() rejects a value
     * @throws UnexpectedValueException when the input does not match the expected syntax
     */
    public function parse(string|int|null $input = null): float|int|array
    {
        if (null !== $input) {
            $this->input = (string) $input;
        }

        // Forget the requirements left over from a previous run
        $this->nextCardinal = null;
        $this->nextSymbol = null;

        $this->lexer->setInput($this->input);

        // Advance the lexer so that the first token becomes the lookahead
        $this->lexer->moveNext();

        return $this->point();
    }

    /**
     * Match cardinal direction and return the signed value.
     *
     * North and east keep the value as is, south and west negate it. The matched direction also decides
     * which cardinal token type the other coordinate of the pair must use.
     *
     * @param int|float $value unsigned coordinate value the direction applies to
     *
     * @return int|float $value multiplied by 1 or -1
     *
     * @throws RangeException when $value is greater than 90 for N/S or greater than 180 for E/W
     */
    private function cardinal(int|float $value): int|float
    {
        // Without a requirement from a previous coordinate, accept whichever cardinal type comes next
        $this->nextCardinal ??= Lexer::T_CARDINAL_LON === $this->lexer->lookahead?->type
            ? Lexer::T_CARDINAL_LON
            : Lexer::T_CARDINAL_LAT;

        // Match the cardinal direction: N, W, S, E, or n, w, s, e
        $cardinal = $this->match($this->nextCardinal);

        // Each arm is [sign, allowed range, cardinal type required for the other coordinate].
        // The default arm keeps the historical fall-through: sign unchanged, range 0, requirement unchanged.
        [$sign, $range, $following] = match (strtolower((string) $cardinal)) {
            'n' => [1, 90, Lexer::T_CARDINAL_LON],
            's' => [-1, 90, Lexer::T_CARDINAL_LON],
            'e' => [1, 180, Lexer::T_CARDINAL_LAT],
            'w' => [-1, 180, Lexer::T_CARDINAL_LAT],
            default => [1, 0, $this->nextCardinal],
        };

        $this->nextCardinal = $following;

        // Reject values outside the range allowed for this direction
        if ($value > $range) {
            $code = 180 === $range
                ? RangeException::LONGITUDE_OUT_OF_RANGE
                : RangeException::LATITUDE_OUT_OF_RANGE;

            throw new RangeException($this->input, $code);
        }

        return $value * $sign;
    }

    /**
     * Match and return single coordinate value.
     *
     * The sign comes either from a leading plus/minus token or from a trailing cardinal direction,
     * never from both.
     *
     * @return float|int signed coordinate value
     */
    private function coordinate(): float|int
    {
        // Null means that no explicit sign has been matched
        $sign = null;

        // An explicit sign is only accepted while no cardinal direction is required
        if (!($this->nextCardinal > 0) && $this->lexer->isNextTokenAny([Lexer::T_PLUS, Lexer::T_MINUS])) {
            $sign = $this->sign();
        }

        // Get the unsigned coordinate value
        $coordinate = $this->degrees();

        // Without explicit sign, take it from the cardinal direction when one is required,
        // or when this is the first coordinate and a cardinal direction follows it
        $cardinalFollows = null === $sign
            && (
                $this->nextCardinal > 0
                || (null === $this->nextCardinal && $this->lexer->isNextTokenAny([Lexer::T_CARDINAL_LAT, Lexer::T_CARDINAL_LON]))
            );

        if ($cardinalFollows) {
            return $this->cardinal($coordinate);
        }

        // Remember that this coordinate had no cardinal direction
        $this->nextCardinal = -1;

        // Apply the explicit sign, if any
        return ($sign ?? 1) * $coordinate;
    }

    /**
     * Match and return degree value.
     *
     * A float is returned as is (optionally followed by a degree symbol). An integer may be followed by
     * a symbol and then by minutes and seconds, which are added as fractions of a degree.
     *
     * @return float|int unsigned value in degrees
     */
    private function degrees(): float|int
    {
        // A new value starts with degrees: a pending minutes or seconds requirement becomes a degree requirement
        if (Lexer::T_APOSTROPHE === $this->nextSymbol || Lexer::T_QUOTE === $this->nextSymbol) {
            $this->nextSymbol = Lexer::T_DEGREE;
        }

        // If degrees is a float, there will be no minutes or seconds
        if ($this->lexer->isNextToken(Lexer::T_FLOAT)) {
            /** @var float $degrees */
            $degrees = $this->match(Lexer::T_FLOAT);

            // A degree symbol may follow a float; when it does, the other value of the pair must have one too
            if ($this->lexer->isNextToken(Lexer::T_DEGREE)) {
                $this->match(Lexer::T_DEGREE);
                $this->nextSymbol = Lexer::T_DEGREE;
            }

            return $degrees;
        }

        // If degrees isn't a float, it must be an integer
        /** @var int $degrees */
        $degrees = $this->number();

        // If no symbol follows the integer, this value is complete
        if (!$this->symbol()) {
            return $degrees;
        }

        // Look one token past the lookahead to detect the start of the second value
        $glimpse = $this->lexer->glimpse();

        // Unless colons are used, a number directly followed by a degree symbol belongs to the next value
        // (pair separated by a space instead of a comma), so this value is complete
        if (
            Lexer::T_COLON !== $this->nextSymbol
            && $this->lexer->isNextTokenAny([Lexer::T_INTEGER, Lexer::T_FLOAT])
            && Lexer::T_DEGREE === $glimpse?->type
        ) {
            return $degrees;
        }

        // Add minutes (and seconds) as a fraction of a degree
        return $degrees + (float) $this->minutes();
    }

    /**
     * Match token and return value.
     *
     * @param int $token token type expected as next token
     *
     * @return string|int value of the matched token
     *
     * @throws UnexpectedValueException when the next token is not of the expected type
     */
    private function match(int $token): string|int
    {
        // If the next token isn't of the specified type, throw an error
        if (!$this->lexer->isNextToken($token)) {
            throw $this->syntaxError((string) $this->lexer->getLiteral($token));
        }

        // Move the lexer forward: the matched token becomes the current token
        $this->lexer->moveNext();

        /** @var Token<int, string|int> $nextToken nextToken cannot be null, because of the above test. */
        $nextToken = $this->lexer->token;

        return $nextToken->value;
    }

    /**
     * Match and return minutes value, including any seconds, as a fraction of a degree.
     *
     * NOTE(review): the fraction is cast to string and cast back to float by the callers; PHP formats
     * floats cast to string according to the "precision" ini setting, so digits may be lost. Kept as is
     * because changing it would change returned values.
     *
     * @return string|int fraction of a degree, or 0 when no minutes are present
     *
     * @throws RangeException when minutes are greater than or equal to 60
     */
    private function minutes(): string|int
    {
        $usingColon = Lexer::T_COLON === $this->nextSymbol;

        // If using colon or minutes is an integer, parse value
        if ($usingColon || $this->lexer->isNextToken(Lexer::T_INTEGER)) {
            /** @var int $readMinutes */
            $readMinutes = $this->match(Lexer::T_INTEGER);

            // Minutes must be lower than 60
            if ($readMinutes >= 60) {
                throw new RangeException($this->input, RangeException::MINUTES_OUT_OF_RANGE);
            }

            // Convert minutes to a fraction of a degree
            $minutes = $readMinutes / 60;

            // If using colon and no other colon follows, the value is done
            if ($usingColon && !$this->lexer->isNextToken(Lexer::T_COLON)) {
                return (string) $minutes;
            }

            // Match minutes symbol
            $this->symbol();

            // Add seconds to value, then return the result
            return (string) ((float) $minutes + (float) $this->seconds());
        }

        // If minutes is a float, there will be no seconds
        if ($this->lexer->isNextToken(Lexer::T_FLOAT)) {
            $readMinutes = $this->match(Lexer::T_FLOAT);

            // Minutes must be lower than 60
            if ($readMinutes >= 60) {
                throw new RangeException($this->input, RangeException::MINUTES_OUT_OF_RANGE);
            }

            // Convert minutes to a fraction of a degree
            $minutes = (string) ((float) $readMinutes / 60);

            // Match minutes symbol
            $this->symbol();

            return $minutes;
        }

        // No minutes were present, so return 0
        return 0;
    }

    /**
     * Match integer or float token and return value.
     *
     * @return int|string value of the matched token
     *
     * @throws UnexpectedValueException when the next token is neither a float nor an integer
     */
    private function number(): int|string
    {
        // A float is tried first, then an integer
        foreach ([Lexer::T_FLOAT, Lexer::T_INTEGER] as $type) {
            if ($this->lexer->isNextToken($type)) {
                return $this->match($type);
            }
        }

        // Throw exception since nothing matched
        throw $this->syntaxError('LongitudeOne\Geo\String\Lexer::T_INTEGER or LongitudeOne\Geo\String\Lexer::T_FLOAT');
    }

    /**
     * Match and return single value or pair.
     *
     * @return float|int|array<int|float> the single coordinate, or an array holding both coordinates
     *
     * @throws UnexpectedValueException when tokens remain after the second coordinate
     */
    private function point(): float|int|array
    {
        // Get first coordinate value
        $x = $this->coordinate();

        // If there are no additional tokens, return the single coordinate
        if (null === $this->lexer->lookahead) {
            return $x;
        }

        // Coordinate pairs may be separated by a comma
        if ($this->lexer->isNextToken(Lexer::T_COMMA)) {
            $this->match(Lexer::T_COMMA);
        }

        // Get second coordinate value
        $y = $this->coordinate();

        // There should be no additional tokens
        if (null !== $this->lexer->lookahead) {
            throw $this->syntaxError('end of string');
        }

        return [$x, $y];
    }

    /**
     * Match and return seconds value as a fraction of a degree.
     *
     * NOTE(review): as in minutes(), the fraction goes through a string cast, which is subject to the
     * "precision" ini setting. Kept as is because changing it would change returned values.
     *
     * @return int|string fraction of a degree, or 0 when no seconds are present
     *
     * @throws RangeException when seconds are greater than or equal to 60
     */
    private function seconds(): int|string
    {
        // Seconds value can be an integer or a float; without one, no seconds were present
        if (!$this->lexer->isNextTokenAny([Lexer::T_INTEGER, Lexer::T_FLOAT])) {
            return 0;
        }

        $seconds = $this->number();

        // Seconds must be lower than 60
        if ($seconds >= 60) {
            throw new RangeException($this->input, RangeException::SECONDS_OUT_OF_RANGE);
        }

        // Convert seconds to a fraction of a degree
        $fraction = (string) ((float) $seconds / 3600);

        // Match seconds symbol, except when components are separated by colons
        if (Lexer::T_COLON !== $this->nextSymbol) {
            $this->symbol();
        }

        return $fraction;
    }

    /**
     * Match plus or minus sign and return coefficient.
     *
     * @return int 1 for plus, -1 for minus
     *
     * @throws UnexpectedValueException when the next token is neither a plus nor a minus
     */
    private function sign(): int
    {
        if ($this->lexer->isNextToken(Lexer::T_PLUS)) {
            $this->match(Lexer::T_PLUS);

            return 1;
        }

        // Anything else has to be a minus; match() throws otherwise
        $this->match(Lexer::T_MINUS);

        return -1;
    }

    /**
     * Match value component symbol if required or present.
     *
     * Updates the symbol requirement for the next call and returns it.
     *
     * @return bool|int|null the new symbol requirement; false when no symbol was matched
     *
     * @throws UnexpectedValueException when a required symbol is missing
     */
    private function symbol(): bool|int|null
    {
        // Without requirement, the first symbol met decides the notation used by the remaining components
        if (null === $this->nextSymbol) {
            if ($this->lexer->isNextToken(Lexer::T_COLON)) {
                $this->match(Lexer::T_COLON);

                // Every remaining component is separated by a colon
                return $this->nextSymbol = Lexer::T_COLON;
            }

            if ($this->lexer->isNextToken(Lexer::T_DEGREE)) {
                $this->match(Lexer::T_DEGREE);

                // The next symbol will be minutes
                return $this->nextSymbol = Lexer::T_APOSTROPHE;
            }
        }

        // Match the required symbol and update the requirement for the next one.
        // NOTE(review): switch compares loosely; this assumes none of these Lexer constants is 0,
        // otherwise a null or false requirement would match it. TODO confirm.
        switch ($this->nextSymbol) {
            case Lexer::T_COLON:
                $this->match(Lexer::T_COLON);

                return $this->nextSymbol;
            case Lexer::T_DEGREE:
                $this->match(Lexer::T_DEGREE);

                // The next symbol will be minutes
                return $this->nextSymbol = Lexer::T_APOSTROPHE;
            case Lexer::T_APOSTROPHE:
                $this->match(Lexer::T_APOSTROPHE);

                // The next symbol will be seconds
                return $this->nextSymbol = Lexer::T_QUOTE;
            case Lexer::T_QUOTE:
                $this->match(Lexer::T_QUOTE);

                return $this->nextSymbol;
        }

        // No symbol was required nor present: remember it for any remaining value
        return $this->nextSymbol = false;
    }

    /**
     * Create exception with a descriptive error message.
     *
     * The message reports the position and value of the lookahead token, or the end of the string
     * (with column -1) when there is no lookahead token.
     *
     * @param string $expected description of what the parser expected
     *
     * @return UnexpectedValueException the exception, to be thrown by the caller
     */
    private function syntaxError(string $expected): UnexpectedValueException
    {
        $token = $this->lexer->lookahead;

        $expected = sprintf('Expected %s, got', $expected);
        $found = null === $token ? 'end of string.' : sprintf('"%s"', $token->value);

        $message = sprintf(
            '[Syntax Error] line 0, col %d: Error: %s %s in value "%s"',
            $token->position ?? -1,
            $expected,
            $found,
            $this->input
        );

        return new UnexpectedValueException($message);
    }
}