netdudes/DataSourceryBundle

View on GitHub
UQL/Lexer.php

Summary

Maintainability
A
0 mins
Test Coverage
<?php

namespace Netdudes\DataSourceryBundle\UQL;

use Netdudes\DataSourceryBundle\UQL\Exception\UqlLexerException;

/**
 * Class Lexer
 *
 * The Lexer component has the task of translating the user-typed syntax into
 * concrete Tokens with definite meanings in the language's syntax.
 *
 * @package Netdudes\NetdudesDataSourceryBundle\UQL
 */
class Lexer
{
    /**
     * Lex the input string by moving through it trying to match any of
     * the defined language tokens.
     *
     * @param $string
     *
     * @return array
     * @throws \Exception
     */
    public static function lex($string)
    {
        $tokens = [];
        $cursor = 0;

        while ($cursor < strlen($string)) {
            $result = static::matchToken($string, $cursor);
            if ($result === false) {
                throw new UqlLexerException('Can\'t parse at character ' . $cursor . ': "' . substr($string, $cursor, 50) . '[...]"');
            } elseif ($result['token'] != "T_WHITESPACE") {
                // We found a non-whitespace token. Store it.
                $tokens[] = $result;
            }
            $cursor += strlen($result['match']);
        }

        return $tokens;
    }

    /**
     * Tries to match any of the tokens to the current cursor position.
     *
     * @param $string
     * @param $cursor
     *
     * @return array|bool
     */
    public static function matchToken($string, $cursor)
    {
        $string = substr($string, $cursor);
        foreach (Tokens::$terminalTokens as $regex => $token) {
            if (preg_match($regex, $string, $matches)) {
                return [
                    'match' => $matches[1],
                    'token' => $token,
                ];
            }
        }

        return false;
    }
}