
View on GitHub


0 mins
Test Coverage

namespace Netdudes\DataSourceryBundle\UQL;

use Netdudes\DataSourceryBundle\UQL\Exception\UqlLexerException;

/**
 * Class Lexer
 * The Lexer component has the task of translating the user-typed syntax into
 * concrete Tokens with definite meanings in the language's syntax.
 * @package Netdudes\DataSourceryBundle\UQL
 */
class Lexer
{
    /**
     * Lex the input string by walking through it and, at each cursor position,
     * trying to match one of the defined language tokens.
     *
     * Tokens identified as "T_WHITESPACE" are consumed but left out of the result.
     *
     * @param string $string The raw text to tokenise.
     *
     * @return array List of matchToken() results (arrays with 'match' and 'token'
     *               keys), in the order they appear in the input.
     *
     * @throws UqlLexerException When nothing matches at the current position, or
     *                           when a token matches without consuming any input.
     */
    public static function lex($string)
    {
        $tokens = [];
        $cursor = 0;
        // The input is never modified inside the loop, so measure it once.
        $length = strlen($string);

        while ($cursor < $length) {
            $result = static::matchToken($string, $cursor);
            if ($result === false) {
                throw new UqlLexerException('Can\'t parse at character ' . $cursor . ': "' . substr($string, $cursor, 50) . '[...]"');
            }

            $consumed = strlen($result['match']);
            if ($consumed === 0) {
                // A pattern that matches without consuming input would leave the
                // cursor in place and make this loop spin forever; fail loudly.
                throw new UqlLexerException('Empty token match at character ' . $cursor . ': "' . substr($string, $cursor, 50) . '[...]"');
            }

            // Strict comparison: a non-string token id must never be mistaken
            // for whitespace through type juggling.
            if ($result['token'] !== 'T_WHITESPACE') {
                // We found a non-whitespace token. Store it.
                $tokens[] = $result;
            }

            $cursor += $consumed;
        }

        return $tokens;
    }

    /**
     * Tries to match any of the terminal tokens against the input starting at
     * the cursor position.
     *
     * Patterns are tried in the iteration order of Tokens::$terminalTokens and
     * the first one that matches wins. The reported 'match' is the pattern's
     * first capture group, not the whole match.
     * NOTE(review): lex() advances by the length of that capture, so the patterns
     * are presumably anchored to the start of the subject - confirm in Tokens.
     *
     * @param string $string The full input text.
     * @param int    $cursor Byte offset into $string at which to start matching.
     *
     * @return array|bool ['match' => captured text, 'token' => token identifier],
     *                    or false when no pattern matches.
     */
    public static function matchToken($string, $cursor)
    {
        $remainder = substr($string, $cursor);
        foreach (Tokens::$terminalTokens as $regex => $token) {
            if (preg_match($regex, $remainder, $matches) === 1) {
                return [
                    'match' => $matches[1],
                    'token' => $token,
                ];
            }
        }

        return false;
    }
}