src/Json/Resources/json5/lexemes.pp2

Summary

Maintainability
Test Coverage
/**
 * --------------------------------------------------------------------------
 *  JSON5 Punctuators and Keywords
 * --------------------------------------------------------------------------
 *
 * The lexical grammar for JSON5 has as its terminal symbols characters
 * (Unicode code units) that conform to the rules for JSON5SourceCharacter.
 * It defines a set of productions, starting from the goal symbol
 * JSON5InputElement, that describe how sequences of such characters are
 * translated into a sequence of input elements.
 *
 * Input elements other than white space and comments form the terminal
 * symbols for the syntactic grammar for JSON5 and are called tokens.
 * These tokens are the identifiers, literals, and punctuators of the
 * JSON5 language. Simple white space and comments are discarded and do
 * not appear in the stream of input elements for the syntactic grammar.
 *
 * Productions of the lexical grammar are distinguished by having two
 * colons "::" as separating punctuation.
 *
 * @see https://spec.json5.org/#lexical-grammar
 *
 */



/**
 * --------------------------------------------------------------------------
 *  Comments
 * --------------------------------------------------------------------------
 *
 * Comments can be either single or multi-line. Multi-line comments cannot
 * nest. Comments may appear before and after any JSON5Token.
 *
 * A single line comment begins with two soliduses and ends with a
 * LineTerminator or the end of the document. All Unicode characters may
 * be placed within the start and end, except for a LineTerminator.
 *
 * A multi-line comment begins with a solidus and an asterisk and ends
 * with an asterisk and a solidus. All Unicode characters may be placed
 * within the start and end, except for an asterisk followed by a solidus.
 *
 * @see https://spec.json5.org/#comments
 */

// Single-line comment: runs from "//" up to (not including) the next CR or
// LF, or to the end of the document when no line terminator follows. The
// terminator itself is left for the T_LINE_FEED / T_CARRIAGE_RETURN skip
// rules, so a comment on the last line no longer needs a trailing "\n".
%skip T_COMMENT                 //[^\r\n]*

// Multi-line comment: "/*" through the first following "*/" (lazy match, so
// comments do not nest). [\s\S] is used instead of "." so that line feeds
// inside the comment are matched whether or not the lexer enables DOTALL.
%skip T_DOC_COMMENT             /\*[\s\S]*?\*/


// Structural punctuators. Each pattern is a regular expression matching
// exactly one character; "[", "]", "+" and "-" are backslash-escaped, the
// remaining characters are written literally.
%token T_BRACKET_OPEN           \[
%token T_BRACKET_CLOSE          \]
%token T_BRACE_OPEN             {
%token T_BRACE_CLOSE            }
%token T_COLON                  :
%token T_COMMA                  ,
%token T_PLUS                   \+
%token T_MINUS                  \-

/**
 * --------------------------------------------------------------------------
 *  Values
 * --------------------------------------------------------------------------
 *
 * A JSON5 value must be an object, array, string, or number, or one of the
 * three literal names "true", "false", or "null".
 *
 * @see https://spec.json5.org/#values
 *
 */

// Literal names. The trailing \b demands a word boundary after the keyword,
// so a longer word such as "trueish" or "nullable" is not matched here.
// NOTE(review): these are declared before T_IDENTIFIER; presumably the lexer
// tries tokens in declaration order — confirm against the lexer driver.
%token T_BOOL_TRUE              true\b
%token T_BOOL_FALSE             false\b
%token T_NULL                   null\b


/**
 * --------------------------------------------------------------------------
 *  Numbers
 * --------------------------------------------------------------------------
 *
 * The representation of numbers is similar to that used in most programming
 * languages. A number may be represented in base 10 using decimal
 * digits, base 16 using hexadecimal digits, or the IEEE 754 values positive
 * infinity, negative infinity, or NaN.
 *
 * @see https://spec.json5.org/#numbers
 *
 */

// IEEE 754 special values. The trailing \b keeps a longer word such as
// "Infinity1" or "NaNs" from being matched by these tokens.
%token T_INF                    Infinity\b
%token T_NAN                    NaN\b

// Hexadecimal integer. JSON5 accepts both the "0x" and the "0X" prefix; the
// capture group holds the hex digits without the prefix.
%token T_HEX_NUMBER             0[xX]([0-9a-fA-F]+)

// Float number with leading (LD) floating point
%token T_FLOAT_LD_NUMBER        [0-9]*\.[0-9]+

// Float number with trailing (TG) floating point
%token T_FLOAT_TG_NUMBER        [0-9]+\.[0-9]*

%token T_INT_NUMBER             [0-9]+

// Exponent suffix: "e" or "E", an optional sign, then decimal digits. The
// capture group holds the (optionally signed) digits without the "e"/"E".
%token T_EXPONENT_PART          [eE]((?:\-|\+)?[0-9]+)

// Identifier: an ASCII letter, "$" or "_", followed by any number of ASCII
// letters, decimal digits, "$" or "_".
%token T_IDENTIFIER             [\$_A-Za-z][\$_0-9A-Za-z]*


/**
 * --------------------------------------------------------------------------
 *  Strings
 * --------------------------------------------------------------------------
 *
 * A string begins and ends with single or double quotation marks. The same
 * quotation mark that begins a string must also end the string. All
 * Unicode characters may be placed within the quotation marks, except
 * for the characters that must be escaped: the quotation mark used to
 * begin and end the string, reverse solidus, and line terminators.
 *
 * @see https://spec.json5.org/#strings
 *
 */

// Quoted strings. Between the delimiters the pattern alternates runs of
// characters that are neither the delimiter nor a backslash with
// backslash-escaped pairs ("\" followed by one character), so an escaped
// quote does not terminate the string. The outer capture group holds the
// string body without the surrounding quotes.
%token T_DOUBLE_QUOTED_STRING   "([^"\\]*(\\.[^"\\]*)*)"
%token T_SINGLE_QUOTED_STRING   '([^'\\]*(\\.[^'\\]*)*)'


/**
 * White space may appear before and after any JSON5Token.
 *
 * @see https://spec.json5.org/#white-space
 */

// Single-character white space and line terminators.
%skip T_HORIZONTAL_TAB         \x09
%skip T_LINE_FEED              \x0A
%skip T_VERTICAL_TAB           \x0B
%skip T_FORM_FEED              \x0C
%skip T_CARRIAGE_RETURN        \x0D
%skip T_WHITESPACE             \x20
%skip T_NON_BREAKING_SPACE     \xA0

// Code points above 0xFF need the braced \x{...} form: an unbraced "\x2028"
// is read as \x20 followed by the literal characters "28".
// NOTE(review): \x{2028} / \x{2029} assume the lexer compiles its patterns
// in Unicode (UTF) mode — confirm against the lexer driver.
%skip T_LINE_SEPARATOR         \x{2028}
%skip T_PARAGRAPH_SEPARATOR    \x{2029}

// Byte order marks, anchored with "^". The UTF-32LE mark is declared before
// the UTF-16LE mark because the latter (FF FE) is a prefix of the former.
%skip T_UTF32BE_BOM            ^\x00\x00\xFE\xFF
// UTF-32LE byte order mark is FF FE 00 00.
%skip T_UTF32LE_BOM            ^\xFF\xFE\x00\x00
%skip T_UTF16BE_BOM            ^\xFE\xFF
%skip T_UTF16LE_BOM            ^\xFF\xFE
%skip T_UTF8_BOM               ^\xEF\xBB\xBF
// UTF-7 byte order mark is "+/v" followed by exactly one of "8", "9", "+"
// or "/" — four alternative 4-byte marks, not one 16-byte sequence.
%skip T_UTF7_BOM               ^\x2B\x2F\x76[\x38\x39\x2B\x2F]