// chapi-ast-csharp/src/main/antlr/CSharpLexer.g4
// Eclipse Public License - v 1.0, http://www.eclipse.org/legal/epl-v10.html
// Copyright (c) 2013, Christian Wulf (chwchw@gmx.de)
// Copyright (c) 2016-2017, Ivan Kochurkin (kvanttt@gmail.com), Positive Technologies.
// Lexer grammar for C# source text.
lexer grammar CSharpLexer;
// Extra token channels: comment tokens are routed to COMMENTS_CHANNEL and
// preprocessor-directive tokens to DIRECTIVE (see the channel(...) commands below).
channels { COMMENTS_CHANNEL, DIRECTIVE }
// The generated lexer extends this base class. The embedded actions and predicates in this
// grammar (this.OnOpenBrace(), this.IsRegularCharInside(), ...) are presumably implemented
// there — confirm against CSharpLexerBase.
options { superClass = chapi.ast.antlr.CSharpLexerBase; }
// Byte order mark.
// '\u00EF\u00BB\u00BF' is how the three UTF-8 BOM bytes appear when the input was decoded one
// byte per character; '\uFEFF' is the BOM code point itself, which is what a correctly decoded
// Unicode character stream delivers. Both spellings produce the same token type.
BYTE_ORDER_MARK: '\u00EF\u00BB\u00BF' | '\uFEFF';
// Comments. All five token kinds are kept but sent to COMMENTS_CHANNEL.
// '/***/' has its own rule because DELIMITED_DOC_COMMENT requires a non-'/' character
// right after the opening '/**'.
SINGLE_LINE_DOC_COMMENT
    : '///' InputCharacter* -> channel(COMMENTS_CHANNEL)
    ;
EMPTY_DELIMITED_DOC_COMMENT
    : '/***/' -> channel(COMMENTS_CHANNEL)
    ;
DELIMITED_DOC_COMMENT
    : '/**' ~'/' .*? '*/' -> channel(COMMENTS_CHANNEL)
    ;
SINGLE_LINE_COMMENT
    : '//' InputCharacter* -> channel(COMMENTS_CHANNEL)
    ;
DELIMITED_COMMENT
    : '/*' .*? '*/' -> channel(COMMENTS_CHANNEL)
    ;
// Runs of blanks and line terminators go to the HIDDEN channel.
WHITESPACES
    : (Whitespace | NewLine)+ -> channel(HIDDEN)
    ;
// '#' starts a preprocessor directive: the rest of the line is lexed in DIRECTIVE_MODE.
SHARP
    : '#' -> mode(DIRECTIVE_MODE)
    ;
// Keyword tokens, one rule per keyword, in alphabetical order.
// They are declared before IDENTIFIER so that, when both match the same text, the keyword wins.
ABSTRACT   : 'abstract';
ADD        : 'add';
ALIAS      : 'alias';
ARGLIST    : '__arglist';
AS         : 'as';
ASCENDING  : 'ascending';
ASYNC      : 'async';
AWAIT      : 'await';
BASE       : 'base';
BOOL       : 'bool';
BREAK      : 'break';
BY         : 'by';
BYTE       : 'byte';
CASE       : 'case';
CATCH      : 'catch';
CHAR       : 'char';
CHECKED    : 'checked';
CLASS      : 'class';
CONST      : 'const';
CONTINUE   : 'continue';
DECIMAL    : 'decimal';
DEFAULT    : 'default';
DELEGATE   : 'delegate';
DESCENDING : 'descending';
DO         : 'do';
DOUBLE     : 'double';
DYNAMIC    : 'dynamic';
ELSE       : 'else';
ENUM       : 'enum';
EQUALS     : 'equals';
EVENT      : 'event';
EXPLICIT   : 'explicit';
EXTERN     : 'extern';
FALSE      : 'false';
FINALLY    : 'finally';
FIXED      : 'fixed';
FLOAT      : 'float';
FOR        : 'for';
FOREACH    : 'foreach';
FROM       : 'from';
GET        : 'get';
GOTO       : 'goto';
GROUP      : 'group';
IF         : 'if';
IMPLICIT   : 'implicit';
IN         : 'in';
INT        : 'int';
INTERFACE  : 'interface';
INTERNAL   : 'internal';
INTO       : 'into';
IS         : 'is';
JOIN       : 'join';
LET        : 'let';
LOCK       : 'lock';
LONG       : 'long';
NAMEOF     : 'nameof';
NAMESPACE  : 'namespace';
NEW        : 'new';
NULL_      : 'null';
OBJECT     : 'object';
ON         : 'on';
OPERATOR   : 'operator';
ORDERBY    : 'orderby';
OUT        : 'out';
OVERRIDE   : 'override';
PARAMS     : 'params';
PARTIAL    : 'partial';
PRIVATE    : 'private';
PROTECTED  : 'protected';
PUBLIC     : 'public';
READONLY   : 'readonly';
REF        : 'ref';
REMOVE     : 'remove';
RETURN     : 'return';
SBYTE      : 'sbyte';
SEALED     : 'sealed';
SELECT     : 'select';
SET        : 'set';
SHORT      : 'short';
SIZEOF     : 'sizeof';
STACKALLOC : 'stackalloc';
STATIC     : 'static';
STRING     : 'string';
STRUCT     : 'struct';
SWITCH     : 'switch';
THIS       : 'this';
THROW      : 'throw';
TRUE       : 'true';
TRY        : 'try';
TYPEOF     : 'typeof';
UINT       : 'uint';
ULONG      : 'ulong';
UNCHECKED  : 'unchecked';
UNMANAGED  : 'unmanaged';
UNSAFE     : 'unsafe';
USHORT     : 'ushort';
USING      : 'using';
VAR        : 'var';
VIRTUAL    : 'virtual';
VOID       : 'void';
VOLATILE   : 'volatile';
WHEN       : 'when';
WHERE      : 'where';
WHILE      : 'while';
YIELD      : 'yield';
// B.1.6 Identifiers
// Declared after every keyword rule: when a keyword rule and IDENTIFIER match the same text,
// the rule listed first wins, so a bare keyword is never reported as an IDENTIFIER.
// An optional leading '@' is part of the token text.
// https://msdn.microsoft.com/en-us/library/aa664670(v=vs.71).aspx
IDENTIFIER
    : '@'? IdentifierOrKeyword
    ;
// B.1.8 Literals
// LITERAL_ACCESS covers an integer immediately followed by a member access, e.g. 0.Equals().
// Without it the "0." prefix would be read as the start of an invalid real literal and
// cause a lexer error.
LITERAL_ACCESS
    : [0-9] ('_'* [0-9])* IntegerTypeSuffix? '.' '@'? IdentifierOrKeyword
    ;
// Decimal, hexadecimal and binary integers; '_' may separate digits.
INTEGER_LITERAL
    : [0-9] ('_'* [0-9])* IntegerTypeSuffix?
    ;
HEX_INTEGER_LITERAL
    : '0' [xX] ('_'* HexDigit)+ IntegerTypeSuffix?
    ;
BIN_INTEGER_LITERAL
    : '0' [bB] ('_'* [01])+ IntegerTypeSuffix?
    ;
// First alternative: has a '.' with digits after it (integer part optional).
// Second alternative: no '.', so a type suffix or an exponent is mandatory.
REAL_LITERAL
    : ([0-9] ('_'* [0-9])*)? '.' [0-9] ('_'* [0-9])* ExponentPart? [FfDdMm]?
    | [0-9] ('_'* [0-9])* ([FfDdMm] | ExponentPart [FfDdMm]?)
    ;
// Exactly one character or escape sequence between single quotes.
CHARACTER_LITERAL
    : '\'' (~['\\\r\n\u0085\u2028\u2029] | CommonCharacter) '\''
    ;
// Single-line string; backslash escapes must be one of CommonCharacter.
REGULAR_STRING
    : '"' (~["\\\r\n\u0085\u2028\u2029] | CommonCharacter)* '"'
    ;
// Verbatim string: may span lines, and '""' stands for an embedded double quote.
VERBATIUM_STRING
    : '@"' (~'"' | '""')* '"'
    ;
// Openers of interpolated strings. Each notifies the base class through its action and then
// pushes INTERPOLATION_STRING, the mode in which the literal text of the string is lexed.
INTERPOLATED_REGULAR_STRING_START
    : '$"' { this.OnInterpolatedRegularStringStart(); } -> pushMode(INTERPOLATION_STRING)
    ;
// A verbatim interpolated string may be opened with either '$@"' or '@$"'
// (the latter order is accepted since C# 8.0).
INTERPOLATED_VERBATIUM_STRING_START
    : ('$@"' | '@$"') { this.OnInterpolatedVerbatiumStringStart(); } -> pushMode(INTERPOLATION_STRING)
    ;
// B.1.9 Operators And Punctuators
// Brace and colon tokens also run an action on the base class.
// NOTE(review): there is no '>>' or '>>=' token in this list; presumably the parser builds
// right shifts from consecutive GT tokens — confirm against the parser grammar.

// Brackets
OPEN_BRACE    : '{' { this.OnOpenBrace(); };
CLOSE_BRACE   : '}' { this.OnCloseBrace(); };
OPEN_BRACKET  : '[';
CLOSE_BRACKET : ']';
OPEN_PARENS   : '(';
CLOSE_PARENS  : ')';

// Single-character punctuation and operators
DOT        : '.';
COMMA      : ',';
COLON      : ':' { this.OnColon(); };
SEMICOLON  : ';';
PLUS       : '+';
MINUS      : '-';
STAR       : '*';
DIV        : '/';
PERCENT    : '%';
AMP        : '&';
BITWISE_OR : '|';
CARET      : '^';
BANG       : '!';
TILDE      : '~';
ASSIGNMENT : '=';
LT         : '<';
GT         : '>';
INTERR     : '?';

// Multi-character operators
DOUBLE_COLON  : '::';
OP_COALESCING : '??';
OP_INC        : '++';
OP_DEC        : '--';
OP_AND        : '&&';
OP_OR         : '||';
OP_PTR        : '->';
OP_EQ         : '==';
OP_NE         : '!=';
OP_LE         : '<=';
OP_GE         : '>=';

// Compound assignment, shift and range
OP_ADD_ASSIGNMENT        : '+=';
OP_SUB_ASSIGNMENT        : '-=';
OP_MULT_ASSIGNMENT       : '*=';
OP_DIV_ASSIGNMENT        : '/=';
OP_MOD_ASSIGNMENT        : '%=';
OP_AND_ASSIGNMENT        : '&=';
OP_OR_ASSIGNMENT         : '|=';
OP_XOR_ASSIGNMENT        : '^=';
OP_LEFT_SHIFT            : '<<';
OP_LEFT_SHIFT_ASSIGNMENT : '<<=';
OP_COALESCING_ASSIGNMENT : '??=';
OP_RANGE                 : '..';
// https://msdn.microsoft.com/en-us/library/dn961160.aspx
// Lexes the literal text of an interpolated string. This mode is pushed by the
// INTERPOLATED_REGULAR_STRING_START and INTERPOLATED_VERBATIUM_STRING_START rules.
mode INTERPOLATION_STRING;
// '{{' is emitted as one token instead of opening an interpolation hole.
DOUBLE_CURLY_INSIDE: '{{';
// A single '{' runs the base-class hook, is dropped from the token stream (skip) and pushes
// DEFAULT_MODE so that the embedded expression is lexed with the ordinary rules.
OPEN_BRACE_INSIDE: '{' { this.OpenBraceInside(); } -> skip, pushMode(DEFAULT_MODE);
// Backslash escapes are only recognised while the predicate IsRegularCharInside() holds.
REGULAR_CHAR_INSIDE: { this.IsRegularCharInside() }? SimpleEscapeSequence;
// '""' is only recognised while the predicate IsVerbatiumDoubleQuoteInside() holds.
VERBATIUM_DOUBLE_QUOTE_INSIDE: { this.IsVerbatiumDoubleQuoteInside() }? '""';
// A lone '"' runs the base-class hook and pops back to the mode that was active before the string.
DOUBLE_QUOTE_INSIDE: '"' { this.OnDoubleQuoteInside(); } -> popMode;
// Plain text runs. The regular variant also stops at a backslash; the verbatim variant does not.
REGULAR_STRING_INSIDE: { this.IsRegularCharInside() }? ~('{' | '\\' | '"')+;
VERBATIUM_INSIDE_STRING: { this.IsVerbatiumDoubleQuoteInside() }? ~('{' | '"')+;
// Lexes text up to the closing '}' of an interpolation hole.
// NOTE(review): no rule in this grammar switches into this mode; presumably the base class
// does so from one of its hooks (e.g. OnColon) — confirm.
mode INTERPOLATION_FORMAT;
// '}}' is reported with the FORMAT_STRING token type.
DOUBLE_CURLY_CLOSE_INSIDE: '}}' -> type(FORMAT_STRING);
// A single '}' runs the base-class hook, is dropped from the token stream and pops this mode.
CLOSE_BRACE_INSIDE: '}' { this.OnCloseBraceInside(); } -> skip, popMode;
// Any run of characters other than '}'.
FORMAT_STRING: ~'}'+;
// Preprocessor directives. Entered from SHARP ('#'); every token except blanks and comments
// is emitted on the DIRECTIVE channel. The mode ends at the line terminator.
mode DIRECTIVE_MODE;

DIRECTIVE_WHITESPACES : Whitespace+ -> channel(HIDDEN);
DIGITS                : [0-9]+      -> channel(DIRECTIVE);

// Directive names and literals. Rules with type(...) reuse a default-mode token type.
DIRECTIVE_TRUE  : 'true'   -> channel(DIRECTIVE), type(TRUE);
DIRECTIVE_FALSE : 'false'  -> channel(DIRECTIVE), type(FALSE);
DEFINE          : 'define' -> channel(DIRECTIVE);
UNDEF           : 'undef'  -> channel(DIRECTIVE);
DIRECTIVE_IF    : 'if'     -> channel(DIRECTIVE), type(IF);
ELIF            : 'elif'   -> channel(DIRECTIVE);
DIRECTIVE_ELSE  : 'else'   -> channel(DIRECTIVE), type(ELSE);
ENDIF           : 'endif'  -> channel(DIRECTIVE);
LINE            : 'line'   -> channel(DIRECTIVE);

// These directives carry free text: the rest of the line is lexed in DIRECTIVE_TEXT.
ERROR     : 'error' Whitespace+     -> channel(DIRECTIVE), mode(DIRECTIVE_TEXT);
WARNING   : 'warning' Whitespace+   -> channel(DIRECTIVE), mode(DIRECTIVE_TEXT);
REGION    : 'region' Whitespace*    -> channel(DIRECTIVE), mode(DIRECTIVE_TEXT);
ENDREGION : 'endregion' Whitespace* -> channel(DIRECTIVE), mode(DIRECTIVE_TEXT);
PRAGMA    : 'pragma' Whitespace+    -> channel(DIRECTIVE), mode(DIRECTIVE_TEXT);
NULLABLE  : 'nullable' Whitespace+  -> channel(DIRECTIVE), mode(DIRECTIVE_TEXT);

DIRECTIVE_DEFAULT : 'default' -> channel(DIRECTIVE), type(DEFAULT);
DIRECTIVE_HIDDEN  : 'hidden'  -> channel(DIRECTIVE);

// Operators allowed in conditional expressions.
DIRECTIVE_OPEN_PARENS  : '('  -> channel(DIRECTIVE), type(OPEN_PARENS);
DIRECTIVE_CLOSE_PARENS : ')'  -> channel(DIRECTIVE), type(CLOSE_PARENS);
DIRECTIVE_BANG         : '!'  -> channel(DIRECTIVE), type(BANG);
DIRECTIVE_OP_EQ        : '==' -> channel(DIRECTIVE), type(OP_EQ);
DIRECTIVE_OP_NE        : '!=' -> channel(DIRECTIVE), type(OP_NE);
DIRECTIVE_OP_AND       : '&&' -> channel(DIRECTIVE), type(OP_AND);
DIRECTIVE_OP_OR        : '||' -> channel(DIRECTIVE), type(OP_OR);

// Quoted text on a single line; reported with the STRING token type.
DIRECTIVE_STRING : '"' ~["\r\n\u0085\u2028\u2029]* '"' -> channel(DIRECTIVE), type(STRING);
// Any other identifier-shaped word.
CONDITIONAL_SYMBOL : IdentifierOrKeyword -> channel(DIRECTIVE);
// A trailing '//' comment is reported as an ordinary SINGLE_LINE_COMMENT.
DIRECTIVE_SINGLE_LINE_COMMENT : '//' InputCharacter* -> channel(COMMENTS_CHANNEL), type(SINGLE_LINE_COMMENT);
// The line terminator ends the directive and returns to DEFAULT_MODE.
DIRECTIVE_NEW_LINE : NewLine -> channel(DIRECTIVE), mode(DEFAULT_MODE);
// Free text after a directive name: everything up to the line terminator is a single TEXT token.
mode DIRECTIVE_TEXT;
TEXT
    : InputCharacter+ -> channel(DIRECTIVE)
    ;
// The line terminator is reported as DIRECTIVE_NEW_LINE and returns to DEFAULT_MODE.
TEXT_NEW_LINE
    : NewLine -> channel(DIRECTIVE), type(DIRECTIVE_NEW_LINE), mode(DEFAULT_MODE)
    ;
// Fragments
// Any character that is not a line terminator.
fragment InputCharacter
    : ~[\r\n\u0085\u2028\u2029]
    ;
// A single line-terminator character:
// CR (U+000D), LF (U+000A), NEXT LINE (U+0085), LINE SEPARATOR (U+2028), PARAGRAPH SEPARATOR (U+2029).
fragment NewLineCharacter
    : [\r\n\u0085\u2028\u2029]
    ;
// Integer type suffix: 'u', 'l', or both in either order, in any letter case.
fragment IntegerTypeSuffix
    : [lL]? [uU]
    | [uU]? [lL]
    ;
// Exponent of a real literal: 'e' or 'E', an optional sign, then digits that '_' may separate.
fragment ExponentPart
    : [eE] [+\-]? [0-9] ('_'* [0-9])*
    ;
// Any escape sequence accepted inside character and regular string literals.
fragment CommonCharacter
    : SimpleEscapeSequence | HexEscapeSequence | UnicodeEscapeSequence
    ;
// A backslash followed by one of: ' " \ 0 a b f n r t v
fragment SimpleEscapeSequence
    : '\\' ['"\\0abfnrtv]
    ;
// '\x' followed by one to four hexadecimal digits.
fragment HexEscapeSequence
    : '\\x' HexDigit HexDigit? HexDigit? HexDigit?
    ;
// One line terminator. CR LF counts as a single terminator; the others are
// CR, LF, NEXT LINE (U+0085), LINE SEPARATOR (U+2028) and PARAGRAPH SEPARATOR (U+2029).
fragment NewLine
    : '\r' '\n'?
    | [\n\u0085\u2028\u2029]
    ;
// A blank that is not a line terminator: any character listed in UnicodeClassZS,
// horizontal tab (U+0009), vertical tab (U+000B) or form feed (U+000C).
fragment Whitespace
    : UnicodeClassZS
    | [\t\u000B\u000C]
    ;
// Space separators (Unicode general category Zs).
// U+2007 FIGURE SPACE belongs to Zs and was missing from the list; it is added here.
// NOTE(review): U+180E is kept for backward compatibility although newer Unicode versions
// classify it as Cf rather than Zs — confirm whether it should stay.
fragment UnicodeClassZS
    : '\u0020' // SPACE
    | '\u00A0' // NO_BREAK SPACE
    | '\u1680' // OGHAM SPACE MARK
    | '\u180E' // MONGOLIAN VOWEL SEPARATOR
    | '\u2000' // EN QUAD
    | '\u2001' // EM QUAD
    | '\u2002' // EN SPACE
    | '\u2003' // EM SPACE
    | '\u2004' // THREE_PER_EM SPACE
    | '\u2005' // FOUR_PER_EM SPACE
    | '\u2006' // SIX_PER_EM SPACE
    | '\u2007' // FIGURE SPACE
    | '\u2008' // PUNCTUATION SPACE
    | '\u2009' // THIN SPACE
    | '\u200A' // HAIR SPACE
    | '\u202F' // NARROW NO_BREAK SPACE
    | '\u3000' // IDEOGRAPHIC SPACE
    | '\u205F' // MEDIUM MATHEMATICAL SPACE
    ;
// One start character followed by any number of part characters.
fragment IdentifierOrKeyword: IdentifierStartCharacter IdentifierPartCharacter*;
// First character of an identifier: a letter character or an underscore.
fragment IdentifierStartCharacter: LetterCharacter | '_';
// Any character allowed after the first one in an identifier.
fragment IdentifierPartCharacter
    : LetterCharacter | DecimalDigitCharacter
    | ConnectingCharacter | CombiningCharacter | FormattingCharacter
    ;
// A Unicode character of classes Lu, Ll, Lt, Lm, Lo or Nl.
// WARNING: any UnicodeEscapeSequence is accepted as well; the class of the escaped
// code point is not checked.
fragment LetterCharacter
    : UnicodeClassLU | UnicodeClassLL | UnicodeClassLT
    | UnicodeClassLM | UnicodeClassLO | UnicodeClassNL
    | UnicodeEscapeSequence
    ;
// A Unicode character of the class Nd.
// WARNING: any UnicodeEscapeSequence is accepted as well; the class of the escaped
// code point is not checked.
fragment DecimalDigitCharacter: UnicodeClassND | UnicodeEscapeSequence;
// A Unicode character of the class Pc.
// WARNING: any UnicodeEscapeSequence is accepted as well; the class of the escaped
// code point is not checked.
fragment ConnectingCharacter: UnicodeClassPC | UnicodeEscapeSequence;
// A Unicode character of classes Mn or Mc.
// WARNING: any UnicodeEscapeSequence is accepted as well; the class of the escaped
// code point is not checked.
fragment CombiningCharacter: UnicodeClassMN | UnicodeClassMC | UnicodeEscapeSequence;
// A Unicode character of the class Cf.
// WARNING: any UnicodeEscapeSequence is accepted as well; the class of the escaped
// code point is not checked.
fragment FormattingCharacter: UnicodeClassCF | UnicodeEscapeSequence;
// B.1.5 Unicode Character Escape Sequences
// Lower-case '\u' takes exactly four hex digits, upper-case '\U' exactly eight.
fragment UnicodeEscapeSequence
    : '\\u' HexDigit HexDigit HexDigit HexDigit
    | '\\U' HexDigit HexDigit HexDigit HexDigit
            HexDigit HexDigit HexDigit HexDigit
    ;
// One hexadecimal digit, in either letter case.
fragment HexDigit
    : [0-9A-Fa-f]
    ;
// Unicode character classes
// Code point ranges accepted as class Lu (uppercase letter) by LetterCharacter.
// NOTE(review): the ranges are coarse; some also include code points of other classes
// (e.g. the wide range starting at '\u2cf2') — looks generated; confirm against Unicode data.
fragment UnicodeClassLU
: '\u0041'..'\u005a'
| '\u00c0'..'\u00d6'
| '\u00d8'..'\u00de'
| '\u0100'..'\u0136'
| '\u0139'..'\u0147'
| '\u014a'..'\u0178'
| '\u0179'..'\u017d'
| '\u0181'..'\u0182'
| '\u0184'..'\u0186'
| '\u0187'..'\u0189'
| '\u018a'..'\u018b'
| '\u018e'..'\u0191'
| '\u0193'..'\u0194'
| '\u0196'..'\u0198'
| '\u019c'..'\u019d'
| '\u019f'..'\u01a0'
| '\u01a2'..'\u01a6'
| '\u01a7'..'\u01a9'
| '\u01ac'..'\u01ae'
| '\u01af'..'\u01b1'
| '\u01b2'..'\u01b3'
| '\u01b5'..'\u01b7'
| '\u01b8'..'\u01bc'
| '\u01c4'..'\u01cd'
| '\u01cf'..'\u01db'
| '\u01de'..'\u01ee'
| '\u01f1'..'\u01f4'
| '\u01f6'..'\u01f8'
| '\u01fa'..'\u0232'
| '\u023a'..'\u023b'
| '\u023d'..'\u023e'
| '\u0241'..'\u0243'
| '\u0244'..'\u0246'
| '\u0248'..'\u024e'
| '\u0370'..'\u0372'
| '\u0376'..'\u037f'
| '\u0386'..'\u0388'
| '\u0389'..'\u038a'
| '\u038c'..'\u038e'
| '\u038f'..'\u0391'
| '\u0392'..'\u03a1'
| '\u03a3'..'\u03ab'
| '\u03cf'..'\u03d2'
| '\u03d3'..'\u03d4'
| '\u03d8'..'\u03ee'
| '\u03f4'..'\u03f7'
| '\u03f9'..'\u03fa'
| '\u03fd'..'\u042f'
| '\u0460'..'\u0480'
| '\u048a'..'\u04c0'
| '\u04c1'..'\u04cd'
| '\u04d0'..'\u052e'
| '\u0531'..'\u0556'
| '\u10a0'..'\u10c5'
| '\u10c7'..'\u10cd'
| '\u1e00'..'\u1e94'
| '\u1e9e'..'\u1efe'
| '\u1f08'..'\u1f0f'
| '\u1f18'..'\u1f1d'
| '\u1f28'..'\u1f2f'
| '\u1f38'..'\u1f3f'
| '\u1f48'..'\u1f4d'
| '\u1f59'..'\u1f5f'
| '\u1f68'..'\u1f6f'
| '\u1fb8'..'\u1fbb'
| '\u1fc8'..'\u1fcb'
| '\u1fd8'..'\u1fdb'
| '\u1fe8'..'\u1fec'
| '\u1ff8'..'\u1ffb'
| '\u2102'..'\u2107'
| '\u210b'..'\u210d'
| '\u2110'..'\u2112'
| '\u2115'..'\u2119'
| '\u211a'..'\u211d'
| '\u2124'..'\u212a'
| '\u212b'..'\u212d'
| '\u2130'..'\u2133'
| '\u213e'..'\u213f'
| '\u2145'..'\u2183'
| '\u2c00'..'\u2c2e'
| '\u2c60'..'\u2c62'
| '\u2c63'..'\u2c64'
| '\u2c67'..'\u2c6d'
| '\u2c6e'..'\u2c70'
| '\u2c72'..'\u2c75'
| '\u2c7e'..'\u2c80'
| '\u2c82'..'\u2ce2'
| '\u2ceb'..'\u2ced'
| '\u2cf2'..'\ua640'
| '\ua642'..'\ua66c'
| '\ua680'..'\ua69a'
| '\ua722'..'\ua72e'
| '\ua732'..'\ua76e'
| '\ua779'..'\ua77d'
| '\ua77e'..'\ua786'
| '\ua78b'..'\ua78d'
| '\ua790'..'\ua792'
| '\ua796'..'\ua7aa'
| '\ua7ab'..'\ua7ad'
| '\ua7b0'..'\ua7b1'
| '\uff21'..'\uff3a'
;
// Code point ranges accepted as class Ll (lowercase letter) by LetterCharacter.
// NOTE(review): the ranges are coarse and overlap those of other class fragments
// (e.g. the wide range starting at '\ua7fa') — looks generated; confirm against Unicode data.
fragment UnicodeClassLL
: '\u0061'..'\u007A'
| '\u00b5'..'\u00df'
| '\u00e0'..'\u00f6'
| '\u00f8'..'\u00ff'
| '\u0101'..'\u0137'
| '\u0138'..'\u0148'
| '\u0149'..'\u0177'
| '\u017a'..'\u017e'
| '\u017f'..'\u0180'
| '\u0183'..'\u0185'
| '\u0188'..'\u018c'
| '\u018d'..'\u0192'
| '\u0195'..'\u0199'
| '\u019a'..'\u019b'
| '\u019e'..'\u01a1'
| '\u01a3'..'\u01a5'
| '\u01a8'..'\u01aa'
| '\u01ab'..'\u01ad'
| '\u01b0'..'\u01b4'
| '\u01b6'..'\u01b9'
| '\u01ba'..'\u01bd'
| '\u01be'..'\u01bf'
| '\u01c6'..'\u01cc'
| '\u01ce'..'\u01dc'
| '\u01dd'..'\u01ef'
| '\u01f0'..'\u01f3'
| '\u01f5'..'\u01f9'
| '\u01fb'..'\u0233'
| '\u0234'..'\u0239'
| '\u023c'..'\u023f'
| '\u0240'..'\u0242'
| '\u0247'..'\u024f'
| '\u0250'..'\u0293'
| '\u0295'..'\u02af'
| '\u0371'..'\u0373'
| '\u0377'..'\u037b'
| '\u037c'..'\u037d'
| '\u0390'..'\u03ac'
| '\u03ad'..'\u03ce'
| '\u03d0'..'\u03d1'
| '\u03d5'..'\u03d7'
| '\u03d9'..'\u03ef'
| '\u03f0'..'\u03f3'
| '\u03f5'..'\u03fb'
| '\u03fc'..'\u0430'
| '\u0431'..'\u045f'
| '\u0461'..'\u0481'
| '\u048b'..'\u04bf'
| '\u04c2'..'\u04ce'
| '\u04cf'..'\u052f'
| '\u0561'..'\u0587'
| '\u1d00'..'\u1d2b'
| '\u1d6b'..'\u1d77'
| '\u1d79'..'\u1d9a'
| '\u1e01'..'\u1e95'
| '\u1e96'..'\u1e9d'
| '\u1e9f'..'\u1eff'
| '\u1f00'..'\u1f07'
| '\u1f10'..'\u1f15'
| '\u1f20'..'\u1f27'
| '\u1f30'..'\u1f37'
| '\u1f40'..'\u1f45'
| '\u1f50'..'\u1f57'
| '\u1f60'..'\u1f67'
| '\u1f70'..'\u1f7d'
| '\u1f80'..'\u1f87'
| '\u1f90'..'\u1f97'
| '\u1fa0'..'\u1fa7'
| '\u1fb0'..'\u1fb4'
| '\u1fb6'..'\u1fb7'
| '\u1fbe'..'\u1fc2'
| '\u1fc3'..'\u1fc4'
| '\u1fc6'..'\u1fc7'
| '\u1fd0'..'\u1fd3'
| '\u1fd6'..'\u1fd7'
| '\u1fe0'..'\u1fe7'
| '\u1ff2'..'\u1ff4'
| '\u1ff6'..'\u1ff7'
| '\u210a'..'\u210e'
| '\u210f'..'\u2113'
| '\u212f'..'\u2139'
| '\u213c'..'\u213d'
| '\u2146'..'\u2149'
| '\u214e'..'\u2184'
| '\u2c30'..'\u2c5e'
| '\u2c61'..'\u2c65'
| '\u2c66'..'\u2c6c'
| '\u2c71'..'\u2c73'
| '\u2c74'..'\u2c76'
| '\u2c77'..'\u2c7b'
| '\u2c81'..'\u2ce3'
| '\u2ce4'..'\u2cec'
| '\u2cee'..'\u2cf3'
| '\u2d00'..'\u2d25'
| '\u2d27'..'\u2d2d'
| '\ua641'..'\ua66d'
| '\ua681'..'\ua69b'
| '\ua723'..'\ua72f'
| '\ua730'..'\ua731'
| '\ua733'..'\ua771'
| '\ua772'..'\ua778'
| '\ua77a'..'\ua77c'
| '\ua77f'..'\ua787'
| '\ua78c'..'\ua78e'
| '\ua791'..'\ua793'
| '\ua794'..'\ua795'
| '\ua797'..'\ua7a9'
| '\ua7fa'..'\uab30'
| '\uab31'..'\uab5a'
| '\uab64'..'\uab65'
| '\ufb00'..'\ufb06'
| '\ufb13'..'\ufb17'
| '\uff41'..'\uff5a'
;
// Code point ranges accepted as class Lt (titlecase letter) by LetterCharacter.
// NOTE(review): the range starting at '\u01f2' is very wide and covers far more than
// titlecase letters — looks generated; confirm against Unicode data.
fragment UnicodeClassLT
    : '\u01c5'..'\u01cb'
    | '\u01f2'..'\u1f8f'
    | '\u1f98'..'\u1f9f'
    | '\u1fa8'..'\u1faf'
    | '\u1fbc'..'\u1fcc'
    | '\u1ffc'
    ;
// Code point ranges accepted as class Lm (modifier letter) by LetterCharacter.
// NOTE(review): several ranges are very wide (e.g. those starting at '\u0559', '\u0971',
// '\u0ec6', '\ua015') and cover more than modifier letters — looks generated; confirm
// against Unicode data.
fragment UnicodeClassLM
: '\u02b0'..'\u02c1'
| '\u02c6'..'\u02d1'
| '\u02e0'..'\u02e4'
| '\u02ec'..'\u02ee'
| '\u0374'..'\u037a'
| '\u0559'..'\u0640'
| '\u06e5'..'\u06e6'
| '\u07f4'..'\u07f5'
| '\u07fa'..'\u081a'
| '\u0824'..'\u0828'
| '\u0971'..'\u0e46'
| '\u0ec6'..'\u10fc'
| '\u17d7'..'\u1843'
| '\u1aa7'..'\u1c78'
| '\u1c79'..'\u1c7d'
| '\u1d2c'..'\u1d6a'
| '\u1d78'..'\u1d9b'
| '\u1d9c'..'\u1dbf'
| '\u2071'..'\u207f'
| '\u2090'..'\u209c'
| '\u2c7c'..'\u2c7d'
| '\u2d6f'..'\u2e2f'
| '\u3005'..'\u3031'
| '\u3032'..'\u3035'
| '\u303b'..'\u309d'
| '\u309e'..'\u30fc'
| '\u30fd'..'\u30fe'
| '\ua015'..'\ua4f8'
| '\ua4f9'..'\ua4fd'
| '\ua60c'..'\ua67f'
| '\ua69c'..'\ua69d'
| '\ua717'..'\ua71f'
| '\ua770'..'\ua788'
| '\ua7f8'..'\ua7f9'
| '\ua9cf'..'\ua9e6'
| '\uaa70'..'\uaadd'
| '\uaaf3'..'\uaaf4'
| '\uab5c'..'\uab5f'
| '\uff70'..'\uff9e'
| '\uff9f'..'\uff9f'
;
// Code point ranges accepted as class Lo (other letter) by LetterCharacter.
// NOTE(review): some ranges are coarse (e.g. the one starting at '\u0294') and overlap
// other class fragments — looks generated; confirm against Unicode data.
fragment UnicodeClassLO
: '\u00aa'..'\u00ba'
| '\u01bb'..'\u01c0'
| '\u01c1'..'\u01c3'
| '\u0294'..'\u05d0'
| '\u05d1'..'\u05ea'
| '\u05f0'..'\u05f2'
| '\u0620'..'\u063f'
| '\u0641'..'\u064a'
| '\u066e'..'\u066f'
| '\u0671'..'\u06d3'
| '\u06d5'..'\u06ee'
| '\u06ef'..'\u06fa'
| '\u06fb'..'\u06fc'
| '\u06ff'..'\u0710'
| '\u0712'..'\u072f'
| '\u074d'..'\u07a5'
| '\u07b1'..'\u07ca'
| '\u07cb'..'\u07ea'
| '\u0800'..'\u0815'
| '\u0840'..'\u0858'
| '\u08a0'..'\u08b2'
| '\u0904'..'\u0939'
| '\u093d'..'\u0950'
| '\u0958'..'\u0961'
| '\u0972'..'\u0980'
| '\u0985'..'\u098c'
| '\u098f'..'\u0990'
| '\u0993'..'\u09a8'
| '\u09aa'..'\u09b0'
| '\u09b2'..'\u09b6'
| '\u09b7'..'\u09b9'
| '\u09bd'..'\u09ce'
| '\u09dc'..'\u09dd'
| '\u09df'..'\u09e1'
| '\u09f0'..'\u09f1'
| '\u0a05'..'\u0a0a'
| '\u0a0f'..'\u0a10'
| '\u0a13'..'\u0a28'
| '\u0a2a'..'\u0a30'
| '\u0a32'..'\u0a33'
| '\u0a35'..'\u0a36'
| '\u0a38'..'\u0a39'
| '\u0a59'..'\u0a5c'
| '\u0a5e'..'\u0a72'
| '\u0a73'..'\u0a74'
| '\u0a85'..'\u0a8d'
| '\u0a8f'..'\u0a91'
| '\u0a93'..'\u0aa8'
| '\u0aaa'..'\u0ab0'
| '\u0ab2'..'\u0ab3'
| '\u0ab5'..'\u0ab9'
| '\u0abd'..'\u0ad0'
| '\u0ae0'..'\u0ae1'
| '\u0b05'..'\u0b0c'
| '\u0b0f'..'\u0b10'
| '\u0b13'..'\u0b28'
| '\u0b2a'..'\u0b30'
| '\u0b32'..'\u0b33'
| '\u0b35'..'\u0b39'
| '\u0b3d'..'\u0b5c'
| '\u0b5d'..'\u0b5f'
| '\u0b60'..'\u0b61'
| '\u0b71'..'\u0b83'
| '\u0b85'..'\u0b8a'
| '\u0b8e'..'\u0b90'
| '\u0b92'..'\u0b95'
| '\u0b99'..'\u0b9a'
| '\u0b9c'..'\u0b9e'
| '\u0b9f'..'\u0ba3'
| '\u0ba4'..'\u0ba8'
| '\u0ba9'..'\u0baa'
| '\u0bae'..'\u0bb9'
| '\u0bd0'..'\u0c05'
| '\u0c06'..'\u0c0c'
| '\u0c0e'..'\u0c10'
| '\u0c12'..'\u0c28'
| '\u0c2a'..'\u0c39'
| '\u0c3d'..'\u0c58'
| '\u0c59'..'\u0c60'
| '\u0c61'..'\u0c85'
| '\u0c86'..'\u0c8c'
| '\u0c8e'..'\u0c90'
| '\u0c92'..'\u0ca8'
| '\u0caa'..'\u0cb3'
| '\u0cb5'..'\u0cb9'
| '\u0cbd'..'\u0cde'
| '\u0ce0'..'\u0ce1'
| '\u0cf1'..'\u0cf2'
| '\u0d05'..'\u0d0c'
| '\u0d0e'..'\u0d10'
| '\u0d12'..'\u0d3a'
| '\u0d3d'..'\u0d4e'
| '\u0d60'..'\u0d61'
| '\u0d7a'..'\u0d7f'
| '\u0d85'..'\u0d96'
| '\u0d9a'..'\u0db1'
| '\u0db3'..'\u0dbb'
| '\u0dbd'..'\u0dc0'
| '\u0dc1'..'\u0dc6'
| '\u0e01'..'\u0e30'
| '\u0e32'..'\u0e33'
| '\u0e40'..'\u0e45'
| '\u0e81'..'\u0e82'
| '\u0e84'..'\u0e87'
| '\u0e88'..'\u0e8a'
| '\u0e8d'..'\u0e94'
| '\u0e95'..'\u0e97'
| '\u0e99'..'\u0e9f'
| '\u0ea1'..'\u0ea3'
| '\u0ea5'..'\u0ea7'
| '\u0eaa'..'\u0eab'
| '\u0ead'..'\u0eb0'
| '\u0eb2'..'\u0eb3'
| '\u0ebd'..'\u0ec0'
| '\u0ec1'..'\u0ec4'
| '\u0edc'..'\u0edf'
| '\u0f00'..'\u0f40'
| '\u0f41'..'\u0f47'
| '\u0f49'..'\u0f6c'
| '\u0f88'..'\u0f8c'
| '\u1000'..'\u102a'
| '\u103f'..'\u1050'
| '\u1051'..'\u1055'
| '\u105a'..'\u105d'
| '\u1061'..'\u1065'
| '\u1066'..'\u106e'
| '\u106f'..'\u1070'
| '\u1075'..'\u1081'
| '\u108e'..'\u10d0'
| '\u10d1'..'\u10fa'
| '\u10fd'..'\u1248'
| '\u124a'..'\u124d'
| '\u1250'..'\u1256'
| '\u1258'..'\u125a'
| '\u125b'..'\u125d'
| '\u1260'..'\u1288'
| '\u128a'..'\u128d'
| '\u1290'..'\u12b0'
| '\u12b2'..'\u12b5'
| '\u12b8'..'\u12be'
| '\u12c0'..'\u12c2'
| '\u12c3'..'\u12c5'
| '\u12c8'..'\u12d6'
| '\u12d8'..'\u1310'
| '\u1312'..'\u1315'
| '\u1318'..'\u135a'
| '\u1380'..'\u138f'
| '\u13a0'..'\u13f4'
| '\u1401'..'\u166c'
| '\u166f'..'\u167f'
| '\u1681'..'\u169a'
| '\u16a0'..'\u16ea'
| '\u16f1'..'\u16f8'
| '\u1700'..'\u170c'
| '\u170e'..'\u1711'
| '\u1720'..'\u1731'
| '\u1740'..'\u1751'
| '\u1760'..'\u176c'
| '\u176e'..'\u1770'
| '\u1780'..'\u17b3'
| '\u17dc'..'\u1820'
| '\u1821'..'\u1842'
| '\u1844'..'\u1877'
| '\u1880'..'\u18a8'
| '\u18aa'..'\u18b0'
| '\u18b1'..'\u18f5'
| '\u1900'..'\u191e'
| '\u1950'..'\u196d'
| '\u1970'..'\u1974'
| '\u1980'..'\u19ab'
| '\u19c1'..'\u19c7'
| '\u1a00'..'\u1a16'
| '\u1a20'..'\u1a54'
| '\u1b05'..'\u1b33'
| '\u1b45'..'\u1b4b'
| '\u1b83'..'\u1ba0'
| '\u1bae'..'\u1baf'
| '\u1bba'..'\u1be5'
| '\u1c00'..'\u1c23'
| '\u1c4d'..'\u1c4f'
| '\u1c5a'..'\u1c77'
| '\u1ce9'..'\u1cec'
| '\u1cee'..'\u1cf1'
| '\u1cf5'..'\u1cf6'
| '\u2135'..'\u2138'
| '\u2d30'..'\u2d67'
| '\u2d80'..'\u2d96'
| '\u2da0'..'\u2da6'
| '\u2da8'..'\u2dae'
| '\u2db0'..'\u2db6'
| '\u2db8'..'\u2dbe'
| '\u2dc0'..'\u2dc6'
| '\u2dc8'..'\u2dce'
| '\u2dd0'..'\u2dd6'
| '\u2dd8'..'\u2dde'
| '\u3006'..'\u303c'
| '\u3041'..'\u3096'
| '\u309f'..'\u30a1'
| '\u30a2'..'\u30fa'
| '\u30ff'..'\u3105'
| '\u3106'..'\u312d'
| '\u3131'..'\u318e'
| '\u31a0'..'\u31ba'
| '\u31f0'..'\u31ff'
| '\u3400'..'\u4db5'
| '\u4e00'..'\u9fcc'
| '\ua000'..'\ua014'
| '\ua016'..'\ua48c'
| '\ua4d0'..'\ua4f7'
| '\ua500'..'\ua60b'
| '\ua610'..'\ua61f'
| '\ua62a'..'\ua62b'
| '\ua66e'..'\ua6a0'
| '\ua6a1'..'\ua6e5'
| '\ua7f7'..'\ua7fb'
| '\ua7fc'..'\ua801'
| '\ua803'..'\ua805'
| '\ua807'..'\ua80a'
| '\ua80c'..'\ua822'
| '\ua840'..'\ua873'
| '\ua882'..'\ua8b3'
| '\ua8f2'..'\ua8f7'
| '\ua8fb'..'\ua90a'
| '\ua90b'..'\ua925'
| '\ua930'..'\ua946'
| '\ua960'..'\ua97c'
| '\ua984'..'\ua9b2'
| '\ua9e0'..'\ua9e4'
| '\ua9e7'..'\ua9ef'
| '\ua9fa'..'\ua9fe'
| '\uaa00'..'\uaa28'
| '\uaa40'..'\uaa42'
| '\uaa44'..'\uaa4b'
| '\uaa60'..'\uaa6f'
| '\uaa71'..'\uaa76'
| '\uaa7a'..'\uaa7e'
| '\uaa7f'..'\uaaaf'
| '\uaab1'..'\uaab5'
| '\uaab6'..'\uaab9'
| '\uaaba'..'\uaabd'
| '\uaac0'..'\uaac2'
| '\uaadb'..'\uaadc'
| '\uaae0'..'\uaaea'
| '\uaaf2'..'\uab01'
| '\uab02'..'\uab06'
| '\uab09'..'\uab0e'
| '\uab11'..'\uab16'
| '\uab20'..'\uab26'
| '\uab28'..'\uab2e'
| '\uabc0'..'\uabe2'
| '\uac00'..'\ud7a3'
| '\ud7b0'..'\ud7c6'
| '\ud7cb'..'\ud7fb'
| '\uf900'..'\ufa6d'
| '\ufa70'..'\ufad9'
| '\ufb1d'..'\ufb1f'
| '\ufb20'..'\ufb28'
| '\ufb2a'..'\ufb36'
| '\ufb38'..'\ufb3c'
| '\ufb3e'..'\ufb40'
| '\ufb41'..'\ufb43'
| '\ufb44'..'\ufb46'
| '\ufb47'..'\ufbb1'
| '\ufbd3'..'\ufd3d'
| '\ufd50'..'\ufd8f'
| '\ufd92'..'\ufdc7'
| '\ufdf0'..'\ufdfb'
| '\ufe70'..'\ufe74'
| '\ufe76'..'\ufefc'
| '\uff66'..'\uff6f'
| '\uff71'..'\uff9d'
| '\uffa0'..'\uffbe'
| '\uffc2'..'\uffc7'
| '\uffca'..'\uffcf'
| '\uffd2'..'\uffd7'
| '\uffda'..'\uffdc'
;
// Characters accepted as class Nl (letter number) by LetterCharacter.
fragment UnicodeClassNL
    : '\u16EE'..'\u16F0' // RUNIC ARLAUG SYMBOL .. RUNIC BELGTHOR SYMBOL
    | '\u2160'..'\u216F' // ROMAN NUMERAL ONE .. ROMAN NUMERAL ONE THOUSAND
    ;
// Characters accepted as class Mn (non-spacing mark) by CombiningCharacter.
// Only U+0300 through U+0310 are listed here.
fragment UnicodeClassMN
    : '\u0300'..'\u0310' // COMBINING GRAVE ACCENT .. COMBINING CANDRABINDU
    ;
// Characters accepted as class Mc (spacing combining mark) by CombiningCharacter.
// Only a handful of Devanagari signs are listed here.
fragment UnicodeClassMC
    : '\u0903'           // DEVANAGARI SIGN VISARGA
    | '\u093E'..'\u0940' // DEVANAGARI VOWEL SIGN AA, I, II
    | '\u0949'..'\u094C' // DEVANAGARI VOWEL SIGN CANDRA O, SHORT O, O, AU
    ;
// Characters accepted as class Cf (format) by FormattingCharacter.
fragment UnicodeClassCF
    : '\u00AD'           // SOFT HYPHEN
    | '\u0600'..'\u0603' // ARABIC NUMBER SIGN, SIGN SANAH, FOOTNOTE MARKER, SIGN SAFHA
    | '\u06DD'           // ARABIC END OF AYAH
    ;
// Characters accepted as class Pc (connector punctuation) by ConnectingCharacter.
fragment UnicodeClassPC
    : '\u005F'           // LOW LINE
    | '\u203F'..'\u2040' // UNDERTIE, CHARACTER TIE
    | '\u2054'           // INVERTED UNDERTIE
    | '\uFE33'..'\uFE34' // PRESENTATION FORM FOR VERTICAL LOW LINE, VERTICAL WAVY LOW LINE
    | '\uFE4D'..'\uFE4F' // DASHED LOW LINE, CENTRELINE LOW LINE, WAVY LOW LINE
    | '\uFF3F'           // FULLWIDTH LOW LINE
    ;
// Code point ranges accepted as class Nd (decimal digit) by DecimalDigitCharacter.
// Each range below spans ten consecutive code points.
fragment UnicodeClassND
: '\u0030'..'\u0039'
| '\u0660'..'\u0669'
| '\u06f0'..'\u06f9'
| '\u07c0'..'\u07c9'
| '\u0966'..'\u096f'
| '\u09e6'..'\u09ef'
| '\u0a66'..'\u0a6f'
| '\u0ae6'..'\u0aef'
| '\u0b66'..'\u0b6f'
| '\u0be6'..'\u0bef'
| '\u0c66'..'\u0c6f'
| '\u0ce6'..'\u0cef'
| '\u0d66'..'\u0d6f'
| '\u0de6'..'\u0def'
| '\u0e50'..'\u0e59'
| '\u0ed0'..'\u0ed9'
| '\u0f20'..'\u0f29'
| '\u1040'..'\u1049'
| '\u1090'..'\u1099'
| '\u17e0'..'\u17e9'
| '\u1810'..'\u1819'
| '\u1946'..'\u194f'
| '\u19d0'..'\u19d9'
| '\u1a80'..'\u1a89'
| '\u1a90'..'\u1a99'
| '\u1b50'..'\u1b59'
| '\u1bb0'..'\u1bb9'
| '\u1c40'..'\u1c49'
| '\u1c50'..'\u1c59'
| '\ua620'..'\ua629'
| '\ua8d0'..'\ua8d9'
| '\ua900'..'\ua909'
| '\ua9d0'..'\ua9d9'
| '\ua9f0'..'\ua9f9'
| '\uaa50'..'\uaa59'
| '\uabf0'..'\uabf9'
| '\uff10'..'\uff19'
;