src/Jit/Parser.php
<?php
namespace Kahlan\Jit;
use Kahlan\Jit\Node\NodeDef;
use Kahlan\Jit\Node\FunctionDef;
use Kahlan\Jit\Node\BlockDef;
/**
* Crude parser providing some code block structure of PHP files to facilitate analysis.
*/
class Parser
{
/**
* @var int
* @internal
*/
public $_T_ARROW_FUNCTION;
/**
* @var int
* @internal
*/
public $_T_DOUBLE_ARROW;
/**
* @var int
* @internal
*/
public $_T_ATTRIBUTE;
protected $_ARROW_FUNCTION;
protected $_DOUBLE_ARROW;
protected $_T_NAME_FULLY_QUALIFIED;
protected $_T_NAME_QUALIFIED;
/**
* The root node.
*
* @var object
*/
protected $_root = null;
/**
* The current streamer.
*
* @var object
*/
protected $_stream = null;
/**
* Holds the current states of the parser.
*
* [
* 'php' => false, // Indicate if the parser is in a PHP block.
* 'class' => false, // Indicate if the parser is in a PHP class.
* 'lines' => false, // Indicate if the parser needs to process line matching.
* 'num' => 0, // Current line number.
* 'root' => object, // Root node.
* 'current' => object, // Current node.
* 'visibility' => [] // Store function visibility.
* 'uses' => [] // Maintain the uses dependencies
* 'body' => '' // Maintain the current parsed content
* ]
*
* @var array
*/
protected $_states = [];
/**
* The constructor function
*
* @param array $config The configuration array.
*/
public function __construct($config = [])
{
    // Array union keeps the left-hand keys, so caller-supplied states win over the defaults.
    $this->_states = $config + [
        'php'        => false,
        'lines'      => 0,
        'num'        => 0,
        'visibility' => [],
        'uses'       => [],
        'body'       => '',
        'path'       => ''
    ];

    // The file node is at once the tree root, the initial current node and its own namespace.
    $file = new BlockDef('', 'file');
    $file->hasMethods = false;
    $file->namespace = $file;
    $this->_states['current'] = $file;
    $this->_root = $file;

    // Token ids that the running PHP version does not define fall back to -1.
    $this->_T_ARROW_FUNCTION = defined('T_FN') ? T_FN : -1;
    $this->_T_DOUBLE_ARROW = defined('T_DOUBLE_ARROW') ? T_DOUBLE_ARROW : -1;
    $this->_T_NAME_FULLY_QUALIFIED = defined('T_NAME_FULLY_QUALIFIED') ? T_NAME_FULLY_QUALIFIED : -1;
    $this->_T_NAME_QUALIFIED = defined('T_NAME_QUALIFIED') ? T_NAME_QUALIFIED : -1;
    $this->_T_ATTRIBUTE = defined('T_ATTRIBUTE') ? T_ATTRIBUTE : -1;
}
/**
* Parsing a file into nested nodes.
*
* @param string $content A file.
* @return object The parsed file node.
*/
protected function _parser($content)
{
    $this->_initLines($content);
    $this->_stream = new TokenStream(['source' => $content, 'wrap' => $this->_states['php']]);
    // One stack per opening bracket character, holding the line recorded when it was opened.
    $blockStartLines = [];
    // Candidate `begin` line for the next `;`-terminated statement (null when none applies).
    $blockStartLine = null;

    while ($token = $this->_stream->current(true)) {
        switch ($token[0]) {
            case T_DIR:
                // `__DIR__` is replaced by the directory of the configured path, as a literal.
                $this->_states['body'] .= "'" . dirname($this->_states['path']) . "'";
                break;
            case T_FILE:
                // `__FILE__` is replaced by the configured path, as a literal.
                $this->_states['body'] .= "'" . $this->_states['path'] . "'";
                break;
            case T_OPEN_TAG:
            case T_OPEN_TAG_WITH_ECHO:
                $this->_codeNode();
                $this->_states['body'] .= $token[1];
                $this->_codeNode('open');
                $this->_states['php'] = true;
                $blockStartLine = null;
                break;
            case T_CLOSE_TAG:
                $this->_codeNode();
                $this->_states['php'] = false;
                $this->_states['body'] .= $token[1];
                $this->_codeNode('close');
                break;
            case $this->_T_ATTRIBUTE:
                $this->_annotationNode();
                break;
            case T_DOC_COMMENT:
            case T_COMMENT:
                $this->_commentNode();
                break;
            case T_CONSTANT_ENCAPSED_STRING:
                if ($this->_states['lines']) {
                    $blockStartLine = $this->_states['num'];
                }
                $this->_stringNode('');
                break;
            case T_START_HEREDOC:
                // Strip the `<<<` opener, then any padding, line ending and optional quotes,
                // so that `<<<EOT`, `<<<'EOT'`, `<<<"EOT"` and `<<< EOT` all yield `EOT`.
                $name = trim(substr($token[1], 3), " \t\r\n'\"");
                $this->_stringNode("\n" . $name, true);
                break;
            case '"':
                $this->_stringNode('"');
                break;
            case '{':
                $this->_states['body'] .= $token[0];
                $this->_states['current'] = $this->_codeNode();
                $blockStartLine = null;
                break;
            case '}':
                $this->_closeCurly();
                $blockStartLine = null;
                break;
            case '(':
            case '[':
                $this->_states['body'] .= $token[0];
                if ($this->_states['lines']) {
                    $lines = explode("\n", $this->_states['body']);
                    // NOTE(review): the pending body holds `count($lines) - 1` newlines, so
                    // `- 2` records the line just above the bracket - confirm this is intended.
                    $blockStartLines[$token[0]][] = $this->_states['num'] + (count($lines) - 2);
                }
                break;
            case ')':
            case ']':
                $this->_states['body'] .= $token[0];
                if ($this->_states['lines']) {
                    $char = $token[0] === ']' ? '[' : '(';
                    // A closer may show up without a recorded opener (the opener can be consumed
                    // by another node builder); popping a missing stack would raise a TypeError.
                    $blockStartLine = empty($blockStartLines[$char]) ? null : array_pop($blockStartLines[$char]);
                }
                break;
            case ';':
                $this->_states['body'] .= $token[1];
                $node = $this->_codeNode(null, true);
                if ($this->_states['lines'] && $blockStartLine !== null) {
                    $node->lines['begin'] = $blockStartLine;
                    $blockStartLine = null;
                }
                // Modifiers seen before a statement end (e.g. `new static;`, `private const X = 1;`)
                // do not belong to any following function: drop them so they cannot leak.
                $this->_states['visibility'] = [];
                break;
            case T_DECLARE:
                $this->_declareNode();
                break;
            case T_NAMESPACE:
                $this->_namespaceNode();
                break;
            case T_USE:
                $this->_useNode();
                break;
            case T_TRAIT:
                $this->_traitNode();
                break;
            case T_INTERFACE:
                $this->_interfaceNode();
                break;
            case T_CLASS:
                $this->_classNode();
                break;
            case T_FINAL:
            case T_ABSTRACT:
            case T_PRIVATE:
            case T_PROTECTED:
            case T_PUBLIC:
            case T_STATIC:
                // Remember the modifier; it is picked up by the next class/function node.
                $this->_states['visibility'][$token[1]] = true;
                $this->_states['body'] .= $token[1];
                break;
            case T_FUNCTION:
            case $this->_T_ARROW_FUNCTION: // use T_FN directly when PHP 7.3 support will be removed.
                $this->_functionNode();
                break;
            case T_YIELD:
            case T_YIELD_FROM:
                // Flag the nearest enclosing function as a generator.
                $parent = $this->_states['current'];
                while ($parent && !$parent instanceof FunctionDef) {
                    $parent = $parent->parent;
                }
                // No enclosing function was found (yield outside of a function): nothing to flag.
                if ($parent) {
                    $parent->isGenerator = true;
                }
                $this->_states['body'] .= $token[1];
                break;
            case T_VARIABLE:
                // A variable ends any pending modifier sequence (e.g. a property declaration).
                $this->_states['visibility'] = [];
                $this->_states['body'] .= $token[1];
                break;
            case T_ENDIF:
            case T_ENDFOREACH:
            case T_ENDSWITCH:
            case T_ENDWHILE:
                $this->_codeNode();
                $this->_states['body'] .= $token[1] . $this->_stream->next([';']);
                $this->_codeNode(null, false);
                $blockStartLine = null;
                break;
            case ':':
                $this->_states['body'] .= $token[1];
                $blockStartLine = null;
                break;
            default:
                $this->_states['body'] .= $token[1];
                break;
        }
        $this->_stream->next();
    }

    // Flush whatever is still pending, attach the remaining uses and finalize line data.
    $this->_codeNode();
    $this->_flushUses();
    $this->_stream->rewind();
    $this->_assignCoverable();
    return $this->_root;
}
/**
* Manage curly brackets.
*/
protected function _closeCurly()
{
    $current = $this->_states['current'];
    // Flush the pending body into the block being closed.
    $this->_codeNode();
    $current->close = '}';

    if ($current->type === 'function') {
        if ($current->isClosure) {
            // A closure's closing part also swallows what follows up to `)`, `;`, `,` or `]`.
            $current->close .= $this->_stream->next([')', ';', ',', ']']);
            $this->_states['num'] += substr_count($current->close, "\n");
        }
    } elseif ($current->type === 'namespace') {
        $this->_flushUses();
    }

    // A stray `}` reaching the root node has no parent to climb to: stay on the root
    // instead of leaving the parser with a null current node.
    $parent = $current->parent;
    if ($parent) {
        $this->_states['current'] = $parent;
    }

    if (!$this->_states['lines']) {
        return;
    }
    $current->lines['stop'] = $this->_states['num'];
    if ($parent) {
        $parent->lines['stop'] = $this->_states['num'];
    }
}
/**
* Manage use statement.
*/
protected function _useNode()
{
    $context = $this->_states['current'];
    $token = $this->_stream->current(true);
    $prefix = '';   // Common prefix of a grouped use (`Foo\{A, B}`).
    $use = '';      // Name being imported.
    $alias = '';    // Name following `as`.
    $last = '';     // Last name segment, used as key when there is no alias.
    $as = false;
    $stop = ';';

    while ($token[1] !== $stop) {
        $this->_states['body'] .= $token[1];
        $token = $this->_stream->next(true);
        if (!$token) {
            break;
        }
        switch ($token[0]) {
            case ',':
                // End of one import: register it and start over for the next one.
                if ($as) {
                    $this->_states['uses'][$alias] = $prefix . $use;
                } else {
                    $this->_states['uses'][$last] = $prefix . $use;
                }
                $use = '';
                $alias = '';
                $last = '';
                $as = false;
                break;
            case $this->_T_NAME_FULLY_QUALIFIED:
            case $this->_T_NAME_QUALIFIED:
                // The whole qualified name arrives as a single token.
                $use = $token[1];
                $last = substr($use, strrpos($use, '\\') + 1);
                break;
            case T_STRING:
                $last = $token[1];
                /* Always prefix */
            case T_NS_SEPARATOR:
                if ($as) {
                    $alias .= $token[1];
                } else {
                    $use .= $token[1];
                }
                break;
            case T_AS:
                $as = true;
                break;
            case '{':
                $prefix = $use;
                $use = '';
                // Inside a class the braces close the statement; elsewhere `;` still does.
                $stop = $context->type === 'class' ? '}' : ';';
                break;
        }
    }

    if (!empty($token[0])) {
        $this->_states['body'] .= $token[0];
    }
    if ($as) {
        $this->_states['uses'][$alias] = $prefix . $use;
    } else {
        $this->_states['uses'][$last] = $prefix . $use;
    }
    $this->_codeNode('use');
}
/**
* Build a declare node.
*/
protected function _declareNode()
{
    $this->_codeNode();
    $keyword = $this->_stream->current();
    $body = $keyword . $this->_stream->next([';', '{']);

    if (!preg_match('~ticks~i', $body)) {
        // Any other declare becomes a block node attached to the root and made current.
        $this->_states['body'] .= $body;
        $block = new BlockDef($body, 'declare');
        $block->hasMethods = false;
        $this->_states['current'] = $this->_root;
        $this->_contextualize($block);
        $this->_states['current'] = $block;
        return $block;
    }

    // `declare(ticks=...)`: a plain node, optionally followed by a `{` block.
    $opensBlock = substr($body, -1) === '{';
    if ($opensBlock) {
        $body = substr($body, 0, -1);
    }
    $declare = new NodeDef($body, 'declare');
    $this->_contextualize($declare);
    if ($opensBlock) {
        $this->_states['body'] .= '{';
        $this->_states['current'] = $this->_codeNode();
    }
    return $declare;
}
/**
* Build a namespace node.
*/
protected function _namespaceNode()
{
    // Close what precedes the namespace and hand pending uses to the previous namespace.
    $this->_codeNode();
    $this->_flushUses();

    $keyword = $this->_stream->current();
    $rest = $this->_stream->next([';', '{']);
    $this->_states['body'] .= $keyword;

    $block = new BlockDef($keyword . $rest, 'namespace');
    $block->hasMethods = false;
    // Drop the trailing `;` or `{`, then the surrounding whitespace.
    $block->name = trim(substr($rest, 0, -1));

    // Namespaces always hang off the root node.
    $this->_states['current'] = $this->_root;
    $this->_contextualize($block);

    // A namespace node is its own namespace and becomes the current node.
    $block->namespace = $block;
    $this->_states['current'] = $block;
    return $block;
}
/**
* Attaches the collected uses to the current namespace.
*/
protected function _flushUses()
{
    $current = $this->_states['current'];
    // Nothing to attach to without a current node carrying a namespace.
    if (!$current || !$current->namespace) {
        return;
    }
    $current->namespace->uses = $this->_states['uses'];
    $this->_states['uses'] = [];
}
/**
* Build a trait node.
*/
protected function _traitNode()
{
    $this->_codeNode();

    // Collect the header piece by piece: keyword, spacing, name, then up to `;` or `{`.
    $token = $this->_stream->current(true);
    $keyword = $token[1];
    $spacing = $this->_stream->skipWhitespaces();
    $name = $this->_stream->current();
    $tail = $this->_stream->next([';', '{']);

    $body = $keyword . $spacing . $name . $tail;
    $this->_states['body'] .= $body;

    $trait = new BlockDef($body, 'trait');
    $trait->name = $name;
    $this->_states['current'] = $this->_contextualize($trait);
    return $this->_states['current'];
}
/**
* Build an interface node.
*/
protected function _interfaceNode()
{
    $this->_codeNode();

    // Collect the header piece by piece: keyword, spacing, name, then up to `{`.
    $token = $this->_stream->current(true);
    $keyword = $token[1];
    $spacing = $this->_stream->skipWhitespaces();
    $name = $this->_stream->current();
    $tail = $this->_stream->next(['{']);

    $body = $keyword . $spacing . $name . $tail;
    $this->_states['body'] .= $body;

    $interface = new BlockDef($body, 'interface');
    $interface->name = $name;
    $this->_states['current'] = $this->_contextualize($interface);
    return $this->_states['current'];
}
/**
* Build a class node.
*/
protected function _classNode()
{
    if (substr($this->_states['body'], -2) === '::') { // Bails out on `::class`
        $this->_states['body'] .= 'class';
        // A modifier recorded just before (the `static` of `static::class`) is not a
        // declaration modifier: drop it so it cannot leak into the next method.
        $this->_states['visibility'] = [];
        return;
    }
    $this->_codeNode();

    $token = $this->_stream->current(true);
    $body = $token[1];
    $body .= $this->_stream->skipWhitespaces();
    $body .= $name = $this->_stream->current();
    if ($name !== '{') {
        $body .= $this->_stream->next(['{', T_EXTENDS, T_IMPLEMENTS]);
    } else {
        // `class {` : anonymous class, there is no name to record.
        $name = '';
    }

    $token = $this->_stream->current(true);
    $extends = '';
    $implements = '';
    if ($token[0] === T_EXTENDS) {
        $body .= $this->_stream->skipWhitespaces();
        $body .= $extends = $this->_stream->skipWhile([T_STRING, T_NS_SEPARATOR, $this->_T_NAME_QUALIFIED, $this->_T_NAME_FULLY_QUALIFIED]);
        $body .= $this->_stream->current();
        // Anything between the parent class and `{` is kept in the body only;
        // an `implements` list following `extends` is not extracted here.
        if ($this->_stream->current() !== '{') {
            $body .= $this->_stream->next('{');
        }
    } elseif ($token[0] === T_IMPLEMENTS) {
        $body .= $implements = $this->_stream->next('{');
        // Drop the trailing `{` from the interface list.
        $implements = substr($implements, 0, -1);
    }

    $node = new BlockDef($body, 'class');
    $node->name = $name;
    $node->extends = $this->_normalizeClass($extends);
    $node->implements = $this->_normalizeImplements($implements);
    $node->final = !empty($this->_states['visibility']['final']);
    // The pending modifiers (`final`, `abstract`) belonged to the class itself; clear them
    // so that they do not end up in the visibility of the first method.
    $this->_states['visibility'] = [];

    $this->_states['body'] .= $body;
    return $this->_states['current'] = $this->_contextualize($node);
}
/**
* Normalizes a class name.
*
* @param string $name A class name value.
* @return string The fully namespaced class extends value.
*/
protected function _normalizeClass($name)
{
    // Empty names and already fully qualified names are returned untouched.
    if (!$name || $name[0] === '\\') {
        return $name;
    }

    // Resolve the first segment against the pending `use` statements.
    if ($this->_states['uses']) {
        $tokens = explode('\\', $name, 2);
        if (isset($this->_states['uses'][$tokens[0]])) {
            // An import written `use \Foo\Bar` is stored with its leading backslash;
            // strip it so the result starts with exactly one backslash.
            $prefix = ltrim($this->_states['uses'][$tokens[0]], '\\');
            return count($tokens) === 2 ? '\\' . $prefix . '\\' . $tokens[1] : '\\' . $prefix;
        }
    }

    // Otherwise the name is relative to the current namespace. The namespace name is
    // empty for `namespace { ... }`; the file root node, which is its own namespace, is
    // not given a name by this class. Skipping the empty segment avoids `\\Name`.
    $current = $this->_states['current'];
    $namespace = $current->namespace ? (string) $current->namespace->name : '';
    if ($namespace === '') {
        return '\\' . $name;
    }
    return '\\' . $namespace . '\\' . $name;
}
/**
* Formats an implements string.
*
* @param string $implements The implements string.
* @return array The implements array.
*/
protected function _normalizeImplements($implements)
{
    if (!$implements) {
        return [];
    }
    // Split the comma separated list, trim each entry, then fully qualify it.
    $names = array_map('trim', explode(',', $implements));
    return array_map([$this, '_normalizeClass'], $names);
}
/**
* Build a function node.
*/
protected function _functionNode()
{
    $node = new FunctionDef();
    $token = $this->_stream->current(true);
    $parent = $this->_states['current'];

    // Header = keyword + name (everything up to `(`) + arguments + suffix.
    $body = $token[1];
    $name = substr($this->_stream->next('('), 0, -1);
    $body .= $name;
    $node->name = trim($name);
    $args = $this->_parseArgs();
    $node->args = $args['args'];
    $suffix = $this->_stream->next([';', '{', $this->_T_DOUBLE_ARROW]); // use T_DOUBLE_ARROW directly when PHP 7.3 support will be removed.
    $body .= $args['body'] . $suffix;

    if ($parent) {
        $isMethod = $parent->hasMethods;
        if ($parent->type === 'interface') {
            $node->type = 'signature';
        }
    } else {
        $isMethod = false;
    }

    // The suffix also holds a closure's `use (...)` clause: remove variable names first so
    // that a captured `$void` / `$never` is not mistaken for the return type.
    $signature = (string) preg_replace('~\$[a-zA-Z_\x80-\xff][a-zA-Z0-9_\x80-\xff]*~', '', $suffix);
    $node->isNever = preg_match('~\Wnever\W~', $signature);
    $node->isVoid = preg_match('~\Wvoid\W~', $signature);
    $node->isMethod = $isMethod;
    $node->isClosure = !$node->name;
    if ($isMethod) {
        $node->visibility = $this->_states['visibility'];
    }
    // Pending modifiers were meant for this function whether or not it is a method
    // (e.g. `static function () {}`); always clear them so they cannot reach a later method.
    $this->_states['visibility'] = [];

    $node->body = $body;
    $this->_codeNode();
    $this->_states['body'] = $body;
    $this->_contextualize($node);

    // Looking for curly brackets only if not an "abstract function"
    if ($this->_stream->current() === '{') {
        $this->_states['current'] = $node;
    }
    return $node->function = $node;
}
/**
* Extracting a function/method args array from a stream.
*
* @return array The function/method args array.
*/
protected function _parseArgs()
{
    $cpt = 0;        // Parenthesis depth: 1 while directly inside the argument list.
    $brackets = 0;   // Square bracket depth (array literals, attribute groups).
    $value = $name = '';
    $args = [];
    $body = '';

    while ($token = $this->_stream->current(true)) {
        $body .= $token[1];
        // True while inside a nested `(...)` or `[...]`, where `,` and `=` are part of a value.
        $nested = $cpt > 1 || $brackets > 0;
        $flush = false;

        switch ($token[0]) {
            case '(':
                if ($cpt) {
                    $value .= $token[1];
                }
                $cpt++;
                break;
            case '[':
            case $this->_T_ATTRIBUTE:
                $brackets++;
                $value .= $token[1];
                break;
            case ']':
                if ($brackets > 0) {
                    $brackets--;
                }
                $value .= $token[1];
                break;
            case '=':
                if ($nested || $name !== '') {
                    $value .= $token[1];
                    break;
                }
                // What was read so far is the parameter, what follows is its default value.
                $name = $value;
                $value = '';
                break;
            case ')':
                $cpt--;
                if ($cpt) {
                    $value .= $token[1];
                } else {
                    // Closing the argument list ends the last argument.
                    $flush = true;
                }
                break;
            case ',':
                if ($nested) {
                    // A comma inside `[1, 2]` or `array(1, 2)` does not separate arguments.
                    $value .= $token[1];
                } else {
                    $flush = true;
                }
                break;
            default:
                $value .= $token[1];
                break;
        }

        if ($flush) {
            $value = trim($value);
            if ($value !== '') {
                // With a default value the parameter is the key, otherwise it is appended.
                if ($name) {
                    $args[trim($name)] = $value;
                } else {
                    $args[] = $value;
                }
            }
            $name = $value = '';
        }

        // Stop on the closing parenthesis of the list, leaving the stream on it.
        if ($token[1] === ')' && $cpt === 0) {
            break;
        }
        $this->_stream->next();
    }
    return compact('args', 'body');
}
/**
* Build a code node.
*/
protected function _codeNode($type = null, $coverable = false)
{
    $pending = $this->_states['body'];
    // Nothing buffered: no node is created and nothing is returned.
    if ($pending === '') {
        return;
    }
    if (!$type) {
        $type = $this->_codeType();
    }
    return $this->_contextualize(new NodeDef($pending, $type), $coverable);
}
/**
* Get code type from context
*
* @return string
*/
protected function _codeType()
{
    // Outside of PHP tags everything is plain content.
    if (!$this->_states['php']) {
        return 'plain';
    }
    // Directly inside a node that holds methods, code is an attribute declaration.
    return $this->_states['current']->hasMethods ? 'attribute' : 'code';
}
/**
* Build a string node.
*/
protected function _stringNode($delimiter = '', $heredoc = false)
{
    $this->_codeNode();
    $token = $this->_stream->current(true);

    // Start from the current token, then read up to the closing delimiter when there is one.
    $text = $token[1];
    if ($delimiter === '"') {
        $text .= $this->_stream->next('"');
    } elseif ($delimiter) {
        $text .= $this->_stream->nextSequence($delimiter);
    }
    // A heredoc node also takes everything up to the terminating `;`.
    if ($heredoc) {
        $text .= $this->_stream->next([';']);
    }

    $this->_states['body'] = $text;
    $string = new NodeDef($this->_states['body'], 'string');
    $this->_contextualize($string);
    return $string;
}
/**
* Build an attribute node.
*/
protected function _annotationNode()
{
    // The `#[` token that brought us here already opened one bracket.
    $depth = 1;
    $this->_codeNode();
    $token = $this->_stream->current(true);
    $this->_states['body'] = $token[1];

    // Consume the stream until the opening bracket is balanced (or the stream ends).
    while ($depth !== 0 && ($chunk = $this->_stream->next()) !== null) {
        $this->_states['body'] .= $chunk;
        switch ($chunk[0]) {
            case '[':
                $depth++;
                break;
            case ']':
                $depth--;
                break;
        }
    }

    // Attributes are stored as `comment` nodes.
    return $this->_contextualize(new NodeDef($this->_states['body'], 'comment'));
}
/**
* Build a comment node.
*/
protected function _commentNode()
{
    // Flush pending code so the comment gets a node of its own.
    $this->_codeNode();
    $token = $this->_stream->current(true);
    $this->_states['body'] = $token[1];
    $comment = new NodeDef($this->_states['body'], 'comment');
    return $this->_contextualize($comment);
}
/**
* Contextualize a node.
*/
protected function _contextualize($node, $coverable = false)
{
    $owner = $this->_states['current'];

    // Link the node to the current node and inherit its namespace/function context.
    $node->parent = $owner;
    $node->namespace = $owner->namespace;
    $node->function = $owner->function;
    // Nodes placed directly inside a node holding methods are never coverable.
    if ($owner->hasMethods) {
        $node->coverable = false;
    } else {
        $node->coverable = $coverable;
    }
    $owner->tree[] = $node;

    $this->_assignLines($node);
    $node->inPhp = $this->_states['php'];

    // The buffered body now belongs to the node.
    $this->_states['body'] = '';
    return $node;
}
/**
* Adds lines stores for root node.
*
* @param string $content A php file content.
*/
protected function _initLines($content)
{
    if (!$this->_states['lines']) {
        return;
    }
    // One entry per source line, indexed from 0.
    foreach (explode("\n", $content) as $index => $text) {
        $this->_root->lines['content'][$index] = [
            'body' => $text,
            'nodes' => [],
            'coverable' => false
        ];
    }
}
/**
* Assigns the node to the lines spanned by its body and makes them available at the root node.
*
* @param object $node The node to match; its body is split on newlines to find the lines it covers.
*/
protected function _assignLines($node)
{
    if (!$this->_states['lines']) {
        return;
    }
    $first = $this->_states['num'];
    $rows = explode("\n", $node->body);
    $last = count($rows) - 1;

    // The line counter advances by the number of newlines in the node body.
    $this->_states['num'] = $first + $last;

    foreach ($rows as $offset => $row) {
        $this->_assignLine($first + $offset, $node, $row);
    }

    // When the body ends with a blank line, the parent stops one line earlier.
    $endsBlank = !trim($rows[$last]);
    $node->parent->lines['stop'] = $this->_states['num'] - ($endsBlank ? 1 : 0);
}
/**
* Assign a node to a specific line.
*
* @param int    $index The zero-based line index.
* @param object $node  The node to match.
* @param string $line  The text of the node's body on that line.
*/
protected function _assignLine($index, $node, $line)
{
    // The first assigned line is the node's start; every assigned line moves its stop.
    if ($node->lines['start'] === null) {
        $node->lines['start'] = $index;
    }
    $node->lines['stop'] = $index;

    // Blank lines do not reference the node.
    if (!trim($line)) {
        return;
    }
    $this->_root->lines['content'][$index]['nodes'][] = $node;
}
/**
* Assign coverable data to lines.
*/
protected function _assignCoverable()
{
    if (!$this->_states['lines']) {
        return;
    }
    // Compute the coverable flag of every known line.
    foreach (array_keys($this->_root->lines['content']) as $index) {
        $this->_root->lines['content'][$index]['coverable'] = $this->_isCoverable($index);
    }
}
/**
* Checks if a specific line is coverable.
*
* @param integer $index The line to check.
* @return boolean
*/
protected function _isCoverable($index)
{
    // A line is coverable when a coverable node ends exactly on it.
    foreach ($this->_root->lines['content'][$index]['nodes'] as $node) {
        if ($node->coverable && $node->lines['stop'] === $index) {
            return true;
        }
    }
    return false;
}
/**
* Parsing a file content into nested nodes.
*
* @param string $content The php string to parse.
* @param array $config The parse config array.
* @return object The parsed file node.
*/
public static function parse($content, $config = [])
{
    // A fresh parser per call: parsing state is kept on the instance.
    return (new static($config))->_parser($content);
}
/**
* Unparsing a node.
*
* @param mixed $node A node to unparse.
* @return string the unparsed file.
*/
public static function unparse($node)
{
    // Unparsing is just the string cast of the node.
    $source = (string) $node;
    return $source;
}
/**
* Returns a reader-friendly output for debug purpose.
*
* @param mixed $content A node or a php string to parse.
* @return string The line-by-line debug output.
*/
public static function debug($content)
{
    // Strings are parsed with line matching enabled; objects are used as the root node as is.
    $root = $content;
    if (!is_object($root)) {
        $root = static::parse($content, ['lines' => true]);
    }

    // Label displayed for each node type.
    $abbr = [
        'file' => 'file',
        'open' => 'open',
        'close' => 'close',
        'declare' => 'declare',
        'namespace' => 'namespace',
        'use' => 'use',
        'class' => 'class',
        'interface' => 'interface',
        'trait' => 'trait',
        'function' => 'function',
        'signature' => 'signature',
        'attribute' => 'a',
        'code' => 'c',
        'comment' => 'd',
        'plain' => 'p',
        'string' => 's'
    ];

    $output = '';
    foreach ($root->lines['content'] as $index => $entry) {
        // Displayed line numbers are 1-based.
        $first = $index + 1;
        $last = $first;
        $labels = [];
        foreach ($entry['nodes'] as $node) {
            $labels[] = $abbr[$node->type];
            $last = max($last, $node->lines['stop'] + 1);
        }
        $marker = $entry['coverable'] ? '*' : ' ';

        // Row layout: line number, coverable marker, node types, line span, source text.
        $output .= '#' . str_pad($first, 6, ' ')
            . $marker
            . '[' . str_pad(implode(',', $labels), 19, ' ', STR_PAD_BOTH) . "]"
            . ' ' . str_pad("#{$first} > #{$last}", 16, ' ') . "|"
            . $entry['body'] . "\n";
    }
    return $output;
}
}