sirosen/SALVE

View on GitHub
salve/parser/parse.py

Summary

Maintainability
A
35 mins
Test Coverage
import salve

from salve.context import ExecutionContext
from salve.exceptions import ParsingException, BlockException
from salve.block import identifier
from salve.parser.tokenize import Token, tokenize_stream


def check_for_unexpected_token(token):
    """
    Checks the current token against the token types that the parser
    currently expects, as recorded under 'expected_token_types' in the
    'parsing' entry of the ExecutionContext.
    If the token's type is not among them, raise a ParsingException with a
    nicely formatted message.

    Args:
        @token
        The token to check against expected types

    Raises:
        ParsingException if the type of @token is not one of the expected
        types. The exception is given the token's file_context.
    """
    expected = ExecutionContext()['parsing']['expected_token_types']

    # a token of an expected type needs no further attention
    if token.ty in expected:
        return

    # the token is unexpected, so throw an exception and fail
    # note the trailing space in 'Invalid token. ': without it the two
    # sentences of the message run together
    raise ParsingException(
        'Invalid token. ' +
        'Expected a token of types ' + str(expected) +
        ' but got token ' + token.value + ' of type ' + token.ty +
        ' instead.', token.file_context)


def check_parsing_end_state():
    """
    Confirms that the parser is allowed to stop in its current state.
    Stopping while a block is still open, or while a TEMPLATE token is one
    of the expected next tokens, is an error condition and results in a
    ParsingException.

    Raises:
        ParsingException if the parsing state shows an unfinished block. It
        is given the file_context of the current block.
    """
    state = ExecutionContext()['parsing']

    # the end of the token list is fine as long as no block is in progress
    # and no TEMPLATE could come next
    if not state['in_block']:
        if Token.types.TEMPLATE not in state['expected_token_types']:
            return

    # otherwise, the block was not terminated properly
    # no token is attached to this PE, since it is the absence of a token
    # that triggers it
    raise ParsingException('Incomplete block in token stream!',
                           state['current_block'].file_context)


def handle_token_noblock(token):
    """
    Parses one token while outside of a block context. (i.e. not between
    curly braces)

    The token may be an ID that starts a new block, the value of the
    current block's primary attr, or a block start. Tokens of any other
    type are ignored here.

    Args:
        @token
        The current token being parsed

    Raises:
        ParsingException if the token is an identifier and building a block
        from it raises a BlockException.
    """
    state = ExecutionContext()['parsing']
    kind = token.ty

    # an identifier outside of a block begins a new block
    if kind == Token.types.IDENTIFIER:
        try:
            new_block = identifier.block_from_identifier(token)
            state['blocks'].append(new_block)
            state['current_block'] = new_block
        except BlockException:
            raise ParsingException('Invalid block id ' +
                                   token.value, token.file_context)
        # after a block id, either a primary attr or a { may follow
        state['expected_token_types'] = [
            Token.types.BLOCK_START, Token.types.TEMPLATE]
        return

    # a block start moves the parser into the block
    if kind == Token.types.BLOCK_START:
        state['in_block'] = True
        state['expected_token_types'] = [Token.types.BLOCK_END,
                                         Token.types.IDENTIFIER]
        return

    # a template string outside of a block is the value of the current
    # block's primary attr
    if kind == Token.types.TEMPLATE:
        state['expected_token_types'] = [Token.types.BLOCK_START,
                                         Token.types.IDENTIFIER]
        target_block = state['current_block']
        target_block[target_block.primary_attr] = token.value


def handle_token_in_block(token):
    """
    Parses one token while inside of a block context. (i.e. between curly
    braces)
    The token may be the name of a block attribute, the value for the most
    recently named attribute, or a block end. Tokens of any other type are
    ignored here.

    Args:
        @token
        The current token being parsed
    """
    state = ExecutionContext()['parsing']
    kind = token.ty

    # a block end leaves the block; only a new block id may follow
    if kind == Token.types.BLOCK_END:
        state['in_block'] = False
        state['expected_token_types'] = [Token.types.IDENTIFIER]
        return

    # inside of a block, an identifier is the name of an attr
    # the name is stored lowercased, and its value must come next
    if kind == Token.types.IDENTIFIER:
        state['current_attr'] = token.value.lower()
        state['expected_token_types'] = [Token.types.TEMPLATE]
        return

    # a template string is the value for the pending attr, so store it on
    # the current block and clear the pending attr
    if kind == Token.types.TEMPLATE:
        state['expected_token_types'] = [Token.types.BLOCK_END,
                                         Token.types.IDENTIFIER]
        target_block = state['current_block']
        attr_name = state['current_attr']
        target_block[attr_name] = token.value
        state['current_attr'] = None


def handle_token(token):
    """
    Parses a single token, in two steps:
    - Verify that the token is of one of the expected types
    - Pass the token to the in-block or out-of-block handler, depending on
      whether parsing is currently inside of a block

    Args:
        @token
        The token from tokenization being parsed
    """
    check_for_unexpected_token(token)

    # tokens inside of a block and tokens outside of one are handled very
    # differently, so pick the handler based on the parsing state
    inside_block = ExecutionContext()['parsing']['in_block']
    handler = handle_token_in_block if inside_block else handle_token_noblock
    handler(token)


def parse_tokens(tokens):
    """
    Builds a block list out of a token list.
    Parsing state is kept as a dict under the 'parsing' key of the
    ExecutionContext, so this is not entirely stateless. Unlike the
    tokenizer, though, there are no explicit states.

    Args:
        @tokens
        An iterable (generally a list) of Tokens to parse into Blocks.
        Parsing is very sensitive to token ordering, so unordered
        iterables won't work here.

    Returns the list stored under 'blocks' in the parsing state once all
    of the tokens have been handled and the end state has been checked.
    """
    salve.logger.info('Beginning Parse of Token Stream')

    # the parsing state lives in the exec context as a dict
    ExecutionContext()['parsing'] = {}
    state = ExecutionContext()['parsing']

    initial_state = (
        # the blocks produced by the parse
        ('blocks', []),
        # the type(s) that the next token is allowed to have
        ('expected_token_types', [Token.types.IDENTIFIER]),
        # whether or not parsing is inside of a "{" "}" delimited block
        ('in_block', False),
        # the block and attr currently being built up
        ('current_block', None),
        ('current_attr', None),
    )
    for key, value in initial_state:
        state[key] = value

    for tok in tokens:
        handle_token(tok)

    check_parsing_end_state()

    salve.logger.info('Finished Parsing Token Stream')

    return state['blocks']


def parse_stream(stream):
    """
    Turn a stream or file object into blocks.

    Args:
        @stream
        any file-like object that supports read() or readlines()
        The stream is first tokenized, and the resulting tokens are then
        handed to the parser.
    """
    tokens = tokenize_stream(stream)
    return parse_tokens(tokens)