# NOTE(review): the following lines are residue from a coverage-report web
# page that leaked into this file during extraction; preserved as comments
# so the module parses:
#   View on GitHub
#   35 mins
#   Test Coverage
import salve

from salve.context import ExecutionContext
from salve.exceptions import ParsingException, BlockException
from salve.block import identifier
from salve.parser.tokenize import Token, tokenize_stream

def check_for_unexpected_token(token):
    """
    Checks the current token to see if it matches the currently expected types.
    If it doesn't, raise a ParsingException with a nicely formatted message.

    Args:
        @token
        The token to check against expected types

    Raises:
        ParsingException: if the token's type is not currently expected.
    """
    expected = ExecutionContext()['parsing']['expected_token_types']

    # if the token is unexpected, throw an exception and fail
    if token.ty not in expected:
        # note the space after 'Invalid token.' -- without it the two
        # concatenated sentences run together in the error message
        raise ParsingException(
            'Invalid token. ' +
            'Expected a token of types ' + str(expected) +
            ' but got token ' + token.value + ' of type ' + token.ty +
            ' instead.', token.file_context)

def check_parsing_end_state():
    """
    Validates that the parsing state is a safe one for the parser to exit.
    For example, if we're in the middle of a block and the parser attempts to
    exit, this is an error condition and a ParsingException should be raised.

    Raises:
        ParsingException: if a block is still open or a TEMPLATE (i.e. a
        primary attribute value or an attribute value) could still follow.
    """
    parsing_context = ExecutionContext()['parsing']
    # if the token list terminates and there is still a block in
    # progress or a TEMPLATE could come next, it means that the block was not
    # terminated properly
    if (parsing_context['in_block'] or
            Token.types.TEMPLATE in parsing_context['expected_token_types']):
        # this PE carries no token because it is the absence of a token
        # that triggers it
        # NOTE(review): no token is available to supply a file context here;
        # passing None -- confirm ParsingException accepts a None context
        raise ParsingException('Incomplete block in token stream!',
                               None)
def handle_token_noblock(token):
    """
    Handler for parsing a token outside of a block context. (i.e. not in curly
    braces)

    Looking either for an ID, a primary block attr, or a block start

    Args:
        @token
        The current token being parsed
    """
    parsing_context = ExecutionContext()['parsing']
    # if the token is an identifier found outside of a block, it is the
    # beginning of a new block
    if token.ty == Token.types.IDENTIFIER:
        try:
            current_block = identifier.block_from_identifier(token)
            parsing_context['current_block'] = current_block
            # record the block as soon as it is created so that it shows up
            # in the parse result; its attrs are filled in by later tokens
            parsing_context['blocks'].append(current_block)
        except BlockException:
            raise ParsingException('Invalid block id ' +
                                   token.value, token.file_context)
        parsing_context['expected_token_types'] = [
            Token.types.BLOCK_START, Token.types.TEMPLATE]

    # not in a block, and not an identifier, so look for a primary attr or {
    else:
        # token.ty not in (BLOCK_END, IDENTIFIER)
        # if the token is a block start, set in_block
        if token.ty == Token.types.BLOCK_START:
            parsing_context['in_block'] = True
            parsing_context['expected_token_types'] = [Token.types.BLOCK_END,
                                                       Token.types.IDENTIFIER]
        # if the token is a template string, assign it to the
        # primary attr
        elif token.ty == Token.types.TEMPLATE:
            # NOTE(review): after a primary attr, either the block is opened
            # with '{' or a new block begins with an identifier -- confirm
            # this reconstructed continuation against the parser tests
            parsing_context['expected_token_types'] = [Token.types.BLOCK_START,
                                                       Token.types.IDENTIFIER]
            current_block = parsing_context['current_block']
            current_block[current_block.primary_attr] = token.value

def handle_token_in_block(token):
    """
    Handler for parsing a token inside of a block context. (i.e. in curly
    braces)

    Looks for a block attribute, an attribute value, or a block end.

    Args:
        @token
        The current token being parsed
    """
    # look for block attribute,value pairs, or }
    parsing_context = ExecutionContext()['parsing']

    # if the token is a block end, set current_block to None and
    # set state to not be in block
    if token.ty == Token.types.BLOCK_END:
        parsing_context['in_block'] = False
        parsing_context['current_block'] = None
        parsing_context['expected_token_types'] = [Token.types.IDENTIFIER]
    # if the token is an identifier, it is the name of an attr
    # (because we're in a block)
    elif token.ty == Token.types.IDENTIFIER:
        # attr names are case-insensitive; normalized to lowercase
        parsing_context['current_attr'] = token.value.lower()
        parsing_context['expected_token_types'] = [Token.types.TEMPLATE]
    # if the token is a template string, assign it to the
    # current attr
    elif token.ty == Token.types.TEMPLATE:
        parsing_context['expected_token_types'] = [Token.types.BLOCK_END,
                                                   Token.types.IDENTIFIER]
        parsing_context['current_block'][
            parsing_context['current_attr']] = token.value
        parsing_context['current_attr'] = None

def handle_token(token):
    """
    Handles the parsing of a single token.
    Overall, takes the following two steps:
    - Check that the token is one of the expected types
    - Check if parsing is in the middle of building a block, and hand the token
      to the appropriate handler

    Args:
        @token
        The token from tokenization being parsed

    Raises:
        ParsingException: if the token is not of an expected type.
    """
    # fail fast if the token cannot legally appear in the current state
    check_for_unexpected_token(token)

    # handling looks very different depending on whether or not we're in a
    # block
    if not ExecutionContext()['parsing']['in_block']:
        handle_token_noblock(token)
    else:
        handle_token_in_block(token)
def parse_tokens(tokens):
    """
    Converts a token list to a block list.
    This is not entirely stateless, but unlike the tokenizer,
    there are no explicit states.

    Args:
        @tokens
        An iterable (generally a list) of Tokens to parse into Blocks.
        Unordered iterables won't work here, as parsing is very
        sensitive to token ordering.

    Returns:
        The list of Blocks built from the token stream.

    Raises:
        ParsingException: on an unexpected token or an incomplete block.
    """
    salve.logger.info('Beginning Parse of Token Stream')

    # use the exec context to store parsing state as a dict
    ExecutionContext()['parsing'] = {}
    parsing_context = ExecutionContext()['parsing']

    parsing_context['blocks'] = []

    # track the expected next token(s)
    parsing_context['expected_token_types'] = [Token.types.IDENTIFIER]
    # tracks whether or not parsing is inside of a "{" "}" delimited block
    parsing_context['in_block'] = False

    # the current_block and current_attr are used to build blocks
    # before they are appended to the blocks list
    parsing_context['current_block'] = None
    parsing_context['current_attr'] = None

    for token in tokens:
        handle_token(token)

    # ensure we did not stop mid-block / mid-attribute
    check_parsing_end_state()

    salve.logger.info('Finished Parsing Token Stream')

    return parsing_context['blocks']

def parse_stream(stream):
    """
    Parse a stream or file object into blocks.

    Args:
        @stream
        any file-like object that supports read() or readlines()

        Parsing a stream is just tokenizing it, and then handing those
        tokens to the parser.

    Returns:
        The list of Blocks parsed from the stream's contents.
    """
    return parse_tokens(tokenize_stream(stream))