giganticode/codeprep

View on GitHub
codeprep/parse/matchers.py

Summary

Maintainability
A
1 hr
Test Coverage
# SPDX-FileCopyrightText: 2020 Hlib Babii <hlibbabii@gmail.com>
#
# SPDX-License-Identifier: Apache-2.0

from typing import List, Union

from pygments.token import Token

from codeprep.parse.subtokens import split_into_words, split_string
from codeprep.tokentypes.containers import StringLiteral, OneLineComment, MultilineComment
from codeprep.tokentypes.numeric import Number, Zero, One
from codeprep.tokentypes.rootclasses import ParsedToken
from codeprep.tokentypes.whitespace import NewLine, Tab
from codeprep.tokentypes.word import KeyWord, Operator, Semicolon, OpeningCurlyBracket, ClosingCurlyBracket, \
    OpeningBracket, \
    ClosingBracket, StringLiteralQuote

# TODO: make the return type of transform() consistent across these classes
#  (some return a list of tokens, others return a single token).


class DefaultMatcher:
    """Matcher that accepts every token and hands its text to ``split_into_words``."""

    def match(self, token, value: str) -> bool:
        """Accept unconditionally; neither ``token`` nor ``value`` is inspected."""
        return True

    def transform(self, value: str) -> List[ParsedToken]:
        """Return whatever ``split_into_words`` produces for ``value``."""
        words = split_into_words(value)
        return words


class GenericTokenMatcher:
    """Matcher for tokens belonging to the pygments ``Token.Generic`` family."""

    def match(self, token, value: str) -> bool:
        """Tell whether ``token`` is contained in ``Token.Generic``; ``value`` is unused."""
        is_generic = token in Token.Generic
        return is_generic

    def transform(self, value: str) -> List[ParsedToken]:
        """Return whatever ``split_into_words`` produces for ``value``."""
        words = split_into_words(value)
        return words


class StringMatcher:
    """Matcher for tokens belonging to the pygments ``Token.Literal.String`` family."""

    def match(self, token, value: str) -> bool:
        """Tell whether ``token`` is contained in ``Token.Literal.String``; ``value`` is unused."""
        is_string = token in Token.Literal.String
        return is_string

    def transform(self, value: str) -> Union[StringLiteral, StringLiteralQuote]:
        """Wrap ``value`` as a quote token or as a string literal.

        A value that is exactly one of the four quote delimiters becomes a
        ``StringLiteralQuote``. Anything else becomes a ``StringLiteral`` built
        from ``split_string(value)`` together with the length of ``value``.
        """
        quote_delimiters = ("'", '"', '"""', "'''")
        if value in quote_delimiters:
            return StringLiteralQuote(value)

        parts = split_string(value)
        return StringLiteral(parts, len(value))


class OneLineCommentMatcher:
    """Matcher for tokens whose type is exactly ``Token.Comment.Single``."""

    def match(self, token, value: str) -> bool:
        """Tell whether ``token`` is the ``Token.Comment.Single`` object; ``value`` is unused."""
        is_single_line_comment = token is Token.Comment.Single
        return is_single_line_comment

    def transform(self, value: str) -> OneLineComment:
        """Wrap the result of ``split_into_words(value)`` in a ``OneLineComment``."""
        words = split_into_words(value)
        return OneLineComment(words)


class MultiLineLineCommentMatcher:
    """Matcher for every ``Token.Comment`` token type except ``Token.Comment.Single``."""

    def match(self, token, value: str) -> bool:
        """Tell whether ``token`` is a comment token other than the single-line one.

        ``value`` is unused. The identity test is spelled ``is not`` (rather than
        ``not ... is``), which is the idiomatic and less error-prone form.
        """
        return token in Token.Comment and token is not Token.Comment.Single

    def transform(self, value: str) -> MultilineComment:
        """Wrap the result of ``split_into_words(value)`` in a ``MultilineComment``."""
        return MultilineComment(split_into_words(value))


class WordMatcher:
    """Matcher for tokens belonging to the pygments ``Token.Name`` family."""

    def match(self, token, value: str) -> bool:
        """Tell whether ``token`` is contained in ``Token.Name``; ``value`` is unused."""
        is_name = token in Token.Name
        return is_name

    def transform(self, value: str) -> List[ParsedToken]:
        """Return whatever ``split_into_words`` produces for ``value``."""
        words = split_into_words(value)
        return words


class GenericLiteralMatcher:
    """Matcher for tokens typed exactly ``Token.Literal`` or ``Token.Literal.Date``."""

    def match(self, token, value: str) -> bool:
        """Tell whether ``token`` is one of the two accepted token-type objects.

        The comparison is by identity, so other members of the ``Token.Literal``
        family are not accepted here. ``value`` is unused.
        """
        if token is Token.Literal:
            return True
        return token is Token.Literal.Date

    def transform(self, value: str) -> List[ParsedToken]:
        """Return whatever ``split_into_words`` produces for ``value``."""
        words = split_into_words(value)
        return words


class KeywordMatcher:
    """Matcher for tokens belonging to the pygments ``Token.Keyword`` family."""

    def match(self, token, value: str) -> bool:
        """Tell whether ``token`` is contained in ``Token.Keyword``; ``value`` is unused."""
        is_keyword = token in Token.Keyword
        return is_keyword

    def transform(self, value: str) -> KeyWord:
        """Wrap ``value`` unchanged in a ``KeyWord``."""
        keyword = KeyWord(value)
        return keyword


class NewLineMatcher:
    """Matcher for a token whose text is exactly one newline character."""

    def match(self, token, value: str) -> bool:
        """Tell whether ``value`` equals ``'\\n'``; the token type is not inspected."""
        is_newline = value == '\n'
        return is_newline

    def transform(self, value: str) -> NewLine:
        """Return a fresh ``NewLine``; ``value`` is unused."""
        newline = NewLine()
        return newline


class WhitespaceMatcher:
    """Matcher for tokens whose text is empty or consists only of whitespace."""

    def match(self, token, value: str) -> bool:
        """Tell whether nothing remains of ``value`` after stripping whitespace.

        The token type is not inspected.
        """
        return not value.strip()

    def transform(self, value: str) -> List[Tab]:
        """Return one ``Tab`` entry per four characters of ``value``.

        The count uses integer division, so a remainder of fewer than four
        characters adds no entry. Every entry of the returned list refers to
        the same ``Tab`` instance.
        """
        tab = Tab()
        tab_count = len(value) // 4
        return [tab] * tab_count


class TabMatcher:
    """Matcher for a token whose text is exactly one tab character."""

    def match(self, token, value: str) -> bool:
        """Tell whether ``value`` equals ``'\\t'``; the token type is not inspected."""
        is_tab = value == '\t'
        return is_tab

    def transform(self, value: str) -> Tab:
        """Return a fresh ``Tab``; ``value`` is unused."""
        tab = Tab()
        return tab


class NumberMatchers:
    """Matcher for tokens belonging to the pygments ``Token.Literal.Number`` family."""

    def match(self, token, value: str) -> bool:
        """Tell whether ``token`` is contained in ``Token.Literal.Number``; ``value`` is unused."""
        is_number = token in Token.Literal.Number
        return is_number

    def transform(self, value: str) -> Number:
        """Map ``value`` to ``Zero``, ``One`` or a generic ``Number``.

        Only the exact strings ``'0'`` and ``'1'`` get the dedicated classes;
        every other value is wrapped unchanged in ``Number``.
        """
        if value == '0':
            return Zero()
        if value == '1':
            return One()
        return Number(value)


class OperatorMatcher:
    """Matcher for operator and punctuation tokens."""

    # Symbols that have a dedicated token class; any other value falls back
    # to a generic ``Operator`` carrying the text.
    _DEDICATED_TOKENS = {
        ';': Semicolon,
        '{': OpeningCurlyBracket,
        '}': ClosingCurlyBracket,
        '(': OpeningBracket,
        ')': ClosingBracket,
    }

    def match(self, token, value: str) -> bool:
        """Tell whether ``token`` is an operator or punctuation token.

        ``Token.Operator`` is compared by identity, so only that exact token
        type is accepted on the operator side, whereas any token contained in
        ``Token.Punctuation`` is accepted. ``value`` is unused.
        """
        return token is Token.Operator or token in Token.Punctuation

    def transform(self, value: str) -> Operator:
        """Return the dedicated token for ``value``, or ``Operator(value)``.

        ``;``, ``{``, ``}``, ``(`` and ``)`` each map to an argument-less
        instance of their own class; everything else is wrapped in ``Operator``.
        """
        token_class = self._DEDICATED_TOKENS.get(value)
        if token_class is None:
            return Operator(value)
        return token_class()


class WordOperatorMatcher:
    """Matcher for tokens whose type is exactly ``Token.Operator.Word``."""

    def match(self, token, value: str) -> bool:
        """Tell whether ``token`` is the ``Token.Operator.Word`` object; ``value`` is unused."""
        return token is Token.Operator.Word

    def transform(self, value: str) -> List[ParsedToken]:
        """Return whatever ``split_into_words`` produces for ``value``."""
        return split_into_words(value)