# tempy/t.py
# -*- coding: utf-8 -*-
# @author: Federico Cerchiari <federicocerchiari@gmail.com>
import importlib
import os
from html.parser import HTMLParser

from mistune import Markdown

# Internal imports
from .markdown import TempyMarkdownRenderer
from .elements import Tag, VoidTag
class TempyParser(HTMLParser):
    """Custom parser used to translate an HTML string into Tempy tags.

    See https://docs.python.org/3/library/html.parser.html for details on how
    parsing is performed.

    Every tag found by the parser is converted into a Tempy tag; every
    subsequent element is added inside it until an end tag moves the cursor
    back to the parent. As a result, unclosed tags in input are closed in the
    resulting Tempy tree right before the parent element is closed. This
    behavior is accidental and should not be used as an HTML sanitizing
    feature.

    Attributes:
        result: top-level items (root tags and top-level text) collected by
            the current parse.
        current_tag: the tag new content is added to, or None at top level.
    """

    def __init__(self):
        super().__init__()
        # Builds tag classes for names that are not defined in tempy.tags.
        self.unknown_tag_maker = TempyFactory()
        self.tempy_tags = importlib.import_module(".tags", package="tempy")
        self._reset()

    def _reset(self):
        """Clear all parsing state and return the parser itself.

        Besides the Tempy-specific state, the HTMLParser buffer is reset too,
        so unparsed input left over from a truncated document cannot leak
        into the next one.
        """
        self.reset()
        self.result = []
        self.current_tag = None
        return self

    def _make_tempy_tag(self, tag, attrs, void):
        """Build the Tempy tag for a parsed element and attach it to the tree.

        Searches tempy.tags for the class matching the tag name; if it does
        not exist, the TempyFactory is used to create a custom tag class.

        Args:
            tag: tag name as reported by HTMLParser.
            attrs: iterable of (name, value) attribute pairs.
            void: True when the element was written as self-closing.
        """
        tempy_tag_cls = getattr(self.tempy_tags, tag.title(), None)
        if not tempy_tag_cls:
            factory = self.unknown_tag_maker
            if void:
                factory = factory.Void
            tempy_tag_cls = factory[tag]
        # Attribute names are translated through Tag._TO_SPECIALS; falsy
        # values (e.g. the None reported for a valueless attribute) become True.
        attrs = {Tag._TO_SPECIALS.get(k, k): v or True for k, v in attrs}
        tempy_tag = tempy_tag_cls(**attrs)

        if not self.current_tag:
            self.result.append(tempy_tag)
            attached = tempy_tag
        else:
            # Void tags are attached too, otherwise elements such as <br>
            # or <img> nested in a parent would be lost.
            self.current_tag(tempy_tag)
            attached = self.current_tag.childs[-1]

        # Never descend into an element that cannot contain content, whether
        # it was written self-closing or its class is a void tag: no end tag
        # would ever move the cursor back out of it.
        if not (void or tempy_tag._void):
            self.current_tag = attached

    def handle_starttag(self, tag, attrs):
        """Handle an opening tag such as ``<div>``."""
        self._make_tempy_tag(tag, attrs, False)

    def handle_startendtag(self, tag, attrs):
        """Handle a self-closing tag such as ``<br/>``."""
        self._make_tempy_tag(tag, attrs, True)

    def handle_endtag(self, tag):
        """Move the cursor back to the parent of the current tag.

        An end tag found while no tag is open is ignored instead of raising
        AttributeError.
        """
        if self.current_tag is not None:
            self.current_tag = self.current_tag.parent

    def handle_data(self, data):
        """Add text to the current tag, or to the result list.

        Non-blank text found while a tag is open is added to that tag; any
        other text (top-level text, or whitespace-only text even inside an
        open tag) is appended to ``result``.
        """
        if self.current_tag and data.strip():
            self.current_tag(data)
        else:
            self.result.append(data)

    def handle_comment(self, data):
        """Comments are ignored."""
        pass

    def handle_decl(self, decl):
        """Declarations (e.g. doctype) are ignored."""
        pass
class TempyFactory:
    """Factory that creates tag classes on the fly from a tag name.

    Tag classes can be requested either by attribute access
    (``factory.MyTag``) or by item access (``factory["my-tag"]``).
    A non-void factory exposes a second factory as ``Void``, which builds
    classes derived from VoidTag instead of Tag.
    """

    def __init__(self, void_maker=False):
        """
        Args:
            void_maker: when True this factory builds VoidTag subclasses and
                does not get a nested ``Void`` factory of its own.
        """
        self._void = void_maker
        if not self._void:
            self.Void = TempyFactory(void_maker=True)

    def make_tempy(self, tage_name):
        """Create and return a new tag class named ``tage_name``.

        The class derives from VoidTag or Tag depending on the factory kind.
        It stores the lowercased name under the ``_<name>__tag`` attribute
        and is flagged with ``_from_factory = True``.
        """
        base_class = VoidTag if self._void else Tag
        return type(
            tage_name,
            (base_class,),
            {"_%s__tag" % tage_name: tage_name.lower(), "_from_factory": True},
        )

    def __getattr__(self, attr):
        """Create a tag class for any attribute that is not really defined.

        Only called when normal attribute lookup fails. Dunder names are
        excluded: they are protocol probes (copy, pickle, inspect, ...) and
        answering them with a tag class would make ``hasattr`` always true
        and break those protocols.

        Raises:
            AttributeError: for missing ``__dunder__`` attributes.
        """
        if attr.startswith("__") and attr.endswith("__"):
            raise AttributeError(attr)
        return self.make_tempy(attr)

    def __getitem__(self, key):
        """Create a tag class named ``key``; useful for names that are not
        valid Python identifiers."""
        return self.make_tempy(key)
class TempyGod(TempyFactory):
    """TempyFactory that can also build Tempy trees from HTML and markdown
    strings and dump them to Python source files."""

    def __init__(self):
        super().__init__()
        self._html_parser = TempyParser()
        self._markdown_parser = Markdown(renderer=TempyMarkdownRenderer())

    def from_string(self, html_string):
        """Parses an html string and returns a list of Tempy trees.

        The returned list is the parser's own ``result`` list, which holds
        every top-level item the parser collected.
        """
        self._html_parser._reset().feed(html_string)
        return self._html_parser.result

    def dump_string(self, html_string, filename, pretty=False):
        """Parses an html string and dumps the resulting trees to a python file.

        Returns:
            The name of the written file, as returned by ``dump``.
        """
        tempy_trees = self.from_string(html_string)
        return self.dump(tempy_trees, filename, pretty=pretty)

    def dump(self, tempy_tree_list, filename, pretty=False):
        """Dumps a list of Tempy objects to a python file.

        Args:
            tempy_tree_list: iterable of objects providing
                ``to_code(pretty=...)``; each result is written to the file.
            filename: destination path; ".py" is appended when the path has
                no extension.
            pretty: forwarded to every ``to_code`` call.

        Returns:
            The name of the written file, including the ".py" extension.

        Raises:
            ValueError: if ``filename`` is empty or has an extension other
                than ".py".
        """
        if not filename:
            raise ValueError('"filename" argument should not be none.')
        if not filename.endswith(".py"):
            # Only the last path component decides the extension: a dot in a
            # directory name (e.g. "./out/page") is not a wrong extension.
            if os.path.splitext(filename)[1]:
                raise ValueError(
                    '"filename" argument should have a .py extension, if given.'
                )
            filename += ".py"
        # The header written below declares utf-8, so the file must really be
        # encoded that way instead of using the platform default encoding.
        with open(filename, "w", encoding="utf-8") as f:
            f.write(
                "# -*- coding: utf-8 -*-\nfrom tempy import T\nfrom tempy.tags import *\n"
            )
            for tempy_tree in tempy_tree_list:
                f.write(tempy_tree.to_code(pretty=pretty))
        return filename

    def from_markdown(self, markdown_string):
        """Runs the markdown parser (configured with TempyMarkdownRenderer)
        on the given string and returns its output."""
        return self._markdown_parser(markdown_string)
# Module-level TempyGod instance, created once at import time and exposed as T.
T = TempyGod()