regulations/generator/layers/tree_builder.py
import itertools
import logging
class AddQueue(object):
""" Maintain a sorted list of nodes to add. This maintains a sorted queue
of (label, node) tuples. """
def __init__(self):
self.queue = []
def sort(self):
self.queue = sorted(self.queue, key=lambda x: len(x[0]), reverse=True)
def insert(self, item):
self.queue.append(item)
self.sort()
def insert_all(self, items):
self.queue += items
self.sort()
def find(self, label):
found_nodes = [n for n in self.queue if n[0] == label]
if found_nodes:
return found_nodes[0]
def delete(self, label):
node_removed = [n for n in self.queue if n[0] != label]
self.queue = node_removed
def build_label(node):
return '-'.join(node['label'])
def build_tree_hash(tree):
""" Build a hash map of a tree's nodes, so that we don't
have to keep walking the tree. """
tree_hash = {}
def per_node(node):
label_id = build_label(node)
tree_hash[label_id] = node
for c in node['children']:
per_node(c)
if tree:
per_node(tree)
return tree_hash
def parent_label(node):
"""This is not perfect. It can not handle children of subparts, for
example"""
if node['node_type'].upper() == 'INTERP':
interpreting = list(itertools.takewhile(
lambda l: l != 'Interp', node['label']))
paragraph = node['label'][len(interpreting)+1:]
if paragraph:
return interpreting + ['Interp'] + paragraph[:-1]
elif len(interpreting) == 1: # Root of interpretations
return interpreting
else:
return interpreting[:-1] + ['Interp']
else:
return node['label'][:-1]
def parent_in_tree(parent_label, tree_hash):
""" Return True if the parent of node_label is in the tree """
return parent_label in tree_hash
def add_node_to_tree(node, parent_label, tree_hash):
""" Add the node to the tree by adding it to it's parent in order. """
parent_node = tree_hash[parent_label]
add_child(parent_node, node)
return tree_hash
def roman_nums():
"""Generator for roman numerals."""
mapping = [
(1, 'i'), (4, 'iv'), (5, 'v'), (9, 'ix'),
(10, 'x'), (40, 'xl'), (50, 'l'), (90, 'xc'),
(100, 'c'), (400, 'cd'), (500, 'd'), (900, 'cm'),
(1000, 'm')
]
i = 1
while True:
next_str = ''
remaining_int = i
remaining_mapping = list(mapping)
while remaining_mapping:
(amount, chars) = remaining_mapping.pop()
while remaining_int >= amount:
next_str += chars
remaining_int -= amount
yield next_str
i += 1
def make_label_sortable(label, roman=False):
""" Make labels sortable, but converting them as appropriate.
Also, appendices have labels that look like 30(a), we make those
appropriately sortable. """
if label.isdigit():
return (int(label),)
if roman:
romans = list(itertools.islice(roman_nums(), 0, 50))
return (1 + romans.index(label),)
# segment the label piece into component parts
# e.g. 45Ai33b becomes (45, 'A', 'i', 33, 'b')
INT, UPPER, LOWER = 1, 2, 3
segments, segment, seg_type = [], "", None
for ch in label:
if ch.isdigit():
ch_type = INT
elif ch.isalpha() and ch == ch.upper():
ch_type = UPPER
elif ch.isalpha() and ch == ch.lower():
ch_type = LOWER
else:
# other character, e.g. parens, guarantee segmentation
ch_type = None
if ch_type != seg_type and segment: # new type of character
segments.append(segment)
segment = ""
seg_type = ch_type
if ch_type:
segment += ch
if segment: # ended with something other than a paren
segments.append(segment)
segments = [int(seg) if seg.isdigit() else seg for seg in segments]
return tuple(segments)
def all_children_are_roman(parent_node):
"""
Return true if all the children of the parent node have roman labels
"""
romans = list(itertools.islice(roman_nums(), 0, 50))
roman_children = [c['label'][-1] in romans
for c in parent_node['children']]
return len(roman_children) > 0 and all(roman_children)
# @todo - this function is _very_ similar to one in regparser.notice.compiler.
# Can it be removed or pulled into a shared library?
def add_child(parent_node, node):
"Add a child node to a parent, maintaining the order of the children."
children = parent_node['children']
children.append(node)
child_labels = set('-'.join(c['label']) for c in children)
order = parent_node.get('child_labels', [])
if child_labels.issubset(set(order)):
lookup = {'-'.join(c['label']): c for c in children}
parent_node['children'] = [lookup[label_id] for label_id in order
if label_id in child_labels]
else: # Explicit sort order not present/doesn't match nodes
logging.warning(
"No child_labels field. Guessing at child order (probably wrong)")
for c in parent_node['children']:
if c['node_type'].upper() == 'INTERP':
if c['label'][-1] == 'Interp':
sortable = make_label_sortable(
c['label'][-2], roman=(len(c['label']) == 6))
else:
paragraph = list(itertools.dropwhile(
lambda l: l != 'Interp', c['label']))[1:]
sortable = make_label_sortable(
paragraph[-1], roman=(len(paragraph) == 2))
if len(parent_node['label']) == 2:
# Highest interpretation node in the land
p = len(list(itertools.takewhile(lambda l: l != 'Interp',
c['label'])))
prefix_length = (p, )
sortable = prefix_length + sortable
c['sortable'] = sortable
elif c['node_type'].upper() == 'APPENDIX':
roman_children = all_children_are_roman(parent_node)
c['sortable'] = make_label_sortable(c['label'][-1],
roman=roman_children)
else:
c['sortable'] = make_label_sortable(
c['label'][-1], roman=(len(c['label']) == 5))
parent_node['children'].sort(key=lambda x: x['sortable'])