Skip to content

Commit

Permalink
Merge pull request eregs#178 from khandelwal/regulation_compilation
Browse files Browse the repository at this point in the history
Regulation compilation: Z notice changes
  • Loading branch information
cmc333333 committed Jan 21, 2014
2 parents 45e6ab1 + e036234 commit b54182d
Show file tree
Hide file tree
Showing 8 changed files with 226 additions and 43 deletions.
2 changes: 1 addition & 1 deletion regparser/grammar/amdpar.py
Original file line number Diff line number Diff line change
Expand Up @@ -217,7 +217,7 @@ def curried(match=None):
if p) + ')']))

multiple_paragraphs = (
atomic.paragraphs_marker
(atomic.paragraphs_marker | atomic.paragraph_marker)
+ make_multiple(unified.any_depth_p)
).setParseAction(make_par_list(lambda m: [m.part, None, m.section,
m.p1, m.p2, m.p3, m.p4, m.p5]))
Expand Down
52 changes: 39 additions & 13 deletions regparser/notice/changes.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,26 @@
from collections import defaultdict

from regparser.tree import struct
from regparser.tree.paragraph import p_levels
from regparser.diff.treediff import node_to_dict


def bad_label(node):
    """ Look through a node label, and return True if it's a badly formed
    label. We can do this because we know what type of character should
    show up at what point in the label.

    Only REGTEXT nodes are checked; every other node type is reported as
    fine. The first two label components must satisfy str.isdigit(), and
    each later component must be a member of the p_levels entry for its
    depth. A label deeper than p_levels cannot be matched against any
    level, so it is reported as bad instead of raising an IndexError. """

    if node.node_type != struct.Node.REGTEXT:
        return False

    for depth, part in enumerate(node.label):
        if depth < 2:
            # The two leading components have to be numeric
            if not part.isdigit():
                return True
        elif depth - 2 >= len(p_levels) or part not in p_levels[depth - 2]:
            return True
    return False


def find_candidate(root, label_last):
"""
Look through the tree for a node that has the same paragraph marker as
Expand All @@ -19,28 +36,37 @@ def find_candidate(root, label_last):
markers.
"""
def check(node):
""" Match last part of label, and no children. """
if node.label[-1] == label_last and node.children == []:
return node

response = struct.walk(root, check)
if len(response) > 1:
# If there are multiple choices, look for one where the label might
# be obviously broken
bad_labels = [n for n in response if bad_label(n)]
if len(bad_labels) == 1:
return bad_labels

return response


def resolve_candidates(amend_map, warn=True):
    """Ensure candidate isn't actually accounted for elsewhere, and fix
    its label.

    amend_map maps a label id (components joined by '-') to a list of
    change dicts. For every dict that has a 'node' and a truthy
    'candidate': if the node's current label id is not a key of amend_map,
    the node is relabelled to match the key it is filed under; otherwise
    the node is already accounted for, so the whole entry is removed from
    amend_map (a warning is logged when warn is true). amend_map is
    modified in place and nothing is returned. """

    # Iterate over a snapshot: entries are deleted from amend_map below,
    # which is not safe while iterating a live view of the dict.
    for label, nodes in list(amend_map.items()):
        for node in nodes:
            if 'node' not in node or not node['candidate']:
                continue
            if node['node'].label_id() not in amend_map:
                node['node'].label = label.split('-')
            elif label in amend_map:
                # Remove (and warn about) an entry only once, even when
                # several of its candidates are accounted for elsewhere;
                # a second `del` would raise KeyError.
                del amend_map[label]
                if warn:
                    mesg = 'Unable to match amendment'
                    mesg += ' to change for: %s ' % label
                    logging.warning(mesg)


def find_misparsed_node(section_node, label, change):
Expand Down
15 changes: 15 additions & 0 deletions regparser/notice/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import copy
import itertools
import re
import logging
from regparser.tree.struct import Node, find
from regparser.utils import roman_nums

Expand Down Expand Up @@ -117,13 +118,27 @@ def replace_node_and_subtree(self, node):
other_children = [c for c in parent.children if c.label != node.label]
parent.children = self.add_child(other_children, node)

def create_empty_node(self, node_label):
    """ In rare cases, we need to flesh out the tree by adding an empty
    node. Builds a REGTEXT node with empty text for the given label id
    (components joined by '-') and inserts it among its parent's
    children. Returns the parent the new node was attached to, not the
    new node itself. """
    label_parts = node_label.split('-')
    placeholder = Node('', [], label_parts, None, Node.REGTEXT)
    attach_to = self.get_parent(placeholder)
    attach_to.children = self.add_child(attach_to.children, placeholder)
    return attach_to

def add_node(self, node):
    """ Add an entirely new node to the regulation tree.

    Subparts are handed to add_to_root. Any other node is inserted among
    its parent's children; when that parent is missing from the tree, an
    empty placeholder parent is created first and the node is attached
    to that placeholder. """

    if node.node_type == Node.SUBPART:
        return self.add_to_root(node)

    parent = self.get_parent(node)
    if parent is None:
        # This is a corner case, where we're trying to add a child
        # to a parent that should exist.
        logging.warning('No existing parent for: %s' % node.label_id())
        self.create_empty_node(get_parent_label(node))
        # create_empty_node hands back the node the placeholder was
        # attached to (i.e. the grandparent), so look the freshly created
        # parent up again rather than attaching one level too high.
        parent = self.get_parent(node)
    parent.children = self.add_child(parent.children, node)

def add_section(self, node, subpart_label):
Expand Down
60 changes: 56 additions & 4 deletions regparser/notice/diff.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,9 +41,30 @@ def fix_section_node(paragraphs, amdpar_xml):
return section


def find_lost_section(amdpar_xml):
    """ Fallback for an amdpar without any siblings of its own: inspect
    the first REGTEXT sibling of the amdpar's parent. If that REGTEXT
    holds no AMDPAR children, return its first SECTION child. Returns
    None in every other case. """
    enclosing = amdpar_xml.getparent()
    reg_texts = (sib for sib in enclosing.itersiblings()
                 if sib.tag == 'REGTEXT')
    next_reg_text = next(reg_texts, None)
    if next_reg_text is None:
        return None

    # Only borrow a section from a REGTEXT that has no AMDPARs of its own
    if any(child.tag == 'AMDPAR' for child in next_reg_text):
        return None

    for child in next_reg_text:
        if child.tag == 'SECTION':
            return child
    return None


def find_section(amdpar_xml):
""" With an AMDPAR xml, return the first section
sibling """
siblings = [s for s in amdpar_xml.itersiblings()]

if len(siblings) == 0:
return find_lost_section(amdpar_xml)

section = None
for sibling in amdpar_xml.itersiblings():
if sibling.tag == 'SECTION':
Expand All @@ -56,9 +77,9 @@ def find_section(amdpar_xml):
return section


def find_subpart(amdpar_tag):
    """ Return the first SUBPART found amongst an amdpar tag's siblings,
    or None when there isn't one. """
    subparts = (sib for sib in amdpar_tag.itersiblings()
                if sib.tag == 'SUBPART')
    return next(subparts, None)

Expand Down Expand Up @@ -89,7 +110,7 @@ def switch_context(token_list, carried_context):
CFR part changes, empty out the context that we carry forward. """

def is_valid_label(label):
return (label and label[0] is not None)
return label and label[0] is not None

if carried_context and carried_context[0] is not None:
token_list = [t for t in token_list if not isinstance(t, tokens.Verb)]
Expand All @@ -102,13 +123,45 @@ def is_valid_label(label):
return carried_context


def contains_one_instance(tokenized, element):
    """ Return True when exactly one item of tokenized is an instance of
    the class element; zero or several matches give False. """
    matches = sum(1 for token in tokenized if isinstance(token, element))
    return matches == 1


def contains_one_paragraph(tokenized):
    """ Tell whether exactly one tokens.Paragraph appears in tokenized. """
    paragraph_type = tokens.Paragraph
    return contains_one_instance(tokenized, paragraph_type)


def contains_delete(tokenized):
    """ Return True when tokenized holds at least one tokens.Verb whose
    verb is 'DELETE'. """
    return any(isinstance(token, tokens.Verb) and token.verb == 'DELETE'
               for token in tokenized)


def remove_false_deletes(tokenized, text):
    """ Sometimes a statement like "Removing the 'x' from the end of
    paragraph" can be confused as removing the paragraph itself. When
    tokenized holds a DELETE verb and exactly one paragraph, and text
    contains the phrase 'end of paragraph', every token is dropped (an
    empty list is returned); otherwise tokenized is returned unchanged.
    Likely this method needs a little more work. """
    looks_like_false_delete = (
        contains_delete(tokenized)
        and contains_one_paragraph(tokenized)
        and 'end of paragraph' in text
    )
    if looks_like_false_delete:
        return []
    return tokenized


def parse_amdpar(par, initial_context):
""" Parse the <AMDPAR> tags into a list of paragraphs that have changed.
"""

text = etree.tostring(par, encoding=unicode)
tokenized = [t[0] for t, _, _ in amdpar.token_patterns.scanString(text)]

tokenized = remove_false_deletes(tokenized, text)
tokenized = switch_passive(tokenized)
tokenized, subpart = deal_with_subpart_adds(tokenized)
tokenized = context_to_paragraph(tokenized)
Expand Down Expand Up @@ -175,7 +228,6 @@ def is_designate_token(token):
def contains_one_designate_token(tokenized):
""" Return True if the list of tokens contains only one designate token.
"""

designate_tokens = [t for t in tokenized if is_designate_token(t)]
return len(designate_tokens) == 1

Expand Down
20 changes: 20 additions & 0 deletions tests/grammar_amdpar_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -270,3 +270,23 @@ def test_example_19(self):

v = tokens.Paragraph([None, None, None, 'a', '3', 'v'])
self.assertTrue(v in second_token_list)

def test_example_20(self):
    """ A singular 'paragraph' marker followed by a range, as in
    'paragraph (b)(3) through (6)', still yields a paragraph list. """
    text = ("Section 105.32 is amended by"
            " adding paragraph (b)(3) through (6)")

    matches = [m[0] for m, _, _ in token_patterns.scanString(text)]
    token_lists = [m for m in matches if isinstance(m, tokens.TokenList)]
    token_list = token_lists[0]

    # NOTE(review): the last expected label carries no 'b' level;
    # presumably this mirrors what the grammar currently emits - confirm.
    expected_labels = [
        [None, None, None, 'b', '3'],
        [None, None, None, 'b', '4'],
        [None, None, None, 'b', '5'],
        [None, None, None, None, '6'],
    ]
    for label in expected_labels:
        self.assertTrue(tokens.Paragraph(label) in token_list)
54 changes: 29 additions & 25 deletions tests/notice_changes_tests.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
#vim: set encoding=utf-8
from unittest import TestCase
from regparser.notice import changes
from regparser.tree.struct import Node
from regparser.tree.struct import Node, find
from regparser.notice.diff import Amendment


Expand All @@ -23,6 +23,13 @@ def test_find_candidate(self):
result = changes.find_candidate(root, 'i')[0]
self.assertEqual(u'n1i', result.text)

n2c = Node('n3c', label=['200', '2', 'i', 'i'])
n2 = find(root, '200-2')
n2.children = [n2c]

result = changes.find_candidate(root, 'i')[0]
self.assertEqual(result.label, ['200', '2', 'i', 'i'])

def test_not_find_candidate(self):
root = self.build_tree()
result = changes.find_candidate(root, 'j')
Expand All @@ -39,23 +46,6 @@ def test_find_misparsed_node(self):
self.assertTrue(result['candidate'])
self.assertEqual(result['node'], n2)

def test_too_many_candidates(self):
n1 = Node('n1', label=['200', '1'])
n2 = Node('n1i', label=['200', 1, 'i'])
n3 = Node('n2', label=['200', '2'])
n4 = Node('n3', label=['200', '3'])
n5 = Node('n3a', label=['200', '3', 'a'])

n6 = Node('n1ai', label=['200', '1', 'a', 'i'])

n1.children = [n6, n2]
n4.children = [n5]
root = Node('root', label=['200'], children=[n1, n3, n4])

result = {}
result = changes.find_misparsed_node(root, 'i', result)
self.assertEqual(None, result)

def test_create_add_amendment(self):
root = self.build_tree()

Expand Down Expand Up @@ -93,31 +83,31 @@ def test_resolve_candidates(self):
amend_map = {}

n1 = Node('n1', label=['200', '1'])
amend_map['200-1-a'] = {'node': n1, 'candidate': False}
amend_map['200-1-a'] = [{'node': n1, 'candidate': False}]

n2 = Node('n2', label=['200', '2', 'i'])
amend_map['200-2-a-i'] = {'node': n2, 'candidate': True}
amend_map['200-2-a-i'] = [{'node': n2, 'candidate': True}]

self.assertNotEqual(
amend_map['200-2-a-i']['node'].label_id(),
amend_map['200-2-a-i'][0]['node'].label_id(),
'200-2-a-i')

changes.resolve_candidates(amend_map)

self.assertEqual(
amend_map['200-2-a-i']['node'].label_id(),
amend_map['200-2-a-i'][0]['node'].label_id(),
'200-2-a-i')

def test_resolve_candidates_accounted_for(self):
amend_map = {}

n1 = Node('n1', label=['200', '1'])
amend_map['200-1-a'] = {'node': n1, 'candidate': False}
amend_map['200-1-a'] = [{'node': n1, 'candidate': False}]

n2 = Node('n2', label=['200', '2', 'i'])

amend_map['200-2-a-i'] = {'node': n2, 'candidate': True}
amend_map['200-2-i'] = {'node': n2, 'candidate': False}
amend_map['200-2-a-i'] = [{'node': n2, 'candidate': True}]
amend_map['200-2-i'] = [{'node': n2, 'candidate': False}]

changes.resolve_candidates(amend_map, warn=False)
self.assertEqual(2, len(amend_map.keys()))
Expand Down Expand Up @@ -173,3 +163,17 @@ def test_match_labels_and_changes_candidate(self):
self.assertTrue(amend_map['200-2-a-1-i'][0]['candidate'])
self.assertTrue(
amend_map['200-2-a-1-i'][0]['node'].label_id(), '200-2-a-1-i')

def test_bad_label(self):
    """ bad_label accepts one sample REGTEXT label and rejects three
    others. """
    well_formed = ['205', '4', 'a', '1', 'ii', 'A']
    node = Node('text', label=well_formed, node_type=Node.REGTEXT)
    self.assertFalse(changes.bad_label(node))

    malformed_labels = [
        ['205', '38', 'i', 'vii', 'A'],
        ['205', 'ii'],
        ['205', '38', 'A', 'vii', 'A'],
    ]
    for label in malformed_labels:
        node.label = label
        self.assertTrue(changes.bad_label(node))
20 changes: 20 additions & 0 deletions tests/notice_compiler_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -391,3 +391,23 @@ def test_get_parent(self):
node = find(reg_tree.tree, '205-2-a')
parent = reg_tree.get_parent(node)
self.assertEqual(parent.label, ['205', '2'])

def test_create_empty(self):
    """ create_empty_node makes the requested label findable in the
    tree. """
    reg_tree = compiler.RegulationTree(self.tree_with_paragraphs())
    reg_tree.create_empty_node('205-4-a')

    created = find(reg_tree.tree, '205-4-a')
    self.assertNotEqual(None, created)
    self.assertEqual(created.label, ['205', '4', 'a'])

def test_add_node_no_parent(self):
    """ After adding 205-3-a, a 205-3 node with empty text can be found
    in the tree (presumably 205-3 is absent from the fixture - confirm
    against tree_with_paragraphs). """
    reg_tree = compiler.RegulationTree(self.tree_with_paragraphs())

    orphan = Node('', label=['205', '3', 'a'], node_type=Node.REGTEXT)
    reg_tree.add_node(orphan)

    created_parent = find(reg_tree.tree, '205-3')
    self.assertNotEqual(None, created_parent)
    self.assertEqual(created_parent.text, '')
Loading

0 comments on commit b54182d

Please sign in to comment.