Skip to content

Commit

Permalink
Merge pull request eregs#341 from cmc333333/seperate-interp-1
Browse files Browse the repository at this point in the history
Separate interpretations, phase 1
  • Loading branch information
cmc333333 authored Dec 27, 2016
2 parents 0d1c784 + 21b9827 commit 2ab5bdd
Show file tree
Hide file tree
Showing 11 changed files with 211 additions and 178 deletions.
22 changes: 20 additions & 2 deletions docs/extension_points.rst
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@ of the process for more information about the plugin system in general. Here
we document specific extension points and example uses.


eregs_ns.parser.layers
======================
eregs_ns.parser.layers (deprecated)
===================================

List of strings referencing layer classes (generally implementing the
abstract base class ``regparser.layer.layer:Layer``).
Expand All @@ -19,6 +19,24 @@ Examples:

* `ATF <https://github.com/18F/atf-eregs/blob/c398e553164cd456d6606a78c7762ad5f9ed665b/eregs_extensions/setup.py#L6-L8>`_

This has been deprecated in favor of layers applicable to specific document
types (see below).


eregs_ns.parser.layer.cfr
=========================

Layer classes (implementing the abstract base class
``regparser.layer.layer:Layer``) which should apply to CFR documents.


eregs_ns.parser.layer.preamble
==============================

Layer classes (implementing the abstract base class
``regparser.layer.layer:Layer``) which should apply to "preamble" documents
(i.e. proposed rules).


eregs_ns.parser.preprocessors
=============================
Expand Down
Empty file added interpparser/__init__.py
Empty file.
8 changes: 3 additions & 5 deletions regparser/layer/interpretations.py → interpparser/layers.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,10 @@ def process(self, node):
return the associated layer information. @TODO: Right now, this only
associates if there is a direct match. It should also associate if any
parents match"""

label = tuple(node.label)
if self.lookup_table[label]: # default dict; will always be present
interp_labels = [n.label_id() for n in self.lookup_table[label]
if not self.empty_interpretation(n)]
return [{'reference': l} for l in interp_labels] or None
non_empty = [n for n in self.lookup_table[label]
if not self.empty_interpretation(n)]
return [{'reference': n.label_id()} for n in non_empty] or None

@staticmethod
def empty_interpretation(interp):
Expand Down
16 changes: 16 additions & 0 deletions interpparser/setup.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
from setuptools import setup, find_packages


# Trove classifiers advertised for this distribution.
_TROVE_CLASSIFIERS = [
    'License :: Public Domain',
    'License :: CC0 1.0 Universal (CC0 1.0) Public Domain Dedication',
]

# Entry points: registers the Interpretations layer class under the
# namespace used for layers that apply to CFR documents.
_PLUGIN_ENTRY_POINTS = {
    'eregs_ns.parser.layer.cfr':
        'interpretations = interpparser.layers:Interpretations',
}

# Packaging metadata for the ``interpparser`` distribution.
setup(
    name='interpparser',
    version="0.0.1",
    packages=find_packages(),
    classifiers=_TROVE_CLASSIFIERS,
    entry_points=_PLUGIN_ENTRY_POINTS,
)
23 changes: 15 additions & 8 deletions regparser/commands/layers.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import logging

import click
from stevedore.extension import ExtensionManager

from regparser.commands import utils
from regparser.index import dependency, entry
from regparser.plugins import classes_by_shorthand
import settings


logger = logging.getLogger(__name__)
Expand All @@ -14,12 +13,20 @@
def _init_classes():
    """Build the mapping of document type to the layer classes available
    for it.

    Construction is wrapped in a function so that the temporaries used
    while building `LAYER_CLASSES` do not leak into the module namespace.

    Layer classes are discovered through stevedore entry points, one
    namespace per document type (``eregs_ns.parser.layer.<doc_type>``).
    Plugins registered under the deprecated ``eregs_ns.parser.layers``
    namespace are still honored.

    :returns: a dict of the form ``{doc_type: {layer_name: layer_class}}``
        with one entry for each of ``'cfr'`` and ``'preamble'``.
    """
    classes = {}
    for doc_type in ('cfr', 'preamble'):    # @todo - make this dynamic
        namespace = 'eregs_ns.parser.layer.{}'.format(doc_type)
        classes[doc_type] = {
            extension.name: extension.plugin
            for extension in ExtensionManager(namespace)
        }

    # For backwards compatibility. @todo - remove in later release
    # Plugins in the old namespace are keyed by the plugin class's
    # `shorthand` attribute (not the entry point name) and are only added to
    # the 'cfr' document type. On a key collision the old-namespace plugin
    # replaces the one discovered above.
    old_namespace = 'eregs_ns.parser.layers'
    classes['cfr'].update({
        extension.plugin.shorthand: extension.plugin
        for extension in ExtensionManager(old_namespace)
    })
    return classes
LAYER_CLASSES = _init_classes()

Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -23,3 +23,4 @@ setuptools==21.1.0
six==1.10.0
stevedore==1.13.0
-e .
-e interpparser
28 changes: 0 additions & 28 deletions settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,34 +120,6 @@
"regparser.tree.xml_parser.preprocessors.ImportCategories",
])

# Which layers are to be generated, keyed by document type. Each value is a
# list of dotted paths to layer classes. The ALL key is special; layers in
# this category automatically apply to all document types.
LAYERS = {
'cfr': [
'regparser.layer.meta.Meta',
'regparser.layer.internal_citations.InternalCitationParser',
'regparser.layer.table_of_contents.TableOfContentsLayer',
'regparser.layer.terms.Terms',
'regparser.layer.paragraph_markers.ParagraphMarkers',
'regparser.layer.key_terms.KeyTerms',
# CFPB specific -- these should be moved to plugins
'regparser.layer.interpretations.Interpretations',
# The SectionBySection layer is created via a separate command
],
'preamble': [
'regparser.layer.preamble.key_terms.KeyTerms',
'regparser.layer.preamble.internal_citations.InternalCitations',
'regparser.layer.preamble.paragraph_markers.ParagraphMarkers'
],
# It probably makes more sense to use plugins.update_dictionary, but we're
# keeping this for backwards compatibility.
# NOTE(review): presumably extend_list adds any layers registered under the
# 'eregs_ns.parser.layers' entry point to this default list -- confirm
# against regparser.plugins.
'ALL': plugins.extend_list('eregs_ns.parser.layers', [
'regparser.layer.external_citations.ExternalCitationParser',
'regparser.layer.formatting.Formatting',
'regparser.layer.graphics.Graphics',
]),
}

# Regulations.gov settings. The demo key is rate limited by IP; sign up for
# your own key at
# http://regulationsgov.github.io/developers/key/
Expand Down
29 changes: 28 additions & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,5 +32,32 @@
"six",
"stevedore"
],
entry_points={"console_scripts": ["eregs=eregs:main"]}
entry_points={
"console_scripts": "eregs=eregs:main",
"eregs_ns.parser.layer.cfr": [
"meta = regparser.layer.meta:Meta",
("internal-citations = regparser.layer.internal_citations:"
"InternalCitationParser"),
"toc = regparser.layer.table_of_contents:TableOfContentsLayer",
"terms = regparser.layer.terms:Terms",
("paragraph-markers = regparser.layer.paragraph_markers:"
"ParagraphMarkers"),
"keyterms = regparser.layer.key_terms:KeyTerms",
("external-citations = regparser.layer.external_citations:"
"ExternalCitationParser"),
"formatting = regparser.layer.formatting:Formatting",
"graphics = regparser.layer.graphics:Graphics",
],
"eregs_ns.parser.layer.preamble": [
"keyterms = regparser.layer.preamble.key_terms:KeyTerms",
("internal-citations = regparser.layer.preamble."
"internal_citations:InternalCitations"),
("paragraph-markers = regparser.layer.preamble.paragraph_markers:"
"ParagraphMarkers"),
("external-citations = regparser.layer.external_citations:"
"ExternalCitationParser"),
"formatting = regparser.layer.formatting:Formatting",
"graphics = regparser.layer.graphics:Graphics",
]
}
)
Empty file added tests/interpparser/__init__.py
Empty file.
128 changes: 128 additions & 0 deletions tests/interpparser/layers_tests.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,128 @@
from regparser.tree.struct import Node
from interpparser.layers import Interpretations


def test_process():
    """Each paragraph with a matching interpretation node in the tree gets
    exactly one reference to it; a paragraph with no interpretation gets
    None."""
    interp_11a = Node("Interp11a",
                      [Node("child1"), Node("child2")],
                      ['102', '11', 'a', Node.INTERP_MARK],
                      node_type=Node.INTERP)
    interp_11c5v = Node("Interp11c5v",
                        label=['102', '11', 'c', '5', 'v', Node.INTERP_MARK],
                        node_type=Node.INTERP)
    interp_b5ii = Node("InterpB5ii",
                       label=['102', 'B', '5', 'ii', Node.INTERP_MARK],
                       node_type=Node.INTERP)
    # This interpretation sits two levels further down than the others
    interp_9c1 = Node("Interp9c1",
                      label=['102', '9', 'c', '1', Node.INTERP_MARK],
                      node_type=Node.INTERP)
    nested = Node(children=[Node(label=['102'], children=[interp_9c1])])
    tree = Node(children=[interp_11a, interp_11c5v, interp_b5ii, nested])

    layer = Interpretations(tree)
    layer.pre_process()

    expectations = [
        (['102', '11', 'a'], '102-11-a-Interp'),
        (['102', '11', 'c', '5', 'v'], '102-11-c-5-v-Interp'),
        (['102', 'B', '5', 'ii'], '102-B-5-ii-Interp'),
        (['102', '9', 'c', '1'], '102-9-c-1-Interp'),
    ]
    results = [layer.process(Node(label=label))
               for label, _ in expectations]

    for result in results:
        assert len(result) == 1
    for result, (_, reference) in zip(results, expectations):
        assert result[0]['reference'] == reference

    unmatched = layer.process(Node(label=['102', '10', 'a']))
    assert unmatched is None


def test_process_subparagraph_of_referenced_text():
    """An interpretation of 11(a) containing only whitespace, whose single
    child interprets 11(a)(1), yields nothing for 11(a) but something for
    11(a)(1)."""
    sub_interp = Node("Interp11a1", node_type=Node.INTERP,
                      label=['100', '11', 'a', '1', Node.INTERP_MARK])
    blank_interp = Node("\n\n\n",
                        node_type=Node.INTERP,
                        label=['100', '11', 'a', Node.INTERP_MARK],
                        children=[sub_interp])
    tree = Node(label=['100'], children=[blank_interp])

    layer = Interpretations(tree)
    layer.pre_process()

    parent_result = layer.process(Node(label=['100', '11', 'a']))
    assert parent_result is None
    child_result = layer.process(Node(label=['100', '11', 'a', '1']))
    assert child_result is not None


def test_process_has_multiple_paragraphs():
    """An interpretation of 11(a) containing only whitespace still produces
    a result for 11(a) when its child is a paragraph of that same
    interpretation (label ending in the interp mark followed by '1')."""
    paragraph = Node("Interp11a-1", node_type=Node.INTERP,
                     label=['100', '11', 'a', Node.INTERP_MARK, '1'])
    blank_interp = Node("\n\n\n",
                        node_type=Node.INTERP,
                        label=['100', '11', 'a', Node.INTERP_MARK],
                        children=[paragraph])
    tree = Node(label=['100'], children=[blank_interp])

    layer = Interpretations(tree)
    layer.pre_process()

    result = layer.process(Node(label=['100', '11', 'a']))
    assert result is not None


def test_process_applies_to_multiple():
    """An interpretation labelled for 1(a) but titled 'Paragraph 1(a) and
    1(b)' produces a result for both 1(a) and 1(b)."""
    shared = Node('Text', title='Paragraph 1(a) and 1(b)',
                  node_type=Node.INTERP,
                  label=['100', '1', 'a', Node.INTERP_MARK])
    section = Node(label=['100', '1', Node.INTERP_MARK],
                   node_type=Node.INTERP,
                   children=[shared])
    tree = Node(label=['100', Node.INTERP_MARK], node_type=Node.INTERP,
                children=[section])

    layer = Interpretations(tree)
    layer.pre_process()

    for paragraph in ('a', 'b'):
        result = layer.process(Node(label=['100', '1', paragraph]))
        assert result is not None


def test_process_regressions():
    """Two trees which must not produce a result for paragraph 1(a): a node
    carrying an interp label but created without ``node_type=Node.INTERP``,
    and an interp node whose label continues past the interp mark."""
    target_label = ['100', '1', 'a']

    # Interp-style label, but the node type is left at Node's default
    untyped = Node('Text', title='Paragraph 1(a) and 1(b)',
                   label=['100', '1', 'a', Node.INTERP_MARK])
    layer = Interpretations(untyped)
    layer.pre_process()
    assert layer.process(Node(label=target_label)) is None

    # Typed as an interpretation, but the label ends in a paragraph number
    numbered = Node('Text', title='Paragraph 1(a) and 1(b)',
                    label=['100', '1', 'a', Node.INTERP_MARK, '1'],
                    node_type=Node.INTERP)
    layer = Interpretations(numbered)
    layer.pre_process()
    assert layer.process(Node(label=target_label)) is None


def test_empty_interpretations():
    """`empty_interpretation` is true for whitespace-only text and for a
    textless node whose child has no label; it is false for a node with
    text and for a textless node whose child is labelled
    ``['1', Node.INTERP_MARK, '3']``."""
    layer = Interpretations(None)
    is_empty = layer.empty_interpretation

    assert is_empty(Node('\n\n'))
    assert is_empty(Node('', [Node('Subpar')]))
    assert not is_empty(Node('Content'))

    labelled_child = Node('Something', label=['1', Node.INTERP_MARK, '3'])
    assert not is_empty(Node('', [labelled_child]))


def test_pre_process_multiple_interps():
    """Appendix G is referenced by both the combined 'G_H' interpretation
    and its own 'G' interpretation, in that order; the 'H' interpretation
    is not included."""
    def appendix_interp(text, title, letter):
        # All three interpretations differ only in text, title and the
        # appendix portion of the label
        return Node(text, title=title,
                    label=['1111', letter, 'Interp'],
                    node_type=Node.INTERP)

    only_g = appendix_interp('GGGG', 'Appendix G', 'G')
    only_h = appendix_interp('HHHH', 'Appendix H', 'H')
    g_and_h = appendix_interp('GHGHGH', 'Appendices G and H', 'G_H')

    interp_root = Node(label=['1111', 'Interp'], node_type=Node.INTERP,
                       children=[g_and_h, only_g, only_h])
    tree = Node(label=['1111'], children=[interp_root])

    layer = Interpretations(tree)
    layer.pre_process()

    appendix_g = Node('App G', label=['1111', 'G'], node_type=Node.APPENDIX)
    expected = [{'reference': '1111-G_H-Interp'},
                {'reference': '1111-G-Interp'}]
    assert layer.process(appendix_g) == expected
Loading

0 comments on commit 2ab5bdd

Please sign in to comment.