Skip to content

Commit

Permalink
Move existing preprocessors to entry_points
Browse files Browse the repository at this point in the history
This also allows us to remove some logic from regparser.plugins
  • Loading branch information
cmc333333 committed Dec 27, 2016
1 parent 2ab5bdd commit cb87c5e
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 83 deletions.
36 changes: 1 addition & 35 deletions regparser/plugins.py
Original file line number Diff line number Diff line change
@@ -1,23 +1,9 @@
from collections import OrderedDict, defaultdict

from importlib import import_module
from collections import defaultdict

from stevedore import extension
from stevedore.exception import NoMatches


def extend_list(namespace, original_list):
    """Return a copy of ``original_list`` extended with plugin entries.

    Every extension registered under ``namespace`` contributes its
    ``entry_point_target`` string, appended after the original entries.

    :param namespace: entry-point namespace handed to stevedore's
        ``ExtensionManager``
    :param original_list: iterable of strings to start from; it is never
        mutated
    :returns: a new list in every case (plugins found or not), so callers
        may modify the result without affecting ``original_list``
    """
    results = list(original_list)   # shallow copy
    try:
        mgr = extension.ExtensionManager(namespace=namespace,
                                         invoke_on_load=False)
        mgr.map(lambda ext: results.append(ext.entry_point_target))
    except NoMatches:
        # Nothing registered for this namespace; the plain copy is the answer
        pass
    return results


def update_dictionary(namespace, original):
"""
Use the extension manager to update a dictionary.
Expand All @@ -39,23 +25,3 @@ def handle_plugin(ext):
return dict(original)
except NoMatches:
return dict(original)


def class_paths_to_classes(class_paths):
    """Resolve a list of class-path strings into the objects they name.

    Each path is either ``package.module.ClassName`` or the entry-point
    style ``package.module:ClassName``. When a colon is present it is the
    separator; otherwise the final dot is.

    :param class_paths: iterable of class-path strings
    :returns: list of the resolved attributes, in the same order as the
        input
    :raises ValueError: if a path has no module/attribute separator
    :raises ImportError: if the module portion cannot be imported
    :raises AttributeError: if the module lacks the named attribute
    """
    results = []
    for class_path in class_paths:
        split_char = ':' if ':' in class_path else '.'
        mod_string, found, class_name = class_path.rpartition(split_char)
        if not found:
            # Fail with an explicit message rather than an opaque
            # tuple-unpacking error
            raise ValueError(
                "Class path {0!r} must look like 'module.Class' or "
                "'module:Class'".format(class_path))
        mod = import_module(mod_string)
        results.append(getattr(mod, class_name))
    return results


def classes_by_shorthand(class_paths):
    """Build an ordered mapping from each class's ``shorthand`` attribute
    to the class itself.

    The class paths are resolved via ``class_paths_to_classes``; insertion
    order follows the order of ``class_paths``. If two classes share a
    shorthand, the later one takes the value while the key keeps its
    original position."""
    by_shorthand = OrderedDict()
    for klass in class_paths_to_classes(class_paths):
        by_shorthand[klass.shorthand] = klass
    return by_shorthand
8 changes: 4 additions & 4 deletions regparser/tree/xml_parser/xml_wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,9 @@

from lxml import etree
import six
from stevedore.extension import ExtensionManager

from regparser.plugins import class_paths_to_classes
from regparser.tree.xml_parser.preprocessors import replace_html_entities
import settings


class XMLWrapper(object):
Expand All @@ -29,8 +28,9 @@ def preprocess(self):
attempts to fix some of those (general) flaws. For specific issues, we
tend to instead use the files in settings.LOCAL_XML_PATHS"""

for preprocessor in class_paths_to_classes(settings.PREPROCESSORS):
preprocessor().transform(self.xml)
for extension in ExtensionManager('eregs_ns.parser.preprocessors',
invoke_on_load=True):
extension.plugin.transform(self.xml)

return self

Expand Down
14 changes: 0 additions & 14 deletions settings.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,20 +106,6 @@
"ATF I 5300.1": "https://atf-eregs.apps.cloud.gov/static/atf_eregs/5300_1.pdf",
"ATF I 5300.2": "https://www.atf.gov/file/58806/download"}

# Class paths of the XML preprocessors shipped with the parser. The list is
# passed through plugins.extend_list, which appends the targets of any
# extensions registered under the 'eregs_ns.parser.preprocessors' entry-point
# namespace after these built-in entries.
# NOTE(review): consumers appear to apply these in list order -- confirm
# before reordering.
PREPROCESSORS = plugins.extend_list('eregs_ns.parser.preprocessors', [
"regparser.tree.xml_parser.preprocessors.MoveLastAMDPar",
"regparser.tree.xml_parser.preprocessors.SupplementAMDPar",
"regparser.tree.xml_parser.preprocessors.ParenthesesCleanup",
"regparser.tree.xml_parser.preprocessors.MoveAdjoiningChars",
"regparser.tree.xml_parser.preprocessors.ApprovalsFP",
"regparser.tree.xml_parser.preprocessors.ExtractTags",
"regparser.tree.xml_parser.preprocessors.Footnotes",
"regparser.tree.xml_parser.preprocessors.ParseAMDPARs",
"regparser.tree.xml_parser.preprocessors.AtfI50032",
"regparser.tree.xml_parser.preprocessors.AtfI50031",
"regparser.tree.xml_parser.preprocessors.ImportCategories",
])

# Regulations.gov settings. The demo key is rate limited by IP; sign up for
# your own key at
# http://regulationsgov.github.io/developers/key/
Expand Down
21 changes: 21 additions & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,27 @@
"ExternalCitationParser"),
"formatting = regparser.layer.formatting:Formatting",
"graphics = regparser.layer.graphics:Graphics",
],
"eregs_ns.parser.preprocessors": [
("move-last-amdpar = regparser.tree.xml_parser.preprocessors:"
"MoveLastAMDPar"),
("supplement-amd-par = regparser.tree.xml_parser.preprocessors:"
"SupplementAMDPar"),
("parenthesis-cleanup = regparser.tree.xml_parser.preprocessors:"
"ParenthesesCleanup"),
("move-adjoining-chars = regparser.tree.xml_parser.preprocessors:"
"MoveAdjoiningChars"),
("approvals-fp = regparser.tree.xml_parser.preprocessors:"
"ApprovalsFP"),
("extract-tags = regparser.tree.xml_parser.preprocessors:"
"ExtractTags"),
"footnotes = regparser.tree.xml_parser.preprocessors:Footnotes",
("parse-amdpars = regparser.tree.xml_parser.preprocessors:"
"ParseAMDPARs"),
"atf-i-50032 = regparser.tree.xml_parser.preprocessors:AtfI50032",
"atf-i-50031 = regparser.tree.xml_parser.preprocessors:AtfI50031",
("atf-import-categories = regparser.tree.xml_parser.preprocessors:"
"ImportCategories"),
]
}
)
30 changes: 0 additions & 30 deletions tests/plugins_tests.py

This file was deleted.

0 comments on commit cb87c5e

Please sign in to comment.