diff --git a/README.md b/README.md index d1989e9d..1261b142 100644 --- a/README.md +++ b/README.md @@ -42,6 +42,8 @@ This repository is part of a larger project. To read about it, please see * pyparsing (1.5.7) - Used to do generic parsing on the plain text * inflection (0.1.2) - Helps determine pluralization (for terms layer) * requests (1.2.3) - Client library for writing output to an API +* requests_cache (0.4.4) - *Optional* - Library for caching request results + (speeds up rebuilding regulations) If running tests: diff --git a/build_from.py b/build_from.py index 50f654b9..585550c9 100644 --- a/build_from.py +++ b/build_from.py @@ -2,13 +2,23 @@ import logging import sys +try: + import requests_cache + requests_cache.install_cache('fr_cache') +except ImportError: + # If the cache library isn't present, do nothing -- we'll just make full + # HTTP requests rather than looking it up from the cache + pass + from regparser.diff import treediff from regparser.builder import Builder + logger = logging.getLogger('build_from') logger.setLevel(logging.INFO) logger.addHandler(logging.StreamHandler()) + if __name__ == "__main__": if len(sys.argv) < 6: print("Usage: python build_from.py regulation.xml title " @@ -43,12 +53,13 @@ builder.gen_and_write_layers(reg_tree, sys.argv[4:6]) if len(sys.argv) < 7 or sys.argv[6].lower() == 'true': all_versions = {doc_number: reg_tree} - for version, old, new_tree in builder.revision_generator(reg_tree): + for version, old, new_tree, notices in builder.revision_generator( + reg_tree): logger.info("Version %s", version) all_versions[version] = new_tree builder.doc_number = version builder.write_regulation(new_tree) - builder.gen_and_write_layers(new_tree, sys.argv[4:6]) + builder.gen_and_write_layers(new_tree, sys.argv[4:6], notices) # now build diffs - include "empty" diffs comparing a version to itself for lhs_version, lhs_tree in all_versions.iteritems(): diff --git a/regparser/builder.py b/regparser/builder.py index 46ce2741..0f09e32c 100644 --- a/regparser/builder.py +++ b/regparser/builder.py @@ -24,7 +24,8 @@ def __init__(self, cfr_title, cfr_part, doc_number): self.doc_number = doc_number self.writer = api_writer.Client() - self.notices = fetch_notices(self.cfr_title, self.cfr_part) + self.notices = fetch_notices(self.cfr_title, self.cfr_part, + only_final=True) modify_effective_dates(self.notices) # Only care about final self.notices = [n for n in self.notices if 'effective_on' in n] @@ -39,7 +40,9 @@ def write_notices(self): def write_regulation(self, reg_tree): self.writer.regulation(self.cfr_part, self.doc_number).write(reg_tree) - def gen_and_write_layers(self, reg_tree, act_info): + def gen_and_write_layers(self, reg_tree, act_info, notices=None): + if notices is None: + notices = applicable_notices(self.notices, self.doc_number) for ident, layer_class in ( ('external-citations', external_citations.ExternalCitationParser), @@ -55,7 +58,7 @@ def gen_and_write_layers(self, reg_tree, act_info): ('formatting', formatting.Formatting), ('graphics', graphics.Graphics)): layer = layer_class(reg_tree, self.cfr_title, self.doc_number, - self.notices, act_info).build() + notices, act_info).build() self.writer.layer(ident, self.cfr_part, self.doc_number).write( layer) @@ -65,13 +68,13 @@ def revision_generator(self, reg_tree): relevant_notices.extend( n for n in self.eff_notices[date] if 'changes' in n and n['document_number'] != self.doc_number) - for notice in relevant_notices: version = notice['document_number'] old_tree = reg_tree merged_changes = self.merge_changes(version, notice['changes']) reg_tree = compile_regulation(old_tree, merged_changes) - yield version, old_tree, reg_tree + notices = applicable_notices(self.notices, version) + yield version, old_tree, reg_tree, notices def merge_changes(self, document_number, changes): patches = content.RegPatches().get(document_number) diff --git a/regparser/federalregister.py b/regparser/federalregister.py index 990a2422..b6742469 100644 --- a/regparser/federalregister.py +++ b/regparser/federalregister.py @@ -32,9 +32,9 @@ def fetch_notice_json(cfr_title, cfr_part, only_final=False): return [] -def fetch_notices(cfr_title, cfr_part): +def fetch_notices(cfr_title, cfr_part, only_final=False): """Search and then convert to notice objects (including parsing)""" notices = [] - for result in fetch_notice_json(cfr_title, cfr_part): + for result in fetch_notice_json(cfr_title, cfr_part, only_final): notices.append(build_notice(cfr_title, cfr_part, result)) return notices diff --git a/tests/builder_tests.py b/tests/builder_tests.py new file mode 100644 index 00000000..c2bd5589 --- /dev/null +++ b/tests/builder_tests.py @@ -0,0 +1,40 @@ +from unittest import TestCase + +from mock import patch + +from regparser.builder import Builder + + +class BuilderTests(TestCase): + @patch.object(Builder, 'merge_changes') + @patch.object(Builder, '__init__') + def test_revision_generator_notices(self, init, merge_changes): + init.return_value = None + merge_changes = [] + b = Builder() # Don't need parameters as init's been mocked out + aaaa = {'document_number': 'aaaa', 'effective_on': '2012-12-12', + 'publication_date': '2011-11-11', 'changes': []} + bbbb = {'document_number': 'bbbb', 'effective_on': '2012-12-12', + 'publication_date': '2011-11-12', 'changes': []} + cccc = {'document_number': 'cccc', 'effective_on': '2013-01-01', + 'publication_date': '2012-01-01', 'changes': []} + b.notices = [aaaa, bbbb, cccc] + b.eff_notices = {'2012-12-12': [aaaa, bbbb], '2013-01-01': [cccc]} + b.doc_number = 'aaaa' + tree = {} + version_list = [] + notice_lists = [] + for version, _, _, notices in b.revision_generator(tree): + version_list.append(version) + notice_lists.append(notices) + self.assertEqual(['bbbb', 'cccc'], version_list) + self.assertEqual(2, len(notice_lists)) + + self.assertEqual(2, len(notice_lists[0])) + self.assertTrue(aaaa in notice_lists[0]) + self.assertTrue(bbbb in notice_lists[0]) + + self.assertEqual(3, len(notice_lists[1])) + self.assertTrue(aaaa in notice_lists[1]) + self.assertTrue(bbbb in notice_lists[1]) + self.assertTrue(cccc in notice_lists[1])