Skip to content

Commit

Permalink
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
wip: date changes
Browse files Browse the repository at this point in the history
dragon-dxw committed Mar 22, 2024
1 parent de5ebf2 commit 1b7ff77
Showing 2 changed files with 98 additions and 6 deletions.
36 changes: 33 additions & 3 deletions src/caselawclient/models/documents.py
Original file line number Diff line number Diff line change
@@ -33,6 +33,7 @@
)

# Cooldown window compared against the time since the last enrichment timestamp
# in `enriched_recently`.
MINIMUM_ENRICHMENT_TIME = datetime.timedelta(minutes=20)
# Cooldown window compared against the time since the last reparse request
# in `reparsed_recently`.
MINIMUM_REPARSE_TIME = datetime.timedelta(minutes=20)


class UnparsableDate(Warning):
@@ -285,6 +286,24 @@ def enrichment_datetime(self) -> Optional[datetime.datetime]:
"""When was this document successfully enriched (date from XML)"""
return self.get_latest_manifestation_datetime("tna-enriched")

@cached_property
def request_reparse_datetime(self) -> Optional[datetime.datetime]:
    """
    When was a reparse request last sent to TDR?

    Reads the ``last_sent_to_parser`` property for this document's URI from
    the API client and parses it as an ISO 8601 timestamp.

    Returns ``None`` when the stored value cannot be parsed, which includes
    an empty string and a non-string value such as ``None``.
    """
    date_as_string = self.api_client.get_property(self.uri, "last_sent_to_parser")
    try:
        return datetime.datetime.fromisoformat(date_as_string)
    except (TypeError, ValueError):
        # ValueError: a string that is not ISO 8601 (e.g. "").
        # TypeError: not a string at all (e.g. None for an unset property).
        return None

@cached_property
def request_enrich_datetime(self) -> Optional[datetime.datetime]:
    """
    When was an enrichment request last sent for this document?

    Reads a timestamp property for this document's URI from the API client
    and parses it as an ISO 8601 timestamp.

    Returns ``None`` when the stored value cannot be parsed, which includes
    an empty string and a non-string value such as ``None``.
    """
    # NOTE(review): this reads the same "last_sent_to_parser" property as
    # `request_reparse_datetime`, so the enrichment cooldown is currently
    # driven by the reparse timestamp. This looks like a copy-paste slip;
    # confirm which property the enrichment request actually writes.
    date_as_string = self.api_client.get_property(self.uri, "last_sent_to_parser")
    try:
        return datetime.datetime.fromisoformat(date_as_string)
    except (TypeError, ValueError):
        # ValueError: a string that is not ISO 8601 (e.g. "").
        # TypeError: not a string at all (e.g. None for an unset property).
        return None

@cached_property
def is_published(self) -> bool:
    """Return what the API client reports as the published state of this document's URI."""
    published = self.api_client.get_published(self.uri)
    return published
@@ -525,9 +544,9 @@ def can_enrich(self) -> bool:
@cached_property
def enriched_recently(self) -> bool:
"""
Has this document been enriched recently?
Has this document been sent for enrichment recently?
"""
last_enrichment = self.enrichment_datetime
last_enrichment = self.request_enrich_datetime
now = datetime.datetime.now(tz=datetime.timezone.utc)
if last_enrichment and now - last_enrichment < MINIMUM_ENRICHMENT_TIME:
return True
@@ -645,7 +664,18 @@ def can_reparse(self) -> bool:
"""
Is it sensible to reparse this document?
"""
if self.docx_exists():
if (self.reparsed_recently is False) and self.docx_exists():
return True
return False

@cached_property
def reparsed_recently(self) -> bool:
    """
    Has a reparse request for this document been sent within the cooldown window?

    True only when `request_reparse_datetime` is set and less than
    `MINIMUM_REPARSE_TIME` has elapsed since then.
    """
    sent_at = self.request_reparse_datetime
    if not sent_at:
        # No usable timestamp, so nothing to cool down from.
        return False
    elapsed = datetime.datetime.now(tz=datetime.timezone.utc) - sent_at
    return elapsed < MINIMUM_REPARSE_TIME

68 changes: 65 additions & 3 deletions tests/models/test_documents.py
Original file line number Diff line number Diff line change
@@ -427,7 +427,7 @@ def test_unpublish(
)


class TestDocumentEnrichedRecently:
class TestDocumentEnrichedOrReparsedRecently:
def test_enriched_recently_returns_false_when_never_enriched(self, mock_api_client):
document = Document("test/1234", mock_api_client)
mock_api_client.get_property.return_value = ""
@@ -436,20 +436,42 @@ def test_enriched_recently_returns_false_when_never_enriched(self, mock_api_clie

def test_enriched_recently_returns_true_within_cooldown(self, mock_api_client):
document = Document("test/1234", mock_api_client)
document.enrichment_datetime = datetime.datetime.now(
document.request_enrich_datetime = datetime.datetime.now(
tz=datetime.timezone.utc
) - datetime.timedelta(seconds=30)

assert document.enriched_recently is True

def test_enriched_recently_returns_false_outside_cooldown(self, mock_api_client):
document = Document("test/1234", mock_api_client)
document.enrichment_datetime = datetime.datetime.now(
document.request_enrich_datetime = datetime.datetime.now(
tz=datetime.timezone.utc
) - datetime.timedelta(days=2)

assert document.enriched_recently is False

def test_reparsed_recently_returns_false_when_never_enriched(self, mock_api_client):
    """With an empty property value from the API client, the document is not recently reparsed."""
    doc = Document("test/1234", mock_api_client)
    mock_api_client.get_property.return_value = ""

    recently = doc.reparsed_recently
    assert recently is False

def test_reparsed_recently_returns_true_within_cooldown(self, mock_api_client):
    """A reparse request from 30 seconds ago counts as recent."""
    doc = Document("test/1234", mock_api_client)
    thirty_seconds = datetime.timedelta(seconds=30)
    current_time = datetime.datetime.now(tz=datetime.timezone.utc)
    # Assigning directly bypasses the cached property's API lookup.
    doc.request_reparse_datetime = current_time - thirty_seconds

    assert doc.reparsed_recently is True

def test_reparsed_recently_returns_false_outside_cooldown(self, mock_api_client):
    """A reparse request from two days ago no longer counts as recent."""
    doc = Document("test/1234", mock_api_client)
    two_days = datetime.timedelta(days=2)
    current_time = datetime.datetime.now(tz=datetime.timezone.utc)
    # Assigning directly bypasses the cached property's API lookup.
    doc.request_reparse_datetime = current_time - two_days

    assert doc.reparsed_recently is False


class TestCanEnrich:
@pytest.mark.parametrize(
@@ -493,6 +515,46 @@ def test_returns_true_when_enriched_recently_is_true_and_validates_against_schem
assert document.can_enrich is can_enrich


class TestCanReparse:
    """`Document.can_reparse` is true only with a docx and no recent reparse request."""

    @pytest.mark.parametrize(
        "reparsed_recently, docx_exists, can_reparse",
        [
            # Reparsed recently and docx exists - can't reparse
            (True, True, False),
            # Reparsed recently and no docx - can't reparse
            (True, False, False),
            # Not reparsed recently and no docx - can't reparse
            (False, False, False),
            # Not reparsed recently and docx exists - can reparse
            (False, True, True),
        ],
    )
    def test_can_reparse_logic(
        self, mock_api_client, reparsed_recently, docx_exists, can_reparse
    ):
        """Check each combination of the two inputs against the expected result."""
        document = Document("test/1234", mock_api_client)
        # `return_value` is forwarded to the mock each patcher creates.
        recently_patch = patch.object(
            Document,
            "reparsed_recently",
            new_callable=PropertyMock,
            return_value=reparsed_recently,
        )
        docx_patch = patch.object(Document, "docx_exists", return_value=docx_exists)

        with recently_patch, docx_patch:
            assert document.can_reparse is can_reparse


class TestDocumentEnrich:
@time_machine.travel(datetime.datetime(1955, 11, 5, 6))
@patch("caselawclient.models.documents.announce_document_event")

0 comments on commit 1b7ff77

Please sign in to comment.