From e31d9f39906d9e078b30d102cad149ae7e688b2e Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Mon, 27 Jan 2025 10:47:11 -0400 Subject: [PATCH 1/7] feat(lib): Enhances get_parties_from_case_name method This commit enhances the get_parties_from_case_name method by removing common strings from bankruptcy case names before extracting party information. This improves the accuracy of party identification. - Adds a new separator character to the list of valid separators for identifying parties in bankruptcy cases. --- cl/lib/search_index_utils.py | 18 +++++--- cl/lib/tests.py | 84 ++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+), 7 deletions(-) diff --git a/cl/lib/search_index_utils.py b/cl/lib/search_index_utils.py index 7d6d7feb5b..abee6abc16 100644 --- a/cl/lib/search_index_utils.py +++ b/cl/lib/search_index_utils.py @@ -1,3 +1,4 @@ +import re from datetime import date from cl.lib.date_time import midnight_pt @@ -49,14 +50,17 @@ def get_parties_from_case_name(case_name: str) -> list[str]: :return: A list of parties. If no valid separator is found, returns an empty list. """ + # Removes text enclosed in parentheses at the end of the string. + cleaned_case_name = re.sub(r"\s*\([^)]*\)$", "", case_name) - valid_case_name_separators = [ - " v ", - " v. ", - " vs. ", - " vs ", - ] + # Removes any HTML at the end of the string. + cleaned_case_name = re.sub(r"\s*<.*$", "", cleaned_case_name) + + # Removes text following "-BELOW" or "-ABOVE" at the end of the string. + cleaned_case_name = re.sub(r"\s*(-BELOW|-ABOVE).*$", "", cleaned_case_name) + + valid_case_name_separators = [" v ", " v. ", " vs. ", " vs ", " and "] for separator in valid_case_name_separators: if separator in case_name: - return case_name.split(separator, 1) + return cleaned_case_name.split(separator, 1) return [] diff --git a/cl/lib/tests.py b/cl/lib/tests.py index ce1aa40741..7d36ecdc74 100644 --- a/cl/lib/tests.py +++ b/cl/lib/tests.py @@ -39,6 +39,7 @@ get_redis_interface, release_redis_lock, ) +from cl.lib.search_index_utils import get_parties_from_case_name from cl.lib.string_utils import normalize_dashes, trunc from cl.lib.utils import ( check_for_proximity_tokens, @@ -1203,6 +1204,89 @@ def test_check_and_sanitize_queries_bad_syntax(self) -> None: ) self.assertEqual(output, test["sanitized"]) + def test_can_get_parties_from_case_name(self) -> None: + class PartiesNameTestType(TypedDict): + case_name: str + output: list[str] + + tests: list[PartiesNameTestType] = [ + { + "case_name": "Mendelsohn. Singh", + "output": [], + }, + { + "case_name": "Cadle Co. v Matos", + "output": ["Cadle Co.", "Matos"], + }, + { + "case_name": "Cadle Co. v Matos", + "output": ["Cadle Co.", "Matos"], + }, + { + "case_name": "Cadle Co. v. Matos", + "output": ["Cadle Co.", "Matos"], + }, + { + "case_name": "Cadle Co. vs Matos", + "output": ["Cadle Co.", "Matos"], + }, + { + "case_name": "Cadle Co. vs. Matos", + "output": ["Cadle Co.", "Matos"], + }, + { + "case_name": "Paul Thomas Presbury, Jr. and Lisa Rae Presbury", + "output": ["Paul Thomas Presbury, Jr.", "Lisa Rae Presbury"], + }, + { + "case_name": "Ma Margarita Bernal Sosa -ABOVE MED", + "output": [], + }, + { + "case_name": "Jennifer Renee' Abbott and Quentin Andrew Abbott -ABOVE MED", + "output": ["Jennifer Renee' Abbott", "Quentin Andrew Abbott"], + }, + { + "case_name": "Aiesha Renee -BELOW MED", + "output": [], + }, + { + "case_name": "Justin Kaiser and Belinda Kaiser -BELOW MED", + "output": ["Justin Kaiser", "Belinda Kaiser"], + }, + { + "case_name": "Cosmorex Ltd. (in Liquidation)", + "output": [], + }, + { + "case_name": "Cowen & Co. v. Zagar (In re Zagar)", + "output": ["Cowen & Co.", "Zagar"], + }, + { + "case_name": 'Advantage LLC Jointly Administered under 23-90886.', + "output": [], + }, + { + "case_name": 'Sather v. CarlsonDO NOT DOCKET. CASE TRANSFERRED OUT.', + "output": ["Sather", "Carlson"], + }, + { + "case_name": 'Saucedo and Green Dream International, LLC Case Consolidated under 23-03142 ', + "output": ["Saucedo", "Green Dream International, LLC"], + }, + ] + for test in tests: + with self.subTest( + input=test["case_name"], msg="get parties names from case name" + ): + parties: list[str] = get_parties_from_case_name( + test["case_name"] + ) + self.assertEqual( + parties, + test["output"], + ) + class TestRedisUtils(SimpleTestCase): """Test Redis utils functions.""" From ca278cdb85dad0ef68fe94a871c51363aa0394c2 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Fri, 31 Jan 2025 11:52:19 -0400 Subject: [PATCH 2/7] feat(lib): Add dedicated helper function to extract parties from bankruptcy case name --- cl/lib/search_index_utils.py | 31 ++++++++++++++++++++++++++++--- cl/lib/tests.py | 16 ++++++++-------- cl/search/documents.py | 12 +++++++++--- cl/search/tasks.py | 15 ++++++++++++--- cl/search/tests/tests_es_recap.py | 11 +++++++---- 5 files changed, 64 insertions(+), 21 deletions(-) diff --git a/cl/lib/search_index_utils.py b/cl/lib/search_index_utils.py index abee6abc16..16e99cee2b 100644 --- a/cl/lib/search_index_utils.py +++ b/cl/lib/search_index_utils.py @@ -41,6 +41,8 @@ def __init__(self, message): list(range(0, 10)) + list(range(11, 13)) + list(range(14, 32)) ) +VALID_CASE_NAME_SEPARATORS = [" v ", " v. ", " vs. ", " vs "] + def get_parties_from_case_name(case_name: str) -> list[str]: """Extracts the parties from case_name by splitting on common case_name @@ -50,6 +52,28 @@ def get_parties_from_case_name(case_name: str) -> list[str]: :return: A list of parties. If no valid separator is found, returns an empty list. """ + for separator in VALID_CASE_NAME_SEPARATORS: + if separator in case_name: + return case_name.split(separator, 1) + return [] + + +def get_parties_from_bankruptcy_case_name(case_name: str) -> list[str]: + """Extracts the parties involved in a bankruptcy case from the case name. + + This function attempts to identify the parties by splitting the case name + string based on common separators. It also performs some cleanup to + remove extraneous information like court designations in parentheses, + trailing HTML, and text related to "BELOW" or "ABOVE" designations. + + Args: + case_name: The bankruptcy case name string. + + Returns: + A list of strings, where each string represents a party involved + in the case. If no recognized separator is found, the function + returns a list containing the cleaned case name as a single element. + """ # Removes text enclosed in parentheses at the end of the string. cleaned_case_name = re.sub(r"\s*\([^)]*\)$", "", case_name) @@ -59,8 +83,9 @@ def get_parties_from_case_name(case_name: str) -> list[str]: # Removes text following "-BELOW" or "-ABOVE" at the end of the string. cleaned_case_name = re.sub(r"\s*(-BELOW|-ABOVE).*$", "", cleaned_case_name) - valid_case_name_separators = [" v ", " v. ", " vs. ", " vs ", " and "] - for separator in valid_case_name_separators: + case_name_separators = VALID_CASE_NAME_SEPARATORS.copy() + case_name_separators.append(" and ") + for separator in case_name_separators: if separator in case_name: return cleaned_case_name.split(separator, 1) - return [] + return [cleaned_case_name] diff --git a/cl/lib/tests.py b/cl/lib/tests.py index 7d36ecdc74..e83e7ce376 100644 --- a/cl/lib/tests.py +++ b/cl/lib/tests.py @@ -39,7 +39,7 @@ get_redis_interface, release_redis_lock, ) -from cl.lib.search_index_utils import get_parties_from_case_name +from cl.lib.search_index_utils import get_parties_from_bankruptcy_case_name from cl.lib.string_utils import normalize_dashes, trunc from cl.lib.utils import ( check_for_proximity_tokens, @@ -1204,7 +1204,7 @@ def test_check_and_sanitize_queries_bad_syntax(self) -> None: ) self.assertEqual(output, test["sanitized"]) - def test_can_get_parties_from_case_name(self) -> None: + def test_can_get_parties_from_bankruptcy_case_name(self) -> None: class PartiesNameTestType(TypedDict): case_name: str output: list[str] @@ -1212,7 +1212,7 @@ class PartiesNameTestType(TypedDict): tests: list[PartiesNameTestType] = [ { "case_name": "Mendelsohn. Singh", - "output": [], + "output": ["Mendelsohn. Singh"], }, { "case_name": "Cadle Co. v Matos", @@ -1240,7 +1240,7 @@ class PartiesNameTestType(TypedDict): }, { "case_name": "Ma Margarita Bernal Sosa -ABOVE MED", - "output": [], + "output": ["Ma Margarita Bernal Sosa"], }, { "case_name": "Jennifer Renee' Abbott and Quentin Andrew Abbott -ABOVE MED", @@ -1248,7 +1248,7 @@ class PartiesNameTestType(TypedDict): }, { "case_name": "Aiesha Renee -BELOW MED", - "output": [], + "output": ["Aiesha Renee"], }, { "case_name": "Justin Kaiser and Belinda Kaiser -BELOW MED", @@ -1256,7 +1256,7 @@ class PartiesNameTestType(TypedDict): }, { "case_name": "Cosmorex Ltd. (in Liquidation)", - "output": [], + "output": ["Cosmorex Ltd."], }, { "case_name": "Cowen & Co. v. Zagar (In re Zagar)", @@ -1264,7 +1264,7 @@ class PartiesNameTestType(TypedDict): }, { "case_name": 'Advantage LLC Jointly Administered under 23-90886.', - "output": [], + "output": ["Advantage LLC"], }, { "case_name": 'Sather v. CarlsonDO NOT DOCKET. CASE TRANSFERRED OUT.', @@ -1279,7 +1279,7 @@ class PartiesNameTestType(TypedDict): with self.subTest( input=test["case_name"], msg="get parties names from case name" ): - parties: list[str] = get_parties_from_case_name( + parties: list[str] = get_parties_from_bankruptcy_case_name( test["case_name"] ) self.assertEqual( diff --git a/cl/search/documents.py b/cl/search/documents.py index fcae4baaf6..ff7f1e3d92 100644 --- a/cl/search/documents.py +++ b/cl/search/documents.py @@ -15,7 +15,11 @@ from cl.lib.command_utils import logger from cl.lib.elasticsearch_utils import build_es_base_query from cl.lib.fields import JoinField, PercolatorField -from cl.lib.search_index_utils import get_parties_from_case_name, null_map +from cl.lib.search_index_utils import ( + get_parties_from_bankruptcy_case_name, + get_parties_from_case_name, + null_map, +) from cl.lib.utils import deepgetattr from cl.people_db.models import ( Attorney, @@ -1247,8 +1251,10 @@ def prepare_parties(self, instance): if not out["party"]: # Get party from docket case_name if no normalized parties are # available. - party_from_case_name = get_parties_from_case_name( - instance.case_name + party_from_case_name = ( + get_parties_from_bankruptcy_case_name(instance.case_name) + if instance.court_id.endswith("b") + else get_parties_from_case_name(instance.case_name) ) out["party"] = party_from_case_name if party_from_case_name else [] diff --git a/cl/search/tasks.py b/cl/search/tasks.py index 602ac95700..b02ee4cdeb 100644 --- a/cl/search/tasks.py +++ b/cl/search/tasks.py @@ -33,7 +33,10 @@ from cl.audio.models import Audio from cl.celery_init import app from cl.lib.elasticsearch_utils import build_daterange_query -from cl.lib.search_index_utils import get_parties_from_case_name +from cl.lib.search_index_utils import ( + get_parties_from_bankruptcy_case_name, + get_parties_from_case_name, +) from cl.people_db.models import Person, Position from cl.search.documents import ( ES_CHILD_ID, @@ -316,8 +319,14 @@ def document_fields_to_update( # parties are available. if main_instance.parties.exists(): continue - field_value = get_parties_from_case_name( - main_instance.case_name + field_value = ( + get_parties_from_bankruptcy_case_name( + main_instance.case_name + ) + if main_instance.court_id.endswith("b") + else get_parties_from_case_name( + main_instance.case_name + ) ) else: field_value = getattr(related_instance, field) diff --git a/cl/search/tests/tests_es_recap.py b/cl/search/tests/tests_es_recap.py index 933913ea7e..ef4023a436 100644 --- a/cl/search/tests/tests_es_recap.py +++ b/cl/search/tests/tests_es_recap.py @@ -5399,7 +5399,9 @@ def test_verify_empty_lists_type_fields_after_partial_update(self): """ with self.captureOnCommitCallbacks(execute=True) as callbacks: d = DocketFactory( - case_name="Lorem Ipsum", court=self.court, source=Docket.RECAP + case_name="Lorem Ipsum", + court=self.court_2, + source=Docket.RECAP, ) firm = AttorneyOrganizationFactory( lookup_key="00kingofprussiaroadradnorkesslertopazmeltze87437", @@ -7376,10 +7378,11 @@ def test_index_party_from_case_name_when_parties_are_not_available( docket_doc_no_parties = DocketDocument.get(docket_with_no_parties.pk) self.assertEqual(docket_doc_no_parties.party, ["America", "Smith"]) - # Test that parties are not extracted from the case_name if it does not contain - # a valid separator. + # Test that parties are not extracted from the case_name if the case + # originates from a district court and lacks a valid separator. + court_2 = CourtFactory(id="akd", jurisdiction="FD") docket_with_no_parties_no_separator = DocketFactory( - court=self.court, + court=court_2, case_name="In re: Bank Smith", docket_number="1:21-bk-4446", source=Docket.RECAP, From ba2735b272cea95d24f7ee6b91c226e8c3a70fe1 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Fri, 31 Jan 2025 15:53:59 -0400 Subject: [PATCH 3/7] refactor(lib): Rename helper function to extract parties from case name --- cl/lib/search_index_utils.py | 2 +- cl/lib/tests.py | 4 ++-- cl/search/documents.py | 4 ++-- cl/search/tasks.py | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cl/lib/search_index_utils.py b/cl/lib/search_index_utils.py index 16e99cee2b..d652f13569 100644 --- a/cl/lib/search_index_utils.py +++ b/cl/lib/search_index_utils.py @@ -58,7 +58,7 @@ def get_parties_from_case_name(case_name: str) -> list[str]: return [] -def get_parties_from_bankruptcy_case_name(case_name: str) -> list[str]: +def get_parties_from_case_name_bankr(case_name: str) -> list[str]: """Extracts the parties involved in a bankruptcy case from the case name. This function attempts to identify the parties by splitting the case name diff --git a/cl/lib/tests.py b/cl/lib/tests.py index e83e7ce376..8ad04bcbd0 100644 --- a/cl/lib/tests.py +++ b/cl/lib/tests.py @@ -39,7 +39,7 @@ get_redis_interface, release_redis_lock, ) -from cl.lib.search_index_utils import get_parties_from_bankruptcy_case_name +from cl.lib.search_index_utils import get_parties_from_case_name_bankr from cl.lib.string_utils import normalize_dashes, trunc from cl.lib.utils import ( check_for_proximity_tokens, @@ -1279,7 +1279,7 @@ class PartiesNameTestType(TypedDict): with self.subTest( input=test["case_name"], msg="get parties names from case name" ): - parties: list[str] = get_parties_from_bankruptcy_case_name( + parties: list[str] = get_parties_from_case_name_bankr( test["case_name"] ) self.assertEqual( diff --git a/cl/search/documents.py b/cl/search/documents.py index ff7f1e3d92..847bd12423 100644 --- a/cl/search/documents.py +++ b/cl/search/documents.py @@ -16,8 +16,8 @@ from cl.lib.elasticsearch_utils import build_es_base_query from cl.lib.fields import JoinField, PercolatorField from cl.lib.search_index_utils import ( - get_parties_from_bankruptcy_case_name, get_parties_from_case_name, + get_parties_from_case_name_bankr, null_map, ) from cl.lib.utils import deepgetattr @@ -1252,7 +1252,7 @@ def prepare_parties(self, instance): # Get party from docket case_name if no normalized parties are # available. party_from_case_name = ( - get_parties_from_bankruptcy_case_name(instance.case_name) + get_parties_from_case_name_bankr(instance.case_name) if instance.court_id.endswith("b") else get_parties_from_case_name(instance.case_name) ) diff --git a/cl/search/tasks.py b/cl/search/tasks.py index b02ee4cdeb..533b9ff1f6 100644 --- a/cl/search/tasks.py +++ b/cl/search/tasks.py @@ -34,8 +34,8 @@ from cl.celery_init import app from cl.lib.elasticsearch_utils import build_daterange_query from cl.lib.search_index_utils import ( - get_parties_from_bankruptcy_case_name, get_parties_from_case_name, + get_parties_from_case_name_bankr, ) from cl.people_db.models import Person, Position from cl.search.documents import ( @@ -320,7 +320,7 @@ def document_fields_to_update( if main_instance.parties.exists(): continue field_value = ( - get_parties_from_bankruptcy_case_name( + get_parties_from_case_name_bankr( main_instance.case_name ) if main_instance.court_id.endswith("b") From 7442b1e9d961c1a45ad5f76c35d16acab7437b53 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Tue, 4 Feb 2025 14:01:09 -0400 Subject: [PATCH 4/7] feat(court): Adds is_bankruptcy_court helper function This commit introduces a new helper function, `is_bankruptcy_court`, which checks if a given court ID corresponds to a bankruptcy court. --- cl/corpus_importer/utils.py | 17 +++++++++++++++++ cl/search/documents.py | 3 ++- cl/search/tasks.py | 3 ++- 3 files changed, 21 insertions(+), 2 deletions(-) diff --git a/cl/corpus_importer/utils.py b/cl/corpus_importer/utils.py index 966a98c7bc..5abb58d9e1 100644 --- a/cl/corpus_importer/utils.py +++ b/cl/corpus_importer/utils.py @@ -107,6 +107,23 @@ async def mark_ia_upload_needed(d: Docket, save_docket: bool) -> None: await d.asave() +def is_bankruptcy_court(court_id: str) -> bool: + """Checks if a given court ID corresponds to a bankruptcy court. + + This function queries the database to determine if the provided court + ID is associated with a federal bankruptcy court. + + Args: + court_id: The ID of the court to check (string). + + Returns: + True if the court ID corresponds to a bankruptcy court, False otherwise + (boolean). + """ + bankr_court_ids = Court.federal_courts.bankruptcy_pacer_courts() + return bankr_court_ids.filter(pk=court_id).exists() + + def is_appellate_court(court_id: str) -> bool: """Checks if the given court_id belongs to an appellate court. diff --git a/cl/search/documents.py b/cl/search/documents.py index 847bd12423..8ff1f85712 100644 --- a/cl/search/documents.py +++ b/cl/search/documents.py @@ -8,6 +8,7 @@ from cl.alerts.models import Alert from cl.audio.models import Audio +from cl.corpus_importer.utils import is_bankruptcy_court from cl.custom_filters.templatetags.text_filters import ( best_case_name, html_decode, @@ -1253,7 +1254,7 @@ def prepare_parties(self, instance): # available. party_from_case_name = ( get_parties_from_case_name_bankr(instance.case_name) - if instance.court_id.endswith("b") + if is_bankruptcy_court(instance.court_id) else get_parties_from_case_name(instance.case_name) ) out["party"] = party_from_case_name if party_from_case_name else [] diff --git a/cl/search/tasks.py b/cl/search/tasks.py index 533b9ff1f6..189e41fbf7 100644 --- a/cl/search/tasks.py +++ b/cl/search/tasks.py @@ -32,6 +32,7 @@ ) from cl.audio.models import Audio from cl.celery_init import app +from cl.corpus_importer.utils import is_bankruptcy_court from cl.lib.elasticsearch_utils import build_daterange_query from cl.lib.search_index_utils import ( get_parties_from_case_name, @@ -323,7 +324,7 @@ def document_fields_to_update( get_parties_from_case_name_bankr( main_instance.case_name ) - if main_instance.court_id.endswith("b") + if is_bankruptcy_court(main_instance.court_id) else get_parties_from_case_name( main_instance.case_name ) From 41ea9cd18839920fb8cf953edd7d5a54c912411f Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Tue, 4 Feb 2025 20:02:04 -0400 Subject: [PATCH 5/7] tests(search): Verify ES indexing uses helper to parse parties --- cl/search/tests/tests_es_recap.py | 65 +++++++++++++++++++++++++++---- 1 file changed, 58 insertions(+), 7 deletions(-) diff --git a/cl/search/tests/tests_es_recap.py b/cl/search/tests/tests_es_recap.py index ef4023a436..8034bd6306 100644 --- a/cl/search/tests/tests_es_recap.py +++ b/cl/search/tests/tests_es_recap.py @@ -28,6 +28,10 @@ simplify_estimated_count, ) from cl.lib.redis_utils import get_redis_interface +from cl.lib.search_index_utils import ( + get_parties_from_case_name, + get_parties_from_case_name_bankr, +) from cl.lib.test_helpers import ( RECAPSearchTestCase, rd_type_v4_api_keys, @@ -7319,15 +7323,58 @@ def test_prepare_parties(self) -> None: {firm.name, firm_2.name, firm_2_1.name, firm_1_2.name}, ) + @mock.patch( + "cl.search.documents.get_parties_from_case_name_bankr", + wraps=get_parties_from_case_name_bankr, + ) + @mock.patch( + "cl.search.tasks.get_parties_from_case_name_bankr", + wraps=get_parties_from_case_name_bankr, + ) + def test_index_party_from_bankr_case_name( + self, mock_party_parser_task, mock_party_parser_document + ): + """Confirm that the party field is populated by splitting the case_name + of a bankruptcy case when a valid separator is present. + """ + docket_with_no_parties = DocketFactory( + court=self.court, + case_name="Lorem v. Dolor", + docket_number="1:21-bk-4444", + source=Docket.RECAP, + ) + docket_doc_no_parties = DocketDocument.get(docket_with_no_parties.pk) + # Assert party on initial indexing. + self.assertEqual(docket_doc_no_parties.party, ["Lorem", "Dolor"]) + mock_party_parser_document.assert_called_once() + + # Modify the docket case_name. Assert that parties are updated if the + # docket does not contain normalized parties. + docket_with_no_parties.case_name = "America v. Smith" + docket_with_no_parties.save() + docket_doc_no_parties = DocketDocument.get(docket_with_no_parties.pk) + self.assertEqual(docket_doc_no_parties.party, ["America", "Smith"]) + mock_party_parser_task.assert_called_once() + + docket_with_no_parties.delete() + + @mock.patch( + "cl.search.documents.get_parties_from_case_name", + wraps=get_parties_from_case_name, + ) + @mock.patch( + "cl.search.tasks.get_parties_from_case_name", + wraps=get_parties_from_case_name, + ) def test_index_party_from_case_name_when_parties_are_not_available( - self, + self, mock_party_parser_task, mock_party_parser_document ) -> None: """Confirm that the party field is populated by splitting the case_name when a valid separator is present. """ - + district_court = CourtFactory(id="akd", jurisdiction="FD") docket_with_parties = DocketFactory( - court=self.court, + court=district_court, case_name="Lorem v. Dolor", docket_number="1:21-bk-4444", source=Docket.RECAP, @@ -7350,8 +7397,9 @@ def test_index_party_from_case_name_when_parties_are_not_available( docket=docket_with_parties, ) index_docket_parties_in_es.delay(docket_with_parties.pk) + mock_party_parser_document.reset_mock() docket_with_no_parties = DocketFactory( - court=self.court, + court=district_court, case_name="Bank v. Smith", docket_number="1:21-bk-4445", source=Docket.RECAP, @@ -7363,13 +7411,16 @@ def test_index_party_from_case_name_when_parties_are_not_available( # Assert party on initial indexing. self.assertEqual(docket_doc_parties.party, ["Mary Williams Corp."]) self.assertEqual(docket_doc_no_parties.party, ["Bank", "Smith"]) + mock_party_parser_document.assert_called_once() # Modify the docket case_name. Assert that parties are not overwritten - # in a docket with normalized parties. + # in a docket with normalized parties and also check the helper to + # parse parties is not called. docket_with_parties.case_name = "Lorem v. Ipsum" docket_with_parties.save() docket_doc_parties = DocketDocument.get(docket_with_parties.pk) self.assertEqual(docket_doc_parties.party, ["Mary Williams Corp."]) + mock_party_parser_task.assert_not_called() # Modify the docket case_name. Assert that parties are updated if the # docket does not contain normalized parties. @@ -7377,12 +7428,12 @@ def test_index_party_from_case_name_when_parties_are_not_available( docket_with_no_parties.save() docket_doc_no_parties = DocketDocument.get(docket_with_no_parties.pk) self.assertEqual(docket_doc_no_parties.party, ["America", "Smith"]) + mock_party_parser_task.assert_called_once() # Test that parties are not extracted from the case_name if the case # originates from a district court and lacks a valid separator. - court_2 = CourtFactory(id="akd", jurisdiction="FD") docket_with_no_parties_no_separator = DocketFactory( - court=court_2, + court=district_court, case_name="In re: Bank Smith", docket_number="1:21-bk-4446", source=Docket.RECAP, From 1aa355b6fc46dfe64011ffef7dc4fb621d8c5161 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Thu, 6 Feb 2025 17:13:17 -0400 Subject: [PATCH 6/7] feat(lib): Handle "in re" and "in the matter of" This commit introduces logic to handle bankruptcy case names that begin with "in re" or "in the matter of". These types of case names typically don't contain party information in the standard format, so the function now returns an empty list in these cases. This prevents incorrect parsing and ensures more accurate extraction of party names. --- cl/lib/search_index_utils.py | 20 +++++++++++++------- cl/lib/tests.py | 28 ++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 7 deletions(-) diff --git a/cl/lib/search_index_utils.py b/cl/lib/search_index_utils.py index d652f13569..2322df49e2 100644 --- a/cl/lib/search_index_utils.py +++ b/cl/lib/search_index_utils.py @@ -66,14 +66,20 @@ def get_parties_from_case_name_bankr(case_name: str) -> list[str]: remove extraneous information like court designations in parentheses, trailing HTML, and text related to "BELOW" or "ABOVE" designations. - Args: - case_name: The bankruptcy case name string. - - Returns: - A list of strings, where each string represents a party involved - in the case. If no recognized separator is found, the function - returns a list containing the cleaned case name as a single element. + If the case name begins with "in re" or "in the matter of", an empty list + is returned, as these typically don't contain party information in the + standard format. + + :param case_name: The bankruptcy case name string. + :return: A list of strings, where each string represents a party involved + in the case. If no recognized separator is found, the function returns + a list containing the cleaned case name as a single element. """ + # Handle cases beginning with "in re" or "in the matter of". + # These usually don't contain party information in the expected format. + if re.match(r"^(in re|in the matter of)", case_name, re.IGNORECASE): + return [] + # Removes text enclosed in parentheses at the end of the string. cleaned_case_name = re.sub(r"\s*\([^)]*\)$", "", case_name) diff --git a/cl/lib/tests.py b/cl/lib/tests.py index 8ad04bcbd0..ad5a4dae5d 100644 --- a/cl/lib/tests.py +++ b/cl/lib/tests.py @@ -1274,6 +1274,34 @@ class PartiesNameTestType(TypedDict): "case_name": 'Saucedo and Green Dream International, LLC Case Consolidated under 23-03142 ', "output": ["Saucedo", "Green Dream International, LLC"], }, + { + "case_name": "In re: Matter of Nicholas M. Wajda", + "output": [], + }, + { + "case_name": "In re Matter of Proof of Claim Replacement Filings", + "output": [], + }, + { + "case_name": "In re T.H.", + "output": [], + }, + { + "case_name": "In Re: Dempsey Clay Ward", + "output": [], + }, + { + "case_name": "In re: Receivership of Horses and Equipment v. Gabriel", + "output": [], + }, + { + "case_name": "In Re: Appearances of Attorney James G. ORourke in Pending Bankruptcy Cases", + "output": [], + }, + { + "case_name": "In the matter of Attorney Rodney D. Shepherd", + "output": [], + }, ] for test in tests: with self.subTest( From efa2ac6eba173a3ae5e406c1061dcf9758ea53e3 Mon Sep 17 00:00:00 2001 From: Eduardo Rosendo Date: Thu, 6 Feb 2025 22:44:46 -0400 Subject: [PATCH 7/7] feat(lib): Removes "- Adversary Proceeding" from case names --- cl/lib/search_index_utils.py | 11 ++++++++++- cl/lib/tests.py | 20 ++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git a/cl/lib/search_index_utils.py b/cl/lib/search_index_utils.py index 2322df49e2..8023ab1e11 100644 --- a/cl/lib/search_index_utils.py +++ b/cl/lib/search_index_utils.py @@ -77,7 +77,11 @@ def get_parties_from_case_name_bankr(case_name: str) -> list[str]: """ # Handle cases beginning with "in re" or "in the matter of". # These usually don't contain party information in the expected format. - if re.match(r"^(in re|in the matter of)", case_name, re.IGNORECASE): + if re.match( + r"^(in re|in the matter of|unknown case title)", + case_name, + re.IGNORECASE, + ): return [] # Removes text enclosed in parentheses at the end of the string. @@ -89,6 +93,11 @@ def get_parties_from_case_name_bankr(case_name: str) -> list[str]: # Removes text following "-BELOW" or "-ABOVE" at the end of the string. cleaned_case_name = re.sub(r"\s*(-BELOW|-ABOVE).*$", "", cleaned_case_name) + # Removes text following "- Adversary Proceeding" at the end of the string. + cleaned_case_name = re.sub( + r"\s*- Adversary Proceeding.*$", "", cleaned_case_name + ) + case_name_separators = VALID_CASE_NAME_SEPARATORS.copy() case_name_separators.append(" and ") for separator in case_name_separators: diff --git a/cl/lib/tests.py b/cl/lib/tests.py index ad5a4dae5d..b082508c88 100644 --- a/cl/lib/tests.py +++ b/cl/lib/tests.py @@ -1302,6 +1302,26 @@ class PartiesNameTestType(TypedDict): "case_name": "In the matter of Attorney Rodney D. Shepherd", "output": [], }, + { + "case_name": "Rochester Drug Cooperative, Inc. - Adversary Proceeding", + "output": ["Rochester Drug Cooperative, Inc."], + }, + { + "case_name": "Ronald W. Howland, Jr and Marilee R Howland - Adversary Proceeding", + "output": ["Ronald W. Howland, Jr", "Marilee R Howland"], + }, + { + "case_name": "Derrick D. Thomas v Kacy L. Thomas - Adversary Proceeding", + "output": ["Derrick D. Thomas", "Kacy L. Thomas"], + }, + { + "case_name": "Unknown Case Title", + "output": [], + }, + { + "case_name": "Unknown Case Title - Adversary Proceeding", + "output": [], + }, ] for test in tests: with self.subTest(