Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(lib): Enhances get_parties_from_case_name method #4971

Merged
merged 13 commits into from
Feb 7, 2025
Merged
Show file tree
Hide file tree
Changes from 12 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 17 additions & 0 deletions cl/corpus_importer/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,6 +107,23 @@ async def mark_ia_upload_needed(d: Docket, save_docket: bool) -> None:
await d.asave()


def is_bankruptcy_court(court_id: str) -> bool:
    """Tell whether ``court_id`` identifies a bankruptcy court.

    The ID is looked up, by primary key, in the queryset returned by
    ``Court.federal_courts.bankruptcy_pacer_courts()``.

    Args:
        court_id: The ID (primary key) of the court to check.

    Returns:
        True if a court with that ID is present in the bankruptcy courts
        queryset, False otherwise.
    """
    return (
        Court.federal_courts.bankruptcy_pacer_courts()
        .filter(pk=court_id)
        .exists()
    )


def is_appellate_court(court_id: str) -> bool:
"""Checks if the given court_id belongs to an appellate court.

Expand Down
51 changes: 43 additions & 8 deletions cl/lib/search_index_utils.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import re
from datetime import date

from cl.lib.date_time import midnight_pt
Expand Down Expand Up @@ -40,6 +41,8 @@ def __init__(self, message):
list(range(0, 10)) + list(range(11, 13)) + list(range(14, 32))
)

# Substrings that separate the parties in a case name. They are matched as
# plain, case-sensitive substrings and tried in list order, so the first
# entry found in the case name is the one used for splitting.
VALID_CASE_NAME_SEPARATORS = [" v ", " v. ", " vs. ", " vs "]


def get_parties_from_case_name(case_name: str) -> list[str]:
"""Extracts the parties from case_name by splitting on common case_name
Expand All @@ -49,14 +52,46 @@ def get_parties_from_case_name(case_name: str) -> list[str]:
:return: A list of parties. If no valid separator is found, returns an
empty list.
"""

valid_case_name_separators = [
" v ",
" v. ",
" vs. ",
" vs ",
]
for separator in valid_case_name_separators:
for separator in VALID_CASE_NAME_SEPARATORS:
if separator in case_name:
return case_name.split(separator, 1)
return []


def get_parties_from_case_name_bankr(case_name: str) -> list[str]:
    """Extracts the parties involved in a bankruptcy case from the case name.

    The case name is first cleaned by removing, in this order:

    1. A parenthetical at the very end of the string,
       e.g. "Cowen & Co. v. Zagar (In re Zagar)".
    2. Everything from the first "<" onwards (trailing HTML notices).
    3. Everything from a "-BELOW" or "-ABOVE" marker onwards.

    The cleaned name is then split once on the first separator, taken in
    order from VALID_CASE_NAME_SEPARATORS followed by " and ", that occurs in
    the cleaned name.

    If the case name begins with "in re" or "in the matter of" (case
    insensitive), an empty list is returned, as these typically don't contain
    party information in the standard format.

    :param case_name: The bankruptcy case name string.
    :return: A list of strings, where each string represents a party involved
    in the case. If no recognized separator is found, the function returns
    a list containing the cleaned case name as a single element. An empty
    list is returned for "in re" / "in the matter of" case names.
    """
    # Handle cases beginning with "in re" or "in the matter of".
    # These usually don't contain party information in the expected format.
    # The trailing \b keeps names that merely start with those letters
    # (e.g. "In Reynolds v. Smith") from being discarded.
    if re.match(r"^(in re|in the matter of)\b", case_name, re.IGNORECASE):
        return []

    # Removes text enclosed in parentheses at the end of the string.
    cleaned_case_name = re.sub(r"\s*\([^)]*\)$", "", case_name)

    # Removes any HTML at the end of the string.
    cleaned_case_name = re.sub(r"\s*<.*$", "", cleaned_case_name)

    # Removes text following "-BELOW" or "-ABOVE" at the end of the string.
    cleaned_case_name = re.sub(r"\s*(-BELOW|-ABOVE).*$", "", cleaned_case_name)

    # Look for the separator in the *cleaned* name: a separator that only
    # appears in the stripped text (parenthetical, HTML, -ABOVE/-BELOW suffix)
    # must not stop the search before a real separator such as " and " is
    # tried.
    for separator in [*VALID_CASE_NAME_SEPARATORS, " and "]:
        if separator in cleaned_case_name:
            return cleaned_case_name.split(separator, 1)
    return [cleaned_case_name]
112 changes: 112 additions & 0 deletions cl/lib/tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
get_redis_interface,
release_redis_lock,
)
from cl.lib.search_index_utils import get_parties_from_case_name_bankr
from cl.lib.string_utils import normalize_dashes, trunc
from cl.lib.utils import (
check_for_proximity_tokens,
Expand Down Expand Up @@ -1203,6 +1204,117 @@ def test_check_and_sanitize_queries_bad_syntax(self) -> None:
)
self.assertEqual(output, test["sanitized"])

def test_can_get_parties_from_bankruptcy_case_name(self) -> None:
class PartiesNameTestType(TypedDict):
case_name: str
output: list[str]

tests: list[PartiesNameTestType] = [
{
"case_name": "Mendelsohn. Singh",
"output": ["Mendelsohn. Singh"],
},
{
"case_name": "Cadle Co. v Matos",
"output": ["Cadle Co.", "Matos"],
},
{
"case_name": "Cadle Co. v Matos",
"output": ["Cadle Co.", "Matos"],
},
{
"case_name": "Cadle Co. v. Matos",
"output": ["Cadle Co.", "Matos"],
},
{
"case_name": "Cadle Co. vs Matos",
"output": ["Cadle Co.", "Matos"],
},
{
"case_name": "Cadle Co. vs. Matos",
"output": ["Cadle Co.", "Matos"],
},
{
"case_name": "Paul Thomas Presbury, Jr. and Lisa Rae Presbury",
"output": ["Paul Thomas Presbury, Jr.", "Lisa Rae Presbury"],
},
{
"case_name": "Ma Margarita Bernal Sosa -ABOVE MED",
"output": ["Ma Margarita Bernal Sosa"],
},
{
"case_name": "Jennifer Renee' Abbott and Quentin Andrew Abbott -ABOVE MED",
"output": ["Jennifer Renee' Abbott", "Quentin Andrew Abbott"],
},
{
"case_name": "Aiesha Renee -BELOW MED",
"output": ["Aiesha Renee"],
},
{
"case_name": "Justin Kaiser and Belinda Kaiser -BELOW MED",
"output": ["Justin Kaiser", "Belinda Kaiser"],
},
{
"case_name": "Cosmorex Ltd. (in Liquidation)",
"output": ["Cosmorex Ltd."],
},
{
"case_name": "Cowen & Co. v. Zagar (In re Zagar)",
"output": ["Cowen & Co.", "Zagar"],
},
{
"case_name": 'Advantage LLC <b><font color="red">Jointly Administered under 23-90886.</font></b>',
"output": ["Advantage LLC"],
},
{
"case_name": 'Sather v. Carlson<b><font color="red">DO NOT DOCKET. CASE TRANSFERRED OUT.</font></b>',
"output": ["Sather", "Carlson"],
},
{
"case_name": 'Saucedo and Green Dream International, LLC <b> <font color="red"> Case Consolidated under 23-03142 </font> </b>',
"output": ["Saucedo", "Green Dream International, LLC"],
Comment on lines +1274 to +1275
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This one looks like it's actually wrong, but not sure we can do much better.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Also, from #4802 (comment), it seems that there might be cases where the words "In" or "of" are not part of the party names.

For instance something like:
In re: Advantage LLC

Is this possible in bankruptcy?

If so, in these cases, the indexed party would be In re: Advantage LLC, which doesn't seem correct. In district courts, we simply ignore anything that doesn't have a valid separator, but here, it seems more complicated since we're performing cleanup before splitting parties.

Perhaps, in these cases, we could completely ignore anything that contains In or of? Or we could look for examples of these case names and check if we can identify a common pattern for cleanup?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this possible in bankruptcy?

I looked into how often "In re" appears in case names. After refining the dataset to include more records (2 million total records with RECAP source or a derived one) and searching, I found only 36 instances (0.0018%) where a case name begins with "In re." A few examples are:

I think we should add a step to the cleanup process that removes "In re" before we try to figure out the party names.

For reference, here's a CSV file containing these 36 instances:

case_names_re_recap.csv

@albertisfu Let me know what you think.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks! Yeah, it seems like this type of case name is not very common.

I think we can try removing in re or in re: before splitting the parties; however, that would also require removing other common terms that seem to be typical in this type of case name structure but do not appear to be part of the parties, such as:

Matter of
Receivership of
Appearances of

Not sure if it's possible to compile a list of all potential terms that might appear in a bankruptcy case name but are not part of the parties.

Additionally, some case names don't seem to contain parties at all.
In re Matter of Ascendium Replacement Filings
In re: Proceedings to Review Attorney Usage of CM/ECF Filing Credentials
In re: Proceedings to Enforce Fed.R.Bankr.9036
In Re: Proceedings to Enforce Fed. R. Bankr. P. 9036 as to various high-volume paper-notice recipients relating to cases pending within the District of Connecticut.
In re Matter of Proof of Claim Replacement Filings
In re Appointments and Reappointments of Ohio Sout

In these cases, if we remove "in re," it might not be correct to treat the remaining text as a party.

Another option is to simply ignore any case name that contains in re or in re: and not index parties from those cases. Perhaps @mlissner has an opinion on this?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

For bankruptcy, I'm fine with just not indexing anything that starts with in re or in the matter of, etc.

},
{
"case_name": "In re: Matter of Nicholas M. Wajda",
"output": [],
},
{
"case_name": "In re Matter of Proof of Claim Replacement Filings",
"output": [],
},
{
"case_name": "In re T.H.",
"output": [],
},
{
"case_name": "In Re: Dempsey Clay Ward",
"output": [],
},
{
"case_name": "In re: Receivership of Horses and Equipment v. Gabriel",
"output": [],
},
{
"case_name": "In Re: Appearances of Attorney James G. ORourke in Pending Bankruptcy Cases",
"output": [],
},
{
"case_name": "In the matter of Attorney Rodney D. Shepherd",
"output": [],
},
]
for test in tests:
with self.subTest(
input=test["case_name"], msg="get parties names from case name"
):
parties: list[str] = get_parties_from_case_name_bankr(
test["case_name"]
)
self.assertEqual(
parties,
test["output"],
)


class TestRedisUtils(SimpleTestCase):
"""Test Redis utils functions."""
Expand Down
13 changes: 10 additions & 3 deletions cl/search/documents.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,14 +8,19 @@

from cl.alerts.models import Alert
from cl.audio.models import Audio
from cl.corpus_importer.utils import is_bankruptcy_court
from cl.custom_filters.templatetags.text_filters import (
best_case_name,
html_decode,
)
from cl.lib.command_utils import logger
from cl.lib.elasticsearch_utils import build_es_base_query
from cl.lib.fields import JoinField, PercolatorField
from cl.lib.search_index_utils import get_parties_from_case_name, null_map
from cl.lib.search_index_utils import (
get_parties_from_case_name,
get_parties_from_case_name_bankr,
null_map,
)
from cl.lib.utils import deepgetattr
from cl.people_db.models import (
Attorney,
Expand Down Expand Up @@ -1258,8 +1263,10 @@ def prepare_parties(self, instance):
if not out["party"]:
# Get party from docket case_name if no normalized parties are
# available.
party_from_case_name = get_parties_from_case_name(
instance.case_name
party_from_case_name = (
get_parties_from_case_name_bankr(instance.case_name)
if is_bankruptcy_court(instance.court_id)
else get_parties_from_case_name(instance.case_name)
)
out["party"] = party_from_case_name if party_from_case_name else []

Expand Down
16 changes: 13 additions & 3 deletions cl/search/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,12 @@
)
from cl.audio.models import Audio
from cl.celery_init import app
from cl.corpus_importer.utils import is_bankruptcy_court
from cl.lib.elasticsearch_utils import build_daterange_query
from cl.lib.search_index_utils import get_parties_from_case_name
from cl.lib.search_index_utils import (
get_parties_from_case_name,
get_parties_from_case_name_bankr,
)
from cl.people_db.models import Person, Position
from cl.search.documents import (
ES_CHILD_ID,
Expand Down Expand Up @@ -316,8 +320,14 @@ def document_fields_to_update(
# parties are available.
if main_instance.parties.exists():
continue
field_value = get_parties_from_case_name(
main_instance.case_name
field_value = (
get_parties_from_case_name_bankr(
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Since we're adding a special method for splitting parties in bankruptcy cases both here and in prepare_parties, I’d suggest adding a test case similar to those in test_index_party_from_case_name_when_parties_are_not_available to confirm that the correct method is selected for bankruptcy.

I think two additional test cases should be enough:

  1. Splitting parties from the case_name when creating a bankruptcy docket (which will use the logic in prepare_parties).
  2. Splitting parties when updating a case_name (which will use the logic in document_fields_to_update).

Currently, in test_index_party_from_case_name_when_parties_are_not_available, the factory docket_with_no_parties comes from a bankruptcy court. To differentiate the method get_parties_from_case_name_bankr, it would be necessary to change the court in this factory to a district court and create a new factory for bankruptcy. You could rely on the expected parties for the assertion, considering that get_parties_from_case_name_bankr performs some cleanup, or simply confirm that the correct method is being called using a mock. The same approach can be applied for the case_name update test case for bankruptcy.

I don’t think it'd be necessary to replicate the rest of the assertions from test_index_party_from_case_name_when_parties_are_not_available for bankruptcy since they share common logic that hasn’t changed.

main_instance.case_name
)
if is_bankruptcy_court(main_instance.court_id)
else get_parties_from_case_name(
main_instance.case_name
)
)
else:
field_value = getattr(related_instance, field)
Expand Down
Loading