Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add command to standardize formatting of DACT IDs and Fragmentarium IDs #1651

Merged
merged 1 commit into from
Oct 4, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""
A command designed to do a one-time reformatting of DACT IDs and Fragment
IDs in the database.

Fragment IDs should be of the form "F-XXXX" where XXXX is some alphanumeric.
Fragment IDs are currently assumed to be in the form "F-XXXX" or "XXXX".
DACT IDs should be of the form "D:0XXXX" where XXXX is the Fragment ID alphanumeric.
DACT IDs are currently assumed to be in the form "0XXXX" or "D-0XXXX".

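This command adds the prefix "F-" to Fragment IDs that lack it, and the prefix
"D:" to DACT IDs that lack it (replacing a legacy "D-" prefix where present).
IDs that match none of the expected forms are left unchanged and reported
with a warning.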
"""

from django.core.management.base import BaseCommand

from main_app.models import Source


class Command(BaseCommand):
    """Management command that normalizes DACT IDs and Fragmentarium IDs.

    For every ``Source``:

    * a DACT ID of the form ``0XXXX`` becomes ``D:0XXXX``;
    * a DACT ID of the form ``D-0XXXX`` becomes ``D:0XXXX``;
    * a Fragmentarium ID of the form ``XXXX`` becomes ``F-XXXX``.

    IDs that are already in the target form (``D:0XXXX`` / ``F-XXXX``) are
    left alone without a warning, so the command can safely be re-run.
    Any other non-empty ID is left untouched and reported on stdout.
    """

    help = "Reformat DACT IDs and Fragment IDs in the database."

    def handle(self, *args, **options):
        """Reformat the IDs of all sources, saving only the ones that changed.

        Args:
            *args: Positional arguments passed by the management framework
                (unused).
            **options: Parsed command-line options (unused).
        """
        for source in Source.objects.all():
            changed = False

            dact_id = source.dact_id
            if dact_id:
                if len(dact_id) == 5 and dact_id.startswith("0"):
                    # Bare "0XXXX": only the prefix is missing.
                    source.dact_id = f"D:{dact_id}"
                    changed = True
                elif len(dact_id) == 7 and dact_id.startswith("D-0"):
                    # Legacy "D-0XXXX": swap the "D-" prefix for "D:".
                    source.dact_id = f"D:{dact_id[2:]}"
                    changed = True
                elif len(dact_id) == 7 and dact_id.startswith("D:0"):
                    # Already in the target format; warning here would be
                    # misleading (e.g. when the command is run a second time).
                    pass
                else:
                    self.stdout.write(
                        self.style.WARNING(
                            f"{source.id} | DACT ID {source.dact_id} is not in the correct format."
                        )
                    )

            fragmentarium_id = source.fragmentarium_id
            if fragmentarium_id:
                if len(fragmentarium_id) == 4:
                    # Bare "XXXX": only the prefix is missing.
                    source.fragmentarium_id = f"F-{fragmentarium_id}"
                    changed = True
                elif len(fragmentarium_id) == 6 and fragmentarium_id.startswith(
                    "F-"
                ):
                    # Already in the target format.
                    pass
                else:
                    self.stdout.write(
                        self.style.WARNING(
                            f"{source.id} | Fragment ID {source.fragmentarium_id} is not in the correct format."
                        )
                    )

            # Avoid writing rows whose IDs were not modified.
            if changed:
                source.save()
56 changes: 56 additions & 0 deletions django/cantusdb_project/main_app/tests/test_reformat_source_ids.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
from django.test import TestCase
from django.core.management import call_command

from main_app.models import Source
from main_app.tests.make_fakes import make_fake_institution, make_fake_segment


class TestReformatSourceIDs(TestCase):
    """End-to-end check of the ``reformat_source_ids`` management command."""

    def test_command(self):
        """Run the command once and verify the stored IDs of four sources.

        Covers: IDs missing their prefixes, IDs carrying the "D-"/"F-"
        prefixes, a source with no IDs at all, and a source whose IDs match
        no expected form (which must be left unchanged).
        """
        common = {
            "segment": make_fake_segment(),
            "holding_institution": make_fake_institution(),
        }

        unprefixed = Source.objects.create(
            shelfmark="Correct Source 1",
            dact_id="0a1b3",
            fragmentarium_id="a1b3",
            **common,
        )
        prefixed = Source.objects.create(
            shelfmark="Correct Source 2",
            dact_id="D-0a1b3",
            fragmentarium_id="F-a1b3",
            **common,
        )
        without_ids = Source.objects.create(
            shelfmark="Source with no IDs",
            **common,
        )
        malformed = Source.objects.create(
            shelfmark="Source with incorrect IDs",
            dact_id="a1b3",
            fragmentarium_id="F-1b3",
            **common,
        )

        call_command("reformat_source_ids")

        # Both recognised input forms end up in the canonical format.
        for created in (unprefixed, prefixed):
            stored = Source.objects.get(pk=created.pk)
            self.assertEqual(stored.dact_id, "D:0a1b3")
            self.assertEqual(stored.fragmentarium_id, "F-a1b3")

        # A source without IDs stays without IDs.
        stored_without_ids = Source.objects.get(pk=without_ids.pk)
        self.assertIsNone(stored_without_ids.dact_id)
        self.assertIsNone(stored_without_ids.fragmentarium_id)

        # Unrecognised IDs are left exactly as they were.
        stored_malformed = Source.objects.get(pk=malformed.pk)
        self.assertEqual(stored_malformed.dact_id, "a1b3")
        self.assertEqual(stored_malformed.fragmentarium_id, "F-1b3")
Loading