Skip to content

Commit

Permalink
feat: improve the lookup & usage of Taxon names (#696)
Browse files Browse the repository at this point in the history
  • Loading branch information
mihow authored Jan 27, 2025
1 parent 334cb29 commit 47dcbcd
Show file tree
Hide file tree
Showing 5 changed files with 26 additions and 11 deletions.
4 changes: 4 additions & 0 deletions ami/main/admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -348,6 +348,10 @@ class TaxonAdmin(admin.ModelAdmin[Taxon]):
)
list_filter = ("lists", "rank", TaxonParentFilter)
search_fields = ("name",)
autocomplete_fields = (
"parent",
"synonym_of",
)

# annotate queryset with occurrence counts and allow sorting
# https://docs.djangoproject.com/en/3.2/ref/contrib/admin/#django.contrib.admin.ModelAdmin.list_display
Expand Down
2 changes: 2 additions & 0 deletions ami/main/api/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -1080,6 +1080,7 @@ def suggest(self, request):
taxa = (
Taxon.objects.select_related("parent", "parent__parent")
.annotate(similarity=TrigramSimilarity("name", query))
.filter(active=True)
.order_by("-similarity")[:limit]
)
return Response(TaxonNestedSerializer(taxa, many=True, context={"request": request}).data)
Expand All @@ -1088,6 +1089,7 @@ def suggest(self, request):
Taxon.objects.filter(name__icontains=query)
.annotate(similarity=TrigramSimilarity("name", query))
.order_by("-similarity")[:default_results_limit]
.filter(active=True)
.values("id", "name", "rank")[:limit]
)
return Response(TaxonSearchResultSerializer(taxa, many=True, context={"request": request}).data)
Expand Down
1 change: 1 addition & 0 deletions ami/main/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2573,6 +2573,7 @@ class Taxon(BaseModel):
gbif_taxon_key = models.BigIntegerField("GBIF taxon key", blank=True, null=True)
bold_taxon_bin = models.CharField("BOLD taxon BIN", max_length=255, blank=True, null=True)
inat_taxon_id = models.BigIntegerField("iNaturalist taxon ID", blank=True, null=True)
# lepsai_id = models.BigIntegerField("LepsAI / Fieldguide ID", blank=True, null=True)

notes = models.TextField(blank=True)

Expand Down
7 changes: 6 additions & 1 deletion ami/ml/models/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,12 @@ def with_taxa(self, category_field="label", only_indexes: list[int] | None = Non
labels_data = self.data
labels_label = self.labels

taxa = Taxon.objects.filter(models.Q(name__in=labels_label) | models.Q(search_names__overlap=labels_label))
# @TODO standardize species search / lookup.
# See similar query in ml.models.pipeline.get_or_create_taxon_for_classification()
taxa = Taxon.objects.filter(
models.Q(name__in=labels_label) | models.Q(search_names__overlap=labels_label),
active=True,
)
taxon_map = {taxon.name: taxon for taxon in taxa}

for category in labels_data:
Expand Down
23 changes: 13 additions & 10 deletions ami/ml/models/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,25 +482,28 @@ def get_or_create_taxon_for_classification(
:return: The Taxon object
"""
taxa_list, _created = TaxaList.objects.get_or_create(
taxa_list, created = TaxaList.objects.get_or_create(
name=f"Taxa returned by {algorithm.name}",
)
if _created:
if created:
logger.info(f"Created new taxa list {taxa_list}")
else:
logger.debug(f"Using existing taxa list {taxa_list}")

# Get top label from classification scores
assert algorithm.category_map, f"No category map found for algorithm {algorithm}"
label_data: dict = algorithm.category_map.data[classification_resp.scores.index(max(classification_resp.scores))]
taxon, _created = Taxon.objects.get_or_create(
name=classification_resp.classification,
defaults={
"name": classification_resp.classification,
"rank": label_data.get("taxon_rank", TaxonRank.UNKNOWN),
},
)
if _created:
returned_taxon_name = classification_resp.classification
# @TODO standardize the Taxon search / lookup. See similar query in ml.models.algorithm.AlgorithmCategoryMap
taxon = Taxon.objects.filter(
models.Q(name=returned_taxon_name) | models.Q(search_names__overlap=[returned_taxon_name]),
active=True,
).first()
if not taxon:
taxon = Taxon.objects.create(
name=returned_taxon_name,
rank=label_data.get("taxon_rank", TaxonRank.UNKNOWN),
)
logger.info(f"Registered new taxon {taxon}")

taxa_list.taxa.add(taxon)
Expand Down

0 comments on commit 47dcbcd

Please sign in to comment.