Skip to content

Commit

Permalink
Add data integrity check for bad emails and malformed repositories (#…
Browse files Browse the repository at this point in the history
…1379)

1. Make sure that the repository isn't accidentally annotated as
`bioregistry` or given as something other than a URL. Fixes 3 issues here
2. Make sure that emails curated as contacts don't look like
group emails (e.g., ones containing `help@` or `discuss@`)
  • Loading branch information
cthoyt authored Jan 25, 2025
1 parent 4a4d972 commit de94940
Show file tree
Hide file tree
Showing 2 changed files with 33 additions and 3 deletions.
4 changes: 1 addition & 3 deletions src/bioregistry/data/bioregistry.json
Original file line number Diff line number Diff line change
Expand Up @@ -38378,7 +38378,6 @@
"homepage": "https://www.fao.org/fishery/en/collection/asfis/en",
"name": "Aquatic Sciences and Fisheries Information System",
"pattern": "^\\d+$",
"repository": "bioregistry",
"reviewer": {
"email": "[email protected]",
"github": "cthoyt",
Expand Down Expand Up @@ -97696,7 +97695,7 @@
"name": "The data cube vocabulary",
"pattern": "^\\w+$",
"preferred_prefix": "qb",
"repository": "UKGovLD/publishing-statistical-data",
"repository": "https://github.com/UKGovLD/publishing-statistical-data",
"uri_format": "http://purl.org/linked-data/cube#$1",
"zazuko": {
"prefix": "qb",
Expand Down Expand Up @@ -109711,7 +109710,6 @@
"homepage": "https://www.w3.org/TR/vocab-ssn/",
"license": "http://www.w3.org/Consortium/Legal/2015/copyright-software-and-document",
"name": "System capabilities, operating ranges, and survival ranges ontology",
"repository": "bioregistry",
"reviewer": {
"email": "[email protected]",
"github": "cthoyt",
Expand Down
32 changes: 32 additions & 0 deletions tests/test_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,14 @@

logger = logging.getLogger(__name__)

# Substrings that suggest an address belongs to a group or mailing list
# rather than to a single person.
disallowed_email_parts = {
    "contact@",
    "discuss@",
    "help@",
    "helpdesk@",
    "support@",
}


class TestRegistry(unittest.TestCase):
"""Tests for the registry."""
Expand Down Expand Up @@ -770,6 +778,14 @@ def assert_contact_metadata(self, author: Attributable):
self.assertNotIn(" ", author.orcid)
if author.email:
self.assertRegex(author.email, EMAIL_RE)
self.assertFalse(
any(
disallowed_email_part in author.email
for disallowed_email_part in disallowed_email_parts
),
msg=f"Bioregistry policy states that an email must correspond to a single person. "
f"The email provided appears to be for a group/mailing list: {author.email}",
)

def test_contributors(self):
"""Check contributors have minimal metadata."""
Expand Down Expand Up @@ -1038,3 +1054,19 @@ def test_resolvable_annotation(self):
resource.comment,
msg="Any resource with a non-resolvable URI format needs a comment as to why",
)

def test_repository(self) -> None:
"""Test the repository annotation."""
for prefix, resource in self.registry.items():
if resource.repository is None:
continue
with self.subTest(prefix=prefix):
self.assertNotEqual(
"bioregistry",
resource.repository,
msg="repository accidentally kept flag from GitHub",
)
self.assertTrue(
resource.repository.startswith("http"),
msg=f"repository is not a valid URL: {resource.repository}",
)

0 comments on commit de94940

Please sign in to comment.