From 26d49d78a26427a12628cd3a1a4cd49d030c6956 Mon Sep 17 00:00:00 2001 From: Holly Gong <39108850+hogo6002@users.noreply.github.com> Date: Fri, 24 Jan 2025 16:07:54 +1100 Subject: [PATCH] fix(api): normalize PyPI package names (#3088) Partially resolves https://github.com/google/osv.dev/issues/3082 Normalize PyPI package names in API queries: Refactor `worker.maybe_normalize_package_names` to use shared code. --- gcp/api/server.py | 5 +++++ gcp/workers/worker/worker.py | 9 ++++----- osv/ecosystems/_ecosystems.py | 11 +++++++++++ osv/ecosystems/_ecosystems_test.py | 9 +++++++++ 4 files changed, 29 insertions(+), 5 deletions(-) diff --git a/gcp/api/server.py b/gcp/api/server.py index fa1380160e3..0ef0a3d1c25 100644 --- a/gcp/api/server.py +++ b/gcp/api/server.py @@ -793,6 +793,11 @@ def do_query(query: osv_service_v1_pb2.Query, context.service_context.abort(grpc.StatusCode.INVALID_ARGUMENT, 'Invalid ecosystem.') + # Normalize package names as necessary. + if package_name: + package_name = ecosystems.maybe_normalize_package_names( + package_name, ecosystem) + # Hack to work around ubuntu having extremely large individual entries if ecosystem.startswith('Ubuntu'): # Specifically the linux entries diff --git a/gcp/workers/worker/worker.py b/gcp/workers/worker/worker.py index 176e4135ab4..56cfa0eb2b7 100644 --- a/gcp/workers/worker/worker.py +++ b/gcp/workers/worker/worker.py @@ -18,7 +18,6 @@ import json import logging import os -import re import redis import requests import resource @@ -281,10 +280,10 @@ def fix_invalid_ghsa(vulnerability): def maybe_normalize_package_names(vulnerability): """Normalize package names as necessary.""" for affected in vulnerability.affected: - if affected.package.ecosystem == 'PyPI': - # per https://peps.python.org/pep-0503/#normalized-names - affected.package.name = re.sub(r'[-_.]+', '-', - affected.package.name).lower() + if not affected.package.ecosystem: + continue + affected.package.name = osv.ecosystems.maybe_normalize_package_names( + affected.package.name, affected.package.ecosystem) return vulnerability diff --git a/osv/ecosystems/_ecosystems.py b/osv/ecosystems/_ecosystems.py index 716bbdea7fb..66795fb81c4 100644 --- a/osv/ecosystems/_ecosystems.py +++ b/osv/ecosystems/_ecosystems.py @@ -13,6 +13,8 @@ # limitations under the License. """Ecosystem helpers.""" +import re + from osv.ecosystems.chainguard import Chainguard from osv.ecosystems.wolfi import Wolfi from .helper_base import Ecosystem, OrderingUnsupportedEcosystem @@ -184,3 +186,12 @@ def is_supported_in_deps_dev(ecosystem_name: str) -> bool: def map_ecosystem_to_deps_dev(ecosystem_name: str) -> str: return _OSV_TO_DEPS_ECOSYSTEMS_MAP.get(ecosystem_name) + + +def maybe_normalize_package_names(package_name: str, ecosystem: str) -> str: + """Normalize package names as necessary.""" + if ecosystem == 'PyPI': + # per https://peps.python.org/pep-0503/#normalized-names + package_name = re.sub(r'[-_.]+', '-', package_name).lower() + + return package_name diff --git a/osv/ecosystems/_ecosystems_test.py b/osv/ecosystems/_ecosystems_test.py index b76edd9f953..2d0073dd6e1 100644 --- a/osv/ecosystems/_ecosystems_test.py +++ b/osv/ecosystems/_ecosystems_test.py @@ -44,3 +44,12 @@ def test_add_matching_ecosystems(self): expected_output = ['Debian', 'Debian:11', 'Debian:12', 'Debian:13'] actual_output.sort() self.assertEqual(list(actual_output), expected_output) + + def test_maybe_normalize_package_names(self): + """Test normalize package name""" + package_name = 'Flask' + ecosystem = 'PyPI' + expected = 'flask' + + actual = ecosystems.maybe_normalize_package_names(package_name, ecosystem) + self.assertEqual(actual, expected)