Skip to content

Commit

Permalink
fix(api): normalize PyPI package names (google#3088)
Browse files Browse the repository at this point in the history
Partially resolves google#3082

Normalize PyPI package names in API queries, and refactor
`worker.maybe_normalize_package_names` to use the same shared code.
  • Loading branch information
hogo6002 authored Jan 24, 2025
1 parent dca42d3 commit 26d49d7
Show file tree
Hide file tree
Showing 4 changed files with 29 additions and 5 deletions.
5 changes: 5 additions & 0 deletions gcp/api/server.py
Original file line number Diff line number Diff line change
Expand Up @@ -793,6 +793,11 @@ def do_query(query: osv_service_v1_pb2.Query,
context.service_context.abort(grpc.StatusCode.INVALID_ARGUMENT,
'Invalid ecosystem.')

# Normalize package names as necessary.
if package_name:
package_name = ecosystems.maybe_normalize_package_names(
package_name, ecosystem)

# Hack to work around ubuntu having extremely large individual entries
if ecosystem.startswith('Ubuntu'):
# Specifically the linux entries
Expand Down
9 changes: 4 additions & 5 deletions gcp/workers/worker/worker.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,6 @@
import json
import logging
import os
import re
import redis
import requests
import resource
Expand Down Expand Up @@ -281,10 +280,10 @@ def fix_invalid_ghsa(vulnerability):
def maybe_normalize_package_names(vulnerability):
"""Normalize the package name of every affected entry in `vulnerability`.

Each `affected.package.name` is reassigned on the entry itself, and the same
`vulnerability` object is returned.
"""
# NOTE(review): this span is a diff hunk whose +/- markers are not visible
# here. The inline PyPI regex looks like the removed code and the call to
# osv.ecosystems.maybe_normalize_package_names like its replacement --
# confirm against the real file which of the two paths is live.
for affected in vulnerability.affected:
if affected.package.ecosystem == 'PyPI':
# per https://peps.python.org/pep-0503/#normalized-names
affected.package.name = re.sub(r'[-_.]+', '-',
affected.package.name).lower()
# Entries without an ecosystem are skipped and keep their name as-is.
if not affected.package.ecosystem:
continue
# Delegate to the shared helper, passing the entry's own ecosystem.
affected.package.name = osv.ecosystems.maybe_normalize_package_names(
affected.package.name, affected.package.ecosystem)

return vulnerability

Expand Down
11 changes: 11 additions & 0 deletions osv/ecosystems/_ecosystems.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,8 @@
# limitations under the License.
"""Ecosystem helpers."""

import re

from osv.ecosystems.chainguard import Chainguard
from osv.ecosystems.wolfi import Wolfi
from .helper_base import Ecosystem, OrderingUnsupportedEcosystem
Expand Down Expand Up @@ -184,3 +186,12 @@ def is_supported_in_deps_dev(ecosystem_name: str) -> bool:

def map_ecosystem_to_deps_dev(ecosystem_name: str) -> str:
  """Look up `ecosystem_name` in the OSV-to-deps.dev ecosystem mapping.

  Returns whatever `_OSV_TO_DEPS_ECOSYSTEMS_MAP.get` yields for the name.
  NOTE(review): presumably that is None for an unmapped ecosystem -- confirm
  against the mapping's definition.
  """
  deps_dev_name = _OSV_TO_DEPS_ECOSYSTEMS_MAP.get(ecosystem_name)
  return deps_dev_name


def maybe_normalize_package_names(package_name: str, ecosystem: str) -> str:
  """Return `package_name` in the normalized form used for `ecosystem`.

  Only the 'PyPI' ecosystem is rewritten; for any other ecosystem the name is
  returned unchanged.

  Args:
    package_name: The package name to normalize.
    ecosystem: The ecosystem the package belongs to.

  Returns:
    The normalized package name.
  """
  if ecosystem != 'PyPI':
    return package_name

  # PEP 503 normalized names: every run of '-', '_' and '.' becomes a single
  # '-', and the result is lowercased.
  # https://peps.python.org/pep-0503/#normalized-names
  collapsed = re.sub(r'[-_.]+', '-', package_name)
  return collapsed.lower()
9 changes: 9 additions & 0 deletions osv/ecosystems/_ecosystems_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,3 +44,12 @@ def test_add_matching_ecosystems(self):
expected_output = ['Debian', 'Debian:11', 'Debian:12', 'Debian:13']
actual_output.sort()
self.assertEqual(list(actual_output), expected_output)

def test_maybe_normalize_package_names(self):
  """Test normalize package name.

  Covers lowercasing, collapsing of '-', '_' and '.' separator runs for PyPI,
  and the unchanged passthrough for a non-PyPI ecosystem.
  """
  # Each case is (package_name, ecosystem, expected).
  cases = [
      ('Flask', 'PyPI', 'flask'),
      ('Foo_Bar.baz', 'PyPI', 'foo-bar-baz'),
      ('foo--__..bar', 'PyPI', 'foo-bar'),
      ('Flask', 'npm', 'Flask'),
  ]

  for package_name, ecosystem, expected in cases:
    # subTest reports every failing case instead of stopping at the first.
    with self.subTest(package_name=package_name, ecosystem=ecosystem):
      actual = ecosystems.maybe_normalize_package_names(
          package_name, ecosystem)
      self.assertEqual(actual, expected)

0 comments on commit 26d49d7

Please sign in to comment.