Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support list format fallbacks #1099

Merged
merged 3 commits into from
Jul 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 40 additions & 11 deletions babel/lists.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,11 @@
DEFAULT_LOCALE = default_locale()


def format_list(lst: Sequence[str],
style: Literal['standard', 'standard-short', 'or', 'or-short', 'unit', 'unit-short', 'unit-narrow'] = 'standard',
locale: Locale | str | None = DEFAULT_LOCALE) -> str:
def format_list(
lst: Sequence[str],
style: Literal['standard', 'standard-short', 'or', 'or-short', 'unit', 'unit-short', 'unit-narrow'] = 'standard',
locale: Locale | str | None = DEFAULT_LOCALE,
) -> str:
"""
Format the items in `lst` as a list.
Expand All @@ -39,7 +41,11 @@ def format_list(lst: Sequence[str],
>>> format_list(['omena', 'peruna', 'aplari'], style='or', locale='fi')
u'omena, peruna tai aplari'
These styles are defined, but not all are necessarily available in all locales.
Not all styles are necessarily available in all locales.
The function will attempt to fall back to replacement styles according to the rules
set forth in the CLDR root XML file, and raise a ValueError if no suitable replacement
can be found.
The following text is verbatim from the Unicode TR35-49 spec [1].
* standard:
Expand Down Expand Up @@ -76,14 +82,9 @@ def format_list(lst: Sequence[str],
if len(lst) == 1:
return lst[0]

if style not in locale.list_patterns:
raise ValueError(
f'Locale {locale} does not support list formatting style {style!r} '
f'(supported are {sorted(locale.list_patterns)})',
)
patterns = locale.list_patterns[style]
patterns = _resolve_list_style(locale, style)

if len(lst) == 2:
if len(lst) == 2 and '2' in patterns:
return patterns['2'].format(*lst)

result = patterns['start'].format(lst[0], lst[1])
Expand All @@ -92,3 +93,31 @@ def format_list(lst: Sequence[str],
result = patterns['end'].format(result, lst[-1])

return result


# Fallback chains for list styles, based on the `<alias>` elements in
# CLDR 45's root.xml file.
# The root file defines both `standard` and `or`,
# so they're always available.
# TODO: It would likely be better to use the
#       babel.localedata.Alias mechanism for this,
#       but I'm not quite sure how it's supposed to
#       work with inheritance and data in the root.
_style_fallbacks = {
    "or-narrow": ["or-short", "or"],
    "or-short": ["or"],
    "standard-narrow": ["standard-short", "standard"],
    "standard-short": ["standard"],
    "unit": ["unit-short", "standard"],
    "unit-narrow": ["unit-short", "unit", "standard"],
    "unit-short": ["standard"],
}


def _resolve_list_style(locale: Locale, style: str):
    """
    Return the list patterns to use for `style` in `locale`.

    The requested style is tried first, followed by each of its entries in
    `_style_fallbacks`, in order. The first style present in
    `locale.list_patterns` wins.

    :param locale: object whose `list_patterns` mapping is searched
    :param style: the list formatting style that was requested
    :return: the value stored in `locale.list_patterns` for the first match
    :raise ValueError: if neither the style nor any of its fallbacks is
                       present in `locale.list_patterns`
    """
    available = locale.list_patterns
    # Use a separate loop variable: rebinding `style` would make the error
    # below report the last fallback tried instead of the requested style.
    for candidate in (style, *_style_fallbacks.get(style, ())):
        if candidate in available:
            return available[candidate]
    raise ValueError(
        f"Locale {locale} does not support list formatting style {style!r} "
        f"(supported are {sorted(available)})",
    )
9 changes: 5 additions & 4 deletions scripts/import_cldr.py
Original file line number Diff line number Diff line change
Expand Up @@ -530,10 +530,11 @@ def parse_locale_display_names(data, tree):

def parse_list_patterns(data, tree):
    """
    Collect list pattern parts from `tree` into `data['list_patterns']`.

    Every `listPattern` element found under a `listPatterns` element is
    keyed by its `type` attribute (`'standard'` when the attribute is
    missing); each `listPatternPart` child is stored under its own `type`
    attribute with the text returned by `_text`.

    :param data: mapping being populated; `data['list_patterns']` is created
                 if it does not exist yet
    :param tree: parsed XML tree that is searched with `findall`
    """
    list_patterns = data.setdefault('list_patterns', {})
    for list_pattern_el in tree.findall('.//listPatterns/listPattern'):
        pattern_type = list_pattern_el.attrib.get('type', 'standard')
        for pattern_part_el in list_pattern_el.findall('listPatternPart'):
            pattern_part_type = pattern_part_el.attrib['type']
            # Create the per-style dict only once a part actually exists, so a
            # `listPattern` without parts (presumably an alias-only entry --
            # confirm against the CLDR data) leaves no empty style behind.
            list_patterns.setdefault(pattern_type, {})[pattern_part_type] = _text(pattern_part_el)


def parse_dates(data, tree, sup, regions, territory):
Expand Down
34 changes: 23 additions & 11 deletions tests/test_lists.py
Original file line number Diff line number Diff line change
@@ -1,20 +1,32 @@
import pytest

from babel import lists
from babel import lists, units


# The argument is named `items` rather than `list` so that it does not
# shadow the builtin inside the test.
@pytest.mark.parametrize(('items', 'locale', 'expected'), [
    ([], 'en', ''),
    (['string'], 'en', 'string'),
    (['string1', 'string2'], 'en', 'string1 and string2'),
    (['string1', 'string2', 'string3'], 'en', 'string1, string2, and string3'),
    (['string1', 'string2', 'string3'], 'zh', 'string1、string2和string3'),
    (['string1', 'string2', 'string3', 'string4'], 'ne', 'string1,string2, string3 र string4'),
])
def test_format_list(items, locale, expected):
    """Check `format_list` with the default style for several lengths and locales."""
    assert lists.format_list(items, locale=locale) == expected


def test_format_list_error():
    """A style name that `format_list` cannot resolve must raise ValueError."""
    items = ['a', 'b', 'c']
    with pytest.raises(ValueError):
        lists.format_list(items, style='orange', locale='en')


def test_issue_1098():
    """The "unit" style must give the same result as "unit-short" for zh_CN."""
    measurements = [
        units.format_unit(1, "length-foot", length="short", locale="zh_CN"),
        units.format_unit(5, "length-inch", length="short", locale="zh_CN"),
    ]
    # zh-CN does not specify the "unit" style, so we fall back to "unit-short" style.
    with_unit_style = lists.format_list(measurements, style="unit", locale="zh_CN")
    with_unit_short_style = lists.format_list(measurements, style="unit-short", locale="zh_CN")
    assert with_unit_style == with_unit_short_style
    # Translation verified using Google Translate. It would add more spacing, but the glyphs are correct.
    assert with_unit_short_style == "1英尺5英寸"
Loading