diff --git a/price_parser/parser.py b/price_parser/parser.py index 364ab2c..cc360f9 100644 --- a/price_parser/parser.py +++ b/price_parser/parser.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- import re import string +from collections import namedtuple from typing import Callable, Optional, Pattern, List, Tuple from decimal import Decimal, InvalidOperation @@ -37,18 +38,21 @@ def fromstring(cls, price: Optional[str], ``price`` string, it could be **preferred** over a value extracted from ``currency_hint`` string. """ - amount_text = extract_price_text(price) if price is not None else None - amount_num = ( - parse_number(amount_text, decimal_separator) - if amount_text is not None else None - ) currency = extract_currency_symbol(price, currency_hint) if currency is not None: currency = currency.strip() + + price_amount = _extract_price_amount(price, currency) + + amount_num = ( + parse_number(price_amount.text, decimal_separator, price_amount.negative) + if price_amount.text is not None else None + ) + return Price( amount=amount_num, currency=currency, - amount_text=amount_text, + amount_text=price_amount.text, ) @@ -189,6 +193,34 @@ def extract_price_text(price: str) -> Optional[str]: >>> extract_price_text("50") '50' """ + price_amount = _extract_price_amount(price, currency) + return price_amount.text + + +def _extract_price_amount(price: str, currency: Optional[str] = "") -> Optional[str]: + """ + Extract from a string the text of a price and a flag indicating + if this is a string of a negative price. + """ + + PriceAmount = namedtuple('PriceAmount', ['text', 'negative']) + + if price is None: + return PriceAmount(text=None, negative=False) + + negative_regexes = [ + r"-\s*?\d[\d.,\d]*", + r"\d[\d.,\d]*\d-", + ] + if currency is not None: + negative_regexes.append(r"-{}\d[\d.,\d]*".format(re.escape(currency))) + negative_amount_search = re.search( + r"({})(?:[^%\d]|$)".format("|".join(negative_regexes)), + price, + re.VERBOSE + ) + negative_amount = bool(negative_amount_search) + if price.count('€') == 1: m = re.search(r""" [\d\s.,]*?\d # number, probably with thousand separators @@ -197,7 +229,8 @@ def extract_price_text(price: str) -> Optional[str]: (?:$|[^\d]) # something which is not a digit """, price, re.VERBOSE) if m: - return m.group(0).replace(' ', '') + return PriceAmount(text=m.group(0).replace(' ', ''), negative=negative_amount) + m = re.search(r""" (\d[\d\s.,]*) # number, probably with thousand separators \s*? # skip whitespace @@ -205,10 +238,12 @@ def extract_price_text(price: str) -> Optional[str]: """, price, re.VERBOSE) if m: - return m.group(1).strip(',.').strip() + return PriceAmount(text=m.group(1).strip(',.').strip(), negative=negative_amount) + if 'free' in price.lower(): - return '0' - return None + return PriceAmount(text="0", negative=negative_amount) + + return PriceAmount(text=None, negative=negative_amount) # NOTE: Keep supported separators in sync with parse_number() @@ -244,7 +279,8 @@ def get_decimal_separator(price: str) -> Optional[str]: def parse_number(num: str, - decimal_separator: Optional[str] = None) -> Optional[Decimal]: + decimal_separator: Optional[str] = None, + is_negative: Optional[bool] = False) -> Optional[Decimal]: """ Parse a string with a number to a Decimal, guessing its format: decimal separator, thousand separator. Return None if parsing fails. @@ -294,6 +330,7 @@ def parse_number(num: str, assert decimal_separator == '€' num = num.replace('.', '').replace(',', '').replace('€', '.') try: - return Decimal(num) + multiplier = -1 if is_negative else 1 + return multiplier * Decimal(num) except InvalidOperation: return None diff --git a/tests/test_price_parsing.py b/tests/test_price_parsing.py index 28ee0a3..f751389 100644 --- a/tests/test_price_parsing.py +++ b/tests/test_price_parsing.py @@ -1980,6 +1980,28 @@ def __eq__(self, other): ] +PRICE_PARSING_NEGATIVE_PRICE = [ + Example(None, 'R$ -2,00', + 'R$', '2,00', -2.0), + Example(None, '- 1.400,00', + None, '1.400,00', -1400), + Example(None, '-3,95', + None, '3,95', -3.95), + Example(None, '-1.649,69', + None, '1.649,69', -1649.69), + Example(None, '-£127.54', + "£", '127.54', -127.54), + Example(None, '-R$127.54', + "R$", '127.54', -127.54), + Example(None, '-127,54 €', + "€", '127,54', -127.54), + Example(None, 'kr-127,54', + "kr", '127,54', -127.54), + Example(None, '€ 127,54-', + "€", '127,54', -127.54), +] + + @pytest.mark.parametrize( ["example"], [[e] for e in PRICE_PARSING_EXAMPLES_BUGS_CAUGHT] + @@ -1990,6 +2012,7 @@ def __eq__(self, other): [[e] for e in PRICE_PARSING_EXAMPLES_NO_PRICE] + [[e] for e in PRICE_PARSING_EXAMPLES_NO_CURRENCY] + [[e] for e in PRICE_PARSING_DECIMAL_SEPARATOR_EXAMPLES] + + [[e] for e in PRICE_PARSING_NEGATIVE_PRICE] + [pytest.param(e, marks=pytest.mark.xfail()) for e in PRICE_PARSING_EXAMPLES_XFAIL] )