Summary of this patch (free text placed ahead of the first file header):

  * scraper/domains.py: _get_product_price no longer strips a hard-coded "$".
    It still removes "," and " " from the scraped text, then passes the rest
    through the new module-level helper get_number_string(), which deletes
    every character that is not a digit, "." or ",", before calling float().
    Presumably this is the Amazon handler (the selector is "a-price" and the
    "amazon" fixture changes below); the class header is outside the hunk.
  * tests/test_domains.py: imports get_number_string and adds a parametrized
    test with ten price strings (prefix/suffix currency codes and symbols).
  * tests/test_objects.json: the "amazon" fixture switches from an amazon.com
    listing expecting USD to an amazon.de listing expecting EUR.

NOTE(review): the leading whitespace of context lines was reconstructed
(4 spaces per level) and blank context lines were placed to satisfy the hunk
line counts; confirm against the repository before applying strictly.

diff --git a/scraper/domains.py b/scraper/domains.py
index f2bdc4f2..eb24334d 100644
--- a/scraper/domains.py
+++ b/scraper/domains.py
@@ -254,9 +254,8 @@ def _get_product_name(self) -> str:
         return self.request_data.find("span", id="productTitle").text.strip()
 
     def _get_product_price(self) -> float:
-        return float(
-            self.request_data.find("span", class_="a-price").span.text.replace("$", "").replace(",", "").replace(" ", "")
-        )
+        raw_price = self.request_data.find("span", class_="a-price").span.text.replace(",", "").replace(" ", "")
+        return float(get_number_string(raw_price))
 
     def _get_product_currency(self) -> str:
         regex_pattern = "%22currencyCode%22%3A%22(.{3})%22"
@@ -548,6 +547,13 @@ def get_website_handler(url: str) -> BaseWebsiteHandler:
     return website_handler(url)
 
 
+def get_number_string(value: str) -> str:
+    """Return value with every character except digits, commas (,) and periods (.) removed"""
+    text_pattern = re.compile(r"[^\d.,]+")
+    result = text_pattern.sub("", value)
+    return result
+
+
 SUPPORTED_DOMAINS: dict[str, BaseWebsiteHandler] = {
     "komplett": KomplettHandler,
     "proshop": ProshopHandler,
diff --git a/tests/test_domains.py b/tests/test_domains.py
index eae87961..f9a9c2de 100644
--- a/tests/test_domains.py
+++ b/tests/test_domains.py
@@ -1,7 +1,7 @@
 from dataclasses import dataclass
 import pytest
 
-from scraper.domains import get_website_name
+from scraper.domains import get_website_name, get_number_string
 
 
 @dataclass
@@ -42,3 +42,24 @@ def test_get_website_name(url: str, setting: UrlSetting, expected: str) -> None:
         keep_subdomain=setting.keep_subdomain,
     )
     assert result == expected
+
+
+test_price_values = [
+    ("USD 12.40", "12.40"),
+    ("$234.00", "234.00"),
+    ("£345.37", "345.37"),
+    ("486,89 kr", "486,89"),
+    ("$345.37", "345.37"),
+    ("£1345.37", "1345.37"),
+    ("1345,37 DKK", "1345,37"),
+    ("1345.37 DKK", "1345.37"),
+    ("USD 1345.37", "1345.37"),
+    ("USD 10345.37", "10345.37"),
+]
+
+
+@pytest.mark.parametrize("value,expected", test_price_values)
+def test_get_number_string(value: str, expected: str) -> None:
+    result = get_number_string(value)
+
+    assert result == expected
diff --git a/tests/test_objects.json b/tests/test_objects.json
index 0c65e877..948e6435 100644
--- a/tests/test_objects.json
+++ b/tests/test_objects.json
@@ -37,10 +37,10 @@
         "expected_currency": "DKK"
     },
     "amazon": {
-        "link": "https://www.amazon.com/Sony-WH-1000XM5-Canceling-Headphones-Hands-Free/dp/B09XS7JWHH",
-        "expected_title": "Sony WH-1000XM5 The Best Wireless Noise Canceling Headphones with Auto Noise Canceling Optimizer, Crystal Clear Hands-Free Calling, and Alexa Voice Control, Black",
-        "expected_id": "B09XS7JWHH",
-        "expected_currency": "USD"
+        "link": "https://www.amazon.de/-/en/Google-Pixel-Pro-Smartphone-Obsidian/dp/B0DG9DD9VN",
+        "expected_title": "Google Pixel 9 Pro (512GB, Obsi, EU / UK) + Pixel 9/9 Pro Case, Obsidian",
+        "expected_id": "B0DG9DD9VN",
+        "expected_currency": "EUR"
     },
     "ebay_with_itm": {
         "link": "https://www.ebay.com/itm/265771092654",