Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 9 additions & 3 deletions scraper/domains.py
Original file line number Diff line number Diff line change
Expand Up @@ -254,9 +254,8 @@ def _get_product_name(self) -> str:
return self.request_data.find("span", id="productTitle").text.strip()

def _get_product_price(self) -> float:
# NOTE(review): this span comes from a rendered diff whose +/- markers and indentation
# were lost, so it appears to show two implementations back to back (the diff header
# reports 3 deletions, matching the first form) — confirm against the real file.
#
# First form: take the text of the span nested inside the "a-price" span, strip "$",
# "," and spaces by hand, and convert what is left straight to float.
return float(
self.request_data.find("span", class_="a-price").span.text.replace("$", "").replace(",", "").replace(" ", "")
)
# Second form: strip only "," and spaces here, then let get_number_string() drop every
# remaining character that is not a digit, comma or period (e.g. a currency symbol)
# before converting to float.
raw_price = self.request_data.find("span", class_="a-price").span.text.replace(",", "").replace(" ", "")
return float(get_number_string(raw_price))

def _get_product_currency(self) -> str:
regex_pattern = "%22currencyCode%22%3A%22(.{3})%22"
Expand Down Expand Up @@ -548,6 +547,13 @@ def get_website_handler(url: str) -> BaseWebsiteHandler:
return website_handler(url)


def get_number_string(value: str) -> str:
    """Strip everything from ``value`` except digits, commas (,) and periods (.).

    Separators are kept exactly as they appear in the input; nothing is
    normalised, so "486,89 kr" becomes "486,89" and "USD 12.40" becomes
    "12.40". Input without any of the kept characters yields "".
    """
    return re.sub(r"[^\d.,]+", "", value)


SUPPORTED_DOMAINS: dict[str, BaseWebsiteHandler] = {
"komplett": KomplettHandler,
"proshop": ProshopHandler,
Expand Down
23 changes: 22 additions & 1 deletion tests/test_domains.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
from dataclasses import dataclass
import pytest

from scraper.domains import get_website_name
from scraper.domains import get_website_name, get_number_string


@dataclass
Expand Down Expand Up @@ -42,3 +42,24 @@ def test_get_website_name(url: str, setting: UrlSetting, expected: str) -> None:
keep_subdomain=setting.keep_subdomain,
)
assert result == expected


# Each pair is (raw price text, expected result of get_number_string): currency
# symbols, currency codes and spaces are dropped, while the decimal separator
# ("." or ",") is kept as written.
test_price_values = [
    ("USD 12.40", "12.40"),
    ("$234.00", "234.00"),
    ("£345.37", "345.37"),
    ("486,89 kr", "486,89"),
    ("$345.37", "345.37"),
    ("£1345.37", "1345.37"),
    ("1345,37 DKK", "1345,37"),
    ("1345.37 DKK", "1345.37"),
    ("USD 1345.37", "1345.37"),
    ("USD 10345.37", "10345.37"),
]


@pytest.mark.parametrize("value,expected", test_price_values)
def test_get_number_string(value: str, expected: str) -> None:
    """Check get_number_string against each (value, expected) pair in test_price_values."""
    assert get_number_string(value) == expected
8 changes: 4 additions & 4 deletions tests/test_objects.json
Original file line number Diff line number Diff line change
Expand Up @@ -37,10 +37,10 @@
"expected_currency": "DKK"
},
"amazon": {
"link": "https://www.amazon.com/Sony-WH-1000XM5-Canceling-Headphones-Hands-Free/dp/B09XS7JWHH",
"expected_title": "Sony WH-1000XM5 The Best Wireless Noise Canceling Headphones with Auto Noise Canceling Optimizer, Crystal Clear Hands-Free Calling, and Alexa Voice Control, Black",
"expected_id": "B09XS7JWHH",
"expected_currency": "USD"
"link": "https://www.amazon.de/-/en/Google-Pixel-Pro-Smartphone-Obsidian/dp/B0DG9DD9VN",
"expected_title": "Google Pixel 9 Pro (512GB, Obsi, EU / UK) + Pixel 9/9 Pro Case, Obsidian",
"expected_id": "B0DG9DD9VN",
"expected_currency": "EUR"
},
"ebay_with_itm": {
"link": "https://www.ebay.com/itm/265771092654",
Expand Down
Loading