Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 9 additions & 7 deletions oz_tree_build/images_and_vernaculars/get_wiki_images.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@
import re
import sys
import time
import urllib.request
from pathlib import Path

import requests
Expand Down Expand Up @@ -71,6 +70,9 @@
"img",
)

# HTTP headers sent with every Wikimedia request. A descriptive User-Agent
# identifying the bot and a contact address is required by
# https://meta.wikimedia.org/wiki/User-Agent_policy
# NOTE: the original value was split across two adjacent string literals
# (implicit concatenation left over from line wrapping); merged into one
# literal here — the resulting value is byte-identical.
wiki_http_headers = {
    "User-Agent": "OneZoomBot/0.1 (https://www.onezoom.org/; mail@onezoom.org) get-wiki-images/0.1"
}


# Copied from OZTree/OZprivate/ServerScripts/Utilities/getEOL_crops.py
def subdir_name(doID):
Expand All @@ -90,11 +92,6 @@ def make_http_request_with_retries(url):
retrying if we get a 429 rate limit error.
"""

# See https://meta.wikimedia.org/wiki/User-Agent_policy
wiki_http_headers = {
"User-Agent": "OneZoomBot/0.1 (https://www.onezoom.org/; " "mail@onezoom.org) get-wiki-images/0.1"
}

retries = 6
delay = 1
for i in range(retries):
Expand Down Expand Up @@ -374,7 +371,12 @@ def save_wiki_image(db, leaf_data, image_name, src, src_id, rating, output_dir,

# Download the uncropped image
uncropped_image_path = f"{image_dir}/{src_id}_uncropped.jpg"
urllib.request.urlretrieve(image_url, uncropped_image_path)
try:
response = requests.get(image_url, headers=wiki_http_headers)
with open(uncropped_image_path, "wb") as f:
f.write(response.content)
except Exception as e:
logger.warning(f"Error downloading {image_url}: {e}")

if cropper is None:
# Default to centering the crop
Expand Down
14 changes: 4 additions & 10 deletions tests/test_get_wiki_images.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,10 @@
import logging
import os
import shutil
import urllib.request
from types import SimpleNamespace
from unittest import mock

import pytest
import requests
from PIL import Image

from oz_tree_build._OZglobals import src_flags
Expand Down Expand Up @@ -57,7 +56,9 @@ def __init__(self, mock_qid):
self.temp_image_path = "/tmp/mocked_urlretrieve_image.jpg"
if not os.path.exists(self.temp_image_path):
image_url = "https://upload.wikimedia.org/wikipedia/commons/thumb/7/73/Lion_waiting_in_Namibia.jpg/500px-Lion_waiting_in_Namibia.jpg"
urllib.request.urlretrieve(image_url, self.temp_image_path)
response = requests.get(image_url, headers=get_wiki_images.wiki_http_headers)
with open(self.temp_image_path, "wb") as f:
f.write(response.content)
with open(self.temp_image_path, "rb") as f:
self.temp_image_content = f.read()

Expand Down Expand Up @@ -108,12 +109,6 @@ def mocked_requests_get(self, *args, **kwargs):
return MockResponse(200, self.mocked_requests[args[0]], content)
return MockResponse(404)

def mocked_urlretrieve(self, *args, **kwargs):
# Instead of actually downloading, just copy the test image to the destination
if not args[0].startswith("http"):
raise ValueError("Only HTTP URLs are supported in these tests")
shutil.copyfile(self.temp_image_path, args[1])

# Mock the Azure Vision API smart crop response
def mocked_analyze_from_url(self, *args, **kwargs):
return SimpleNamespace(
Expand Down Expand Up @@ -194,7 +189,6 @@ def wikidata_response(self, image_data, vernacular_data):

def mock_patch_all_web_request_methods(self, f):
@mock.patch("requests.get", side_effect=self.mocked_requests_get)
@mock.patch("urllib.request.urlretrieve", side_effect=self.mocked_urlretrieve)
@mock.patch(
"azure.ai.vision.imageanalysis.ImageAnalysisClient.analyze_from_url",
side_effect=self.mocked_analyze_from_url,
Expand Down
Loading