From 849652a381697b7df90aeb440bef1bf9325ec844 Mon Sep 17 00:00:00 2001 From: openhands Date: Sun, 26 Jan 2025 01:09:36 +0000 Subject: [PATCH 1/4] Add PIL fallback for image resizing when PyVips is not available --- moondream/torch/image_crops.py | 44 +++++++++++----- test_image_crops.py | 93 ++++++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+), 14 deletions(-) create mode 100644 test_image_crops.py diff --git a/moondream/torch/image_crops.py b/moondream/torch/image_crops.py index a095d52e..6c45f023 100644 --- a/moondream/torch/image_crops.py +++ b/moondream/torch/image_crops.py @@ -1,10 +1,16 @@ import math import numpy as np import torch -import pyvips - from typing import TypedDict +from PIL import Image + +try: + import pyvips + HAVE_PYVIPS = True +except ImportError: + HAVE_PYVIPS = False + def select_tiling( height: int, width: int, crop_size: int, max_crops: int @@ -113,18 +119,28 @@ def overlap_crop_image( tiling[1] * crop_window_size + total_margin_pixels, ) - # Convert to vips for resizing - vips_image = pyvips.Image.new_from_array(image) - scale_x = target_size[1] / image.shape[1] - scale_y = target_size[0] / image.shape[0] - resized = vips_image.resize(scale_x, vscale=scale_y) - image = resized.numpy() - - # Create global crop - scale_x = base_size[1] / vips_image.width - scale_y = base_size[0] / vips_image.height - global_vips = vips_image.resize(scale_x, vscale=scale_y) - crops[0] = global_vips.numpy() + if HAVE_PYVIPS: + # Convert to vips for resizing + vips_image = pyvips.Image.new_from_array(image) + scale_x = target_size[1] / image.shape[1] + scale_y = target_size[0] / image.shape[0] + resized = vips_image.resize(scale_x, vscale=scale_y) + image = resized.numpy() + + # Create global crop + scale_x = base_size[1] / vips_image.width + scale_y = base_size[0] / vips_image.height + global_vips = vips_image.resize(scale_x, vscale=scale_y) + crops[0] = global_vips.numpy() + else: + # Use PIL for resizing + pil_image = Image.fromarray(image) + resized = pil_image.resize((target_size[1], target_size[0]), Image.Resampling.LANCZOS) + image = np.array(resized) + + # Create global crop + global_pil = pil_image.resize((base_size[1], base_size[0]), Image.Resampling.LANCZOS) + crops[0] = np.array(global_pil) for i in range(tiling[0]): for j in range(tiling[1]): diff --git a/test_image_crops.py b/test_image_crops.py new file mode 100644 index 00000000..f62812e0 --- /dev/null +++ b/test_image_crops.py @@ -0,0 +1,93 @@ +import numpy as np +from moondream.torch import image_crops +from moondream.torch.image_crops import overlap_crop_image + +def create_test_pattern(height, width): + """Create a test image with a recognizable pattern.""" + # Create gradient patterns + y = np.linspace(0, 1, height)[:, np.newaxis] + x = np.linspace(0, 1, width)[np.newaxis, :] + + # Broadcast to create 2D patterns + y_grid = np.broadcast_to(y, (height, width)) + x_grid = np.broadcast_to(x, (height, width)) + + # Create RGB channels with different patterns + r = (y_grid * 255).astype(np.uint8) # Vertical gradient + g = (x_grid * 255).astype(np.uint8) # Horizontal gradient + b = ((x_grid + y_grid) * 127).astype(np.uint8) # Diagonal gradient + + return np.dstack([r, g, b]) + +def compare_results(pyvips_result, pil_result): + """Compare results from PyVips and PIL implementations.""" + pyvips_crops = pyvips_result["crops"] + pil_crops = pil_result["crops"] + + # Compare number of crops and tiling + assert pyvips_result["tiling"] == pil_result["tiling"], \ + f"Tiling mismatch: PyVips {pyvips_result['tiling']} vs PIL {pil_result['tiling']}" + assert len(pyvips_crops) == len(pil_crops), \ + f"Number of crops mismatch: PyVips {len(pyvips_crops)} vs PIL {len(pil_crops)}" + + # Compare each crop + max_diff = 0 + mean_diff = 0 + + for i, (vips_crop, pil_crop) in enumerate(zip(pyvips_crops, pil_crops)): + # Convert to float for comparison + vips_float = vips_crop.astype(float) + pil_float = pil_crop.astype(float) + + # Calculate differences + diff = np.abs(vips_float - pil_float) + crop_max_diff = np.max(diff) + crop_mean_diff = np.mean(diff) + + max_diff = max(max_diff, crop_max_diff) + mean_diff += crop_mean_diff / len(pyvips_crops) + + print(f"Crop {i}:") + print(f" Max pixel difference: {crop_max_diff:.2f}") + print(f" Mean pixel difference: {crop_mean_diff:.2f}") + + print(f"\nOverall:") + print(f" Maximum pixel difference: {max_diff:.2f}") + print(f" Average pixel difference: {mean_diff:.2f}") + + # Fail if differences are too large + assert max_diff < 5.0, f"Max pixel difference ({max_diff:.2f}) is too large" + assert mean_diff < 2.0, f"Mean pixel difference ({mean_diff:.2f}) is too large" + +def test_image_crops(): + # Create a test image with a known pattern + test_image = create_test_pattern(300, 400) + + # Get results using PyVips + image_crops.HAVE_PYVIPS = True + pyvips_result = overlap_crop_image( + image=test_image, + overlap_margin=4, + max_crops=12, + base_size=(378, 378), + patch_size=14 + ) + print("\nPyVips test completed") + + # Get results using PIL + image_crops.HAVE_PYVIPS = False + pil_result = overlap_crop_image( + image=test_image, + overlap_margin=4, + max_crops=12, + base_size=(378, 378), + patch_size=14 + ) + print("\nPIL test completed") + + # Compare results + print("\nComparing results:") + compare_results(pyvips_result, pil_result) + +if __name__ == "__main__": + test_image_crops() \ No newline at end of file From bd7af47897aa9139b609698654ff81edc7d53d74 Mon Sep 17 00:00:00 2001 From: openhands Date: Sun, 26 Jan 2025 01:13:56 +0000 Subject: [PATCH 2/4] Add PIL fallback for image resizing when PyVips is not available --- moondream/torch/image_crops.py | 13 +++-- test_image_crops.py | 93 ---------------------------------- 2 files changed, 9 insertions(+), 97 deletions(-) delete mode 100644 test_image_crops.py diff --git a/moondream/torch/image_crops.py b/moondream/torch/image_crops.py index 6c45f023..2798dfe7 100644 --- a/moondream/torch/image_crops.py +++ b/moondream/torch/image_crops.py @@ -3,10 +3,9 @@ import torch from typing import TypedDict -from PIL import Image - try: import pyvips + HAVE_PYVIPS = True except ImportError: HAVE_PYVIPS = False @@ -134,12 +133,18 @@ def overlap_crop_image( crops[0] = global_vips.numpy() else: # Use PIL for resizing + from PIL import Image + pil_image = Image.fromarray(image) - resized = pil_image.resize((target_size[1], target_size[0]), Image.Resampling.LANCZOS) + resized = pil_image.resize( + (target_size[1], target_size[0]), Image.Resampling.LANCZOS + ) image = np.array(resized) # Create global crop - global_pil = pil_image.resize((base_size[1], base_size[0]), Image.Resampling.LANCZOS) + global_pil = pil_image.resize( + (base_size[1], base_size[0]), Image.Resampling.LANCZOS + ) crops[0] = np.array(global_pil) for i in range(tiling[0]): diff --git a/test_image_crops.py b/test_image_crops.py deleted file mode 100644 index f62812e0..00000000 --- a/test_image_crops.py +++ /dev/null @@ -1,93 +0,0 @@ -import numpy as np -from moondream.torch import image_crops -from moondream.torch.image_crops import overlap_crop_image - -def create_test_pattern(height, width): - """Create a test image with a recognizable pattern.""" - # Create gradient patterns - y = np.linspace(0, 1, height)[:, np.newaxis] - x = np.linspace(0, 1, width)[np.newaxis, :] - - # Broadcast to create 2D patterns - y_grid = np.broadcast_to(y, (height, width)) - x_grid = np.broadcast_to(x, (height, width)) - - # Create RGB channels with different patterns - r = (y_grid * 255).astype(np.uint8) # Vertical gradient - g = (x_grid * 255).astype(np.uint8) # Horizontal gradient - b = ((x_grid + y_grid) * 127).astype(np.uint8) # Diagonal gradient - - return np.dstack([r, g, b]) - -def compare_results(pyvips_result, pil_result): - """Compare results from PyVips and PIL implementations.""" - pyvips_crops = pyvips_result["crops"] - pil_crops = pil_result["crops"] - - # Compare number of crops and tiling - assert pyvips_result["tiling"] == pil_result["tiling"], \ - f"Tiling mismatch: PyVips {pyvips_result['tiling']} vs PIL {pil_result['tiling']}" - assert len(pyvips_crops) == len(pil_crops), \ - f"Number of crops mismatch: PyVips {len(pyvips_crops)} vs PIL {len(pil_crops)}" - - # Compare each crop - max_diff = 0 - mean_diff = 0 - - for i, (vips_crop, pil_crop) in enumerate(zip(pyvips_crops, pil_crops)): - # Convert to float for comparison - vips_float = vips_crop.astype(float) - pil_float = pil_crop.astype(float) - - # Calculate differences - diff = np.abs(vips_float - pil_float) - crop_max_diff = np.max(diff) - crop_mean_diff = np.mean(diff) - - max_diff = max(max_diff, crop_max_diff) - mean_diff += crop_mean_diff / len(pyvips_crops) - - print(f"Crop {i}:") - print(f" Max pixel difference: {crop_max_diff:.2f}") - print(f" Mean pixel difference: {crop_mean_diff:.2f}") - - print(f"\nOverall:") - print(f" Maximum pixel difference: {max_diff:.2f}") - print(f" Average pixel difference: {mean_diff:.2f}") - - # Fail if differences are too large - assert max_diff < 5.0, f"Max pixel difference ({max_diff:.2f}) is too large" - assert mean_diff < 2.0, f"Mean pixel difference ({mean_diff:.2f}) is too large" - -def test_image_crops(): - # Create a test image with a known pattern - test_image = create_test_pattern(300, 400) - - # Get results using PyVips - image_crops.HAVE_PYVIPS = True - pyvips_result = overlap_crop_image( - image=test_image, - overlap_margin=4, - max_crops=12, - base_size=(378, 378), - patch_size=14 - ) - print("\nPyVips test completed") - - # Get results using PIL - image_crops.HAVE_PYVIPS = False - pil_result = overlap_crop_image( - image=test_image, - overlap_margin=4, - max_crops=12, - base_size=(378, 378), - patch_size=14 - ) - print("\nPIL test completed") - - # Compare results - print("\nComparing results:") - compare_results(pyvips_result, pil_result) - -if __name__ == "__main__": - test_image_crops() \ No newline at end of file From 71c46c3bd2574dedfb941d366351ed38415f24dc Mon Sep 17 00:00:00 2001 From: openhands Date: Sun, 26 Jan 2025 01:17:50 +0000 Subject: [PATCH 3/4] Move PIL import to top level --- moondream/torch/image_crops.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/moondream/torch/image_crops.py b/moondream/torch/image_crops.py index 2798dfe7..69be0711 100644 --- a/moondream/torch/image_crops.py +++ b/moondream/torch/image_crops.py @@ -2,6 +2,7 @@ import numpy as np import torch from typing import TypedDict +from PIL import Image try: import pyvips @@ -133,8 +134,6 @@ def overlap_crop_image( crops[0] = global_vips.numpy() else: # Use PIL for resizing - from PIL import Image - pil_image = Image.fromarray(image) resized = pil_image.resize( (target_size[1], target_size[0]), Image.Resampling.LANCZOS From 08119fea8a9d15c47fd20c8469efa7a305e8c76b Mon Sep 17 00:00:00 2001 From: openhands Date: Tue, 28 Jan 2025 07:31:52 +0000 Subject: [PATCH 4/4] Move PIL import inside except block --- moondream/torch/image_crops.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/moondream/torch/image_crops.py b/moondream/torch/image_crops.py index 69be0711..a3ca42e8 100644 --- a/moondream/torch/image_crops.py +++ b/moondream/torch/image_crops.py @@ -2,13 +2,12 @@ import numpy as np import torch from typing import TypedDict -from PIL import Image try: import pyvips - HAVE_PYVIPS = True except ImportError: + from PIL import Image HAVE_PYVIPS = False