3 changes: 3 additions & 0 deletions .gitattributes
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
*.onnx filter=lfs diff=lfs merge=lfs -text
*.jpg filter=lfs diff=lfs merge=lfs -text
*.mp4 filter=lfs diff=lfs merge=lfs -text
35 changes: 35 additions & 0 deletions .github/workflows/publish.yaml
@@ -0,0 +1,35 @@
name: Publish Package

on:
push:
tags:
- 'v*' # Triggers on version tags like v1.0.0

jobs:
build:
runs-on: ubuntu-latest

steps:
- name: Checkout code
uses: actions/checkout@v2

- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: '3.7' # Specify the Python version

- name: Install dependencies
run: |
python -m pip install --upgrade pip
pip install setuptools wheel twine

- name: Build the package
run: |
python setup.py sdist bdist_wheel

- name: Publish to PyPI
env:
TWINE_USERNAME: ${{ secrets.PYPI_USERNAME }}
TWINE_PASSWORD: ${{ secrets.PYPI_PASSWORD }}
run: |
twine upload dist/*
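The publish job fires only on tags matching `v*`. GitHub's tag filters use glob-style matching, which can be illustrated with Python's `fnmatch` (this snippet is illustrative only and is not part of the PR):

```python
# Illustrative check of which tag names would trigger the `v*` filter.
# GitHub Actions uses glob-like patterns; fnmatch behaves the same way here.
from fnmatch import fnmatch

for tag in ["v1.0.0", "v2.1", "release-1.0", "1.0.0"]:
    print(f"{tag}: triggers={fnmatch(tag, 'v*')}")
```

So `git tag v1.0.0 && git push origin v1.0.0` publishes, while a plain `1.0.0` tag does not.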
53 changes: 53 additions & 0 deletions .github/workflows/test.yaml
@@ -0,0 +1,53 @@
name: Test Package

on:
push:
branches:
- '**'
tags-ignore:
- 'v*'
pull_request:

jobs:
test:
runs-on: ubuntu-latest

steps:
- name: Checkout code
uses: actions/checkout@v2

- name: Set up Python
uses: actions/setup-python@v2
with:
python-version: '3.11'

- name: Install system dependencies
run: |
sudo apt-get update
sudo apt-get install -y libglib2.0-0 libsm6 libxrender1 libxext6

- name: Install Python dependencies
run: |
python -m pip install --upgrade pip
pip install -r requirements.txt
pip install pytest

- name: Lint with flake8
run: |
pip install flake8
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics || true
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics

- name: Run tests
run: |
pytest --maxfail=3 --disable-warnings -q || echo "No tests found"

- name: Build the package
run: |
pip install build
python -m build

- name: Check the package
run: |
pip install twine
twine check dist/*
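The test step tolerates an empty suite (`|| echo "No tests found"`), and this PR adds no tests. A minimal pytest file the step would collect might look like the following sketch (the file name and function are hypothetical):

```python
# tests/test_smoke.py — hypothetical smoke test; pytest would discover any
# function named test_* in a file named test_*.py.
import sys


def test_python_version():
    # The README's prerequisites state Python 3.8+, so a CI run on an older
    # interpreter should fail fast.
    assert sys.version_info >= (3, 8)
```

With such a file present, the `|| echo "No tests found"` fallback would no longer mask an empty suite.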
4 changes: 0 additions & 4 deletions .gitignore
@@ -1,6 +1,3 @@
# Virtual environment
env/

# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
@@ -14,7 +11,6 @@ __pycache__/
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
68 changes: 48 additions & 20 deletions README.md
@@ -1,31 +1,56 @@
# PeSAR: Perception for Search and Rescue
✈️ *AI-powered visual detection system for aerial search operations*
![Alt Text](data/output.gif)
# PiSAR: Pipeline for Aerial Search and Rescue
✈️ *AI-powered visual detection pipeline for aerial search operations*

PiSAR is an open-source pipeline designed to streamline aerial search and rescue missions using advanced AI-based visual detection. It enables rapid analysis of aerial imagery and video to assist responders in locating people or objects of interest.

**Try PiSAR online:**
You can test PiSAR directly in your browser via our [PiSAR Space](https://huggingface.co/spaces/eadali/PiSAR).

![Demo GIF](data/output.gif)


## Installation

### Prerequisites
- Python 3.8 or higher
- CUDA (optional, for GPU support)
- Python 3.8+
- pip3 (Python package installer)
- *(Optional)* CUDA-enabled GPU & CUDA Toolkit for GPU acceleration

### Setup

### Steps
1. Clone the repository:
```bash
git clone https://github.com/your-username/aerial-object-detection.git
cd aerial-object-detection
```
1. **Clone the repository**
```bash
git clone https://github.com/eadali/PiSAR.git
cd PiSAR
```

2. Install the required dependencies:
```bash
pip3 install -r requirements.txt
```
2. **(Recommended) Create a virtual environment**
```bash
python3 -m venv pisar
source pisar/bin/activate
```

3. (Optional) If you want to use GPU acceleration, ensure you have the correct version of PyTorch installed with CUDA support. You can install it using:
```bash
pip3 install torch torchvision --index-url https://download.pytorch.org/whl/cu118
```
3. **Install dependencies**
- **CPU only:**
```bash
pip3 install -r requirements.txt
```
- **GPU (CUDA) support:**
```bash
pip3 install -r requirements-cuda.txt
```

4. **(Optional) Install PyTorch with a specific CUDA version**
See [PyTorch's official instructions](https://pytorch.org/get-started/locally/).

5. **Verify installation**
```bash
python3 -c "import torch; print(torch.cuda.is_available())"
```

*See `requirements.txt` and `requirements-cuda.txt` for details.*

---

## Usage
### Running the Script
@@ -52,5 +77,8 @@ The script supports the following command-line arguments:
| --device | Device to run the model on (cpu or cuda). | cpu |


---

## License
This project is licensed under the MIT License. See the LICENSE file for details.

This project is licensed under the MIT License. See the [LICENSE](LICENSE) file for details.
14 changes: 14 additions & 0 deletions config/yolo8n-bytetrack-cpu.yaml
@@ -0,0 +1,14 @@
# YOLOv8n + ByteTrack Configuration
pipeline:
detector:
model: yolov8n
categories: ['LightVehicle', 'Person', 'Building', 'UPole', 'Boat', 'Bike', 'Container', 'Truck', 'Gastank', 'Digger', 'Solarpanels', 'Bus']
thresholds:
confidence: 0.6
iou: 0.4
slicing:
overlap: 0.2
device: cpu

tracker:
algorithm: bytetrack
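The demo loads this file through the repo's `util.load_config` helper, whose behaviour the diff does not show. As a sketch, plain PyYAML recovers the same nested structure (the inline YAML below mirrors the CPU config above):

```python
# Sketch: parsing the pipeline config with PyYAML. The repo's own loader is
# util.load_config (not shown in this diff); safe_load yields plain dicts.
import yaml

CONFIG = """\
pipeline:
  detector:
    model: yolov8n
    thresholds:
      confidence: 0.6
      iou: 0.4
    slicing:
      overlap: 0.2
    device: cpu
  tracker:
    algorithm: bytetrack
"""

cfg = yaml.safe_load(CONFIG)
detector = cfg["pipeline"]["detector"]
print(detector["model"], detector["thresholds"]["confidence"])  # yolov8n 0.6
```

The CPU and CUDA configs differ only in the `device` key, so a loader like this serves both.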
14 changes: 14 additions & 0 deletions config/yolo8n-bytetrack-cuda.yaml
@@ -0,0 +1,14 @@
# YOLOv8n + ByteTrack Configuration
pipeline:
detector:
model: yolov8n
categories: ['LightVehicle', 'Person', 'Building', 'UPole', 'Boat', 'Bike', 'Container', 'Truck', 'Gastank', 'Digger', 'Solarpanels', 'Bus']
thresholds:
confidence: 0.6
iou: 0.4
slicing:
overlap: 0.2
device: cuda:0

tracker:
algorithm: bytetrack
Binary file removed data/WALDO30_yolov8n_640x640.pt
Binary file not shown.
3 changes: 0 additions & 3 deletions data/image_dense_example.png

This file was deleted.

Binary file removed data/output.gif
Binary file not shown.
3 changes: 0 additions & 3 deletions data/video_dense_example.mp4

This file was deleted.

155 changes: 80 additions & 75 deletions demo.py
@@ -1,81 +1,86 @@
import argparse
import cv2
import tqdm
from models import build_model
from engine import run_on_frame
from visualization import draw_estimations


# Constants
WINDOW_NAME = 'Aerial Detections'


def get_args_parser():
parser = argparse.ArgumentParser('Set aerial object detector', add_help=False)
# Input arguments
parser.add_argument('--image-input', help='Path to image file')
parser.add_argument('--video-input', help='Path to video file')
# Detector arguments
parser.add_argument('--detector', default='waldo30', type=str, help='Detector model')
parser.add_argument('--confidence-threshold', default=0.8, type=float, help='Confidence threshold for detections')
parser.add_argument('--overlap-height-ratio', default=0.2, type=float, help='Overlap height ratio')
parser.add_argument('--overlap-width-ratio', default=0.2, type=float, help='Overlap width ratio')
# Tracker arguments
parser.add_argument('--tracker', type=str, help='Tracker type')
# Device arguments
parser.add_argument('--device', default='cpu', type=str, help='Device to run the model on')
return parser


def frame_from_video(video):
while video.isOpened():
success, frame = video.read()
if success:
yield frame
else:
import numpy as np
from tqdm import tqdm
from pipeline import build_pipeline
from util import cfg, load_config, load_onnx_model
import supervision as sv

WINDOW_NAME = "Aerial Detections"


def get_args():
parser = argparse.ArgumentParser(description="Aerial object detection and tracking")
parser.add_argument("config", type=str, help="Path to config file")
parser.add_argument("--onnx-path", type=str, required=True, help="Path to ONNX model file")
input_group = parser.add_mutually_exclusive_group(required=True)
input_group.add_argument("--image", type=str, help="Path to image file")
input_group.add_argument("--video", type=str, help="Path to video file")
input_group.add_argument("--camid", type=int, help="Camera ID for video capture")
return parser.parse_args()


def frame_generator(source):
cap = cv2.VideoCapture(source)
while cap.isOpened():
ret, frame = cap.read()
if not ret:
break


def process_image(model, image_path):
image = cv2.imread(image_path)
class_id_to_name = model.get_class_mapping()
estimations = run_on_frame(model, image)
vis_image = draw_estimations(image, estimations, class_id_to_name)
cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
cv2.imshow(WINDOW_NAME, vis_image)
cv2.waitKey(0)
cv2.destroyAllWindows()


def process_video(model, video_path):
video = cv2.VideoCapture(video_path)
num_frames = int(video.get(cv2.CAP_PROP_FRAME_COUNT))
frame_gen = frame_from_video(video)
class_id_to_name = model.get_class_mapping()

for frame in tqdm.tqdm(frame_gen, total=num_frames):
estimations = run_on_frame(model, frame)
vis_frame = draw_estimations(frame, estimations, class_id_to_name)
cv2.namedWindow(WINDOW_NAME, cv2.WINDOW_NORMAL)
cv2.imshow(WINDOW_NAME, vis_frame)
if cv2.waitKey(1) == 27: # ESC key to quit
break

video.release()
cv2.destroyAllWindows()


def main(args):
model = build_model(args)
if args.image_input:
process_image(model, args.image_input)
elif args.video_input:
process_video(model, args.video_input)
yield frame
cap.release()


def annotate_frame(frame, detections, class_map):
box_annotator = sv.BoxAnnotator(thickness=2)
label_annotator = sv.LabelAnnotator(text_scale=0.5, text_thickness=1, text_padding=1)
labels = []
for class_id, tracker_id in zip(detections.class_id, detections.tracker_id):
class_name = class_map.get(class_id, "Unknown")
if np.isnan(tracker_id):
labels.append(class_name)
else:
labels.append(f"#{int(tracker_id)} {class_name}")
frame = box_annotator.annotate(scene=frame, detections=detections)
frame = label_annotator.annotate(scene=frame, detections=detections, labels=labels)
return frame


def show_frame(window, frame):
cv2.namedWindow(window, cv2.WINDOW_NORMAL)
cv2.imshow(window, frame)
key = cv2.waitKey(1)
# Quit on 'q' or ESC
if key in (ord('q'), 27):
return False
return True


def main():
args = get_args()
load_config(cfg, args.config)
pipeline = build_pipeline(cfg.pipeline)
load_onnx_model(pipeline.detector, args.onnx_path)
category_mapping = pipeline.detector.get_category_mapping()

if args.image:
image = cv2.imread(args.image)
if image is None:
print(f"Error: Unable to load image {args.image}")
return
detections = pipeline(image)
vis = annotate_frame(image, detections, category_mapping)
cv2.imshow(WINDOW_NAME, vis)
cv2.waitKey(0)
cv2.destroyAllWindows()
else:
print("Error: No input provided. Use --image-input or --video-input.")
source = args.camid if args.camid is not None else args.video
for frame in tqdm(frame_generator(source), desc="Processing"):
detections = pipeline(frame)
vis = annotate_frame(frame, detections, category_mapping)
if not show_frame(WINDOW_NAME, vis):
break
cv2.destroyAllWindows()


if __name__ == '__main__':
parser = argparse.ArgumentParser('Aerial object detection and tracking inference script', parents=[get_args_parser()])
args = parser.parse_args()
main(args)
if __name__ == "__main__":
main()
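The subtle part of the new `annotate_frame` is its label logic: detections without a tracker assignment carry a NaN `tracker_id` and get a bare class name, while tracked ones are prefixed with `#<id>`. A standalone sketch of that logic (`build_labels` is a hypothetical name, not in the PR):

```python
# Sketch of annotate_frame's label-building logic: NaN tracker ids mean the
# detection is untracked, so only the class name is shown. numpy is the only
# dependency.
import numpy as np


def build_labels(class_ids, tracker_ids, class_map):
    labels = []
    for class_id, tracker_id in zip(class_ids, tracker_ids):
        name = class_map.get(class_id, "Unknown")
        if np.isnan(tracker_id):
            labels.append(name)           # untracked: class name only
        else:
            labels.append(f"#{int(tracker_id)} {name}")
    return labels


class_map = {0: "Person", 1: "LightVehicle"}
print(build_labels([0, 1], [np.nan, 7.0], class_map))
# → ['Person', '#7 LightVehicle']
```

Keeping this as a pure function over arrays, rather than inlining it in the annotation loop, also makes the NaN edge case easy to unit-test.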