diff --git a/scripts/profile_tests.sh b/scripts/profile_tests.sh
new file mode 100755
index 000000000..05a8cd2fe
--- /dev/null
+++ b/scripts/profile_tests.sh
@@ -0,0 +1,56 @@
+#!/bin/bash
+# Profile test durations to diagnose slow tests (Issue #1633)
+#
+# Usage: ./scripts/profile_tests.sh [options]
+#
+# Options:
+#   -m MARKER    Pytest marker filter (default: "not production_server and not test_server")
+#   -d DURATION  Number of slowest durations to show, 0 for all (default: 20)
+#   -t TIMEOUT   Per-test timeout in seconds (default: 300)
+#   -n WORKERS   Number of parallel workers (default: 4)
+#   -o OUTPUT    Output file path for the report (default: test_durations_report.txt)
+#
+# Examples:
+#   ./scripts/profile_tests.sh
+#   ./scripts/profile_tests.sh -m "production_server" -d 0 -t 600
+#   ./scripts/profile_tests.sh -m "sklearn" -n 2 -o sklearn_report.txt
+
+set -euo pipefail
+
+# Default values
+MARKER_FILTER="not production_server and not test_server"
+DURATIONS=20
+TIMEOUT=300
+NUM_WORKERS=4
+OUTPUT_FILE="test_durations_report.txt"
+
+# Parse command line arguments
+while getopts "m:d:t:n:o:" opt; do
+  case $opt in
+    m) MARKER_FILTER="$OPTARG" ;;
+    d) DURATIONS="$OPTARG" ;;
+    t) TIMEOUT="$OPTARG" ;;
+    n) NUM_WORKERS="$OPTARG" ;;
+    o) OUTPUT_FILE="$OPTARG" ;;
+    *) echo "Usage: $0 [-m marker] [-d durations] [-t timeout] [-n workers] [-o output_file]" && exit 1 ;;
+  esac
+done
+
+echo "=== OpenML Test Duration Profiler ==="
+echo "Marker filter: $MARKER_FILTER"
+echo "Durations to show: $DURATIONS"
+echo "Timeout per test: ${TIMEOUT}s"
+echo "Workers: $NUM_WORKERS"
+echo "Output file: $OUTPUT_FILE"
+echo ""
+
+pytest \
+  --dist=load \
+  -n "$NUM_WORKERS" \
+  --durations="$DURATIONS" \
+  --timeout="$TIMEOUT" \
+  -m "$MARKER_FILTER" \
+  2>&1 | tee "$OUTPUT_FILE"
+
+echo ""
+echo "=== Report saved to $OUTPUT_FILE ==="
diff --git a/tests/conftest.py b/tests/conftest.py
index 2a7a6dcc7..399a97c3e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -105,7 +105,8 @@ def delete_remote_files(tracker, flow_names) -> None:
     if "flow" in tracker:
         to_sort = list(zip(tracker["flow"], flow_names))
         flow_deletion_order = [
-            entity_id for entity_id, _ in sorted(to_sort, key=lambda x: len(x[1]), reverse=True)
+            entity_id
+            for entity_id, _ in sorted(to_sort, key=lambda x: len(x[1]), reverse=True)
         ]
         tracker["flow"] = [flow_deletion_order[1] for flow_id, _ in flow_deletion_order]
@@ -295,11 +296,12 @@ def with_test_cache(test_files_directory, request):
     openml.config.set_root_cache_directory(_root_cache_directory)
     if tmp_cache.exists():
         shutil.rmtree(tmp_cache)
-    
+
 
 @pytest.fixture
 def static_cache_dir():
-    return Path(__file__).parent / "files" 
+    return Path(__file__).parent / "files"
+
 
 @pytest.fixture
 def workdir(tmp_path):