openml · Abhishek9639 · Feb 26, 2026 · Mar 1, 2026 · Mar 1, 2026 · geetu040
diff --git a/scripts/profile_tests.sh b/scripts/profile_tests.sh
@@ -0,0 +1,81 @@
+#!/bin/bash
+# Profile test durations to diagnose slow tests (Issue #1633)
+#
+# Runs pytest in parallel, prints the slowest test durations, and mirrors the
+# combined stdout/stderr into a report file. Requires the pytest-xdist
+# (-n/--dist) and pytest-timeout (--timeout) plugins.
+#
+# Usage: ./scripts/profile_tests.sh [options] [pytest_args...]
+#
+# Options:
+#   -m MARKER    Pytest marker filter (default: "not production_server and not test_server")
+#   -d DURATION  Number of slowest durations to show, 0 for all (default: 20)
+#   -t TIMEOUT   Per-test timeout in seconds (default: 300)
+#   -n WORKERS   Number of parallel workers (default: 4)
+#   -o OUTPUT    Output file path for the report (default: test_durations_report.txt)
+#
+# Any arguments left after the options are forwarded to pytest unchanged.
+#
+# Exit status: pytest's exit status (non-zero when tests fail), or 1 on a
+# usage error.
+#
+# Examples:
+#   ./scripts/profile_tests.sh
+#   ./scripts/profile_tests.sh -m "production_server" -d 0 -t 600
+#   ./scripts/profile_tests.sh -m "sklearn" -n 2 -o sklearn_report.txt
+#   ./scripts/profile_tests.sh -n 2 tests/
+
+set -euo pipefail
+
+# Default values
+MARKER_FILTER="not production_server and not test_server"
+DURATIONS=20
+TIMEOUT=300
+NUM_WORKERS=4
+OUTPUT_FILE="test_durations_report.txt"
+
+# Print the usage line to stderr and abort with status 1.
+usage() {
+  echo "Usage: $0 [-m marker] [-d durations] [-t timeout] [-n workers] [-o output_file] [pytest_args...]" >&2
+  exit 1
+}
+
+# Parse command line arguments
+while getopts "m:d:t:n:o:" opt; do
+  case "$opt" in
+    m) MARKER_FILTER="$OPTARG" ;;
+    d) DURATIONS="$OPTARG" ;;
+    t) TIMEOUT="$OPTARG" ;;
+    n) NUM_WORKERS="$OPTARG" ;;
+    o) OUTPUT_FILE="$OPTARG" ;;
+    *) usage ;;
+  esac
+done
+# Drop the parsed options; whatever remains is passed through to pytest.
+shift $((OPTIND - 1))
+
+echo "=== OpenML Test Duration Profiler ==="
+echo "Marker filter: $MARKER_FILTER"
+echo "Durations to show: $DURATIONS"
+echo "Timeout per test: ${TIMEOUT}s"
+echo "Workers: $NUM_WORKERS"
+echo "Output file: $OUTPUT_FILE"
+echo ""
+
+# pipefail makes the pipeline report pytest's failure even though tee succeeds.
+# Capture that status instead of letting `set -e` abort here, so the report
+# location is still announced when tests fail (the usual profiling scenario).
+# ${1+"$@"} expands to nothing when no extra args exist, which is safe under -u.
+status=0
+pytest \
+  --dist=load \
+  -n "$NUM_WORKERS" \
+  --durations="$DURATIONS" \
+  --timeout="$TIMEOUT" \
+  -m "$MARKER_FILTER" \
+  ${1+"$@"} \
+  2>&1 | tee "$OUTPUT_FILE" || status=$?
+
+echo ""
+echo "=== Report saved to $OUTPUT_FILE ==="
+exit "$status"
diff --git a/tests/conftest.py b/tests/conftest.py
@@ -105,7 +105,8 @@ def delete_remote_files(tracker, flow_names) -> None:
     if "flow" in tracker:
         to_sort = list(zip(tracker["flow"], flow_names))
         flow_deletion_order = [
-            entity_id for entity_id, _ in sorted(to_sort, key=lambda x: len(x[1]), reverse=True)
+            entity_id
+            for entity_id, _ in sorted(to_sort, key=lambda x: len(x[1]), reverse=True)
         ]
         tracker["flow"] = [flow_deletion_order[1] for flow_id, _ in flow_deletion_order]
 
@@ -295,11 +296,12 @@ def with_test_cache(test_files_directory, request):
     openml.config.set_root_cache_directory(_root_cache_directory)
     if tmp_cache.exists():
         shutil.rmtree(tmp_cache)
-        
+
 
 @pytest.fixture
 def static_cache_dir():
-    return Path(__file__).parent / "files" 
+    return Path(__file__).parent / "files"
+
 
 @pytest.fixture
 def workdir(tmp_path):