From 07243d11b8dc6e7ab469699ec9c4a456b70e2b4b Mon Sep 17 00:00:00 2001
From: Abhishek <abhishekup082gmail.com@Abhisheks-MacBook-Air.local>
Date: Thu, 26 Feb 2026 23:02:55 +0530
Subject: [PATCH 1/3] [MNT] Diagnose and address long test runtimes (#1633)

- Add global per-test timeout (600s, pytest-timeout) to pytest config
- CI: report all test durations (--durations=0) for diagnosis
- CI: add explicit --timeout=600 to prevent hanging tests
- Optimize verify_cache_state fixture: scope function -> module
- Add scripts/profile_tests.sh for local duration profiling
---
 .github/workflows/test.yml |  6 +++---
 pyproject.toml             |  1 +
 scripts/profile_tests.sh   | 27 +++++++++++++++++++++++++++
 tests/conftest.py          | 10 ++++++----
 4 files changed, 37 insertions(+), 7 deletions(-)
 create mode 100755 scripts/profile_tests.sh

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index 7fa3450ca..fd5e12aad 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -119,7 +119,7 @@ jobs:
           marks="not production_server and not test_server"
         fi
 
-        pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
+        pytest -n 4 --durations=0 --timeout=600 --dist load -sv $codecov -o log_cli=true -m "$marks"
 
     - name: Run tests on Ubuntu Production
       if: matrix.os == 'ubuntu-latest'
@@ -136,14 +136,14 @@ jobs:
           marks="production_server and not test_server"
         fi
 
-        pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
+        pytest -n 4 --durations=0 --timeout=600 --dist load -sv $codecov -o log_cli=true -m "$marks"
 
     - name: Run tests on Windows
       if: matrix.os == 'windows-latest'
       env:
         OPENML_TEST_SERVER_ADMIN_KEY: ${{ secrets.OPENML_TEST_SERVER_ADMIN_KEY }}
       run: |  # we need a separate step because of the bash-specific if-statement in the previous one.
-        pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not test_server"
+        pytest -n 4 --durations=0 --timeout=600 --dist load -sv --reruns 5 --reruns-delay 1 -m "not test_server"
 
     - name: Check for files left behind by test
       if: matrix.os != 'windows-latest' && always()
diff --git a/pyproject.toml b/pyproject.toml
index 47013271d..573de1584 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -129,6 +129,7 @@ version = {attr = "openml.__version__.__version__"}
 testpaths = ["tests"]
 minversion = "7.0"
 xfail_strict = true
+timeout = 600
 filterwarnings=[
     "ignore:the matrix subclass:PendingDeprecationWarning"
 ]
diff --git a/scripts/profile_tests.sh b/scripts/profile_tests.sh
new file mode 100755
index 000000000..593700cff
--- /dev/null
+++ b/scripts/profile_tests.sh
@@ -0,0 +1,27 @@
+#!/bin/bash
+# Profile test durations to diagnose slow tests (Issue #1633)
+# Usage: ./scripts/profile_tests.sh [marker_filter]
+#
+# Examples:
+#   ./scripts/profile_tests.sh                               # non-server tests
+#   ./scripts/profile_tests.sh "production_server"            # production server tests only
+#   ./scripts/profile_tests.sh "sklearn"                      # sklearn tests only
+
+set -euo pipefail
+
+MARKER_FILTER="${1:-not production_server and not test_server}"
+
+echo "=== OpenML Test Duration Profiler ==="
+echo "Marker filter: $MARKER_FILTER"
+echo "Timeout per test: 300s"
+echo ""
+
+pytest \
+  --durations=0 \
+  --timeout=300 \
+  -q \
+  -m "$MARKER_FILTER" \
+  2>&1 | tee test_durations_report.txt
+
+echo ""
+echo "=== Report saved to test_durations_report.txt ==="
diff --git a/tests/conftest.py b/tests/conftest.py
index 2a7a6dcc7..423b26f70 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -105,7 +105,8 @@ def delete_remote_files(tracker, flow_names) -> None:
     if "flow" in tracker:
         to_sort = list(zip(tracker["flow"], flow_names))
         flow_deletion_order = [
-            entity_id for entity_id, _ in sorted(to_sort, key=lambda x: len(x[1]), reverse=True)
+            entity_id
+            for entity_id, _ in sorted(to_sort, key=lambda x: len(x[1]), reverse=True)
         ]
         tracker["flow"] = [flow_deletion_order[1] for flow_id, _ in flow_deletion_order]
 
@@ -254,7 +255,7 @@ def test_api_key() -> str:
     return TestBase.user_key
 
 
-@pytest.fixture(autouse=True, scope="function")
+@pytest.fixture(autouse=True, scope="module")
 def verify_cache_state(test_files_directory) -> Iterator[None]:
     assert_static_test_cache_correct(test_files_directory)
     yield
@@ -295,11 +296,12 @@ def with_test_cache(test_files_directory, request):
     openml.config.set_root_cache_directory(_root_cache_directory)
     if tmp_cache.exists():
         shutil.rmtree(tmp_cache)
-        
+
 
 @pytest.fixture
 def static_cache_dir():
-    return Path(__file__).parent / "files" 
+    return Path(__file__).parent / "files"
+
 
 @pytest.fixture
 def workdir(tmp_path):

From ede2e0de38730263daf33f5dcaf067a14f43aae2 Mon Sep 17 00:00:00 2001
From: Abhishek <abhishekup082gmail.com@Abhisheks-MacBook-Air.local>
Date: Sun, 1 Mar 2026 21:36:51 +0530
Subject: [PATCH 2/3] Address review feedback: revert CI/conftest changes,
 improve profile script

- Revert CI workflow to original --durations=20 (no timeout)
- Remove global timeout from pyproject.toml
- Revert conftest.py verify_cache_state scope to function
- Update profile_tests.sh: accept CLI args (-m, -d, -t, -o) with defaults
---
 .github/workflows/test.yml |  6 +++---
 pyproject.toml             |  1 -
 scripts/profile_tests.sh   | 44 +++++++++++++++++++++++++++++---------
 tests/conftest.py          |  2 +-
 4 files changed, 38 insertions(+), 15 deletions(-)

diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index fd5e12aad..7fa3450ca 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -119,7 +119,7 @@ jobs:
           marks="not production_server and not test_server"
         fi
 
-        pytest -n 4 --durations=0 --timeout=600 --dist load -sv $codecov -o log_cli=true -m "$marks"
+        pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
 
     - name: Run tests on Ubuntu Production
       if: matrix.os == 'ubuntu-latest'
@@ -136,14 +136,14 @@ jobs:
           marks="production_server and not test_server"
         fi
 
-        pytest -n 4 --durations=0 --timeout=600 --dist load -sv $codecov -o log_cli=true -m "$marks"
+        pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks"
 
     - name: Run tests on Windows
       if: matrix.os == 'windows-latest'
       env:
         OPENML_TEST_SERVER_ADMIN_KEY: ${{ secrets.OPENML_TEST_SERVER_ADMIN_KEY }}
       run: |  # we need a separate step because of the bash-specific if-statement in the previous one.
-        pytest -n 4 --durations=0 --timeout=600 --dist load -sv --reruns 5 --reruns-delay 1 -m "not test_server"
+        pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not test_server"
 
     - name: Check for files left behind by test
       if: matrix.os != 'windows-latest' && always()
diff --git a/pyproject.toml b/pyproject.toml
index 573de1584..47013271d 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -129,7 +129,6 @@ version = {attr = "openml.__version__.__version__"}
 testpaths = ["tests"]
 minversion = "7.0"
 xfail_strict = true
-timeout = 600
 filterwarnings=[
     "ignore:the matrix subclass:PendingDeprecationWarning"
 ]
diff --git a/scripts/profile_tests.sh b/scripts/profile_tests.sh
index 593700cff..88e6f0ad7 100755
--- a/scripts/profile_tests.sh
+++ b/scripts/profile_tests.sh
@@ -1,27 +1,51 @@
 #!/bin/bash
 # Profile test durations to diagnose slow tests (Issue #1633)
-# Usage: ./scripts/profile_tests.sh [marker_filter]
+#
+# Usage: ./scripts/profile_tests.sh [options]
+#
+# Options:
+#   -m MARKER    Pytest marker filter (default: "not production_server and not test_server")
+#   -d DURATION  Number of slowest durations to show, 0 for all (default: 20)
+#   -t TIMEOUT   Per-test timeout in seconds (default: 300)
+#   -o OUTPUT    Output file path for the report (default: test_durations_report.txt)
 #
 # Examples:
-#   ./scripts/profile_tests.sh                               # non-server tests
-#   ./scripts/profile_tests.sh "production_server"            # production server tests only
-#   ./scripts/profile_tests.sh "sklearn"                      # sklearn tests only
+#   ./scripts/profile_tests.sh
+#   ./scripts/profile_tests.sh -m "production_server" -d 0 -t 600
+#   ./scripts/profile_tests.sh -m "sklearn" -o sklearn_report.txt
 
 set -euo pipefail
 
-MARKER_FILTER="${1:-not production_server and not test_server}"
+# Default values
+MARKER_FILTER="not production_server and not test_server"
+DURATIONS=20
+TIMEOUT=300
+OUTPUT_FILE="test_durations_report.txt"
+
+# Parse command line arguments
+while getopts "m:d:t:o:" opt; do
+  case $opt in
+    m) MARKER_FILTER="$OPTARG" ;;
+    d) DURATIONS="$OPTARG" ;;
+    t) TIMEOUT="$OPTARG" ;;
+    o) OUTPUT_FILE="$OPTARG" ;;
+    *) echo "Usage: $0 [-m marker] [-d durations] [-t timeout] [-o output_file]" && exit 1 ;;
+  esac
+done
 
 echo "=== OpenML Test Duration Profiler ==="
 echo "Marker filter: $MARKER_FILTER"
-echo "Timeout per test: 300s"
+echo "Durations to show: $DURATIONS"
+echo "Timeout per test: ${TIMEOUT}s"
+echo "Output file: $OUTPUT_FILE"
 echo ""
 
 pytest \
-  --durations=0 \
-  --timeout=300 \
+  --durations="$DURATIONS" \
+  --timeout="$TIMEOUT" \
   -q \
   -m "$MARKER_FILTER" \
-  2>&1 | tee test_durations_report.txt
+  2>&1 | tee "$OUTPUT_FILE"
 
 echo ""
-echo "=== Report saved to test_durations_report.txt ==="
+echo "=== Report saved to $OUTPUT_FILE ==="
diff --git a/tests/conftest.py b/tests/conftest.py
index 423b26f70..399a97c3e 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -255,7 +255,7 @@ def test_api_key() -> str:
     return TestBase.user_key
 
 
-@pytest.fixture(autouse=True, scope="module")
+@pytest.fixture(autouse=True, scope="function")
 def verify_cache_state(test_files_directory) -> Iterator[None]:
     assert_static_test_cache_correct(test_files_directory)
     yield

From 8a00373528af5b9b7aa9c078f016436a92feb0eb Mon Sep 17 00:00:00 2001
From: Abhishek <abhishekup082gmail.com@Abhisheks-MacBook-Air.local>
Date: Sun, 1 Mar 2026 22:11:47 +0530
Subject: [PATCH 3/3] Update profile_tests.sh: add -n workers, --dist=load,
 remove -q

- Add -n flag for parallel workers (default: 4)
- Add --dist=load to distribute tests across workers
- Remove -q flag for full pytest output
- Mirror the CI pytest parallelism settings (-n 4, --dist load)
---
 scripts/profile_tests.sh | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/scripts/profile_tests.sh b/scripts/profile_tests.sh
index 88e6f0ad7..05a8cd2fe 100755
--- a/scripts/profile_tests.sh
+++ b/scripts/profile_tests.sh
@@ -7,12 +7,13 @@
 #   -m MARKER    Pytest marker filter (default: "not production_server and not test_server")
 #   -d DURATION  Number of slowest durations to show, 0 for all (default: 20)
 #   -t TIMEOUT   Per-test timeout in seconds (default: 300)
+#   -n WORKERS   Number of parallel workers (default: 4)
 #   -o OUTPUT    Output file path for the report (default: test_durations_report.txt)
 #
 # Examples:
 #   ./scripts/profile_tests.sh
 #   ./scripts/profile_tests.sh -m "production_server" -d 0 -t 600
-#   ./scripts/profile_tests.sh -m "sklearn" -o sklearn_report.txt
+#   ./scripts/profile_tests.sh -m "sklearn" -n 2 -o sklearn_report.txt
 
 set -euo pipefail
 
@@ -20,16 +21,18 @@ set -euo pipefail
 MARKER_FILTER="not production_server and not test_server"
 DURATIONS=20
 TIMEOUT=300
+NUM_WORKERS=4
 OUTPUT_FILE="test_durations_report.txt"
 
 # Parse command line arguments
-while getopts "m:d:t:o:" opt; do
+while getopts "m:d:t:n:o:" opt; do
   case $opt in
     m) MARKER_FILTER="$OPTARG" ;;
     d) DURATIONS="$OPTARG" ;;
     t) TIMEOUT="$OPTARG" ;;
+    n) NUM_WORKERS="$OPTARG" ;;
     o) OUTPUT_FILE="$OPTARG" ;;
-    *) echo "Usage: $0 [-m marker] [-d durations] [-t timeout] [-o output_file]" && exit 1 ;;
+    *) echo "Usage: $0 [-m marker] [-d durations] [-t timeout] [-n workers] [-o output_file]" && exit 1 ;;
   esac
 done
 
@@ -37,13 +40,15 @@ echo "=== OpenML Test Duration Profiler ==="
 echo "Marker filter: $MARKER_FILTER"
 echo "Durations to show: $DURATIONS"
 echo "Timeout per test: ${TIMEOUT}s"
+echo "Workers: $NUM_WORKERS"
 echo "Output file: $OUTPUT_FILE"
 echo ""
 
 pytest \
+  --dist=load \
+  -n="$NUM_WORKERS" \
   --durations="$DURATIONS" \
   --timeout="$TIMEOUT" \
-  -q \
   -m "$MARKER_FILTER" \
   2>&1 | tee "$OUTPUT_FILE"