From 07243d11b8dc6e7ab469699ec9c4a456b70e2b4b Mon Sep 17 00:00:00 2001 From: Abhishek Date: Thu, 26 Feb 2026 23:02:55 +0530 Subject: [PATCH 1/3] [MNT] Diagnose and address long test runtimes (#1633) - Add global per-test timeout (600s) to pytest config - CI: report all test durations (--durations=0) for diagnosis - CI: add explicit --timeout=600 to prevent hanging tests - Optimize verify_cache_state fixture: scope function -> module - Add scripts/profile_tests.sh for local duration profiling --- .github/workflows/test.yml | 6 +++--- pyproject.toml | 1 + scripts/profile_tests.sh | 27 +++++++++++++++++++++++++++ tests/conftest.py | 10 ++++++---- 4 files changed, 37 insertions(+), 7 deletions(-) create mode 100755 scripts/profile_tests.sh diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 7fa3450ca..fd5e12aad 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -119,7 +119,7 @@ jobs: marks="not production_server and not test_server" fi - pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" + pytest -n 4 --durations=0 --timeout=600 --dist load -sv $codecov -o log_cli=true -m "$marks" - name: Run tests on Ubuntu Production if: matrix.os == 'ubuntu-latest' @@ -136,14 +136,14 @@ jobs: marks="production_server and not test_server" fi - pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" + pytest -n 4 --durations=0 --timeout=600 --dist load -sv $codecov -o log_cli=true -m "$marks" - name: Run tests on Windows if: matrix.os == 'windows-latest' env: OPENML_TEST_SERVER_ADMIN_KEY: ${{ secrets.OPENML_TEST_SERVER_ADMIN_KEY }} run: | # we need a separate step because of the bash-specific if-statement in the previous one. - pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not test_server" + pytest -n 4 --durations=0 --timeout=600 --dist load -sv --reruns 5 --reruns-delay 1 -m "not test_server" - name: Check for files left behind by test if: matrix.os != 'windows-latest' && always() diff --git a/pyproject.toml b/pyproject.toml index 47013271d..573de1584 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -129,6 +129,7 @@ version = {attr = "openml.__version__.__version__"} testpaths = ["tests"] minversion = "7.0" xfail_strict = true +timeout = 600 filterwarnings=[ "ignore:the matrix subclass:PendingDeprecationWarning" ] diff --git a/scripts/profile_tests.sh b/scripts/profile_tests.sh new file mode 100755 index 000000000..593700cff --- /dev/null +++ b/scripts/profile_tests.sh @@ -0,0 +1,27 @@ +#!/bin/bash +# Profile test durations to diagnose slow tests (Issue #1633) +# Usage: ./scripts/profile_tests.sh [marker_filter] +# +# Examples: +# ./scripts/profile_tests.sh # non-server tests +# ./scripts/profile_tests.sh "production_server" # production server tests only +# ./scripts/profile_tests.sh "sklearn" # sklearn tests only + +set -euo pipefail + +MARKER_FILTER="${1:-not production_server and not test_server}" + +echo "=== OpenML Test Duration Profiler ===" +echo "Marker filter: $MARKER_FILTER" +echo "Timeout per test: 300s" +echo "" + +pytest \ + --durations=0 \ + --timeout=300 \ + -q \ + -m "$MARKER_FILTER" \ + 2>&1 | tee test_durations_report.txt + +echo "" +echo "=== Report saved to test_durations_report.txt ===" diff --git a/tests/conftest.py b/tests/conftest.py index 2a7a6dcc7..423b26f70 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -105,7 +105,8 @@ def delete_remote_files(tracker, flow_names) -> None: if "flow" in tracker: to_sort = list(zip(tracker["flow"], flow_names)) flow_deletion_order = [ - entity_id for entity_id, _ in sorted(to_sort, key=lambda x: len(x[1]), reverse=True) + entity_id + for entity_id, _ in sorted(to_sort, key=lambda x: len(x[1]), reverse=True) ] tracker["flow"] = [flow_deletion_order[1] for flow_id, _ in flow_deletion_order] @@ -254,7 +255,7 @@ def test_api_key() -> str: return TestBase.user_key -@pytest.fixture(autouse=True, scope="function") +@pytest.fixture(autouse=True, scope="module") def verify_cache_state(test_files_directory) -> Iterator[None]: assert_static_test_cache_correct(test_files_directory) yield @@ -295,11 +296,12 @@ def with_test_cache(test_files_directory, request): openml.config.set_root_cache_directory(_root_cache_directory) if tmp_cache.exists(): shutil.rmtree(tmp_cache) - + @pytest.fixture def static_cache_dir(): - return Path(__file__).parent / "files" + return Path(__file__).parent / "files" + @pytest.fixture def workdir(tmp_path): From ede2e0de38730263daf33f5dcaf067a14f43aae2 Mon Sep 17 00:00:00 2001 From: Abhishek Date: Sun, 1 Mar 2026 21:36:51 +0530 Subject: [PATCH 2/3] Address review feedback: revert CI/conftest changes, improve profile script - Revert CI workflow to original --durations=20 (no timeout) - Remove global timeout from pyproject.toml - Revert conftest.py verify_cache_state scope to function - Update profile_tests.sh: accept CLI args (-m, -d, -t, -o) with defaults --- .github/workflows/test.yml | 6 +++--- pyproject.toml | 1 - scripts/profile_tests.sh | 44 +++++++++++++++++++++++++++++--------- tests/conftest.py | 2 +- 4 files changed, 38 insertions(+), 15 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index fd5e12aad..7fa3450ca 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -119,7 +119,7 @@ jobs: marks="not production_server and not test_server" fi - pytest -n 4 --durations=0 --timeout=600 --dist load -sv $codecov -o log_cli=true -m "$marks" + pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" - name: Run tests on Ubuntu Production if: matrix.os == 'ubuntu-latest' @@ -136,14 +136,14 @@ jobs: marks="production_server and not test_server" fi - pytest -n 4 --durations=0 --timeout=600 --dist load -sv $codecov -o log_cli=true -m "$marks" + pytest -n 4 --durations=20 --dist load -sv $codecov -o log_cli=true -m "$marks" - name: Run tests on Windows if: matrix.os == 'windows-latest' env: OPENML_TEST_SERVER_ADMIN_KEY: ${{ secrets.OPENML_TEST_SERVER_ADMIN_KEY }} run: | # we need a separate step because of the bash-specific if-statement in the previous one. - pytest -n 4 --durations=0 --timeout=600 --dist load -sv --reruns 5 --reruns-delay 1 -m "not test_server" + pytest -n 4 --durations=20 --dist load -sv --reruns 5 --reruns-delay 1 -m "not test_server" - name: Check for files left behind by test if: matrix.os != 'windows-latest' && always() diff --git a/pyproject.toml b/pyproject.toml index 573de1584..47013271d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -129,7 +129,6 @@ version = {attr = "openml.__version__.__version__"} testpaths = ["tests"] minversion = "7.0" xfail_strict = true -timeout = 600 filterwarnings=[ "ignore:the matrix subclass:PendingDeprecationWarning" ] diff --git a/scripts/profile_tests.sh b/scripts/profile_tests.sh index 593700cff..88e6f0ad7 100755 --- a/scripts/profile_tests.sh +++ b/scripts/profile_tests.sh @@ -1,27 +1,51 @@ #!/bin/bash # Profile test durations to diagnose slow tests (Issue #1633) -# Usage: ./scripts/profile_tests.sh [marker_filter] +# +# Usage: ./scripts/profile_tests.sh [options] +# +# Options: +# -m MARKER Pytest marker filter (default: "not production_server and not test_server") +# -d DURATION Number of slowest durations to show, 0 for all (default: 20) +# -t TIMEOUT Per-test timeout in seconds (default: 300) +# -o OUTPUT Output file path for the report (default: test_durations_report.txt) # # Examples: -# ./scripts/profile_tests.sh # non-server tests -# ./scripts/profile_tests.sh "production_server" # production server tests only -# ./scripts/profile_tests.sh "sklearn" # sklearn tests only +# ./scripts/profile_tests.sh +# ./scripts/profile_tests.sh -m "production_server" -d 0 -t 600 +# ./scripts/profile_tests.sh -m "sklearn" -o sklearn_report.txt set -euo pipefail -MARKER_FILTER="${1:-not production_server and not test_server}" +# Default values +MARKER_FILTER="not production_server and not test_server" +DURATIONS=20 +TIMEOUT=300 +OUTPUT_FILE="test_durations_report.txt" + +# Parse command line arguments +while getopts "m:d:t:o:" opt; do + case $opt in + m) MARKER_FILTER="$OPTARG" ;; + d) DURATIONS="$OPTARG" ;; + t) TIMEOUT="$OPTARG" ;; + o) OUTPUT_FILE="$OPTARG" ;; + *) echo "Usage: $0 [-m marker] [-d durations] [-t timeout] [-o output_file]" && exit 1 ;; + esac +done echo "=== OpenML Test Duration Profiler ===" echo "Marker filter: $MARKER_FILTER" -echo "Timeout per test: 300s" +echo "Durations to show: $DURATIONS" +echo "Timeout per test: ${TIMEOUT}s" +echo "Output file: $OUTPUT_FILE" echo "" pytest \ - --durations=0 \ - --timeout=300 \ + --durations="$DURATIONS" \ + --timeout="$TIMEOUT" \ -q \ -m "$MARKER_FILTER" \ - 2>&1 | tee test_durations_report.txt + 2>&1 | tee "$OUTPUT_FILE" echo "" -echo "=== Report saved to test_durations_report.txt ===" +echo "=== Report saved to $OUTPUT_FILE ===" diff --git a/tests/conftest.py b/tests/conftest.py index 423b26f70..399a97c3e 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -255,7 +255,7 @@ def test_api_key() -> str: return TestBase.user_key -@pytest.fixture(autouse=True, scope="module") +@pytest.fixture(autouse=True, scope="function") def verify_cache_state(test_files_directory) -> Iterator[None]: assert_static_test_cache_correct(test_files_directory) yield From 8a00373528af5b9b7aa9c078f016436a92feb0eb Mon Sep 17 00:00:00 2001 From: Abhishek Date: Sun, 1 Mar 2026 22:11:47 +0530 Subject: [PATCH 3/3] Update profile_tests.sh: add -n workers, --dist=load, remove -q - Add -n flag for parallel workers (default: 4) - Add --dist=load to distribute tests across workers - Remove -q flag for full pytest output - Mimics exact pytest command used in CI --- scripts/profile_tests.sh | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/scripts/profile_tests.sh b/scripts/profile_tests.sh index 88e6f0ad7..05a8cd2fe 100755 --- a/scripts/profile_tests.sh +++ b/scripts/profile_tests.sh @@ -7,12 +7,13 @@ # -m MARKER Pytest marker filter (default: "not production_server and not test_server") # -d DURATION Number of slowest durations to show, 0 for all (default: 20) # -t TIMEOUT Per-test timeout in seconds (default: 300) +# -n WORKERS Number of parallel workers (default: 4) # -o OUTPUT Output file path for the report (default: test_durations_report.txt) # # Examples: # ./scripts/profile_tests.sh # ./scripts/profile_tests.sh -m "production_server" -d 0 -t 600 -# ./scripts/profile_tests.sh -m "sklearn" -o sklearn_report.txt +# ./scripts/profile_tests.sh -m "sklearn" -n 2 -o sklearn_report.txt set -euo pipefail @@ -20,16 +21,18 @@ set -euo pipefail MARKER_FILTER="not production_server and not test_server" DURATIONS=20 TIMEOUT=300 +NUM_WORKERS=4 OUTPUT_FILE="test_durations_report.txt" # Parse command line arguments -while getopts "m:d:t:o:" opt; do +while getopts "m:d:t:n:o:" opt; do case $opt in m) MARKER_FILTER="$OPTARG" ;; d) DURATIONS="$OPTARG" ;; t) TIMEOUT="$OPTARG" ;; + n) NUM_WORKERS="$OPTARG" ;; o) OUTPUT_FILE="$OPTARG" ;; - *) echo "Usage: $0 [-m marker] [-d durations] [-t timeout] [-o output_file]" && exit 1 ;; + *) echo "Usage: $0 [-m marker] [-d durations] [-t timeout] [-n workers] [-o output_file]" && exit 1 ;; esac done @@ -37,13 +40,15 @@ echo "=== OpenML Test Duration Profiler ===" echo "Marker filter: $MARKER_FILTER" echo "Durations to show: $DURATIONS" echo "Timeout per test: ${TIMEOUT}s" +echo "Workers: $NUM_WORKERS" echo "Output file: $OUTPUT_FILE" echo "" pytest \ + --dist=load \ + -n="$NUM_WORKERS" \ --durations="$DURATIONS" \ --timeout="$TIMEOUT" \ - -q \ -m "$MARKER_FILTER" \ 2>&1 | tee "$OUTPUT_FILE"