From e5ba9844a1651dcc0f989e8c2853097ea966e983 Mon Sep 17 00:00:00 2001
From: Abhishek <abhishekup082gmail.com@Abhisheks-MacBook-Air.local>
Date: Mon, 23 Feb 2026 23:02:39 +0530
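Subject: [PATCH 1/4] [DOC] Add usage examples to core function docstrings
 (#1538)

Add "Examples" sections to the docstrings of get_datasets, get_dataset,
run_model_on_task, get_suite, get_study, create_study and get_task.

This commit also re-wraps a number of long lines in
openml/runs/functions.py, openml/study/functions.py and
openml/tasks/functions.py; those changes do not alter runtime behaviour.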

---
 openml/datasets/functions.py | 12 ++++++
 openml/runs/functions.py     | 78 ++++++++++++++++++++++++++----------
 openml/study/functions.py    | 25 +++++++++++-
 openml/tasks/functions.py    | 12 +++++-
 4 files changed, 103 insertions(+), 24 deletions(-)

diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py
index 3ac657ea0..c2c59683b 100644
--- a/openml/datasets/functions.py
+++ b/openml/datasets/functions.py
@@ -364,6 +364,11 @@ def get_datasets(
     -------
     datasets : list of datasets
         A list of dataset objects.
+
+    Examples
+    --------
+    >>> import openml
+    >>> datasets = openml.datasets.get_datasets([1, 2, 3])  # doctest: +SKIP
     """
     datasets = []
     for dataset_id in dataset_ids:
@@ -446,6 +451,13 @@ def get_dataset(  # noqa: C901, PLR0912
     -------
     dataset : :class:`openml.OpenMLDataset`
         The downloaded dataset.
+
+    Examples
+    --------
+    >>> import openml
+    >>> dataset = openml.datasets.get_dataset(1)  # doctest: +SKIP
+    >>> dataset = openml.datasets.get_dataset("iris", version=1)  # doctest: +SKIP
+    >>> dataset = openml.datasets.get_dataset(1, download_data=True)  # doctest: +SKIP
     """
     if download_all_files:
         warnings.warn(
diff --git a/openml/runs/functions.py b/openml/runs/functions.py
index 503788dbd..ffb468c69 100644
--- a/openml/runs/functions.py
+++ b/openml/runs/functions.py
@@ -104,6 +104,15 @@ def run_model_on_task(  # noqa: PLR0913
         Result of the run.
     flow : OpenMLFlow (optional, only if `return_flow` is True).
         Flow generated from the model.
+
+    Examples
+    --------
+    >>> import openml
+    >>> import openml_sklearn  # doctest: +SKIP
+    >>> from sklearn.tree import DecisionTreeClassifier  # doctest: +SKIP
+    >>> clf = DecisionTreeClassifier()  # doctest: +SKIP
+    >>> task = openml.tasks.get_task(1)  # doctest: +SKIP
+    >>> run = openml.runs.run_model_on_task(clf, task)  # doctest: +SKIP
     """
     if avoid_duplicate_runs is None:
         avoid_duplicate_runs = openml.config.avoid_duplicate_runs
@@ -273,9 +282,7 @@ def run_flow_on_task(  # noqa: C901, PLR0912, PLR0915, PLR0913
                 setup_id = setup_exists(flow_from_server)
                 ids = run_exists(task.task_id, setup_id)
                 if ids:
-                    error_message = (
-                        "One or more runs of this setup were already performed on the task."
-                    )
+                    error_message = "One or more runs of this setup were already performed on the task."
                     raise OpenMLRunsExistError(ids, error_message)
         else:
             # Flow does not exist on server and we do not want to upload it.
@@ -505,11 +512,15 @@ def _run_task_get_arffcontent(  # noqa: PLR0915, PLR0912, C901
     # this information is multiple times overwritten, but due to the ordering
     # of tne loops, eventually it contains the information based on the full
     # dataset size
-    user_defined_measures_per_fold = OrderedDict()  # type: 'OrderedDict[str, OrderedDict]'
+    user_defined_measures_per_fold = (
+        OrderedDict()
+    )  # type: 'OrderedDict[str, OrderedDict]'
     # stores sample-based evaluation measures (sublevel of fold-based)
     # will also be filled on a non sample-based task, but the information
     # is the same as the fold-based measures, and disregarded in that case
-    user_defined_measures_per_sample = OrderedDict()  # type: 'OrderedDict[str, OrderedDict]'
+    user_defined_measures_per_sample = (
+        OrderedDict()
+    )  # type: 'OrderedDict[str, OrderedDict]'
 
     # TODO use different iterator to only provide a single iterator (less
     # methods, less maintenance, less confusion)
@@ -557,9 +568,14 @@ def _run_task_get_arffcontent(  # noqa: PLR0915, PLR0912, C901
     )  # job_rvals contain the output of all the runs with one-to-one correspondence with `jobs`
 
     for n_fit, rep_no, fold_no, sample_no in jobs:
-        pred_y, proba_y, test_indices, test_y, inner_trace, user_defined_measures_fold = job_rvals[
-            n_fit - 1
-        ]
+        (
+            pred_y,
+            proba_y,
+            test_indices,
+            test_y,
+            inner_trace,
+            user_defined_measures_fold,
+        ) = job_rvals[n_fit - 1]
 
         if inner_trace is not None:
             traces.append(inner_trace)
@@ -598,7 +614,11 @@ def _calculate_local_measure(  # type: ignore
                             if isinstance(test_y[i], (int, np.integer))
                             else test_y[i]
                         )
-                    pred_prob = proba_y.iloc[i] if isinstance(proba_y, pd.DataFrame) else proba_y[i]
+                    pred_prob = (
+                        proba_y.iloc[i]
+                        if isinstance(proba_y, pd.DataFrame)
+                        else proba_y[i]
+                    )
 
                     arff_line = format_prediction(
                         task=task,
@@ -661,11 +681,13 @@ def _calculate_local_measure(  # type: ignore
             if rep_no not in user_defined_measures_per_sample[measure]:
                 user_defined_measures_per_sample[measure][rep_no] = OrderedDict()
             if fold_no not in user_defined_measures_per_sample[measure][rep_no]:
-                user_defined_measures_per_sample[measure][rep_no][fold_no] = OrderedDict()
+                user_defined_measures_per_sample[measure][rep_no][
+                    fold_no
+                ] = OrderedDict()
 
-            user_defined_measures_per_fold[measure][rep_no][fold_no] = user_defined_measures_fold[
-                measure
-            ]
+            user_defined_measures_per_fold[measure][rep_no][fold_no] = (
+                user_defined_measures_fold[measure]
+            )
             user_defined_measures_per_sample[measure][rep_no][fold_no][sample_no] = (
                 user_defined_measures_fold[measure]
             )
@@ -821,7 +843,9 @@ def get_run(run_id: int, ignore_cache: bool = False) -> OpenMLRun:  # noqa: FBT0
     run : OpenMLRun
         Run corresponding to ID, fetched from the server.
     """
-    run_dir = Path(openml.utils._create_cache_directory_for_id(RUNS_CACHE_DIR_NAME, run_id))
+    run_dir = Path(
+        openml.utils._create_cache_directory_for_id(RUNS_CACHE_DIR_NAME, run_id)
+    )
     run_file = run_dir / "description.xml"
 
     run_dir.mkdir(parents=True, exist_ok=True)
@@ -840,7 +864,9 @@ def get_run(run_id: int, ignore_cache: bool = False) -> OpenMLRun:  # noqa: FBT0
     return _create_run_from_xml(run_xml)
 
 
-def _create_run_from_xml(xml: str, from_server: bool = True) -> OpenMLRun:  # noqa: PLR0915, PLR0912, C901, FBT002
+def _create_run_from_xml(
+    xml: str, from_server: bool = True
+) -> OpenMLRun:  # noqa: PLR0915, PLR0912, C901, FBT002
     """Create a run object from xml returned from server.
 
     Parameters
@@ -870,11 +896,13 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None):  # type: ignore
         if not from_server:
             return None
 
-        raise AttributeError("Run XML does not contain required (server) field: ", fieldname)
+        raise AttributeError(
+            "Run XML does not contain required (server) field: ", fieldname
+        )
 
-    run = xmltodict.parse(xml, force_list=["oml:file", "oml:evaluation", "oml:parameter_setting"])[
-        "oml:run"
-    ]
+    run = xmltodict.parse(
+        xml, force_list=["oml:file", "oml:evaluation", "oml:parameter_setting"]
+    )["oml:run"]
     run_id = obtain_field(run, "oml:run_id", from_server, cast=int)
     uploader = obtain_field(run, "oml:uploader", from_server, cast=int)
     uploader_name = obtain_field(run, "oml:uploader_name", from_server)
@@ -1029,7 +1057,9 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None):  # type: ignore
 
 def _get_cached_run(run_id: int) -> OpenMLRun:
     """Load a run from the cache."""
-    run_cache_dir = openml.utils._create_cache_directory_for_id(RUNS_CACHE_DIR_NAME, run_id)
+    run_cache_dir = openml.utils._create_cache_directory_for_id(
+        RUNS_CACHE_DIR_NAME, run_id
+    )
     run_file = run_cache_dir / "description.xml"
     try:
         with run_file.open(encoding="utf8") as fh:
@@ -1199,7 +1229,9 @@ def __list_runs(api_call: str) -> pd.DataFrame:
     runs_dict = xmltodict.parse(xml_string, force_list=("oml:run",))
     # Minimalistic check if the XML is useful
     if "oml:runs" not in runs_dict:
-        raise ValueError(f'Error in return XML, does not contain "oml:runs": {runs_dict}')
+        raise ValueError(
+            f'Error in return XML, does not contain "oml:runs": {runs_dict}'
+        )
 
     if "@xmlns:oml" not in runs_dict["oml:runs"]:
         raise ValueError(
@@ -1213,7 +1245,9 @@ def __list_runs(api_call: str) -> pd.DataFrame:
             f'"http://openml.org/openml": {runs_dict}',
         )
 
-    assert isinstance(runs_dict["oml:runs"]["oml:run"], list), type(runs_dict["oml:runs"])
+    assert isinstance(runs_dict["oml:runs"]["oml:run"], list), type(
+        runs_dict["oml:runs"]
+    )
 
     runs = {
         int(r["oml:run_id"]): {
diff --git a/openml/study/functions.py b/openml/study/functions.py
index bb24ddcff..24f1d8f7f 100644
--- a/openml/study/functions.py
+++ b/openml/study/functions.py
@@ -30,6 +30,12 @@ def get_suite(suite_id: int | str) -> OpenMLBenchmarkSuite:
     -------
     OpenMLSuite
         The OpenML suite object
+
+    Examples
+    --------
+    >>> import openml
+    >>> suite = openml.study.get_suite(99)  # doctest: +SKIP
+    >>> suite = openml.study.get_suite("OpenML-CC18")  # doctest: +SKIP
     """
     study = _get_study(suite_id, entity_type="task")
     assert isinstance(study, OpenMLBenchmarkSuite)
@@ -59,6 +65,11 @@ def get_study(
     -------
     OpenMLStudy
         The OpenML study object
+
+    Examples
+    --------
+    >>> import openml
+    >>> study = openml.study.get_study(1)  # doctest: +SKIP
     """
     if study_id == "OpenML100":
         message = (
@@ -109,7 +120,10 @@ def _get_study(id_: int | str, entity_type: str) -> BaseStudy:
     tags = []
     if "oml:tag" in result_dict:
         for tag in result_dict["oml:tag"]:
-            current_tag = {"name": tag["oml:name"], "write_access": tag["oml:write_access"]}
+            current_tag = {
+                "name": tag["oml:name"],
+                "write_access": tag["oml:write_access"],
+            }
             if "oml:window_start" in tag:
                 current_tag["window_start"] = tag["oml:window_start"]
             tags.append(current_tag)
@@ -210,6 +224,15 @@ def create_study(
     -------
     OpenMLStudy
         A local OpenML study object (call publish method to upload to server)
+
+    Examples
+    --------
+    >>> import openml
+    >>> study = openml.study.create_study(  # doctest: +SKIP
+    ...     name="My Study",
+    ...     description="A study on classification tasks",
+    ...     run_ids=[1, 2, 3],
+    ... )
     """
     return OpenMLStudy(
         study_id=None,
diff --git a/openml/tasks/functions.py b/openml/tasks/functions.py
index 3fbc7adee..cb1e9295b 100644
--- a/openml/tasks/functions.py
+++ b/openml/tasks/functions.py
@@ -380,7 +380,11 @@ def get_tasks(
     tasks = []
     for task_id in task_ids:
         tasks.append(
-            get_task(task_id, download_data=download_data, download_qualities=download_qualities)
+            get_task(
+                task_id,
+                download_data=download_data,
+                download_qualities=download_qualities,
+            )
         )
     return tasks
 
@@ -411,6 +415,12 @@ def get_task(
     Returns
     -------
     task: OpenMLTask
+
+    Examples
+    --------
+    >>> import openml
+    >>> task = openml.tasks.get_task(1)  # doctest: +SKIP
+    >>> task = openml.tasks.get_task(1, download_splits=True)  # doctest: +SKIP
     """
     if not isinstance(task_id, int):
         raise TypeError(f"Task id should be integer, is {type(task_id)}")

From 2e455d191b05db84ec37a0336fee850a74b328d0 Mon Sep 17 00:00:00 2001
From: Abhishek <abhishekup082gmail.com@Abhisheks-MacBook-Air.local>
Date: Mon, 23 Feb 2026 23:14:14 +0530
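Subject: [PATCH 2/4] [DOC] Restore original line wrapping in
 openml/runs/functions.py (#1538)

Undo the line re-wrapping that PATCH 1/4 applied to
openml/runs/functions.py (the job_rvals unpacking keeps its new form) and
move the noqa comments of _create_run_from_xml onto the lines they apply
to.

Note: this commit also adds .DS_Store and
tests/files/misc/features_with_whitespaces.xml.pkl; both are unwanted and
are removed again in PATCH 3/4.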

---
 .DS_Store                                     | Bin 0 -> 8196 bytes
 openml/runs/functions.py                      |  61 +++++++-----------
 .../misc/features_with_whitespaces.xml.pkl    | Bin 0 -> 253 bytes
 3 files changed, 22 insertions(+), 39 deletions(-)
 create mode 100644 .DS_Store
 create mode 100644 tests/files/misc/features_with_whitespaces.xml.pkl

diff --git a/.DS_Store b/.DS_Store
new file mode 100644
index 0000000000000000000000000000000000000000..391ba45554c973056274a3e8b4a31021f12627d5
GIT binary patch
literal 8196
zcmeI1O=uHA6vyAxGzrAof>3%~{J4l8E!ta!G^QY;f)qSz+a#vNq#K$P>_NJM3Pl7D
zUZoci)E+ztp7bhs61;fvq*tM!C%yQ8JJZdZWH;$aq|TI?x6FGp?>BE|XEIwNQiav>
z1ko4~WszBq<WR&U)^qMk1EyviQlUN7suQy%zuxGwbQLZFE&?tBE&?tBE&~5b1aM|^
z$|juqTI)&|0T+S)G6K9mc*rdCMh04jTL+5p3ILfxb(rux!Vi$1k$EEnEuleDx+$b?
zs>~Hb>E@{Sj9uQyKug`6l(~E;<5`&-iVD4>e=jsAmDkdhE&?tB-4Wosdx}m{m6oVw
z-QVx8oszn`Uo0-v`~}oyzikI^%Fo_U+xl`(ePdC48`xmCl-co1i?m7$mUo&w&}|_Z
zHXc0Rn$`>*{nY(7NwSs2Ljm*Tz*DC+Y9L>y63rtoQbls9<1Zg4TS~s$cds73p!pK|
zm7M*pC9j&EW8NX~GHZ(})I^!UOmy!`Z)?e&+qyO|qq%$ZFWGn3Lh=)RWPXmz3TFNW
z{u0^KYsr?9duTl~UTMcWreDciJpJXcn4K7Fn>WwAX)2SCHE|R1UXiPU=t#O#FWE-l
zMQhdSLlPy~#BihxN7H7QKM#gTC;cD#9Qhae_FEFCxBWii@n<paTX-w#SamgCbC!Sg
zn^#$vjpVpke6jP+#@Mxadf8cA1KZ4%#;i2JvkbDD#kBwX+fKdYIXgJDY4$Oz4>>Qt
zK280+q}P>YV{r_>Vvb?Sfwf)1u3DuN*jbmc!hMU3CXe-RAz8mZd3dg%Ir{jK^t*3m
zjYsc?8SE-0yrD~oZmVOqDYFj#4hzZq`bRX^T~_UxPPVdnJUFAlXs5GR-sXqPwpMbC
z9z1i$#7(r9F^ada{68IG|9dU%(*qjg`~OVx_y4^%Y&RYk0T%%g0hKDw77M5_cWa^2
zk9^nmkmry&v0b2L*aQWeaD8|khm8JVh-c5}@<s+)LWA_zKLm(-j646^zfP?4&Yk~F
KD;|6B{Qm<5Uot`f

literal 0
HcmV?d00001

diff --git a/openml/runs/functions.py b/openml/runs/functions.py
index ffb468c69..41a493dc6 100644
--- a/openml/runs/functions.py
+++ b/openml/runs/functions.py
@@ -282,7 +282,9 @@ def run_flow_on_task(  # noqa: C901, PLR0912, PLR0915, PLR0913
                 setup_id = setup_exists(flow_from_server)
                 ids = run_exists(task.task_id, setup_id)
                 if ids:
-                    error_message = "One or more runs of this setup were already performed on the task."
+                    error_message = (
+                        "One or more runs of this setup were already performed on the task."
+                    )
                     raise OpenMLRunsExistError(ids, error_message)
         else:
             # Flow does not exist on server and we do not want to upload it.
@@ -512,15 +514,11 @@ def _run_task_get_arffcontent(  # noqa: PLR0915, PLR0912, C901
     # this information is multiple times overwritten, but due to the ordering
     # of tne loops, eventually it contains the information based on the full
     # dataset size
-    user_defined_measures_per_fold = (
-        OrderedDict()
-    )  # type: 'OrderedDict[str, OrderedDict]'
+    user_defined_measures_per_fold = OrderedDict()  # type: 'OrderedDict[str, OrderedDict]'
     # stores sample-based evaluation measures (sublevel of fold-based)
     # will also be filled on a non sample-based task, but the information
     # is the same as the fold-based measures, and disregarded in that case
-    user_defined_measures_per_sample = (
-        OrderedDict()
-    )  # type: 'OrderedDict[str, OrderedDict]'
+    user_defined_measures_per_sample = OrderedDict()  # type: 'OrderedDict[str, OrderedDict]'
 
     # TODO use different iterator to only provide a single iterator (less
     # methods, less maintenance, less confusion)
@@ -614,11 +612,7 @@ def _calculate_local_measure(  # type: ignore
                             if isinstance(test_y[i], (int, np.integer))
                             else test_y[i]
                         )
-                    pred_prob = (
-                        proba_y.iloc[i]
-                        if isinstance(proba_y, pd.DataFrame)
-                        else proba_y[i]
-                    )
+                    pred_prob = proba_y.iloc[i] if isinstance(proba_y, pd.DataFrame) else proba_y[i]
 
                     arff_line = format_prediction(
                         task=task,
@@ -681,13 +675,11 @@ def _calculate_local_measure(  # type: ignore
             if rep_no not in user_defined_measures_per_sample[measure]:
                 user_defined_measures_per_sample[measure][rep_no] = OrderedDict()
             if fold_no not in user_defined_measures_per_sample[measure][rep_no]:
-                user_defined_measures_per_sample[measure][rep_no][
-                    fold_no
-                ] = OrderedDict()
+                user_defined_measures_per_sample[measure][rep_no][fold_no] = OrderedDict()
 
-            user_defined_measures_per_fold[measure][rep_no][fold_no] = (
-                user_defined_measures_fold[measure]
-            )
+            user_defined_measures_per_fold[measure][rep_no][fold_no] = user_defined_measures_fold[
+                measure
+            ]
             user_defined_measures_per_sample[measure][rep_no][fold_no][sample_no] = (
                 user_defined_measures_fold[measure]
             )
@@ -843,9 +835,7 @@ def get_run(run_id: int, ignore_cache: bool = False) -> OpenMLRun:  # noqa: FBT0
     run : OpenMLRun
         Run corresponding to ID, fetched from the server.
     """
-    run_dir = Path(
-        openml.utils._create_cache_directory_for_id(RUNS_CACHE_DIR_NAME, run_id)
-    )
+    run_dir = Path(openml.utils._create_cache_directory_for_id(RUNS_CACHE_DIR_NAME, run_id))
     run_file = run_dir / "description.xml"
 
     run_dir.mkdir(parents=True, exist_ok=True)
@@ -864,9 +854,10 @@ def get_run(run_id: int, ignore_cache: bool = False) -> OpenMLRun:  # noqa: FBT0
     return _create_run_from_xml(run_xml)
 
 
-def _create_run_from_xml(
-    xml: str, from_server: bool = True
-) -> OpenMLRun:  # noqa: PLR0915, PLR0912, C901, FBT002
+def _create_run_from_xml(  # noqa: PLR0915, PLR0912, C901
+    xml: str,
+    from_server: bool = True,  # noqa: FBT002
+) -> OpenMLRun:
     """Create a run object from xml returned from server.
 
     Parameters
@@ -896,13 +887,11 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None):  # type: ignore
         if not from_server:
             return None
 
-        raise AttributeError(
-            "Run XML does not contain required (server) field: ", fieldname
-        )
+        raise AttributeError("Run XML does not contain required (server) field: ", fieldname)
 
-    run = xmltodict.parse(
-        xml, force_list=["oml:file", "oml:evaluation", "oml:parameter_setting"]
-    )["oml:run"]
+    run = xmltodict.parse(xml, force_list=["oml:file", "oml:evaluation", "oml:parameter_setting"])[
+        "oml:run"
+    ]
     run_id = obtain_field(run, "oml:run_id", from_server, cast=int)
     uploader = obtain_field(run, "oml:uploader", from_server, cast=int)
     uploader_name = obtain_field(run, "oml:uploader_name", from_server)
@@ -1057,9 +1046,7 @@ def obtain_field(xml_obj, fieldname, from_server, cast=None):  # type: ignore
 
 def _get_cached_run(run_id: int) -> OpenMLRun:
     """Load a run from the cache."""
-    run_cache_dir = openml.utils._create_cache_directory_for_id(
-        RUNS_CACHE_DIR_NAME, run_id
-    )
+    run_cache_dir = openml.utils._create_cache_directory_for_id(RUNS_CACHE_DIR_NAME, run_id)
     run_file = run_cache_dir / "description.xml"
     try:
         with run_file.open(encoding="utf8") as fh:
@@ -1229,9 +1216,7 @@ def __list_runs(api_call: str) -> pd.DataFrame:
     runs_dict = xmltodict.parse(xml_string, force_list=("oml:run",))
     # Minimalistic check if the XML is useful
     if "oml:runs" not in runs_dict:
-        raise ValueError(
-            f'Error in return XML, does not contain "oml:runs": {runs_dict}'
-        )
+        raise ValueError(f'Error in return XML, does not contain "oml:runs": {runs_dict}')
 
     if "@xmlns:oml" not in runs_dict["oml:runs"]:
         raise ValueError(
@@ -1245,9 +1230,7 @@ def __list_runs(api_call: str) -> pd.DataFrame:
             f'"http://openml.org/openml": {runs_dict}',
         )
 
-    assert isinstance(runs_dict["oml:runs"]["oml:run"], list), type(
-        runs_dict["oml:runs"]
-    )
+    assert isinstance(runs_dict["oml:runs"]["oml:run"], list), type(runs_dict["oml:runs"])
 
     runs = {
         int(r["oml:run_id"]): {
diff --git a/tests/files/misc/features_with_whitespaces.xml.pkl b/tests/files/misc/features_with_whitespaces.xml.pkl
new file mode 100644
index 0000000000000000000000000000000000000000..f6a775cc7e9ab9d0db09aa559f3c6b4495859abf
GIT binary patch
literal 253
zcmZo*nfi$V0&1sdcr*0K<QJso<>u(6B$gx=r<N3h8S!bUi6x~)sZ)9c{ejZHJ}y96
zH>lX;DVmK_fSP+)GxJhXE2aPqVaZF(1uA9=Gn~@H2{I|Zq_O}gz@Ar{n_85aJf(*(
zFF!XkFEJ;+EHS4vwRnnOk0?+sDYYm*H?z1nGcO$`0W^&(Kd&S|CqF$Cq^>l{n=ykK
zVq*pykj>u19A;uPC4&=aI>gK=8Qifz=Wr_MDwrAof!>rJ4hTzIZ%S$gk2gaGFVLJ)
FJpk*eSy=!8

literal 0
HcmV?d00001


From c1557096a70df68ba0224e9a403a555c754b539a Mon Sep 17 00:00:00 2001
From: Abhishek <abhishekup082gmail.com@Abhisheks-MacBook-Air.local>
Date: Sun, 1 Mar 2026 22:23:06 +0530
Subject: [PATCH 3/4] Address review: fix run_model_on_task example, remove
 unwanted files

- Use HistGradientBoostingClassifier instead of DecisionTreeClassifier
  (fixes ValueError with missing values in task 1)
- Remove .DS_Store
- Remove tests/files/misc/features_with_whitespaces.xml.pkl
---
 .DS_Store                                        | Bin 8196 -> 0 bytes
 openml/runs/functions.py                         |   4 ++--
 .../files/misc/features_with_whitespaces.xml.pkl | Bin 253 -> 0 bytes
 3 files changed, 2 insertions(+), 2 deletions(-)
 delete mode 100644 .DS_Store
 delete mode 100644 tests/files/misc/features_with_whitespaces.xml.pkl

diff --git a/.DS_Store b/.DS_Store
deleted file mode 100644
index 391ba45554c973056274a3e8b4a31021f12627d5..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 8196
zcmeI1O=uHA6vyAxGzrAof>3%~{J4l8E!ta!G^QY;f)qSz+a#vNq#K$P>_NJM3Pl7D
zUZoci)E+ztp7bhs61;fvq*tM!C%yQ8JJZdZWH;$aq|TI?x6FGp?>BE|XEIwNQiav>
z1ko4~WszBq<WR&U)^qMk1EyviQlUN7suQy%zuxGwbQLZFE&?tBE&?tBE&~5b1aM|^
z$|juqTI)&|0T+S)G6K9mc*rdCMh04jTL+5p3ILfxb(rux!Vi$1k$EEnEuleDx+$b?
zs>~Hb>E@{Sj9uQyKug`6l(~E;<5`&-iVD4>e=jsAmDkdhE&?tB-4Wosdx}m{m6oVw
z-QVx8oszn`Uo0-v`~}oyzikI^%Fo_U+xl`(ePdC48`xmCl-co1i?m7$mUo&w&}|_Z
zHXc0Rn$`>*{nY(7NwSs2Ljm*Tz*DC+Y9L>y63rtoQbls9<1Zg4TS~s$cds73p!pK|
zm7M*pC9j&EW8NX~GHZ(})I^!UOmy!`Z)?e&+qyO|qq%$ZFWGn3Lh=)RWPXmz3TFNW
z{u0^KYsr?9duTl~UTMcWreDciJpJXcn4K7Fn>WwAX)2SCHE|R1UXiPU=t#O#FWE-l
zMQhdSLlPy~#BihxN7H7QKM#gTC;cD#9Qhae_FEFCxBWii@n<paTX-w#SamgCbC!Sg
zn^#$vjpVpke6jP+#@Mxadf8cA1KZ4%#;i2JvkbDD#kBwX+fKdYIXgJDY4$Oz4>>Qt
zK280+q}P>YV{r_>Vvb?Sfwf)1u3DuN*jbmc!hMU3CXe-RAz8mZd3dg%Ir{jK^t*3m
zjYsc?8SE-0yrD~oZmVOqDYFj#4hzZq`bRX^T~_UxPPVdnJUFAlXs5GR-sXqPwpMbC
z9z1i$#7(r9F^ada{68IG|9dU%(*qjg`~OVx_y4^%Y&RYk0T%%g0hKDw77M5_cWa^2
zk9^nmkmry&v0b2L*aQWeaD8|khm8JVh-c5}@<s+)LWA_zKLm(-j646^zfP?4&Yk~F
KD;|6B{Qm<5Uot`f

diff --git a/openml/runs/functions.py b/openml/runs/functions.py
index 41a493dc6..d9ab00d33 100644
--- a/openml/runs/functions.py
+++ b/openml/runs/functions.py
@@ -109,8 +109,8 @@ def run_model_on_task(  # noqa: PLR0913
     --------
     >>> import openml
     >>> import openml_sklearn  # doctest: +SKIP
-    >>> from sklearn.tree import DecisionTreeClassifier  # doctest: +SKIP
-    >>> clf = DecisionTreeClassifier()  # doctest: +SKIP
+    >>> from sklearn.ensemble import HistGradientBoostingClassifier  # doctest: +SKIP
+    >>> clf = HistGradientBoostingClassifier()  # doctest: +SKIP
     >>> task = openml.tasks.get_task(1)  # doctest: +SKIP
     >>> run = openml.runs.run_model_on_task(clf, task)  # doctest: +SKIP
     """
diff --git a/tests/files/misc/features_with_whitespaces.xml.pkl b/tests/files/misc/features_with_whitespaces.xml.pkl
deleted file mode 100644
index f6a775cc7e9ab9d0db09aa559f3c6b4495859abf..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

literal 253
zcmZo*nfi$V0&1sdcr*0K<QJso<>u(6B$gx=r<N3h8S!bUi6x~)sZ)9c{ejZHJ}y96
zH>lX;DVmK_fSP+)GxJhXE2aPqVaZF(1uA9=Gn~@H2{I|Zq_O}gz@Ar{n_85aJf(*(
zFF!XkFEJ;+EHS4vwRnnOk0?+sDYYm*H?z1nGcO$`0W^&(Kd&S|CqF$Cq^>l{n=ykK
zVq*pykj>u19A;uPC4&=aI>gK=8Qifz=Wr_MDwrAof!>rJ4hTzIZ%S$gk2gaGFVLJ)
FJpk*eSy=!8


From 49bd8db00c023226dd3e58c91f36727f151b176c Mon Sep 17 00:00:00 2001
From: Abhishek <abhishekup082gmail.com@Abhisheks-MacBook-Air.local>
Date: Mon, 2 Mar 2026 02:03:48 +0530
Subject: [PATCH 4/4] Use get_task(6) with DecisionTreeClassifier per review
 feedback

---
 openml/runs/functions.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/openml/runs/functions.py b/openml/runs/functions.py
index d9ab00d33..dc81f9987 100644
--- a/openml/runs/functions.py
+++ b/openml/runs/functions.py
@@ -109,9 +109,9 @@ def run_model_on_task(  # noqa: PLR0913
     --------
     >>> import openml
     >>> import openml_sklearn  # doctest: +SKIP
-    >>> from sklearn.ensemble import HistGradientBoostingClassifier  # doctest: +SKIP
-    >>> clf = HistGradientBoostingClassifier()  # doctest: +SKIP
-    >>> task = openml.tasks.get_task(1)  # doctest: +SKIP
+    >>> from sklearn.tree import DecisionTreeClassifier  # doctest: +SKIP
+    >>> clf = DecisionTreeClassifier()  # doctest: +SKIP
+    >>> task = openml.tasks.get_task(6)  # doctest: +SKIP
     >>> run = openml.runs.run_model_on_task(clf, task)  # doctest: +SKIP
     """
     if avoid_duplicate_runs is None: