Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions openml/datasets/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -364,6 +364,11 @@ def get_datasets(
-------
datasets : list of datasets
A list of dataset objects.

Examples
--------
>>> import openml
>>> datasets = openml.datasets.get_datasets([1, 2, 3]) # doctest: +SKIP
"""
datasets = []
for dataset_id in dataset_ids:
Expand Down Expand Up @@ -446,6 +451,13 @@ def get_dataset( # noqa: C901, PLR0912
-------
dataset : :class:`openml.OpenMLDataset`
The downloaded dataset.

Examples
--------
>>> import openml
>>> dataset = openml.datasets.get_dataset(1) # doctest: +SKIP
>>> dataset = openml.datasets.get_dataset("iris", version=1) # doctest: +SKIP
>>> dataset = openml.datasets.get_dataset(1, download_data=True) # doctest: +SKIP
"""
if download_all_files:
warnings.warn(
Expand Down
25 changes: 21 additions & 4 deletions openml/runs/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,15 @@ def run_model_on_task( # noqa: PLR0913
Result of the run.
flow : OpenMLFlow (optional, only if `return_flow` is True).
Flow generated from the model.

Examples
--------
>>> import openml
>>> import openml_sklearn # doctest: +SKIP
>>> from sklearn.tree import DecisionTreeClassifier # doctest: +SKIP
>>> clf = DecisionTreeClassifier() # doctest: +SKIP
>>> task = openml.tasks.get_task(6) # doctest: +SKIP
>>> run = openml.runs.run_model_on_task(clf, task) # doctest: +SKIP
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

this currently raises the following error, can you look into it?

ValueError: could not convert string to float: '?'

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Okay, I’ll take a look.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would say a better example would be to use a task with a more suitable dataset — maybe openml.tasks.get_task(6) — and keep using DecisionTreeClassifier, since it's more common as an example

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the suggestion.
That makes sense — using openml.tasks.get_task(6) with DecisionTreeClassifier will make the example clearer and more standard.
I’ll update the example accordingly and push the changes shortly.
Thanks

"""
if avoid_duplicate_runs is None:
avoid_duplicate_runs = openml.config.avoid_duplicate_runs
Expand Down Expand Up @@ -559,9 +568,14 @@ def _run_task_get_arffcontent( # noqa: PLR0915, PLR0912, C901
) # job_rvals contain the output of all the runs with one-to-one correspondence with `jobs`

for n_fit, rep_no, fold_no, sample_no in jobs:
pred_y, proba_y, test_indices, test_y, inner_trace, user_defined_measures_fold = job_rvals[
n_fit - 1
]
(
pred_y,
proba_y,
test_indices,
test_y,
inner_trace,
user_defined_measures_fold,
) = job_rvals[n_fit - 1]

if inner_trace is not None:
traces.append(inner_trace)
Expand Down Expand Up @@ -846,7 +860,10 @@ def get_run(run_id: int, ignore_cache: bool = False) -> OpenMLRun: # noqa: FBT0
return _create_run_from_xml(run_xml)


def _create_run_from_xml(xml: str, from_server: bool = True) -> OpenMLRun: # noqa: PLR0915, PLR0912, C901, FBT002
def _create_run_from_xml( # noqa: PLR0915, PLR0912, C901
xml: str,
from_server: bool = True, # noqa: FBT002
) -> OpenMLRun:
"""Create a run object from xml returned from server.

Parameters
Expand Down
25 changes: 24 additions & 1 deletion openml/study/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,12 @@ def get_suite(suite_id: int | str) -> OpenMLBenchmarkSuite:
-------
OpenMLSuite
The OpenML suite object

Examples
--------
>>> import openml
>>> suite = openml.study.get_suite(99) # doctest: +SKIP
>>> suite = openml.study.get_suite("OpenML-CC18") # doctest: +SKIP
"""
study = _get_study(suite_id, entity_type="task")
assert isinstance(study, OpenMLBenchmarkSuite)
Expand Down Expand Up @@ -59,6 +65,11 @@ def get_study(
-------
OpenMLStudy
The OpenML study object

Examples
--------
>>> import openml
>>> study = openml.study.get_study(1) # doctest: +SKIP
"""
if study_id == "OpenML100":
message = (
Expand Down Expand Up @@ -109,7 +120,10 @@ def _get_study(id_: int | str, entity_type: str) -> BaseStudy:
tags = []
if "oml:tag" in result_dict:
for tag in result_dict["oml:tag"]:
current_tag = {"name": tag["oml:name"], "write_access": tag["oml:write_access"]}
current_tag = {
"name": tag["oml:name"],
"write_access": tag["oml:write_access"],
}
if "oml:window_start" in tag:
current_tag["window_start"] = tag["oml:window_start"]
tags.append(current_tag)
Expand Down Expand Up @@ -210,6 +224,15 @@ def create_study(
-------
OpenMLStudy
A local OpenML study object (call publish method to upload to server)

Examples
--------
>>> import openml
>>> study = openml.study.create_study( # doctest: +SKIP
... name="My Study",
... description="A study on classification tasks",
... run_ids=[1, 2, 3],
... )
"""
return OpenMLStudy(
study_id=None,
Expand Down
12 changes: 11 additions & 1 deletion openml/tasks/functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -380,7 +380,11 @@ def get_tasks(
tasks = []
for task_id in task_ids:
tasks.append(
get_task(task_id, download_data=download_data, download_qualities=download_qualities)
get_task(
task_id,
download_data=download_data,
download_qualities=download_qualities,
)
)
return tasks

Expand Down Expand Up @@ -411,6 +415,12 @@ def get_task(
Returns
-------
task: OpenMLTask

Examples
--------
>>> import openml
>>> task = openml.tasks.get_task(1) # doctest: +SKIP
>>> task = openml.tasks.get_task(1, download_splits=True) # doctest: +SKIP
"""
if not isinstance(task_id, int):
raise TypeError(f"Task id should be integer, is {type(task_id)}")
Expand Down