diff --git a/doc/progress.rst b/doc/progress.rst index 8d3f4ec1d..1c48e13c9 100644 --- a/doc/progress.rst +++ b/doc/progress.rst @@ -8,7 +8,7 @@ Changelog 0.12.2 ~~~~~~ - +* MAINT/DOC: #1066/#1063 Use names instead of integer ids in the examples whenever possible. Update documentation for ``get_dataset``. * DOC: Fixes a few broken links in the documentation. * MAINT/DOC: Automatically check for broken external links when building the documentation. * MAINT/DOC: Fail documentation building on warnings. This will make the documentation building diff --git a/examples/20_basic/simple_datasets_tutorial.py b/examples/20_basic/simple_datasets_tutorial.py index c525a3ef9..62b005846 100644 --- a/examples/20_basic/simple_datasets_tutorial.py +++ b/examples/20_basic/simple_datasets_tutorial.py @@ -26,8 +26,7 @@ # Download a dataset # ================== -# Iris dataset https://www.openml.org/d/61 -dataset = openml.datasets.get_dataset(61) +dataset = openml.datasets.get_dataset(dataset_id="iris", version=1) # Print a summary print( diff --git a/examples/20_basic/simple_flows_and_runs_tutorial.py b/examples/20_basic/simple_flows_and_runs_tutorial.py index 1d3bb5d6f..398a37c28 100644 --- a/examples/20_basic/simple_flows_and_runs_tutorial.py +++ b/examples/20_basic/simple_flows_and_runs_tutorial.py @@ -21,7 +21,7 @@ # ============================== # NOTE: We are using dataset 20 from the test server: https://test.openml.org/d/20 -dataset = openml.datasets.get_dataset(20) +dataset = openml.datasets.get_dataset(dataset_id="diabetes", version=1) X, y, categorical_indicator, attribute_names = dataset.get_data( dataset_format="array", target=dataset.default_target_attribute ) diff --git a/examples/20_basic/simple_suites_tutorial.py b/examples/20_basic/simple_suites_tutorial.py index 92dfb3c04..64c8de490 100644 --- a/examples/20_basic/simple_suites_tutorial.py +++ b/examples/20_basic/simple_suites_tutorial.py @@ -38,8 +38,9 @@ 
#################################################################################################### # Downloading benchmark suites # ============================ - -suite = openml.study.get_suite(99) +# OpenML Benchmarking Suites and the OpenML-CC18 +# https://www.openml.org/s/99 +suite = openml.study.get_suite("OpenML-CC18") print(suite) #################################################################################################### diff --git a/examples/30_extended/configure_logging.py b/examples/30_extended/configure_logging.py index 2dae4047f..2782f9733 100644 --- a/examples/30_extended/configure_logging.py +++ b/examples/30_extended/configure_logging.py @@ -24,7 +24,7 @@ import openml -openml.datasets.get_dataset("iris") +openml.datasets.get_dataset("iris", version=1) # With default configuration, the above example will show no output to console. # However, in your cache directory you should find a file named 'openml_python.log', @@ -39,7 +39,7 @@ openml.config.console_log.setLevel(logging.DEBUG) openml.config.file_log.setLevel(logging.WARNING) -openml.datasets.get_dataset("iris") +openml.datasets.get_dataset("iris", version=1) # Now the log level that was previously written to file should also be shown in the console. # The message is now no longer written to file as the `file_log` was set to level `WARNING`. diff --git a/examples/30_extended/datasets_tutorial.py b/examples/30_extended/datasets_tutorial.py index e8aa94f2b..11ea2f5bd 100644 --- a/examples/30_extended/datasets_tutorial.py +++ b/examples/30_extended/datasets_tutorial.py @@ -52,7 +52,7 @@ # ================= # This is done based on the dataset ID. -dataset = openml.datasets.get_dataset(1471) +dataset = openml.datasets.get_dataset(dataset_id="eeg-eye-state", version=1) # Print a summary print( @@ -92,7 +92,7 @@ # data file. The dataset object can be used as normal. # Whenever you use any functionality that requires the data, # such as `get_data`, the data will be downloaded. 
-dataset = openml.datasets.get_dataset(1471, download_data=False) +dataset = openml.datasets.get_dataset(dataset_id="eeg-eye-state", version=1, download_data=False) ############################################################################ # Exercise 2 diff --git a/examples/30_extended/flows_and_runs_tutorial.py b/examples/30_extended/flows_and_runs_tutorial.py index 714ce7b55..9adc8cb96 100644 --- a/examples/30_extended/flows_and_runs_tutorial.py +++ b/examples/30_extended/flows_and_runs_tutorial.py @@ -25,7 +25,7 @@ # Train a scikit-learn model on the data manually. # NOTE: We are using dataset 68 from the test server: https://test.openml.org/d/68 -dataset = openml.datasets.get_dataset(68) +dataset = openml.datasets.get_dataset(dataset_id="eeg-eye-state", version=1) X, y, categorical_indicator, attribute_names = dataset.get_data( dataset_format="array", target=dataset.default_target_attribute ) @@ -36,7 +36,7 @@ # You can also ask for meta-data to automatically preprocess the data. # # * e.g. 
categorical features -> do feature encoding -dataset = openml.datasets.get_dataset(17) +dataset = openml.datasets.get_dataset(dataset_id="credit-g", version=1) X, y, categorical_indicator, attribute_names = dataset.get_data( dataset_format="array", target=dataset.default_target_attribute ) diff --git a/examples/30_extended/study_tutorial.py b/examples/30_extended/study_tutorial.py index b66c49096..08ebd49dc 100644 --- a/examples/30_extended/study_tutorial.py +++ b/examples/30_extended/study_tutorial.py @@ -77,7 +77,8 @@ tasks = [115, 259, 307] # To verify -suite = openml.study.get_suite(1) +# https://test.openml.org/api/v1/study/1 +suite = openml.study.get_suite("OpenML100") print(all([t_id in suite.tasks for t_id in tasks])) run_ids = [] diff --git a/examples/30_extended/suites_tutorial.py b/examples/30_extended/suites_tutorial.py index 9b8c1d73d..4b3916181 100644 --- a/examples/30_extended/suites_tutorial.py +++ b/examples/30_extended/suites_tutorial.py @@ -37,7 +37,8 @@ ############################################################################ # This is done based on the dataset ID. -suite = openml.study.get_suite(99) +# https://www.openml.org/api/v1/study/99 +suite = openml.study.get_suite("OpenML-CC18") print(suite) ############################################################################ diff --git a/openml/datasets/functions.py b/openml/datasets/functions.py index 746285650..180f850b7 100644 --- a/openml/datasets/functions.py +++ b/openml/datasets/functions.py @@ -368,7 +368,8 @@ def get_dataset( Parameters ---------- dataset_id : int or str - Dataset ID of the dataset to download + Dataset ID of the dataset to download. It can be the integer ID or a string + containing the dataset name. download_data : bool, optional (default=True) If True, also download the data file. Beware that some datasets are large and it might make the operation noticeably slower. Metadata is also still retrieved.