Created
October 25, 2019 16:15
-
-
Save Neeratyoy/f694f1961a72649c8f4f1c26d4aeda9e to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 1, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>tid</th>\n", | |
" <th>ttid</th>\n", | |
" <th>did</th>\n", | |
" <th>name</th>\n", | |
" <th>task_type</th>\n", | |
" <th>status</th>\n", | |
" <th>estimation_procedure</th>\n", | |
" <th>evaluation_measures</th>\n", | |
" <th>source_data</th>\n", | |
" <th>target_feature</th>\n", | |
" <th>...</th>\n", | |
" <th>NumberOfFeatures</th>\n", | |
" <th>NumberOfInstances</th>\n", | |
" <th>NumberOfInstancesWithMissingValues</th>\n", | |
" <th>NumberOfMissingValues</th>\n", | |
" <th>NumberOfNumericFeatures</th>\n", | |
" <th>NumberOfSymbolicFeatures</th>\n", | |
" <th>number_samples</th>\n", | |
" <th>cost_matrix</th>\n", | |
" <th>quality_measure</th>\n", | |
" <th>target_value</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>59</th>\n", | |
" <td>59</td>\n", | |
" <td>1</td>\n", | |
" <td>61</td>\n", | |
" <td>iris</td>\n", | |
" <td>Supervised Classification</td>\n", | |
" <td>active</td>\n", | |
" <td>10-fold Crossvalidation</td>\n", | |
" <td>predictive_accuracy</td>\n", | |
" <td>61</td>\n", | |
" <td>class</td>\n", | |
" <td>...</td>\n", | |
" <td>5</td>\n", | |
" <td>150</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>118</th>\n", | |
" <td>118</td>\n", | |
" <td>3</td>\n", | |
" <td>61</td>\n", | |
" <td>iris</td>\n", | |
" <td>Learning Curve</td>\n", | |
" <td>active</td>\n", | |
" <td>10 times 10-fold Learning Curve</td>\n", | |
" <td>predictive_accuracy</td>\n", | |
" <td>61</td>\n", | |
" <td>class</td>\n", | |
" <td>...</td>\n", | |
" <td>5</td>\n", | |
" <td>150</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>4</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>289</th>\n", | |
" <td>289</td>\n", | |
" <td>1</td>\n", | |
" <td>61</td>\n", | |
" <td>iris</td>\n", | |
" <td>Supervised Classification</td>\n", | |
" <td>active</td>\n", | |
" <td>33% Holdout set</td>\n", | |
" <td>predictive_accuracy</td>\n", | |
" <td>61</td>\n", | |
" <td>class</td>\n", | |
" <td>...</td>\n", | |
" <td>5</td>\n", | |
" <td>150</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1758</th>\n", | |
" <td>1758</td>\n", | |
" <td>3</td>\n", | |
" <td>61</td>\n", | |
" <td>iris</td>\n", | |
" <td>Learning Curve</td>\n", | |
" <td>active</td>\n", | |
" <td>10-fold Learning Curve</td>\n", | |
" <td>predictive_accuracy</td>\n", | |
" <td>61</td>\n", | |
" <td>class</td>\n", | |
" <td>...</td>\n", | |
" <td>5</td>\n", | |
" <td>150</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>4</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1823</th>\n", | |
" <td>1823</td>\n", | |
" <td>1</td>\n", | |
" <td>61</td>\n", | |
" <td>iris</td>\n", | |
" <td>Supervised Classification</td>\n", | |
" <td>active</td>\n", | |
" <td>5 times 2-fold Crossvalidation</td>\n", | |
" <td>predictive_accuracy</td>\n", | |
" <td>61</td>\n", | |
" <td>class</td>\n", | |
" <td>...</td>\n", | |
" <td>5</td>\n", | |
" <td>150</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>5 rows × 24 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" tid ttid did name task_type status \\\n", | |
"59 59 1 61 iris Supervised Classification active \n", | |
"118 118 3 61 iris Learning Curve active \n", | |
"289 289 1 61 iris Supervised Classification active \n", | |
"1758 1758 3 61 iris Learning Curve active \n", | |
"1823 1823 1 61 iris Supervised Classification active \n", | |
"\n", | |
" estimation_procedure evaluation_measures source_data \\\n", | |
"59 10-fold Crossvalidation predictive_accuracy 61 \n", | |
"118 10 times 10-fold Learning Curve predictive_accuracy 61 \n", | |
"289 33% Holdout set predictive_accuracy 61 \n", | |
"1758 10-fold Learning Curve predictive_accuracy 61 \n", | |
"1823 5 times 2-fold Crossvalidation predictive_accuracy 61 \n", | |
"\n", | |
" target_feature ... NumberOfFeatures NumberOfInstances \\\n", | |
"59 class ... 5 150 \n", | |
"118 class ... 5 150 \n", | |
"289 class ... 5 150 \n", | |
"1758 class ... 5 150 \n", | |
"1823 class ... 5 150 \n", | |
"\n", | |
" NumberOfInstancesWithMissingValues NumberOfMissingValues \\\n", | |
"59 0 0 \n", | |
"118 0 0 \n", | |
"289 0 0 \n", | |
"1758 0 0 \n", | |
"1823 0 0 \n", | |
"\n", | |
" NumberOfNumericFeatures NumberOfSymbolicFeatures number_samples \\\n", | |
"59 4 1 NaN \n", | |
"118 4 1 4 \n", | |
"289 4 1 NaN \n", | |
"1758 4 1 4 \n", | |
"1823 4 1 NaN \n", | |
"\n", | |
" cost_matrix quality_measure target_value \n", | |
"59 NaN NaN NaN \n", | |
"118 NaN NaN NaN \n", | |
"289 NaN NaN NaN \n", | |
"1758 NaN NaN NaN \n", | |
"1823 NaN NaN NaN \n", | |
"\n", | |
"[5 rows x 24 columns]" | |
] | |
}, | |
"execution_count": 1, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Imported in the first cell so the notebook runs top-to-bottom on a fresh kernel\n", | |
"import openml\n", | |
"\n", | |
"# List the OpenML tasks defined on dataset 61 as a DataFrame and preview the first rows\n", | |
"df = openml.tasks.list_tasks(data_id=61, output_format='dataframe')\n", | |
"df.head()" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 2, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>tid</th>\n", | |
" <th>ttid</th>\n", | |
" <th>did</th>\n", | |
" <th>name</th>\n", | |
" <th>task_type</th>\n", | |
" <th>status</th>\n", | |
" <th>estimation_procedure</th>\n", | |
" <th>evaluation_measures</th>\n", | |
" <th>source_data</th>\n", | |
" <th>target_feature</th>\n", | |
" <th>...</th>\n", | |
" <th>NumberOfFeatures</th>\n", | |
" <th>NumberOfInstances</th>\n", | |
" <th>NumberOfInstancesWithMissingValues</th>\n", | |
" <th>NumberOfMissingValues</th>\n", | |
" <th>NumberOfNumericFeatures</th>\n", | |
" <th>NumberOfSymbolicFeatures</th>\n", | |
" <th>number_samples</th>\n", | |
" <th>cost_matrix</th>\n", | |
" <th>quality_measure</th>\n", | |
" <th>target_value</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>59</th>\n", | |
" <td>59</td>\n", | |
" <td>1</td>\n", | |
" <td>61</td>\n", | |
" <td>iris</td>\n", | |
" <td>Supervised Classification</td>\n", | |
" <td>active</td>\n", | |
" <td>10-fold Crossvalidation</td>\n", | |
" <td>predictive_accuracy</td>\n", | |
" <td>61</td>\n", | |
" <td>class</td>\n", | |
" <td>...</td>\n", | |
" <td>5</td>\n", | |
" <td>150</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>289</th>\n", | |
" <td>289</td>\n", | |
" <td>1</td>\n", | |
" <td>61</td>\n", | |
" <td>iris</td>\n", | |
" <td>Supervised Classification</td>\n", | |
" <td>active</td>\n", | |
" <td>33% Holdout set</td>\n", | |
" <td>predictive_accuracy</td>\n", | |
" <td>61</td>\n", | |
" <td>class</td>\n", | |
" <td>...</td>\n", | |
" <td>5</td>\n", | |
" <td>150</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1823</th>\n", | |
" <td>1823</td>\n", | |
" <td>1</td>\n", | |
" <td>61</td>\n", | |
" <td>iris</td>\n", | |
" <td>Supervised Classification</td>\n", | |
" <td>active</td>\n", | |
" <td>5 times 2-fold Crossvalidation</td>\n", | |
" <td>predictive_accuracy</td>\n", | |
" <td>61</td>\n", | |
" <td>class</td>\n", | |
" <td>...</td>\n", | |
" <td>5</td>\n", | |
" <td>150</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1939</th>\n", | |
" <td>1939</td>\n", | |
" <td>1</td>\n", | |
" <td>61</td>\n", | |
" <td>iris</td>\n", | |
" <td>Supervised Classification</td>\n", | |
" <td>active</td>\n", | |
" <td>10 times 10-fold Crossvalidation</td>\n", | |
" <td>predictive_accuracy</td>\n", | |
" <td>61</td>\n", | |
" <td>class</td>\n", | |
" <td>...</td>\n", | |
" <td>5</td>\n", | |
" <td>150</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1992</th>\n", | |
" <td>1992</td>\n", | |
" <td>1</td>\n", | |
" <td>61</td>\n", | |
" <td>iris</td>\n", | |
" <td>Supervised Classification</td>\n", | |
" <td>active</td>\n", | |
" <td>Leave one out</td>\n", | |
" <td>predictive_accuracy</td>\n", | |
" <td>61</td>\n", | |
" <td>class</td>\n", | |
" <td>...</td>\n", | |
" <td>5</td>\n", | |
" <td>150</td>\n", | |
" <td>0</td>\n", | |
" <td>0</td>\n", | |
" <td>4</td>\n", | |
" <td>1</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" <td>NaN</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"<p>5 rows × 24 columns</p>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" tid ttid did name task_type status \\\n", | |
"59 59 1 61 iris Supervised Classification active \n", | |
"289 289 1 61 iris Supervised Classification active \n", | |
"1823 1823 1 61 iris Supervised Classification active \n", | |
"1939 1939 1 61 iris Supervised Classification active \n", | |
"1992 1992 1 61 iris Supervised Classification active \n", | |
"\n", | |
" estimation_procedure evaluation_measures source_data \\\n", | |
"59 10-fold Crossvalidation predictive_accuracy 61 \n", | |
"289 33% Holdout set predictive_accuracy 61 \n", | |
"1823 5 times 2-fold Crossvalidation predictive_accuracy 61 \n", | |
"1939 10 times 10-fold Crossvalidation predictive_accuracy 61 \n", | |
"1992 Leave one out predictive_accuracy 61 \n", | |
"\n", | |
" target_feature ... NumberOfFeatures NumberOfInstances \\\n", | |
"59 class ... 5 150 \n", | |
"289 class ... 5 150 \n", | |
"1823 class ... 5 150 \n", | |
"1939 class ... 5 150 \n", | |
"1992 class ... 5 150 \n", | |
"\n", | |
" NumberOfInstancesWithMissingValues NumberOfMissingValues \\\n", | |
"59 0 0 \n", | |
"289 0 0 \n", | |
"1823 0 0 \n", | |
"1939 0 0 \n", | |
"1992 0 0 \n", | |
"\n", | |
" NumberOfNumericFeatures NumberOfSymbolicFeatures number_samples \\\n", | |
"59 4 1 NaN \n", | |
"289 4 1 NaN \n", | |
"1823 4 1 NaN \n", | |
"1939 4 1 NaN \n", | |
"1992 4 1 NaN \n", | |
"\n", | |
" cost_matrix quality_measure target_value \n", | |
"59 NaN NaN NaN \n", | |
"289 NaN NaN NaN \n", | |
"1823 NaN NaN NaN \n", | |
"1939 NaN NaN NaN \n", | |
"1992 NaN NaN NaN \n", | |
"\n", | |
"[5 rows x 24 columns]" | |
] | |
}, | |
"execution_count": 2, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"# Preview the rows of `df` whose task_type is Supervised Classification\n", | |
"(\n", | |
"    df\n", | |
"    .query(\"task_type=='Supervised Classification'\")\n", | |
"    .head()\n", | |
")" | |
] | |
}, | |
{ | |
"cell_type": "code", | |
"execution_count": 3, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"name": "stdout", | |
"output_type": "stream", | |
"text": [ | |
"11\n" | |
] | |
} | |
], | |
"source": [ | |
"# Gather the `tid` of every Supervised Classification task into a NumPy array\n", | |
"# and report how many tasks were found\n", | |
"tasks = (\n", | |
"    df\n", | |
"    .query(\"task_type=='Supervised Classification'\")['tid']\n", | |
"    .to_numpy()\n", | |
")\n", | |
"print(len(tasks))" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.8" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment