Skip to content

Instantly share code, notes, and snippets.

@Neeratyoy
Created October 25, 2019 16:21
Show Gist options
  • Save Neeratyoy/c65915651924b25582d301f9d0f0d33d to your computer and use it in GitHub Desktop.
Save Neeratyoy/c65915651924b25582d301f9d0f0d33d to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>run_id</th>\n",
" <th>task_id</th>\n",
" <th>setup_id</th>\n",
" <th>flow_id</th>\n",
" <th>flow_name</th>\n",
" <th>data_id</th>\n",
" <th>data_name</th>\n",
" <th>function</th>\n",
" <th>upload_time</th>\n",
" <th>uploader</th>\n",
" <th>uploader_name</th>\n",
" <th>value</th>\n",
" <th>values</th>\n",
" <th>array_data</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>81</td>\n",
" <td>59</td>\n",
" <td>12</td>\n",
" <td>67</td>\n",
" <td>weka.BayesNet_K2(1)</td>\n",
" <td>61</td>\n",
" <td>iris</td>\n",
" <td>predictive_accuracy</td>\n",
" <td>2014-04-07 00:05:11</td>\n",
" <td>1</td>\n",
" <td>janvanrijn@gmail.com</td>\n",
" <td>0.940000</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>161</td>\n",
" <td>59</td>\n",
" <td>13</td>\n",
" <td>70</td>\n",
" <td>weka.SMO_PolyKernel(1)</td>\n",
" <td>61</td>\n",
" <td>iris</td>\n",
" <td>predictive_accuracy</td>\n",
" <td>2014-04-07 00:55:32</td>\n",
" <td>1</td>\n",
" <td>janvanrijn@gmail.com</td>\n",
" <td>0.960000</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>234</td>\n",
" <td>59</td>\n",
" <td>1</td>\n",
" <td>56</td>\n",
" <td>weka.ZeroR(1)</td>\n",
" <td>61</td>\n",
" <td>iris</td>\n",
" <td>predictive_accuracy</td>\n",
" <td>2014-04-07 01:33:24</td>\n",
" <td>1</td>\n",
" <td>janvanrijn@gmail.com</td>\n",
" <td>0.333333</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>447</td>\n",
" <td>59</td>\n",
" <td>6</td>\n",
" <td>61</td>\n",
" <td>weka.REPTree(1)</td>\n",
" <td>61</td>\n",
" <td>iris</td>\n",
" <td>predictive_accuracy</td>\n",
" <td>2014-04-07 06:26:27</td>\n",
" <td>1</td>\n",
" <td>janvanrijn@gmail.com</td>\n",
" <td>0.926667</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>473</td>\n",
" <td>59</td>\n",
" <td>18</td>\n",
" <td>77</td>\n",
" <td>weka.LogitBoost_DecisionStump(1)</td>\n",
" <td>61</td>\n",
" <td>iris</td>\n",
" <td>predictive_accuracy</td>\n",
" <td>2014-04-07 06:39:27</td>\n",
" <td>1</td>\n",
" <td>janvanrijn@gmail.com</td>\n",
" <td>0.946667</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" run_id task_id setup_id flow_id flow_name \\\n",
"0 81 59 12 67 weka.BayesNet_K2(1) \n",
"1 161 59 13 70 weka.SMO_PolyKernel(1) \n",
"2 234 59 1 56 weka.ZeroR(1) \n",
"3 447 59 6 61 weka.REPTree(1) \n",
"4 473 59 18 77 weka.LogitBoost_DecisionStump(1) \n",
"\n",
" data_id data_name function upload_time uploader \\\n",
"0 61 iris predictive_accuracy 2014-04-07 00:05:11 1 \n",
"1 61 iris predictive_accuracy 2014-04-07 00:55:32 1 \n",
"2 61 iris predictive_accuracy 2014-04-07 01:33:24 1 \n",
"3 61 iris predictive_accuracy 2014-04-07 06:26:27 1 \n",
"4 61 iris predictive_accuracy 2014-04-07 06:39:27 1 \n",
"\n",
" uploader_name value values array_data \n",
"0 janvanrijn@gmail.com 0.940000 None None \n",
"1 janvanrijn@gmail.com 0.960000 None None \n",
"2 janvanrijn@gmail.com 0.333333 None None \n",
"3 janvanrijn@gmail.com 0.926667 None None \n",
"4 janvanrijn@gmail.com 0.946667 None None "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Listing all evaluations made on the 11 tasks collected above\n",
"# with evaluation metric as 'predictive_accuracy'\n",
"task_df = openml.evaluations.list_evaluations(function='predictive_accuracy', task=tasks, output_format='dataframe')\n",
"task_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>run_id</th>\n",
" <th>task_id</th>\n",
" <th>setup_id</th>\n",
" <th>flow_id</th>\n",
" <th>flow_name</th>\n",
" <th>data_id</th>\n",
" <th>data_name</th>\n",
" <th>function</th>\n",
" <th>upload_time</th>\n",
" <th>uploader</th>\n",
" <th>uploader_name</th>\n",
" <th>value</th>\n",
" <th>values</th>\n",
" <th>array_data</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>144</th>\n",
" <td>1849043</td>\n",
" <td>59</td>\n",
" <td>29015</td>\n",
" <td>5500</td>\n",
" <td>sklearn.ensemble.forest.RandomForestClassifier...</td>\n",
" <td>61</td>\n",
" <td>iris</td>\n",
" <td>predictive_accuracy</td>\n",
" <td>2017-03-03 17:10:12</td>\n",
" <td>1</td>\n",
" <td>janvanrijn@gmail.com</td>\n",
" <td>0.946667</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>145</th>\n",
" <td>1853409</td>\n",
" <td>59</td>\n",
" <td>30950</td>\n",
" <td>5873</td>\n",
" <td>sklearn.pipeline.Pipeline(Imputer=openml.utils...</td>\n",
" <td>61</td>\n",
" <td>iris</td>\n",
" <td>predictive_accuracy</td>\n",
" <td>2017-03-21 22:08:01</td>\n",
" <td>1</td>\n",
" <td>janvanrijn@gmail.com</td>\n",
" <td>0.960000</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>146</th>\n",
" <td>6130126</td>\n",
" <td>59</td>\n",
" <td>4163633</td>\n",
" <td>7108</td>\n",
" <td>sklearn.model_selection._search.RandomizedSear...</td>\n",
" <td>61</td>\n",
" <td>iris</td>\n",
" <td>predictive_accuracy</td>\n",
" <td>2017-08-21 11:07:40</td>\n",
" <td>1</td>\n",
" <td>janvanrijn@gmail.com</td>\n",
" <td>0.960000</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>147</th>\n",
" <td>6130128</td>\n",
" <td>59</td>\n",
" <td>4163634</td>\n",
" <td>7108</td>\n",
" <td>sklearn.model_selection._search.RandomizedSear...</td>\n",
" <td>61</td>\n",
" <td>iris</td>\n",
" <td>predictive_accuracy</td>\n",
" <td>2017-08-21 11:08:06</td>\n",
" <td>1</td>\n",
" <td>janvanrijn@gmail.com</td>\n",
" <td>0.946667</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>148</th>\n",
" <td>6715383</td>\n",
" <td>59</td>\n",
" <td>4747289</td>\n",
" <td>7117</td>\n",
" <td>sklearn.model_selection._search.RandomizedSear...</td>\n",
" <td>61</td>\n",
" <td>iris</td>\n",
" <td>predictive_accuracy</td>\n",
" <td>2017-09-01 02:56:44</td>\n",
" <td>1</td>\n",
" <td>janvanrijn@gmail.com</td>\n",
" <td>0.960000</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" run_id task_id setup_id flow_id \\\n",
"144 1849043 59 29015 5500 \n",
"145 1853409 59 30950 5873 \n",
"146 6130126 59 4163633 7108 \n",
"147 6130128 59 4163634 7108 \n",
"148 6715383 59 4747289 7117 \n",
"\n",
" flow_name data_id data_name \\\n",
"144 sklearn.ensemble.forest.RandomForestClassifier... 61 iris \n",
"145 sklearn.pipeline.Pipeline(Imputer=openml.utils... 61 iris \n",
"146 sklearn.model_selection._search.RandomizedSear... 61 iris \n",
"147 sklearn.model_selection._search.RandomizedSear... 61 iris \n",
"148 sklearn.model_selection._search.RandomizedSear... 61 iris \n",
"\n",
" function upload_time uploader uploader_name \\\n",
"144 predictive_accuracy 2017-03-03 17:10:12 1 janvanrijn@gmail.com \n",
"145 predictive_accuracy 2017-03-21 22:08:01 1 janvanrijn@gmail.com \n",
"146 predictive_accuracy 2017-08-21 11:07:40 1 janvanrijn@gmail.com \n",
"147 predictive_accuracy 2017-08-21 11:08:06 1 janvanrijn@gmail.com \n",
"148 predictive_accuracy 2017-09-01 02:56:44 1 janvanrijn@gmail.com \n",
"\n",
" value values array_data \n",
"144 0.946667 None None \n",
"145 0.960000 None None \n",
"146 0.960000 None None \n",
"147 0.946667 None None \n",
"148 0.960000 None None "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Filtering based on sklearn (scikit-learn)\n",
"task_df = task_df[task_df['flow_name'].str.contains(\"sklearn\")]\n",
"task_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"59 1984\n",
"10107 25\n",
"289 1\n",
"Name: task_id, dtype: int64"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Counting frequency of the different tasks used to\n",
"# solve Iris as a supervised classification using scikit-learn\n",
"task_df['task_id'].value_counts()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"OpenML Classification Task\n",
"==========================\n",
"Task Type Description: https://www.openml.org/tt/1\n",
"Task ID..............: 59\n",
"Task URL.............: https://www.openml.org/t/59\n",
"Estimation Procedure.: crossvalidation\n",
"Evaluation Measure...: predictive_accuracy\n",
"Target Feature.......: class\n",
"# of Classes.........: 3\n",
"Cost Matrix..........: Available"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Retrieving the most used task\n",
"t = openml.tasks.get_task(59)\n",
"t"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# Filtering for only task_id=59\n",
"task_df = task_df.query(\"task_id==59\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>run_id</th>\n",
" <th>task_id</th>\n",
" <th>setup_id</th>\n",
" <th>flow_id</th>\n",
" <th>flow_name</th>\n",
" <th>data_id</th>\n",
" <th>data_name</th>\n",
" <th>function</th>\n",
" <th>upload_time</th>\n",
" <th>uploader</th>\n",
" <th>uploader_name</th>\n",
" <th>value</th>\n",
" <th>values</th>\n",
" <th>array_data</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>144</th>\n",
" <td>1849043</td>\n",
" <td>59</td>\n",
" <td>29015</td>\n",
" <td>5500</td>\n",
" <td>sklearn.ensemble.forest.RandomForestClassifier...</td>\n",
" <td>61</td>\n",
" <td>iris</td>\n",
" <td>predictive_accuracy</td>\n",
" <td>2017-03-03 17:10:12</td>\n",
" <td>1</td>\n",
" <td>janvanrijn@gmail.com</td>\n",
" <td>0.946667</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>145</th>\n",
" <td>1853409</td>\n",
" <td>59</td>\n",
" <td>30950</td>\n",
" <td>5873</td>\n",
" <td>sklearn.pipeline.Pipeline(Imputer=openml.utils...</td>\n",
" <td>61</td>\n",
" <td>iris</td>\n",
" <td>predictive_accuracy</td>\n",
" <td>2017-03-21 22:08:01</td>\n",
" <td>1</td>\n",
" <td>janvanrijn@gmail.com</td>\n",
" <td>0.960000</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>146</th>\n",
" <td>6130126</td>\n",
" <td>59</td>\n",
" <td>4163633</td>\n",
" <td>7108</td>\n",
" <td>sklearn.model_selection._search.RandomizedSear...</td>\n",
" <td>61</td>\n",
" <td>iris</td>\n",
" <td>predictive_accuracy</td>\n",
" <td>2017-08-21 11:07:40</td>\n",
" <td>1</td>\n",
" <td>janvanrijn@gmail.com</td>\n",
" <td>0.960000</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>147</th>\n",
" <td>6130128</td>\n",
" <td>59</td>\n",
" <td>4163634</td>\n",
" <td>7108</td>\n",
" <td>sklearn.model_selection._search.RandomizedSear...</td>\n",
" <td>61</td>\n",
" <td>iris</td>\n",
" <td>predictive_accuracy</td>\n",
" <td>2017-08-21 11:08:06</td>\n",
" <td>1</td>\n",
" <td>janvanrijn@gmail.com</td>\n",
" <td>0.946667</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" <tr>\n",
" <th>190</th>\n",
" <td>6946499</td>\n",
" <td>59</td>\n",
" <td>4978397</td>\n",
" <td>7109</td>\n",
" <td>sklearn.pipeline.Pipeline(imputation=openmlstu...</td>\n",
" <td>61</td>\n",
" <td>iris</td>\n",
" <td>predictive_accuracy</td>\n",
" <td>2017-09-02 22:06:32</td>\n",
" <td>1</td>\n",
" <td>janvanrijn@gmail.com</td>\n",
" <td>0.920000</td>\n",
" <td>None</td>\n",
" <td>None</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" run_id task_id setup_id flow_id \\\n",
"144 1849043 59 29015 5500 \n",
"145 1853409 59 30950 5873 \n",
"146 6130126 59 4163633 7108 \n",
"147 6130128 59 4163634 7108 \n",
"190 6946499 59 4978397 7109 \n",
"\n",
" flow_name data_id data_name \\\n",
"144 sklearn.ensemble.forest.RandomForestClassifier... 61 iris \n",
"145 sklearn.pipeline.Pipeline(Imputer=openml.utils... 61 iris \n",
"146 sklearn.model_selection._search.RandomizedSear... 61 iris \n",
"147 sklearn.model_selection._search.RandomizedSear... 61 iris \n",
"190 sklearn.pipeline.Pipeline(imputation=openmlstu... 61 iris \n",
"\n",
" function upload_time uploader uploader_name \\\n",
"144 predictive_accuracy 2017-03-03 17:10:12 1 janvanrijn@gmail.com \n",
"145 predictive_accuracy 2017-03-21 22:08:01 1 janvanrijn@gmail.com \n",
"146 predictive_accuracy 2017-08-21 11:07:40 1 janvanrijn@gmail.com \n",
"147 predictive_accuracy 2017-08-21 11:08:06 1 janvanrijn@gmail.com \n",
"190 predictive_accuracy 2017-09-02 22:06:32 1 janvanrijn@gmail.com \n",
"\n",
" value values array_data \n",
"144 0.946667 None None \n",
"145 0.960000 None None \n",
"146 0.960000 None None \n",
"147 0.946667 None None \n",
"190 0.920000 None None "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Filtering based on Random Forest\n",
"task_rf = task_df[task_df['flow_name'].str.contains(\"RandomForest\")]\n",
"task_rf.head()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment