Skip to content

Instantly share code, notes, and snippets.

@Neeratyoy
Created October 23, 2019 12:06
Show Gist options
  • Save Neeratyoy/38064228dc44d481908b448850db414b to your computer and use it in GitHub Desktop.
Save Neeratyoy/38064228dc44d481908b448850db414b to your computer and use it in GitHub Desktop.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import openml\n",
"\n",
"import numpy as np\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(2958, 16)\n",
"did\n",
"name\n",
"version\n",
"uploader\n",
"status\n",
"format\n",
"MajorityClassSize\n",
"MaxNominalAttDistinctValues\n",
"MinorityClassSize\n",
"NumberOfClasses\n",
"NumberOfFeatures\n",
"NumberOfInstances\n",
"NumberOfInstancesWithMissingValues\n",
"NumberOfMissingValues\n",
"NumberOfNumericFeatures\n",
"NumberOfSymbolicFeatures\n"
]
}
],
"source": [
"# Fetching the list of all available datasets on OpenML\n",
"d = openml.datasets.list_datasets(output_format='dataframe')\n",
"print(d.shape)\n",
"\n",
"# Listing column names or attributes that OpenML offers\n",
"for name in d.columns:\n",
" print(name)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" did name version uploader status format MajorityClassSize \\\n",
"2 2 anneal 1 1 active ARFF 684.0 \n",
"3 3 kr-vs-kp 1 1 active ARFF 1669.0 \n",
"4 4 labor 1 1 active ARFF 37.0 \n",
"5 5 arrhythmia 1 1 active ARFF 245.0 \n",
"6 6 letter 1 1 active ARFF 813.0 \n",
"\n",
" MaxNominalAttDistinctValues MinorityClassSize NumberOfClasses \\\n",
"2 7.0 8.0 5.0 \n",
"3 3.0 1527.0 2.0 \n",
"4 3.0 20.0 2.0 \n",
"5 13.0 2.0 13.0 \n",
"6 26.0 734.0 26.0 \n",
"\n",
" NumberOfFeatures NumberOfInstances NumberOfInstancesWithMissingValues \\\n",
"2 39.0 898.0 898.0 \n",
"3 37.0 3196.0 0.0 \n",
"4 17.0 57.0 56.0 \n",
"5 280.0 452.0 384.0 \n",
"6 17.0 20000.0 0.0 \n",
"\n",
" NumberOfMissingValues NumberOfNumericFeatures NumberOfSymbolicFeatures \n",
"2 22175.0 6.0 33.0 \n",
"3 0.0 0.0 37.0 \n",
"4 326.0 8.0 9.0 \n",
"5 408.0 206.0 74.0 \n",
"6 0.0 16.0 1.0 \n"
]
}
],
"source": [
"print(d.head())"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.8"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment