Skip to content

Instantly share code, notes, and snippets.

@kuharan
Last active January 14, 2019 06:17
Show Gist options
  • Save kuharan/6cfd461d5e20e8d086a73ecc11b353db to your computer and use it in GitHub Desktop.
Save kuharan/6cfd461d5e20e8d086a73ecc11b353db to your computer and use it in GitHub Desktop.
This gist provides sample code that generates random car data, applies one-hot encoding, and rearranges the DataFrame so that the target column remains at the end.
Display the source blob
Display the rendered blob
Raw
{
"cells": [
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import random"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"car_feed = ['Hatchback','Edan','SUV','Crossover','Coupe','Convertible']\n",
"camera_feed = ['high','low','medium'] \n",
"buy_feed = ['Yes','NO']"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"def create_list(feed, rows=100):\n",
" mylist = random.choices(feed, k=rows)\n",
" return mylist"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"RangeIndex: 10000 entries, 0 to 9999\n",
"Data columns (total 3 columns):\n",
"Car 10000 non-null object\n",
"Rear Camera Sensor 10000 non-null object\n",
"Buy 10000 non-null object\n",
"dtypes: object(3)\n",
"memory usage: 234.5+ KB\n"
]
}
],
"source": [
"ROWS=10000\n",
"car = {\n",
" 'Car': create_list(car_feed, ROWS),\n",
" 'Rear Camera Sensor' :create_list(camera_feed, ROWS),\n",
" 'Buy':create_list(buy_feed, ROWS)\n",
"}\n",
"\n",
"# Build a DataFrame from the dict of generated columns\n",
"car_df = pd.DataFrame(data=car)\n",
"\n",
"# Print a summary of the columns: dtypes and non-null counts\n",
"car_df.info()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Car\n",
"Convertible 1672\n",
"Coupe 1636\n",
"Crossover 1664\n",
"Edan 1741\n",
"Hatchback 1679\n",
"SUV 1608\n",
"dtype: int64\n"
]
}
],
"source": [
"# Class distribution: number of rows for each value of 'Car'\n",
"print(car_df.groupby('Car').size())"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Car</th>\n",
" <th>Rear Camera Sensor</th>\n",
" <th>Buy</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>Crossover</td>\n",
" <td>high</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>Hatchback</td>\n",
" <td>medium</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>SUV</td>\n",
" <td>low</td>\n",
" <td>NO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>SUV</td>\n",
" <td>high</td>\n",
" <td>NO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>Edan</td>\n",
" <td>medium</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Car Rear Camera Sensor Buy\n",
"0 Crossover high Yes\n",
"1 Hatchback medium Yes\n",
"2 SUV low NO\n",
"3 SUV high NO\n",
"4 Edan medium Yes"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"car_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Rear Camera Sensor</th>\n",
" <th>Buy</th>\n",
" <th>Car_Convertible</th>\n",
" <th>Car_Coupe</th>\n",
" <th>Car_Crossover</th>\n",
" <th>Car_Edan</th>\n",
" <th>Car_Hatchback</th>\n",
" <th>Car_SUV</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>high</td>\n",
" <td>Yes</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>medium</td>\n",
" <td>Yes</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>low</td>\n",
" <td>NO</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>high</td>\n",
" <td>NO</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>medium</td>\n",
" <td>Yes</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Rear Camera Sensor Buy Car_Convertible Car_Coupe Car_Crossover \\\n",
"0 high Yes 0 0 1 \n",
"1 medium Yes 0 0 0 \n",
"2 low NO 0 0 0 \n",
"3 high NO 0 0 0 \n",
"4 medium Yes 0 0 0 \n",
"\n",
" Car_Edan Car_Hatchback Car_SUV \n",
"0 0 0 0 \n",
"1 0 1 0 \n",
"2 0 0 1 \n",
"3 0 0 1 \n",
"4 1 0 0 "
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# One-hot encode the 'Car' column into 'Car_<value>' indicator columns\n",
"car_df_onehot = pd.get_dummies(car_df, columns=['Car'], prefix =['Car'])\n",
"cols_at_end = ['Rear Camera Sensor', 'Buy']\n",
"car_df_onehot.head()"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>Car_Convertible</th>\n",
" <th>Car_Coupe</th>\n",
" <th>Car_Crossover</th>\n",
" <th>Car_Edan</th>\n",
" <th>Car_Hatchback</th>\n",
" <th>Car_SUV</th>\n",
" <th>Rear Camera Sensor</th>\n",
" <th>Buy</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>high</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>medium</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>low</td>\n",
" <td>NO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>high</td>\n",
" <td>NO</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>medium</td>\n",
" <td>Yes</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" Car_Convertible Car_Coupe Car_Crossover Car_Edan Car_Hatchback \\\n",
"0 0 0 1 0 0 \n",
"1 0 0 0 0 1 \n",
"2 0 0 0 0 0 \n",
"3 0 0 0 0 0 \n",
"4 0 0 0 1 0 \n",
"\n",
" Car_SUV Rear Camera Sensor Buy \n",
"0 0 high Yes \n",
"1 0 medium Yes \n",
"2 1 low NO \n",
"3 1 high NO \n",
"4 0 medium Yes "
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rearrange columns using list comprehensions: all other columns first, then cols_at_end\n",
"car_df_onehot = car_df_onehot[[c for c in car_df_onehot if c not in cols_at_end] + [c for c in cols_at_end if c in car_df_onehot]]\n",
"car_df_onehot.head()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment