Last active
August 15, 2018 19:47
-
-
Save vallantin/b6bf35de026ccb27f7c11cb3678040b7 to your computer and use it in GitHub Desktop.
Given a DataFrame, replace every value in a column with 'other', except for the two most frequent values
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"cells": [ | |
{ | |
"cell_type": "code", | |
"execution_count": 40, | |
"metadata": {}, | |
"outputs": [ | |
{ | |
"data": { | |
"text/html": [ | |
"<div>\n", | |
"<style scoped>\n", | |
" .dataframe tbody tr th:only-of-type {\n", | |
" vertical-align: middle;\n", | |
" }\n", | |
"\n", | |
" .dataframe tbody tr th {\n", | |
" vertical-align: top;\n", | |
" }\n", | |
"\n", | |
" .dataframe thead th {\n", | |
" text-align: right;\n", | |
" }\n", | |
"</style>\n", | |
"<table border=\"1\" class=\"dataframe\">\n", | |
" <thead>\n", | |
" <tr style=\"text-align: right;\">\n", | |
" <th></th>\n", | |
" <th>person</th>\n", | |
" <th>favorite color</th>\n", | |
" </tr>\n", | |
" </thead>\n", | |
" <tbody>\n", | |
" <tr>\n", | |
" <th>0</th>\n", | |
" <td>Julia</td>\n", | |
" <td>blue</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>1</th>\n", | |
" <td>Mario</td>\n", | |
" <td>other</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>2</th>\n", | |
" <td>Christian</td>\n", | |
" <td>other</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>3</th>\n", | |
" <td>Cesar</td>\n", | |
" <td>other</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>4</th>\n", | |
" <td>Cristina</td>\n", | |
" <td>red</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>5</th>\n", | |
" <td>John</td>\n", | |
" <td>red</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>6</th>\n", | |
" <td>Mary</td>\n", | |
" <td>blue</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>7</th>\n", | |
" <td>Joseph</td>\n", | |
" <td>other</td>\n", | |
" </tr>\n", | |
" <tr>\n", | |
" <th>8</th>\n", | |
" <td>Marcos</td>\n", | |
" <td>other</td>\n", | |
" </tr>\n", | |
" </tbody>\n", | |
"</table>\n", | |
"</div>" | |
], | |
"text/plain": [ | |
" person favorite color\n", | |
"0 Julia blue\n", | |
"1 Mario other\n", | |
"2 Christian other\n", | |
"3 Cesar other\n", | |
"4 Cristina red\n", | |
"5 John red\n", | |
"6 Mary blue\n", | |
"7 Joseph other\n", | |
"8 Marcos other" | |
] | |
}, | |
"execution_count": 40, | |
"metadata": {}, | |
"output_type": "execute_result" | |
} | |
], | |
"source": [ | |
"import pandas as pd\n", | |
"\n", | |
"# Toy data: one row per person with that person's favorite color.\n", | |
"infos = {\n", | |
" 'person' : ['Julia', 'Mario', 'Christian', 'Cesar', 'Cristina', 'John', 'Mary', 'Joseph', 'Marcos'],\n", | |
" 'favorite color' : ['blue','green','yellow','purple','red','red','blue','black','white']\n", | |
"}\n", | |
"\n", | |
"df = pd.DataFrame(infos)\n", | |
"\n", | |
"# value_counts() sorts by descending count, so index[:2] holds the two most frequent colors.\n", | |
"frequency = df['favorite color'].value_counts()\n", | |
"\n", | |
"# Use .loc with an explicit column label: assigning through a bare boolean row mask\n", | |
"# (df[mask] = ...) would overwrite every column of the matching rows, including 'person'.\n", | |
"df.loc[~df['favorite color'].isin(frequency.index[:2]), 'favorite color'] = 'other'\n", | |
"df" | |
] | |
} | |
], | |
"metadata": { | |
"kernelspec": { | |
"display_name": "Python 3", | |
"language": "python", | |
"name": "python3" | |
}, | |
"language_info": { | |
"codemirror_mode": { | |
"name": "ipython", | |
"version": 3 | |
}, | |
"file_extension": ".py", | |
"mimetype": "text/x-python", | |
"name": "python", | |
"nbconvert_exporter": "python", | |
"pygments_lexer": "ipython3", | |
"version": "3.6.5" | |
} | |
}, | |
"nbformat": 4, | |
"nbformat_minor": 2 | |
} |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment