Forked from georgerichardson/standardise_column_names.py
Last active
February 24, 2020 21:04
-
-
Save wkarney/57c6921651851d3514744058f30d049f to your computer and use it in GitHub Desktop.
[Standardizing Column Names for pandas] Make DataFrame column names lowercase and replace whitespace (and punctuation) with '_' #python #pandas
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import re | |
import string | |
def standardise_column_names(df, remove_punct=True):
    """Lower-case DataFrame column names and normalise their separators.

    Every column name is lower-cased, and each run of whitespace (spaces,
    tabs, newlines, ...) and/or underscores is replaced by a single
    underscore. A trailing underscore is dropped; a leading one is kept.
    Optionally, punctuation is treated as a separator too.

    The DataFrame is modified in place and also returned for convenience.

    Parameters
    ----------
    df: pandas.DataFrame
        DataFrame with non-standardised column names. Column labels are
        expected to be strings (``str.lower`` is called on each label).
    remove_punct: bool (default True)
        If True, every character in ``string.punctuation`` is treated as a
        separator, i.e. it ends up as (part of) a single underscore.

    Returns
    -------
    df: pandas.DataFrame
        The same DataFrame object, with standardised column names.

    Example
    -------
    >>> df = pd.DataFrame({'Column With Spaces': [1,2,3,4,5],
    ...                    'Column-With-Hyphens&Others/': [6,7,8,9,10],
    ...                    'Too    Many Spaces': [11,12,13,14,15],
    ...                    })
    >>> df = standardise_column_names(df)
    >>> print(df.columns)
    Index(['column_with_spaces', 'column_with_hyphens_others',
           'too_many_spaces'],
          dtype='object')
    """
    # Map every punctuation character to a space so that it is handled by
    # the same separator-collapsing step as ordinary whitespace.
    translator = str.maketrans(string.punctuation, ' ' * len(string.punctuation))
    separator_run = re.compile(r'[\s_]+')

    new_names = {}
    for c in df.columns:
        c_mod = c.lower()
        if remove_punct:
            c_mod = c_mod.translate(translator)
        # Collapse BEFORE trimming: after this step at most one trailing
        # underscore can remain, so removing one is always sufficient.
        c_mod = separator_run.sub('_', c_mod)
        # endswith() is safe on an empty name, unlike indexing with [-1].
        if c_mod.endswith('_'):
            c_mod = c_mod[:-1]
        new_names[c] = c_mod

    # One rename with the full mapping: no column can be renamed twice, and
    # the columns Index is rebuilt once rather than once per column.
    df.rename(columns=new_names, inplace=True)
    return df
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment