Skip to content

Instantly share code, notes, and snippets.

@ghl3
Created April 20, 2014 00:57
Show Gist options
  • Save ghl3/11102044 to your computer and use it in GitHub Desktop.
Save ghl3/11102044 to your computer and use it in GitHub Desktop.
Convert nominal (string, object) features in a pandas dataframe to integers
import pandas as pd
def get_nominal_integer_dict(nominal_vals):
    """Build a mapping from each distinct value to an integer code.

    Codes are assigned in order of first appearance, starting at 0 and
    increasing by 1 for every previously unseen value.

    Args:
        nominal_vals: Any iterable of hashable values (values are used as
            dictionary keys).

    Returns:
        A dict ``{value: code}``; empty when ``nominal_vals`` yields nothing.
    """
    codes = {}
    for val in nominal_vals:
        if val not in codes:
            # Codes are contiguous from 0, so the next free code is simply the
            # number of distinct values seen so far. This avoids rescanning
            # every existing code with max() on each new value.
            codes[val] = len(codes)
    return codes
def convert_to_integer(srs):
    """Replace every element of ``srs`` with its integer code.

    The value-to-code table is built by ``get_nominal_integer_dict`` from the
    elements of ``srs`` itself, then applied element by element via
    ``srs.map``.

    Args:
        srs: An iterable object exposing ``.map`` (expected to be a pandas
            Series).

    Returns:
        Whatever ``srs.map`` returns for the per-element code lookup.
    """
    codes = get_nominal_integer_dict(srs)

    def lookup(value):
        # Plain indexing: a value missing from the table raises KeyError.
        return codes[value]

    return srs.map(lookup)
def convert_strings_to_integer(df):
    """Return a new DataFrame with string/object columns integer-encoded.

    Each column whose dtype compares equal to ``'string'`` or ``'object'`` is
    passed through ``convert_to_integer``; every other column is copied into
    the result unchanged. Columns are added in the order ``df`` iterates them.

    Args:
        df: The pandas DataFrame to convert. It is read, never assigned to.

    Returns:
        A freshly created ``pd.DataFrame`` holding the processed columns.
    """
    encoded = pd.DataFrame()
    for name in df:
        values = df[name]
        kind = values.dtype
        is_nominal = kind == 'string' or kind == 'object'
        encoded[name] = convert_to_integer(values) if is_nominal else values
    return encoded
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment