A personal diary of DataFrame munging over the years.
Convert Series datatype to numeric (will error if column has non-numeric values)
(h/t @makmanalp)
# List the unique values in a DataFrame column.
# Series.unique() is called directly; the extra .ravel() step is unnecessary
# and Series.ravel() is deprecated in recent pandas releases.
df.column_name.unique()

# Convert a Series to a numeric dtype; any value that cannot be parsed as a
# number becomes NaN instead of raising.
# convert_objects() no longer exists in pandas, so pd.to_numeric with
# errors='coerce' is used in its place.
df['col'] = pd.to_numeric(df['col'].astype(str), errors='coerce')

# Keep only the DataFrame rows whose column holds one of the listed values.
# The list name must match the one passed to isin() below.
value_list = ['value1', 'value2', 'value3']
df = df[df.column.isin(value_list)]
A personal diary of DataFrame munging over the years.
Convert Series datatype to numeric (will error if column has non-numeric values)
(h/t @makmanalp)
# Load the obesity data set from the course web site. Character columns are
# kept as plain strings rather than being converted to factors.
obesity = read.csv(
  'http://www.stat.berkeley.edu/classes/s133/data/obesity.csv',
  stringsAsFactors = FALSE
)
# Replace every "." in the column names with "_".
names(obesity) = gsub("\\.", "_", names(obesity))
# Trim surrounding whitespace from State, then add a `state` column holding
# the two-letter abbreviation found by matching State against state.name and
# indexing state.abb. Names with no match in state.name become NA.
obesity = plyr::mutate(obesity,
  State = stringr::str_trim(State),
  state = state.abb[match(State, state.name)]
)