Skip to content

Instantly share code, notes, and snippets.

@khalido
Last active December 6, 2019 05:07
Show Gist options
  • Save khalido/be9080e68f6b2282f6415f8db6ae3a05 to your computer and use it in GitHub Desktop.
Save khalido/be9080e68f6b2282f6415f8db6ae3a05 to your computer and use it in GitHub Desktop.
[pandas tips] common things to do in pandas which i always have to look up
# take a single col from your df and drop the nulls
s = df['some_col'].dropna()
# this should return an expanded df from the col of dicts:
expanded_df = s.apply(lambda row: pandas.Series(row))
# then merge the two dataframes - pandas will put NaNs for the missing values in the expanded df
pd.concat([data1, expanded_df], axis=1)
# https://chrisalbon.com/python/data_wrangling/pandas_map_values_to_values/
# dict mapping old val to current val
map_old_vals_to_current = {"LC6B": "LE7",
"TQ12": "LE7" }
# change old col values to current ones in a given col
# can also apply to whole dataframe
df["col"] = df["col"].replace(map_old_vals_to_current)
"""
See full options at https://pandas.pydata.org/pandas-docs/stable/user_guide/options.html
"""
pd.options.display.precision = 2 # default is 6, but this is easier to see
from datetime import datetime
def time_string(time_string: str = "%m-%d-%y_%H-%M-%S"):
now = datetime.now() # current date and time
return now.strftime(time_string)
def save_to_file(df, fn="flagged", sheet="Flagged"):
"""takes in df, filename and sheetname and writes to disk"""
# add a check to see if the excel file already exists, add a sheet to it if so
time_now = time_string() # get current time as string
fn = f"{fn}_{time_now}.xlsx"
writer = pd.ExcelWriter(output / fn, engine="xlsxwriter")
# can save multiple dataframes to different sheets in the same file
df.to_excel(writer, sheet_name=sheet)
#format the excel sheets
wb = writer.book
# declaring various text formatting types, these have to be applied to a col to take effect
fmt = wb.add_format({"text_wrap": True})
pct_fmt = wb.add_format({"num_format": "0.0%"}) # percentage
num_fmt = wb.add_format({"num_format": "#,##0"}) # add comma to thousands
ws = writer.sheets[sheet]
ws.set_column('A:A', 30) # only set the col width here, didn't apply a format
ws.set_column("B:F", None, num_fmt) # applied num_fmt to a range of cols
writer.save()
return fn
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment