Last active
April 25, 2017 00:19
-
-
Save nickjevershed/f7c0ec1747470a0147b3 to your computer and use it in GitHub Desktop.
PANDAS cheatsheet
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Calculate each value as a percentage of its group's total, grouping on the
# first index level. transform('sum') broadcasts every group total back onto
# the original index, so the division lines up row by row and the result
# keeps the same index as df (works for a Series or a numeric DataFrame).
100 * df / df.groupby(level=0).transform('sum')
# Group by week, assuming day/month/year date order.
# parse_dates must be a list of column names (a bare string is rejected) and
# dayfirst must be a real boolean: any non-empty string, even 'false', is truthy.
df = pd.read_csv('values.csv', parse_dates=['date'], dayfirst=True, index_col='date')
# resample() no longer accepts how=; call the aggregation on the resampler.
# NOTE: this writes to the same path that was read above, replacing the input.
df.resample('W').count().to_csv('values.csv')
# Turn the distinct values of the 'key' column into column headers:
# one row per 'year', one column per 'key', cells filled from 'count'.
df.pivot(index='year', columns='key', values='count')
# Group by year. The function is called on each index label, so this
# assumes df has a datetime-like index whose labels expose .year.
dg = df.groupby([lambda x: x.year])
# New column from a string split on an existing column, e.g. for financial
# years: '2015-16' is split on '-' and the second piece ('16') is kept.
df['end-year'] = df['year'].str.split("-").str.get(1)
# Add 'missing' combinations to a grouped result with a MultiIndex, e.g. so
# every (animal, number) pair is present when graphing the output of a
# groupby on two or more columns.
index1 = ['dogs', 'cats', 'birds']
index2 = [1, 2, 3, 4, 5]
combined = [index1, index2]
# Cartesian product of both level lists: every pair that should exist.
multi = pd.MultiIndex.from_product(combined, names=['animals', 'numbers'])
df = pd.read_csv('things.csv')
dg = df.groupby(['animals', 'numbers'])
df2 = dg.sum()
# Pairs absent from the grouped data are inserted with a value of 0.
df3 = df2.reindex(multi, fill_value=0)
# Create a JSON file keyed by 'id', with the integer ids converted to strings.
# basestring only exists in Python 2; str is the string type to cast to.
df['id'] = df['id'].astype(str)
df = df.set_index(['id'])
# orient='index' writes {id: {column: value, ...}, ...}.
df.to_json('test.json', orient='index')
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment