# nickjevershed/pandas-cheatsheat.py

## pandas-cheatsheat.py
# Express each value as a percentage of its group's total, where the groups
# are defined by the first index level.
# transform('sum') broadcasts every group's total back onto the original rows,
# so the division is element-wise, keeps the original index, and needs no
# scalar conversion of the group sum (works for a Series or a DataFrame).

100 * df / df.groupby(level=0).transform('sum')

# Count rows per week, assuming day/month/year date order in the CSV.
# parse_dates takes a list of column names and dayfirst a real boolean;
# the aggregation is a method on the resampler rather than a `how=` keyword.
# NOTE: the weekly counts are written back to 'values.csv', i.e. the same
# path the data was read from, so the input file is overwritten.

df = pd.read_csv('values.csv', parse_dates=['date'], dayfirst=True, index_col='date')
df.resample('W').count().to_csv('values.csv')

# Reshape long to wide: rows are labelled by 'year', each distinct value of
# 'key' becomes a column header, and the cells are filled from 'count'.

df.pivot(
    index="year",
    columns="key",
    values="count",
)

# Group rows by calendar year.
# The function is called on each index label, so this presumably expects a
# datetime-like index (anything whose labels expose `.year`) -- TODO confirm.

dg = df.groupby(by=[lambda label: label.year])

# Derive a new column from a string split on an existing column, e.g. for
# financial years: split 'year' on "-" and keep the second piece.

df["end-year"] = df["year"].str.split("-").str[1]

# Add the 'missing' combinations to a grouped result that has a MultiIndex,
# e.g. before graphing the output of grouping on two or more columns.

index1 = ["dogs", "cats", "birds"]
index2 = list(range(1, 6))
# Level values for the full index, in level order.
combined = [index1, index2]

# Cartesian product of the level values: one entry per (animal, number) pair.
multi = pd.MultiIndex.from_product(combined, names=["animals", "numbers"])

df = pd.read_csv("things.csv")
dg = df.groupby(["animals", "numbers"])
df2 = dg.sum()
# Pairs that are absent from the grouped sums are inserted with a value of 0.
df3 = df2.reindex(index=multi, fill_value=0)

# Create a JSON file keyed by 'id', with the ids converted to strings first.
# `str` is used for the conversion because `basestring` only exists on
# Python 2 and raises NameError on Python 3.

df['id'] = df['id'].astype(str)
df = df.set_index(['id'])
# orient='index' writes one JSON object per row, keyed by the index label.
df.to_json('test.json', orient='index')
	# Express each value as a percentage of its group's total, where the groups
	# are defined by the first index level.
	# transform('sum') broadcasts every group's total back onto the original rows,
	# so the division is element-wise, keeps the original index, and needs no
	# scalar conversion of the group sum (works for a Series or a DataFrame).

	100 * df / df.groupby(level=0).transform('sum')

	# Count rows per week, assuming day/month/year date order in the CSV.
	# parse_dates takes a list of column names and dayfirst a real boolean;
	# the aggregation is a method on the resampler rather than a `how=` keyword.
	# NOTE: the weekly counts are written back to 'values.csv', i.e. the same
	# path the data was read from, so the input file is overwritten.

	df = pd.read_csv('values.csv', parse_dates=['date'], dayfirst=True, index_col='date')
	df.resample('W').count().to_csv('values.csv')

	# Reshape long to wide: rows are labelled by 'year', each distinct value of
	# 'key' becomes a column header, and the cells are filled from 'count'.

	df.pivot(
		index="year",
		columns="key",
		values="count",
	)

	# Group rows by calendar year.
	# The function is called on each index label, so this presumably expects a
	# datetime-like index (anything whose labels expose `.year`) -- TODO confirm.

	dg = df.groupby(by=[lambda label: label.year])

	# Derive a new column from a string split on an existing column, e.g. for
	# financial years: split 'year' on "-" and keep the second piece.

	df["end-year"] = df["year"].str.split("-").str[1]

	# Add the 'missing' combinations to a grouped result that has a MultiIndex,
	# e.g. before graphing the output of grouping on two or more columns.

	index1 = ["dogs", "cats", "birds"]
	index2 = list(range(1, 6))
	# Level values for the full index, in level order.
	combined = [index1, index2]

	# Cartesian product of the level values: one entry per (animal, number) pair.
	multi = pd.MultiIndex.from_product(combined, names=["animals", "numbers"])

	df = pd.read_csv("things.csv")
	dg = df.groupby(["animals", "numbers"])
	df2 = dg.sum()
	# Pairs that are absent from the grouped sums are inserted with a value of 0.
	df3 = df2.reindex(index=multi, fill_value=0)

	# Create a JSON file keyed by 'id', with the ids converted to strings first.
	# `str` is used for the conversion because `basestring` only exists on
	# Python 2 and raises NameError on Python 3.

	df['id'] = df['id'].astype(str)
	df = df.set_index(['id'])
	# orient='index' writes one JSON object per row, keyed by the index label.
	df.to_json('test.json', orient='index')