Sebastian dataninjato

## lambda_comprehensions.py
# Lambda form of a two-argument "pick the larger" function: the result is x
# only when x > y holds; in every other case (ties included) it is y.
l1 = lambda x, y: y if not x > y else x

# Dict comprehension inside a lambda: every element of s becomes a key whose
# value is the number of occurrences reported by s.count().
l2 = lambda s: {ch: s.count(ch) for ch in s}

# Variadic arguments arrive as the tuple `nums`; their squares are summed and
# the square root of that sum is returned (the Euclidean norm of the inputs).
import math  # required by math.sqrt below; the snippet did not import it

l3 = lambda *nums: math.sqrt(sum(n ** 2 for n in nums))

# multiple conditions in dict comprehension

## proportion_effectsize.py
# Power calculation for comparing two proportions (0.03 vs 0.0315) with an
# independent two-sample t-test solver from statsmodels.
from statsmodels.stats.power import tt_ind_solve_power  # package name is "statsmodels"
from statsmodels.stats.proportion import proportion_effectsize

# Effect size between the two proportions.
es = proportion_effectsize(0.03, 0.0315)
# nobs1 is not supplied, so per the statsmodels docs the solver should return
# the sample size of the first group for power=0.8, alpha=0.05 and ratio=1.
n = tt_ind_solve_power(effect_size=es, ratio=1, power=0.8, alpha=0.05)

# from https://speakerdeck.com/nneu/b-testing-a-bayesian-approach?slide=36

## pandas.py
# Keep only the rows belonging to a 'Category' group with 250 or more rows.
large_categories = (
    apps_with_size_and_rating_present
    .groupby('Category')
    .filter(lambda group: len(group) >= 250)
)

# Boolean-mask selection: keep only the rows of df whose 'name' value is not
# null/NaN.
filtered_df = df[df['name'].notna()]

# Chain two merges: ridership is merged with cal on the year/month/day
# columns, and that result is merged with stations on station_id.
ridership_cal_stations = (
    ridership
    .merge(cal, on=['year', 'month', 'day'])
    .merge(stations, on='station_id')
)

# Group by ward, pop_2010, and vacant, then count the # of accounts
	# Lambda form of a two-argument "pick the larger" function: the result is x
	# only when x > y holds; in every other case (ties included) it is y.
	l1 = lambda x, y: y if not x > y else x

	# Dict comprehension inside a lambda: every element of s becomes a key whose
	# value is the number of occurrences reported by s.count().
	l2 = lambda s: {ch: s.count(ch) for ch in s}

	# A tuple (or any iterable) of numbers is fed into a generator expression:
	# the squares are summed and the square root of the sum is returned.
	import math  # required by math.sqrt below; the snippet did not import it
	# Each value is squared (n ** 2); doubling it (n * 2) does not give a norm.
	l3 = lambda nums: math.sqrt(sum(n ** 2 for n in nums))

	# multiple conditions in dict comprehension
	# Power calculation for comparing two proportions (0.03 vs 0.0315) with an
	# independent two-sample t-test solver from statsmodels.
	from statsmodels.stats.power import tt_ind_solve_power  # package name is "statsmodels"
	from statsmodels.stats.proportion import proportion_effectsize
	# Effect size between the two proportions.
	es = proportion_effectsize(0.03, 0.0315)
	# nobs1 is not supplied, so per the statsmodels docs the solver should return
	# the sample size of the first group for power=0.8, alpha=0.05 and ratio=1.
	n = tt_ind_solve_power(effect_size=es, ratio=1, power=0.8, alpha=0.05)

	# from https://speakerdeck.com/nneu/b-testing-a-bayesian-approach?slide=36
	# Keep only the rows belonging to a 'Category' group with 250 or more rows.
	large_categories = (
		apps_with_size_and_rating_present
		.groupby('Category')
		.filter(lambda group: len(group) >= 250)
	)

	# Boolean-mask selection: keep only the rows of df whose 'name' value is not
	# null/NaN.
	filtered_df = df[df['name'].notna()]

	# Chain two merges: ridership is merged with cal on the year/month/day
	# columns, and that result is merged with stations on station_id.
	ridership_cal_stations = (
		ridership
		.merge(cal, on=['year', 'month', 'day'])
		.merge(stations, on='station_id')
	)

	# Group by ward, pop_2010, and vacant, then count the # of accounts