mwaskom/mcfrank_analysis.py

## mcfrank_analysis.py
import pandas as pd
import seaborn as sns
imoprt statsmodels.api as sm

#### 1. read in data
#d <- read.csv("data/all_data.csv")

d = pd.read_csv("data/all_data.csv")

#### 2. aggregate for each subject and then across subjects
#mss <- aggregate(side ~ subid + agegroup + corr.side + condition,
#                 data = d, mean)
#ms <- aggregate(side ~ agegroup + corr.side + condition,
#                data = mss, mean)

ms = d.groupby(["agegroup", "corr_sid", "condition"]).side.mean().reset_index()
mss = ms.groupyby("subid").side.mean().reset_index()

#### 3. plot
#qplot(agegroup, side, colour = corr.side,
#      facets = .~condition,
#      group = corr.side,
#      geom = "line",
#      data = ms)

sns.factorplot("agegroup", "side", "corr_side", col="condition", data=ms, kind="point")

#### 4. linear mixed-effects model
#lm.all <- glmer(side ~ condition * corr.side * age +
#                (corr.side | subid),
#                data = kids, family = "binomial")

# Womp womp, not in Python yet.
# But there is currently a PR in statsmodels with mixed effects regression.
# At this point I would use the IPython rmagic function to run glmer in an R cell
# with very little interruption to the workflow
	import pandas as pd
	import seaborn as sns
	imoprt statsmodels.api as sm

	#### 1. read in data
	#d <- read.csv("data/all_data.csv")

	d = pd.read_csv("data/all_data.csv")

	#### 2. aggregate for each subject and then across subjects
	#mss <- aggregate(side ~ subid + agegroup + corr.side + condition,
	# data = d, mean)
	#ms <- aggregate(side ~ agegroup + corr.side + condition,
	# data = mss, mean)

	ms = d.groupby(["agegroup", "corr_sid", "condition"]).side.mean().reset_index()
	mss = ms.groupyby("subid").side.mean().reset_index()

	#### 3. plot
	#qplot(agegroup, side, colour = corr.side,
	# facets = .~condition,
	# group = corr.side,
	# geom = "line",
	# data = ms)

	sns.factorplot("agegroup", "side", "corr_side", col="condition", data=ms, kind="point")

	#### 4. linear mixed-effects model
	#lm.all <- glmer(side ~ condition * corr.side * age +
	# (corr.side \| subid),
	# data = kids, family = "binomial")

	# Womp womp, not in Python yet.
	# But there is currently a PR in statsmodels with mixed effects regression.
	# At this point I would use the IPython rmagic function to run glmer in an R cell
	# with very little interruption to the workflow