Skip to content

Instantly share code, notes, and snippets.

@hadley hadley/phsample.R
Created Jan 14, 2014

Embed
What would you like to do?
library(ggplot2)
library(plyr)
# Load the sample data; the code below expects it to provide a data frame
# named `dhus` with (at least) TYPE, NP and HINCP columns.
load("phsample.RData")
# Keep rows with TYPE == 1 and a positive NP (NP is used as household size
# later on). Presumably TYPE == 1 marks household records, going by the
# variable name -- confirm against the data dictionary.
# which() discards rows where the condition is NA, as subset() would.
hhonly <- dhus[which(with(dhus, TYPE == 1 & NP > 0)), , drop = FALSE]
# Of those, keep only rows reporting a strictly positive household income.
filtered <- hhonly[which(with(hhonly, HINCP > 0)), , drop = FALSE]
# Do all variable creation in one step.
# Later expressions rely on columns created earlier in the same summarise()
# call (hinc.log10, ocpip, grpip, living_expenses), so the order matters.
expense_frame <- summarise(
  filtered,
  np = NP,                      # household size
  hinc = HINCP,                 # household income
  hinc.log10 = log10(HINCP),
  ocpip = OCPIP,                # owner costs as % hinc
  grpip = GRPIP,                # gross rent % hinc
  # A discretized version of household income, for later:
  # round log10(income) to 2 decimals, then map back to the income scale
  hinc.bin = 10^round(hinc.log10, 2),
  # Merge the owner and renter expense columns: prefer the owner cost share,
  # fall back to the rent share; NA only when both are missing
  living_expenses = ifelse(is.na(ocpip), grpip, ocpip),
  # cost of living (housing) as % household income
  COL_pct = living_expenses,
  # is the household living beyond its means (housing above 30% of income)?
  beyond.means = living_expenses > 30
)
# Remove households with neither owner costs nor rent recorded
expense_frame <- subset(expense_frame, !is.na(living_expenses))
# Summarise each income bin with ddply: one output row per distinct
# hinc.bin value, holding the number of households in the bin and the
# mean housing cost share.
hinc_stats <- ddply(
  .data = expense_frame,
  .variables = .(hinc.bin),
  .fun = summarise,
  n = length(np),            # rows (households) in this bin
  COL_pct = mean(COL_pct)    # average cost share within the bin
)
# Plot the mean living-expense share of each income bin against household
# income on a log10 x axis.
# Use scale_size_area() to scale point sizes (by the bin count n).
# Also add in a little alpha to ameliorate overplotting.
ggplot(hinc_stats, aes(x = hinc.bin, y = COL_pct)) +
  geom_point(aes(size = n), alpha = 1 / 3) +
  # Reference line at 30 (% of income). It is a constant, so it is set as a
  # fixed parameter rather than mapped through aes().
  geom_hline(yintercept = 30, color = "red") +
  scale_x_log10(
    "Household Income",
    breaks = 10^(2:6),
    # Plain, comma-separated labels; argument names are spelled out in full
    # instead of relying on partial matching and T/F
    labels = format(10^(2:6), big.mark = ",", scientific = FALSE, trim = TRUE),
    # Due to buglet, need to log transform minor breaks
    # NOTE(review): this workaround targets the ggplot2 version the script
    # was written against; newer releases may expect minor_breaks on the
    # data scale (i.e. without log10()) -- confirm against the installed
    # version before changing.
    minor_breaks = log10(as.vector(outer(1:9, 10^(1:6))))
  ) +
  scale_y_continuous("Living expense as %income") +
  scale_size_area()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
You can’t perform that action at this time.