Last active
July 25, 2016 18:08
-
-
Save sjstebbins/0f0ef7829825ddb41f1b1577218268ff to your computer and use it in GitHub Desktop.
average student loan debt and percentage of students with debt plots
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Build `data`: per-year means of the numeric CollegeInSight measures.
# NOTE(review): this section calls dplyr verbs (rename, select, group_by,
# summarise_all, %>%) but does not attach dplyr itself -- presumably it is
# loaded elsewhere; confirm before running this script standalone.

# Read the raw export. Strings stay character so the "N/A" placeholders can be
# converted explicitly below.
state_college_data <- read.csv(
  "https://raw.githubusercontent.com/sjstebbins/ExploratoryDataViz/master/data/CollegeInSight_Explore.csv",
  stringsAsFactors = FALSE
)

# parse state names: strip the literal " - 4-year or above" suffix
state_college_data$Name <- gsub(
  " - 4-year or above", "", state_college_data$Name,
  fixed = TRUE
)

# rename Year column to Date and Name to State
state_college_data <- rename(state_college_data, Date = Year, State = Name)

# convert date to year format by dropping the last three characters
# (presumably values look like "2003-04" -- TODO confirm against the CSV)
state_college_data$Date <- substr(
  state_college_data$Date, 1L, nchar(state_college_data$Date) - 3L
)

# The remaining steps work on `state_data`. The original never assigned that
# name, so it is defined here from the frame that was just loaded.
state_data <- state_college_data

# convert every literal "N/A" cell to a real NA so the means can skip it
state_data[state_data == "N/A"] <- NA

# convert columns 3 to 11 to numeric, one column at a time
state_data[, 3:11] <- lapply(state_data[, 3:11], as.numeric)

# State is dropped so that only Date and the numeric columns are summarised
state_data <- select(state_data, -State)

# group by year and take the mean of every remaining column, ignoring NAs
data <- state_data %>%
  group_by(Date) %>%
  summarise_all(mean, na.rm = TRUE)
##-------------------------------------
##PLOT 2 - Average Student Loan Debt
##-------------------------------------
# select the year and the average-debt column from the yearly means in `data`
average_debt <- select(data, Date, Average.debt.of.graduates)

# melt to long form: one row per Date, the measure name in `variable` and its
# value in `Amount.in.Thousands`
# NOTE(review): the value column is named Amount.in.Thousands but the y axis
# below is labelled "Dollars" -- confirm which unit the data actually uses.
average_debt <- melt(
  average_debt,
  id.vars = "Date",
  value.name = "Amount.in.Thousands"
)

# plot: a single line over Date; the legend is hidden because there is only
# one series, and the x-axis title is blanked
ggplot(
  average_debt,
  aes(x = Date, y = Amount.in.Thousands, group = variable, color = variable)
) +
  geom_line() +
  ggtitle("Average Student Loan Debt") +
  theme_fivethirtyeight() +
  theme(legend.title = element_blank()) +
  theme(axis.title = element_text(), axis.title.x = element_blank()) +
  ylab("Dollars") +
  theme(legend.position = "none")
##-------------------------------------
##PLOT 3 - Percent of Student with Debt
##-------------------------------------
# select the year and the percent-with-debt column from the yearly means
percent_student_with_debt <- select(data, Date, Percent.of.graduates.with.debt)

# melt to long form: one row per Date, the measure name in `variable` and its
# value in `Percent`
percent_student_with_debt <- melt(
  percent_student_with_debt,
  id.vars = "Date",
  value.name = "Percent"
)

# multiply by 100 for display
# (presumably the source stores the share as a fraction -- TODO confirm)
percent_student_with_debt$Percent <-
  as.numeric(percent_student_with_debt$Percent) * 100

# plot: a single line over Date; the legend is hidden because there is only
# one series, and the x-axis title is blanked
ggplot(
  percent_student_with_debt,
  aes(x = Date, y = Percent, group = variable, color = variable)
) +
  geom_line() +
  ggtitle("Percent of Students with Debt") +
  theme_fivethirtyeight() +
  theme(legend.title = element_blank()) +
  theme(axis.title = element_text(), axis.title.x = element_blank()) +
  ylab("Percent") +
  theme(legend.position = "none")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment