Last active
December 25, 2015 11:29
-
-
Save chelsimoy/6969212 to your computer and use it in GitHub Desktop.
Using R to chart the comparison between North Dakota's unemployment rate and the national average between 2000 and 2013... this orders, subsets, changes arrays to values (gets rid of the comma in the dataset) and plots lines.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compare the unemployment rate of the Bismarck, ND metro area with a
# labor-force-weighted average over every area in the dataset, month by month.
#
# Input:  ssamatab2.csv in the working directory. Columns used: Area, Year,
#         Month, Civilian.Labor.Force, Unemployment, Unemployment.Rate.
# Output: base-graphics plots; the last one overlays both series.

setwd("~/dataviz-fall-2013/breaking-exercise")
data <- read.csv("ssamatab2.csv")

# Rank the 2013 rows by unemployment rate, highest first -- this is how the
# highest and lowest locations (AZ and ND) were discovered.
Year2013 <- subset(data, Year == "2013")
Year2013 <- Year2013[order(Year2013$Unemployment.Rate, decreasing = TRUE), ]

# Chart unemployment for North Dakota only. Sort chronologically (oldest
# first) so the month index below runs in the same direction as us_avg's.
# NOTE(review): assumes Month sorts in calendar order (e.g. 1-12) -- confirm.
ND <- subset(data, Area == "Bismarck, ND MSA")
ND <- ND[order(ND$Year, ND$Month), ]
plot(ND$Year, ND$Unemployment.Rate, type = "l", col = "red")

# That graph looks weird: many monthly rows share one Year value. Use a
# running month index for the x-axis instead.
ND$monthindex <- seq_len(nrow(ND))
plot(ND$monthindex, ND$Unemployment.Rate, type = "l", col = "red")
# Year boundaries: one vertical line every 12 months. Drawn after plot() so
# the lines land on the month-index chart, and sized from the data.
abline(v = seq(1, nrow(ND), by = 12))

# To compare ND with the national picture we need totals across all areas.
# The count columns are text with thousands separators ("1,234"), so they
# cannot be summed until the commas are stripped and the text is converted.
data$labor_force <- as.numeric(
  gsub(",", "", data$Civilian.Labor.Force, fixed = TRUE)
)
data$num_unemployment <- as.numeric(
  gsub(",", "", data$Unemployment, fixed = TRUE)
)

# Exploratory totals per (year, month).
aggregate(data$labor_force, list(data$Year, data$Month), sum)
aggregate(data$num_unemployment, list(data$Year, data$Month), sum)

# Sum both columns per (year, month) and divide to get the overall rate.
# Group.1 is the year and Group.2 is the month.
us_avg <- aggregate(
  data[, c("labor_force", "num_unemployment")],
  list(data$Year, data$Month),
  sum
)
us_avg$rate <- us_avg$num_unemployment / us_avg$labor_force
# Exploratory: rows are not in chronological order yet, so this line zigzags.
plot(us_avg$Group.1, us_avg$rate,
     ylab = "Unemployment Rate", xlab = "Year", col = "red", type = "l")

# Put the data in chronological order (year, then month).
us_avg <- us_avg[order(us_avg$Group.1, us_avg$Group.2), ]

# Give us_avg the same running month index as ND so the x-axes line up.
us_avg$month_index <- seq_len(nrow(us_avg))

# Express the rate as a percentage to match the scale of Unemployment.Rate.
us_avg$rate <- us_avg$rate * 100

# Final plot: overall average in red, Bismarck, ND in blue.
plot(us_avg$month_index, us_avg$rate,
     ylab = "Unemployment Rate", xlab = "Month", col = "red", type = "l",
     ylim = c(2, 10))
lines(ND$monthindex, ND$Unemployment.Rate, col = "blue", lwd = 2)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment