# chelsimoy/Unemployment rates

## Unemployment rates
# Load the area-level unemployment data from the exercise folder.
# NOTE(review): setwd() changes the working directory for the whole session;
# it is kept here so the relative CSV path keeps resolving as before.
setwd("~/dataviz-fall-2013/breaking-exercise")
data <- read.csv("ssamatab2.csv")

# Rank the 2013 rows by unemployment rate, highest first, to see which areas
# sit at the top and bottom (the author's notes report AZ and ND).
Year2013 <- subset(data, Year == "2013")
Year2013 <- Year2013[order(Year2013$Unemployment.Rate, decreasing = TRUE), ]

# Chart the unemployment rate for North Dakota only, using the Bismarck metro
# area rows (the author's notes give the period as 2000 to 2013).
ND <- subset(data, Area == "Bismarck, ND MSA")

# Sort oldest to newest by year and then month, so that the running month
# index below moves forward in time.
# NOTE(review): assumes Month sorts chronologically (e.g. 1-12) -- confirm.
ND <- ND[order(ND$Year, ND$Month), ]

plot(ND$Year, ND$Unemployment.Rate, type = "l", col = "red")

# That graph looks odd because every month of a year shares one x value.
# Use a running month index as the x-axis instead.
ND$monthindex <- seq_len(nrow(ND))
plot(ND$monthindex, ND$Unemployment.Rate, type = "l", col = "red")

# Vertical guide every 12 months. abline() draws on the current plot, so it
# has to come after plot(); the span follows the number of rows in ND.
abline(v = seq(1, nrow(ND), by = 12))

# To compare ND with a national figure we need totals across all areas.
# The raw count columns are written with thousands separators (commas), so
# they cannot be summed as-is; summing the raw column, as in
#   aggregate(data$Civilian.Labor.Force, list(data$Year), sum)
# stops with an error. Strip the commas and convert to numbers first.
data$labor_force <- as.numeric(gsub(",", "", data$Civilian.Labor.Force))

# Total labor force for every year/month combination.
aggregate(data$labor_force, list(data$Year, data$Month), sum)

# Same clean-up and per-month total for the number of unemployed people.
# (aggregate() needs both a grouping list and a summary function.)
data$num_unemployment <- as.numeric(gsub(",", "", data$Unemployment))
aggregate(data$num_unemployment, list(data$Year, data$Month), sum)

# Sum the labor force and the unemployed counts per year/month across all
# areas, then divide to get one overall unemployment rate per month.
# aggregate() names the grouping columns Group.1 (year) and Group.2 (month).
us_avg <- aggregate(
  data[, c("labor_force", "num_unemployment")],
  list(data$Year, data$Month),
  sum
)
us_avg$rate <- us_avg$num_unemployment / us_avg$labor_force

# Put the rows in chronological order (year, then month) before drawing any
# line plot; type = "l" connects points in row order.
us_avg <- us_avg[order(us_avg$Group.1, us_avg$Group.2), ]

plot(
  us_avg$Group.1, us_avg$rate,
  ylab = "Unemployment Rate", xlab = "Year", col = "red", type = "l"
)

# ND is plotted against a running month index, so give us_avg the same kind
# of x-axis.
us_avg$month_index <- seq_len(nrow(us_avg))

# Express the rate in percent so it shares a y-axis with
# ND$Unemployment.Rate.
us_avg$rate <- us_avg$rate * 100

# Final comparison: overall rate in red, Bismarck, ND in blue.
plot(
  us_avg$month_index, us_avg$rate,
  ylab = "Unemployment Rate", xlab = "Month",
  col = "red", type = "l", ylim = c(2, 10)
)
lines(ND$monthindex, ND$Unemployment.Rate, col = "blue", lwd = 2)
	# NOTE(review): the tab-indented statements that stood here repeated the
	# analysis at the top of this file line for line (same setwd()/read.csv(),
	# same Year2013 and ND subsets, same aggregates and plots). Running them
	# again only redid identical work, so the duplicate copy was dropped.