Created
October 16, 2013 20:55
-
-
Save juliemariebrown/7014698 to your computer and use it in GitHub Desktop.
DataViz Homework with Angela and Mihir, 10/16/2013
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Compare California's yearly unemployment rate with the rate computed over
# the whole data set.
#
# Input:  "ssamatab1.csv" in the working directory. The script reads the
#         columns ST.FIPS.Code, Year, Civilian.Labor.Force and Unemployment.
# Output: two data frames (ca_unemployment_rate, national_unemployment_rate),
#         each with a `rate` column, plus three line plots.

# Load the data set.
data <- read.csv("ssamatab1.csv")

# Keep only the California rows (state FIPS code 6). The code is coerced to
# integer so the filter works whether read.csv() parsed the column as the
# number 6 or as the zero-padded string "06".
ca <- subset(data, as.integer(as.character(ST.FIPS.Code)) == 6L)

# Aggregate = collapse many rows into one row per group. Here: sum the
# civilian labor force over all California rows that share the same year.
ca_workforce_by_year <- setNames(
  aggregate(ca$Civilian.Labor.Force, by = list(ca$Year), FUN = sum),
  c("Year", "ca_total_workforce")
)

# Same aggregation for the number of unemployed people.
ca_unemployment_by_year <- setNames(
  aggregate(ca$Unemployment, by = list(ca$Year), FUN = sum),
  c("Year", "ca_total_unemployment")
)

# Join the two yearly totals on their shared key, Year.
ca_unemployment_rate <- merge(
  ca_workforce_by_year,
  ca_unemployment_by_year,
  by = "Year"
)

# Unemployment rate = unemployed / labor force.
# NOTE: an earlier version divided ca_total_workforce by Year. That ran
# without error but is not a rate; the numerator must be the unemployment
# total and the denominator the labor-force total, exactly as in the
# national calculation below.
ca_unemployment_rate$rate <-
  ca_unemployment_rate$ca_total_unemployment /
  ca_unemployment_rate$ca_total_workforce

# Plot California's rate over time: Year on the x axis, rate on the y axis.
plot(
  ca_unemployment_rate$Year,
  ca_unemployment_rate$rate,
  type = "l",
  ylim = c(0, max(ca_unemployment_rate$rate)),
  xlab = "Year",
  ylab = "Unemployment rate"
)

# For context, repeat the same steps over every row of the data set.
# Total labor force per year across the whole data set.
national_labor <- setNames(
  aggregate(data$Civilian.Labor.Force, by = list(data$Year), FUN = sum),
  c("Year", "Total_National_Labor")
)

# Total unemployment per year across the whole data set.
national_unemployment <- setNames(
  aggregate(data$Unemployment, by = list(data$Year), FUN = sum),
  c("Year", "Total_National_Unemployment")
)

# Join the two national totals on Year.
national_unemployment_rate <- merge(
  national_labor,
  national_unemployment,
  by = "Year"
)

# National rate per year = unemployed / labor force.
national_unemployment_rate$rate <-
  national_unemployment_rate$Total_National_Unemployment /
  national_unemployment_rate$Total_National_Labor

# Plot the national rate over time (against Year, not row index).
plot(
  national_unemployment_rate$Year,
  national_unemployment_rate$rate,
  type = "l",
  xlab = "Year",
  ylab = "Unemployment rate"
)

# Overlay California (red) and national (blue) on one chart. The y range
# covers both series so neither line is clipped.
rate_max <- max(ca_unemployment_rate$rate, national_unemployment_rate$rate)
plot(
  ca_unemployment_rate$Year,
  ca_unemployment_rate$rate,
  type = "l",
  ylim = c(0, rate_max),
  col = "red",
  xlab = "Year",
  ylab = "Unemployment rate"
)
lines(
  national_unemployment_rate$Year,
  national_unemployment_rate$rate,
  type = "l",
  col = "blue"
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment