Created
December 10, 2014 16:24
-
-
Save justgrimes/07079a03c66f2ad054d3 to your computer and use it in GitHub Desktop.
Introduction to R - SDC Annual Meeting 2014
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#SDC Annual Meeting 2014 Introduction to R, R Studio, and Statistical Computing | |
# Load downloaded data from PLS 2012 survey, CSV format (comma seperated value) | |
pupld12a_csv <- read.csv("pupld12a_csv/pupld12a.csv", header = TRUE, sep=",", quote = "\"") | |
puout12a_csv <- read.csv("pupld12a_csv/puout12a.csv", header = TRUE, sep=",", quote = "\"") | |
pusum12a_csv <- read.csv("pupld12a_csv/pusum12a.csv", header = TRUE, sep=",", quote = "\"") | |
# Load downloaded data from PLS 2012 survey, Excel format (xls) | |
#install.packages("gdata") | |
library(gdata) #library to load xls data; if loading xlsx files use XLConnect library | |
pupld12a_xls <- read.xls("pupld12a_xls/pupld12a.xls", sheet = 1, header = TRUE) | |
puout12a_xls <- read.xls("pupld12a_xls/puout12a.xls", sheet = 1, header = TRUE) | |
pusum12a_xls <- read.xls("pupld12a_xls/pusum12a.xls", sheet = 1, header = TRUE) | |
# Load downloaded data from PLS 2012 survey, SAS format (sas7bdat) | |
#install.packages("sas7bdat") | |
library(sas7bdat) | |
pupld12a_sas <- read.sas7bdat("pupld12a_sas/pupld12a.sas7bdat") | |
puout12a_sas <- read.sas7bdat("pupld12a_sas/puout12a.sas7bdat") | |
pusum12a_sas <- read.sas7bdat("pupld12a_sas/pusum12a.sas7bdat") | |
# Load PLS 2012 directly from the web | |
temp <- tempfile() | |
download.file("http://www.imls.gov/assets/1/AssetManager/pupld12a_csv.zip", temp, mode="wb") | |
unzip(temp, "Pupld12a.csv") | |
unzip(temp, "Puout12a.csv") | |
unzip(temp, "Pusum12a.csv") | |
pupld12a <- read.csv("Pupld12a.csv", header = TRUE, sep=",", quote = "\"", na.strings=c(".","-1","-3")) | |
puout12a <- read.csv("Puout12a.csv", header = TRUE, sep=",", quote = "\"", na.strings=c(".","-1","-3")) | |
pusum12a <- read.csv("Pusum12a.csv", header = TRUE, sep=",", quote = "\"", na.strings=c(".","-1","-3")) | |
#Keep columns (i.e. drop imputed columns) | |
keepme <- c("FSCSKEY","STABR","LIBID","LIBNAME","ADDRESS","CITY","ZIP","ZIP4","ADDRES_M","CITY_M","ZIP_M","ZIP4_M", | |
"CNTY","PHONE","C_RELATN","C_LEGBAS","C_ADMIN","C_FSCS","GEOCODE","LSABOUND","STARTDAT","ENDDATE", | |
"POPU_LSA","POPU_UND","CENTLIB","BRANLIB","BKMOB","MASTER","LIBRARIA","OTHPAID","TOTSTAFF","LOCGVT" | |
,"STGVT","FEDGVT","OTHINCM","TOTINCM","SALARIES","BENEFIT","STAFFEXP","PRMATEXP", | |
"ELMATEXP","OTHMATEX","TOTEXPCO","OTHOPEXP","TOTOPEXP","LCAP_REV","SCAP_REV","OCAP_REV","CAP_REV", | |
"CAPITAL","BKVOL","EBOOK","AUDIO_PH","AUDIO_DL","VIDEO_PH","VIDEO_DL","DB_LO_OT","DB_ST","DATABASE", | |
"SUBSCRIP","HRS_OPEN","VISITS","REFERENC","REGBOR","TOTCIR","KIDCIRCL","LOANTO","LOANFM", | |
"TOTPRO","KIDPRO","YAPRO","TOTATTEN","KIDATTEN","YAATTEN","GPTERMS","PITUSR","YR_SUB","OBEREG", | |
"RSTATUS","STATSTRU","STATNAME","STATADDR","LONGITUD","LATITUDE","FIPSST","FIPSCO","FIPSPLAC", | |
"CNTYPOP","LOCALE","CENTRACT","CENBLOCK","CDCODE","CBSA","MICROF","GAL","GALMS","POSTMS") | |
mydf <- subset(pupld12a, select = keepme) | |
#pupld12a <- pupld12a[keepme] | |
#remove terroritories | |
mydf <- subset(mydf, STABR!="MP") | |
mydf <- subset(mydf, STABR!="VI") | |
mydf <- subset(mydf, STABR!="GU") | |
mydf <- subset(mydf, STABR!="MH") | |
mydf <- subset(mydf, STABR!="PW") | |
mydf <- subset(mydf, STABR!="PR") | |
mydf <- subset(mydf, STABR!="AS") | |
#remove non fscs data | |
mydf <- subset(mydf, C_FSCS!="N") | |
#remove closed or temporarily closed libraries | |
mydf <- subset(mydf, STATSTRU!=3) | |
mydf <- subset(mydf, STATSTRU!=23) | |
# You can also select data from clipboard | |
#read.table("clipboard") | |
#write/export data as csv | |
write.csv(pusum12a, file="export.csv") | |
#view data | |
mydf | |
head(mydf,5) | |
tail(mydf,5) | |
#count number of rows/columns | |
nrow(mydf) #count row | |
ncol(mydf) #count cols | |
dim(mydf) #get dimensions | |
#get column names | |
colnames(mydf) | |
#select an element of a data frame | |
mydf[1,1] | |
#select a row of a data frame | |
mydf[10,] | |
#select multiple rows of a data frame | |
mydf[c(1,3),] | |
mydf[1:7,] | |
#select a column | |
mydf[,1] | |
mydf$STABR | |
#count | |
length(mydf$VISIT) | |
#find missing | |
which(is.na(mydf$VISIT)) | |
#count missing | |
length(which(is.na(mydf$VISIT))) | |
#sum | |
sum(mydf$VISIT) | |
sum(mydf$VISIT,na.rm=TRUE) | |
#basic statistics | |
mean(mydf$VISIT,na.rm=TRUE) | |
median(mydf$VISIT,na.rm=TRUE) | |
var(mydf$VISIT,na.rm=TRUE) | |
#summarize data | |
summary(mydf) | |
#Sort asc | |
mydf2 <- mydf[order(mydf$VISITS),] | |
#Sort desc | |
mydf2 <- mydf[order(-mydf$VISITS),] | |
#Create a new variable | |
mydf$VISITSPERCAP <- mydf$VISITS/mydf$POPU_UND | |
mydf$CIRCPERCAP <- mydf$TOTCIR/mydf$POPU_UND | |
sum(mydf$VISITS,na.rm=TRUE)/sum(mydf$POPU_UND,na.rm=TRUE) | |
sum(as.numeric(mydf$TOTCIR,na.rm=TRUE))/sum(mydf$POPU_UND,na.rm=TRUE) | |
#Round a value | |
round(sum(mydf$VISITS,na.rm=TRUE)/sum(mydf$POPU_UND,na.rm=TRUE),2) | |
#Recode a value | |
mydf$LOCALE4[mydf$LOCALE == 11 | mydf$LOCALE == 12 | mydf$LOCALE == 13] <- 1 | |
mydf$LOCALE4[mydf$LOCALE == 21 | mydf$LOCALE == 22 | mydf$LOCALE == 23] <- 2 | |
mydf$LOCALE4[mydf$LOCALE == 31 | mydf$LOCALE == 32 | mydf$LOCALE == 33] <- 3 | |
mydf$LOCALE4[mydf$LOCALE == 41 | mydf$LOCALE == 42 | mydf$LOCALE == 43] <- 4 | |
mydf$LOCALE4[mydf$LOCALE4==1] <- "City" | |
mydf$LOCALE4[mydf$LOCALE4==2] <- "Suburb" | |
mydf$LOCALE4[mydf$LOCALE4==3] <- "Town" | |
mydf$LOCALE4[mydf$LOCALE4==4] <- "Rural" | |
mydf$LOCALE4 <- as.factor(mydf$LOCALE4) | |
#Rename a column | |
names(mydf)[names(mydf)=="LOCALE"] <- "LOCALE12" | |
# List types of columns in data frame | |
str(mydf) | |
#Summarize data | |
ptab <- table(mydf$STABR, mydf$LOCALE4) | |
#Save table as data frame | |
df <- as.data.frame(ptab) | |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment