Created
November 14, 2011 07:09
-
-
Save ramnathv/1363424 to your computer and use it in GitHub Desktop.
Kiplinger's Best Value Rankings (Refactored)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# REFACTORED VERSION OF CODE IN GIST: git://gist.github.com/1362541.git
# FUNCTION TO GET TABLE BASED ON COLLEGE TYPE
#
# Builds the rankings URL by appending `college_type` as the value of the
# `table` query parameter (e.g. "prv_univ"), parses the HTML tables on that
# page with XML::readHTMLTable, and returns the first parsed table.
#
# Args:
#   college_type: string placed after "table=" in the request URL.
# Returns:
#   The first element of the list returned by XML::readHTMLTable.
# Errors:
#   Stops if the XML package is not installed or if no table was parsed.
get_table <- function(college_type) {
  # require() merely returns FALSE when the package is missing, which would
  # surface later as an unrelated "could not find function" error.
  if (!requireNamespace("XML", quietly = TRUE)) {
    stop("Package 'XML' is required to read the rankings table.",
         call. = FALSE)
  }
  url_base <- "http://www.kiplinger.com/tools/privatecolleges/index.php?table"
  url_1 <- paste(url_base, college_type, sep = "=")
  tab <- XML::readHTMLTable(url_1, header = TRUE, stringsAsFactors = FALSE)
  # Fail with a descriptive message instead of "subscript out of bounds".
  if (length(tab) == 0L) {
    stop("No HTML table found at ", url_1, call. = FALSE)
  }
  tab[[1]]
}
# LOAD BOTH RANKING TABLES INTO ONE DATA FRAME AND LABEL ITS COLUMNS
# Each list element is the table code passed to get_table(); the element
# names ("Private", "Liberal Arts") label the rows coming from each table.
college_types <- setNames(
  list("prv_univ", "lib_arts"),
  c("Private", "Liberal Arts")
)
mydf <- plyr::ldply(college_types, get_table)
names(mydf) <- c(
  "type", "rank", "name", "state", "admit_rate", "stud_per_faculty",
  "4y_grad_rate", "coa", "avg_need_aid", "avg_noneed_aid", "pct_noneed_aid",
  "avg_grad_debt"
)
# CONVERT COLUMNS WITH % AND $ INTO NUMBERS
# Strip "%", "$" and "," from every column, then let type.convert() pick each
# column's type (as.is = TRUE keeps non-numeric columns as character).
# Assigning into `mydf_cleaned[]` preserves the data frame's shape and column
# names. The earlier sapply() + adply(, 2, ...) route collapsed the data to a
# character matrix and row-bound the per-column results, which transposes the
# table and discards the converted types; it also called adply() without
# plyr being attached.
mydf_cleaned <- mydf
mydf_cleaned[] <- lapply(mydf, function(column) {
  type.convert(gsub("[%$,]", "", column), as.is = TRUE)
})
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment