Created
June 25, 2019 16:29
-
-
Save shilohfling/053e7873fecc0a40326fd1723976d1fe to your computer and use it in GitHub Desktop.
An RScript for webscraping the names and associated programming language of each Python, R, and SQL course available on DataCamp.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
##################################################
# An R script for web scraping the names and     #
# associated programming language of each        #
# Python, R, and SQL course                      #
# available on DataCamp.                         #
##################################################
# Last updated: March 21, 2019                   #
##################################################
## Load packages -----
library(dplyr) | |
library(tidyr) | |
## Functions -----

#' Scrape course titles and descriptions from a DataCamp course listing page.
#'
#' Reads the page line by line, locates every line containing the literal tag
#' `<h4 class="course-block__title">`, takes the course title from that line
#' and the description from the line two below it.
#'
#' @param my_url URL (or local path) of the listing page; opened with `file()`.
#' @return A data frame with character columns `Course` and `Description`, one
#'   row per matched title line. When nothing matches, the result has zero rows
#'   but still carries both columns, and a warning is raised.
scrape_datacamp_courses <- function(my_url) {
  my_con <- file(my_url, "r")
  # Release the connection even if reading fails part-way through.
  on.exit(close(my_con), add = TRUE)
  my_html <- readLines(my_con, -1)

  # The tag is matched literally, so no regex escaping is needed.
  h4_loc <- grep("<h4 class=\"course-block__title\">", my_html, fixed = TRUE)
  if (length(h4_loc) == 0L) {
    warning("No course titles found at ", my_url, call. = FALSE)
  }

  # Drop the markup, then trim only the surrounding whitespace so that
  # multi-word titles keep their internal spaces.
  # NOTE(review): the previous pattern was a single space, which removed every
  # space inside the title. If it was meant to strip one specific invisible
  # character instead, add that character's removal here.
  courses <- trimws(gsub("<[^>]*>", "", my_html[h4_loc]), which = "both")

  # The description is read from the line two below each title line; an index
  # past the end of the page yields NA rather than an error.
  descr <- trimws(my_html[h4_loc + 2L], which = "both")

  # Explicit stringsAsFactors keeps the columns character on every R version.
  data.frame(Course = courses, Description = descr, stringsAsFactors = FALSE)
}
## Call the functions -----
# Scrape the listing for each technology and label its rows with the language.
# rep(..., nrow()) is used instead of a bare scalar because assigning a scalar
# column to a zero-row data frame with `$<-` raises an error.
py <- scrape_datacamp_courses(
  my_url = "https://www.datacamp.com/courses/tech:python"
)
py$Type <- rep("Python", nrow(py))

r <- scrape_datacamp_courses(
  my_url = "https://www.datacamp.com/courses/tech:r"
)
r$Type <- rep("R", nrow(r))

sql <- scrape_datacamp_courses(
  my_url = "https://www.datacamp.com/courses/tech:sql"
)
sql$Type <- rep("SQL", nrow(sql))
## Join data -----
# Stack the three per-language tables into one; union() on data frames is the
# dplyr set operation, which also drops duplicated rows.
df <- union(union(py, r), sql)
## Export data -----
# Write the combined table to a CSV in the working directory, with the current
# date embedded in the file name.
write.csv(
  x = df,
  file = paste0("data_camp_courses_", Sys.Date(), ".csv")
)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment