Created
October 23, 2014 11:52
-
-
Save devmag/3e550ee7517742ccf15a to your computer and use it in GitHub Desktop.
Downloading EJM links
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# Set the working directory to the job-openings folder.
# NOTE(review): absolute, machine-specific path -- adjust when running on
# another machine.
setwd("/Users/thiemo/Dropbox/Research/Job Market/Job Openings/")
# data.table provides as.data.table() and data.table(), both used below.
library(data.table)
### Very useful function from Timothee Carayol
# Drop rows that repeat an earlier combination of the key columns.
#
# Args:
#   data:   a data.frame or data.table (anything as.data.table() accepts).
#   varvec: vector naming the columns whose combination defines a duplicate.
#
# Returns:
#   A data.table with the first row of each distinct `varvec` combination,
#   in the original row order.
DTUniqueBy <- function(data, varvec) {
  data <- as.data.table(data)
  # duplicated() on a data.table takes `by`, so the key columns do not have to
  # be copied into a data.frame first (and no reassignable `F` is needed).
  data[!duplicated(data, by = varvec)]
}
options(stringsAsFactors = FALSE)

### ejm.html is a plain HTML file with the job listings
EJM <- readLines(con = "~/Dropbox/Research/Job Market/Job Openings/EJM/ejm.html")

### Position ids of every advertised job, in document order.
# gregexpr() + regmatches() return all matches on a line and the look-behind
# keeps only the digits after "posid=". A greedy "(.*)" prefix would keep just
# the last id on each line and lose listings when several links share a line.
# as.character() turns the NULL that unlist() gives for empty input into
# character(0).
URLS <- as.character(unlist(regmatches(
  EJM,
  gregexpr("(?<=AdDetails\\.php\\?posid=)[0-9]+", EJM, perl = TRUE)
)))

### Applications taken through EJM? (ids that have a PosApp.php link)
EJMAPPL <- as.character(unlist(regmatches(
  EJM,
  gregexpr("(?<=PosApp\\.php\\?posid=)[0-9]+", EJM, perl = TRUE)
)))

### Build basic data table
# Ids stay character until all links are built: a numeric id such as 100000
# would be rendered as "1e+05" when matched against EJMAPPL or pasted into a
# URL. sprintf() returns character(0) for empty input, so a page without
# listings yields an empty table instead of a malformed one-row table.
EJM <- data.table(
  posid = URLS,
  url = sprintf("https://econjobmarket.org/AdDetails.php?posid=%s", URLS)
)

## Remove duplicates
EJM <- DTUniqueBy(EJM, "posid")

## Create application link ("" when there is no PosApp.php link for the id)
EJM[, applylink := character(.N)]
EJM[
  posid %in% EJMAPPL,
  applylink := sprintf("https://econjobmarket.org/PosApp.php?posid=%s", posid)
]

## Store the id as a number, as the rest of the script expects
EJM[, posid := as.numeric(posid)]
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment