## devmag/ejm-details

## ejm-details
library(XML)
library(stringr)
library(RCurl)
library(plyr)
library(data.table)

### Requires the EJM object created by the previous script.

### Next, fetch the information about each job opening (university / description / deadline).

### Scrape the detail page of every posting in EJM and collect the fields in
### DETAILS: a character matrix with one row per posting and the columns
### posid, institution, position, field, deadline and description.
### NOTE(review): EJM[i] is used as a row lookup, so EJM is presumably a
### data.table with `url` and `posid` columns -- confirm against the script
### that builds it.

## Remove a "<label>: " marker from a table cell and trim the whitespace.
strip_label <- function(cell, label) {
  str_trim(gsub(paste0(label, ": (.*)"), "\\1", cell))
}

## One slot per posting; the rows are bound once after the loop instead of
## growing DETAILS with rbind() on every iteration.
detail_rows <- vector("list", nrow(EJM))

## seq_len() makes the loop a no-op when EJM has no rows, whereas 1:nrow(EJM)
## would iterate over 1 and 0.
for (i in seq_len(nrow(EJM))) {
  cat(i, " ")  # progress indicator

  page <- getURL(EJM[i]$url)
  ## Keep only the first column of the second table readHTMLTable() finds.
  cells <- readHTMLTable(page)[[2]][, 1]
  cells <- gsub("\\r|\\n", "", cells)  # drop embedded line breaks

  ## Fields are addressed by fixed position (1, 5, 6, 7, 8) in that column.
  detail_rows[[i]] <- cbind(
    posid = EJM[i]$posid,
    institution = cells[1],
    position = strip_label(cells[5], "Position Type"),
    field = strip_label(cells[6], "Categories/Specialties"),
    deadline = strip_label(cells[7], "Deadline"),
    description = strip_label(cells[8], "Description")
  )
}

## Yields NULL when there were no postings, matching the initial value the
## loop used to start from.
DETAILS <- do.call(rbind, detail_rows)


### Convert the scraped matrix to a data.table and turn posid back into a
### number so it can be matched against EJM.
DETAILS <- data.table(DETAILS)
DETAILS$posid <- as.numeric(as.character(DETAILS$posid))

### Attach the scraped details to EJM (join() picks the shared columns).
EJM <- join(x = EJM, y = DETAILS)

### Columns of the combined object:
names(EJM)

# [1] "posid"       "url"         "applylink"   "institution" "position"    "field"       "deadline"
# [8] "description"
### Write everything to a CSV file that can be browsed by hand.
write.csv(x = EJM, file = "econjobmarket.csv")
	library(XML)
	library(stringr)
	library(RCurl)
	library(plyr)
	library(data.table)

	###YOU NEED THE PREVIOUS OBJECT EJM

	###NOW WE WANT TO GET THE INFORMATION ABOUT EACH JOB OPENING (UNIVERSITY / DESCRIPTION / DEADLINE)

	### Scrape the detail page of every posting in EJM and collect the fields in
	### DETAILS: a character matrix with one row per posting and the columns
	### posid, institution, position, field, deadline and description.
	### NOTE(review): this section repeats the scraping step that already appears
	### earlier in this file -- confirm whether running it twice is intended.

	## Remove a "<label>: " marker from a table cell and trim the whitespace.
	## The capture is `.*` rather than a single `.` so that a label with nothing
	## after it is still stripped.
	strip_label <- function(cell, label) {
	  str_trim(gsub(paste0(label, ": (.*)"), "\\1", cell))
	}

	## One slot per posting; the rows are bound once after the loop instead of
	## growing DETAILS with rbind() on every iteration.
	detail_rows <- vector("list", nrow(EJM))

	## seq_len() makes the loop a no-op when EJM has no rows, whereas 1:nrow(EJM)
	## would iterate over 1 and 0.
	for (i in seq_len(nrow(EJM))) {
	  cat(i, " ")  # progress indicator

	  page <- getURL(EJM[i]$url)
	  ## Keep only the first column of the second table readHTMLTable() finds.
	  cells <- readHTMLTable(page)[[2]][, 1]
	  ## The alternation must be a plain `|`: `\|` is not a valid escape in an R
	  ## string literal and stops the file from parsing.
	  cells <- gsub("\\r|\\n", "", cells)

	  ## Fields are addressed by fixed position (1, 5, 6, 7, 8) in that column.
	  detail_rows[[i]] <- cbind(
	    posid = EJM[i]$posid,
	    institution = cells[1],
	    position = strip_label(cells[5], "Position Type"),
	    field = strip_label(cells[6], "Categories/Specialties"),
	    deadline = strip_label(cells[7], "Deadline"),
	    description = strip_label(cells[8], "Description")
	  )
	}

	## Yields NULL when there were no postings, matching the initial value the
	## loop used to start from.
	DETAILS <- do.call(rbind, detail_rows)


	### Convert the scraped matrix to a data.table and turn posid back into a
	### number so it can be matched against EJM.
	DETAILS <- data.table(DETAILS)
	DETAILS$posid <- as.numeric(as.character(DETAILS$posid))

	### Attach the scraped details to EJM (join() picks the shared columns).
	EJM <- join(x = EJM, y = DETAILS)

	### Columns of the combined object:
	names(EJM)

	# [1] "posid" "url" "applylink" "institution" "position" "field" "deadline"
	# [8] "description"
	### Write everything to a CSV file that can be browsed by hand.
	write.csv(x = EJM, file = "econjobmarket.csv")