Skip to content

Instantly share code, notes, and snippets.

@Jian-Qiao
Created August 22, 2017 16:55
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Jian-Qiao/5369372cc86c817b210c2cb46ec01a2e to your computer and use it in GitHub Desktop.
Save Jian-Qiao/5369372cc86c817b210c2cb46ec01a2e to your computer and use it in GitHub Desktop.
# Scrape the fight results of every event page listed in `EventsUrl` and
# combine them into one data frame.
#
# Inputs (defined outside this section):
#   EventsUrl  - vector of event page URLs; the text after the last '-' in
#                each URL is stored as Event_id.
#   EventsInfo - table whose second column supplies Event_Name for row i
#                (presumably row-aligned with EventsUrl -- confirm upstream).
# Outputs:
#   Fights_Scrapped - the scraped rows of all events, bound together.
#   Fights          - the same rows with every 'N/A' cell replaced by NA and
#                     the Time column parsed with the format in Tfmt.
# read_html / html_nodes / html_table / html_attr / html_text and the %>% pipe
# must already be attached.

# One slot per event. The filled slots are row-bound once after the loop
# instead of growing Fights_Scrapped with rbind() on every iteration.
event_fights <- vector('list', length(EventsUrl))

# seq_along() gives an empty sequence for an empty EventsUrl, whereas
# seq(1, 0) would iterate over c(1, 0).
for (i in seq_along(EventsUrl)) {
  Event <- read_html(EventsUrl[i])
  Result <- Event %>% html_nodes('table') %>% html_table()

  # Part 1: second table on the page (one row per fight plus a title row).
  Result_p1 <- as.data.frame(Result[2])
  if (nrow(Result_p1) != 0) {
    colnames(Result_p1) <- c(
      'Match', 'Fighter1', 'vs', 'Fighter2', 'Method', 'Round', 'Time'
    )
    # Delete title row
    Result_p1 <- Result_p1[-1, ]
    # Delete 'vs' column
    Result_p1 <- Result_p1[, -3]

    # Separate Method column. All three pieces are derived from the raw text
    # before Method itself is overwritten. gsub() is already vectorised, so no
    # sapply() wrapper is needed (and zero rows stay a character vector).
    raw_method_p1 <- Result_p1$Method
    # Referee: whatever follows the last ')'.
    Result_p1$Referee <- gsub('^(.*)\\)', '', raw_method_p1)
    # Method_D: text after the last '(' up to the next ')'.
    Result_p1$Method_D <- gsub('\\)(.*)', '', gsub('^(.*)\\(', '', raw_method_p1))
    # Method: whatever precedes the first '('.
    Result_p1$Method <- gsub('\\(.*', '', raw_method_p1)

    # Fighter URLs: the hrefs are laid out row-wise, two per fight.
    Fighter_p1 <- Event %>%
      html_nodes('.fighter_result_data a') %>%
      html_attr('href')
    Fighter_p1 <- as.data.frame(matrix(Fighter_p1, ncol = 2, byrow = TRUE))
    colnames(Fighter_p1) <- c('Fighter1_url', 'Fighter2_url')
    rownames(Result_p1) <- NULL
    Result_p1 <- cbind(Result_p1, Fighter_p1)

    # Part 2: first table on the page.
    # sub() coerces the table to one string per column and strips the leading
    # "<letters> " label from each; t() turns that vector into a single row.
    # assumes the first table holds one row of five cells -- TODO confirm
    p2_cells <- sub('^([a-zA-Z]* )', '', Result[[1]])
    Result_p2 <- as.data.frame(t(p2_cells))
    colnames(Result_p2) <- c('Match', 'Method', 'Referee', 'Round', 'Time')

    # Names of fighters: first two matches in document order.
    temp <- Event %>%
      html_nodes('.right_side a span , .left_side a span') %>%
      html_text()
    Result_p2$Fighter1 <- temp[1]
    Result_p2$Fighter2 <- temp[2]

    # Separate Method column (same split as Part 1; Referee already has its
    # own cell in this table).
    raw_method_p2 <- Result_p2$Method
    Result_p2$Method_D <- gsub('\\)(.*)', '', gsub('^(.*)\\(', '', raw_method_p2))
    Result_p2$Method <- gsub('\\(.*', '', raw_method_p2)

    # Fighter URLs
    temp1 <- Event %>% html_nodes('.left_side a') %>% html_attr('href')
    Result_p2$Fighter1_url <- temp1[1]
    temp2 <- Event %>% html_nodes('.right_side a') %>% html_attr('href')
    Result_p2$Fighter2_url <- temp2[1]

    # Fight date & location
    D_L <- Event %>% html_nodes('.authors_info span') %>% html_text()

    # Bind together; rbind() on data frames matches columns by name, so the
    # differing column order of the two parts does not matter.
    Final <- rbind(Result_p1, Result_p2)
    Final$Event_Name <- EventsInfo[i, 2]
    Final$Event_id <- gsub('.*-', '', EventsUrl[i])
    Final$Date <- D_L[1]
    Final$Location <- D_L[2]
    event_fights[[i]] <- Final
  }
  # Progress indicator
  print(i)
}

# Drop the slots of events that were skipped, then bind everything in one go.
event_fights <- Filter(Negate(is.null), event_fights)
Fights_Scrapped <- if (length(event_fights) > 0) {
  do.call(rbind, event_fights)
} else {
  data.frame()
}

# Replace the literal 'N/A' placeholder by a real NA in every column.
# lapply() keeps one result per column regardless of the row count (sapply()
# collapses to a plain vector when there is exactly one row), and the explicit
# as.character() avoids ifelse() returning integer codes for factor columns.
Fights <- as.data.frame(lapply(Fights_Scrapped, function(x) {
  x <- as.character(x)
  x[!is.na(x) & x == 'N/A'] <- NA
  x
}))

Tfmt <- '%M:%S'
# NOTE(review): strptime() returns POSIXlt and fills the unspecified
# year/month/day with the current date. The class is kept as-is so existing
# consumers of Fights$Time keep working; consider as.POSIXct() or a plain
# number of seconds if nothing depends on POSIXlt fields.
Fights$Time <- strptime(Fights$Time, format = Tfmt)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment