Skip to content

Instantly share code, notes, and snippets.

@nitisethi28
Created October 23, 2018 10:10
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nitisethi28/23ed95a9f3ac490a9c319ffc0ef80537 to your computer and use it in GitHub Desktop.
Save nitisethi28/23ed95a9f3ac490a9c319ffc0ef80537 to your computer and use it in GitHub Desktop.
Error in data.frame(bid_no, status, start_date, end_date, items, quantity, : arguments imply differing number of rows: 11, 10
library(rvest)
install.packages("tidyverse")
library(tidyverse)
install.packages("dplyr")
library(dplyr)
install.packages("tidyr")
library(tidyr)
# Fetch the first results page; its pagination widget is read below to find
# the total number of pages.
pg <- read_html("https://bidplus.gem.gov.in/bidresultlists?bidresultlists&page_no=1")

# All anchors inside the list items of the pagination widget.
pagination_items <- html_nodes(html_nodes(pg, ".pagination"), "li")
pagination_links <- html_nodes(pagination_items, "a")

# The fifth pagination link is used as the source of the page count
# (NOTE(review): presumably the "last page" link -- confirm against the live
# markup). Numbers are parsed out of all of its attributes and de-duplicated.
fifth_link_attrs <- unlist(html_attrs(pagination_links[5]))
page_num <- unique(parse_number(fifth_link_attrs))
# Scrape one results page into a data frame with one row per bid block.
#
# i: page number, appended to the listing URL's `page_no` query parameter.
#
# Returns a data.frame with the columns bid_no, status, start_date, end_date,
# items, quantity, department_name_and_address and is_ra (TRUE when the bid
# number contains "/RA/").
#
# Every field is looked up once per `.block` node with html_node(), which
# yields a missing node (read back as NA) when a block lacks that field, so
# all columns always have exactly one entry per block. Collecting each field
# with page-wide html_nodes() calls produced vectors of different lengths
# whenever the number of matches differed between fields, and data.frame()
# then failed with "arguments imply differing number of rows".
scr <- function(i) {
  pg <- read_html(paste0(
    "https://bidplus.gem.gov.in/bidresultlists?bidresultlists&page_no=",
    i
  ))
  blocks <- html_nodes(pg, ".block")

  # Trimmed text of the first node matching `xpath` under each of `nodes`;
  # NA where a node has no match.
  first_text <- function(nodes, xpath) {
    html_text(html_node(nodes, xpath = xpath), trim = TRUE)
  }

  # One "Item(s)/Quantity" column per block (missing node if absent).
  items_and_quantity <- html_node(
    blocks,
    xpath = ".//div[@class='col-block' and contains(., 'Item(s)')]"
  )
  items <- first_text(
    items_and_quantity,
    ".//strong[contains(., 'Item(s)')]/following-sibling::span"
  )
  quantity <- as.numeric(first_text(
    items_and_quantity,
    ".//strong[contains(., 'Quantity')]/following-sibling::span"
  ))

  # Flatten the multi-line department text into "|"-separated fields and
  # strip whitespace around the separators.
  department_name_and_address <- first_text(
    blocks,
    ".//div[@class='col-block' and contains(., 'Department Name And Address')]"
  )
  department_name_and_address <- gsub("\n", "|", department_name_and_address)
  department_name_and_address <- gsub(
    "[[:space:]]*\\||\\|[[:space:]]*",
    "|",
    department_name_and_address
  )

  # One header per block; bid number and status live inside it.
  block_header <- html_node(blocks, "div.block_header")
  bid_no <- first_text(block_header, ".//p[contains(@class, 'bid_no')]")
  # Drop the label in front of the number (everything up to the last ": ").
  bid_no <- gsub("^.*: ", "", bid_no)
  status <- first_text(
    block_header,
    ".//p/b[contains(., 'Status')]/following-sibling::span"
  )

  start_date <- first_text(
    blocks,
    ".//strong[contains(., 'Start Date')]/following-sibling::span"
  )
  end_date <- first_text(
    blocks,
    ".//strong[contains(., 'End Date')]/following-sibling::span"
  )

  xdf <- data.frame(
    bid_no,
    status,
    start_date,
    end_date,
    items,
    quantity,
    department_name_and_address,
    stringsAsFactors = FALSE
  )
  xdf$is_ra <- grepl("/RA/", bid_no)
  xdf
}
# Scrape pages 31 through page_num with scr() and row-bind the per-page data
# frames into a single data frame.
res <- map_df(31:page_num, scr)
# res <- map_df(1:15, scr)

# Append the rows to xdf.csv without a header line (presumably so batches
# scraped in separate runs accumulate in one file -- the file therefore never
# gets column names). TRUE/FALSE are spelled out because T and F are ordinary
# variables that can be reassigned.
write.table(
  res,
  file = "xdf.csv",
  append = TRUE,
  sep = ",",
  row.names = FALSE,
  col.names = FALSE
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment