nitisethi28/gist:23ed95a9f3ac490a9c319ffc0ef80537

## gistfile1.txt
library(rvest)
install.packages("tidyverse")
library(tidyverse)
install.packages("dplyr")
library(dplyr)
install.packages("tidyr")
library(tidyr)
# Fetch the first results page; its pagination bar is used to work out how
# many pages there are.
pg <- read_html("https://bidplus.gem.gov.in/bidresultlists?bidresultlists&page_no=1")

# Find total number of pages ----
# The fifth pagination anchor is selected, every attribute value on it is run
# through parse_number(), and duplicate numbers are collapsed.
# NOTE(review): presumably the fifth anchor is the "last page" link -- confirm
# against the live markup.
pagination_items <- html_nodes(html_nodes(pg, ".pagination"), "li")
pagination_links <- html_nodes(pagination_items, "a")
fifth_link_attrs <- unlist(html_attrs(pagination_links[5]))
page_num <- unique(parse_number(fifth_link_attrs))

#make function for scraping page
#' Scrape one page of bid results into a data frame.
#'
#' Downloads the result-list page numbered `i` and extracts one row per
#' `.block` element found on it.
#'
#' Every field is looked up once per block with `html_node()`, so a block that
#' lacks a field contributes `NA` for that column instead of shortening the
#' column. This keeps all columns the same length and keeps each value on the
#' row of the bid it belongs to.
#'
#' @param i Page number appended to the `page_no=` query parameter.
#' @return A data frame with one row per bid block and the columns `bid_no`,
#'   `status`, `start_date`, `end_date`, `items`, `quantity`,
#'   `department_name_and_address` and `is_ra` (`TRUE` when `bid_no` contains
#'   "/RA/").
scr <- function(i) {
  pg <- read_html(paste0("https://bidplus.gem.gov.in/bidresultlists?bidresultlists&page_no=", i))
  blocks <- html_nodes(pg, ".block")

  # Trimmed text of the first match of `xpath` under each node; NA when a
  # node has no match, so the result always has one element per node.
  first_text <- function(nodes, xpath) {
    html_text(html_node(nodes, xpath = xpath), trim = TRUE)
  }

  items_and_quantity <- html_node(
    blocks,
    xpath = ".//div[@class='col-block' and contains(., 'Item(s)')]"
  )
  items <- first_text(
    items_and_quantity,
    ".//strong[contains(., 'Item(s)')]/following-sibling::span"
  )
  quantity <- as.numeric(first_text(
    items_and_quantity,
    ".//strong[contains(., 'Quantity')]/following-sibling::span"
  ))

  # Collapse the address onto one line: newlines become "|" and whitespace
  # around each "|" is dropped.
  department_name_and_address <- first_text(
    blocks,
    ".//div[@class='col-block' and contains(., 'Department Name And Address')]"
  )
  department_name_and_address <- gsub("\n", "|", department_name_and_address)
  department_name_and_address <- gsub(
    "[[:space:]]*\\||\\|[[:space:]]*", "|", department_name_and_address
  )

  block_header <- html_node(blocks, "div.block_header")

  # Keep only what follows the last ": " in the bid-number paragraph.
  bid_no <- gsub(
    "^.*: ", "",
    first_text(block_header, ".//p[contains(@class, 'bid_no')]")
  )
  status <- first_text(
    block_header,
    ".//p/b[contains(., 'Status')]/following-sibling::span"
  )
  start_date <- first_text(
    blocks,
    ".//strong[contains(., 'Start Date')]/following-sibling::span"
  )
  end_date <- first_text(
    blocks,
    ".//strong[contains(., 'End Date')]/following-sibling::span"
  )

  data.frame(
    bid_no = bid_no,
    status = status,
    start_date = start_date,
    end_date = end_date,
    items = items,
    quantity = quantity,
    department_name_and_address = department_name_and_address,
    is_ra = grepl("/RA/", bid_no),
    stringsAsFactors = FALSE
  )
}
# Scrape every page from `first_page` up to the last page and stack the
# per-page data frames into one.
# NOTE(review): the run starts at page 31 rather than 1 -- presumably resuming
# an earlier batch, since xdf.csv is written in append mode; confirm before
# reuse.
first_page <- 31
last_page <- page_num[1]
if (is.na(last_page)) {
  stop("Could not determine the total number of pages.", call. = FALSE)
}
# `31:page_num` would silently count downwards when fewer than 31 pages
# exist; scrape nothing in that case instead.
pages <- if (last_page >= first_page) seq(first_page, last_page) else integer(0)
res <- map_df(pages, scr)

#res<-1:15%>%map_df(.,scr)

# Append the scraped rows to xdf.csv without a header line.
if (nrow(res) > 0) {
  write.table(
    res,
    file = "xdf.csv",
    append = TRUE,
    sep = ",",
    row.names = FALSE,
    col.names = FALSE
  )
}
	library(rvest)
	install.packages("tidyverse")
	library(tidyverse)
	install.packages("dplyr")
	library(dplyr)
	install.packages("tidyr")
	library(tidyr)
	# Fetch the first results page; its pagination bar is used to work out how
	# many pages there are.
	# NOTE(review): this repeats the page-count lookup performed earlier in the
	# file.
	pg <- read_html("https://bidplus.gem.gov.in/bidresultlists?bidresultlists&page_no=1")

	# Find total number of pages ----
	# The fifth pagination anchor is selected, every attribute value on it is run
	# through parse_number(), and duplicate numbers are collapsed.
	# NOTE(review): presumably the fifth anchor is the "last page" link -- confirm
	# against the live markup.
	pagination_items <- html_nodes(html_nodes(pg, ".pagination"), "li")
	pagination_links <- html_nodes(pagination_items, "a")
	fifth_link_attrs <- unlist(html_attrs(pagination_links[5]))
	page_num <- unique(parse_number(fifth_link_attrs))

	#make function for scraping page
	#' Scrape one page of bid results into a data frame.
	#'
	#' Downloads the result-list page numbered `i` and extracts one row per
	#' `.block` element found on it.
	#'
	#' Every field is looked up once per block with `html_node()`, so a block that
	#' lacks a field contributes `NA` for that column instead of shortening the
	#' column. This keeps all columns the same length and keeps each value on the
	#' row of the bid it belongs to.
	#'
	#' @param i Page number appended to the `page_no=` query parameter.
	#' @return A data frame with one row per bid block and the columns `bid_no`,
	#'   `status`, `start_date`, `end_date`, `items`, `quantity`,
	#'   `department_name_and_address` and `is_ra` (`TRUE` when `bid_no` contains
	#'   "/RA/").
	scr <- function(i) {
	  pg <- read_html(paste0("https://bidplus.gem.gov.in/bidresultlists?bidresultlists&page_no=", i))
	  blocks <- html_nodes(pg, ".block")

	  # Trimmed text of the first match of `xpath` under each node; NA when a
	  # node has no match, so the result always has one element per node.
	  first_text <- function(nodes, xpath) {
	    html_text(html_node(nodes, xpath = xpath), trim = TRUE)
	  }

	  items_and_quantity <- html_node(
	    blocks,
	    xpath = ".//div[@class='col-block' and contains(., 'Item(s)')]"
	  )
	  items <- first_text(
	    items_and_quantity,
	    ".//strong[contains(., 'Item(s)')]/following-sibling::span"
	  )
	  quantity <- as.numeric(first_text(
	    items_and_quantity,
	    ".//strong[contains(., 'Quantity')]/following-sibling::span"
	  ))

	  # Collapse the address onto one line: newlines become "|" and whitespace
	  # around each "|" is dropped. A literal "|" needs no backslash inside an R
	  # string ("\|" is not a valid escape and stops the file from parsing); only
	  # the regex needs "\\|" to match the character itself.
	  department_name_and_address <- first_text(
	    blocks,
	    ".//div[@class='col-block' and contains(., 'Department Name And Address')]"
	  )
	  department_name_and_address <- gsub("\n", "|", department_name_and_address)
	  department_name_and_address <- gsub(
	    "[[:space:]]*\\||\\|[[:space:]]*", "|", department_name_and_address
	  )

	  block_header <- html_node(blocks, "div.block_header")

	  # Keep only what follows the last ": " in the bid-number paragraph.
	  bid_no <- gsub(
	    "^.*: ", "",
	    first_text(block_header, ".//p[contains(@class, 'bid_no')]")
	  )
	  status <- first_text(
	    block_header,
	    ".//p/b[contains(., 'Status')]/following-sibling::span"
	  )
	  start_date <- first_text(
	    blocks,
	    ".//strong[contains(., 'Start Date')]/following-sibling::span"
	  )
	  end_date <- first_text(
	    blocks,
	    ".//strong[contains(., 'End Date')]/following-sibling::span"
	  )

	  data.frame(
	    bid_no = bid_no,
	    status = status,
	    start_date = start_date,
	    end_date = end_date,
	    items = items,
	    quantity = quantity,
	    department_name_and_address = department_name_and_address,
	    is_ra = grepl("/RA/", bid_no),
	    stringsAsFactors = FALSE
	  )
	}
	# Scrape every page from `first_page` up to the last page and stack the
	# per-page data frames into one.
	# NOTE(review): the run starts at page 31 rather than 1 -- presumably resuming
	# an earlier batch, since xdf.csv is written in append mode. The same pages
	# are also scraped and appended by the identical step earlier in the file;
	# confirm whether both runs are wanted.
	first_page <- 31
	last_page <- page_num[1]
	if (is.na(last_page)) {
	  stop("Could not determine the total number of pages.", call. = FALSE)
	}
	# `31:page_num` would silently count downwards when fewer than 31 pages
	# exist; scrape nothing in that case instead.
	pages <- if (last_page >= first_page) seq(first_page, last_page) else integer(0)
	res <- map_df(pages, scr)

	#res<-1:15%>%map_df(.,scr)

	# Append the scraped rows to xdf.csv without a header line.
	if (nrow(res) > 0) {
	  write.table(
	    res,
	    file = "xdf.csv",
	    append = TRUE,
	    sep = ",",
	    row.names = FALSE,
	    col.names = FALSE
	  )
	}