# Scrape the bid result listing at bidplus.gem.gov.in into a data frame
# (`xdf`) with one row per ".block" element found on the page.
library(rvest)
library(dplyr)

pg <- read_html("https://bidplus.gem.gov.in/bidresultlists")
blocks <- html_nodes(pg, ".block")

# Text of the first node matching `xpath` inside each element of `nodes`.
#
# html_node() (singular) yields exactly one result per input node and a
# missing match becomes NA, so every column built with this helper has
# length(nodes) entries and entry i always belongs to block i. Collecting
# the fields with html_nodes() over the whole page instead drops missing
# fields, which shifts later values onto the wrong rows or leaves the
# columns with unequal lengths.
first_text <- function(nodes, xpath) {
  html_text(html_node(nodes, xpath = xpath), trim = TRUE)
}

# The "Item(s)" column of each block holds both the item name and quantity.
items_and_quantity <- html_node(
  blocks,
  xpath = ".//div[@class='col-block' and contains(., 'Item(s)')]"
)
items <- first_text(
  items_and_quantity,
  ".//strong[contains(., 'Item(s)')]/following-sibling::span"
)
quantity <- first_text(
  items_and_quantity,
  ".//strong[contains(., 'Quantity')]/following-sibling::span"
)
# Drop thousands separators so a value such as "1,000" parses as a number
# instead of being coerced to NA.
quantity <- as.numeric(gsub(",", "", quantity, fixed = TRUE))

# Newlines become "|" separators, then whitespace around each "|" is removed.
department_name_and_address <- first_text(
  blocks,
  ".//div[@class='col-block' and contains(., 'Department Name And Address')]"
) %>%
  gsub("\n", "|", .) %>%
  gsub("[[:space:]]*\\||\\|[[:space:]]*", "|", .)

block_header <- html_node(blocks, "div.block_header")

# Keep only what follows the last ": " in the bid number paragraph.
bid_no <- first_text(block_header, ".//p[contains(@class, 'bid_no')]") %>%
  gsub("^.*: ", "", .)
status <- first_text(
  block_header,
  ".//p/b[contains(., 'Status')]/following-sibling::span"
)

# Dates are kept as the character strings shown on the page.
start_date <- first_text(
  blocks,
  ".//strong[contains(., 'Start Date')]/following-sibling::span"
)
end_date <- first_text(
  blocks,
  ".//strong[contains(., 'End Date')]/following-sibling::span"
)

xdf <- data.frame(
  bid_no,
  status,
  start_date,
  end_date,
  items,
  quantity,
  department_name_and_address,
  stringsAsFactors = FALSE
)

# TRUE when the bid number contains "/RA/" (presumably a reverse auction;
# confirm against the site's numbering scheme).
xdf$is_ra <- grepl("/RA/", bid_no)

str(xdf)
## 'data.frame': 10 obs. of 8 variables:
## $ bid_no : chr "GEM/2018/B/93066" "GEM/2018/B/93082" "GEM/2018/B/93105" "GEM/2018/B/93999" ...
## $ status : chr "Not Evaluated" "Not Evaluated" "Not Evaluated" "Not Evaluated" ...
## $ start_date : chr "25-09-2018 03:53:pm" "27-09-2018 09:16:am" "25-09-2018 05:08:pm" "26-09-2018 05:21:pm" ...
## $ end_date : chr "18-10-2018 03:00:pm" "18-10-2018 03:00:pm" "18-10-2018 03:00:pm" "18-10-2018 03:00:pm" ...
## $ items : chr "automotive chassis fitted with engine" "automotive chassis fitted with engine" "automotive chassis fitted with engine" "Storage System" ...
## $ quantity : num 1 1 1 2 90 1 981 6 4 376
## $ department_name_and_address: chr "Department Name And Address:||Ministry Of Steel Na Kirandul Complex N/a" "Department Name And Address:||Ministry Of Steel Na Kirandul Complex N/a" "Department Name And Address:||Ministry Of Steel Na Kirandul Complex N/a" "Department Name And Address:||Maharashtra Energy Department Maharashtra Bhusawal Tps N/a" ...
## $ is_ra : logi FALSE FALSE FALSE FALSE FALSE FALSE ...
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This comment has been minimized.
Sir, would it be possible for me to ask you a question? Would you be able to help me?