Create a file called ids.txt. Put in all branch IDs you can find for SBI. Run scrape.sh to get data from the SBI website.
You might have to update the cookies in scrape.sh.
Create a file called ids.txt. Put in all branch IDs you can find for SBI. Run scrape.sh to get data from the SBI website.
You might have to update the cookies in scrape.sh.
require 'nokogiri'
require 'csv'

# Builds sbi.csv from the pages saved under tables/*.html.
#
# For every file, the text of each <td> element is collected and stripped;
# the cells at odd positions (1, 3, ..., 33) become one CSV row, followed by
# the path of the file the row came from.

# Column names written as the first row of sbi.csv, in output order.
CSV_HEADERS = %w[
  NAME CODE ADDRESS CIRCLE MODULE PIN DISTRICT STATE STD
  CONTACT FAX EMAIL IFSC HOURS HOLIDAY FOREX MICR FILE
].freeze

# Positions of the <td> cells that are exported, one per column before FILE.
# Presumably the even-numbered cells hold the field labels — TODO confirm
# against a saved page.
VALUE_CELLS = 1.step(33, 2).to_a.freeze

# The block form closes (and therefore flushes) sbi.csv even if a page fails
# to parse part-way through the run.
CSV.open("sbi.csv", "wb") do |csv|
  csv << CSV_HEADERS

  Dir.glob("tables/*.html") do |file|
    # NOTE(review): the inputs are .html files but are parsed with the XML
    # parser. The cell positions above were presumably worked out against this
    # parser's output, so confirm they still line up before switching to
    # Nokogiri::HTML.
    document = File.open(file) { |f| Nokogiri::XML(f) }
    cells = document.css('td').map { |td| td.text.strip }

    # values_at yields nil for positions past the end of the page, which CSV
    # writes as an empty field.
    csv << (cells.values_at(*VALUE_CELLS) << file)
  end
end
#!/bin/bash
# Downloads the SBI branch-locator "showDetails" page for every branch ID
# listed in ids.txt (one ID per line) and saves each response as
# tables/<5-digit zero-padded code>.html.

# Session cookies sent with every request. These are tied to a browser
# session and may need to be replaced with fresh values.
COOKIES='JSESSIONID=0000j6GVZAZFtwb7-TH3klEbUiW:1doiub59r; COOKIE_SUPPORT=true; GUEST_LANGUAGE_ID=en_US; LFR_SESSION_STATE_104=1582806711511'

# The redirect target below would fail on a fresh checkout without this.
mkdir -p tables

# `|| [ -n "$line" ]` keeps the final ID when ids.txt lacks a trailing newline.
while IFS= read -r line || [ -n "$line" ]; do
  # Drop surrounding whitespace, including a trailing CR from CRLF files.
  line=${line//[[:space:]]/}
  [ -z "$line" ] && continue

  if ! [[ $line =~ ^[0-9]+$ ]]; then
    echo "skipping non-numeric branch id: $line" >&2
    continue
  fi

  echo "$line"

  # Force base 10: a bare leading zero would make printf read the ID as octal
  # (so 00089 would be rejected and 00123 would turn into 83).
  BRANCHCODE=$(printf '%05d' "$((10#$line))")

  curl --silent --request GET \
    --url "https://www.sbi.co.in/web/home/locator/branch?p_p_id=branchlocator_INSTANCE_f6KKdur73k4Z&p_p_lifecycle=1&p_p_state=normal&p_p_mode=view&_branchlocator_INSTANCE_f6KKdur73k4Z_javax.portlet.action=showDetails&_branchlocator_INSTANCE_f6KKdur73k4Z_branchCode=$BRANCHCODE&_branchlocator_INSTANCE_f6KKdur73k4Z_redirectTo=%2Fweb%2Fhome%2Flocator%2Fbranch%3Fp_p_id%3Dbranchlocator_INSTANCE_f6KKdur73k4Z%26p_p_lifecycle%3D1%26p_p_state%3Dnormal%26p_p_mode%3Dview%26_branchlocator_INSTANCE_f6KKdur73k4Z_javax.portlet.action%3Dsearch" \
    --cookie "$COOKIES" \
    > "tables/$BRANCHCODE.html"
done < ids.txt