Skip to content

Instantly share code, notes, and snippets.

@LitvinenkoD89
Last active August 2, 2017 09:04
Show Gist options
  • Save LitvinenkoD89/d2a759d5a23819930b28684e2c4699ac to your computer and use it in GitHub Desktop.
Save LitvinenkoD89/d2a759d5a23819930b28684e2c4699ac to your computer and use it in GitHub Desktop.
chase
# CHASE BANK {source_name: 'chase', batch_number: 7, request_id: 7000000068, request_name: 'CHASE BANK'}
#
# Scrapes Chase branch/ATM locations from locator.chase.com in three steps:
#   1. seed one search URL per US state,
#   2. parse every search-result (listing) page into a location document and
#      queue the location's profile page,
#   3. enrich each stored document with data from its profile page
#      (coordinates, ATM count, branch services, branch format).
#
# `init_vars` supplies :source_name, :batch_number, :request_id and
# :request_name. The DSL methods used inside the blocks (loop_states,
# queue_url, find_pages, store_doc, find_location, seeder) are not defined in
# this file; they are presumably provided by the EasySeeder / EasyExtractor
# libraries.
scraper_service.scrape do |browser, scraper, init_vars|
  easy_seeder = Library.lib('EasySeeder')
  easy_extractor = Library.lib('EasyExtractor')
  # The return value is never used below. The call is kept in case loading the
  # library has side effects -- TODO confirm and remove if it does not.
  Library.lib('FetcherAgent')

  easy_seeder.seed(source_name: init_vars[:source_name]) do
    loop_states 'us', radius: 10 do |short_code, full_name|
      # NOTE(review): full_name is interpolated without URL-encoding. Confirm
      # that queue_url (or the fetcher) escapes values such as state names
      # containing spaces.
      queue_url "https://locator.chase.com/search/#{short_code}/?q=#{full_name}"
    end
  end

  easy_extractor.extract(
    source_name: init_vars[:source_name],
    scraper: scraper,
    batch_number: init_vars[:batch_number],
    request_id: init_vars[:request_id],
    request_name: init_vars[:request_name]
  ) do
    # Listing (search result) pages.
    find_pages page_format: :html do |url, _parser_page, page|
      # Queue the remaining result pages once per listing page. URLs queued
      # here carry '&page=', so they are skipped to keep every paginated page
      # from re-queuing the whole range with a doubled parameter.
      unless url.include?('&page=')
        pagination = page.at('.clearfix.pagination')
        # Non-numeric <li> labels become 0 via to_i; max is nil when there
        # are no <li> elements at all.
        max_page = pagination ? pagination.search('li').map { |li| li.text.to_i }.max : nil
        if max_page && max_page > 1
          (2..max_page).each do |page_number|
            seeder.queue_url "#{url}&page=#{page_number}"
          end
        end
      end

      page.search('.result.clearfix').each do |detail|
        name_node = detail.at('.resultName')
        next if name_node.nil?

        name = name_node.text.delete("\t\r\n")
        href = detail.at('.titleLeft').at('a').attr('href')
        # href may be relative; resolve it against the listing page URL.
        location_url = URI.join(url, href).to_s

        address = detail.at('.address')
        last_line = address.at('.line.last')
        street1 = address.at('.line.street-address').text
        city = last_line.at('.locality').text
        state = last_line.at('.region').text
        zip_code = last_line.at('.postal-code').text

        loc_type = detail.at('.locationType').text
        location_type = if loc_type =~ /ONLY/i
                          'CHASE ATM ONLY'
                        else
                          'CHASE BRANCHES'
                        end

        # lat/long and the flags are filled in later by the profile handler.
        # store_id, property_id, address_container_html and map_link are not
        # captured.
        doc_id = store_doc({
          brand: 'CHASE',
          type: location_type,
          name: name, # required
          address_1: street1,
          address_2: '',
          city: city,
          state: state,
          zipcode: zip_code,
          country: '',
          location_url: location_url,
          flags: {}
        })

        # doc_id travels with the queued URL so the profile handler can load
        # the document stored above.
        seeder.queue_url location_url, {
          page_type: 'profile',
          doc_id: doc_id
        }
      end
    end

    # Profile pages (queued above with page_type 'profile').
    find_pages page_format: :html, page_type: 'profile' do |_url, parser_page, page|
      doc = find_location(parser_page[:doc_id])
      next unless doc.present?

      # True when Float() accepts the token. nil (empty text) raises
      # TypeError, non-numeric strings raise ArgumentError.
      numeric = lambda do |token|
        begin
          Float(token)
          true
        rescue ArgumentError, TypeError
          false
        end
      end

      # ATM count: the first whitespace-separated token that parses as a
      # number, taken from the first info column that starts with one.
      # Stays the Integer 0 when nothing matches; a match is stored as the
      # raw String token, exactly as scraped.
      atm_count = 0
      atm_info = page.at('.atmInformation')
      if atm_info
        leading_tokens = atm_info.search('.halfMain.floatLeft').map do |info|
          info.text.split(' ').first
        end
        atm_count = leading_tokens.find { |token| numeric.call(token) } || 0
      end

      # Branch services: first text line of each class-less <div> under the
      # "Branch Services" section, comma-joined. Divs with no usable first
      # line are dropped so the result has no empty ", , " entries.
      branch_services = ''
      page.search('.sectionTitle').each do |title|
        next unless title.text == 'Branch Services'

        first_lines = title.parent.search('div:not([class])').map do |div|
          div.text.split("\n").first
        end
        branch_services = first_lines.reject { |line| line.nil? || line.empty? }.join(', ')
      end

      # Any of these nodes may be missing on a profile page; store nil for the
      # missing value instead of raising and losing the whole update.
      lat_node = page.at('[property="place:location:latitude"]')
      long_node = page.at('[property="place:location:longitude"]')
      format_node = page.at('.branchType')

      doc[:lat] = lat_node && lat_node.attr('content')
      doc[:long] = long_node && long_node.attr('content')
      doc[:flags]['ATM COUNT'] = atm_count
      doc[:flags]['BRANCH SERVICES'] = branch_services
      doc[:flags]['FORMAT'] = format_node && format_node.text.delete("\t\r\n")
      store_doc doc
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment