Skip to content

Instantly share code, notes, and snippets.

@LitvinenkoD89
Last active July 28, 2017 10:54
Show Gist options
  • Save LitvinenkoD89/d6fda8d33ba331d54cf89bfe332fac2c to your computer and use it in GitHub Desktop.
Save LitvinenkoD89/d6fda8d33ba331d54cf89bfe332fac2c to your computer and use it in GitHub Desktop.
six02
# SIX:02 {source_name: 'six02', batch_number: 6, request_id: 6000000074, request_name: 'SIX:02'}
# Scrapes SIX:02 store locations in two passes: the store listing page yields
# one document per store (id, address parts, profile URL), then each store's
# profile page adds latitude, longitude and brand to that document.
scraper_service.scrape do |_browser, scraper, init_vars|
  easy_seeder = Library.lib('EasySeeder')
  easy_extractor = Library.lib('EasyExtractor')
  # The result is not used in this script; the call is kept in case loading
  # the library has side effects — TODO confirm and remove if it has none.
  Library.lib('FetcherAgent')

  easy_seeder.seed(source_name: init_vars[:source_name]) do
    # NOTE(review): the queued URL does not depend on the postal code, so the
    # same listing page is queued once per iteration. Confirm whether the
    # seeder de-duplicates, or whether a single queue_url call would suffice.
    loop_postal_codes "us", radius: 10 do |_zip, _lat, _long|
      queue_url "http://www.six02.com/stores"
    end
  end

  easy_extractor.extract(
    source_name: init_vars[:source_name],
    scraper: scraper,
    batch_number: init_vars[:batch_number],
    request_id: init_vars[:request_id],
    request_name: init_vars[:request_name]
  ) do
    # Listing page: one document per '.sep_locations_store' element.
    find_pages page_format: :html do |url, _parser_page, page|
      page.search('.sep_locations_store').each do |store|
        id = store.attr('data-store-id')

        # The parsing below assumes the <address> markup is a <br>-separated
        # list: street line(s), then "city, state zip", then one trailing
        # segment (presumably the phone number), which is discarded.
        lines = store.at('address').inner_html.delete("\t\r\n").split('<br>')
        lines.pop
        city_line = lines.pop

        # After the two pops `lines` holds only the street segments. This same
        # array is what gets stored as address_container_html further down.
        street1, street2 = lines

        # to_s keeps a malformed or missing city line from raising; the
        # affected fields are simply stored as nil.
        city, state_zip = city_line.to_s.split(', ')
        state, zip_code = state_zip.to_s.split(' ')

        href = store.at('.sep_location_store_details').attr('href')
        location_url = URI.join(url, href).to_s

        doc_id = store_doc({
          store_id: id,
          # brand: brand,
          type: "SIX:02",
          property_id: id,
          name: '', # required
          address_1: street1,
          address_2: street2,
          city: city,
          state: state,
          zipcode: zip_code,
          country: '',
          # lat: lat,
          # long: long,
          address_container_html: lines,
          # map_link: store.parent.parent.at('.googlemap').attr('name'),
          location_url: location_url,
          flags: {}
        })

        # Queue the profile page so the second pass can fill in lat/long/brand.
        seeder.queue_url location_url, {
          page_type: 'profile',
          doc_id: doc_id
        }
      end
    end

    # Profile pages: add coordinates and brand to the document stored above.
    find_pages page_format: :html, page_type: 'profile' do |_url, parser_page, page|
      doc = find_location(parser_page[:doc_id])

      # Coordinates are taken from the text after "/@" in the map link, split
      # on commas. A missing map element, a missing content attribute, or a
      # link without "/@" all result in nil coordinates instead of an error.
      map_node = page.at('[itemprop="map"]')
      coords = map_node && map_node.attr('content').to_s.split('/@')[1]
      lat, long = coords ? coords.split(',') : [nil, nil]

      brand_node = page.at('[itemprop="brand"]')
      brand = brand_node && brand_node.attr('content')

      doc[:lat] = lat
      doc[:long] = long
      doc[:brand] = brand
      store_doc doc
    end
  end
end
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment