Skip to content

Instantly share code, notes, and snippets.

View LitvinenkoD89's full-sized avatar

Litvinenko Dmitrii LitvinenkoD89

View GitHub Profile
@LitvinenkoD89
LitvinenkoD89 / findprivateclinics
Created October 6, 2017 11:35
findprivateclinics
require 'csv'
main_url = "https://www.findprivateclinics.ca/"
url = "https://www.findprivateclinics.ca/listings.html"
t = Typhoeus.get url
page = Nokogiri::HTML t.body
data = {}
def parse_page(page, category, subcategory, data)
page.search(".listing").each do |el|
name = el.at(".name").at("a").text.strip
<div id="content_0_MetroPanel" style="padding-left: 0px;">
<h6 class="header">Airport Locations</h6>
<img id="content_0_MetroAirportRepeater_ctl01_PinNumberImage" src="/images/1.gif" style="border-width:0px;">
<b>BURBANK - BOB HOPE AIRPORT&nbsp;(BUR)</b>
<br>
2627 NORTH HOLLYWOOD WAY<br>BURBANK, CA UNITED STATES 91505<br>(877) 283-0898<br>
<a href="/local/index.aspx?LocationCode=BUR">
@LitvinenkoD89
LitvinenkoD89 / ucaoa_console_test
Created August 15, 2017 13:14
ucaoa_console_test
url = "http://www.ucaoa.org/searchserver/people.aspx?id=AA614225-2260-4578-9513-18089A0F8951&cdbid=&canconnect=0&canmessage=0&map=False&toggle=True&hhSearchTerms="
t = Typhoeus.get url
page = Nokogiri::HTML t.body
links = []
pagination = (2..10).to_a
pagination.each do |page_num|
body = page.at('#SearchResultsForm').search('input').inject({}){|acc,c| acc[c.attr('name')] = c.attr('value'); acc}
act = page.at('#SearchResultsForm').attr('action')
act = act[2..act.length]
url = 'https://www.ups.com/dropoff'
headers1 {
'Cookie' => 'WEMEnabled=Y; AMCVS_036784BD57A8BB277F000101%40AdobeOrg=1; com.ups.com_ups_uis.sData=hHQSZMfGVRGvjF4TngHQRNtQ0xCSGQptQJ2y5nyJMkvvLDhdQx3Q!197340479!-1727860029!10413!-1; AMCV_036784BD57A8BB277F000101%40AdobeOrg=1099438348%7CMCMID%7C72850123333592926912021727064730770089%7CMCAAMLH-1502958984%7C6%7CMCAAMB-1502960787%7CcIBAx_aQzFEHcPoEv0GwcQ%7CMCCIDH%7C299901891%7CMCOPTOUT-1502361384s%7CNONE%7CvVersion%7C2.1.0%7CMCIDTS%7C17389; com.ups.com_ups_GDOL.sData=0fzLaNytMZWpGVtcq1pP9Y79uWk3Y4-1u9V3SBm7TKqiPQC_Av9_!171582991!-1727859596!15412!-1; GDOL_AutoSuggest={"recentSearches":[{"criteria":"New+York%2C+NY","Latitude":"40.7740783691406","Longitude":"-73.9697418212891","CtryCode":"US","culture":"","nickName":"","locId":""}]}; sharedsession=14a2c0f5-d9b5-4688-9e68-c89a9a251b67:w; mbox=session#f96a46111c6f49658ffe55448f991684#1502358288|PC#f96a46111c6f49658ffe55448f991684.22_3#1565601227; utag_main=v_id:015dcb4997ba0001473a00ab4be905068001b0600086e$_sn:1$_ss:
@LitvinenkoD89
LitvinenkoD89 / booksamillion_test
Created August 7, 2017 12:37
booksamillion_test
states = [
{'short_code' => 'AL', 'full_name' => 'Alabama'},
...
{'short_code' => 'WY', 'full_name' => 'Wyoming'}
]
result = {}
count = 0
agent = Mechanize.new
@LitvinenkoD89
LitvinenkoD89 / chase
Last active August 2, 2017 09:04
chase
# CHASE BANK {source_name: 'chase', batch_number: 7, request_id: 7000000068, request_name: 'CHASE BANK'}
scraper_service.scrape do |browser, scraper, init_vars|
easy_seeder = Library.lib('EasySeeder')
easy_extractor = Library.lib('EasyExtractor')
fetcher_agent = Library.lib('FetcherAgent')
easy_seeder.seed(source_name: init_vars[:source_name]) do
loop_states "us", radius: 10 do |short_code, full_name|
queue_url "https://locator.chase.com/search/#{short_code}/?q=#{full_name}"
@LitvinenkoD89
LitvinenkoD89 / pappas
Last active August 1, 2017 14:37
pappas
# PAPPAS BRANDS {source_name: 'pappas', batch_number: 7, request_id: 7000000048, request_name: 'PAPPAS BRANDS'}
scraper_service.scrape do |browser, scraper, init_vars|
easy_seeder = Library.lib('EasySeeder')
easy_extractor = Library.lib('EasyExtractor')
fetcher_agent = Library.lib('FetcherAgent')
easy_seeder.seed(source_name: init_vars[:source_name]) do
queue_url "http://www.pappas.com/locations-list/?msg=noaddy"
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="keywords" content="Pappas Restaurants"><meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<title>Pappas.com - Pappas Locations</title>
<link href="../global/css/global.css" type="text/css" rel="stylesheet" />
<link href="../global/css/jqModal.css" type="text/css" rel="stylesheet" />
<!--[if (gt IE 5)&(lt IE 7)]>
<link href="../global/css/ie6.php?quickPath=../" rel="stylesheet" type="text/css" />
# GODFATHERS PIZZA {source_name: 'godfathers', batch_number: 7, request_id: 7000000051, request_name: 'GODFATHERS PIZZA'}
scraper_service.scrape do |browser, scraper, init_vars|
easy_seeder = Library.lib('EasySeeder')
easy_extractor = Library.lib('EasyExtractor')
easy_seeder.seed(source_name: init_vars[:source_name]) do
loop_postal_codes "us", radius: 10 do |zip, lat, long|
queue_url "https://www.godfathers.com/localize", {
method: :post
@LitvinenkoD89
LitvinenkoD89 / GREASE MONKEY AUTOMOTIVE
Created July 28, 2017 13:33
GREASE MONKEY AUTOMOTIVE
# GREASE MONKEY AUTOMOTIVE {source_name: 'greasemonkeyintl', batch_number: 7, request_id: 7000000040, request_name: 'GREASE MONKEY AUTOMOTIVE'}
scraper_service.scrape do |browser, scraper, init_vars|
easy_seeder = Library.lib('EasySeeder')
easy_extractor = Library.lib('EasyExtractor')
fetcher_agent = Library.lib('FetcherAgent')
easy_seeder.seed(source_name: init_vars[:source_name]) do
loop_postal_codes "us", radius: 10 do |zip, lat, long|
queue_url "http://www.greasemonkeyintl.com/store-locator?zip=#{zip}&dist=50"