Skip to content

Instantly share code, notes, and snippets.

@scrapehero
scrapehero / fandango.py
Last active March 8, 2018 21:22
Scraper to extract movie details and showtimes from Fandango.com
from selenium.webdriver.common.keys import Keys
from time import sleep
import unicodecsv as csv
import argparse
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
@scrapehero
scrapehero / glassdoor.py
Last active December 2, 2021 12:29
Python 3 code to extract job listings from Glassdoor.com
from lxml import html, etree
import requests
import re
import os
import sys
import unicodecsv as csv
import argparse
import json
def parse(keyword, place):
@scrapehero
scrapehero / target.py
Last active February 12, 2018 08:07
Python 2 code to extract store locations from Target.com
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Written as part of
import requests
from lxml import html
import re
from time import time
import json
import argparse
@scrapehero
scrapehero / tripadvisor_scraper.py
Last active January 24, 2021 15:06
Python script to scrape Tripadvisor.com for hotels in a locality. Code written as part of https://www.scrapehero.com/how-to-scrape-tripadvisor/
#!/usr/bin/env python
from datetime import datetime
from time import time
from lxml import html,etree
import requests,re
import os,sys
import unicodecsv as csv
import argparse
def parse(locality,checkin_date,checkout_date,sort):
@scrapehero
scrapehero / tripadvisor_scraper_hotel.py
Last active December 17, 2018 10:09
Python 2.7 code to extract data from a Tripadvisor hotel page
# -*- coding: utf-8 -*-
from lxml import html
import requests
from collections import OrderedDict
import json
import argparse
import re
import sys
# Adjust MAX_RETRY according to the blocking from tripadvisor
MAX_RETRY = 10
import requests
import json
import unicodecsv as csv
import argparse
from argparse import RawTextHelpFormatter
def parse(location,sort):
print "Retrieving Location Details"
location_details_url = "https://www.viator.com/ajaxSegmentSearch.jspa?term=%s"%(location)
location_response = requests.get(location_details_url).text
@scrapehero
scrapehero / amazon_reviews.py
Last active December 11, 2019 16:09
Python Code to Scrape Customer Reviews from Amazon.com. Read more on https://www.scrapehero.com/how-to-scrape-amazon-product-reviews-using-python/
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Written as part of https://www.scrapehero.com/how-to-scrape-amazon-product-reviews-using-python/
from lxml import html
import json
import requests
import json,re
from dateutil import parser as dateparser
from time import sleep
from lxml import html
import json
import requests
from exceptions import ValueError
from time import sleep
import re,urllib
import argparse
def parse(url):
# url = "https://www.yelp.com/biz/frances-san-francisco"
@scrapehero
scrapehero / yelp_reviews_parser.py
Last active February 9, 2018 08:11
Python 3 code to extract business details from a restaurant on Yelp.com
from lxml import html
import unicodecsv as csv
import requests
# from exceptions import ValueError
from time import sleep
import re,urllib
import argparse
import traceback
def parse(url):
from lxml import html
import csv
import os
import requests
from exceptions import ValueError
from time import sleep
from random import randint
def parse(url):
headers = {