Skip to content

Instantly share code, notes, and snippets.

#!/usr/bin/env python
from re import findall,sub
from lxml import html
from time import sleep
from selenium import webdriver
from pprint import pprint
from xvfbwrapper import Xvfb
def parse(url):
searchKey = "Las Vegas" # Change this to your city
from bs4 import BeautifulSoup
from requests import get
from re import sub
from json import loads,dump
from re import findall,sub
from unicodecsv import QUOTE_ALL,DictWriter
totalJobs = []
def getPage(url):
"""
@scrapehero
scrapehero / amazon_review_scraper.py
Last active August 5, 2022 10:59
Python Code to Scrape Customer Reviews from Amazon.com. Read more on https://www.scrapehero.com/how-to-scrape-amazon-product-reviews-using-python/
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Written as part of https://www.scrapehero.com/how-to-scrape-amazon-product-reviews-using-python/
from lxml import html
import json
import requests
import json,re
from dateutil import parser as dateparser
from time import sleep
@scrapehero
scrapehero / yelp_business_details.py
Last active November 11, 2020 15:12
Python 2 code to extract Yelp business details
from lxml import html
import json
import requests
from exceptions import ValueError
from time import sleep
import re,urllib
import argparse
def parse(url):
# url = "https://www.yelp.com/biz/frances-san-francisco"
@scrapehero
scrapehero / linkedin_scraper.py
Last active December 6, 2021 18:13
Python script to scrape company details from a public company page on LinkedIn.com. Written as part of the How to Scrape educational post - https://www.scrapehero.com/tutorial-scraping-linkedin-for-public-company-data/
from lxml import html
import csv, os, json
import requests
from exceptions import ValueError
from time import sleep
def linkedin_companies_parser(url):
for i in range(5):
try:
@scrapehero
scrapehero / yahoo_finance.py
Last active January 22, 2024 21:46
Python 3 code to extract stock market data from yahoo finance
from lxml import html
import requests
from time import sleep
import json
import argparse
from collections import OrderedDict
from time import sleep
def parse(ticker):
url = "http://finance.yahoo.com/quote/%s?p=%s"%(ticker,ticker)
@scrapehero
scrapehero / expedia.py
Last active August 24, 2020 04:32
Python 3 code to extract the flight schedules and prices for a source and destination pair.
import json
import requests
from lxml import html
from collections import OrderedDict
import argparse
def parse(source,destination,date):
for i in range(5):
try:
url = "https://www.expedia.com/Flights-Search?trip=oneway&leg1=from:{0},to:{1},departure:{2}TANYT&passengers=adults:1,children:0,seniors:0,infantinlap:Y&options=cabinclass%3Aeconomy&mode=search&origref=www.expedia.com".format(source,destination,date)
@scrapehero
scrapehero / ebay_scraper.py
Last active April 21, 2024 18:32
Python 3 Code to scrape prices from ebay.com
import argparse
from pprint import pprint
from traceback import format_exc
import requests
import unicodecsv as csv
from lxml import html
def parse(brand):
@scrapehero
scrapehero / geocoder.py
Created March 30, 2017 07:57
Python script to parse unstructured addresses
from requests import get
from pprint import pprint
from json import dump
from csv import QUOTE_ALL, DictWriter
API_KEY = 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'
def address_resolver(json):
final = {}
if json['results']:
data = json['results'][0]
for item in data['address_components']:
import pytesseract
import sys
import argparse
try:
import Image
except ImportError:
from PIL import Image
from subprocess import check_output