Skip to content

Instantly share code, notes, and snippets.

View hotels_scraper.py
#!/usr/bin/env python
from re import findall,sub
from lxml import html
from time import sleep
from selenium import webdriver
from pprint import pprint
from xvfbwrapper import Xvfb
def parse(url):
searchKey = "Las Vegas" # Change this to your city
View linkedin_jobs_scraper.py
from bs4 import BeautifulSoup
from requests import get
from re import sub
from json import loads,dump
from re import findall,sub
from unicodecsv import QUOTE_ALL,DictWriter
totalJobs = []
def getPage(url):
"""
@scrapehero
scrapehero / amazon_review_scraper.py
Last active August 5, 2022 10:59
Python Code to Scrape Customer Reviews from Amazon.com. Read more on https://www.scrapehero.com/how-to-scrape-amazon-product-reviews-using-python/
View amazon_review_scraper.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Written as part of https://www.scrapehero.com/how-to-scrape-amazon-product-reviews-using-python/
from lxml import html
import json
import requests
import json,re
from dateutil import parser as dateparser
from time import sleep
@scrapehero
scrapehero / yelp_business_details.py
Last active November 11, 2020 15:12
Python 2 code to extract Yelp business details
View yelp_business_details.py
from lxml import html
import json
import requests
from exceptions import ValueError
from time import sleep
import re,urllib
import argparse
def parse(url):
# url = "https://www.yelp.com/biz/frances-san-francisco"
@scrapehero
scrapehero / linkedin_scraper.py
Last active December 6, 2021 18:13
Python script to scrape company details from a public company page on LinkedIn.com. Written as part of a how-to-scrape educational post: https://www.scrapehero.com/tutorial-scraping-linkedin-for-public-company-data/
View linkedin_scraper.py
from lxml import html
import csv, os, json
import requests
from exceptions import ValueError
from time import sleep
def linkedin_companies_parser(url):
for i in range(5):
try:
@scrapehero
scrapehero / yahoo_finance.py
Last active July 7, 2023 08:00
Python 3 code to extract stock market data from Yahoo Finance
View yahoo_finance.py
from lxml import html
import requests
from time import sleep
import json
import argparse
from collections import OrderedDict
from time import sleep
def parse(ticker):
url = "http://finance.yahoo.com/quote/%s?p=%s"%(ticker,ticker)
@scrapehero
scrapehero / expedia.py
Last active August 24, 2020 04:32
Python 3 code to extract the flight schedules and prices for a source and destination pair.
View expedia.py
import json
import requests
from lxml import html
from collections import OrderedDict
import argparse
def parse(source,destination,date):
for i in range(5):
try:
url = "https://www.expedia.com/Flights-Search?trip=oneway&leg1=from:{0},to:{1},departure:{2}TANYT&passengers=adults:1,children:0,seniors:0,infantinlap:Y&options=cabinclass%3Aeconomy&mode=search&origref=www.expedia.com".format(source,destination,date)
@scrapehero
scrapehero / ebay_scraper.py
Last active September 10, 2021 08:23
Python 3 code to scrape prices from eBay.com
View ebay_scraper.py
import argparse
from pprint import pprint
from traceback import format_exc
import requests
import unicodecsv as csv
from lxml import html
def parse(brand):
@scrapehero
scrapehero / geocoder.py
Created March 30, 2017 07:57
Python script to parse unstructured addresses
View geocoder.py
from requests import get
from pprint import pprint
from json import dump
from csv import QUOTE_ALL, DictWriter
API_KEY = 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'
def address_resolver(json):
final = {}
if json['results']:
data = json['results'][0]
for item in data['address_components']:
View captcha_resolver.py
import pytesseract
import sys
import argparse
try:
import Image
except ImportError:
from PIL import Image
from subprocess import check_output