Skip to content

Instantly share code, notes, and snippets.

@scrapehero
scrapehero / tripadvisor_scraper_hotel.py
Last active December 17, 2018 10:09
Python 2.7 code to extract data from tripadvisor hotel
# -*- coding: utf-8 -*-
from lxml import html
import requests
from collections import OrderedDict
import json
import argparse
import re
import sys
# Adjust MAX_RETRY according to the blocking from tripadvisor
MAX_RETRY = 10
@scrapehero
scrapehero / ebay.py
Created February 12, 2018 08:02
Python 2 code to extract product details from ebay
from lxml import html
import requests
from pprint import pprint
import unicodecsv as csv
from traceback import format_exc
import argparse
def parse(brand):
for i in range(5):
try:
@scrapehero
scrapehero / walmart_store_locator.py
Last active March 26, 2019 11:09
script to locate walmart stores
import csv
import requests
import json
import argparse
import traceback
def locate_stores(zip_code):
"""
Function to locate walmart stores
"""
@scrapehero
scrapehero / expedia.py
Last active May 17, 2019 14:35
Python 2 code to extract flight details from expedia.com
import json
import requests
from lxml import html
from collections import OrderedDict
import argparse
def parse(source,destination,date):
for i in range(5):
try:
url = "https://www.expedia.com/Flights-Search?trip=oneway&leg1=from:{0},to:{1},departure:{2}TANYT&passengers=adults:1,children:0,seniors:0,infantinlap:Y&options=cabinclass%3Aeconomy&mode=search&origref=www.expedia.com".format(source,destination,date)
@scrapehero
scrapehero / tripadvisor_hotel.py
Last active September 11, 2019 16:46
Python 3 Code to extract details of hotels from tripadvisor
# -*- coding: utf-8 -*-
from lxml import html
import requests
from collections import OrderedDict
import json
import argparse
import re
import sys
# Adjust MAX_RETRY according to the blocking from tripadvisor
MAX_RETRY = 10
@scrapehero
scrapehero / amazon_reviews.py
Last active December 11, 2019 16:09
Python Code to Scrape Customer Reviews from Amazon.com. Read more on https://www.scrapehero.com/how-to-scrape-amazon-product-reviews-using-python/
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Written as part of https://www.scrapehero.com/how-to-scrape-amazon-product-reviews-using-python/
from lxml import html
import json
import requests
import json,re
from dateutil import parser as dateparser
from time import sleep
@scrapehero
scrapehero / reddit.json
Last active February 6, 2020 13:20
Output for Reddit
{
"no_of_upvotes": "128k",
"no_of_comments": "1967 comments",
"comments": [
{
"permalink": "https://www.reddit.com/r/pics/comments/87bb1m/an_iranian_teacher_visits_his_cancerstricken/dwbsg5a/",
"commenter": "felixfelix",
"comment_text": "I had a university professor who visited one of his former students in the hospital daily. The student was dying of AIDS and his entire family had disowned him. The professor had only known him for one class."
},
{
@scrapehero
scrapehero / walmart.json
Created January 17, 2019 05:55
Sitemap to extract product data and pricing from Walmart using the webscraper.io extension
{
"_id": "walmart",
"startUrl": [
"https://www.walmart.com/browse/home-improvement/electrical/1072864_1067619?povid=1072864+%7C+2018-05-02+%7C+Flyout_Electrical"
],
"selectors": [
{
"id": "product",
"type": "SelectorElementClick",
"parentSelectors": [
#!/usr/bin/env python
from re import findall,sub
from lxml import html
from time import sleep
from selenium import webdriver
from pprint import pprint
from xvfbwrapper import Xvfb
def parse(url):
searchKey = "Las Vegas" # Change this to your city
@scrapehero
scrapehero / expedia.py
Last active August 24, 2020 04:32
Python 3 code to extract the flight schedules and prices for a source and destination pair.
import json
import requests
from lxml import html
from collections import OrderedDict
import argparse
def parse(source,destination,date):
for i in range(5):
try:
url = "https://www.expedia.com/Flights-Search?trip=oneway&leg1=from:{0},to:{1},departure:{2}TANYT&passengers=adults:1,children:0,seniors:0,infantinlap:Y&options=cabinclass%3Aeconomy&mode=search&origref=www.expedia.com".format(source,destination,date)