Skip to content

Instantly share code, notes, and snippets.

@scrapehero
scrapehero / geocoder.py
Created Mar 30, 2017
Python script to parse unstructured addresses
View geocoder.py
from requests import get
from pprint import pprint
from json import dump
from csv import QUOTE_ALL, DictWriter
API_KEY = 'XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX'
def address_resolver(json):
final = {}
if json['results']:
data = json['results'][0]
for item in data['address_components']:
@scrapehero
scrapehero / zillow.py
Last active Aug 23, 2022
Python 3 script to find real estate listings of properties up for sale on Zillow.com
View zillow.py
from lxml import html
import requests
import unicodecsv as csv
import argparse
import json
def clean(text):
if text:
return ' '.join(' '.join(text).split())
View amazon_product.py
from lxml import html
import csv
import os
import requests
from exceptions import ValueError
from time import sleep
from random import randint
def parse(url):
headers = {
View amazon_review_scraper.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Written as part of https://www.scrapehero.com/how-to-scrape-amazon-product-reviews-using-python/
from lxml import html
import json
import requests
import json,re
from dateutil import parser as dateparser
from time import sleep
@scrapehero
scrapehero / nasdaq_finance.py
Last active Jul 3, 2022
Script to scrape financial data from NASDAQ
View nasdaq_finance.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
from lxml import html
import requests
from time import sleep
import json
import argparse
from random import randint
@scrapehero
scrapehero / amazon_reviews.py
Last active Apr 23, 2022
Python 3 code to extract Amazon reviews
View amazon_reviews.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Written as part of https://www.scrapehero.com/how-to-scrape-amazon-product-reviews-using-python/
from lxml import html
from json import dump,loads
from requests import get
import json
from re import sub
from dateutil import parser as dateparser
from time import sleep
@scrapehero
scrapehero / yelp_search.py
Last active Apr 7, 2022
Python 3 code to extract business listings from Yelp.com
View yelp_search.py
from lxml import html
import unicodecsv as csv
import requests
from time import sleep
import re
import argparse
import json
def parse(url):
@scrapehero
scrapehero / amazon-reviews.json
Last active Feb 25, 2022
Sitemap to extract the review listings of a single product on Amazon.com using the Web Scraper Chrome extension. Instructions: https://www.scrapehero.com/amazon-review-scraper/
View amazon-reviews.json
{
"_id": "amazon_reviews",
"startUrl": [
"https://www.amazon.com/Screen-Protector-SPARIN-Tempered-Glass/product-reviews/B013JZCAZK/ref=cm_cr_dp_d_show_all_btm?ie=UTF8&reviewerType=all_reviews"
],
"selectors": [
{
"id": "review",
"type": "SelectorElement",
"parentSelectors": [
@scrapehero
scrapehero / amazon_bestseller.json
Created Nov 2, 2018
JSON sitemap to scrape bestseller listings from Amazon using the Web Scraper Chrome extension
View amazon_bestseller.json
{
"_id":"amazon_pet",
"startUrl":[
"https://www.amazon.com/Best-Sellers-Pet-Supplies/zgbs/pet-supplies/ref=zg_bs_nav_0"
],
"selectors":[
{
"id":"product",
"type":"SelectorElement",
"parentSelectors":[
@scrapehero
scrapehero / zillow.py
Created May 9, 2017
Python script to find real estate listings of properties up for sale on Zillow.com
View zillow.py
from lxml import html
import requests
import unicodecsv as csv
from exceptions import ValueError
import argparse
def parse(zipcode,filter=None):
if filter=="newest":
url = "https://www.zillow.com/homes/for_sale/{0}/0_singlestory/days_sort".format(zipcode)