Skip to content

Instantly share code, notes, and snippets.

from lxml import html
import json
import requests
from exceptions import ValueError
from time import sleep
import re,urllib
import argparse
def parse(url):
# url = "https://www.yelp.com/biz/frances-san-francisco"
import requests
import json
import unicodecsv as csv
import argparse
from argparse import RawTextHelpFormatter
def parse(location,sort):
print "Retrieving Location Details"
location_details_url = "https://www.viator.com/ajaxSegmentSearch.jspa?term=%s"%(location)
location_response = requests.get(location_details_url).text
from bs4 import BeautifulSoup
from requests import get
from re import sub
from json import loads,dump
from re import findall,sub
from unicodecsv import QUOTE_ALL,DictWriter
totalJobs = []
def getPage(url):
"""
@scrapehero
scrapehero / yelp_reviews_parser.py
Last active February 9, 2018 08:11
Python 3 code to extract business details from a restaurant on Yelp.com
from lxml import html
import unicodecsv as csv
import requests
# from exceptions import ValueError
from time import sleep
import re,urllib
import argparse
import traceback
def parse(url):
@scrapehero
scrapehero / fandango.py
Last active February 12, 2018 07:48
Python 2 code to extract movie details from fandango.com
from lxml import html, etree
import datetime
import requests
import re
import os
import sys
import unicodecsv as csv
import argparse
import json
from exceptions import ValueError
@scrapehero
scrapehero / target.py
Last active February 12, 2018 08:07
Python 2 code to extract store locations from Target.com
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Written as part of
import requests
from lxml import html
import re
from time import time
import json
import argparse
@scrapehero
scrapehero / fandango.py
Last active March 8, 2018 21:22
Scraper to extract movie details and showtimes from Fandango.com
from selenium.webdriver.common.keys import Keys
from time import sleep
import unicodecsv as csv
import argparse
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException
@scrapehero
scrapehero / api_response.json
Last active October 12, 2018 12:49
The data gathered from the API response
[
{
"small_description": "Pack of eight 9 Volt Alkaline Batteries - 3-year shelf life so you can store for emergencies or use immediately - Works with a variety of devices including digital cameras, game controllers, toys, and clocks; do not attempt to recharge - Ships in Certified Frustration-Free Packaging",
"average_rating": 4.1,
"url": "https://www.amazon.com/dp/B00MH4QM1S",
"product_information": {
"Product Dimensions": "10 x 5 x 3 inches",
"Amazon Best Sellers Rank": " #30 in Health & Household #1 in Health & Household > House Supplies > Household Batteries > 9V #18 in Health & Household > Sales & Deals",
"International Shipping": "This item can be shipped to select countries outside of the U.S.",
"ASIN": "B00MH4QM1S",
@scrapehero
scrapehero / news-extractor.json
Created October 16, 2018 12:49
Data gathered from the API response
[
{
"author": [
"WTOP Staff"
],
"classification": [
"{'category': 'Sports', 'match_percent': 99.94},{'category': 'Business', 'match_percent': 0.06}"
],
"content": "WASHINGTON – The general manager of the Washington Nationals has denied rumors that the team is thinking of trading superstar outfielder Bryce Harper. On Tuesday morning, The Washington Post reported that after a sleepless night, Nationals General Manager Mike Rizzo told them “Bryce is not going anywhere. I believe in this team.” The denial caps off a 12-hour span in which it was reported that the team was entertaining offers for Harper and moving from potential contenders to potential sellers as the trade deadline approached. The deadline for nonwaiver trades is 4 p.m. Tuesday. After that, any players involved in a trade must be waived, with any team having the opportunity to pick them up if they’ll take on the player’s contract. MLB.com reported the Nationals had made it known to other teams that Harper is indeed
@scrapehero
scrapehero / vis.html
Created October 31, 2018 05:36
HTML code for Amazon Echo
<link rel="stylesheet" type="text/css" href="https://cdn.rawgit.com/bmabey/pyLDAvis/files/ldavis.v1.0.0.css">
<div id="ldavis_el63961397626142135127132307212"></div>
<script type="text/javascript">
var ldavis_el63961397626142135127132307212_data = {"mdsDat": {"x": [-0.3587306548696021, 0.16801855841761804, 0.06925668495842693, 0.10976188493655777, 0.1829106064705235, -0.056332094123445725, -0.17482877254600862, 0.08717383123026266, -0.02723004447433231], "y": [0.07862161348242461, 0.1404744503956421, 0.10768353624336308, -0.0036550338132977896, 0.0476740214055354, 0.17427257523317832, -0.01825299342037908, -0.16515462141639892, -0.3616635481100678], "topics": [1, 2, 3, 4, 5, 6, 7, 8, 9], "cluster": [1, 1, 1, 1, 1, 1, 1, 1, 1], "Freq": [5.898770332336426, 15.565165519714355, 9.173327445983887, 17.072202682495117, 13.820663452148438, 10.766364097595215, 10.82964038848877, 10.465057373046875, 6.408810615539551]}, "tinfo": {"Category": ["Default", "Default", "Default", "Default", "Default", "Default", "Default