Instantly share code, notes, and snippets.

View news-extractor.json
[
{
"author": [
"WTOP Staff"
],
"classification": [
"{'category': 'Sports', 'match_percent': 99.94},{'category': 'Business', 'match_percent': 0.06}"
],
"content": "WASHINGTON – The general manager of the Washington Nationals has denied rumors that the team is thinking of trading superstar outfielder Bryce Harper. On Tuesday morning, The Washington Post reported that after a sleepless night, Nationals General Manager Mike Rizzo told them “Bryce is not going anywhere. I believe in this team.” The denial caps off a 12-hour span in which it was reported that the team was entertaining offers for Harper and moving from potential contenders to potential sellers as the trade deadline approached. The deadline for nonwaiver trades is 4 p.m. Tuesday. After that, any players involved in a trade must be waived, with any team having the opportunity to pick them up if they’ll take on the player’s contract. MLB.com reported the Nationals had made it known to other teams that Harper is indeed
View api_response.json
[
{
"small_description": "Pack of eight 9 Volt Alkaline Batteries - 3-year shelf life so you can store for emergencies or use immediately - Works with a variety of devices including digital cameras, game controllers, toys, and clocks; do not attempt to recharge - Ships in Certified Frustration-Free Packaging",
"average_rating": 4.1,
"url": "https://www.amazon.com/dp/B00MH4QM1S",
"product_information": {
"Product Dimensions": "10 x 5 x 3 inches",
"Amazon Best Sellers Rank": " #30 in Health & Household #1 in Health & Household > House Supplies > Household Batteries > 9V #18 in Health & Household > Sales & Deals",
"International Shipping": "This item can be shipped to select countries outside of the U.S.",
"ASIN": "B00MH4QM1S",
View amazon_seller_listing_scrape.py
import requests
from lxml import html
from lxml.etree import ParserError
import json
from time import sleep
import argparse
import unicodecsv as csv
import traceback
View reddit.json
{
"no_of_upvotes": "128k",
"no_of_comments": "1967 comments",
"comments": [
{
"permalink": "https://www.reddit.com/r/pics/comments/87bb1m/an_iranian_teacher_visits_his_cancerstricken/dwbsg5a/",
"commenter": "felixfelix",
"comment_text": "I had a university professor who visited one of his former students in the hospital daily. The student was dying of AIDS and his entire family had disowned him. The professor had only known him for one class."
},
{
View walmart_store_locator.py
import csv
import requests
import json
import argparse
import traceback
def locate_stores(zip_code):
"""
Function to locate walmart stores
"""
View amazon_reviews.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Written as part of https://www.scrapehero.com/how-to-scrape-amazon-product-reviews-using-python/
from lxml import html
import json
import requests
import json,re
from dateutil import parser as dateparser
from time import sleep
View tripadvisor_hotel.py
# -*- coding: utf-8 -*-
from lxml import html
import requests
from collections import OrderedDict
import json
import argparse
import re
def parse(url):
print ("Fetching %s"%url)
View yelp.py
from lxml import html
import unicodecsv as csv
import requests
from exceptions import ValueError
from time import sleep
import re
import argparse
def parse(url):
headers = {'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/42.0.2311.90 Safari/537.36'}
View yahoo_finance.py
from lxml import html
import requests
from exceptions import ValueError
from time import sleep
import json
import argparse
from collections import OrderedDict
from time import sleep
def parse(ticker):
View expedia.py
import json
import requests
from lxml import html
from collections import OrderedDict
import argparse
def parse(source,destination,date):
for i in range(5):
try:
url = "https://www.expedia.com/Flights-Search?trip=oneway&leg1=from:{0},to:{1},departure:{2}TANYT&passengers=adults:1,children:0,seniors:0,infantinlap:Y&options=cabinclass%3Aeconomy&mode=search&origref=www.expedia.com".format(source,destination,date)