This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
<link rel="stylesheet" type="text/css" href="https://cdn.rawgit.com/bmabey/pyLDAvis/files/ldavis.v1.0.0.css"> | |
<div id="ldavis_el63961397626142135127132307212"></div> | |
<script type="text/javascript"> | |
var ldavis_el63961397626142135127132307212_data = {"mdsDat": {"x": [-0.3587306548696021, 0.16801855841761804, 0.06925668495842693, 0.10976188493655777, 0.1829106064705235, -0.056332094123445725, -0.17482877254600862, 0.08717383123026266, -0.02723004447433231], "y": [0.07862161348242461, 0.1404744503956421, 0.10768353624336308, -0.0036550338132977896, 0.0476740214055354, 0.17427257523317832, -0.01825299342037908, -0.16515462141639892, -0.3616635481100678], "topics": [1, 2, 3, 4, 5, 6, 7, 8, 9], "cluster": [1, 1, 1, 1, 1, 1, 1, 1, 1], "Freq": [5.898770332336426, 15.565165519714355, 9.173327445983887, 17.072202682495117, 13.820663452148438, 10.766364097595215, 10.82964038848877, 10.465057373046875, 6.408810615539551]}, "tinfo": {"Category": ["Default", "Default", "Default", "Default", "Default", "Default", "Default |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"_id":"amazon_bestsellers", | |
"startUrl":[ | |
"https://www.amazon.com/Best-Sellers-Pet-Supplies/zgbs/pet-supplies/ref=zg_bs_nav_0" | |
], | |
"selectors":[ | |
{ | |
"id":"product", | |
"type":"SelectorElement", | |
"parentSelectors":[ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"_id":"twitter_feed", | |
"startUrl":[ | |
"https://twitter.com/search?l=&q=web%20scraping%20since%3A2018-10-01%20until%3A2018-10-05&src=typd&lang=en" | |
], | |
"selectors":[ | |
{ | |
"id":"tweet", | |
"type":"SelectorElementScroll", | |
"parentSelectors":[ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ | |
{ | |
"author": [ | |
"WTOP Staff" | |
], | |
"classification": [ | |
"{'category': 'Sports', 'match_percent': 99.94},{'category': 'Business', 'match_percent': 0.06}" | |
], | |
"content": "WASHINGTON – The general manager of the Washington Nationals has denied rumors that the team is thinking of trading superstar outfielder Bryce Harper. On Tuesday morning, The Washington Post reported that after a sleepless night, Nationals General Manager Mike Rizzo told them “Bryce is not going anywhere. I believe in this team.” The denial caps off a 12-hour span in which it was reported that the team was entertaining offers for Harper and moving from potential contenders to potential sellers as the trade deadline approached. The deadline for nonwaiver trades is 4 p.m. Tuesday. After that, any players involved in a trade must be waived, with any team having the opportunity to pick them up if they’ll take on the player’s contract. MLB.com reported the Nationals had made it known to other teams that Harper is indeed |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
[ | |
{ | |
"small_description": "Pack of eight 9 Volt Alkaline Batteries - 3-year shelf life so you can store for emergencies or use immediately - Works with a variety of devices including digital cameras, game controllers, toys, and clocks; do not attempt to recharge - Ships in Certified Frustration-Free Packaging", | |
"average_rating": 4.1, | |
"url": "https://www.amazon.com/dp/B00MH4QM1S", | |
"product_information": { | |
"Product Dimensions": "10 x 5 x 3 inches", | |
"Amazon Best Sellers Rank": " #30 in Health & Household #1 in Health & Household > House Supplies > Household Batteries > 9V #18 in Health & Household > Sales & Deals", | |
"International Shipping": "This item can be shipped to select countries outside of the U.S.", | |
"ASIN": "B00MH4QM1S", |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import requests | |
from lxml import html | |
from lxml.etree import ParserError | |
import json | |
from time import sleep | |
import argparse | |
import unicodecsv as csv | |
import traceback | |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
{ | |
"no_of_upvotes": "128k", | |
"no_of_comments": "1967 comments", | |
"comments": [ | |
{ | |
"permalink": "https://www.reddit.com/r/pics/comments/87bb1m/an_iranian_teacher_visits_his_cancerstricken/dwbsg5a/", | |
"commenter": "felixfelix", | |
"comment_text": "I had a university professor who visited one of his former students in the hospital daily. The student was dying of AIDS and his entire family had disowned him. The professor had only known him for one class." | |
}, | |
{ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import csv | |
import requests | |
import json | |
import argparse | |
import traceback | |
def locate_stores(zip_code): | |
""" | |
Function to locate walmart stores | |
""" |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
#!/usr/bin/env python | |
# -*- coding: utf-8 -*- | |
# Written as part of https://www.scrapehero.com/how-to-scrape-amazon-product-reviews-using-python/ | |
from lxml import html | |
from json import dump,loads | |
from requests import get | |
import json | |
from re import sub | |
from dateutil import parser as dateparser | |
from time import sleep |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
# -*- coding: utf-8 -*- | |
from lxml import html | |
import requests | |
from collections import OrderedDict | |
import json | |
import argparse | |
import re | |
import sys | |
# Adjust MAX_RETRY according to the blocking from tripadvisor | |
MAX_RETRY = 10 |