scrapehero’s gists

## amazon_product.py
from lxml import html
import csv
import os
import requests
from exceptions import ValueError
from time import sleep
from random import randint

def parse(url):
    headers = {

## amazon_review_scraper.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Written as part of https://www.scrapehero.com/how-to-scrape-amazon-product-reviews-using-python/
from lxml import html
import json
import requests
import json,re
from dateutil import parser as dateparser
from time import sleep

## nasdaq_finance.py
#!/usr/bin/env python
# -*- coding: utf-8 -*-

from lxml import html
import requests
from time import sleep
import json
import argparse
from random import randint

## yelp_search.py
from lxml import html
import unicodecsv as csv
import requests
from time import sleep
import re
import argparse
import json


def parse(url):

## amazon-reviews.json
{
    "_id": "amazon_reviews",
    "startUrl": [
      "https://www.amazon.com/Screen-Protector-SPARIN-Tempered-Glass/product-reviews/B013JZCAZK/ref=cm_cr_dp_d_show_all_btm?ie=UTF8&reviewerType=all_reviews"
    ],
    "selectors": [
      {
        "id": "review",
        "type": "SelectorElement",
        "parentSelectors": [

## amazon_bestseller.json
{
   "_id":"amazon_pet",
   "startUrl":[
      "https://www.amazon.com/Best-Sellers-Pet-Supplies/zgbs/pet-supplies/ref=zg_bs_nav_0"
   ],
   "selectors":[
      {
         "id":"product",
         "type":"SelectorElement",
         "parentSelectors":[

## zillow.py
from lxml import html
import requests
import unicodecsv as csv
from exceptions import ValueError
import argparse

def parse(zipcode,filter=None):

	if filter=="newest":
		url = "https://www.zillow.com/homes/for_sale/{0}/0_singlestory/days_sort".format(zipcode)

## yelp.py
from lxml import html
import unicodecsv as csv
import requests
from time import sleep
import re
import argparse
import json


def parse(url):

## linkedin_scraper.py
from lxml import html
import csv, os, json
import requests
from exceptions import ValueError
from time import sleep


def linkedin_companies_parser(url):
    for i in range(5):
        try:

## glassdoor.py
from lxml import html, etree
import requests
import re
import os
import sys
import unicodecsv as csv
import argparse
import json

def parse(keyword, place):
	from lxml import html
	import csv
	import os
	import requests
	from exceptions import ValueError
	from time import sleep
	from random import randint

	def parse(url):
	headers = {
	#!/usr/bin/env python
	# -*- coding: utf-8 -*-
	# Written as part of https://www.scrapehero.com/how-to-scrape-amazon-product-reviews-using-python/
	from lxml import html
	import json
	import requests
	import json,re
	from dateutil import parser as dateparser
	from time import sleep
	from lxml import html
	import unicodecsv as csv
	import requests
	from time import sleep
	import re
	import argparse
	import json


	def parse(url):
	{
	"_id": "amazon_reviews",
	"startUrl": [
	"https://www.amazon.com/Screen-Protector-SPARIN-Tempered-Glass/product-reviews/B013JZCAZK/ref=cm_cr_dp_d_show_all_btm?ie=UTF8&reviewerType=all_reviews"
	],
	"selectors": [
	{
	"id": "review",
	"type": "SelectorElement",
	"parentSelectors": [
	{
	"_id":"amazon_pet",
	"startUrl":[
	"https://www.amazon.com/Best-Sellers-Pet-Supplies/zgbs/pet-supplies/ref=zg_bs_nav_0"
	],
	"selectors":[
	{
	"id":"product",
	"type":"SelectorElement",
	"parentSelectors":[
	from lxml import html
	import csv, os, json
	import requests
	from exceptions import ValueError
	from time import sleep


	def linkedin_companies_parser(url):
	for i in range(5):
	try:
	from lxml import html, etree
	import requests
	import re
	import os
	import sys
	import unicodecsv as csv
	import argparse
	import json

	def parse(keyword, place):