Skip to content

Instantly share code, notes, and snippets.

@scrapehero
scrapehero / linkedin_scraper.py
Last active February 11, 2025 09:34
Python script to scrape company details from a public company page on LinkedIn.com. Written as part of the How to Scrape educational post - https://www.scrapehero.com/tutorial-scraping-linkedin-for-public-company-data/
from lxml import html
import csv, os, json
import requests
from exceptions import ValueError
from time import sleep
def linkedin_companies_parser(url):
for i in range(5):
try:
@scrapehero
scrapehero / oddsportal.json
Last active September 29, 2024 13:51
Sitemap to extract fixture details for England's Premier League from the top bookmakers using the webscraper.io Chrome extension
{
"_id":"oddsportal",
"startUrl":[
"https://www.oddsportal.com/soccer/england/premier-league/"
],
"selectors":[
{
"id":"match",
"type":"SelectorLink",
"parentSelectors":[
import pytesseract
import sys
import argparse
try:
import Image
except ImportError:
from PIL import Image
from subprocess import check_output
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Written as part of https://www.scrapehero.com/how-to-scrape-amazon-product-reviews-using-python/
from lxml import html
import json
import requests
import json,re
from dateutil import parser as dateparser
from time import sleep
@scrapehero
scrapehero / indeed.json
Last active May 3, 2024 15:21
Sitemap to extract job details based on a job and location from Indeed using the webscraper.io Chrome extension
{
"_id":"indeed",
"startUrl":[
"https://www.indeed.com/jobs?q=accountant&l=Los+Angeles,+CA&rbl=Anaheim,+CA&jlid=a05ccab40146becb&jt=fulltime"
],
"selectors":[
{
"id":"listings",
"type":"SelectorElement",
"parentSelectors":[
@scrapehero
scrapehero / ebay_scraper.py
Last active April 21, 2024 18:32
Python 3 Code to scrape prices from ebay.com
import argparse
from pprint import pprint
from traceback import format_exc
import requests
import unicodecsv as csv
from lxml import html
def parse(brand):
@scrapehero
scrapehero / yahoo_finance.py
Last active January 22, 2024 21:46
Python 3 code to extract stock market data from Yahoo Finance
from lxml import html
import requests
from time import sleep
import json
import argparse
from collections import OrderedDict
from time import sleep
def parse(ticker):
url = "http://finance.yahoo.com/quote/%s?p=%s"%(ticker,ticker)
@scrapehero
scrapehero / zillow.py
Last active December 13, 2023 16:05
Python 3 script to find real estate listings of properties up for sale on zillow.com
from lxml import html
import requests
import unicodecsv as csv
import argparse
import json
def clean(text):
if text:
return ' '.join(' '.join(text).split())
@scrapehero
scrapehero / cars.json
Created May 2, 2019 13:31
A quick and easy tutorial to scrape car details from cars.com based on location, new/used cars, deal rating, year, make, model, and trim.
{
"_id":"cars",
"startUrl":[
"https://www.cars.com/for-sale/searchresults.action/?mdId=22162&mkId=20053&page=1&perPage=20&rd=10&searchSource=GN_REFINEMENT&shippable-dealers-checkbox=true&showMore=false&sort=relevance&stkTypId=28881&trId=24731&trId=24209&yrId=35797618&yrId=36362520&zc=20005&localVehicles=false"
],
"selectors":[
{
"id":"car_links",
"type":"SelectorElementClick",
"parentSelectors":[
@scrapehero
scrapehero / fandango.py
Created February 12, 2018 07:46
Python 3 Code for scraping movie details from fandango.com
from lxml import html, etree
import datetime
import requests
import re
import os
import sys
import unicodecsv as csv
import argparse
import json
# from exceptions import ValueError