Skip to content

Instantly share code, notes, and snippets.

View websitescraper's full-sized avatar

scrapingintelligence websitescraper

View GitHub Profile
@websitescraper
websitescraper / glassdoor.txt
Last active June 2, 2021 09:36
Python 3 code to extract job listings from Glassdoor.com
from lxml import html, etree
import requests
import re
import os
import sys
import unicodecsv as csv
import argparse
import json
def parse(keyword, place):
@websitescraper
websitescraper / expedia.py
Created June 4, 2021 04:13
Python 3 code to extract flight schedules and prices from Expedia for a source and destination pair.
import json
import requests
from lxml import html
from collections import OrderedDict
import argparse
def parse(source,destination,date):
for i in range(5):
try:
url = "https://www.expedia.com/Flights-Search?trip=oneway&leg1=from:{0},to:{1},departure:{2}TANYT&passengers=adults:1,children:0,seniors:0,infantinlap:Y&options=cabinclass%3Aeconomy&mode=search&origref=www.expedia.com".format(source,destination,date)
@websitescraper
websitescraper / zillow.py
Created June 9, 2021 08:58
Python 3 script to find real estate listings of properties for sale on Zillow.com
from lxml import html
import requests
import unicodecsv as csv
import argparse
import json
def clean(text):
if text:
return ' '.join(' '.join(text).split())