Skip to content

Instantly share code, notes, and snippets.

View dmil's full-sized avatar

Dhrumil Mehta dmil

View GitHub Profile
@dmil
dmil / api.py
Created September 12, 2015 04:28
"""
Stub for script to query API
"""
from urllib2 import Request, urlopen, URLError
import json
import logging
from time import sleep
URL_STUB = "http://website.com/api?q="
from nameparser import HumanName
import codecs
import csv
# READ NAMES AND FORMAT PROPERLY
# Build a mapping from each raw line of names.txt (surrounding whitespace
# stripped) to the string form of that line parsed by nameparser's HumanName.
# The file is decoded as UTF-16. The `with` block guarantees the handle is
# closed even if parsing a line raises.
names = {}
with codecs.open('names.txt', encoding='utf-16') as namefile:
    for name in namefile:
        name = name.strip()  # strip once; drops the trailing newline
        names[name] = str(HumanName(name))
@dmil
dmil / pdfscraper
Created March 28, 2015 20:28
Scrape PDFs
"""
Stub for scraping-related jobs
CSS Selectors Reference: http://www.w3schools.com/cssref/css_selectors.asp
"""
import requests, lxml.html
import re
# Select the element using a CSS Selector
@dmil
dmil / scraper_stub
Created March 28, 2015 19:56
Scraper Stub
"""
Stub for scraping-related jobs
CSS Selectors Reference: http://www.w3schools.com/cssref/css_selectors.asp
"""
import requests, lxml.html
# Grab HTML from page. requests applies no timeout by default, so pass one
# explicitly to keep the script from hanging if the server never answers.
response = requests.get('https://www.google.com/', timeout=10)
@dmil
dmil / scraper_stub
Created March 28, 2015 19:56
Scraper Stub
"""
Stub for scraping-related jobs
CSS Selectors Reference: http://www.w3schools.com/cssref/css_selectors.asp
"""
import requests, lxml.html
# Grab HTML from page. requests applies no timeout by default, so pass one
# explicitly to keep the script from hanging if the server never answers.
response = requests.get('https://www.google.com/', timeout=10)