Skip to content

Instantly share code, notes, and snippets.

View jaklinger's full-sized avatar

Joel Klinger jaklinger

View GitHub Profile
@jaklinger
jaklinger / google_map_query.py
Last active September 11, 2017 11:30
Snippet giving an example of how to query the Google Maps API, with proxy information.
import requests
import json
import time
'''Decorator for retrying up to 10 times'''
def retry(exception):
def outer_retry(f):
def inner_retry(*args,attempt=0):
n=0
while True:
@jaklinger
jaklinger / uae_pdf_matching.py
Created November 10, 2017 14:44
Matching titles to free text in a PDF
focus_areas = ['Education Innovation and Technology',
'Health Information Technology and Bioinformatics',
'Public Health, Non-Communicable Diseases and Wellness',
'Biotechnology and Genomics',
'Water Management and Economics',
'Solar and Alternative Energy Technology Systems',
'Space Sciences',
'Cubesats and Nanosatellites',
'Cybersecurity',
'Semiconductor Process Development',
@jaklinger
jaklinger / point_in_polygon_uk_ttwas.py
Created January 5, 2018 15:55
Example of dissolving lat/lon points in different projection systems, using UK TTWA shape files
# Geometry, shapefiles and projections
import fiona
from shapely.geometry import shape
from shapely.geometry import Point
import pyproj
# Projections used to convert lon/lat (EPSG:4326) into the EPSG:27700 grid
wgs84 = pyproj.Proj(init='epsg:4326')
ukgrid = pyproj.Proj(init='epsg:27700')


def EnPoint(lon, lat):
    """Return a shapely ``Point`` for ``lon``/``lat`` projected from wgs84 to ukgrid.

    The coordinates are handed to ``pyproj.transform`` in (lon, lat) order and
    the transformed values are unpacked straight into ``Point``.
    """
    projected = pyproj.transform(wgs84, ukgrid, lon, lat)
    return Point(*projected)
@jaklinger
jaklinger / latlon_pairs_to_heatmap.py
Last active January 10, 2018 17:04
Go from lat lon pairs to a geographical heatmap
# Python tools
from matplotlib import pyplot as plt
import pandas as pd
import math
import numpy as np
from collections import defaultdict
from functools import partial
from mpl_toolkits.axes_grid1.inset_locator import inset_axes
import matplotlib as mpl
@jaklinger
jaklinger / example_choropleth_world.py
Last active January 24, 2018 13:39
Example of drawing choropleth using Matplotlib Basemap with Natural Earth boundary data
# Uncomment this for notebooks
# %matplotlib inline
import matplotlib as mpl
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
from mpl_toolkits.basemap import Basemap
import numpy as np
# Country codes --> alpha
@jaklinger
jaklinger / guess_gender.py
Created January 26, 2018 15:49
Gender guessing from xlsx
import gender_guesser.detector as gender
import pandas as pd
# Spreadsheet to read; the loop below expects it to have a "names" column
path_to_file = "~/Downloads/test_names.xlsx"
d = gender.Detector()
data = pd.read_excel(path_to_file)
# Print the detector's guess for every entry of the "names" column
for name in data["names"].values:
    print(d.get_gender(name))
@jaklinger
jaklinger / js_iframe_scrape.py
Last active March 28, 2018 12:36
Example of scraping in JS-redirected iframe
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions
import os
import time
def wait_and_find(driver, element_id, load_time):
time.sleep(load_time/3)
@jaklinger
jaklinger / companies_house_match.py
Created April 26, 2018 08:31
Example of matching organisations to Companies House, by name and address, including fuzzy matching
# Note superfuzz can be found here: https://github.com/jaklinger/nesta_toolbox/blob/master/sandbox/jaklinger/superfuzz/superfuzz.py
import requests
from retrying import retry
import re
import time
from collections import Counter
from superfuzz.superfuzz import superfuzz
from fuzzywuzzy import fuzz
from fuzzywuzzy import process as fuzzy_process
@jaklinger
jaklinger / mag_cheatsheet.sql
Last active May 8, 2018 10:35
MAG postgresql jsonb cheatsheet
-- Selecting nested elements: expand each paper's 'authors' array and
-- return the 'name' of one author (e.g. author names)
SELECT author -> 'name' AS author_name
  FROM microsoft_academic_graph
 CROSS JOIN jsonb_array_elements(microsoft_academic_graph.paper -> 'authors') AS author
 LIMIT 1;
-- Finding specific papers by author
select paper->'title', paper->'authors'
from microsoft_academic_graph
where paper->'authors' @> '[{"name":"John Smith"}]'
@jaklinger
jaklinger / execute_IN_in_chunks.py
Created May 11, 2018 13:46
Execute SELECT statements in chunks with SQLAlchemy
from sqlalchemy import create_engine
from sqlalchemy import text as sql_text
# Stolen from https://stackoverflow.com/a/434328/1571593
def chunker(seq, size):
    """Return a generator over consecutive slices of ``seq``.

    Args:
        seq: A sliceable object supporting ``len()`` (list, tuple, str, ...).
        size: Maximum length of each slice; must be at least 1.

    Returns:
        A generator yielding ``seq[pos:pos + size]`` for every ``pos`` that is
        a multiple of ``size``; the final slice may be shorter than ``size``.

    Raises:
        ValueError: If ``size`` is less than 1.
    """
    # A negative step makes range() empty, which would silently drop all of
    # the data, so reject it up front (a zero step already raised ValueError).
    if size < 1:
        raise ValueError("size must be a positive integer")
    return (seq[pos:pos + size] for pos in range(0, len(seq), size))
# Execute {SELECT ... IN ...} in chunks
'''Notes:
1) query has to be of the form: