Skip to content

Instantly share code, notes, and snippets.

@mostafam
Created June 3, 2020 01:24
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save mostafam/7e04a31b506916ddafd3733f36997685 to your computer and use it in GitHub Desktop.
Save mostafam/7e04a31b506916ddafd3733f36997685 to your computer and use it in GitHub Desktop.
Final Take!
import pandas as pd
import numpy as np
from uszipcode import SearchEngine
import sqlite3
# Instantiate the uszipcode search engine against /tmp/db.
# NOTE(review): presumably this is what places simple_db.sqlite under
# /tmp/db (uszipcode fetches its database on first use) -- confirm.
search = SearchEngine(db_file_dir="/tmp/db")

# Read the zipcode table straight from the SQLite file so it can be shipped
# to the executors as a plain pandas DataFrame.
conn = sqlite3.connect("/tmp/db/simple_db.sqlite")

# The query is assembled from adjacent string literals: an ordinary quoted
# string cannot span several physical lines.
pdf = pd.read_sql_query(
    "select zipcode, lat, lng, radius_in_miles, "
    "bounds_west, bounds_east, bounds_north, bounds_south from "
    "simple_zipcode",
    conn,
)

# Broadcast the lookup table once; the UDF reads it through brd_pdf.value.
brd_pdf = sc.broadcast(pdf)
@udf('string')
def get_zip_b(lat, lng):
    """Return the zipcode whose bounding box contains the point (lat, lng).

    The broadcast zipcode table is filtered to the rows whose
    north/south/west/east bounds enclose the point. Among those candidates
    the one whose (lat, lng) centre has the smallest squared difference to
    the point is chosen; ties go to the first candidate in table order.
    The comparison is done directly on the coordinate values, not as a
    geodesic distance.

    Args:
        lat: Latitude of the point.
        lng: Longitude of the point.

    Returns:
        The matching ``zipcode`` value, or the string ``'bad'`` when either
        coordinate is missing, no bounding box contains the point, or the
        coordinates cannot be compared numerically.
    """
    # Null coordinates cannot match anything; answer without touching pandas.
    if lat is None or lng is None:
        return 'bad'

    table = brd_pdf.value
    try:
        candidates = table[
            (table['bounds_north'] >= lat)
            & (table['bounds_south'] <= lat)
            & (table['bounds_west'] <= lng)
            & (table['bounds_east'] >= lng)
        ]
        if candidates.empty:
            return 'bad'

        # Vectorised squared distance from the point to each candidate centre.
        sq_dist = (
            (candidates['lat'] - lat) ** 2
            + (candidates['lng'] - lng) ** 2
        ).to_numpy(dtype=float)

        # nanargmin ignores candidates with a missing centre and returns the
        # first minimum; it raises ValueError when every distance is NaN.
        nearest = int(np.nanargmin(sq_dist))
        return candidates['zipcode'].iloc[nearest]
    except (TypeError, ValueError):
        # Non-numeric input or no usable centre: keep the best-effort
        # sentinel instead of failing the whole Spark job.
        return 'bad'
# Add a 'zip' column computed by get_zip_b from each row's latitude and
# longitude columns, and cache the resulting DataFrame.
output_df = (
    df
    .withColumn('zip', get_zip_b(col("latitude"), col("longitude")))
    .cache()
)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment