Skip to content

Instantly share code, notes, and snippets.

View joekane3's full-sized avatar
🏴󠁧󠁢󠁷󠁬󠁳󠁿

joekane3

🏴󠁧󠁢󠁷󠁬󠁳󠁿
View GitHub Profile
@joekane3
joekane3 / haversine_pyspark.py
Created October 4, 2018 11:52
haversine distance using pyspark
def haversine_spark(df, col_lat1, col_lon1, col_lat2, col_lon2, col_name="distance"):
    """Add a haversine (great-circle) distance column to a Spark DataFrame.

    The coordinates are converted with ``F.radians``, so they are expected
    in decimal degrees. The result is scaled by 6371 (the Earth's mean
    radius in kilometres), so the distance is in kilometres.

    Args:
        df: DataFrame to extend.
        col_lat1: Latitude of the first point. May be a Column, a column
            name (str), or a numeric literal.
        col_lon1: Longitude of the first point (same accepted forms).
        col_lat2: Latitude of the second point (same accepted forms).
        col_lon2: Longitude of the second point (same accepted forms).
        col_name: Name of the output column. If a column with this name
            already exists it is replaced (``withColumn`` semantics).

    Returns:
        A new DataFrame equal to ``df`` plus the ``col_name`` column. No
        other column of ``df`` is added, overwritten, or removed.
    """
    earth_radius_km = 6371

    def _as_column(value):
        # Strings are interpreted as column names. F.lit returns a Column
        # argument unchanged and wraps plain numbers as literal columns.
        return F.col(value) if isinstance(value, str) else F.lit(value)

    lat1, lon1, lat2, lon2 = (
        _as_column(c) for c in (col_lat1, col_lon1, col_lat2, col_lon2)
    )

    # Haversine term, built as a single expression. Materialising it in a
    # scratch column (e.g. "a") and dropping it afterwards would clobber any
    # user column with that name, and would discard the result entirely when
    # col_name happened to be the scratch name.
    a = (
        F.pow(F.sin(F.radians(lat2 - lat1) / 2), 2)
        + F.cos(F.radians(lat1))
        * F.cos(F.radians(lat2))
        * F.pow(F.sin(F.radians(lon2 - lon1) / 2), 2)
    )

    # Floating-point rounding can leave `a` marginally above 1 for
    # near-antipodal points, which would make sqrt(1 - a) NaN. Floor the
    # complement at 0. F.greatest skips nulls, but sqrt(a) is still null for
    # null inputs, so null rows keep producing a null distance.
    one_minus_a = F.greatest(F.lit(0.0), -a + 1)

    distance = F.atan2(F.sqrt(a), F.sqrt(one_minus_a)) * 2 * earth_radius_km
    return df.withColumn(col_name, distance)