Skip to content

Instantly share code, notes, and snippets.

@cuckookernel
Created July 16, 2018 14:06
Show Gist options
  • Save cuckookernel/e8d0c17a22df02da1cbd5b577f78320f to your computer and use it in GitHub Desktop.
Save cuckookernel/e8d0c17a22df02da1cbd5b577f78320f to your computer and use it in GitHub Desktop.
# Count, for every ordered pair of distinct users, the number of locations at
# which the second user (the "stalker") checked in strictly after the first
# user (the "stalkee").  Expects `checkins_df` to already be in scope with the
# columns `user_id`, `checkin_ts` and `location_id`.

# Keep only the columns needed and key the frame by location so that the
# self-join below matches check-ins made at the same place.
checkins_by_loc = checkins_df[["user_id", "checkin_ts", "location_id"]]
checkins_by_loc = checkins_by_loc.set_index("location_id")

# Index-on-index self-join: every check-in is paired with every check-in at
# the same location.  Left-hand columns get the `_ee` suffix, right-hand
# columns the `_er` suffix.
chin_pairs = checkins_by_loc.merge(
    checkins_by_loc,
    how="left",
    left_index=True,
    right_index=True,
    suffixes=("_ee", "_er"),
)

# Retain a pair only when two different users are involved and the `_er`
# check-in happened strictly after the `_ee` one.
pairs_filtered = chin_pairs[
    (chin_pairs["user_id_ee"] != chin_pairs["user_id_er"])
    & (chin_pairs["checkin_ts_ee"] < chin_pairs["checkin_ts_er"])
]
pairs_filtered = pairs_filtered.rename(
    columns={"user_id_ee": "stalkee", "user_id_er": "stalker"}
)
# Bring `location_id` back from the index as an ordinary column.
pairs_filtered = pairs_filtered.reset_index()
pairs_filtered = pairs_filtered[["stalkee", "stalker", "location_id"]]

# Collapse repeated (stalkee, stalker, location) rows first so that the count
# below is a count of distinct locations per pair, then rank pairs by it.
final_result = pairs_filtered.drop_duplicates()
final_result = final_result.groupby(["stalkee", "stalker"])[["location_id"]].count()
final_result = final_result.rename(columns={"location_id": "location_count"})
final_result = final_result.sort_values("location_count", ascending=False)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment