Skip to content

Instantly share code, notes, and snippets.

@chribsen
Created March 22, 2016 13:13
Show Gist options
  • Save chribsen/454992e3e5705f0797c2 to your computer and use it in GitHub Desktop.
Save chribsen/454992e3e5705f0797c2 to your computer and use it in GitHub Desktop.
Identify features for people pairs in the RF data set.
import psycopg2
# Open the database connection and a cursor that the rest of the script shares.
conn_dtu = psycopg2.connect('connstring')
cur_dtu = conn_dtu.cursor()

# Map each user_id to the list of country ids recorded for that user,
# leaving out rows whose country_id is 53.
cur_dtu.execute("select user_id, array_agg(country_id) FROM derived_countries_visited where country_id != 53 group by user_id")
lookup = {user_id: countries for user_id, countries in cur_dtu.fetchall()}

# Select every (user_a, user_b) pair; the rows are fetched by the loop below.
cur_dtu.execute("select user_a, user_b from derived_friend_features")
def country_intersection(places_a, places_b):
    """Return the number of distinct values present in both inputs.

    Args:
        places_a: Iterable of hashable values (country ids in this script).
        places_b: Iterable of hashable values to compare against.

    Returns:
        The count of distinct values that occur in both iterables; 0 when
        either one is empty.
    """
    # set.intersection accepts any iterable, so only one side needs converting.
    return len(set(places_a).intersection(places_b))
# Classify every friend pair by the countries (other than id 53) that the two
# users visited, and set exactly one boolean feature column per pair.
# `lookup` only contains users with at least one such country, so a missing
# user is treated as an empty list. Country 53 is presumably Denmark, given
# the country_only_in_dk column name -- TODO confirm.
for i, (each_a, each_b) in enumerate(cur_dtu.fetchall()):
    points_a = lookup.get(each_a, [])
    points_b = lookup.get(each_b, [])
    other_country_count = country_intersection(points_a, points_b)

    # Name the two conditions explicitly. The previous inline form
    # `len(points_b) > 0 != len(points_b) > 0` was a chained comparison that
    # only ever looked at points_b, and the "both visited" test checked
    # points_a twice, so pairs were assigned to the wrong categories.
    a_visited = len(points_a) > 0
    b_visited = len(points_b) > 0

    # The four cases are mutually exclusive, so an if/elif chain is enough.
    if not a_visited and not b_visited:
        # Neither user has a country other than 53.
        cur_dtu.execute("UPDATE derived_friend_features SET country_only_in_dk=TRUE WHERE user_a=%s and user_b=%s",
                        (each_a, each_b))
    elif other_country_count > 0:
        # The users share at least one country.
        cur_dtu.execute("UPDATE derived_friend_features SET country_both_visited_fc=TRUE WHERE user_a=%s and user_b=%s",
                        (each_a, each_b))
    elif a_visited != b_visited:
        # Exactly one of the two users has a country other than 53.
        cur_dtu.execute("UPDATE derived_friend_features SET country_one_visited_fc=TRUE WHERE user_a=%s and user_b=%s",
                        (each_a, each_b))
    else:
        # Both users have such countries, but none in common.
        cur_dtu.execute("UPDATE derived_friend_features SET country_both_visited_different_fc=TRUE WHERE user_a=%s and user_b=%s",
                        (each_a, each_b))

    # Progress output, plus a commit every 500 pairs to keep transactions small.
    print(str(i))
    if i % 500 == 0:
        conn_dtu.commit()

# Commit whatever is left after the last periodic commit.
conn_dtu.commit()
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment