Skip to content

Instantly share code, notes, and snippets.

View chribsen's full-sized avatar

Christian Danielsen chribsen

View GitHub Profile
@chribsen
chribsen / compute_camp_clusters.py
Created March 27, 2016 12:13
Extremely inefficient coordinate clustering, eps=20m, min_samples=30.
from __future__ import division
import numpy as np
from sklearn.cluster import DBSCAN, KMeans
import psycopg2
from collections import defaultdict
import folium
# NOTE(review): 6378137 m is the WGS84 equatorial *radius* of the Earth, not its
# circumference (roughly 40,075 km). The name is left unchanged because code
# outside this excerpt may reference it.
EARTH_CIRCUMFERENCE = 6378137 # Earth's equatorial radius in meters (constant is misnamed)
# Color names, one string per entry.
# NOTE(review): 'cloud' does not look like a standard color name -- confirm it
# is accepted wherever this list is consumed.
colors = ['green', 'red', 'yellow', 'blue', 'black', 'white', 'gray', 'pink', 'cloud']
@chribsen
chribsen / add_mutual_friends_count.py
Created March 27, 2016 10:19
Gets the friend pairs from the table derived_friend_list and finds their mutual friends. The mutual friend count of a pair is the size of the intersection of the two users' friend sets.
import psycopg2
import psycopg2.extras
from collections import defaultdict
conn_dtu = psycopg2.connect(<connstring>)
cur_dtu = conn_dtu.cursor(cursor_factory=psycopg2.extras.DictCursor)
cur_dtu.execute("""SELECT user_a, user_b FROM derived_friend_list""")
from sklearn.cross_validation import cross_val_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.naive_bayes import MultinomialNB
import psycopg2
import psycopg2.extras
from collections import Counter
from sklearn.feature_selection import VarianceThreshold
from sklearn.feature_selection import RFECV
from sklearn import metrics
from sklearn.cross_validation import KFold, StratifiedKFold
@chribsen
chribsen / add_country_features.py
Created March 22, 2016 13:13
Identify features for people pairs in the RF data set.
import psycopg2
conn_dtu = psycopg2.connect('connstring')
cur_dtu = conn_dtu.cursor()
cur_dtu.execute("select user_id, array_agg(country_id) FROM derived_countries_visited where country_id != 53 group by user_id")
lookup = {}
for each in cur_dtu.fetchall():
# Newer version of the polygon that bounds DK.
# Each vertex is a (latitude, longitude) pair.
dk_polygon = [
    (58.44773280389084, 10.3271484375),
    (56.51101750495214, 6.591796875),
    (55.02802211299252, 7.6904296875),
    (54.470037612805754, 12.12890625),
    (55.23528803992295, 12.7496337890625),
    (55.912272930063615, 12.7001953125),
    (56.07510136019262, 12.6068115234375),
]
@chribsen
chribsen / compute_camp_clusters.py
Created March 21, 2016 13:32
Computes spatial clusters using haversine distance and plots the output on a map. Meant for use on the RF data set. Parameters haven't been tuned yet.
from __future__ import division
import numpy as np
from sklearn.cluster import DBSCAN, KMeans
from sklearn.preprocessing import StandardScaler
from data import points
import psycopg2
import matplotlib.pyplot as plt
import sys
from collections import defaultdict
import math, json
@chribsen
chribsen / get_places_from_facebook.py
Created March 21, 2016 13:25
Retrieves places within 10 km of Roskilde Festival and stores them in a JSON file.
import requests, json
import time
# Places token
token = '<my_token>'
params = {'access_token': token,
'center': ','.join([str(55.622534), str(12.080729)]),
'distance': 10000,
'type': 'place',
@chribsen
chribsen / get_places_from_google.py
Last active March 21, 2016 13:23
Retrieves places within 10 km of Roskilde Festival from Google and stores them in a JSON file.
import requests, json
import time
# API key for the places request (placeholder value).
token = '<my_token>'

# Request parameters: the search centre as a "lat,lng" string plus the
# search radius around it.
params = {
    'key': token,
    'location': '%s,%s' % (55.622534, 12.080729),
    'radius': 10000,
}
import psycopg2
from itertools import groupby
conn_dtu = psycopg2.connect("<connstring>")
cur_dtu = conn_dtu.cursor()
cur_dtu.execute("select user_a, user_b, meeting_array from temp_friend_meetings where met_consecutive_days is null")
for idx, each in enumerate(cur_dtu.fetchall()):
meeting_vector = each[2]
-- For every pair of users, counts the days on which the pair had at least one
-- co-occurrence (nr_of_occurences > 0), then groups the pairs by that day count
-- into the bins 0..10 produced by generate_series.
-- Used for visualization.
-- NOTE(review): because the join condition is t.cou = s, sum(t.cou) yields
-- s * (number of pairs with s such days), not the number of pairs in the bin.
-- If a histogram of pairs is intended, count(t.cou) is presumably what is
-- wanted -- confirm against the visualization.
-- NOTE(review): there is no ORDER BY, so the order of the returned bins is not
-- guaranteed.
select s, sum(t.cou) from generate_series(0,10) as s left join
(select user_a, user_b, count(*) as cou from derived_friend_list_days where nr_of_occurences > 0 group by user_a, user_b) as t
on t.cou = s
group by s;