Skip to content

Instantly share code, notes, and snippets.

View chribsen's full-sized avatar

Christian Danielsen chribsen

View GitHub Profile
@chribsen
chribsen / add_mutual_friends_count.py
Created March 27, 2016 10:19
Gets the friend pairs from table derived_friend_list and finds their mutual friends. The mutual friend counts are found by taking the size of the intersection of their friend set.
import psycopg2
import psycopg2.extras
from collections import defaultdict
conn_dtu = psycopg2.connect(<connstring>)
cur_dtu = conn_dtu.cursor(cursor_factory=psycopg2.extras.DictCursor)
cur_dtu.execute("""SELECT user_a, user_b FROM derived_friend_list""")
@chribsen
chribsen / compute_camp_clusters.py
Created March 27, 2016 12:13
Extremely inefficient coordinate clustering, eps=20m, min_sample=30.
from __future__ import division
import numpy as np
from sklearn.cluster import DBSCAN, KMeans
import psycopg2
from collections import defaultdict
import folium
EARTH_CIRCUMFERENCE = 6378137 # earth circumference in meters
colors = ['green', 'red', 'yellow', 'blue', 'black', 'white', 'gray', 'pink', 'cloud']
@chribsen
chribsen / add_places_jaccard_similarity.py
Created March 27, 2016 12:56
Computes the jaccard similarity of the places visited by each user for all user pairs.
import psycopg2
from scipy.spatial.distance import jaccard
conn_dtu = psycopg2.connect(<connstring>)
cur_dtu = conn_dtu.cursor()
# Retrieve places visited for each user in the user pair and aggregate them into an array.
cur_dtu.execute("""
select count(*) FROM (select user_a, ( SELECT array_agg(place_id) FROM derived_places_visited WHERE user_id=dff.user_a) as places_a,
user_b, ( SELECT array_agg(place_id) FROM derived_places_visited WHERE user_id=dff.user_b) as places_b from derived_friend_features as dff
@chribsen
chribsen / create_feature_same_camp_score.sql
Created April 4, 2016 16:24
Creates the feature same_camp_score.
-- Creates the feature same camp score by looking in the table tmp_user_camp, that consists of the most frequent visited camp site.
update derived_friend_features set same_camp_score=FALSE;
update derived_friend_features AS fr set same_camp_score = TRUE
where (select title FROM tmp_user_camp WHERE user_id = fr.user_a limit 1) = (select title FROM tmp_user_camp WHERE user_id = fr.user_b limit 1)
;
@chribsen
chribsen / create_feature_same_genre_score.sql
Created April 4, 2016 16:31
Creates the feature same_genre_score
update derived_friend_features set same_genre_score=FALSE;
update derived_friend_features AS fr set same_genre_score = TRUE
where (select itunes_genre FROM tmp_user_genre WHERE user_id = fr.user_a limit 1) = (select itunes_genre FROM tmp_user_genre WHERE user_id = fr.user_b limit 1)
@chribsen
chribsen / rf-square-km.sql
Last active May 3, 2016 10:23
Roskilde Festival size
-- The size Roskilde Festival in Square Kilometers
-- The points used are taken from the max points received in the dataset from IBM / CBS dataset.
SELECT (st_distance_sphere(st_makepoint(12.106277, 55.627567), st_makepoint(12.058901,55.627567) ) /1000) *
(st_distance_sphere(st_makepoint(12.058901,55.627567), st_makepoint(12.058901, 55.609975) )/1000)
@chribsen
chribsen / get-artist-from-spotify.py
Created June 1, 2016 21:54
Script retrieves genres based on PostgreSQL database called artist_name, that contains artist names
import psycopg2
import requests
import time
conn_dtu = psycopg2.connect("<connection_string>")
cur_dtu = conn_dtu.cursor()
cur_dtu.execute("select artist_name from data_lineup where spotify_id is null")
search_url = 'https://api.spotify.com/v1/search'
@chribsen
chribsen / create_city_connections.sql
Created June 9, 2016 17:07
Creates a table containing city connections for dyads
select ST_MAKELINE(ARRAY[St_Setsrid(st_centroid(dc1.geom),4326), st_setsrid(st_centroid(dc2.geom),4326)]), dc1.name_2 as source,dc2.name_2 as target, (sum(dd.is_friend_ratio)/(dc1.rf_population+dc2.rf_population)::float) as weight, count(*) as cou
into presentation_city_connections
from derived_dyads dd
inner join user_residence ur1 on dd.user_a=ur1.user_id
inner join dk_cities dc1 on dc1.gid=ur1.city_id
inner join user_residence ur2 on dd.user_b=ur2.user_id
inner join dk_cities dc2 on dc2.gid=ur2.city_id
where dc1.name_2!=dc2.name_2
group by dc1.name_2,dc2.name_2,dc1.rf_population, dc2.rf_population,dc1.geom, dc2.geom;
@chribsen
chribsen / roskilde-2015-area-size.kml
Created July 5, 2016 12:21
A KML representation of a polygon that captures the approximate size of Roskilde Festival 2015. The area is sized 2.81 km2, which is slightly larger than the "approximately 2.5km2" that Roskilde Festival reports on their official site: http://www.roskilde-festival.dk/more/press/fun-facts. We believe that this is a more accurate measure of the tr…
<?xml version="1.0" encoding="UTF-8"?>
<kml xmlns="http://www.opengis.net/kml/2.2" xmlns:gx="http://www.google.com/kml/ext/2.2" xmlns:kml="http://www.opengis.net/kml/2.2" xmlns:atom="http://www.w3.org/2005/Atom">
<Document>
<name>Area Calculator</name>
<description>2814731.04 m² | 2.81 km² | 695.54 acres | 281.47 hectares | 30297512.88 feet² |1.09 square miles | 0.82 square nautical miles</description>
<gx:balloonVisibility>1</gx:balloonVisibility>
<StyleMap id="msn_ylw-pushpin">
<Pair>
<key>normal</key>
<styleUrl>#sn_ylw-pushpin</styleUrl>
@chribsen
chribsen / get-indices-on-table.sql
Created November 12, 2016 09:44
Get indices on a specific table (PostgreSQL)
CREATE OR REPLACE FUNCTION getIndices(_table_name varchar)
RETURNS TABLE(table_name varchar, index_name varchar, column_name varchar) AS $$
BEGIN
RETURN QUERY
select
t.relname::varchar as table_name,
i.relname::varchar as index_name,
a.attname::varchar as column_name
from
pg_class t,