Skip to content

Instantly share code, notes, and snippets.

@epave
epave / hh_month_hasher.py
Created August 29, 2015 16:35
HH data month hasher
def extract_publish_month(col):
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
import numpy as np
class MonthExtractor():
def __init__(self):
pass
@epave
epave / tfidf_hh_profarea.py
Created August 29, 2015 16:13
TF-IDF for HH profarea data
def tfidf_profarea(col, min_df=100, profarea_only=True):
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.pipeline import Pipeline
class IdExtractor():
def __init__(self, field='profarea_id'):
self._field=field
@epave
epave / lifx_run
Last active August 29, 2015 14:22
LIFX control
import os, subprocess,sys
def lifx_run(token, command, params):
url = '"https://api.lifx.com/v1beta1/lights/all/' + command + '"'
if command.startswith('effects'):
data = "color="+params
method = 'POST'
else:
if command == 'power':
data = "state="+params
elif command == 'color':
@epave
epave / adj_rand_score
Created September 15, 2014 17:47
Adjusted Rand Score calculation from a sparse matrix
def read_adj_rand_score(fname):
import scipy.sparse as sp
from scipy.misc import comb
def comb2(n):
# Return "n choose 2": the number of unordered pairs among n items.
# the exact version is faster for k == 2: use it by default globally in
# this module instead of the float approximate variant
# NOTE(review): `comb` is imported from scipy.misc by the enclosing snippet;
# newer SciPy releases provide it as scipy.special.comb instead -- confirm
# against the SciPy version this gist is expected to run on.
return comb(n, 2, exact=1)
mat = sp.lil_matrix((5000,5000), dtype=float)
with open(fname) as f:
lines = f.readlines()
@epave
epave / mi-ari-scores
Created June 19, 2014 15:18
Mutual Information and Adjusted Rand Score calculation
def my_scores(labels_one, labels_two):
vprod = len(intersect1d(labels_one.nonzero()[0], labels_two.nonzero()[0]))
if vprod < 100:
return 0.0, 0.0
n_samples = labels_one.shape[0]
nnz_one = labels_one.nnz
nnz_two = labels_two.nnz
contingency = array([[labels_one.shape[0] - nnz_one - nnz_two + vprod, nnz_one - vprod],[nnz_two - vprod, vprod]], dtype='float')
contingency_sum = labels_one.shape[0]#np.sum(contingency)
pi = array([labels_one.shape[0] - nnz_two, nnz_two])#np.sum(contingency, axis=1)