Skip to content

Instantly share code, notes, and snippets.

Eric A. Scuccimarra escuccim

View GitHub Profile
@escuccim
escuccim / vecs_from_vals.py
Last active Nov 17, 2019
eigenvectors from eigenvalues
View vecs_from_vals.py
# To calculate eigenvectors of a Hermitian matrix using nothing but the eigenvalues
# from https://arxiv.org/pdf/1908.03795.pdf
import numpy as np
# use numpy to calculate the eigenvalues
e_vals = np.linalg.eigvals(mat)
eigen_vectors = np.zeros_like(mat)
n, _ = mat.shape
@escuccim
escuccim / purity.py
Created Feb 26, 2019
Cluster Purity
View purity.py
import numpy as np
from scipy import stats
## Cluster purity
def purity(truth, pred):
cluster_purities = []
# loop through clusters and calculate purity for each
for pred_cluster in np.unique(pred):
filter_ = pred == pred_cluster
gt_partition = truth[filter_]
View PatternFusion.py
# depends on my FPTree algorithm - https://gist.github.com/escuccim/dc9aa979d54af69234f508538af7ee1a
# to mine the initial set of candidates for the pool
import numpy as np
import pandas as pd
import random
from itertools import combinations
class PatternFusion():
def __init__(self, min_support=10):
View gist:3ad7f9b4b538384602ef6df0462564d8
# db is a dictionary mapping each pattern (as a tuple) to its support
# P1 and P2 are the patterns to compare, given as lists or arrays; each is converted to a tuple to look up its support in db
# this only works if one of P1 and P2 is a subset of the other
def pattern_distance(P1, P2, db):
P1_support = db[tuple(P1)]
P2_support = db[tuple(P2)]
# if P1 is a subset of P2 then we just use their supports
if set(P1).issubset(set(P2)):
return 1 - (P2_support / P1_support)
@escuccim
escuccim / fpgrowth.py
Last active Feb 4, 2019
FP-Growth in Python
View fpgrowth.py
class FPTree():
def __init__(self, min_support=2, min_length=1, max_length=None):
self.min_support = min_support
self.min_length = min_length
self.max_length = max_length
# class for a tree node with a name, count, parent and children
# taken from : https://adataanalyst.com/machine-learning/fp-growth-algorithm-python-3/
class treeNode:
def __init__(self, nameValue, numOccur, parentNode):
@escuccim
escuccim / gist:323a56730385be6b9b14b546c8c2eb56
Last active Nov 10, 2018
Python function to calculate blood levels of medication
View gist:323a56730385be6b9b14b546c8c2eb56
# dose: daily dose
# halflife: half-life in hours
# start_level: initial blood levels
# days: days to model
def blood_level(dose, halflife, start_level=0, days=200):
# starting level = start_level (defaults to 0)
level_0 = start_level
# level after first dose = dose
level = dose + start_level
@escuccim
escuccim / tf_bbox_overlap_iou.py
Created Sep 13, 2018
Calculate Intersection over Union on two sets of bounding boxes using Tensorflow
View tf_bbox_overlap_iou.py
import tensorflow as tf
def bbox_overlap_iou(bboxes1, bboxes2):
"""
Args:
bboxes1: shape (total_bboxes1, 4)
with x1, y1, x2, y2 point order.
bboxes2: shape (total_bboxes2, 4)
with x1, y1, x2, y2 point order.
@escuccim
escuccim / gist:8e8f42e8600c14fede59a3a70e60ccee
Created Aug 9, 2018
Function to find duplicated columns in pandas dataframe
View gist:8e8f42e8600c14fede59a3a70e60ccee
def duplicate_columns(frame):
groups = frame.columns.to_series().groupby(frame.dtypes).groups
dups = []
for t, v in groups.items():
cs = frame[v].columns
vs = frame[v]
lcs = len(cs)
@escuccim
escuccim / gist:f358688d9412d4bd423275b11382d263
Created Aug 3, 2018
Code to see what features are not used by any models in sklearn VotingClassifier
View gist:f358688d9412d4bd423275b11382d263
# plot feature importance for sklearn decision trees
def feature_importance(forest, X_train, display_results=True):
ranked_list = []
zero_features = []
importances = forest.feature_importances_
indices = np.argsort(importances)[::-1]
if display_results:
@escuccim
escuccim / mean_encode.py
Created Jun 7, 2018
mean encoding in python
View mean_encode.py
# mean encoding: add a '<col>_target_mean' feature holding the mean target value of each row's category in `col`
# NOTE(review): X_tr, col, train_new and val_new are defined outside this snippet;
# presumably X_tr is a pandas DataFrame with a `target` column - confirm against the caller.
# mean of `target` for each distinct value of `col`, computed from X_tr only
means = X_tr.groupby(col).target.mean()
# look up each row's `col` value in `means` and store the result in a new column
train_new[col+'_target_mean'] = train_new[col].map(means)
# the validation frame reuses the same X_tr-derived means
# NOTE(review): values of `col` absent from `means` presumably map to NaN - verify and handle if needed
val_new[col+'_target_mean'] = val_new[col].map(means)
You can’t perform that action at this time.