Skip to content

Instantly share code, notes, and snippets.

Eric A. Scuccimarra escuccim

Block or report user

Report or block escuccim

Hide content and notifications from this user.

Learn more about blocking users

Contact Support about this user’s behavior.

Learn more about reporting abuse

Report abuse
View GitHub Profile
@escuccim
escuccim / vecs_from_vals.py
Last active Nov 17, 2019
eigenvectors from eigenvalues
View vecs_from_vals.py
# To calculate eigenvectors of a Hermitian matrix using nothing but the eigenvalues
# from https://arxiv.org/pdf/1908.03795.pdf
import numpy as np
# use numpy to calculate the eigenvalues
e_vals = np.linalg.eigvals(mat)
eigen_vectors = np.zeros_like(mat)
n, _ = mat.shape
@escuccim
escuccim / purity.py
Created Feb 26, 2019
Cluster Purity
View purity.py
import numpy as np
from scipy import stats
## Cluster purity
def purity(truth, pred):
cluster_purities = []
# loop through clusters and calculate purity for each
for pred_cluster in np.unique(pred):
filter_ = pred == pred_cluster
gt_partition = truth[filter_]
View PatternFusion.py
# depends on my FPTree algorithm - https://gist.github.com/escuccim/dc9aa979d54af69234f508538af7ee1a
# to mine the initial set of candidates for the pool
import numpy as np
import pandas as pd
import random
from itertools import combinations
class PatternFusion():
def __init__(self, min_support=10):
@escuccim
escuccim / fpgrowth.py
Last active Feb 4, 2019
FP-Growth in Python
View fpgrowth.py
class FPTree():
def __init__(self, min_support=2, min_length=1, max_length=None):
self.min_support = min_support
self.min_length = min_length
self.max_length = max_length
# class for a tree node with a name, count, parent and children
# taken from : https://adataanalyst.com/machine-learning/fp-growth-algorithm-python-3/
class treeNode:
def __init__(self, nameValue, numOccur, parentNode):
View gist:3ad7f9b4b538384602ef6df0462564d8
# db is a dictionary with a tuple of the pattern's items as key and the pattern's support as value
# P1 and P2 are lists or arrays of the keys to compare
# this will only work if one of P1 and P2 is a subset of the other
def pattern_distance(P1, P2, db):
P1_support = db[tuple(P1)]
P2_support = db[tuple(P2)]
# if P1 is a subset of P2 then we just use their supports
if set(P1).issubset(set(P2)):
return 1 - (P2_support / P1_support)
@escuccim
escuccim / gist:323a56730385be6b9b14b546c8c2eb56
Last active Nov 10, 2018
Python function to calculate blood levels of medication
View gist:323a56730385be6b9b14b546c8c2eb56
# dose: daily dose
# halflife: half-life in hours
# start_level: initial blood levels
# days: days to model
def blood_level(dose, halflife, start_level=0, days=200):
# starting level = start_level (0 by default)
level_0 = start_level
# level after first dose = dose
level = dose + start_level
@escuccim
escuccim / tf_bbox_overlap_iou.py
Created Sep 13, 2018
Calculate Intersection over Union on two sets of bounding boxes using Tensorflow
View tf_bbox_overlap_iou.py
import tensorflow as tf
def bbox_overlap_iou(bboxes1, bboxes2):
"""
Args:
bboxes1: shape (total_bboxes1, 4)
with x1, y1, x2, y2 point order.
bboxes2: shape (total_bboxes2, 4)
with x1, y1, x2, y2 point order.
@escuccim
escuccim / gist:8e8f42e8600c14fede59a3a70e60ccee
Created Aug 9, 2018
Function to find duplicated columns in pandas dataframe
View gist:8e8f42e8600c14fede59a3a70e60ccee
def duplicate_columns(frame):
groups = frame.columns.to_series().groupby(frame.dtypes).groups
dups = []
for t, v in groups.items():
cs = frame[v].columns
vs = frame[v]
lcs = len(cs)
@escuccim
escuccim / gist:f358688d9412d4bd423275b11382d263
Created Aug 3, 2018
Code to see what features are not used by any models in sklearn VotingClassifier
View gist:f358688d9412d4bd423275b11382d263
# plot feature importance for sklearn decision trees
def feature_importance(forest, X_train, display_results=True):
ranked_list = []
zero_features = []
importances = forest.feature_importances_
indices = np.argsort(importances)[::-1]
if display_results:
@escuccim
escuccim / mean_encode.py
Created Jun 7, 2018
mean encoding in python
View mean_encode.py
# Mean (target) encoding:
# average the `target` column per category of `col` on X_tr, then attach those
# per-category averages to the train and validation frames as a new
# '<col>_target_mean' column.
# NOTE(review): assuming these are pandas objects, categories absent from
# X_tr would map to NaN -- confirm whether a fallback value is wanted.
means = X_tr.groupby(col).target.mean()
target_mean_col = col + '_target_mean'
for frame in (train_new, val_new):
    frame[target_mean_col] = frame[col].map(means)
You can’t perform that action at this time.