Skip to content

Instantly share code, notes, and snippets.

View escuccim's full-sized avatar

Eric A. Scuccimarra escuccim

View GitHub Profile
@escuccim
escuccim / gist:f358688d9412d4bd423275b11382d263
Created August 3, 2018 09:41
Code to see which features are not used by any model in an sklearn VotingClassifier
# plot feature importance for sklearn decision trees
def feature_importance(forest, X_train, display_results=True):
ranked_list = []
zero_features = []
importances = forest.feature_importances_
indices = np.argsort(importances)[::-1]
if display_results:
@escuccim
escuccim / gist:8e8f42e8600c14fede59a3a70e60ccee
Created August 9, 2018 16:05
Function to find duplicated columns in pandas dataframe
def duplicate_columns(frame):
groups = frame.columns.to_series().groupby(frame.dtypes).groups
dups = []
for t, v in groups.items():
cs = frame[v].columns
vs = frame[v]
lcs = len(cs)
@escuccim
escuccim / tf_bbox_overlap_iou.py
Created September 13, 2018 10:24
Calculate Intersection over Union on two sets of bounding boxes using Tensorflow
import tensorflow as tf
def bbox_overlap_iou(bboxes1, bboxes2):
"""
Args:
bboxes1: shape (total_bboxes1, 4)
with x1, y1, x2, y2 point order.
bboxes2: shape (total_bboxes2, 4)
with x1, y1, x2, y2 point order.
@escuccim
escuccim / gist:323a56730385be6b9b14b546c8c2eb56
Last active November 10, 2018 16:43
Python function to calculate blood levels of medication
# dose: daily dose
# halflife: half-life in hours
# start_level: initial blood levels
# days: days to model
def blood_level(dose, halflife, start_level=0, days=200):
# starting level = start_level (0 by default)
level_0 = start_level
# level after first dose = dose + starting level
level = dose + start_level
@escuccim
escuccim / fpgrowth.py
Last active February 4, 2019 08:11
FP-Growth in Python
class FPTree():
def __init__(self, min_support=2, min_length=1, max_length=None):
self.min_support = min_support
self.min_length = min_length
self.max_length = max_length
# class for a tree node with a name, count, parent and children
# taken from: https://adataanalyst.com/machine-learning/fp-growth-algorithm-python-3/
class treeNode:
def __init__(self, nameValue, numOccur, parentNode):
# db is a dictionary with a pattern tuple as key and its support as value
# P1 and P2 are lists or arrays of the keys to compare
# this will only work if one of P1 and P2 is a subset of the other
def pattern_distance(P1, P2, db):
P1_support = db[tuple(P1)]
P2_support = db[tuple(P2)]
# if P1 is a subset of P2 then we just use their supports
if set(P1).issubset(set(P2)):
return 1 - (P2_support / P1_support)
@escuccim
escuccim / PatternFusion.py
Last active February 8, 2019 10:35
PatternFusion
# depends on my FPTree algorithm - https://gist.github.com/escuccim/dc9aa979d54af69234f508538af7ee1a
# to mine the initial set of candidates for the pool
import numpy as np
import pandas as pd
import random
from itertools import combinations
class PatternFusion():
def __init__(self, min_support=10):
@escuccim
escuccim / purity.py
Created February 26, 2019 07:52
Cluster Purity
import numpy as np
from scipy import stats
## Cluster purity
def purity(truth, pred):
cluster_purities = []
# loop through clusters and calculate purity for each
for pred_cluster in np.unique(pred):
filter_ = pred == pred_cluster
gt_partition = truth[filter_]
@escuccim
escuccim / vecs_from_vals.py
Last active November 17, 2019 08:21
eigenvectors from eigenvalues
# To calculate eigenvectors of a Hermitian matrix using nothing but the eigenvalues
# from https://arxiv.org/pdf/1908.03795.pdf
import numpy as np
# use numpy to calculate the eigenvalues
e_vals = np.linalg.eigvals(mat)
eigen_vectors = np.zeros_like(mat)
n, _ = mat.shape
@escuccim
escuccim / datasets.py
Created December 17, 2020 09:34
Multi-scale training for PyTorch ImageFolder dataset
"""Based on https://github.com/CaoWGG/multi-scale-training"""
from torch.utils.data import Sampler,RandomSampler,SequentialSampler
import numpy as np
class BatchSampler(object):
def __init__(self, sampler, batch_size, drop_last,multiscale_step=None,img_sizes = None):
if not isinstance(sampler, Sampler):
raise ValueError("sampler should be an instance of "
"torch.utils.data.Sampler, but got sampler={}"