{{ message }}

Instantly share code, notes, and snippets.

# Eric A. Scuccimarra (escuccim)

Last active Nov 17, 2019
eigenvectors from eigenvalues
View vecs_from_vals.py
 # To calculate eigenvectors of a Hermitian matrix using nothing but the eigenvalues # from https://arxiv.org/pdf/1908.03795.pdf import numpy as np # use numpy to calculate the eigen values e_vals = np.linalg.eigvals(mat) eigen_vectors = np.zeros_like(mat) n, _ = mat.shape
Created Feb 26, 2019
Cluster Purity
View purity.py
 import numpy as np from scipy import stats ## Cluster purity def purity(truth, pred): cluster_purities = [] # loop through clusters and calculate purity for each for pred_cluster in np.unique(pred): filter_ = pred == pred_cluster gt_partition = truth[filter_]
Last active Feb 8, 2019
PatternFusion
View PatternFusion.py
 # depends on my FPTree algorithm - https://gist.github.com/escuccim/dc9aa979d54af69234f508538af7ee1a # to mine the initial set of candidates for the pool import numpy as np import pandas as pd import random from itertools import combinations class PatternFusion(): def __init__(self, min_support=10):
Last active Feb 3, 2019
PatternDistance
 # DB is dictionary with tuple of pattern as key and support as value # P1 and P2 are lists or arrays of the keys to compare # this will only work if one of P1 and P2 is a subset of the other def pattern_distance(P1, P2, db): P1_support = db[tuple(P1)] P2_support = db[tuple(P2)] # if P1 is a subset of P2 then we just use their supports if set(P1).issubset(set(P2)): return 1 - (P2_support / P1_support)
Last active Feb 4, 2019
FP-Growth in Python
View fpgrowth.py
 class FPTree(): def __init__(self, min_support=2, min_length=1, max_length=None): self.min_support = min_support self.min_length = min_length self.max_length = max_length # class for a tree node with a name, count, parent and children # taken from : https://adataanalyst.com/machine-learning/fp-growth-algorithm-python-3/ class treeNode: def __init__(self, nameValue, numOccur, parentNode):
Last active Nov 10, 2018
Python function to calculate blood levels of medication
View gist:323a56730385be6b9b14b546c8c2eb56
 # dose: daily dose # halflife: half-life in hours # start_level: initial blood levels # days: days to model def blood_level(dose, halflife, start_level=0, days=200): # starting level = 0 level_0 = start_level # level after first dose = dose level = dose + start_level
Created Sep 13, 2018
Calculate Intersection over Union on two sets of bounding boxes using Tensorflow
View tf_bbox_overlap_iou.py
 import tensorflow as tf def bbox_overlap_iou(bboxes1, bboxes2): """ Args: bboxes1: shape (total_bboxes1, 4) with x1, y1, x2, y2 point order. bboxes2: shape (total_bboxes2, 4) with x1, y1, x2, y2 point order.
Created Aug 9, 2018
Function to find duplicated columns in pandas dataframe
View gist:8e8f42e8600c14fede59a3a70e60ccee
 def duplicate_columns(frame): groups = frame.columns.to_series().groupby(frame.dtypes).groups dups = [] for t, v in groups.items(): cs = frame[v].columns vs = frame[v] lcs = len(cs)
Created Aug 3, 2018
Code to see what features are not used by any models in sklearn VotingClassifier
View gist:f358688d9412d4bd423275b11382d263
 # plot feature importance for sklearn decision trees def feature_importance(forest, X_train, display_results=True): ranked_list = [] zero_features = [] importances = forest.feature_importances_ indices = np.argsort(importances)[::-1] if display_results:
Created Jun 7, 2018
mean encoding in python
View mean_encode.py
 # mean encoding
# Per-category mean of `target`, computed from X_tr only
# (presumably the training fold -- confirm against the caller).
means = X_tr.groupby(col)['target'].mean()

# Apply the same X_tr-derived mapping to both frames; the new column is
# named after the source column with a '_target_mean' suffix.
# NOTE(review): per pandas Series.map semantics, categories that are absent
# from `means` come out as NaN -- confirm downstream code handles that.
encoded_col = col + '_target_mean'
for frame in (train_new, val_new):
    frame[encoded_col] = frame[col].map(means)
You can’t perform that action at this time.