{{ message }}

Instantly share code, notes, and snippets.

# Eric A. Scuccimarra escuccim

Created Dec 17, 2020
Multi-scale training for PyTorch ImageFolder dataset
View datasets.py
 """Based on https://github.com/CaoWGG/multi-scale-training""" from torch.utils.data import Sampler,RandomSampler,SequentialSampler import numpy as np class BatchSampler(object): def __init__(self, sampler, batch_size, drop_last,multiscale_step=None,img_sizes = None): if not isinstance(sampler, Sampler): raise ValueError("sampler should be an instance of " "torch.utils.data.Sampler, but got sampler={}"
Last active Nov 17, 2019
Eigenvectors from eigenvalues
View vecs_from_vals.py
 # To calculate eigenvectors of a Hermitian matrix using nothing but the eigenvalues # from https://arxiv.org/pdf/1908.03795.pdf import numpy as np # use numpy to calculate the eigen values e_vals = np.linalg.eigvals(mat) eigen_vectors = np.zeros_like(mat) n, _ = mat.shape
Created Feb 26, 2019
Cluster Purity
View purity.py
 import numpy as np from scipy import stats ## Cluster purity def purity(truth, pred): cluster_purities = [] # loop through clusters and calculate purity for each for pred_cluster in np.unique(pred): filter_ = pred == pred_cluster gt_partition = truth[filter_]
Last active Feb 8, 2019
PatternFusion
View PatternFusion.py
 # depends on my FPTree algorithm - https://gist.github.com/escuccim/dc9aa979d54af69234f508538af7ee1a # to mine the initial set of candidates for the pool import numpy as np import pandas as pd import random from itertools import combinations class PatternFusion(): def __init__(self, min_support=10):
Last active Feb 3, 2019
PatternDistance
 # DB is dictionary with tuple of pattern as key and support as value # P1 and P2 are lists or arrays of the keys to compare # this will only work if one of P1 and P2 is a subset of the other def pattern_distance(P1, P2, db): P1_support = db[tuple(P1)] P2_support = db[tuple(P2)] # if P1 is a subset of P2 then we just use their supports if set(P1).issubset(set(P2)): return 1 - (P2_support / P1_support)
Last active Feb 4, 2019
FP-Growth in Python
View fpgrowth.py
 class FPTree(): def __init__(self, min_support=2, min_length=1, max_length=None): self.min_support = min_support self.min_length = min_length self.max_length = max_length # class for a tree node with a name, count, parent and children # taken from : https://adataanalyst.com/machine-learning/fp-growth-algorithm-python-3/ class treeNode: def __init__(self, nameValue, numOccur, parentNode):
Last active Nov 10, 2018
Python function to calculate blood levels of medication
View gist:323a56730385be6b9b14b546c8c2eb56
 # dose: daily dose # halflife: half-life in hours # start_level: initial blood levels # days: days to model def blood_level(dose, halflife, start_level=0, days=200): # starting level = 0 level_0 = start_level # level after first dose = dose level = dose + start_level
Created Sep 13, 2018
Calculate Intersection over Union on two sets of bounding boxes using TensorFlow
View tf_bbox_overlap_iou.py
 import tensorflow as tf def bbox_overlap_iou(bboxes1, bboxes2): """ Args: bboxes1: shape (total_bboxes1, 4) with x1, y1, x2, y2 point order. bboxes2: shape (total_bboxes2, 4) with x1, y1, x2, y2 point order.
Created Aug 9, 2018
Function to find duplicated columns in a pandas DataFrame
View gist:8e8f42e8600c14fede59a3a70e60ccee
 def duplicate_columns(frame): groups = frame.columns.to_series().groupby(frame.dtypes).groups dups = [] for t, v in groups.items(): cs = frame[v].columns vs = frame[v] lcs = len(cs)
Created Aug 3, 2018
Code to see which features are not used by any model in an sklearn VotingClassifier
View gist:f358688d9412d4bd423275b11382d263
 # plot feature importance for sklearn decision trees def feature_importance(forest, X_train, display_results=True): ranked_list = [] zero_features = [] importances = forest.feature_importances_ indices = np.argsort(importances)[::-1] if display_results: