View datasets.py
"""Based on https://github.com/CaoWGG/multi-scale-training""" | |
from torch.utils.data import Sampler,RandomSampler,SequentialSampler | |
import numpy as np | |
class BatchSampler(object): | |
def __init__(self, sampler, batch_size, drop_last,multiscale_step=None,img_sizes = None): | |
if not isinstance(sampler, Sampler): | |
raise ValueError("sampler should be an instance of " | |
"torch.utils.data.Sampler, but got sampler={}" |
View vecs_from_vals.py
# To calculate eigenvectors of a Hermitian matrix using nothing but the eigenvalues | |
# from https://arxiv.org/pdf/1908.03795.pdf | |
import numpy as np | |
# use numpy to calculate the eigenvalues | |
e_vals = np.linalg.eigvals(mat) | |
eigen_vectors = np.zeros_like(mat) | |
n, _ = mat.shape |
View purity.py
import numpy as np | |
from scipy import stats | |
## Cluster purity | |
def purity(truth, pred): | |
cluster_purities = [] | |
# loop through clusters and calculate purity for each | |
for pred_cluster in np.unique(pred): | |
filter_ = pred == pred_cluster | |
gt_partition = truth[filter_] |
View PatternFusion.py
# depends on my FPTree algorithm - https://gist.github.com/escuccim/dc9aa979d54af69234f508538af7ee1a | |
# to mine the initial set of candidates for the pool | |
import numpy as np | |
import pandas as pd | |
import random | |
from itertools import combinations | |
class PatternFusion(): | |
def __init__(self, min_support=10): |
View gist:3ad7f9b4b538384602ef6df0462564d8
# db is a dictionary with the pattern (as a tuple) as key and its support as value | |
# P1 and P2 are lists or arrays of the keys to compare | |
# this will only work if one of P1 and P2 is a subset of the other | |
def pattern_distance(P1, P2, db): | |
P1_support = db[tuple(P1)] | |
P2_support = db[tuple(P2)] | |
# if P1 is a subset of P2 then we just use their supports | |
if set(P1).issubset(set(P2)): | |
return 1 - (P2_support / P1_support) |
View fpgrowth.py
class FPTree(): | |
def __init__(self, min_support=2, min_length=1, max_length=None): | |
self.min_support = min_support | |
self.min_length = min_length | |
self.max_length = max_length | |
# class for a tree node with a name, count, parent and children | |
# taken from : https://adataanalyst.com/machine-learning/fp-growth-algorithm-python-3/ | |
class treeNode: | |
def __init__(self, nameValue, numOccur, parentNode): |
View gist:323a56730385be6b9b14b546c8c2eb56
# dose: daily dose | |
# halflife: half-life in hours | |
# start_level: initial blood levels | |
# days: days to model | |
def blood_level(dose, halflife, start_level=0, days=200): | |
# starting level = start_level (defaults to 0) | |
level_0 = start_level | |
# level after first dose = dose + starting level | |
level = dose + start_level | |
View tf_bbox_overlap_iou.py
import tensorflow as tf | |
def bbox_overlap_iou(bboxes1, bboxes2): | |
""" | |
Args: | |
bboxes1: shape (total_bboxes1, 4) | |
with x1, y1, x2, y2 point order. | |
bboxes2: shape (total_bboxes2, 4) | |
with x1, y1, x2, y2 point order. |
View gist:8e8f42e8600c14fede59a3a70e60ccee
def duplicate_columns(frame): | |
groups = frame.columns.to_series().groupby(frame.dtypes).groups | |
dups = [] | |
for t, v in groups.items(): | |
cs = frame[v].columns | |
vs = frame[v] | |
lcs = len(cs) |
View gist:f358688d9412d4bd423275b11382d263
# plot feature importance for sklearn decision trees | |
def feature_importance(forest, X_train, display_results=True): | |
ranked_list = [] | |
zero_features = [] | |
importances = forest.feature_importances_ | |
indices = np.argsort(importances)[::-1] | |
if display_results: |
NewerOlder