Eric A. Scuccimarra escuccim

## purity.py
import numpy as np
from scipy import stats

## Cluster purity
def purity(truth, pred):
    cluster_purities = []
    # loop through clusters and calculate purity for each
    for pred_cluster in np.unique(pred):
        filter_ = pred == pred_cluster
        gt_partition = truth[filter_]

## pdict.py
class PDict:
    """Simple container whose entries are reachable as attributes or by subscript.

    Keyword arguments given to the constructor are stored in the instance
    ``__dict__``, so ``PDict(a=1).a`` and ``PDict(a=1)["a"]`` read the same
    value.
    """

    def __init__(self, **kwargs):
        # Copy every keyword argument into the instance namespace in one step.
        vars(self).update(kwargs)

    def __getitem__(self, key):
        """Return the value stored under ``key``, or ``None`` if it is absent."""
        namespace = vars(self)
        return namespace.get(key)

    def __setitem__(self, key, value):
        """Store ``value`` under ``key`` in the instance namespace."""
        namespace = vars(self)
        namespace[key] = value

## datasets.py
"""Based on https://github.com/CaoWGG/multi-scale-training"""

from torch.utils.data import Sampler,RandomSampler,SequentialSampler
import numpy as np

class BatchSampler(object):
    def __init__(self, sampler, batch_size, drop_last,multiscale_step=None,img_sizes = None):
        if not isinstance(sampler, Sampler):
            raise ValueError("sampler should be an instance of "
                             "torch.utils.data.Sampler, but got sampler={}"

## vecs_from_vals.py
# To calculate eigenvectors of a Hermitian matrix using nothing but the eigenvalues
# from https://arxiv.org/pdf/1908.03795.pdf
import numpy as np

# use numpy to calculate the eigenvalues
e_vals = np.linalg.eigvals(mat)

eigen_vectors = np.zeros_like(mat)

n, _ = mat.shape

## PatternFusion.py
# depends on my FPTree algorithm - https://gist.github.com/escuccim/dc9aa979d54af69234f508538af7ee1a
# to mine the initial set of candidates for the pool

import numpy as np
import pandas as pd
import random
from itertools import combinations

class PatternFusion():
    def __init__(self, min_support=10):

## fpgrowth.py
class FPTree():
    def __init__(self, min_support=2, min_length=1, max_length=None):
        self.min_support = min_support
        self.min_length = min_length
        self.max_length = max_length

    # class for a tree node with a name, count, parent and children
    # taken from : https://adataanalyst.com/machine-learning/fp-growth-algorithm-python-3/
    class treeNode:
        def __init__(self, nameValue, numOccur, parentNode):

## gist:3ad7f9b4b538384602ef6df0462564d8
# DB is dictionary with tuple of pattern as key and support as value
# P1 and P2 are lists or arrays of the keys to compare
# this will only work if one of P1 and P2 is a subset of the other
def pattern_distance(P1, P2, db):
    P1_support = db[tuple(P1)]
    P2_support = db[tuple(P2)]

    # if P1 is a subset of P2 then we just use their supports
    if set(P1).issubset(set(P2)):
        return 1 - (P2_support / P1_support)

## gist:323a56730385be6b9b14b546c8c2eb56
# dose: daily dose
# halflife: half-life in hours
# start_level: initial blood levels
# days: days to model
def blood_level(dose, halflife, start_level=0, days=200):
    # starting level = 0
    level_0 = start_level
    # level after first dose = dose
    level = dose + start_level


## tf_bbox_overlap_iou.py
import tensorflow as tf


def bbox_overlap_iou(bboxes1, bboxes2):
    """
    Args:
        bboxes1: shape (total_bboxes1, 4)
            with x1, y1, x2, y2 point order.
        bboxes2: shape (total_bboxes2, 4)
            with x1, y1, x2, y2 point order.

## gist:8e8f42e8600c14fede59a3a70e60ccee
def duplicate_columns(frame):
    groups = frame.columns.to_series().groupby(frame.dtypes).groups
    dups = []

    for t, v in groups.items():

        cs = frame[v].columns
        vs = frame[v]
        lcs = len(cs)
	import numpy as np
	from scipy import stats

	## Cluster purity
	def purity(truth, pred):
	cluster_purities = []
	# loop through clusters and calculate purity for each
	for pred_cluster in np.unique(pred):
	filter_ = pred == pred_cluster
	gt_partition = truth[filter_]
	class PDict:
	def __init__(self, **kwargs):
	for key, value in kwargs.items():
	self.__dict__[key] = value

	def __getitem__(self, key):
	return self.__dict__.get(key)

	def __setitem__(self, key, value):
	self.__dict__[key] = value
	"""Based on https://github.com/CaoWGG/multi-scale-training"""

	from torch.utils.data import Sampler,RandomSampler,SequentialSampler
	import numpy as np

	class BatchSampler(object):
	def __init__(self, sampler, batch_size, drop_last,multiscale_step=None,img_sizes = None):
	if not isinstance(sampler, Sampler):
	raise ValueError("sampler should be an instance of "
	"torch.utils.data.Sampler, but got sampler={}"
	# To calculate eigenvectors of a Hermitian matrix using nothing but the eigenvalues
	# from https://arxiv.org/pdf/1908.03795.pdf
	import numpy as np

	# use numpy to calculate the eigen values
	e_vals = np.linalg.eigvals(mat)

	eigen_vectors = np.zeros_like(mat)

	n, _ = mat.shape
	# depends on my FPTree algorithm - https://gist.github.com/escuccim/dc9aa979d54af69234f508538af7ee1a
	# to mine the initial set of candidates for the pool

	import numpy as np
	import pandas as pd
	import random
	from itertools import combinations

	class PatternFusion():
	def __init__(self, min_support=10):
	class FPTree():
	def __init__(self, min_support=2, min_length=1, max_length=None):
	self.min_support = min_support
	self.min_length = min_length
	self.max_length = max_length

	# class for a tree node with a name, count, parent and children
	# taken from : https://adataanalyst.com/machine-learning/fp-growth-algorithm-python-3/
	class treeNode:
	def __init__(self, nameValue, numOccur, parentNode):
	# DB is dictionary with tuple of pattern as key and support as value
	# P1 and P2 are lists or arrays of the keys to compare
	# this will only work if one of P1 and P2 is a subset of the other
	def pattern_distance(P1, P2, db):
	P1_support = db[tuple(P1)]
	P2_support = db[tuple(P2)]

	# if P1 is a subset of P2 then we just use their supports
	if set(P1).issubset(set(P2)):
	return 1 - (P2_support / P1_support)
	# dose: daily dose
	# halflife: half-life in hours
	# start_level: initial blood levels
	# days: days to model
	def blood_level(dose, halflife, start_level=0, days=200):
	# starting level = 0
	level_0 = start_level
	# level after first dose = dose
	level = dose + start_level
	import tensorflow as tf


	def bbox_overlap_iou(bboxes1, bboxes2):
	"""
	Args:
	bboxes1: shape (total_bboxes1, 4)
	with x1, y1, x2, y2 point order.
	bboxes2: shape (total_bboxes2, 4)
	with x1, y1, x2, y2 point order.
	def duplicate_columns(frame):
	groups = frame.columns.to_series().groupby(frame.dtypes).groups
	dups = []

	for t, v in groups.items():

	cs = frame[v].columns
	vs = frame[v]
	lcs = len(cs)