Vaibhav Kumar vaibkumr

@vaibkumr
vaibkumr / date.py
Created May 28, 2018 12:11
Generating a date string in Python
import arrow

# Format the current date as an ISO-style string, e.g. '2018-05-28'
date = arrow.now().format('YYYY-MM-DD')
print(date)
@vaibkumr
vaibkumr / style.css
Created May 28, 2018 13:08
Style of this website
body {
  background-color: #262626;
}
h3, h2 {
  font-family: 'Lato', sans-serif;
  font-weight: 300;
  color: pink;
}
hr {
  border-top: 1px dashed #8c8b8b;
}
# Shut down the machine (switch init to runlevel 0)
sudo telinit 0
import numpy as np

class RandomForest():
    def __init__(self, x, y, n_trees, sample_sz, min_leaf=5, depth=10):
        np.random.seed(42)  # fixed seed so the bagged subsamples are reproducible
        self.x, self.y, self.sample_sz, self.min_leaf, self.depth = x, y, sample_sz, min_leaf, depth
        self.trees = [self.create_tree() for i in range(n_trees)]

    def create_tree(self):
        # Bagging: each tree trains on a random subsample of the rows
        rnd_idxs = np.random.permutation(len(self.y))[:self.sample_sz]
        return DecisionTree(self.x.iloc[rnd_idxs], self.y[rnd_idxs],
                            min_leaf=self.min_leaf, depth=self.depth)
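The listing stops before any predict method. A minimal sketch of what one could look like, assuming the usual regression-forest rule (average the per-tree predictions) and that DecisionTree eventually gets a predict(x) of its own:

    def predict(self, x):
        # Assumption: each DecisionTree exposes predict(x) returning per-row
        # predictions; the forest averages them across trees.
        return np.mean([t.predict(x) for t in self.trees], axis=0)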
class DecisionTree():
    def __init__(self, x, y, idxs=None, min_leaf=5, depth=10):
        if idxs is None: idxs = np.arange(len(y))  # default: consider every row
        self.x, self.y, self.idxs, self.min_leaf, self.depth = x, y, idxs, min_leaf, depth
        self.n, self.c = len(idxs), x.shape[1]  # rows at this node, number of columns
        self.val = np.mean(y[idxs])   # node prediction: mean target of its rows
        self.score = float('inf')     # best split score so far (lower is better)
        self.find_varsplit()
# This just does one decision; we'll make it recursive later
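The constructor calls self.find_varsplit(), which is not shown in this listing. A minimal sketch consistent with the single-split version above (an assumption, not the author's code): scan every column and let find_better_split keep whichever split scores best:

    def find_varsplit(self):
        # Assumed shape: try each column; find_better_split updates
        # self.score when it finds a better split.
        for i in range(self.c):
            self.find_better_split(i)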
class RandomForest():
    def __init__(self, x, y, n_trees, n_features, sample_sz, depth=10, min_leaf=5):
        np.random.seed(12)
        # Feature subsampling: how many columns each tree may split on
        if n_features == 'sqrt':
            self.n_features = int(np.sqrt(x.shape[1]))
        elif n_features == 'log2':
            self.n_features = int(np.log2(x.shape[1]))
        else:
            self.n_features = n_features
        print(self.n_features, "features per tree, shape:", x.shape[1])  # debug output
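create_tree is not shown for this version. A sketch of how it would differ from the first forest, assuming __init__ also stores x, y, sample_sz, depth and min_leaf as before: besides bagging the rows, each tree receives a random subset of feature columns (f_idxs) to match the DecisionTree signature below:

    def create_tree(self):
        # Assumption: self.x, self.y, self.sample_sz, self.depth, self.min_leaf
        # are stored in __init__ as in the earlier RandomForest.
        idxs = np.random.permutation(len(self.y))[:self.sample_sz]         # bag the rows
        f_idxs = np.random.permutation(self.x.shape[1])[:self.n_features]  # sample the columns
        return DecisionTree(self.x.iloc[idxs], self.y[idxs], self.n_features, f_idxs,
                            idxs=np.arange(self.sample_sz),
                            depth=self.depth, min_leaf=self.min_leaf)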
class DecisionTree():
    def __init__(self, x, y, n_features, f_idxs, idxs, depth=10, min_leaf=5):
        self.x, self.y, self.idxs, self.min_leaf, self.f_idxs = x, y, idxs, min_leaf, f_idxs
        self.depth = depth
        self.n_features = n_features
        self.n, self.c = len(idxs), x.shape[1]
        self.val = np.mean(y[idxs])  # node prediction: mean target of its rows
        self.score = float('inf')    # best split score so far (lower is better)
        self.find_varsplit()
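find_varsplit is again not shown; for this version it would presumably scan only the tree's sampled columns (an assumption consistent with f_idxs being stored above):

    def find_varsplit(self):
        # Assumed: restrict the split search to this tree's feature subset.
        for i in self.f_idxs:
            self.find_better_split(i)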
    def find_better_split(self, var_idx):
        # Evaluate every split point on one column: sort by that column, then
        # sweep the boundary left to right, keeping running sums on each side.
        x, y = self.x.values[self.idxs, var_idx], self.y[self.idxs]
        sort_idx = np.argsort(x)
        sort_y, sort_x = y[sort_idx], x[sort_idx]
        rhs_cnt, rhs_sum, rhs_sum2 = self.n, sort_y.sum(), (sort_y**2).sum()
        lhs_cnt, lhs_sum, lhs_sum2 = 0, 0., 0.
        for i in range(0, self.n - self.min_leaf - 1):
            xi, yi = sort_x[i], sort_y[i]
            lhs_cnt += 1; rhs_cnt -= 1
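The listing cuts off mid-loop. Given the running sums initialised above, the natural continuation (an assumption, not shown in the gist) moves yi to the left side, skips invalid boundaries, and scores the split by count-weighted standard deviations computed from the running sums:

            lhs_sum += yi;  rhs_sum -= yi
            lhs_sum2 += yi**2;  rhs_sum2 -= yi**2
            if i < self.min_leaf or xi == sort_x[i + 1]:
                continue  # left side too small, or tied x values: not a valid split point
            # std via E[y^2] - E[y]^2, straight from the running sums
            lhs_std = np.sqrt(lhs_sum2/lhs_cnt - (lhs_sum/lhs_cnt)**2)
            rhs_std = np.sqrt(rhs_sum2/rhs_cnt - (rhs_sum/rhs_cnt)**2)
            curr_score = lhs_std*lhs_cnt + rhs_std*rhs_cnt
            if curr_score < self.score:
                self.var_idx, self.score, self.split = var_idx, curr_score, xi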