Skip to content

Instantly share code, notes, and snippets.

View devforfu's full-sized avatar

Ilia devforfu

View GitHub Profile
@devforfu
devforfu / tf_oop_build.py
Last active April 18, 2018 05:22
DNNClassifier.build()
def build(self, graph=None, optimizer=None):
"""
Creates model graph and groups important variables into named collections.
"""
if graph is None:
graph = tf.Graph()
with graph.as_default():
inputs = create_inputs()
logits = build_model(
inputs=inputs.x,
@devforfu
devforfu / tf_oop_fit_generator.py
Created April 18, 2018 05:47
DNNClassifier.fit_generator()
def fit_generator(self, generator, epochs, batches_per_epoch,
validation_data=None, callbacks=None):
"""
Fits model with generator yielding batches of (x, y) pairs.
The generator is expected to indefinitely generate samples from
training set. Therefore, we need a "hint" about how many times we would like
to call generator's `next()` method during single epoch. For this
purpose `batches_per_epoch` parameter is used.
"""
@devforfu
devforfu / sgd.py
Created May 15, 2018 13:49
Simple implementation of MNIST dataset classifier using TensorFlow and SGD
"""
Simple implementation of MNIST dataset classifier using TensorFlow and SGD.
"""
from os.path import expanduser
import numpy as np
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
@devforfu
devforfu / kmeans_helpers.py
Created May 26, 2018 13:43
Calculating random centroids and inertia score
def normalize_dataset(X):
    """
    Re-scales dataset to mean=0 and std=1 along axis 0.

    The input is never modified: the computation runs on a floating-point
    copy, so integer arrays and plain nested lists are accepted as well.
    Floating-point inputs keep their dtype; everything else is converted
    to float64.

    :param X: array-like dataset; statistics are computed along axis 0.
    :return: a new array with the per-column mean subtracted and divided
        by the per-column standard deviation.
    """
    X = np.asarray(X)
    dtype = X.dtype if np.issubdtype(X.dtype, np.floating) else np.float64
    # astype() copies by default, so the caller's array stays untouched.
    normalized = X.astype(dtype)
    normalized -= np.mean(normalized, axis=0)
    # The lower bound guards against division by zero for constant columns.
    # NOTE(review): 10e-6 equals 1e-5; kept as-is to preserve the results.
    normalized /= np.clip(np.std(normalized, axis=0), 10e-6, np.inf)
    return normalized
def generate_random_centroids(n_features, n_clusters):
@devforfu
devforfu / kmeans_quantization.py
Created May 30, 2018 07:01
K-Means post quantization snippet
def quantize(dataset_path, k):
    """
    Builds a dataset of feature vectors from K-Means centroids.

    Each item yielded by `read_files(dataset_path)` is converted into a
    float array and clustered with `kmeans`; the resulting centroids are
    flattened into a single feature vector.

    :param dataset_path: location passed through to `read_files`.
    :param k: number of clusters, forwarded to `kmeans` as `n_clusters`.
    :return: array built from the feature vectors, one per item yielded
        by `read_files`.
    """
    feature_vectors = []
    for points in read_files(dataset_path):
        # The builtin float replaces the `np.float` alias, which was
        # deprecated in NumPy 1.20 and removed in 1.24.
        x = np.asarray(points, dtype=float)
        centroids, _ = kmeans(x, n_clusters=k)
        feature_vectors.append(centroids.flatten())
    return np.array(feature_vectors)
@devforfu
devforfu / kmeans_pseudocode.py
Last active June 3, 2018 08:07
K-Means clustering pseudocode
def kmeans(dataset, k=5, num_of_restarts=10, max_iterations=300):
"""
Runs K-Means clustering on dataset and returns the best
centroids assignment with its inertia score.
"""
n_features = dataset.n_features
norm_dataset = normalize_dataset(dataset)
best_score = -np.inf
best_centroids = None
@devforfu
devforfu / random_forest.py
Created June 19, 2018 16:47
Random Forest aggregating several Decision Trees
class RandomForestClassifier:
def __init__(self, tree_funcs, n_trees: int=10,
feature_subset_size: str='sqrt', max_depth: int=5,
min_split_size: int=10, min_leaf_size: int=None,
log=None):
if n_trees < 1:
raise ValueError(f'cannot build an ensemble of {n_trees:d} trees')
@devforfu
devforfu / accelerometer.py
Created June 27, 2018 09:38
Prediction snippet for blog post about decision trees
from os.path import join
import numpy as np
from sklearn.model_selection import train_test_split
from quantization import quantize
from decision_tree import learn_tree
from decision_tree import predict_tree
from ensemble import RandomForestClassifier
from utils import train_test_split, encode_labels
@devforfu
devforfu / scikit_learn_accelerometer_pipeline.py
Last active June 29, 2018 04:59
The snippet for Medium post about K-Means and Decision Trees
"""
Wrist-Worn Accelerometer Dataset using scikit-learn.
"""
import re
from os import listdir
from os.path import exists, join, basename, isdir
import numpy as np
@devforfu
devforfu / pipeline_grid_search.py
Created July 2, 2018 10:36
Picking the best classifier
"""
Main repository: https://github.com/devforfu/Blog/tree/master/trees
"""
import os
from os.path import join
from pprint import pprint
import pandas as pd
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler