Skip to content

Instantly share code, notes, and snippets.

View devforfu's full-sized avatar

Ilia devforfu

View GitHub Profile
@devforfu
devforfu / bing.py
Created October 18, 2018 04:01
Bing API usage example
from io import BytesIO
from pprint import pprint as pp
import requests
from PIL import Image
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
@devforfu
devforfu / embeddings_training_loop.py
Last active August 20, 2018 05:37
Embeddings training loop with PyTorch
# training loop parameters
# NOTE(review): the loop consuming these values is not visible in this
# snippet; the meanings marked "presumably" are inferred from the names
# only and should be confirmed against the loop itself.
lr = 1e-3  # presumably the optimizer learning rate
wd = 1e-5  # presumably the weight decay
bs = 2000  # presumably the batch size
n_epochs = 100
patience = 10  # presumably epochs without improvement tolerated before stopping
no_improvements = 0
best_loss = np.inf  # +inf, so any finite loss compares as lower
best_weights = None
history = []
@devforfu
devforfu / embeddings_network.py
Created August 16, 2018 06:39
Embeddings Network Class
class EmbeddingNet(nn.Module):
"""
Creates a dense network with embedding layers.
Args:
n_users:
Number of unique users in the dataset.
n_movies:
@devforfu
devforfu / final_model.py
Created July 3, 2018 09:17
The final model training
# Final model: fit a three-step pipeline on the training split and predict
# labels for the test split.
X, y = reader.dataset
X_train, X_test, y_train, y_test = train_test_split(
    X, y, train_size=0.8, stratify=y)

# Steps are created in pipeline order and handed to make_pipeline together.
steps = [
    BatchTransformer(StandardScaler()),
    KMeansQuantization(k=2),
    RandomForestClassifier(n_estimators=500),
]
pipeline = make_pipeline(*steps)
pipeline.fit(X_train, y_train)
y_preds = pipeline.predict(X_test)
@devforfu
devforfu / pipeline_grid_search.py
Created July 2, 2018 10:36
Picking the best classifier
"""
Main repository: https://github.com/devforfu/Blog/tree/master/trees
"""
import os
from os.path import join
from pprint import pprint
import pandas as pd
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
@devforfu
devforfu / scikit_learn_accelerometer_pipeline.py
Last active June 29, 2018 04:59
The snippet for Medium post about K-Means and Decision Trees
"""
Wrist-Worn Accelerometer Dataset using scikit-learn.
"""
import re
from os import listdir
from os.path import exists, join, basename, isdir
import numpy as np
@devforfu
devforfu / accelerometer.py
Created June 27, 2018 09:38
Prediction snippet for blog post about decision trees
from os.path import join
import numpy as np
from sklearn.model_selection import train_test_split
from quantization import quantize
from decision_tree import learn_tree
from decision_tree import predict_tree
from ensemble import RandomForestClassifier
from utils import train_test_split, encode_labels
@devforfu
devforfu / random_forest.py
Created June 19, 2018 16:47
Random Forest aggregating several Decision Trees
class RandomForestClassifier:
def __init__(self, tree_funcs, n_trees: int=10,
feature_subset_size: str='sqrt', max_depth: int=5,
min_split_size: int=10, min_leaf_size: int=None,
log=None):
if n_trees < 1:
raise ValueError(f'cannot build an ensemble of {n_trees:d} trees')
@devforfu
devforfu / decision_tree_simple.py
Last active July 3, 2018 03:02
Simple decision tree implementation using Numpy library
"""
Simple decision tree implementation using Numpy.
"""
from collections import Counter
import numpy as np
class _Node:
"""
@devforfu
devforfu / kmeans_quantization.py
Created May 30, 2018 07:01
K-Means post quantization snippet
def quantize(dataset_path, k):
    """
    Builds one flattened-centroids feature vector per item of the dataset.

    Args:
        dataset_path:
            Location passed unchanged to ``read_files``.
        k:
            Number of clusters, forwarded to ``kmeans`` as ``n_clusters``.

    Returns:
        The result of ``np.array`` applied to the list of feature vectors,
        kept in the order ``read_files`` yielded the items.
    """
    dataset = []
    for points in read_files(dataset_path):
        # The ``np.float`` alias was deprecated in NumPy 1.20 and removed in
        # 1.24; it was only ever the builtin ``float``, so the resulting
        # dtype (float64) is unchanged.
        x = np.asarray(points, dtype=float)
        # Only the first element of the returned pair (the centroids) is
        # used; the second one is ignored.
        centroids, _ = kmeans(x, n_clusters=k)
        feature_vector = centroids.flatten()
        dataset.append(feature_vector)
    return np.array(dataset)