Skip to content

Instantly share code, notes, and snippets.

@jovianlin
jovianlin / my_standardscalar.py
Created December 24, 2016 08:02
StandardScaler implementation for standardizing data before model-training
def normalize(train, test):
    """Standardize ``train`` and ``test`` with the statistics of ``train``.

    Both inputs are shifted by the training mean and divided by the training
    standard deviation, so the transformed training data has zero mean and
    unit standard deviation, and the test data is mapped with exactly the
    same transformation.

    Args:
        train: Training data; must provide ``.mean()`` and ``.std()`` and
            support arithmetic with their results (e.g. a NumPy array).
        test: Held-out data supporting the same arithmetic.

    Returns:
        A ``(train, test)`` tuple of the standardized inputs.
    """
    # Both statistics must come from the training split only: using the
    # test split's std would leak test information into the model input
    # and leave the training data without unit variance.
    mean, std = train.mean(), train.std()
    train = (train - mean) / std
    test = (test - mean) / std
    return train, test
@jovianlin
jovianlin / plot_decision_boundary.py
Created December 22, 2016 14:32
Plot Decision Boundary
# Train the logistic regression classifier
# NOTE: `sklearn`, `X`, `y`, `plot_decision_boundary` and `plt` are not defined
# in this snippet; they must already be in scope when it runs.
clf = sklearn.linear_model.LogisticRegressionCV()
clf.fit(X, y)
# Plot the decision boundary
# The helper is handed a callable that maps inputs to the predictions of
# the fitted classifier.
plot_decision_boundary(lambda x: clf.predict(x))
plt.title("Logistic Regression")
@jovianlin
jovianlin / clustering_cosine_similarity_matrix.py
Last active December 21, 2020 07:53
Clustering cosine similarity matrix
"""
### Problem Statement ###
Let's say you have a square matrix which consists of cosine similarities (values between 0 and 1).
This square matrix can be of any size.
You want to get clusters which maximize the values between elements in the cluster.
For example, for the following matrix:
| A | B | C | D
A | 1.0 | 0.1 | 0.6 | 0.4
B | 0.1 | 1.0 | 0.1 | 0.2
@jovianlin
jovianlin / fix_encoding.py
Created December 8, 2016 07:44
Quickfix for encoding errors
def fix_encoding(some_str):
    """Return ``some_str`` with every non-printable-ASCII character removed.

    Only characters in the printable ASCII range (space ``0x20`` through
    tilde ``0x7E``) are kept; control characters, DEL and all non-ASCII
    characters are dropped.

    Args:
        some_str: The text to clean.

    Returns:
        A new string containing only the printable ASCII characters of
        ``some_str``, in their original order.
    """
    # The upper bound is 0x7E ('~'), the last printable ASCII character.
    # A bound of 0x78 ('x') would silently strip 'y', 'z', '{', '|', '}', '~'.
    return ''.join(c for c in some_str if 0x20 <= ord(c) <= 0x7E)
@jovianlin
jovianlin / anaconda_tensorflow.txt
Last active December 7, 2016 13:54
Anaconda & Tensorflow
# ===================================================================================
# Many thanks to:
# https://uoa-eresearch.github.io/eresearch-cookbook/recipe/2014/11/20/conda/
#
# More info:
# https://www.continuum.io/blog/developer-blog/python-packages-and-environments-conda
# https://conda-forge.github.io/#about
# ===================================================================================
# conda info --env
@jovianlin
jovianlin / fibonacci.py
Created November 30, 2016 16:07
fibonacci with LRU cache for memoization
from functools import lru_cache
@lru_cache(maxsize=100)
def fibonacci(n):
# Check that the input is a positive integer
if type(n) != int:
raise TypeError("n must be a positive int")
if n < 1:
raise ValueError("n must be a positive int")
@jovianlin
jovianlin / foo.py
Created November 29, 2016 02:07
Convert "aaaabbbbbcccccde" to "a4b5c5d1e1"
def foo(s):
if len(s) <= 0:
return None
else:
output, curr_char, curr_count = '', '', 0
for idx in range(0, len(s)):
if s[idx] == curr_char:
curr_count += 1
else:
output += curr_char + str(curr_count) if curr_count > 0 else curr_char
@jovianlin
jovianlin / mini_batch_learning.py
Created November 26, 2016 07:22
Mini Batch Learning with SGD
from sklearn.linear_model import SGDRegressor
# https://adventuresindatascience.wordpress.com/2014/12/30/minibatch-learning-for-large-scale-data-using-scikit-learn/
def iter_minibatches(chunksize, numtrainingpoints):
# Provide chunks one by one
chunkstartmarker = 0
while chunkstartmarker < numtrainingpoints:
chunkrows = range(chunkstartmarker,chunkstartmarker+chunksize)
X_chunk, y_chunk = getrows(chunkrows)
@jovianlin
jovianlin / TargetedSentimentAnalysis.py
Created November 22, 2016 08:35
Targeted Sentiment Analysis
# -*- coding: utf-8 -*-
from afinn import Afinn
import spacy
import re
class TargetedSentimentAnalysis(object):
def __init__(self):
self.afinn = Afinn(emoticons=True)
@jovianlin
jovianlin / test_argparse.py
Created November 22, 2016 02:03
Python ArgParse
import argparse
import sys
def main():
parser = argparse.ArgumentParser()
parser.add_argument('--x', type=float, default=1.0,
help='What is the first number?')
parser.add_argument('--y', type=float, default=1.0,
help='What is the second number?')
parser.add_argument('--operation', type=str, default='add',