Skip to content

Instantly share code, notes, and snippets.

@jovianlin
jovianlin / track_diskspace.sh
Created November 14, 2016 02:43
Tracking down where disk space has gone in Linux (http://unix.stackexchange.com/a/125433)
# List the directories under <dir> whose total size is reported in gigabytes.
# The pattern is anchored to the start of the line so it only matches the size
# column printed by `du -h`, not digits followed by "G" inside a path name.
# A comma is accepted as well, for locales that print it as decimal separator.
du -h <dir> | grep -E '^[0-9.,]+G'
# e.g.:
# du -h /home/peterparker | grep -E '^[0-9.,]+G'
@jovianlin
jovianlin / test_multiprocessing_pool.py
Created November 16, 2016 02:23
Test Multiprocessing Pool
from multiprocessing import Pool
def job(x):
    """Return ``x`` raised to the power of two."""
    return pow(x, 2)
# End of job(...).
if __name__ == "__main__":
    # Spawn the worker pool; the worker count is unchanged from the original.
    p = Pool(processes=50)
    try:
        # Each map() call blocks until every input has been squared by job().
        data_1 = p.map(job, range(10))
        data_2 = p.map(job, [99, 111, 7236])
    finally:
        # Always shut the workers down, even if a map() call raises, so that
        # no child processes are left behind.
        p.close()
        p.join()
@jovianlin
jovianlin / test_bipartite.py
Created November 16, 2016 16:14
Bipartite Graphs in NetworkX
import networkx as nx
from networkx.algorithms import bipartite
# Build a small graph whose nodes are tagged with a "bipartite" attribute:
# the integers carry bipartite=0 and the letters carry bipartite=1.
B = nx.Graph()
B.add_nodes_from([1, 2, 3, 4], bipartite=0)  # Add the node attribute "bipartite"
B.add_nodes_from(['a', 'b', 'c', 'd'], bipartite=1)
# Every edge joins an integer node to a letter node.
B.add_edges_from([(1, 'a'), (1, 'b'), (2, 'b'), (2, 'c'), (3, 'c'), (4, 'a')])
# A parenthesised single-argument print is valid on both Python 2 and Python 3;
# the bare print statement is a SyntaxError on Python 3.
print('Is connected? %s' % nx.is_connected(B))
@jovianlin
jovianlin / get_emojis_from_text.py
Last active November 22, 2016 10:48
Get Emojis from Text
import re
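# Returns all the emoji in this string. 'text' is a unicode string.
# NOTE(review): the unicode(text, 'ignore') call below decodes 'text' as a byte
# string and passes 'ignore' in the encoding position, not as the error handler
# -- confirm which input type and which arguments are intended.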
def get_emoji(text):
text = unicode(text, 'ignore')
try:
ranges = re.compile(u'([\U00002600-\U000027BF])|([\U0001f300-\U0001f64F])|([\U0001f680-\U0001f6FF])')
except re.error:
ranges = re.compile(u'([\u2600-\u27BF])|([\uD83C][\uDF00-\uDFFF])|([\uD83D][\uDC00-\uDE4F])|([\uD83D][\uDE80-\uDEFF])')
emojis = (ranges.findall(text))
@jovianlin
jovianlin / test_argparse.py
Created November 22, 2016 02:03
Python ArgParse
import argparse
import sys
def main():
parser = argparse.ArgumentParser()
parser.add_argument('--x', type=float, default=1.0,
help='What is the first number?')
parser.add_argument('--y', type=float, default=1.0,
help='What is the second number?')
parser.add_argument('--operation', type=str, default='add',
@jovianlin
jovianlin / TargetedSentimentAnalysis.py
Created November 22, 2016 08:35
Targeted Sentiment Analysis
# -*- coding: utf-8 -*-
from afinn import Afinn
import spacy
import re
class TargetedSentimentAnalysis(object):
def __init__(self):
self.afinn = Afinn(emoticons=True)
@jovianlin
jovianlin / mini_batch_learning.py
Created November 26, 2016 07:22
Mini Batch Learning with SGD
from sklearn.linear_model import SGDRegressor
# https://adventuresindatascience.wordpress.com/2014/12/30/minibatch-learning-for-large-scale-data-using-scikit-learn/
def iter_minibatches(chunksize, numtrainingpoints):
# Provide chunks one by one
chunkstartmarker = 0
while chunkstartmarker < numtrainingpoints:
chunkrows = range(chunkstartmarker,chunkstartmarker+chunksize)
X_chunk, y_chunk = getrows(chunkrows)
@jovianlin
jovianlin / foo.py
Created November 29, 2016 02:07
Convert "aaaabbbbbcccccde" to "a4b5c5d1e1"
def foo(s):
if len(s) <= 0:
return None
else:
output, curr_char, curr_count = '', '', 0
for idx in range(0, len(s)):
if s[idx] == curr_char:
curr_count += 1
else:
output += curr_char + str(curr_count) if curr_count > 0 else curr_char
@jovianlin
jovianlin / fibonacci.py
Created November 30, 2016 16:07
Fibonacci with an LRU cache for memoization
from functools import lru_cache
@lru_cache(maxsize=100)
def fibonacci(n):
# Check that the input is a positive integer
if type(n) != int:
raise TypeError("n must be a positive int")
if n < 1:
raise ValueError("n must be a positive int")
@jovianlin
jovianlin / anaconda_tensorflow.txt
Last active December 7, 2016 13:54
Anaconda & Tensorflow
# ===================================================================================
# Many thanks to:
# https://uoa-eresearch.github.io/eresearch-cookbook/recipe/2014/11/20/conda/
#
# More info:
# https://www.continuum.io/blog/developer-blog/python-packages-and-environments-conda
# https://conda-forge.github.io/#about
# ===================================================================================
# conda info --env