Smelov Vladimir vsmelov

## docker_install
# Install the packages used by the following steps: HTTPS transport for apt,
# CA certificates, curl, and the tooling that provides add-apt-repository.
sudo apt-get install \
    apt-transport-https \
    ca-certificates \
    curl \
    software-properties-common
# Download Docker's GPG key and hand it to apt as a trusted key.
# NOTE(review): apt-key is deprecated on recent Debian/Ubuntu releases - confirm the target release still ships it.
curl -fsSL https://download.docker.com/linux/ubuntu/gpg | sudo apt-key add -
# Register Docker's "stable" apt repository (amd64 only) for the codename
# printed by `lsb_release -cs`.
sudo add-apt-repository \
   "deb [arch=amd64] https://download.docker.com/linux/ubuntu \
   $(lsb_release -cs) \
   stable"

## timer.py
from time import time


class Timer:
    """Keep a ``begin`` and an ``end`` timestamp; entering stamps ``begin``.

    Only ``__init__`` and ``__enter__`` belong to this excerpt.
    """

    def __init__(self, begin=None, end=None):
        """Store the optional initial ``begin`` and ``end`` values as given."""
        self.begin, self.end = begin, end

    def __enter__(self):
        """Overwrite ``begin`` with the current ``time()`` reading."""
        started_at = time()
        self.begin = started_at

## iterator, yield, send
def foo():
    """Endless generator demonstrating ``yield`` together with ``send``.

    Starting from 2, each round prints the current value, yields twice that
    value, then takes whatever the caller sent in as the new current value
    and prints it.
    """
    current = 2
    while True:
        print('a x: {}'.format(current))
        received = yield 2 * current
        print('c x: {}'.format(received))
        current = received

f = foo()  # create the generator object; none of foo's body has run yet
print('begin')
i = next(f)  # run foo up to its first yield: prints 'a x: 2' and i becomes 4

## cache2parquet.py
def cache2parquet(func):
    """ decorator for cache function result to parquet
    i.e.
        # define:
        @cache2parquet
        def smart_and_slow_calculations(spark, **other_kwargs):
            # some smart and slow code
            return df

        # use:

## base_error_handler.py
class BaseErrorHandler(tornado.web.RequestHandler):
    """ catch exceptions and form error json response
        add traceback if need
    """
    def write_error(self, status_code, **kwargs):
        """ handle exception """
        self.add_header('Result', 'Error')
        if status_code == 500:
            exc_info = kwargs.get('exc_info')
            if exc_info:

## latency.txt
Latency Comparison Numbers (~2012)
----------------------------------
L1 cache reference                           0.5 ns
Branch mispredict                            5   ns
L2 cache reference                           7   ns                      14x L1 cache
Mutex lock/unlock                           25   ns
Main memory reference                      100   ns                      20x L2 cache, 200x L1 cache
Compress 1K bytes with Zippy             3,000   ns        3 us
Send 1K bytes over 1 Gbps network       10,000   ns       10 us
Read 4K randomly from SSD*             150,000   ns      150 us          ~1GB/sec SSD

## intern_timeit.py
from sys import intern
from timeit import repeat, timeit
from random import choice
from string import digits


def random_str(length):
    """Build a string of ``length`` characters, each drawn at random from ``digits``."""
    picked = [choice(digits) for _ in range(length)]
    return ''.join(picked)


## heap.py
class MinHeap:
    """Heap kept in the list ``arr`` together with a caller-supplied ``key``.

    This excerpt covers construction and ``empty`` only; ``_heapify`` is
    defined outside it.
    """

    def __init__(self, items, key):
        """Copy ``items`` into ``arr`` and heapify every index from
        ``len(arr) // 2 - 1`` down to 0.

        ``key`` must be a callable.
        """
        self.key = key
        self.arr = list(items)
        start = len(self.arr) // 2 - 1
        for idx in range(start, -1, -1):
            self._heapify(idx)

    def empty(self):
        """Tell whether ``arr`` currently holds no items."""
        return not self.arr

## streaming_peak_detection.py
from window_avg_std import window_avg_std


def peak_detection(window, threshold, influence):
    """ Smoothed z-score algo
    https://stackoverflow.com/questions/22583391/peak-signal-detection-in-realtime-timeseries-data/43512887#43512887

    Generator: prime it with next(), then feed elements through send().
    NOTE(review): window, threshold and influence are not used in the part of
    the body visible here - presumably they carry the meaning given in the
    linked answer; confirm against the rest of the function. """
    elem = yield  # get first elem
    prev_elem = elem  # previous elem (influenced)
    elem_index = 0  # presumably the stream position of the current elem - confirm
    prev_avg = elem  # the first elem is the starting value for prev_avg

## streaming_std
private long mN = 0L;
private double mM = 0.0;
private double mS = 0.0;
public void handle(double x) {
     ++mN;
    double nextM = mM + (x – mM) / mN;
    mS += (x – mM) * (x – nextM);
    mM = nextM;
}
public void unHandle(double x) {
	sudo apt-get install \
	apt-transport-https \
	ca-certificates \
	curl \
	software-properties-common
	curl -fsSL https://download.docker.com/linux/ubuntu/gpg \| sudo apt-key add -
	sudo add-apt-repository \
	"deb [arch=amd64] https://download.docker.com/linux/ubuntu \
	$(lsb_release -cs) \
	stable"
	from time import time


	class Timer:
	def __init__(self, begin=None, end=None):
	self.begin = begin
	self.end = end

	def __enter__(self):
	self.begin = time()
	def foo():
	x = 2
	while True:
	print('a x: {}'.format(x))
	x = yield 2*x
	print('c x: {}'.format(x))

	f = foo()
	print('begin')
	i = next(f)
	def cache2parquet(func):
	""" decorator for cache function result to parquet
	i.e.
	# define:
	@cache2parquet
	def smart_and_slow_calculations(spark, **other_kwargs):
	# some smart and slow code
	return df

	# use:
	class BaseErrorHandler(tornado.web.RequestHandler):
	""" catch exceptions and form error json response
	add traceback if need
	"""
	def write_error(self, status_code, **kwargs):
	""" handle exception """
	self.add_header('Result', 'Error')
	if status_code == 500:
	exc_info = kwargs.get('exc_info')
	if exc_info:
	Latency Comparison Numbers (~2012)
	----------------------------------
	L1 cache reference 0.5 ns
	Branch mispredict 5 ns
	L2 cache reference 7 ns 14x L1 cache
	Mutex lock/unlock 25 ns
	Main memory reference 100 ns 20x L2 cache, 200x L1 cache
	Compress 1K bytes with Zippy 3,000 ns 3 us
	Send 1K bytes over 1 Gbps network 10,000 ns 10 us
	Read 4K randomly from SSD* 150,000 ns 150 us ~1GB/sec SSD
	from sys import intern
	from timeit import repeat, timeit
	from random import choice
	from string import digits


	def random_str(length):
	return ''.join(choice(digits) for _ in range(length))
	class MinHeap:
	def __init__(self, items, key):
	""" key must be callable function """
	self.arr = list(items)
	self.key = key
	for i in range(len(self.arr) // 2 - 1, -1, - 1):
	self._heapify(i)

	def empty(self):
	return len(self.arr) == 0
	from window_avg_std import window_avg_std


	def peak_detection(window, threshold, influence):
	""" Smoothed z-score algo
	https://stackoverflow.com/questions/22583391/peak-signal-detection-in-realtime-timeseries-data/43512887#43512887 """
	elem = yield # get first elem
	prev_elem = elem # previous elem (influenced)
	elem_index = 0
	prev_avg = elem
	private long mN = 0L;
	private double mM = 0.0;
	private double mS = 0.0;
	public void handle(double x) {
	++mN;
	double nextM = mM + (x – mM) / mN;
	mS += (x – mM) * (x – nextM);
	mM = nextM;
	}
	public void unHandle(double x) {