Bairen Yi byronyi

## compile_tensorflow_serving.sh
#!/bin/bash

# Pinned commit hashes; the trailing comments record the date noted for each.
# NOTE(review): presumably these are the revisions of the tensorflow,
# serving and models repositories that later steps use -- confirm against
# the rest of the script.
TENSORFLOW_COMMIT=9e76bf324f6bac63137a02bb6e6ec9120703ea9b # August 16, 2017
TENSORFLOW_SERVING_COMMIT=267d682bf43df1c8e87332d3712c411baf162fe9 # August 18, 2017
MODELS_COMMIT=78007443138108abf5170b296b4d703b49454487 # July 25, 2017

# Default TENSORFLOW_SERVING_REPO_PATH to "serving" when it is unset or empty.
# The expansion is quoted so that a value containing spaces or test operators
# (for example "-o x") is handled as one string instead of being re-parsed
# by `[` and either raising an error or being silently overwritten.
if [ -z "${TENSORFLOW_SERVING_REPO_PATH:-}" ]; then
	TENSORFLOW_SERVING_REPO_PATH="serving"
fi
# Record the directory the script was started from.
INITIAL_PATH=$(pwd)

## ubuntu-vm.xml
<domain type='kvm'>
  <name>ubuntu-4b</name>
  <uuid>7dfbcb8a-77da-11e6-a116-408d5cb4b9e6</uuid>
  <memory unit='KiB'>12582912</memory>
  <currentMemory unit='KiB'>12582912</currentMemory>
  <vcpu placement='static'>2</vcpu>
  <os>
    <type arch='x86_64' machine='pc-q35-2.5'>hvm</type>
    <loader readonly='no' type='pflash'>/usr/share/OVMF/OVMF_CODE.fd</loader>
    <nvram>/var/lib/libvirt/qemu/nvram/ubuntu-4b_VARS.fd</nvram>

## benchmark_grpc_recv.py
# Dependencies:
# portpicker (pip install portpicker)
# tcmalloc4 (sudo apt-get install google-perftools)
# TF 0.12
#
#
# Benchmarks on Xeon E5-2630 v3 @ 2.40GHz
#
# export LD_PRELOAD=/usr/lib/libtcmalloc.so.4
# python benchmark_grpc_recv.py --data_mb=128

## local_distributed_benchmark.py
"""Benchmark tensorflow distributed by adding vector of ones on worker2
to variable on worker1 as fast as possible.

On a 2014 MacBook with TensorFlow 0.10, this shows:

Local rate:       2175.28 MB per second
Distributed rate: 107.13 MB per second

"""

## latency.txt
Latency Comparison Numbers
--------------------------
L1 cache reference                           0.5 ns
Branch mispredict                            5   ns
L2 cache reference                           7   ns                      14x L1 cache
Mutex lock/unlock                           25   ns
Main memory reference                      100   ns                      20x L2 cache, 200x L1 cache
Compress 1K bytes with Zippy             3,000   ns        3 us
Send 1K bytes over 1 Gbps network       10,000   ns       10 us
Read 4K randomly from SSD*             150,000   ns      150 us          ~1GB/sec SSD

## soft-iwarp.adoc

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                byronyi
                / soft-iwarp.adoc
            
            
              Created
              May 11, 2016 15:30
                — forked from jasonbrooks/soft-iwarp.adoc
            
              
                Toward a method of testing Gluster RDMA with regular ethernet NICs, for the hardware-challenged. For now, I have the installing soft-iwarp on Fedora 18 part down (I think). Up next, the getting it to work with Gluster RDMA part...
              
          
    Testing Gluster RDMA with Soft-iWARP


There’s a Gluster 3.4 RDMA test day right around the corner, and I want to join in on the fun. The trouble is, I don’t have any RDMA-capable hardware in my lab right now. Undaunted, I hit the Web in search of a software-based solution, one that would at least allow me to run through the tests.


I found a pair of promising-looking options:


Soft-iWARP


Soft ROCE


The information out on the web about these projects is a bit thinner than I’d like, but I found a blog post howto on installing Soft-iWARP on Ubuntu 10.10 and another for Debian 6 and figured I’d try it out on Fedora 18.

  
## System Design.md

      
              1 file
            
          
              0 forks
            
          
              0 comments
            
          
              0 stars
            
          
                byronyi
                / System Design.md
            
            
              Created
              April 18, 2016 07:10
                — forked from vasanthk/System Design.md
            
              
                System Design Cheatsheet
              
          
    # System Design Cheatsheet

Picking the right architecture = Picking the right battles + Managing trade-offs

## Basic Steps

Clarify and agree on the scope of the system


Use cases (descriptions of sequences of events that, taken together, lead to a system doing something useful)

Who is going to use it?
How are they going to use it?


## zmqstub.c
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <libgen.h>
#include <signal.h>

#include <err.h>
#include <assert.h>

#include <zmq.h>

## iunzip.py
import itertools
from operator import itemgetter

def iunzip(iterable):
    """Lazily transpose an iterable of equal-length sequences.

    Behaves like ``zip(*iterable)``, but every column is handed back as an
    iterator instead of a fully built tuple, so the input does not have to
    be expanded up front. Intended for large sequences.

    The number of columns is taken from the length of the first item.
    The columns share the underlying input through ``itertools.tee``, which
    buffers items that one column has consumed and another has not yet.

    Args:
        iterable: an iterable whose items are indexable and sized
            (e.g. tuples), all with at least as many fields as the first.

    Returns:
        An iterator yielding one lazy iterator per column. An empty input
        yields no columns.
    """
    # ``itertools.imap`` exists only on Python 2; on Python 3 the built-in
    # ``map`` is already lazy.
    lazy_map = getattr(itertools, "imap", map)

    rows = iter(iterable)
    try:
        # ``next(it)`` works on both Python 2.6+ and 3, unlike ``it.next()``.
        first = next(rows)
    except StopIteration:
        # Empty input: mirror ``zip(*[])`` rather than leaking StopIteration.
        return iter(())

    # Put the peeked item back in front of the remaining rows.
    rows = itertools.chain((first,), rows)
    iters = itertools.tee(rows, len(first))
    return (lazy_map(itemgetter(i), it) for i, it in enumerate(iters))

## Bibliography for January 29, 2015
Gartner:
https://www.gartner.com/doc/2867017/-planning-guide-overview-architecting
https://www.gartner.com/doc/2929317/framework-evaluating-big-data-initiatives
https://www.gartner.com/doc/2773117/security-futures-plan-peak-threat
https://www.gartner.com/doc/2691917/big-data-needs-datacentric-security
https://www.gartner.com/doc/2621115/big-data-analytics-requires-ethical

Books and Training:
http://www.amazon.com/Data-Science-Big-Analytics-Discovering/dp/111887613X
http://www.kaggle.com/competitions#getting-started
	#!/bin/bash

	# Pinned commit hashes; the trailing comments record the date noted for each.
	# NOTE(review): presumably these are the revisions of the tensorflow,
	# serving and models repositories that later steps use -- confirm against
	# the rest of the script.
	TENSORFLOW_COMMIT=9e76bf324f6bac63137a02bb6e6ec9120703ea9b # August 16, 2017
	TENSORFLOW_SERVING_COMMIT=267d682bf43df1c8e87332d3712c411baf162fe9 # August 18, 2017
	MODELS_COMMIT=78007443138108abf5170b296b4d703b49454487 # July 25, 2017

	# Default TENSORFLOW_SERVING_REPO_PATH to "serving" when it is unset or empty.
	# The expansion is quoted so that a value containing spaces or test operators
	# (for example "-o x") is handled as one string instead of being re-parsed
	# by `[` and either raising an error or being silently overwritten.
	if [ -z "${TENSORFLOW_SERVING_REPO_PATH:-}" ]; then
		TENSORFLOW_SERVING_REPO_PATH="serving"
	fi
	# Record the directory the script was started from.
	INITIAL_PATH=$(pwd)
	<domain type='kvm'>
	<name>ubuntu-4b</name>
	<uuid>7dfbcb8a-77da-11e6-a116-408d5cb4b9e6</uuid>
	<memory unit='KiB'>12582912</memory>
	<currentMemory unit='KiB'>12582912</currentMemory>
	<vcpu placement='static'>2</vcpu>
	<os>
	<type arch='x86_64' machine='pc-q35-2.5'>hvm</type>
	<loader readonly='no' type='pflash'>/usr/share/OVMF/OVMF_CODE.fd</loader>
	<nvram>/var/lib/libvirt/qemu/nvram/ubuntu-4b_VARS.fd</nvram>
	# Dependencies:
	# portpicker (pip install portpicker)
	# tcmalloc4 (sudo apt-get install google-perftools)
	# TF 0.12
	#
	#
	# Benchmarks on Xeon E5-2630 v3 @ 2.40GHz
	#
	# export LD_PRELOAD=/usr/lib/libtcmalloc.so.4
	# python benchmark_grpc_recv.py --data_mb=128
	"""Benchmark tensorflow distributed by adding vector of ones on worker2
	to variable on worker1 as fast as possible.

	On a 2014 MacBook with TensorFlow 0.10, this shows:

	Local rate: 2175.28 MB per second
	Distributed rate: 107.13 MB per second

	"""
	Latency Comparison Numbers
	--------------------------
	L1 cache reference 0.5 ns
	Branch mispredict 5 ns
	L2 cache reference 7 ns 14x L1 cache
	Mutex lock/unlock 25 ns
	Main memory reference 100 ns 20x L2 cache, 200x L1 cache
	Compress 1K bytes with Zippy 3,000 ns 3 us
	Send 1K bytes over 1 Gbps network 10,000 ns 10 us
	Read 4K randomly from SSD* 150,000 ns 150 us ~1GB/sec SSD
	#include <stdlib.h>
	#include <stdio.h>
	#include <string.h>
	#include <libgen.h>
	#include <signal.h>

	#include <err.h>
	#include <assert.h>

	#include <zmq.h>
	import itertools
	from operator import itemgetter

	def iunzip(iterable):
		"""Lazily transpose an iterable of equal-length sequences.

		Behaves like ``zip(*iterable)``, but every column is handed back as an
		iterator instead of a fully built tuple, so the input does not have to
		be expanded up front. Intended for large sequences.

		The number of columns is taken from the length of the first item.
		The columns share the underlying input through ``itertools.tee``, which
		buffers items that one column has consumed and another has not yet.

		Args:
			iterable: an iterable whose items are indexable and sized
				(e.g. tuples), all with at least as many fields as the first.

		Returns:
			An iterator yielding one lazy iterator per column. An empty input
			yields no columns.
		"""
		# ``itertools.imap`` exists only on Python 2; on Python 3 the built-in
		# ``map`` is already lazy.
		lazy_map = getattr(itertools, "imap", map)

		rows = iter(iterable)
		try:
			# ``next(it)`` works on both Python 2.6+ and 3, unlike ``it.next()``.
			first = next(rows)
		except StopIteration:
			# Empty input: mirror ``zip(*[])`` rather than leaking StopIteration.
			return iter(())

		# Put the peeked item back in front of the remaining rows.
		rows = itertools.chain((first,), rows)
		iters = itertools.tee(rows, len(first))
		return (lazy_map(itemgetter(i), it) for i, it in enumerate(iters))
	Gartner:
	https://www.gartner.com/doc/2867017/-planning-guide-overview-architecting
	https://www.gartner.com/doc/2929317/framework-evaluating-big-data-initiatives
	https://www.gartner.com/doc/2773117/security-futures-plan-peak-threat
	https://www.gartner.com/doc/2691917/big-data-needs-datacentric-security
	https://www.gartner.com/doc/2621115/big-data-analytics-requires-ethical

	Books and Training:
	http://www.amazon.com/Data-Science-Big-Analytics-Discovering/dp/111887613X
	http://www.kaggle.com/competitions#getting-started