Skip to content

Instantly share code, notes, and snippets.

import tensorflow as tf
from numpy.testing.utils import nulp_diff
import time
# Side length of the square matrices used below.
n = 1024
# Two (n, n) variables; presumably initialized to all ones using the
# TF 0.12-era ones_initializer(shape) call form -- TODO confirm for the
# TensorFlow version in use.
v1 = tf.Variable(tf.ones_initializer((n, n)))
v2 = tf.Variable(tf.ones_initializer((n, n)))
# Overwrite v1 with matmul(v1, v2) / n. Taking `.op` keeps only the
# operation rather than the tensor produced by the assignment.
op = v1.assign(tf.matmul(v1, v2)/n).op
# Sum over all elements of v1.
norm = tf.reduce_sum(v1)
@yaroslavvb
yaroslavvb / sessrun
Created November 1, 2016 17:59
Example of wrapper for session.run that returns dictionaries instead of lists
def sessrun(fetches):
    """Run ``fetches`` in the default session and return the results as a dict.

    Args:
        fetches: Sequence of fetchable objects (e.g. a list of tensors). It is
            passed unchanged to the default session's ``run`` method.

    Returns:
        A dict mapping each element of ``fetches`` to the value computed for
        it, in place of the positional list that ``run`` returns.
    """
    # Relies on a default session being active; none is created here.
    values = tf.get_default_session().run(fetches)
    # Pair every fetch with its result positionally.
    return dict(zip(fetches, values))
# Three scalar constants to use as example fetches (c is not fetched in the
# visible portion of this example).
a = tf.constant(1)
b = tf.constant(2)
c = tf.constant(3)
# sessrun() looks up the default session, so a session has to exist before it
# is called; InteractiveSession is expected to register itself as the default.
sess = tf.InteractiveSession()
# Fetch a and b together; the result is a dict keyed by the fetched tensors.
result1 = sessrun([a, b])
@yaroslavvb
yaroslavvb / scratch.py
Created December 8, 2016 22:51
Example of using stats summarizer
import tensorflow as tf
# Small graph to profile: product of two 10x10 all-ones matrices.
a = tf.ones((10, 10))
b = tf.ones((10, 10))
c = tf.matmul(a, b)
# Create the stat summarizer from the serialized GraphDef of the default graph,
# which at this point contains the ops defined above.
ss = tf.contrib.stat_summarizer.NewStatSummarizer(tf.get_default_graph().as_graph_def().SerializeToString())
# Session used by the profiling loop that follows.
sess = tf.Session()
for i in range(10):
run_metadata = tf.RunMetadata()
run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)
@yaroslavvb
yaroslavvb / sharded_ps_benchmark.py
Last active December 27, 2022 06:25
Example of local cluster with multiple workers/training loops sharded parameter server
#!/usr/bin/env python
# Benchmark transferring data, part of troubleshooting https://github.com/tensorflow/tensorflow/issues/6116
#
# Take `a` independent workers communicating with `b` parameter shards.
# Each worker tries to add to variables stored on parameter server as fast as
# possible.
#
# macbook
# ps=1: 1.6 GB/s
# ps=2: 2.6 GB/s
@yaroslavvb
yaroslavvb / stats_summarizer_example.py
Last active February 6, 2018 09:25
Example of using stats summarizer
# export LD_LIBRARY_PATH="/usr/local/cuda/lib64:/usr/local/cuda/extras/CUPTI/lib64:$LD_LIBRARY_PATH"
import tensorflow as tf
# Small graph to profile: product of two 10x10 all-ones matrices.
a = tf.ones((10, 10))
b = tf.ones((10, 10))
c = tf.matmul(a, b)
# Create the stat summarizer from the serialized GraphDef of the default graph,
# which at this point contains the ops defined above.
ss = tf.contrib.stat_summarizer.NewStatSummarizer(tf.get_default_graph().as_graph_def().SerializeToString())
# Session used by the profiling loop that follows.
sess = tf.Session()
for i in range(10):
@yaroslavvb
yaroslavvb / client_transfer_benchmark.py
Last active February 6, 2018 09:25
benchmark TensorFlow<->Python transfer rate
# Benchmark transferring data from TF into Python runtime
#
## Dependencies:
# portpicker (pip install portpicker)
# tcmalloc4 (sudo apt-get install google-perftools)
# TF 0.12 (for var.read_value(), ones_initializer())
#
# On Linux default malloc is slow
# sudo apt-get install google-perftools
# export LD_PRELOAD="/usr/lib/libtcmalloc.so.4"
@yaroslavvb
yaroslavvb / count_ops.py
Created December 11, 2016 01:44
count number of ops in TensorFlow low-level API
from google.protobuf import text_format
from tensorflow.core.framework import op_def_pb2
# Count the ops registered in TensorFlow's ops.pbtxt by parsing the text-format
# OpList proto and printing how many entries it holds.
ops = op_def_pb2.OpList()
# Path into a local TensorFlow checkout; adjust for your machine.
ops_path = "/local_home/yaroslav/tensorflow.git/tensorflow/tensorflow/core/ops/ops.pbtxt"
# Use a context manager so the file handle is closed once the text is read.
with open(ops_path) as ops_file:
    ops_text = ops_file.read()
# Parse the text-format proto into the OpList message.
text_format.Merge(ops_text, ops)
print(len(ops.op))
@yaroslavvb
yaroslavvb / benchmark_grpc_recv.py
Last active December 27, 2022 06:24
Benchmark slowness of passing Tensors around between TF workers
# Dependencies:
# portpicker (pip install portpicker)
# tcmalloc4 (sudo apt-get install google-perftools)
# TF 0.12
#
#
# Benchmarks on Xeon E5-2630 v3 @ 2.40GHz
#
# export LD_PRELOAD=/usr/lib/libtcmalloc.so.4
# python benchmark_grpc_recv.py --data_mb=128
@yaroslavvb
yaroslavvb / simple_barrier.py
Created December 16, 2016 06:03
Example of using shared counters to implement Barrier primitive
"""Example of barrier implementation using TensorFlow shared variables.
All workers synchronize on barrier, copy global parameters to local versions
and increment global parameter variable asynchronously. Should see something
like this:
bash> killall python
bash> python simple_barrier.py --num_workers=4
worker 0, local_param 4 global_param 5
worker 2, local_param 4 global_param 7
# Try to copy "a" value to "c" while simultaneously adding vector of 1's to a.
# If the copy is started before the first assign_add, the copied value will be inconsistent.
#
# Running it on a MacBook, my "c" ends up with a mix of values between 1 and 6
#
#
# 16.017478 copy 1 (0) starting
# 17.006894 write 1 (0) starting
# 28.431654 write 1 ending (11.4247 sec)
# 29.436692 write 1 (1) starting