# Gist by @dimitry12, created December 28, 2017
#
# On Titan X (Pascal)
# 8192 x 8192 matmul took: 0.10 sec, 11304.59 G ops/sec
# http://stackoverflow.com/questions/41804380/testing-gpu-with-tensorflow-matrix-multiplication
#
# On V100/fp16 peak 85k for 8192x8192
# timing under matmul_times subdirectory
#
# TODO: figure out the deal with spikes, is it CUDA autotune?
from __future__ import print_function
import ctypes
import errno
from ctypes.util import find_library
from functools import partial
from math import floor, sqrt
CLOCK_PROCESS_CPUTIME_ID = 2 # time.h
CLOCK_MONOTONIC_RAW = 4
clockid_t = ctypes.c_int
time_t = ctypes.c_long
class timespec(ctypes.Structure):
    _fields_ = [
        ('tv_sec', time_t),         # seconds
        ('tv_nsec', ctypes.c_long)  # nanoseconds
    ]
_clock_gettime = ctypes.CDLL(find_library('rt'), use_errno=True).clock_gettime
_clock_gettime.argtypes = [clockid_t, ctypes.POINTER(timespec)]
def clock_gettime(clk_id):
    tp = timespec()
    if _clock_gettime(clk_id, ctypes.byref(tp)) < 0:
        err = ctypes.get_errno()
        msg = errno.errorcode[err]
        if err == errno.EINVAL:
            msg += (" The clk_id specified is not supported on this system"
                    " clk_id=%r") % (clk_id,)
        raise OSError(err, msg)
    return tp.tv_sec + tp.tv_nsec * 1e-9
try:
    from time import perf_counter, process_time
except ImportError:  # Python <3.3
    perf_counter = partial(clock_gettime, CLOCK_MONOTONIC_RAW)
    perf_counter.__name__ = 'perf_counter'
    process_time = partial(clock_gettime, CLOCK_PROCESS_CPUTIME_ID)
    process_time.__name__ = 'process_time'
import math
import os
import sys
import numpy as np
import tensorflow as tf
import time
import argparse
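
# The Stack Overflow benchmark referenced in the header times a plain square
# matmul rather than the 1x1 conv2d used in bench() below. The helper here is
# only an illustrative sketch of that style of measurement (the name
# matmul_reference_bench is mine, not from the original gist); it assumes
# TF 1.x with a visible GPU, and is never called by the benchmark itself.
def matmul_reference_bench(n=8192):
    """Time one n x n fp16 matmul on GPU and return approximate G ops/sec."""
    with tf.device("/gpu:0"):
        a = tf.Variable(tf.ones((n, n), dtype=tf.float16))
        b = tf.Variable(tf.ones((n, n), dtype=tf.float16))
        prod = tf.matmul(a, b)
    sess = tf.Session()
    sess.run(tf.global_variables_initializer())
    sess.run(prod.op)  # warm-up run (kernel selection, memory allocation)
    start = perf_counter()
    sess.run(prod.op)
    elapsed = perf_counter() - start
    # an n x n matmul does n^2*(2n-1) multiply/add operations
    return (2*n**3 - n**2) / elapsed / 1e9
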
parser = argparse.ArgumentParser(
    description='TensorFlow conv2d GPU throughput benchmark')
parser.add_argument('--dtype', type=str, default='float16',
                    help='dtype, float32 or float16')
parser.add_argument('--agg', type=str, default='min',
                    help='min, mean or median')
args = parser.parse_args()
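# Example invocation (the script's file name is not part of this excerpt, so
# "conv_bench.py" is only a placeholder):
#   python conv_bench.py --dtype float16 --agg median
# Each measured channel count n is printed and appended to times.csv as "n,rate".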
def bench(n):
    if args.dtype == 'float32':
        dtype = tf.float32
    elif args.dtype == 'float16':
        dtype = tf.float16
    else:
        assert False, 'unknown dtype ' + args.dtype

    with tf.device("/gpu:0"):
        size = floor(sqrt((8192*8192*2*0.01 - n**2)/n))
        matrix1 = tf.Variable(tf.ones((1, size, size, n), dtype=dtype))
        matrix2 = tf.Variable(tf.ones((1, 1, n, n), dtype=dtype))
        product = tf.nn.conv2d(matrix1, matrix2, [1, 1, 1, 1], 'VALID')

    config = tf.ConfigProto()
    sess = tf.Session(config=config)
    sess.run(tf.global_variables_initializer())

    iters = 11
    # pre-warming
    sess.run(product.op)

    times = []
    for i in range(iters):
        start = perf_counter()
        sess.run(product.op)
        times.append(perf_counter() - start)

    ops = (n**2 + (n-1)*n)*(134217728/n)  # n^2*(n-1) additions, n^3 multiplications
    times_ms = 1000*np.array(times)  # get seconds, convert to ms
    if len(times_ms) > 0:
        min = np.min(times_ms)
        median = np.median(times_ms)
        formatted = ["%.2f" % (d,) for d in times_ms[:10]]
        # print("Times: min: %.2f, median: %.2f, mean: %.2f" % (min, median,
        #                                                       np.mean(times_ms)))

    if args.agg == 'min':
        elapsed_ms = np.min(times_ms)
    elif args.agg == 'mean':
        elapsed_ms = np.mean(times_ms)
    elif args.agg == 'median':
        elapsed_ms = np.median(times_ms)
    else:
        assert False, 'unknown aggregation method: ' + args.agg

    rate = ops/elapsed_ms/10**9
    # print('\n %d x %d matmul took: %.4f ms, %.2f G ops/sec' % (n, n,
    #                                                            elapsed_ms,
    #                                                            rate,))
    return rate
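
# Rough sanity check of the sizing and ops formulas in bench() (my arithmetic,
# not part of the original gist): for n = 64 channels the feature-map side
# comes out to 144, and each timed sess.run is credited with ~1.7e10 operations.
assert floor(sqrt((8192*8192*2*0.01 - 64**2)/64)) == 144
assert (64**2 + 63*64)*(134217728/64) == 17045651456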
def main():
    steps = 8  # number of steps between doublings of n
    np.set_printoptions(suppress=True)
    with open("times.csv", "w") as myfile:
        myfile.write("\n")
    for i in range(20*steps):
        n = int(math.pow(2, float(i)/steps))
        rate = bench(n)
        print("%d,%.10f" % (n, rate))
        with open("times.csv", "a") as myfile:
            myfile.write("%d,%.10f\n" % (n, rate))


if __name__ == '__main__':
    main()