@Laurawly
Last active November 22, 2022 13:28
import numpy as np
import tvm
from tvm import te
from tvm.contrib import cublas
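
# Benchmark a mixed-precision GEMM (fp16 inputs, fp32 output) offloaded to
# cuBLAS through TVM's contrib wrapper.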
target = tvm.target.Target('cuda')
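# GEMM problem size: C (M x N) = A (M x K) @ B (N x K)^T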
M = 8192
N = 2304
K = 768
A = te.placeholder((M, K), name='data', dtype='float16')
B = te.placeholder((N, K), name='kernel', dtype='float16')
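# transb=True: cuBLAS computes C[i, j] = sum_k A[i, k] * B[j, k], accumulating in fp32.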
C = cublas.matmul(A, B, False, True, dtype='float32')
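# cublas.matmul is an extern op, so there is nothing to schedule by hand;
# create_schedule just wraps it so tvm.build can emit the host-side call.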
sch = te.create_schedule(C.op)
args = [A, B, C]
func = tvm.build(sch, args, target)
# Check correctness against a NumPy reference.
data_np = np.random.uniform(size=(M, K)).astype(np.float16)
weight_np = np.random.uniform(size=(N, K)).astype(np.float16)
# Compute the reference in fp32 so its accumulation matches cuBLAS's fp32
# accumulate; a pure-fp16 reference can drift past rtol=1e-3 at K = 768.
out_np = np.matmul(data_np.astype(np.float32), weight_np.astype(np.float32).T)
# Upload inputs and run once on the GPU (TVM 0.8+ API: tvm.cuda()/device=
# replaced the older tvm.gpu()/ctx=).
dev = tvm.cuda()
data_tvm = tvm.nd.array(data_np, device=dev)
weight_tvm = tvm.nd.array(weight_np, device=dev)
out_tvm = tvm.nd.array(np.zeros((M, N), dtype=C.dtype), device=dev)
func(data_tvm, weight_tvm, out_tvm)
# Check results (NDArray.asnumpy() is .numpy() on TVM 0.8+).
np.testing.assert_allclose(out_np, out_tvm.numpy(), rtol=1e-3)
# Evaluate execution time: each of the 10 repeats reports the mean over 100
# runs; take the median across repeats to damp outliers.
evaluator = func.time_evaluator(func.entry_name, dev, number=100, repeat=10)
prof_res = np.array(evaluator(data_tvm, weight_tvm, out_tvm).results)  # seconds
time = np.median(prof_res)
print("shape", data_np.shape, weight_np.shape)
print("Execution time of this operator: %.3f ms" % (time * 1000))
print("Speed: %.3f TFLOPS" % (2 * (M*N*K) / time / 1e12))