Skip to content

Instantly share code, notes, and snippets.

@nomaddo
Last active September 15, 2018 16:32
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save nomaddo/31aaf839f2463816beb9563173b48728 to your computer and use it in GitHub Desktop.
Save nomaddo/31aaf839f2463816beb9563173b48728 to your computer and use it in GitHub Desktop.
TMU load experiment
import numpy as np
import time
from videocore.assembler import qpu
from videocore.driver import Driver
@qpu
def hello_world(asm):
    """QPU micro-benchmark: issue the same TMU0 load 3,000,000 times.

    The single uniform consumed by the kernel is used as the load address
    (the host passes the address of its input buffer).  Every iteration
    writes that address to ``tmu0_s`` and then collects the result with the
    ``load tmu0`` signal; the loaded value itself is never used, only the
    elapsed time measured by the host matters.

    The ``asm`` parameter is required by the ``@qpu`` decorator; the
    instruction names used below (``mov``, ``ldi``, ``L`` ...) are not
    defined in this file.
    """
    # r0 <- load address taken from the uniform stream.
    mov(r0, uniform)
    # r2 <- iteration counter.
    ldi(r2, 3000000)

    L.loop

    # Request a TMU0 load from the address held in r0.
    mov(tmu0_s, r0)

    # Experiment 1: inserting four nop() instructions here (between the
    # request and the 'load tmu0' signal) barely changes the run time:
    # 0.6456s -> 0.6461s.

    # Collect the TMU0 result.
    nop(sig='load tmu0')

    # Experiment 2: inserting four nop() instructions here (after the
    # 'load tmu0' signal) makes the run noticeably slower:
    # 0.6456s -> 0.8060s.

    # Decrement the counter and update the flags tested by the branch.
    isub(r2, r2, 1, set_flags=True)
    # Loop again while the counter is non-zero (zero flag clear).
    jzc(L.loop)
    # The three instructions following a branch are always executed
    # (branch delay slots), so they are filled with nops.
    nop()
    nop()
    nop()

    # Finish the thread
    exit()
# Host side of the experiment: run the kernel on one QPU thread and print
# the wall-clock time of the execution in seconds.
with Driver() as drv:
    # Input vector: 16 random float32 values.  Their contents are
    # irrelevant to the timing; the kernel only needs a valid address.
    a = np.random.random(16).astype('float32')

    # Copy the vector into memory shared with the GPU.  (np.r_ is only
    # needed to concatenate several arrays, so it is omitted here.)
    inp = drv.copy(a)

    # Build the program *before* starting the timer so that the work done
    # by drv.program() is not included in the measurement; the differences
    # being studied are small, so only drv.execute() should be timed.
    program = drv.program(hello_world)

    # Run the program
    start = time.perf_counter()
    drv.execute(
        n_threads=1,
        program=program,
        uniforms=[inp.address],
    )
    end = time.perf_counter()

    print('{:.4f}'.format(end - start))
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment