Skip to content

Instantly share code, notes, and snippets.

@pervognsen
Last active April 4, 2020 12:01
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save pervognsen/f24342e67795addec0d19c5139166439 to your computer and use it in GitHub Desktop.
Save pervognsen/f24342e67795addec0d19c5139166439 to your computer and use it in GitHub Desktop.
def bits(x, start, stop):
    """Return the bit field of ``x`` covering positions ``start`` to ``stop - 1``.

    The field is shifted down so that bit ``start`` of ``x`` becomes bit 0 of
    the result. The range is half-open, so ``bits(x, 0, 2)`` yields the low
    two bits and ``bits(x, n, n)`` yields 0.

    Args:
        x: Integer to extract from.
        start: Index of the lowest bit of the field (inclusive).
        stop: Index one past the highest bit of the field (exclusive).

    Returns:
        The extracted field as a non-negative integer below
        ``1 << (stop - start)``.
    """
    assert 0 <= start <= stop
    # Parenthesize the shift: binary minus binds tighter than <<, so without
    # the parentheses the mask would be a single bit instead of `width` ones.
    width = stop - start
    mask = (1 << width) - 1
    return (x >> start) & mask
# Instruction set definition
# `make_instruction_set` and `float32` are defined outside this section.
sse1 = make_instruction_set("SSE1")
# Vector type built from float32 with a count of 4; the instructions below
# construct it as m128(e0, e1, e2, e3) and index it with 0..3.
m128 = sse1.make_vector_type("m128", float32, 4)
# Short alias so definitions below can write `@instruction(...)`.
instruction = sse1.instruction
# Arithmetic instructions
def binary_instruction(name, operation):
    """Build the ``_mm_<name>_ps`` / ``_mm_<name>_ss`` pair for a binary op.

    Args:
        name: Mnemonic fragment inserted into the two instruction names.
        operation: Two-argument callable implementing the operation.

    Returns:
        A ``(ps, ss)`` tuple. ``ps`` is ``operation`` itself wrapped by
        ``instruction``; ``ss`` applies ``operation`` to element 0 of both
        operands and copies elements 1..3 from the first operand.
    """
    # NOTE(review): `a` and `b` used as dict keys are module-level names not
    # visible in this section -- presumably operand symbols; confirm.
    packed_name = "_mm_%s_ps" % name
    scalar_name = "_mm_%s_ss" % name

    # The packed form hands whole vectors straight to `operation`.
    packed = instruction({a: m128, b: m128}, m128, name=packed_name)(operation)

    wrap_scalar = instruction({a: m128, b: m128}, m128, name=scalar_name)
    scalar = wrap_scalar(
        lambda a, b: m128(operation(a[0], b[0]), a[1], a[2], a[3])
    )
    return packed, scalar
# Arithmetic instruction pairs. Each lambda must combine its own parameters
# (a, b); referring to any other name would fail when the instruction is
# evaluated.
_mm_add_ps, _mm_add_ss = binary_instruction("add", lambda a, b: a + b)
_mm_sub_ps, _mm_sub_ss = binary_instruction("sub", lambda a, b: a - b)
_mm_mul_ps, _mm_mul_ss = binary_instruction("mul", lambda a, b: a * b)
_mm_div_ps, _mm_div_ss = binary_instruction("div", lambda a, b: a / b)
# NOTE(review): `min`/`max` resolve to whatever is bound to those names in
# this module (the builtins unless shadowed elsewhere) -- confirm they behave
# as intended when the packed form passes whole vectors.
_mm_min_ps, _mm_min_ss = binary_instruction("min", min)
_mm_max_ps, _mm_max_ss = binary_instruction("max", max)
def unary_instruction(name, operation):
    """Build the ``_mm_<name>_ps`` / ``_mm_<name>_ss`` pair for a unary op.

    Args:
        name: Mnemonic fragment inserted into the two instruction names.
        operation: One-argument callable implementing the operation.

    Returns:
        A ``(ps, ss)`` tuple. ``ps`` is ``operation`` itself wrapped by
        ``instruction``; ``ss`` applies ``operation`` to element 0 and copies
        elements 1..3 from the operand unchanged.
    """
    # NOTE(review): `a` used as a dict key is a module-level name not visible
    # in this section -- presumably an operand symbol; confirm.
    packed_name = "_mm_%s_ps" % name
    scalar_name = "_mm_%s_ss" % name

    # The packed form hands the whole vector straight to `operation`.
    packed = instruction({a: m128}, m128, name=packed_name)(operation)

    wrap_scalar = instruction({a: m128}, m128, name=scalar_name)
    scalar = wrap_scalar(
        lambda a: m128(operation(a[0]), a[1], a[2], a[3])
    )
    return packed, scalar
# Unary instruction pairs; `rcp`, `rsqrt` and `sqrt` are defined outside this
# section.
_mm_rcp_ps, _mm_rcp_ss = unary_instruction("rcp", rcp)
_mm_rsqrt_ps, _mm_rsqrt_ss = unary_instruction("rsqrt", rsqrt)
_mm_sqrt_ps, _mm_sqrt_ss = unary_instruction("sqrt", sqrt)
# Data movement instructions
@instruction({a: m128, b: m128, imm8: immediate(int)}, m128)
def _mm_shuffle_ps(a, b, imm8):
    """Select two elements from ``a`` and two from ``b`` as directed by ``imm8``.

    Result elements 0 and 1 are picked from ``a`` and elements 2 and 3 from
    ``b``. Each index comes from ``bits(imm8, lo, hi)`` over the successive
    ranges (0, 2), (2, 4), (4, 6) and (6, 8).
    """
    return m128(
        a[bits(imm8, 0, 2)],
        a[bits(imm8, 2, 4)],
        b[bits(imm8, 4, 6)],
        b[bits(imm8, 6, 8)],
    )
@instruction({a: m128, b: m128}, m128)
def _mm_unpacklo_ps(a, b):
    """Interleave elements 0 and 1 of ``a`` and ``b``: (a0, b0, a1, b1)."""
    interleaved = (a[0], b[0], a[1], b[1])
    return m128(*interleaved)
@instruction({a: m128, b: m128}, m128)
def _mm_unpackhi_ps(a, b):
    """Interleave elements 2 and 3 of ``a`` and ``b``: (a2, b2, a3, b3)."""
    interleaved = (a[2], b[2], a[3], b[3])
    return m128(*interleaved)
@instruction({a: m128, b: m128}, m128)
def _mm_movelh_ps(a, b):
    """Combine the low halves of both operands: (a0, a1, b0, b1)."""
    result = m128(
        a[0],
        a[1],
        b[0],
        b[1],
    )
    return result
@instruction({a: m128, b: m128}, m128)
def _mm_movehl_ps(a, b):
    """Combine the high halves of both operands: (b2, b3, a2, a3)."""
    result = m128(
        b[2],
        b[3],
        a[2],
        a[3],
    )
    return result
@instruction({a: m128, b: m128}, m128)
def _mm_move_ss(a, b):
    """Take element 0 from ``b`` and elements 1..3 from ``a``."""
    moved = (b[0], a[1], a[2], a[3])
    return m128(*moved)
@instruction({e3: float32, e2: float32, e1: float32, e0: float32}, m128)
def _mm_set_ps(e3, e2, e1, e0):
    """Build a vector from four scalars given highest element first.

    The arguments arrive as (e3, e2, e1, e0) and are stored so that ``e0``
    becomes element 0 and ``e3`` becomes element 3.
    """
    in_element_order = (e0, e1, e2, e3)
    return m128(*in_element_order)
@instruction({a: float32}, m128)
def _mm_set_ps1(a):
    """Broadcast the scalar ``a`` into all four elements."""
    return m128(*((a,) * 4))


# Second name for the same instruction.
_mm_set1_ps = _mm_set_ps1
@instruction({e3: float32, e2: float32, e1: float32, e0: float32}, m128)
def _mm_setr_ps(e3, e2, e1, e0):
    """Build a vector from four scalars in argument order.

    The first argument (``e3``) becomes element 0 and the last (``e0``)
    becomes element 3.
    """
    in_argument_order = (e3, e2, e1, e0)
    return m128(*in_argument_order)
@instruction({}, m128)
def _mm_setzero_ps():
    """Return a vector whose four elements are all zero.

    Uses the Intel intrinsic's name, ``_mm_setzero_ps``, like the other
    ``_ps`` instructions in this file.
    """
    return m128(0, 0, 0, 0)


# Backward-compatible alias for the earlier spelling without the `_ps` suffix.
_mm_setzero = _mm_setzero_ps
# Load instructions
@instruction({mem_addr: pointer(float32)}, m128)
def _mm_load_ps(mem_addr):
    """Load an ``m128`` from ``mem_addr`` via ``load_aligned``."""
    # `load_aligned` is defined outside this section; presumably it requires
    # vector alignment -- confirm against its definition.
    vector = load_aligned(mem_addr, m128)
    return vector
@instruction({mem_addr: pointer(float32)}, m128)
def _mm_loadu_ps(mem_addr):
    """Load an ``m128`` from ``mem_addr`` via the plain ``load`` helper."""
    vector = load(mem_addr, m128)
    return vector
@instruction({mem_addr: pointer(float32)}, m128)
def _mm_load_ps1(mem_addr):
    """Load one value from ``mem_addr`` and broadcast it to all four elements."""
    scalar = load(mem_addr)
    return m128(*((scalar,) * 4))


# Second name for the same instruction.
_mm_load1_ps = _mm_load_ps1
@instruction({mem_addr: pointer(float32)}, m128)
def _mm_load_ss(mem_addr):
    """Load one value from ``mem_addr`` into element 0; zero elements 1..3.

    Uses the Intel intrinsic's name, ``_mm_load_ss``, matching
    ``_mm_store_ss`` in this file.
    """
    low = load(mem_addr)
    return m128(low, 0, 0, 0)


# Backward-compatible alias for the earlier spelling with a trailing `1`.
_mm_load_ss1 = _mm_load_ss
@instruction({a: m128, mem_addr: pointer(float32)}, m128)
def _mm_loadh_pi(a, mem_addr):
    """Replace the high half of ``a`` with two values loaded from memory.

    Elements 0 and 1 come from ``a``; elements 2 and 3 are loaded from
    ``mem_addr`` and ``mem_addr + 1`` respectively.
    """
    return m128(
        a[0],
        a[1],
        load(mem_addr),
        load(mem_addr + 1),
    )
@instruction({a: m128, mem_addr: pointer(float32)}, m128)
def _mm_loadl_pi(a, mem_addr):
    """Replace the low half of ``a`` with two values loaded from memory.

    Elements 0 and 1 are loaded from ``mem_addr`` and ``mem_addr + 1``;
    elements 2 and 3 come from ``a``.
    """
    return m128(
        load(mem_addr),
        load(mem_addr + 1),
        a[2],
        a[3],
    )
@instruction({mem_addr: pointer(float32)}, m128)
def _mm_loadr_ps(mem_addr):
    """Load an ``m128`` via ``load_aligned`` and return it with elements reversed.

    Uses the Intel intrinsic's name, ``_mm_loadr_ps``, matching
    ``_mm_storer_ps`` in this file; the ``_pi`` suffix is used here only by
    the two-element half loads.
    """
    loaded = load_aligned(mem_addr, m128)
    return m128(loaded[3], loaded[2], loaded[1], loaded[0])


# Backward-compatible alias for the earlier `_pi` spelling.
_mm_loadr_pi = _mm_loadr_ps
# Store instructions
@instruction({mem_addr: pointer(float32), a: m128})
def _mm_store_ps(mem_addr, a):
    """Write the whole vector ``a`` to ``mem_addr`` via ``store_aligned``."""
    # `store_aligned` is defined outside this section; presumably it requires
    # vector alignment -- confirm against its definition.
    store_aligned(mem_addr, a)
    return None
@instruction({mem_addr: pointer(float32), a: m128})
def _mm_store_ps1(mem_addr, a):
    """Write element 0 of ``a`` to four consecutive slots at ``mem_addr``.

    A vector with ``a[0]`` in every element is built and written with
    ``store_aligned``.
    """
    broadcast = m128(a[0], a[0], a[0], a[0])
    store_aligned(mem_addr, broadcast)


# Second name for the same instruction.
_mm_store1_ps = _mm_store_ps1
@instruction({mem_addr: pointer(float32), a: m128})
def _mm_store_ss(mem_addr, a):
    """Write only element 0 of ``a`` to ``mem_addr``.

    Per the Intel definition of ``_mm_store_ss``, exactly one float is
    written and no alignment is required. This uses the same scalar
    ``store`` call as the two-element half stores in this file.
    """
    # Storing a full (a[0], 0, 0, 0) vector would overwrite the three floats
    # after mem_addr with zeros, so only the low element is written.
    store(mem_addr, a[0])
@instruction({mem_addr: pointer(float32), a: m128})
def _mm_storeh_pi(mem_addr, a):
    """Write the high half of ``a`` to memory.

    ``a[2]`` goes to ``mem_addr`` and ``a[3]`` to ``mem_addr + 1``.
    """
    store(mem_addr, a[2])
    second_slot = mem_addr + 1
    store(second_slot, a[3])
@instruction({mem_addr: pointer(float32), a: m128})
def _mm_storel_pi(mem_addr, a):
    """Write the low half of ``a`` to memory.

    ``a[0]`` goes to ``mem_addr`` and ``a[1]`` to ``mem_addr + 1``.
    """
    store(mem_addr, a[0])
    second_slot = mem_addr + 1
    store(second_slot, a[1])
@instruction({mem_addr: pointer(float32), a: m128})
def _mm_storer_ps(mem_addr, a):
    """Write ``a`` to ``mem_addr`` with its elements in reverse order.

    A vector (a3, a2, a1, a0) is built and written with ``store_aligned``.
    """
    reversed_vector = m128(a[3], a[2], a[1], a[0])
    store_aligned(mem_addr, reversed_vector)
@instruction({mem_addr: pointer(float32), a: m128})
def _mm_storeu_ps(mem_addr, a):
    """Write the whole vector ``a`` to ``mem_addr`` via the plain ``store`` helper."""
    store(mem_addr, a)
    return None
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment