Last active
April 4, 2020 12:01
-
-
Save pervognsen/f24342e67795addec0d19c5139166439 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
def bits(x, start, stop):
    """Return the bit field of ``x`` occupying bit positions ``start`` up to (not including) ``stop``.

    The field is shifted down so that bit ``start`` of ``x`` becomes bit 0 of
    the result. An empty range (``start == stop``) yields 0.

    Asserts that ``0 <= start <= stop``.
    """
    assert 0 <= start <= stop
    width = stop - start
    # The shift must be parenthesised: `-` binds tighter than `<<`, so
    # `1 << width - 1` would produce a single set bit instead of a
    # `width`-bit mask of ones.
    mask = (1 << width) - 1
    return (x >> start) & mask
# Instruction set definition
#
# `sse1` is the instruction-set object; `m128` is its vector type made of
# four float32 components; `instruction` is the set's decorator factory,
# bound to a short module-level name for the definitions that follow.
sse1 = make_instruction_set("SSE1")
m128 = sse1.make_vector_type("m128", float32, 4)
instruction = sse1.instruction
# Arithmetic instructions
def binary_instruction(name, operation):
    """Build the packed (``_ps``) and scalar (``_ss``) forms of a two-operand instruction.

    ``name`` is spliced into ``_mm_<name>_ps`` and ``_mm_<name>_ss``. The
    packed form passes both m128 operands straight to ``operation``. The
    scalar form applies ``operation`` to element 0 of each operand and takes
    elements 1-3 unchanged from ``a``.

    Returns the pair ``(packed, scalar)``.
    """
    packed_name = "_mm_%s_ps" % name
    scalar_name = "_mm_%s_ss" % name

    declare_packed = instruction({a: m128, b: m128}, m128, name=packed_name)
    packed = declare_packed(operation)

    declare_scalar = instruction({a: m128, b: m128}, m128, name=scalar_name)
    scalar = declare_scalar(
        lambda a, b: m128(operation(a[0], b[0]), a[1], a[2], a[3])
    )
    return packed, scalar
# Packed/scalar arithmetic instruction pairs.
# Each lambda body operates on its own parameters `a` and `b`; referring to
# any other names (such as `x`/`y`) would raise NameError when the
# instruction is evaluated.
_mm_add_ps, _mm_add_ss = binary_instruction("add", lambda a, b: a + b)
_mm_sub_ps, _mm_sub_ss = binary_instruction("sub", lambda a, b: a - b)
_mm_mul_ps, _mm_mul_ss = binary_instruction("mul", lambda a, b: a * b)
_mm_div_ps, _mm_div_ss = binary_instruction("div", lambda a, b: a / b)
# NOTE(review): `min`/`max` resolve to whatever is in scope here. If they are
# the Python builtins, confirm they act element-wise on m128 for the _ps forms.
_mm_min_ps, _mm_min_ss = binary_instruction("min", min)
_mm_max_ps, _mm_max_ss = binary_instruction("max", max)
def unary_instruction(name, operation):
    """Build the packed (``_ps``) and scalar (``_ss``) forms of a one-operand instruction.

    ``name`` is spliced into ``_mm_<name>_ps`` and ``_mm_<name>_ss``. The
    packed form passes the m128 operand straight to ``operation``. The scalar
    form applies ``operation`` to element 0 only and takes elements 1-3
    unchanged from ``a``.

    Returns the pair ``(packed, scalar)``.
    """
    packed_name = "_mm_%s_ps" % name
    scalar_name = "_mm_%s_ss" % name

    declare_packed = instruction({a: m128}, m128, name=packed_name)
    packed = declare_packed(operation)

    declare_scalar = instruction({a: m128}, m128, name=scalar_name)
    scalar = declare_scalar(
        lambda a: m128(operation(a[0]), a[1], a[2], a[3])
    )
    return packed, scalar
# Packed/scalar pairs for the one-operand instructions; `rcp`, `rsqrt` and
# `sqrt` are supplied from outside this section.
_mm_rcp_ps, _mm_rcp_ss = unary_instruction("rcp", rcp)
_mm_rsqrt_ps, _mm_rsqrt_ss = unary_instruction("rsqrt", rsqrt)
_mm_sqrt_ps, _mm_sqrt_ss = unary_instruction("sqrt", sqrt)
# Data movement instructions

# Shuffle: the first two result elements are picked from `a`, the last two
# from `b`. Each element index comes from bits() applied to successive
# bit ranges of imm8: (0, 2), (2, 4), (4, 6), (6, 8).
@instruction({a: m128, b: m128, imm8: immediate(int)}, m128)
def _mm_shuffle_ps(a, b, imm8):
    return m128(
        a[bits(imm8, 0, 2)],
        a[bits(imm8, 2, 4)],
        b[bits(imm8, 4, 6)],
        b[bits(imm8, 6, 8)],
    )
# Interleave elements 0 and 1 of `a` and `b`: (a0, b0, a1, b1).
@instruction({a: m128, b: m128}, m128)
def _mm_unpacklo_ps(a, b):
    a0 = a[0]
    b0 = b[0]
    a1 = a[1]
    b1 = b[1]
    return m128(a0, b0, a1, b1)
# Interleave elements 2 and 3 of `a` and `b`: (a2, b2, a3, b3).
@instruction({a: m128, b: m128}, m128)
def _mm_unpackhi_ps(a, b):
    a2 = a[2]
    b2 = b[2]
    a3 = a[3]
    b3 = b[3]
    return m128(a2, b2, a3, b3)
# Elements 0-1 of `a` followed by elements 0-1 of `b`: (a0, a1, b0, b1).
@instruction({a: m128, b: m128}, m128)
def _mm_movelh_ps(a, b):
    a0 = a[0]
    a1 = a[1]
    b0 = b[0]
    b1 = b[1]
    return m128(a0, a1, b0, b1)
# Elements 2-3 of `b` followed by elements 2-3 of `a`: (b2, b3, a2, a3).
@instruction({a: m128, b: m128}, m128)
def _mm_movehl_ps(a, b):
    b2 = b[2]
    b3 = b[3]
    a2 = a[2]
    a3 = a[3]
    return m128(b2, b3, a2, a3)
# Element 0 comes from `b`; elements 1-3 are copied from `a`.
@instruction({a: m128, b: m128}, m128)
def _mm_move_ss(a, b):
    low = b[0]
    return m128(low, a[1], a[2], a[3])
# Build a vector from four scalars. The parameters are listed e3..e0, and
# they are passed to m128 in the opposite order, so e0 is the first component.
@instruction({e3: float32, e2: float32, e1: float32, e0: float32}, m128)
def _mm_set_ps(e3, e2, e1, e0):
    result = m128(e0, e1, e2, e3)
    return result
# Broadcast one scalar into all four components.
@instruction({a: float32}, m128)
def _mm_set_ps1(a):
    broadcast = m128(a, a, a, a)
    return broadcast


# Second name bound to the same instruction object.
_mm_set1_ps = _mm_set_ps1
# Build a vector from four scalars, passing them to m128 in the same order
# as the parameter list, so e3 is the first component.
@instruction({e3: float32, e2: float32, e1: float32, e0: float32}, m128)
def _mm_setr_ps(e3, e2, e1, e0):
    result = m128(e3, e2, e1, e0)
    return result
# All-zero vector. The SSE intrinsic is named `_mm_setzero_ps`, so the
# definition carries that name.
# NOTE(review): presumably `instruction` derives the instruction name from
# the function name when no `name=` is given — confirm.
@instruction({}, m128)
def _mm_setzero_ps():
    return m128(0, 0, 0, 0)


# Previous (non-standard) name kept bound for existing callers.
_mm_setzero = _mm_setzero_ps
# Load instructions

# Load a whole m128 from mem_addr via load_aligned.
@instruction({mem_addr: pointer(float32)}, m128)
def _mm_load_ps(mem_addr):
    loaded = load_aligned(mem_addr, m128)
    return loaded
# Load a whole m128 from mem_addr via load (the non-`_aligned` helper).
@instruction({mem_addr: pointer(float32)}, m128)
def _mm_loadu_ps(mem_addr):
    loaded = load(mem_addr, m128)
    return loaded
# Load one value from mem_addr and broadcast it into all four components.
@instruction({mem_addr: pointer(float32)}, m128)
def _mm_load_ps1(mem_addr):
    value = load(mem_addr)
    return m128(value, value, value, value)


# Second name bound to the same instruction object.
_mm_load1_ps = _mm_load_ps1
# Load one value from mem_addr into element 0 and zero elements 1-3.
# The SSE intrinsic is named `_mm_load_ss` (there is no `_mm_load_ss1`), so
# the definition carries that name.
# NOTE(review): presumably `instruction` derives the instruction name from
# the function name when no `name=` is given — confirm.
@instruction({mem_addr: pointer(float32)}, m128)
def _mm_load_ss(mem_addr):
    a = load(mem_addr)
    return m128(a, 0, 0, 0)


# Previous (non-standard) name kept bound for existing callers.
_mm_load_ss1 = _mm_load_ss
# Keep elements 0-1 of `a`; fill elements 2-3 from mem_addr and mem_addr + 1.
@instruction({a: m128, mem_addr: pointer(float32)}, m128)
def _mm_loadh_pi(a, mem_addr):
    keep0 = a[0]
    keep1 = a[1]
    first = load(mem_addr)
    second = load(mem_addr + 1)
    return m128(keep0, keep1, first, second)
# Fill elements 0-1 from mem_addr and mem_addr + 1; keep elements 2-3 of `a`.
@instruction({a: m128, mem_addr: pointer(float32)}, m128)
def _mm_loadl_pi(a, mem_addr):
    first = load(mem_addr)
    second = load(mem_addr + 1)
    return m128(first, second, a[2], a[3])
# Load a whole m128 via load_aligned and return its elements in reverse order.
# The SSE intrinsic for a reversed four-float load is named `_mm_loadr_ps`
# (there is no `_mm_loadr_pi`), so the definition carries that name.
# NOTE(review): presumably `instruction` derives the instruction name from
# the function name when no `name=` is given — confirm.
@instruction({mem_addr: pointer(float32)}, m128)
def _mm_loadr_ps(mem_addr):
    a = load_aligned(mem_addr, m128)
    return m128(a[3], a[2], a[1], a[0])


# Previous (non-standard) name kept bound for existing callers.
_mm_loadr_pi = _mm_loadr_ps
# Store instructions

# Write the whole vector `a` to mem_addr via store_aligned. No result type is
# declared and nothing is returned.
@instruction({mem_addr: pointer(float32), a: m128})
def _mm_store_ps(mem_addr, a):
    store_aligned(mem_addr, a)
# Broadcast element 0 of `a` into a four-component vector and write it to
# mem_addr via store_aligned.
@instruction({mem_addr: pointer(float32), a: m128})
def _mm_store_ps1(mem_addr, a):
    broadcast = m128(a[0], a[0], a[0], a[0])
    store_aligned(mem_addr, broadcast)


# Second name bound to the same instruction object.
_mm_store1_ps = _mm_store_ps1
# Store only element 0 of `a` to mem_addr.
# The SSE `_mm_store_ss` intrinsic writes a single 32-bit float and places no
# alignment requirement on mem_addr. Writing a full aligned m128 padded with
# zeros would clobber the three floats after mem_addr and impose an
# alignment constraint the instruction does not have, so a scalar `store`
# is used, matching how the `_pi` stores write individual elements.
@instruction({mem_addr: pointer(float32), a: m128})
def _mm_store_ss(mem_addr, a):
    store(mem_addr, a[0])
# Write elements 2 and 3 of `a` to mem_addr and mem_addr + 1 respectively.
@instruction({mem_addr: pointer(float32), a: m128})
def _mm_storeh_pi(mem_addr, a):
    upper0 = a[2]
    store(mem_addr, upper0)
    upper1 = a[3]
    store(mem_addr + 1, upper1)
# Write elements 0 and 1 of `a` to mem_addr and mem_addr + 1 respectively.
@instruction({mem_addr: pointer(float32), a: m128})
def _mm_storel_pi(mem_addr, a):
    lower0 = a[0]
    store(mem_addr, lower0)
    lower1 = a[1]
    store(mem_addr + 1, lower1)
# Write the elements of `a` in reverse order (a3, a2, a1, a0) to mem_addr
# via store_aligned.
@instruction({mem_addr: pointer(float32), a: m128})
def _mm_storer_ps(mem_addr, a):
    reversed_vec = m128(a[3], a[2], a[1], a[0])
    store_aligned(mem_addr, reversed_vec)
# Write the whole vector `a` to mem_addr via store (the non-`_aligned` helper).
# No result type is declared and nothing is returned.
@instruction({mem_addr: pointer(float32), a: m128})
def _mm_storeu_ps(mem_addr, a):
    store(mem_addr, a)
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment