Skip to content

Instantly share code, notes, and snippets.

Last active April 5, 2021 22:40
Show Gist options
  • Save TellowKrinkle/c5bc0f39a139e0ce8ec4176e2540919b to your computer and use it in GitHub Desktop.
Save TellowKrinkle/c5bc0f39a139e0ce8ec4176e2540919b to your computer and use it in GitHub Desktop.
Swizzle emulation planning tool for GSdx
<!DOCTYPE html>
<title>GSdx Shuffle Planner</title>
<textarea id="input" oninput="refresh()"></textarea>
<table id="SrcID"></table>
<table id="DstID"></table>
Type commands into the text box and see them executed below. Src ID marks each block with its starting position; Dst ID marks each block with its ending position.<br />
Supported commands:
<li>load v0 v1 [v2 v3]: Load from memory. Reorder the vectors to load out of order. Specifying two vectors puts all following instructions in 256-bit (AVX2) mode, four selects 128-bit (SSE) mode</li>
<li>sw8, sw16, sw32, sw64, sw128: Corresponds to GSVector functions of the same name. Specify two (256-bit) or four (128-bit) vectors, same as you would the GSVector functions</li>
<li>pblendd v0 v1 [v2 v3] 0xaa: Execute a pblendd from v0 to v1 and the inverse, swapping words between v0 and v1</li>
<li>pblendw v0 v1 [v2 v3] 0xaa: Like pblendd but with pblendw</li>
<li>pshufb v0 [v1 v2 v3] 0 1 3 2 etc: Execute a pshufb on the given vectors (0x80 not supported). The mask list is looped, so you can specify 16 values with 256-bit vectors and the same mask will be applied to both lanes</li>
<li>vpermq v0 [v1] xyzw|0xe4: Execute a vpermq on the given vectors (only supported in AVX2 mode)</li>
<li>pshufd v0 [v1 v2 v3] xyzw|0xe4: Execute a pshufd on the given vectors</li>
<li>pshuf[lh]w v0 [v1 v2 v3] xyzw|0xe4: Execute a pshuflw / pshufhw on the given vectors</li>
<li>shufps v0 v1 [v1 v0] xxyy|0x50 [zzww|0xfa]: For each vector-length section in memory, execute a shufps on the n*2 and n*2+1 input vector, using the nth input imm value. Both wrap around so you don't need to specify things extra times if it's the same each time</li>
<li>store v0 v1 [v2 v3]: Store to memory. Use if you plan to store out of order</li>
<li>inv: Switch vector with the indices of each element. Use if you want to work backwards instead of forwards (work with Dst IDs instead of Src IDs)</li>
<label for="examplelist">Load an example: </label><select id="examplelist"></select>
<button onclick="loadExample(document.getElementById('examplelist').value)">Load</button>
<style>
td.v0 {
  background-color: #FDA190;
}
td.v1 {
  background-color: #FDD699;
}
td.v2 {
  background-color: #B8E598;
}
td.v3 {
  background-color: #A5F0EB;
}
td {
  width: 1.5em;
  height: 1.5em;
  vertical-align: middle;
  text-align: center;
}
th {
  min-width: 10em;
}
#input {
  min-height: 10em;
  width: 40em;
}
</style>
/**
 * Gathers the listed vectors into consecutive slots at the front of the array.
 *
 * For each position `i` in `vectors`, the `vsize` elements starting at
 * `vectors[i] * vsize` in `arr` are copied to the slot starting at `i * vsize`
 * in the result. Slots not covered by `vectors` keep their value from `arr`.
 *
 * @param {Number} vsize Number of elements per vector
 * @param {Number[]} vectors Vector indices to read, in output order
 * @param {Number[]} ints Unused; present so all ops share one signature
 * @param {Number[]} arr Input elements (not modified)
 * @returns {Number[]} New array with the vectors rearranged
 */
function store(vsize, vectors, ints, arr) {
  const out = [...arr];
  for (let i = 0; i < vectors.length; i++) {
    const wbase = i * vsize;
    const rbase = vectors[i] * vsize;
    for (let j = 0; j < vsize; j++) {
      // Always read from the untouched input so earlier writes cannot be re-read.
      out[wbase + j] = arr[rbase + j];
    }
  }
  return out;
}
/**
 * Builds an interleaving ("sw") op that works on groups of `size` elements.
 *
 * The returned op requires that the listed vectors cover exactly 64 elements
 * (four 16-element vectors or two 32-element vectors). It produces four
 * 16-element output groups, each interleaving `size`-element chunks from a
 * pair of 16-element sources `a` and `b` (a-chunk, b-chunk, a-chunk, ...):
 *   - outputs 0 and 1 read the low 8 elements of their sources,
 *   - outputs 2 and 3 read the high 8 elements.
 * With 16-element vectors, outputs 0/2 use (vectors[0], vectors[1]) and
 * outputs 1/3 use (vectors[2], vectors[3]). With 32-element vectors, outputs
 * 0/2 use the first 16-element half of vectors[0] and vectors[1], and outputs
 * 1/3 use the second half. The 64 collected elements are then passed through
 * `store` with the same vector list.
 *
 * @param {Number} size Elements per interleaved chunk (1, 2, 4 or 8 as used by OPS)
 * @returns {function(Number, Number[], Number[], Number[]): Number[]}
 * @throws {string} "Unsupported" when vsize * vectors.length is not 64
 */
function makeSwizzle(size) {
  return function(vsize, vectors, ints, arr) {
    if (vsize * vectors.length !== 64) { throw "Unsupported"; }
    const out = [];
    for (let i = 0; i < 64; i++) {
      const low = i < 32;            // first half of the output reads low elements
      const low2 = (i % 32) < 16;    // selects the source pair / 16-element half
      let a;
      let b;
      if (vsize === 16) {
        a = low2 ? vectors[0] : vectors[2];
        b = low2 ? vectors[1] : vectors[3];
      } else {
        // 32-element vectors: address each 16-element half separately.
        a = vectors[0] * 2 + (low2 ? 0 : 1);
        b = vectors[1] * 2 + (low2 ? 0 : 1);
      }
      const sidx = ((i % 16) / size) | 0;  // chunk index within this output group
      const src = sidx & 1 ? b : a;        // odd chunks come from b, even from a
      out.push(arr[src * 16 + (low ? 0 : 8) + size * (sidx >> 1) + (i % size)]);
    }
    return store(vsize, vectors, ints, out);
  };
}
/**
 * Interleaves the 16-element halves of two 32-element vectors.
 *
 * The result is, in order: first half of vectors[0], first half of
 * vectors[1], second half of vectors[0], second half of vectors[1].
 * Only those 64 elements are returned.
 *
 * @param {Number} vsize Elements per vector; must be 32
 * @param {Number[]} vectors The two vector indices to combine
 * @param {Number[]} ints Unused
 * @param {Number[]} arr Input elements (not modified)
 * @returns {Number[]} The 64 rearranged elements
 * @throws {string} "Unsupported" when vsize is not 32
 */
function sw128(vsize, vectors, ints, arr) {
  if (vsize !== 32) { throw "Unsupported"; }
  const out = [];
  // part bit 1 selects the vector, bit 0 selects which 16-element half.
  for (const part of [0, 2, 1, 3]) {
    const src = vectors[part >> 1] * 32 + (part & 1 ? 16 : 0);
    for (let i = 0; i < 16; i++) {
      out.push(arr[src + i]);
    }
  }
  return out;
}
/**
 * Lifts a per-lane transform into an op that runs on every 16-element lane
 * of each listed vector. Elements outside the listed vectors are unchanged.
 *
 * @param {function(Number[], Number, Number[]): Number} f In-lane transform;
 *   called as f(ints, j, section) where `section` is a copy of the lane's 16
 *   input elements and `j` (0-15) is the output position within the lane
 * @returns {function(Number, Number[], Number[], Number[]): Number[]}
 */
function makeInLane(f) {
  return function(vsize, vectors, ints, arr) {
    const out = [...arr];
    const lanes = (vsize / 16) | 0;
    for (const vector of vectors) {
      for (let i = 0; i < lanes; i++) {
        const base = vector * vsize + i * 16;
        // Snapshot the lane so f always sees the pre-transform values.
        const section = arr.slice(base, base + 16);
        for (let j = 0; j < 16; j++) {
          out[base + j] = f(ints, j, section);
        }
      }
    }
    return out;
  };
}
/**
 * Lifts a whole-vector transform into an op that runs once per listed vector.
 * Elements outside the listed vectors are unchanged.
 *
 * @param {function(Number[], Number, Number[]): Number} f Per-vector transform;
 *   called as f(ints, j, section) where `section` is a copy of the vector's
 *   `vsize` input elements and `j` is the output position within the vector
 * @returns {function(Number, Number[], Number[], Number[]): Number[]}
 */
function makePerVector(f) {
  return function(vsize, vectors, ints, arr) {
    const out = [...arr];
    for (const vector of vectors) {
      const base = vector * vsize;
      // Snapshot the vector so f always sees the pre-transform values.
      const section = arr.slice(base, base + vsize);
      for (let j = 0; j < vsize; j++) {
        out[base + j] = f(ints, j, section);
      }
    }
    return out;
  };
}
/**
 * Builds a four-way shuffle transform over sections of `1 << shift` elements.
 *
 * For output position `idx`, the section it falls in selects a 2-bit field of
 * ints[0] (field n = bits 2n..2n+1); that field names the source section, and
 * the offset inside the section is preserved.
 *
 * @param {Number} shift log2 of the section size in elements
 * @returns {function(Number[], Number, Number[]): Number} Transform suitable
 *   for makeInLane / makePerVector
 */
function makeShuf(shift) {
  return function(ints, idx, arr) {
    const section = idx >> shift;
    const newSection = (ints[0] >> (section * 2)) & 3;
    const offset = idx - (section << shift);
    return arr[(newSection << shift) + offset];
  };
}
/**
 * Emulates shufps across the whole 64-element buffer.
 *
 * The output is built in `64 / vsize` vector-sized pieces. Piece `i` takes its
 * first source from vectors[2i] and its second from vectors[2i + 1], and its
 * mask from ints[i]; all three indices wrap around their lists. Within every
 * 16-element lane, the lower 8 output elements come from the first source and
 * the upper 8 from the second, with each 4-element group chosen by the
 * corresponding 2-bit field of the mask.
 *
 * @param {Number} vsize Elements per vector
 * @param {Number[]} vectors Source vector indices, consumed in pairs
 * @param {Number[]} ints One shuffle mask per output piece (wraps)
 * @param {Number[]} arr Input elements (not modified)
 * @returns {Number[]} The newly built elements
 */
function shufps(vsize, vectors, ints, arr) {
  const out = [];
  const pieces = (64 / vsize) | 0;
  for (let i = 0; i < pieces; i++) {
    const base1 = vectors[(i * 2) % vectors.length] * vsize;
    const base2 = vectors[(i * 2 + 1) % vectors.length] * vsize;
    const mask = ints[i % ints.length];
    for (let j = 0; j < vsize; j++) {
      const src = j & 8 ? base2 : base1;        // upper half of a lane reads the second source
      const section = (j >> 2) & 3;             // 4-element group within the lane
      const newSection = (mask >> (section * 2)) & 3;
      const idx = j - (section << 2) + (newSection << 2);
      out.push(arr[src + idx]);
    }
  }
  return out;
}
/**
 * Builds a blend op that exchanges elements between pairs of vectors.
 *
 * Vectors are consumed in pairs (vectors[0] with vectors[1], vectors[2] with
 * vectors[3], ...). Elements are grouped into units of `1 << shift`; unit `u`
 * of both vectors is swapped when bit `u & 7` of ints[0] is set, so the 8-bit
 * mask repeats every 8 units.
 *
 * @param {Number} shift log2 of the blend unit size in elements
 * @returns {function(Number, Number[], Number[], Number[]): Number[]}
 * @throws {string} "Unsupported" when given an odd number of vectors
 */
function makeBlend(shift) {
  return function(vsize, vectors, ints, arr) {
    // A vector without a partner would otherwise be swapped against index NaN,
    // filling it with undefined entries.
    if (vectors.length % 2 !== 0) { throw "Unsupported"; }
    const out = [...arr];
    for (let i = 0; i < vectors.length; i += 2) {
      const base1 = vectors[i] * vsize;
      const base2 = vectors[i + 1] * vsize;
      for (let j = 0; j < vsize; j++) {
        const unit = j >> shift;
        const swap = (ints[0] >> (unit & 7)) & 1;
        if (swap) {
          const tmp = out[base1 + j];
          out[base1 + j] = out[base2 + j];
          out[base2 + j] = tmp;
        }
      }
    }
    return out;
  };
}
/**
 * Inverts a permutation: swaps each element with its index.
 *
 * If `arr[i] === v` then the result has `out[v] === i`, turning a table of
 * source IDs per position into a table of destination positions per ID.
 *
 * @param {Number[]} arr Permutation of 0..arr.length-1
 * @returns {Number[]} The inverse permutation
 */
function inv(arr) {
  const out = [];
  for (let i = 0; i < arr.length; i++) {
    // NOTE(review): the loop body was missing from the recovered source; this
    // follows the documented behaviour of the "inv" command.
    out[arr[i]] = i;
  }
  return out;
}
/**
 * Command table: maps a lower-case command name to its implementation.
 * Every op is called as op(vsize, vectors, ints, arr) and returns the new
 * element array.
 * @type {Object.<string, function(Number, Number[], Number[], Number[]): Number[]>}
 */
const OPS = {
  "sw8": makeSwizzle(1),
  "sw16": makeSwizzle(2),
  "sw32": makeSwizzle(4),
  "sw64": makeSwizzle(8),
  "sw128": sw128,
  "pblendw": makeBlend(1),
  "pblendd": makeBlend(2),
  // Byte shuffle: the mask list wraps, and indices are relative to the
  // 16-element lane that the output position falls in.
  "pshufb": makePerVector(function(ints, idx, arr) {
    const newIDX = ints[idx % ints.length];
    if (newIDX >= 16) { throw "bad pshufb index"; }
    return arr[newIDX + ((idx / 16) | 0) * 16];
  }),
  "vpermq": makePerVector(makeShuf(3)),
  "pshufd": makeInLane(makeShuf(2)),
  // Shuffles the low 8 elements of each lane; the high 8 pass through.
  "pshuflw": makeInLane(function(ints, idx, arr) {
    return idx < 8 ? makeShuf(1)(ints, idx, arr) : arr[idx];
  }),
  // Shuffles the high 8 elements of each lane; the low 8 pass through.
  "pshufhw": makeInLane(function(ints, idx, arr) {
    return idx < 8 ? arr[idx] : makeShuf(1)(ints, idx - 8, arr.slice(8));
  }),
  "shufps": shufps,
  // Reads the listed vectors, in the order given, into a fresh array.
  "load": function(vsize, vectors, ints, arr) {
    const out = [];
    for (const vector of vectors) {
      const base = vector * vsize;
      for (let i = 0; i < vsize; i++) {
        // NOTE(review): this statement was missing from the recovered source;
        // reconstructed from the documented "load" behaviour.
        out.push(arr[base + i]);
      }
    }
    return out;
  },
  "store": store,
  "inv": function(vsize, vectors, ints, arr) {
    return inv(arr);
  },
};
/**
 * Parses and runs planner commands, remembering the vector width selected by
 * the most recent "load".
 */
class Executor {
  constructor() {
    // Elements per vector: 16 until a two-vector "load" switches it to 32.
    this.vsize = 16;
  }

  /**
   * Runs every command found in `str` against `arr`.
   *
   * Tokens are separated by any run of non-alphanumeric characters. `vN`
   * tokens are vector operands; decimal, `0x` hex and four-letter `xyzw`
   * tokens are integer operands; any other token starts a new command, which
   * is looked up case-insensitively in OPS.
   *
   * @param {string} str String to parse
   * @param {Number[]} arr Input vector
   * @returns {{desc: string, arr: Number[]}} Description of the executed
   *   commands (name plus vector operands) and the resulting elements
   * @throws {string} When a command name is not an entry of OPS; ops may also
   *   throw their own errors
   */
  execute(str, arr) {
    const args = str.split(/[^0-9a-zA-Z]+/);
    let vectors = [];
    let ints = [];
    let op = "";
    let desc = "";
    // Runs the pending command (if any) and resets the operand lists.
    const go = () => {
      if (op.length === 0) { return; }
      // Own-property check so inherited names such as "constructor" or
      // "toString" are rejected instead of being invoked as commands.
      if (!Object.prototype.hasOwnProperty.call(OPS, op)) {
        throw "Unknown command: " + op;
      }
      if (op === "load") {
        this.vsize = vectors.length === 2 ? 32 : 16;
      }
      if (desc.length !== 0) { desc += " "; }
      desc += op;
      for (const vector of vectors) {
        desc += " v" + vector;
      }
      arr = OPS[op](this.vsize, vectors, ints, arr);
      op = "";
      vectors = [];
      ints = [];
    };
    // NOTE(review): the operand pushes and the go() calls below were missing
    // from the recovered source and are reconstructed from the surrounding code.
    for (const arg of args) {
      if (/^v\d+$/.test(arg)) {
        vectors.push(parseInt(arg.slice(1), 10));
      } else if (/^\d+$/.test(arg)) {
        ints.push(parseInt(arg, 10));
      } else if (/^0x[A-Fa-f0-9]+$/.test(arg)) {
        // Only real hex digits: the previous A-f range also matched G-Z and
        // punctuation, producing NaN immediates.
        ints.push(parseInt(arg.slice(2), 16));
      } else if (/^[xyzw]{4}$/.test(arg)) {
        // Component letters pack into an immediate, first letter in the low bits.
        let n = 0;
        for (let i = 0; i < 4; i++) {
          n += ({x: 0, y: 1, z: 2, w: 3}[arg.charAt(i)] << (i * 2));
        }
        ints.push(n);
      } else {
        // A new command name ends the previous command.
        go();
        op = arg.toLowerCase();
      }
    }
    go();
    return {desc: desc, arr: arr};
  }
}
/**
 * Computes the starting labels for the "Dst ID" view: each of the 64 start
 * positions is labelled with the position it ends up at after running `str`.
 *
 * @param {string} str Full command text
 * @returns {Number[]} Inverse of the permutation produced by executing `str`
 *   on 0..63 (the identity when `str` is empty)
 */
function destIDs(str) {
  const identity = [...Array(64).keys()];
  if (str === "") { return identity; }
  const results = new Executor().execute(str, identity).arr;
  return inv(results);
}
/**
 * @typedef {Object} CreateNodeOptions
 * @property {string} [class]
 * @property {(string | null)[]} [classes]
 * @property {Object.<string, string>} [styles]
 * @property {string} [id]
 * @property {Node | string} [child]
 * @property {(Node | string | null)[]} [children]
 * @property {Object.<string, string>} [other] other attributes to add
 */

/**
 * Creates a new HTML node
 *
 * Classes and children from the list options are applied first, followed by
 * the single `class` / `child` option. String children become text nodes.
 * Falsy classes, children, style values and attribute values are skipped.
 *
 * @param {string} tag
 * @param {CreateNodeOptions} [options]
 * @returns {HTMLElement}
 */
function createNode(tag, options) {
  const node = document.createElement(tag);
  options = options || {};

  // Copy the lists before appending so the caller's arrays are not modified.
  const classes = [...(options.classes || [])];
  if (options.class) { classes.push(options.class); }
  for (const cls of classes) {
    if (cls) { node.classList.add(cls); }
  }

  const children = [...(options.children || [])];
  if (options.child) { children.push(options.child); }
  for (const child of children) {
    if (typeof child === "string") {
      node.appendChild(document.createTextNode(child));
    } else if (child) {
      node.appendChild(child);
    }
  }

  if (options.id) { node.id = options.id; }

  if (options.styles) {
    for (const name in options.styles) {
      if (!Object.prototype.hasOwnProperty.call(options.styles, name)) { continue; }
      if (!options.styles[name]) { continue; }
      node.style.setProperty(name, options.styles[name]);
    }
  }

  const other = options.other || {};
  for (const name in other) {
    if (!Object.prototype.hasOwnProperty.call(other, name)) { continue; }
    if (!other[name]) { continue; }
    node.setAttribute(name, other[name]);
  }
  return node;
}
/**
 * Builds one table row: a header cell followed by one cell per element.
 * Each cell gets the class "v0".."v3" according to which group of 16 its
 * value belongs to, which the stylesheet uses to colour it.
 *
 * @param {string} head Text for the row's header cell
 * @param {Number[]} arr Element values to display
 * @returns {HTMLElement} The populated <tr>
 */
function makeRow(head, arr) {
  const row = createNode("tr", {child: createNode("th", {child: head})});
  for (const item of arr) {
    const el = document.createElement("td");
    el.classList.add("v" + (item >> 4));
    // NOTE(review): the statements that fill and attach the cell were missing
    // from the recovered source; reconstructed as showing the element value.
    el.textContent = String(item);
    row.appendChild(el);
  }
  return row;
}
/**
 * Builds a table showing the element array after each non-empty line of
 * `str` has been executed in turn, starting from `arr`.
 *
 * @param {string} title Caption shown in the table header
 * @param {string} str Command text, one step per line
 * @param {Number[]} arr Initial element values
 * @returns {HTMLElement} A detached <table> with a header and one row per line
 */
function makeTable(title, str, arr) {
  // One executor for the whole table so the vector width chosen by "load"
  // carries over to later lines.
  const e = new Executor();
  const head = createNode("th", {child: title, other: {colspan: arr.length + 1}});
  const table = createNode("table", {child: createNode("thead", {child: createNode("tr", {child: head})})});
  const body = createNode("tbody");
  for (const line of str.split("\n")) {
    if (line === "") { continue; }
    const res = e.execute(line, arr);
    body.appendChild(makeRow(res.desc, res.arr));
    arr = res.arr;
  }
  // NOTE(review): attaching the body was missing from the recovered source;
  // without it the rows would never be part of the returned table.
  table.appendChild(body);
  return table;
}
/**
 * Re-renders both result tables from the current contents of the text box.
 * If the commands cannot be executed, the previously rendered tables are
 * left in place.
 */
function refresh() {
  const input = document.getElementById("input").value;
  try {
    const srcID = makeTable("Src ID", input, [...Array(64).keys()]);
    const dstID = makeTable("Dst ID", input, destIDs(input));
    // Both tables are built before either is shown, so a failure never
    // leaves the page half-updated.
    document.getElementById("SrcID").innerHTML = srcID.innerHTML;
    document.getElementById("DstID").innerHTML = dstID.innerHTML;
  } catch (err) {
    // Errors are expected while a command is still being typed; keep the UI
    // quiet but leave a trace for debugging.
    console.debug("Shuffle planner: could not execute input", err);
  }
}
/**
 * Built-in example programs, keyed by the name shown in the example list.
 * @type {Object.<string, string>}
 */
const EXAMPLES = {
  "SSE2 ReadColumn16":
    "load v0 v1 v2 v3\n" +
    "sw16 v0 v1 v2 v3\n" +
    "sw32 v0 v1 v2 v3\n" +
    "sw16 v0 v2 v1 v3\n",
  "SSE3 ReadColumn16":
    "load v0 v1 v2 v3\n" +
    "pshufb v0 v1 v2 v3 0 1 4 5 8 9 12 13 2 3 6 7 10 11 14 15\n" +
    "sw32 v0 v1 v2 v3\n" +
    "sw64 v0 v1 v2 v3",
  "AVX2 ReadColumn16":
    "load v0 v1\n" +
    "pshufb v0 v1 0 1 4 5 8 9 12 13 2 3 6 7 10 11 14 15\n" +
    "vpermq v0 v1 xzyw\n" +
    "shufps v0 v1 xzxz ywyw\n"
};
// Offer every built-in example in the "Load an example" drop-down.
for (const key of Object.keys(EXAMPLES)) {
  const node = createNode("option", {child: key, other: {value: key}});
  // NOTE(review): the statement that attaches the option was missing from the
  // recovered source; "examplelist" is the <select> the Load button reads.
  document.getElementById("examplelist").appendChild(node);
}
/**
 * Replaces the text box contents with a built-in example and re-renders.
 * Unknown names are ignored.
 *
 * @param {string} name Key of the example in EXAMPLES
 */
function loadExample(name) {
  // Own-property check so inherited names (e.g. "constructor") never match.
  if (!Object.prototype.hasOwnProperty.call(EXAMPLES, name) || !EXAMPLES[name]) {
    return;
  }
  document.getElementById("input").value = EXAMPLES[name];
  // Assigning .value from script does not fire the input event, so the
  // tables must be refreshed explicitly.
  refresh();
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment