Skip to content

Instantly share code, notes, and snippets.

@Hackerpilot
Last active October 12, 2015 23:31
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Hackerpilot/eabb136a840a67b6fb27 to your computer and use it in GitHub Desktop.
Save Hackerpilot/eabb136a840a67b6fb27 to your computer and use it in GitHub Desktop.
Sort the sixteen bytes of a 128-bit SIMD register (ubyte or byte contents)
module vector_sort;
/**
 * Template for an x86-64 inline-asm block that sorts 16 bytes in place.
 *
 * The text is a std.format template intended to be used as
 * `mixin(byteSortCode.format(minOp, maxOp))`:
 *   - `%1$s` is replaced by a packed per-byte minimum instruction,
 *   - `%2$s` is replaced by the matching packed per-byte maximum instruction
 *     (pminub/pmaxub for unsigned bytes, pminsb/pmaxsb for signed bytes).
 *
 * The enclosing function must declare a local named `p` holding the address
 * of the 16 bytes; they are loaded with an unaligned move, sorted into
 * ascending order and stored back through the same address.
 *
 * Algorithm: odd-even transposition sort. Every loop pass performs two
 * compare-exchange phases, first on the pairs (0,1), (2,3), ..., (14,15) and
 * then on the pairs (1,2), (3,4), ..., (13,14). Sixteen phases are enough to
 * sort sixteen elements, so the loop runs exactly eight passes.
 *
 * The block uses R8-R14, RCX and XMM0-XMM8 / XMM10-XMM15 as scratch, and
 * relies on pshufb (SSSE3) and movddup (SSE3) in addition to the substituted
 * min/max instructions.
 */
private enum byteSortCode = `asm
{
    // Load the 16 input bytes through the address held in p.
    mov R8, p;
    movdqu XMM0, [R8];

    // XMM3: shuffle control swapping the bytes of pairs (0,1), (2,3), ..., (14,15).
    mov R9, 0x0607040502030001;
    movq XMM14, R9;
    mov R9, 0x0e0f0c0d0a0b0809;
    movq XMM15, R9;
    movlhps XMM14, XMM15;
    movdqa XMM3, XMM14;

    // XMM4: selects the even byte positions, which receive the pair minimum.
    mov R10, 0x00ff00ff00ff00ff;
    movq XMM4, R10;
    movddup XMM4, XMM4;

    // XMM5: selects the odd byte positions, which receive the pair maximum.
    mov R11, 0xff00ff00ff00ff00;
    movq XMM5, R11;
    movddup XMM5, XMM5;

    // XMM6: shuffle control swapping the bytes of pairs (1,2), (3,4), ..., (13,14);
    // bytes 0 and 15 map to themselves.
    mov R12, 0x0805060304010200;
    movq XMM12, R12;
    mov R12, 0x0f0d0e0b0c090a07;
    movq XMM13, R12;
    movlhps XMM12, XMM13;
    movdqa XMM6, XMM12;

    // XMM7: positions 0, 1, 3, 5, ..., 13, 15 take the minimum in the second phase.
    mov R13, 0xff00ff00ff00ffff;
    movq XMM10, R13;
    mov R13, 0xff00ff00ff00ff00;
    movq XMM11, R13;
    movlhps XMM10, XMM11;
    movdqa XMM7, XMM10;

    // XMM8: positions 2, 4, 6, ..., 14 take the maximum in the second phase.
    mov R14, 0x00ff00ff00ff0000;
    movq XMM14, R14;
    mov R14, 0x00ff00ff00ff00ff;
    movq XMM15, R14;
    movlhps XMM14, XMM15;
    movdqa XMM8, XMM14;

    // Eight passes of two phases each give the sixteen phases the sort needs.
    mov RCX, 8;
begin:
    // Phase 1: compare-exchange within pairs (0,1), (2,3), ..., (14,15).
    movdqa XMM1, XMM0;
    pshufb XMM1, XMM3;
    movdqa XMM2, XMM1;
    %1$s XMM1, XMM0;
    %2$s XMM2, XMM0;
    pand XMM1, XMM4;
    pand XMM2, XMM5;
    por XMM1, XMM2;
    movdqa XMM0, XMM1;

    // Phase 2: compare-exchange within pairs (1,2), (3,4), ..., (13,14).
    pshufb XMM1, XMM6;
    movdqa XMM2, XMM1;
    %1$s XMM1, XMM0;
    %2$s XMM2, XMM0;
    pand XMM1, XMM7;
    pand XMM2, XMM8;
    por XMM1, XMM2;
    movdqa XMM0, XMM1;

    dec RCX;
    jnz begin;

    // Write the sorted bytes back over the input.
    movdqu [R8], XMM0;
}`;
/**
 * Produces an ascending-order copy of sixteen unsigned bytes.
 *
 * Params:
 * v = the sixteen values to order; passed by value, so the caller's array
 *     is left untouched
 *
 * Returns:
 * the contents of v arranged from smallest to largest
 */
ubyte[16] vectorSort(ubyte[16] v)
{
    import std.format : format;

    // Instantiate the asm template with the unsigned byte min/max instructions.
    enum string sortAsm = format(byteSortCode, "pminub", "pmaxub");

    // The generated asm locates its data through a local that must be named p.
    auto p = cast(size_t) v.ptr;
    mixin(sortAsm);
    return v;
}
/**
 * Produces an ascending-order copy of sixteen signed bytes.
 *
 * Params:
 * v = the sixteen values to order; passed by value, so the caller's array
 *     is left untouched
 *
 * Returns:
 * the contents of v arranged from smallest to largest
 */
byte[16] vectorSort(byte[16] v)
{
    import std.format : format;

    // Instantiate the asm template with the signed byte min/max instructions.
    enum string sortAsm = format(byteSortCode, "pminsb", "pmaxsb");

    // The generated asm locates its data through a local that must be named p.
    auto p = cast(size_t) v.ptr;
    mixin(sortAsm);
    return v;
}
/**
 * Demo and self-check: prints one sorted sample, then verifies both
 * vectorSort overloads against 10,000 random inputs each.
 */
void main()
{
    import std.algorithm : isSorted;
    import std.random : uniform;
    import std.stdio : writeln;

    ubyte[16] vec1 = [2, 9, 6, 7, 10, 1, 15, 16, 14, 13, 3, 8, 4, 5, 11, 12];
    writeln(vectorSort(vec1));

    byte[16] vec2;
    foreach (i; 0 .. 10_000)
    {
        // Inclusive bounds so that ubyte.max (255) is part of the tested range;
        // the default "[)" bounds would never produce it.
        foreach (ref n; vec1[])
            n = cast(ubyte) uniform!"[]"(0, cast(int) ubyte.max);
        // Keep the result in a local so the slice does not refer to a temporary.
        const sortedUnsigned = vectorSort(vec1);
        assert(isSorted(sortedUnsigned[]));

        // Exercise the signed overload over the full byte range as well.
        foreach (ref n; vec2[])
            n = cast(byte) uniform!"[]"(cast(int) byte.min, cast(int) byte.max);
        const sortedSigned = vectorSort(vec2);
        assert(isSorted(sortedSigned[]));
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment