Skip to content

Instantly share code, notes, and snippets.

@Const-me
Created November 23, 2019 00:42
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save Const-me/0f0c283a0b998aa9977550d85fa33958 to your computer and use it in GitHub Desktop.
Save Const-me/0f0c283a0b998aa9977550d85fa33958 to your computer and use it in GitHub Desktop.
using System;
using System.Diagnostics;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
namespace SimdBrightness
{
static class Program
{
/// <summary>Load 16 consecutive bytes (4 pixels at 4 bytes per pixel) into a vector of four 32-bit lanes.</summary>
/// <param name="src">Address of the first byte to read; 16 bytes starting here must be readable.</param>
/// <returns>The 16 bytes, reinterpreted as four ints (one per pixel).</returns>
static unsafe Vector128<int> load4( byte* src ) => Sse2.LoadVector128( src ).AsInt32();
/// <summary>Pack the red channel of 8 pixels into 16-bit lanes, with the 8-bit value placed in the upper byte (0x0000 .. 0xFF00).</summary>
/// <param name="a">First 4 pixels, one per 32-bit lane; they end up in the lower 4 output lanes.</param>
/// <param name="b">Next 4 pixels, one per 32-bit lane; they end up in the upper 4 output lanes.</param>
/// <param name="mask">Per-lane mask selecting the red byte; expected to be 0x000000FF in every lane.</param>
/// <returns>8 unsigned 16-bit lanes, each holding <c>red &lt;&lt; 8</c> when the mask is 0x000000FF.</returns>
static Vector128<ushort> packRed( Vector128<int> a, Vector128<int> b, Vector128<int> mask )
{
    // Isolate the selected byte of every pixel and narrow 32-bit lanes to 16-bit lanes.
    Vector128<ushort> narrowed = Sse41.PackUnsignedSaturate( Sse2.And( a, mask ), Sse2.And( b, mask ) );

    // With a 0xFF mask every lane is 0x00RR, so shifting the whole register left by one byte
    // moves RR into the upper byte of its own lane and pulls a zero byte in from the lane below.
    return Sse2.ShiftLeftLogical128BitLane( narrowed, 1 );
}
/// <summary>Pack the green channel of 8 pixels into 16-bit lanes, with the 8-bit value kept in the upper byte (0x0000 .. 0xFF00).</summary>
/// <param name="a">First 4 pixels, one per 32-bit lane; they end up in the lower 4 output lanes.</param>
/// <param name="b">Next 4 pixels, one per 32-bit lane; they end up in the upper 4 output lanes.</param>
/// <param name="mask">Per-lane mask selecting the green byte; expected to be 0x0000FF00 in every lane.</param>
/// <returns>8 unsigned 16-bit lanes, each holding <c>green &lt;&lt; 8</c> when the mask is 0x0000FF00.</returns>
static Vector128<ushort> packGreen( Vector128<int> a, Vector128<int> b, Vector128<int> mask ) =>
    // The masked value already sits in bits 8..15, so narrowing to 16 bits is all that is needed.
    Sse41.PackUnsignedSaturate( Sse2.And( a, mask ), Sse2.And( b, mask ) );
/// <summary>Pack the blue channel of 8 pixels into 16-bit lanes, with the 8-bit value placed in the upper byte (0x0000 .. 0xFF00).</summary>
/// <param name="a">First 4 pixels, one per 32-bit lane; they end up in the lower 4 output lanes.</param>
/// <param name="b">Next 4 pixels, one per 32-bit lane; they end up in the upper 4 output lanes.</param>
/// <param name="mask">Per-lane mask applied after the byte shift; expected to be 0x0000FF00 in every lane.</param>
/// <returns>8 unsigned 16-bit lanes, each holding <c>blue &lt;&lt; 8</c> when the mask is 0x0000FF00.</returns>
static Vector128<ushort> packBlue( Vector128<int> a, Vector128<int> b, Vector128<int> mask )
{
    // Shifting the whole register right by one byte moves the third byte of every pixel
    // into the second byte position; the mask then discards whatever else slid into the lane.
    Vector128<int> lowerFour = Sse2.And( Sse2.ShiftRightLogical128BitLane( a, 1 ), mask );
    Vector128<int> upperFour = Sse2.And( Sse2.ShiftRightLogical128BitLane( b, 1 ), mask );
    return Sse41.PackUnsignedSaturate( lowerFour, upperFour );
}
/// <summary>Split 8 RGBA pixels into separate red, green and blue vectors, 16 bits per channel.</summary>
/// <param name="a">First 4 pixels, one per 32-bit lane.</param>
/// <param name="b">Next 4 pixels, one per 32-bit lane.</param>
/// <param name="red">Receives the red channel of the 8 pixels.</param>
/// <param name="green">Receives the green channel of the 8 pixels.</param>
/// <param name="blue">Receives the blue channel of the 8 pixels.</param>
/// <param name="lowByte">Mask passed to the red extraction.</param>
/// <param name="secondByte">Mask passed to both the green and the blue extraction.</param>
static void packRgb( Vector128<int> a, Vector128<int> b, out Vector128<ushort> red, out Vector128<ushort> green, out Vector128<ushort> blue, Vector128<int> lowByte, Vector128<int> secondByte )
{
    // The three extractions are independent of each other; blue reuses the green mask
    // because packBlue shifts its input by one byte before masking.
    blue = packBlue( a, b, secondByte );
    green = packGreen( a, b, secondByte );
    red = packRed( a, b, lowByte );
}
/// <summary>Compute brightness of 8 pixels as a weighted sum of their red, green and blue channels.</summary>
/// <param name="r">Red channel, one unsigned 16-bit lane per pixel.</param>
/// <param name="g">Green channel, one unsigned 16-bit lane per pixel.</param>
/// <param name="b">Blue channel, one unsigned 16-bit lane per pixel.</param>
/// <param name="redMul">Red weight in every lane, as a 16-bit fraction of 0x10000.</param>
/// <param name="greenMul">Green weight in every lane, as a 16-bit fraction of 0x10000.</param>
/// <param name="blueMul">Blue weight in every lane, as a 16-bit fraction of 0x10000.</param>
/// <returns>8 lanes holding the weighted sum shifted right by 8 bits, reinterpreted as signed 16-bit values.</returns>
static Vector128<short> brightness( Vector128<ushort> r, Vector128<ushort> g, Vector128<ushort> b, Vector128<ushort> redMul, Vector128<ushort> greenMul, Vector128<ushort> blueMul )
{
    // MultiplyHigh keeps the upper 16 bits of each 32-bit product, i.e. ( channel * weight ) >> 16.
    Vector128<ushort> weightedRed = Sse2.MultiplyHigh( r, redMul );
    Vector128<ushort> weightedGreen = Sse2.MultiplyHigh( g, greenMul );
    Vector128<ushort> weightedBlue = Sse2.MultiplyHigh( b, blueMul );

    // Saturating adds clamp at 0xFFFF instead of wrapping around.
    Vector128<ushort> redPlusGreen = Sse2.AddSaturate( weightedRed, weightedGreen );
    Vector128<ushort> total = Sse2.AddSaturate( redPlusGreen, weightedBlue );

    // Drop the low 8 bits so the result fits in a byte.
    return Sse2.ShiftRightLogical( total, 8 ).AsInt16();
}
// Channel weights as 16-bit fixed-point fractions of 0x10000 (65536).
// The cast truncates, giving 19589, 38444 and 7502, which sum to 65535.
const ushort mulRed = (ushort)( 65536 * 0.29891 );
const ushort mulGreen = (ushort)( 65536 * 0.58661 );
const ushort mulBlue = (ushort)( 65536 * 0.11448 );
/// <summary>Convert buffer from RGBA to grayscale.</summary>
/// <remarks>
/// <para>If your image has line paddings, you'll want to call this once per line, not for the complete image.</para>
/// <para>Whole groups of 16 pixels are converted with SSE. Any remaining pixels (when <paramref name="count"/> is not
/// a multiple of 16), or the entire buffer when SSE4.1 is not available, go through a scalar loop that uses the same
/// fixed-point arithmetic, so the function never touches memory outside the two buffers.</para>
/// </remarks>
/// <param name="src">Source pixels, 4 bytes each; bytes 0, 1 and 2 of a pixel are read as red, green and blue, byte 3 is ignored.</param>
/// <param name="dst">Destination buffer, 1 byte per pixel.</param>
/// <param name="count">Number of pixels to convert. Zero or negative values do nothing.</param>
static unsafe void convertToGrayscale( byte* src, byte* dst, long count )
{
    if( count <= 0 )
    {
        return;
    }

    byte* srcEnd = src + count * 4;

    if( Sse41.IsSupported )
    {
        // Each iteration reads 64 source bytes and writes 16 destination bytes, so only
        // whole groups of 16 pixels may be vectorized; round the pixel count down.
        byte* srcVectorEnd = src + ( count & ~15L ) * 4;

        var lowByte = Vector128.Create( 0xFF );
        var secondByte = Vector128.Create( 0xFF00 );
        var redMul = Vector128.Create( mulRed );
        var greenMul = Vector128.Create( mulGreen );
        var blueMul = Vector128.Create( mulBlue );

        while( src < srcVectorEnd )
        {
            var p1 = load4( src );
            var p2 = load4( src + 16 );
            var p3 = load4( src + 32 );
            var p4 = load4( src + 48 );

            // Pixels 0..7
            packRgb( p1, p2, out var r, out var g, out var b, lowByte, secondByte );
            var low = brightness( r, g, b, redMul, greenMul, blueMul );

            // Pixels 8..15
            packRgb( p3, p4, out r, out g, out b, lowByte, secondByte );
            var hi = brightness( r, g, b, redMul, greenMul, blueMul );

            // Narrow 16 x 16-bit results to 16 bytes and write them out.
            var bytes = Sse2.PackUnsignedSaturate( low, hi );
            Sse2.Store( dst, bytes );

            src += 64;
            dst += 16;
        }
    }

    // Scalar tail: same math as the vector path, ( ( channel << 8 ) * weight ) >> 16 per channel,
    // summed and shifted right by 8. uint keeps the intermediate product from overflowing.
    while( src < srcEnd )
    {
        uint red = ( ( (uint)src[ 0 ] << 8 ) * mulRed ) >> 16;
        uint green = ( ( (uint)src[ 1 ] << 8 ) * mulGreen ) >> 16;
        uint blue = ( ( (uint)src[ 2 ] << 8 ) * mulBlue ) >> 16;
        *dst = (byte)( ( red + green + blue ) >> 8 );

        src += 4;
        dst += 1;
    }
}
// Number of pixels in the benchmark buffer.
const long count = 1024 * 1024 * 511;

/// <summary>Benchmark entry point: fills a source buffer with pseudo-random bytes, converts it to grayscale three times and prints the time of each run.</summary>
/// <param name="args">Command-line arguments; not used.</param>
static unsafe void Main( string[] args )
{
    const int runs = 3;

    byte[] source = new byte[ 4 * count ];
    // Fixed seed, so every launch processes the same data.
    new Random( 11 ).NextBytes( source );
    byte[] dest = new byte[ count ];

    double[] elapsedMs = new double[ runs ];
    fixed( byte* pSource = source )
    fixed( byte* pDest = dest )
    {
        // Running the same code 3 times, on the same data.
        for( int run = 0; run < runs; run++ )
        {
            Stopwatch timer = Stopwatch.StartNew();
            convertToGrayscale( pSource, pDest, count );
            timer.Stop();
            elapsedMs[ run ] = timer.Elapsed.TotalMilliseconds;
        }
    }

    Console.WriteLine( "#1 {0} ms, #2 {1} ms, #3 {2} ms",
        elapsedMs[ 0 ], elapsedMs[ 1 ], elapsedMs[ 2 ] );
}
}
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment