Skip to content

Instantly share code, notes, and snippets.

@chikuzen
Created March 18, 2017 19:41
Show Gist options
  • Save chikuzen/77e70f8431f59bc9192987cfe040df9c to your computer and use it in GitHub Desktop.
Save chikuzen/77e70f8431f59bc9192987cfe040df9c to your computer and use it in GitHub Desktop.
#include <algorithm>
#include <tmmintrin.h>
#define WIN32_LEAN_AND_MEAN
#define NOMINMAX
#include <windows.h>
#include <avisynth.h>
// Short alias for the AviSynth script environment type used in every callback signature below.
using ise_t = IScriptEnvironment;
static void convert_c(
const BYTE* srcp, BYTE* dstp, int width, int height, int spitch,
int dpitch) noexcept
{
for (int y = 0; y < height; ++y) {
int* d = reinterpret_cast<int*>(dstp);
for (int x = 0; x < width; ++x) {
int x4 = 4 * x;
d[x] = srcp[x4 +0] + srcp[x4 + 1] + srcp[x4 + 2];
}
srcp += spitch;
dstp += dpitch;
}
}
// SSE2 implementation: converts four 4-byte pixels per iteration into four
// 32-bit sums of each pixel's first three bytes (the fourth byte is masked off).
//
//   srcp   - first source row; each pixel occupies 4 bytes. No alignment is
//            required: the source is read with unaligned loads because the
//            pointer comes from the upstream clip and is not guaranteed to sit
//            on a 16-byte boundary.
//   dstp   - first destination row; every row start must be 16-byte aligned
//            because results are written with aligned stores.
//   width  - pixels per row; rows are processed in groups of four, so up to
//            three extra pixels are read and written when width is not a
//            multiple of four.
//   height - number of rows
//   spitch - byte distance between consecutive source rows
//   dpitch - byte distance between consecutive destination rows
static void convert_sse2(
    const BYTE* srcp, BYTE* dstp, int width, int height, int spitch,
    int dpitch) noexcept
{
    const __m128i m0 = _mm_set1_epi32(0x00FFFFFF);  // drops byte 3 of every pixel
    const __m128i m1 = _mm_set1_epi32(0x00FF00FF);  // keeps bytes 0 and 2 of every dword
    const __m128i m2 = _mm_set1_epi16(1);           // multiplier of 1 for the pairwise add

    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; x += 4) {
            // Unaligned load: safe for any source pointer.
            __m128i s0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(srcp + 4 * x));
            s0 = _mm_and_si128(s0, m0);

            // Shifting by one byte moves byte 1 of each pixel into the low
            // byte of its dword; byte 3 was already zeroed so nothing
            // unwanted lands in the kept positions.
            __m128i s1 = _mm_srli_si128(s0, 1);

            s0 = _mm_and_si128(s0, m1);   // 16-bit lanes: [byte0, byte2] per pixel
            s1 = _mm_and_si128(s1, m1);   // 16-bit lanes: [byte1, 0]     per pixel
            s0 = _mm_add_epi16(s0, s1);   // 16-bit lanes: [byte0 + byte1, byte2]

            // madd with all-ones adds each adjacent 16-bit pair into a 32-bit lane.
            s0 = _mm_madd_epi16(s0, m2);

            _mm_store_si128(reinterpret_cast<__m128i*>(dstp + 4 * x), s0);
        }
        srcp += spitch;
        dstp += dpitch;
    }
}
// SSSE3 implementation: converts eight 4-byte pixels per iteration into eight
// 32-bit sums of each pixel's first three bytes (the fourth byte is masked off).
//
//   srcp   - first source row; each pixel occupies 4 bytes. No alignment is
//            required: the source is read with unaligned loads because the
//            pointer comes from the upstream clip and is not guaranteed to sit
//            on a 16-byte boundary.
//   dstp   - first destination row; every row start must be 16-byte aligned
//            because results are written with aligned stores.
//   width  - pixels per row; rows are processed in groups of eight, so up to
//            seven extra pixels are read and written when width is not a
//            multiple of eight.
//   height - number of rows
//   spitch - byte distance between consecutive source rows
//   dpitch - byte distance between consecutive destination rows
static void convert_ssse3(
    const BYTE* srcp, BYTE* dstp, int width, int height, int spitch,
    int dpitch) noexcept
{
    const __m128i m0 = _mm_set1_epi32(0x00FFFFFF);  // drops byte 3 of every pixel
    const __m128i m1 = _mm_set1_epi32(0x00FF00FF);  // keeps bytes 0 and 2 of every dword
    const __m128i zero = _mm_setzero_si128();

    for (int y = 0; y < height; ++y) {
        for (int x = 0; x < width; x += 8) {
            // Unaligned loads: safe for any source pointer.
            __m128i s0 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(srcp + 4 * x));
            __m128i s1 = _mm_loadu_si128(reinterpret_cast<const __m128i*>(srcp + 4 * x + 16));
            s0 = _mm_and_si128(s0, m0);
            s1 = _mm_and_si128(s1, m0);

            // A one-byte shift brings byte 1 of each pixel into the low byte
            // of its dword; byte 3 is already zero at this point.
            __m128i t0 = _mm_srli_si128(s0, 1);
            __m128i t1 = _mm_srli_si128(s1, 1);

            s0 = _mm_and_si128(s0, m1);   // 16-bit lanes: [byte0, byte2] per pixel
            s1 = _mm_and_si128(s1, m1);
            t0 = _mm_and_si128(t0, m1);   // 16-bit lanes: [byte1, 0] per pixel
            t1 = _mm_and_si128(t1, m1);
            s0 = _mm_add_epi16(s0, t0);   // 16-bit lanes: [byte0 + byte1, byte2]
            s1 = _mm_add_epi16(s1, t1);

            // Horizontal add collapses each pair into one 16-bit sum:
            // lanes 0..3 come from s0 (pixels 0..3), lanes 4..7 from s1 (pixels 4..7).
            s0 = _mm_hadd_epi16(s0, s1);

            // Zero-extend the eight 16-bit sums to 32 bits.
            t0 = _mm_unpacklo_epi16(s0, zero);
            t1 = _mm_unpackhi_epi16(s0, zero);

            _mm_store_si128(reinterpret_cast<__m128i*>(dstp + 4 * x), t0);
            _mm_store_si128(reinterpret_cast<__m128i*>(dstp + 4 * x + 16), t1);
        }
        srcp += spitch;
        dstp += dpitch;
    }
}
// Filter that turns an RGB32 clip into a Y8 clip four times as wide, where
// every group of four output bytes holds the 32-bit sum of the first three
// bytes of the corresponding source pixel.
//
// `type` selects the implementation: 0 = plain C, 1 = SSE2, anything else = SSSE3.
class MyGrey: public GenericVideoFilter {
    const int type;

public:
    // Rewrites the output format: Y8, width = 4 * source width rounded up to
    // a multiple of 32 bytes (i.e. a multiple of 8 output ints per row).
    MyGrey(PClip c, const int t): GenericVideoFilter(c), type(t)
    {
        vi.pixel_type = VideoInfo::CS_Y8;
        vi.width *= 4;
        vi.width = (vi.width + 31) & ~31;
    }

    // Fetches source frame `n`, allocates an output frame and runs the
    // converter selected by `type` over it.
    PVideoFrame __stdcall GetFrame(int n, ise_t* env) override
    {
        auto src = child->GetFrame(n, env);
        auto dst = env->NewVideoFrame(vi);

        const BYTE* srcp = src->GetReadPtr();
        BYTE* dstp = dst->GetWritePtr();
        const int spitch = src->GetPitch();
        const int dpitch = dst->GetPitch();

        // Number of 32-bit results per output row. Because vi.width was
        // rounded up in the constructor this is always a multiple of 8,
        // which is what the SIMD converters step by.
        // NOTE(review): this count is derived from the padded output width,
        // so the converters may read source bytes past the clip's visible
        // row; that relies on the source pitch covering the padded range --
        // confirm for the host's frame alignment.
        const int width = vi.width / 4;

        switch (type) {
        case 0:
            convert_c(srcp, dstp, width, vi.height, spitch, dpitch);
            break;
        case 1:
            convert_sse2(srcp, dstp, width, vi.height, spitch, dpitch);
            break;
        default:
            convert_ssse3(srcp, dstp, width, vi.height, spitch, dpitch);
            break;
        }
        return dst;
    }

    // Script-side factory registered with AddFunction.
    //   args[0] - input clip, must be RGB32
    //   args[1] - optional `type` (default 0), clamped to the range 0..2
    // Raises a script error when the clip is not RGB32 or when the requested
    // SIMD implementation is not supported by the CPU.
    static AVSValue __cdecl create(AVSValue args, void*, ise_t* env)
    {
        PClip clip = args[0].AsClip();
        const VideoInfo& vi = clip->GetVideoInfo();
        if (!vi.IsRGB32()) {
            env->ThrowError("not RGBA clip.");
        }

        const int type = std::min(std::max(args[1].AsInt(0), 0), 2);

        // Refuse SIMD paths the CPU cannot execute instead of crashing with
        // an illegal-instruction fault on the first rendered frame.
        const auto cpu_flags = env->GetCPUFlags();
        if (type == 1 && !(cpu_flags & CPUF_SSE2)) {
            env->ThrowError("MyGrey: type=1 requires SSE2.");
        }
        if (type == 2 && !(cpu_flags & CPUF_SSSE3)) {
            env->ThrowError("MyGrey: type=2 requires SSSE3.");
        }

        return new MyGrey(clip, type);
    }
};
// Linkage table pointer handed over by the host; set in AvisynthPluginInit3.
// Presumably consumed by the helpers declared in avisynth.h.
const AVS_Linkage* AVS_linkage = nullptr;

// Exported plugin entry point: stores the host's linkage table and registers
// the "MyGrey" script function, which takes a clip and an optional integer
// named `type`. Returns nullptr.
extern "C" __declspec(dllexport) const char* __stdcall
AvisynthPluginInit3(ise_t* env, const AVS_Linkage* const vectors)
{
    const char* const function_name = "MyGrey";
    const char* const parameter_spec = "c[type]i";

    // The linkage table must be in place before any AviSynth API call is made.
    AVS_linkage = vectors;

    env->AddFunction(function_name, parameter_spec, &MyGrey::create, nullptr);
    return nullptr;
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment