Skip to content

Instantly share code, notes, and snippets.

@TimothyGu
Last active August 26, 2015 02:53
Show Gist options
  • Star 0 You must be signed in to star a gist
  • Fork 0 You must be signed in to fork a gist
  • Save TimothyGu/5063105e0b1bc5cf18e8 to your computer and use it in GitHub Desktop.
Save TimothyGu/5063105e0b1bc5cf18e8 to your computer and use it in GitHub Desktop.
;******************************************************************************
;* SIMD-optimized HuffYUV encoding functions
;* Copyright (c) 2000, 2001 Fabrice Bellard
;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at>
;* MMX optimization by Nick Kurshev <nickols_k@mail.ru>
;* Ported to NASM syntax by Tiancheng "Timothy" Gu <timothygu99@gmail.com>
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or
;* modify it under the terms of the GNU Lesser General Public
;* License as published by the Free Software Foundation; either
;* version 2.1 of the License, or (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
;* Lesser General Public License for more details.
;*
;* You should have received a copy of the GNU Lesser General Public
;* License along with FFmpeg; if not, write to the Free Software
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
;*
%include "libavutil/x86/x86util.asm"
SECTION .text
; void ff_sub_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *src1,
; const uint8_t *src2, int w,
; int *left, int *left_top)
; TODO: sse2?
INIT_MMX mmxext
;------------------------------------------------------------------------------
; Vector loop of the HuffYUV median-prediction residual, 8 bytes per pass.
;
; Per byte lane, with
;     T  = src1[i]        L = src2[i - 1]
;     LT = src1[i - 1]    X = src2[i]
; the loop stores
;     dst[i] = X - median(L, T, L + T - LT)        (bytewise, wrapping)
;
; Registers: dstq/src1q/src2q = buffer pointers, wq = byte count,
;            iq = running byte offset (scratch), m0-m5 = MMX temporaries.
;
; Notes:
;  * left / left_top are not accessed anywhere in this loop.
;  * Lane 0 of the first pass uses LT = 0 and L = src2[-1].
;  * Whole 8-byte vectors are loaded/stored and the body runs before w is
;    tested, so memory accesses are not clipped to w bytes.
;------------------------------------------------------------------------------
cglobal sub_hfyu_median_pred, 6,7,0, dst, src1, src2, w, left, left_top, i
    ; FIXME: move w to ptrdiff_t
    movsxdifnidn wq, wd                 ; w is passed as int; widen for the compare
    xor          iq, iq                 ; i = 0 (cglobal does not initialise it)

    movu         m0, [src1q]            ; first T vector
    psllq        m0, 8                  ; shift up one byte -> LT, lane 0 = 0

.loop:
    movu         m1, [src1q + iq]       ; T
    movu         m2, [src2q + iq - 1]   ; L
    movu         m3, [src2q + iq]       ; X
    mova         m4, m2                 ; L
    psubb        m2, m0                 ; L - LT
    paddb        m2, m1                 ; L + T - LT
    mova         m5, m4                 ; L
    pmaxub       m4, m1                 ; max(T, L)
    pminub       m1, m5                 ; min(T, L)
    pminub       m4, m2                 ; min(max(T, L), L + T - LT)
    pmaxub       m4, m1                 ; pred = max(min(max(T, L), L + T - LT), min(T, L))
    psubb        m3, m4                 ; X - pred
    movu         [dstq + iq], m3

    add          iq, 8
    movu         m0, [src1q + iq - 1]   ; LT for the next pass
    cmp          iq, wq
    jb           .loop
movu
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment