Last active
August 26, 2015 02:53
-
-
Save TimothyGu/5063105e0b1bc5cf18e8 to your computer and use it in GitHub Desktop.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
;****************************************************************************** | |
;* SIMD-optimized HuffYUV encoding functions | |
;* Copyright (c) 2000, 2001 Fabrice Bellard | |
;* Copyright (c) 2002-2004 Michael Niedermayer <michaelni@gmx.at> | |
;* MMX optimization by Nick Kurshev <nickols_k@mail.ru> | |
;* Ported to NASM syntax by Tiancheng "Timothy" Gu <timothygu99@gmail.com> | |
;* | |
;* This file is part of FFmpeg. | |
;* | |
;* FFmpeg is free software; you can redistribute it and/or | |
;* modify it under the terms of the GNU Lesser General Public | |
;* License as published by the Free Software Foundation; either | |
;* version 2.1 of the License, or (at your option) any later version. | |
;* | |
;* FFmpeg is distributed in the hope that it will be useful, | |
;* but WITHOUT ANY WARRANTY; without even the implied warranty of | |
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | |
;* Lesser General Public License for more details. | |
;* | |
;* You should have received a copy of the GNU Lesser General Public | |
;* License along with FFmpeg; if not, write to the Free Software | |
;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA | |
;* | |
%include "libavutil/x86/x86util.asm" | |
SECTION .text | |
; void ff_sub_hfyu_median_pred_mmxext(uint8_t *dst, const uint8_t *src1, | |
; const uint8_t *src2, int w, | |
; int *left, int *left_top) | |
; TODO: sse2? | |
INIT_MMX mmxext | |
cglobal sub_hfyu_median_pred, 6,7,0, dst, src1, src2, w, left, left_top, i | |
; FIXME: move w to ptrdiff_t | |
movsxdifnidn w, wd | |
movu m0, [src1] ; LT | |
LSHIFT m0, 8 | |
.loop: | |
movu m1, [src1 + i] ; T | |
movu m2, [src2 + i - 1] ; L | |
movu m3, [src2 + i] ; X | |
mova m4, m2 ; L | |
psubb m2, m0 ; L - LT | |
paddb m2, m1 ; L + T - LT | |
mova m5, m4 ; L | |
pmaxub m4, m1 ; max(T, L) | |
pminub m1, m5 ; min(T, L) | |
pminub m4, m2 ; min(max(T, L), L + T - LT) | |
pmaxub m4, m1 ; pred = max(min(max(T, L), L + T - LT), min(T, L)) | |
psubb m3, m4 ; X - pred (residual written to dst below) | |
movu [dst + i], m3 | |
add i, 8 | |
movu m0, [src1 + i - 1] | |
cmp i, w | |
jb .loop | |
movu |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment