forked from KolibriOS/kolibrios
179 lines
4.1 KiB
NASM
179 lines
4.1 KiB
NASM
|
;*****************************************************************************
;* x86-optimized functions for pullup filter
;*
;* This file is part of FFmpeg.
;*
;* FFmpeg is free software; you can redistribute it and/or modify
;* it under the terms of the GNU General Public License as published by
;* the Free Software Foundation; either version 2 of the License, or
;* (at your option) any later version.
;*
;* FFmpeg is distributed in the hope that it will be useful,
;* but WITHOUT ANY WARRANTY; without even the implied warranty of
;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;* GNU General Public License for more details.
;*
;* You should have received a copy of the GNU General Public License along
;* with FFmpeg; if not, write to the Free Software Foundation, Inc.,
;* 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
;******************************************************************************
|
||
|
|
||
|
%include "libavutil/x86/x86util.asm"
|
||
|
|
||
|
SECTION_TEXT
|
||
|
|
||
|
INIT_MMX mmx
|
||
|
;------------------------------------------------------------------------------
; pullup_filter_diff(first, second, size) -> eax
;
; Sums the absolute byte differences between `first` and `second` over
; 4 rows of 8 bytes. Both pointers advance by `size` bytes after each row.
;
; Registers:
;   r3 = rows still to process          r4 = scratch for the final add
;   m7 = zero (widening operand)        m4 = four 16-bit partial sums
;
; NOTE(review): `size` is added to the pointers at full register width, so the
; caller presumably passes a pointer-sized stride - confirm against the C
; prototype.
; NOTE(review): no explicit emms appears in this function; whether RET or the
; caller resets the MMX state is not visible from here.
;------------------------------------------------------------------------------
cglobal pullup_filter_diff, 3, 5, 8, first, second, size
    pxor       m7, m7                   ; m7 = 0, used to widen bytes and words
    pxor       m4, m4                   ; clear the running sums
    mov        r3, 4                    ; 4 rows per call

.next_row:
    movq       m0, [firstq]             ; 8 bytes of the current `first` row
    movq       m1, [secondq]            ; 8 bytes of the current `second` row
    movq       m2, m0
    add        firstq, sizeq
    add        secondq, sizeq

    ; |a - b| is split into two unsigned saturating differences; for every
    ; byte at most one of them is non-zero.
    psubusb    m2, m1                   ; m2 = max(first - second, 0)
    psubusb    m1, m0                   ; m1 = max(second - first, 0)
    movq       m0, m2
    movq       m3, m1
    punpcklbw  m0, m7                   ; bytes 0-3 of (first - second) as words
    punpcklbw  m1, m7                   ; bytes 0-3 of (second - first) as words
    punpckhbw  m2, m7                   ; bytes 4-7 of (first - second) as words
    punpckhbw  m3, m7                   ; bytes 4-7 of (second - first) as words
    paddw      m0, m1
    paddw      m2, m3
    paddw      m4, m0
    paddw      m4, m2

    dec        r3
    jnz        .next_row

    ; Horizontal add of the four 16-bit sums into a single 32-bit result.
    movq       m3, m4
    punpckhwd  m3, m7                   ; words 2,3 -> dwords
    punpcklwd  m4, m7                   ; words 0,1 -> dwords
    paddd      m4, m3                   ; two dword partial sums
    movd       eax, m4                  ; low partial sum
    psrlq      m4, 32
    movd       r4d, m4                  ; high partial sum
    add        eax, r4d
    RET
|
||
|
|
||
|
INIT_MMX mmx
|
||
|
;------------------------------------------------------------------------------
; pullup_filter_comb(first, second, size) -> eax
;
; Accumulates, over 4 rows of 8 bytes (j = 0..7), the two terms
;     |2*first[j]  - second[j - size] - second[j]|
;   + |2*second[j] - first[j]         - first[j + size]|
; and returns the 32-bit total in eax. Both pointers advance by `size` bytes
; after each row.
;
; `secondq` is moved back by one row on entry, so inside the loop
; [secondq] is the row above `second` and [secondq+sizeq] is `second` itself.
;
; Registers:
;   r3 = rows still to process          r4 = scratch for the final add
;   m7 = zero (widening operand)        m6 = four 16-bit partial sums
;
; Each absolute value is built from two unsigned saturating subtractions
; (c2 - s and s - c2, where c2 = 2*centre and s = sum of the two neighbours);
; at most one of them is non-zero per lane, so adding both yields |c2 - s|.
;
; NOTE(review): `size` is used at full register width in address arithmetic,
; so the caller presumably passes a pointer-sized stride - confirm against the
; C prototype.
; NOTE(review): no explicit emms appears in this function; whether RET or the
; caller resets the MMX state is not visible from here.
;------------------------------------------------------------------------------
cglobal pullup_filter_comb, 3, 5, 8, first, second, size
    mov        r3, 4                    ; 4 rows per call
    pxor       m6, m6                   ; clear the running sums
    pxor       m7, m7                   ; m7 = 0, used to widen bytes and words
    sub        secondq, sizeq           ; secondq -> row above `second`

.loop:
    ; Term 1, bytes 0-3: |2*first - second[-size] - second|
    ; The centre row must come from `first`, as in the bytes 4-7 block below;
    ; loading it from [secondq] would leave `first` out of this term entirely.
    movq       m0, [firstq]
    movq       m1, [secondq]
    punpcklbw  m0, m7
    movq       m2, [secondq+sizeq]
    punpcklbw  m1, m7
    punpcklbw  m2, m7
    paddw      m0, m0                   ; m0 = 2 * centre
    paddw      m1, m2                   ; m1 = sum of the two neighbours
    movq       m2, m0
    psubusw    m0, m1                   ; max(2c - s, 0)
    psubusw    m1, m2                   ; max(s - 2c, 0)
    paddw      m6, m0
    paddw      m6, m1

    ; Term 1, bytes 4-7
    movq       m0, [firstq]
    movq       m1, [secondq]
    punpckhbw  m0, m7
    movq       m2, [secondq+sizeq]
    punpckhbw  m1, m7
    punpckhbw  m2, m7
    paddw      m0, m0
    paddw      m1, m2
    movq       m2, m0
    psubusw    m0, m1
    psubusw    m1, m2
    paddw      m6, m0
    paddw      m6, m1

    ; Term 2, bytes 0-3: |2*second - first - first[+size]|
    movq       m0, [secondq+sizeq]
    movq       m1, [firstq]
    punpcklbw  m0, m7
    movq       m2, [firstq+sizeq]
    punpcklbw  m1, m7
    punpcklbw  m2, m7
    paddw      m0, m0
    paddw      m1, m2
    movq       m2, m0
    psubusw    m0, m1
    psubusw    m1, m2
    paddw      m6, m0
    paddw      m6, m1

    ; Term 2, bytes 4-7
    movq       m0, [secondq+sizeq]
    movq       m1, [firstq]
    punpckhbw  m0, m7
    movq       m2, [firstq+sizeq]
    punpckhbw  m1, m7
    punpckhbw  m2, m7
    paddw      m0, m0
    paddw      m1, m2
    movq       m2, m0
    psubusw    m0, m1
    psubusw    m1, m2
    paddw      m6, m0
    paddw      m6, m1

    add        firstq, sizeq
    add        secondq, sizeq
    dec        r3
    jnz        .loop

    ; Horizontal add of the four 16-bit sums into a single 32-bit result.
    movq       m5, m6
    punpcklwd  m6, m7                   ; words 0,1 -> dwords
    punpckhwd  m5, m7                   ; words 2,3 -> dwords
    paddd      m5, m6                   ; two dword partial sums
    movd       eax, m5                  ; low partial sum
    psrlq      m5, 32
    movd       r4d, m5                  ; high partial sum
    add        eax, r4d
    RET
|
||
|
|
||
|
INIT_MMX mmx
|
||
|
;------------------------------------------------------------------------------
; pullup_filter_var(first, second, size) -> eax
;
; Sums the absolute byte differences between each row of `first` and the row
; `size` bytes below it, for 3 consecutive row pairs of 8 bytes each, and
; returns that sum multiplied by 4. `second` is not referenced here.
;
; Registers:
;   r3 = row pairs still to process     r4 = scratch for the final add
;   m7 = zero (widening operand)        m4 = four 16-bit partial sums
;
; NOTE(review): `size` is added to the pointer at full register width, so the
; caller presumably passes a pointer-sized stride - confirm against the C
; prototype.
; NOTE(review): no explicit emms appears in this function; whether RET or the
; caller resets the MMX state is not visible from here.
;------------------------------------------------------------------------------
cglobal pullup_filter_var, 3, 5, 8, first, second, size
    pxor       m7, m7                   ; m7 = 0, used to widen bytes and words
    pxor       m4, m4                   ; clear the running sums
    mov        r3, 3                    ; 3 row pairs per call

.next_pair:
    movq       m0, [firstq]             ; current row
    add        firstq, sizeq
    movq       m1, [firstq]             ; row below (becomes current next time)
    movq       m2, m0

    ; |a - b| is split into two unsigned saturating differences; for every
    ; byte at most one of them is non-zero.
    psubusb    m2, m1                   ; m2 = max(cur - next, 0)
    psubusb    m1, m0                   ; m1 = max(next - cur, 0)
    movq       m0, m2
    movq       m3, m1
    punpcklbw  m0, m7                   ; bytes 0-3 of (cur - next) as words
    punpcklbw  m1, m7                   ; bytes 0-3 of (next - cur) as words
    punpckhbw  m2, m7                   ; bytes 4-7 of (cur - next) as words
    punpckhbw  m3, m7                   ; bytes 4-7 of (next - cur) as words
    paddw      m0, m1
    paddw      m2, m3
    paddw      m4, m0
    paddw      m4, m2

    dec        r3
    jnz        .next_pair

    ; Horizontal add of the four 16-bit sums into a single 32-bit result.
    movq       m3, m4
    punpckhwd  m3, m7                   ; words 2,3 -> dwords
    punpcklwd  m4, m7                   ; words 0,1 -> dwords
    paddd      m4, m3                   ; two dword partial sums
    movd       eax, m4                  ; low partial sum
    psrlq      m4, 32
    movd       r4d, m4                  ; high partial sum
    add        eax, r4d
    shl        eax, 2                   ; result is 4 * sum
    RET
|