forked from KolibriOS/kolibrios
libmpg123 1.9.0
git-svn-id: svn://kolibrios.org@1905 a494cfbc-eb01-0410-851d-a64ba20cac60
This commit is contained in:
246
programs/develop/libraries/libmpg123/synth_sse3d.h
Normal file
246
programs/develop/libraries/libmpg123/synth_sse3d.h
Normal file
@@ -0,0 +1,246 @@
|
||||
/*
|
||||
decode_sse3d: Synth for SSE and extended 3DNow (yeah, the name is a relic)
|
||||
|
||||
copyright 2006-2007 by Zuxy Meng/the mpg123 project - free software under the terms of the LGPL 2.1
|
||||
see COPYING and AUTHORS files in distribution or http://mpg123.org
|
||||
initially written by the mysterious higway for MMX (apparently)
|
||||
then developed into SSE opt by Zuxy Meng, also building on Romain Dolbeau's AltiVec
|
||||
Both have agreed to distribution under LGPL 2.1 .
|
||||
|
||||
Transformed back into standalone asm, with help of
|
||||
gcc -S -DHAVE_CONFIG_H -I. -march=pentium -O3 -Wall -pedantic -fno-strict-aliasing -DREAL_IS_FLOAT -c -o decode_mmxsse.{S,c}
|
||||
|
||||
The difference between SSE and 3DNowExt is the dct64 function and the synth function name.
|
||||
This template here uses the SYNTH_NAME and MPL_DCT64 macros for this - see decode_sse.S and decode_3dnowext.S...
|
||||
That's not memory efficient since there's doubled code, but it's easier than giving another function pointer.
|
||||
Maybe I'll change it in future, but now I need something that works.
|
||||
|
||||
Original comment from MPlayer source follows:
|
||||
*/
|
||||
|
||||
/*
|
||||
* this code comes under GPL
|
||||
* This code was taken from http://www.mpg123.org
|
||||
* See ChangeLog of mpg123-0.59s-pre.1 for detail
|
||||
* Applied to mplayer by Nick Kurshev <nickols_k@mail.ru>
|
||||
*
|
||||
* Local ChangeLog:
|
||||
* - Partial loops unrolling and removing MOVW insn from loops
|
||||
*/
|
||||
|
||||
#include "mangle.h"
|
||||
|
||||
.data
|
||||
ALIGN8
|
||||
one_null:
|
||||
.long -65536
|
||||
.long -65536
|
||||
ALIGN8
|
||||
null_one:
|
||||
.long 65535
|
||||
.long 65535
|
||||
|
||||
.text
|
||||
ALIGN16,,15
|
||||
/* void SYNTH_NAME(real *bandPtr, int channel, short *samples, short *buffs, int *bo, float *decwins) */
|
||||
.globl SYNTH_NAME
|
||||
SYNTH_NAME:
|
||||
pushl %ebp
|
||||
/* stack:0=ebp 4=back 8=bandptr 12=channel 16=samples 20=buffs 24=bo 28=decwins */
|
||||
movl %esp, %ebp
|
||||
/* Now the old stack addresses are preserved via %epb. */
|
||||
subl $4,%esp /* What has been called temp before. */
|
||||
pushl %edi
|
||||
pushl %esi
|
||||
pushl %ebx
|
||||
#define TEMP 12(%esp)
|
||||
/* APP */
|
||||
movl 12(%ebp),%ecx
|
||||
movl 16(%ebp),%edi
|
||||
movl $15,%ebx
|
||||
movl 24(%ebp),%edx
|
||||
leal (%edi,%ecx,2),%edi
|
||||
decl %ecx
|
||||
movl 20(%ebp),%esi
|
||||
movl (%edx),%eax
|
||||
jecxz .L01
|
||||
decl %eax
|
||||
andl %ebx,%eax
|
||||
leal 1088(%esi),%esi
|
||||
movl %eax,(%edx)
|
||||
.L01:
|
||||
leal (%esi,%eax,2),%edx
|
||||
movl %eax,TEMP
|
||||
incl %eax
|
||||
andl %ebx,%eax
|
||||
leal 544(%esi,%eax,2),%ecx
|
||||
incl %ebx
|
||||
testl $1, %eax
|
||||
jnz .L02
|
||||
xchgl %edx,%ecx
|
||||
incl TEMP
|
||||
leal 544(%esi),%esi
|
||||
.L02:
|
||||
pushl 8(%ebp)
|
||||
pushl %edx
|
||||
pushl %ecx
|
||||
call MPL_DCT64
|
||||
addl $12, %esp
|
||||
leal 1(%ebx), %ecx
|
||||
subl TEMP,%ebx
|
||||
pushl %ecx
|
||||
/* leal ASM_NAME(decwins)(%ebx,%ebx,1), %edx */
|
||||
movl 28(%ebp),%ecx
|
||||
leal (%ecx,%ebx,2), %edx
|
||||
movl (%esp),%ecx /* restore, but leave value on stack */
|
||||
shrl $1, %ecx
|
||||
ALIGN16
|
||||
.L03:
|
||||
movq (%edx),%mm0
|
||||
movq 64(%edx),%mm4
|
||||
pmaddwd (%esi),%mm0
|
||||
pmaddwd 32(%esi),%mm4
|
||||
movq 8(%edx),%mm1
|
||||
movq 72(%edx),%mm5
|
||||
pmaddwd 8(%esi),%mm1
|
||||
pmaddwd 40(%esi),%mm5
|
||||
movq 16(%edx),%mm2
|
||||
movq 80(%edx),%mm6
|
||||
pmaddwd 16(%esi),%mm2
|
||||
pmaddwd 48(%esi),%mm6
|
||||
movq 24(%edx),%mm3
|
||||
movq 88(%edx),%mm7
|
||||
pmaddwd 24(%esi),%mm3
|
||||
pmaddwd 56(%esi),%mm7
|
||||
paddd %mm1,%mm0
|
||||
paddd %mm5,%mm4
|
||||
paddd %mm2,%mm0
|
||||
paddd %mm6,%mm4
|
||||
paddd %mm3,%mm0
|
||||
paddd %mm7,%mm4
|
||||
movq %mm0,%mm1
|
||||
movq %mm4,%mm5
|
||||
psrlq $32,%mm1
|
||||
psrlq $32,%mm5
|
||||
paddd %mm1,%mm0
|
||||
paddd %mm5,%mm4
|
||||
psrad $13,%mm0
|
||||
psrad $13,%mm4
|
||||
packssdw %mm0,%mm0
|
||||
packssdw %mm4,%mm4
|
||||
movq (%edi), %mm1
|
||||
punpckldq %mm4, %mm0
|
||||
pand one_null, %mm1
|
||||
pand null_one, %mm0
|
||||
por %mm0, %mm1
|
||||
movq %mm1,(%edi)
|
||||
leal 64(%esi),%esi
|
||||
leal 128(%edx),%edx
|
||||
leal 8(%edi),%edi
|
||||
decl %ecx
|
||||
jnz .L03
|
||||
popl %ecx
|
||||
andl $1, %ecx
|
||||
jecxz .next_loop
|
||||
movq (%edx),%mm0
|
||||
pmaddwd (%esi),%mm0
|
||||
movq 8(%edx),%mm1
|
||||
pmaddwd 8(%esi),%mm1
|
||||
movq 16(%edx),%mm2
|
||||
pmaddwd 16(%esi),%mm2
|
||||
movq 24(%edx),%mm3
|
||||
pmaddwd 24(%esi),%mm3
|
||||
paddd %mm1,%mm0
|
||||
paddd %mm2,%mm0
|
||||
paddd %mm3,%mm0
|
||||
movq %mm0,%mm1
|
||||
psrlq $32,%mm1
|
||||
paddd %mm1,%mm0
|
||||
psrad $13,%mm0
|
||||
packssdw %mm0,%mm0
|
||||
movd %mm0,%eax
|
||||
movw %ax, (%edi)
|
||||
leal 32(%esi),%esi
|
||||
leal 64(%edx),%edx
|
||||
leal 4(%edi),%edi
|
||||
.next_loop:
|
||||
subl $64,%esi
|
||||
movl $7,%ecx
|
||||
ALIGN16
|
||||
.L04:
|
||||
movq (%edx),%mm0
|
||||
movq 64(%edx),%mm4
|
||||
pmaddwd (%esi),%mm0
|
||||
pmaddwd -32(%esi),%mm4
|
||||
movq 8(%edx),%mm1
|
||||
movq 72(%edx),%mm5
|
||||
pmaddwd 8(%esi),%mm1
|
||||
pmaddwd -24(%esi),%mm5
|
||||
movq 16(%edx),%mm2
|
||||
movq 80(%edx),%mm6
|
||||
pmaddwd 16(%esi),%mm2
|
||||
pmaddwd -16(%esi),%mm6
|
||||
movq 24(%edx),%mm3
|
||||
movq 88(%edx),%mm7
|
||||
pmaddwd 24(%esi),%mm3
|
||||
pmaddwd -8(%esi),%mm7
|
||||
paddd %mm1,%mm0
|
||||
paddd %mm5,%mm4
|
||||
paddd %mm2,%mm0
|
||||
paddd %mm6,%mm4
|
||||
paddd %mm3,%mm0
|
||||
paddd %mm7,%mm4
|
||||
movq %mm0,%mm1
|
||||
movq %mm4,%mm5
|
||||
psrlq $32,%mm1
|
||||
psrlq $32,%mm5
|
||||
paddd %mm0,%mm1
|
||||
paddd %mm4,%mm5
|
||||
psrad $13,%mm1
|
||||
psrad $13,%mm5
|
||||
packssdw %mm1,%mm1
|
||||
packssdw %mm5,%mm5
|
||||
psubd %mm0,%mm0
|
||||
psubd %mm4,%mm4
|
||||
psubsw %mm1,%mm0
|
||||
psubsw %mm5,%mm4
|
||||
movq (%edi), %mm1
|
||||
punpckldq %mm4, %mm0
|
||||
pand one_null, %mm1
|
||||
pand null_one, %mm0
|
||||
por %mm0, %mm1
|
||||
movq %mm1,(%edi)
|
||||
subl $64,%esi
|
||||
addl $128,%edx
|
||||
leal 8(%edi),%edi
|
||||
decl %ecx
|
||||
jnz .L04
|
||||
movq (%edx),%mm0
|
||||
pmaddwd (%esi),%mm0
|
||||
movq 8(%edx),%mm1
|
||||
pmaddwd 8(%esi),%mm1
|
||||
movq 16(%edx),%mm2
|
||||
pmaddwd 16(%esi),%mm2
|
||||
movq 24(%edx),%mm3
|
||||
pmaddwd 24(%esi),%mm3
|
||||
paddd %mm1,%mm0
|
||||
paddd %mm2,%mm0
|
||||
paddd %mm3,%mm0
|
||||
movq %mm0,%mm1
|
||||
psrlq $32,%mm1
|
||||
paddd %mm0,%mm1
|
||||
psrad $13,%mm1
|
||||
packssdw %mm1,%mm1
|
||||
psubd %mm0,%mm0
|
||||
psubsw %mm1,%mm0
|
||||
movd %mm0,%eax
|
||||
movw %ax,(%edi)
|
||||
emms
|
||||
|
||||
/* NO_APP */
|
||||
popl %ebx
|
||||
popl %esi
|
||||
popl %edi
|
||||
addl $4,%esp
|
||||
popl %ebp
|
||||
ret
|
Reference in New Issue
Block a user