forked from KolibriOS/kolibrios
upload sdk
git-svn-id: svn://kolibrios.org@4349 a494cfbc-eb01-0410-851d-a64ba20cac60
This commit is contained in:
493
contrib/sdk/sources/ffmpeg/libavcodec/arm/dcadsp_vfp.S
Normal file
493
contrib/sdk/sources/ffmpeg/libavcodec/arm/dcadsp_vfp.S
Normal file
@@ -0,0 +1,493 @@
/*
 * Copyright (c) 2013 RISC OS Open Ltd
 * Author: Ben Avison <bavison@riscosopen.org>
 *
 * This file is part of FFmpeg.
 *
 * FFmpeg is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * FFmpeg is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with FFmpeg; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 */

#include "libavutil/arm/asm.S"
|
||||
|
||||
/*
 * Register roles for ff_dca_lfe_fir_vfp and its helper macros.
 *
 * Core registers: a1-a3 carry the out / in / coefs pointers. a4 holds
 * decifactor on entry and is reused for the saved FPSCR once decifactor
 * has been tested, which is why DECIFACTOR and OLDFPSCR name the same
 * register.
 */
POUT          .req    a1
PIN           .req    a2
PCOEF         .req    a3
DECIFACTOR    .req    a4
OLDFPSCR      .req    a4
COUNTER       .req    ip

/*
 * VFP registers. IN4-IN7 overlap SCALE64 (s0): IN4-IN7 are only loaded
 * when decifactor == 32, and in that case the scale is first copied to
 * SCALE32.
 */
SCALE32       .req    s28  @ use vector of 4 in place of 9th scalar when decifactor=32 / JMAX=8
SCALE64       .req    s0   @ spare register in scalar bank when decifactor=64 / JMAX=4
IN0           .req    s4
IN1           .req    s5
IN2           .req    s6
IN3           .req    s7
IN4           .req    s0
IN5           .req    s1
IN6           .req    s2
IN7           .req    s3
COEF0         .req    s8   @ coefficient elements
COEF1         .req    s9
COEF2         .req    s10
COEF3         .req    s11
COEF4         .req    s12
COEF5         .req    s13
COEF6         .req    s14
COEF7         .req    s15
ACCUM0        .req    s16  @ double-buffered multiply-accumulate results
ACCUM4        .req    s20
POST0         .req    s24  @ do long-latency post-multiply in this vector in parallel
POST1         .req    s25
POST2         .req    s26
POST3         .req    s27

/*
 * inner_loop decifactor, dir, tail, head
 *
 * One software-pipelined step of the LFE FIR, producing four outputs.
 *
 *   decifactor  32 or 64. Selects SCALE32 / SCALE64 for the post-multiply
 *               and, when 32, also processes taps 4-7 (IN4-IN7).
 *   dir         "up" reads the current coefficient block forwards
 *               (X = 0, Y = +4); anything else reads the same block
 *               backwards (X = offset of its last word, Y = -4).
 *   tail        non-empty: finish the previous step - add ACCUM0 and
 *               ACCUM4, scale the sum and store four floats through
 *               POUT with writeback.
 *   head        non-empty: start a new step - load coefficients relative
 *               to PCOEF and accumulate into ACCUM0 / ACCUM4.
 *
 * Expects the assembler symbol JMAX (taps per output) to have been set by
 * the invoking macro. Redefines the symbols X and Y. The head and tail
 * instructions are deliberately interleaved; keep the order when editing.
 */
.macro  inner_loop  decifactor, dir, tail, head
 .ifc "\dir","up"
  .set X, 0
  .set Y, 4
 .else
  .set X, 4*JMAX*4 - 4
  .set Y, -4
 .endif
 .ifnc "\head",""
        vldr    COEF0, [PCOEF, #X + (0*JMAX + 0) * Y]
        vldr    COEF1, [PCOEF, #X + (1*JMAX + 0) * Y]
        vldr    COEF2, [PCOEF, #X + (2*JMAX + 0) * Y]
        vldr    COEF3, [PCOEF, #X + (3*JMAX + 0) * Y]
 .endif
 .ifnc "\tail",""
        vadd.f  POST0, ACCUM0, ACCUM4   @ vector operation
 .endif
 .ifnc "\head",""
        vmul.f  ACCUM0, COEF0, IN0      @ vector = vector * scalar
        vldr    COEF4, [PCOEF, #X + (0*JMAX + 1) * Y]
        vldr    COEF5, [PCOEF, #X + (1*JMAX + 1) * Y]
        vldr    COEF6, [PCOEF, #X + (2*JMAX + 1) * Y]
 .endif
 .ifnc "\tail",""
        vmul.f  POST0, POST0, SCALE\decifactor  @ vector operation (SCALE may be scalar)
 .endif
 .ifnc "\head",""
        vldr    COEF7, [PCOEF, #X + (3*JMAX + 1) * Y]
        /* The ACCUM4 multiply is issued here when there is no tail work
         * to overlap with, otherwise two loads later. */
  .ifc "\tail",""
        vmul.f  ACCUM4, COEF4, IN1      @ vector operation
  .endif
        vldr    COEF0, [PCOEF, #X + (0*JMAX + 2) * Y]
        vldr    COEF1, [PCOEF, #X + (1*JMAX + 2) * Y]
  .ifnc "\tail",""
        vmul.f  ACCUM4, COEF4, IN1      @ vector operation
  .endif
        vldr    COEF2, [PCOEF, #X + (2*JMAX + 2) * Y]
        vldr    COEF3, [PCOEF, #X + (3*JMAX + 2) * Y]
 .endif
 .ifnc "\tail",""
        vstmia  POUT!, {POST0-POST3}
 .endif
 .ifnc "\head",""
        vmla.f  ACCUM0, COEF0, IN2      @ vector = vector * scalar
        vldr    COEF4, [PCOEF, #X + (0*JMAX + 3) * Y]
        vldr    COEF5, [PCOEF, #X + (1*JMAX + 3) * Y]
        vldr    COEF6, [PCOEF, #X + (2*JMAX + 3) * Y]
        vldr    COEF7, [PCOEF, #X + (3*JMAX + 3) * Y]
        vmla.f  ACCUM4, COEF4, IN3      @ vector = vector * scalar
  .if \decifactor == 32
        /* Taps 4-7 exist only in the 8-tap (decifactor 32) variant. */
        vldr    COEF0, [PCOEF, #X + (0*JMAX + 4) * Y]
        vldr    COEF1, [PCOEF, #X + (1*JMAX + 4) * Y]
        vldr    COEF2, [PCOEF, #X + (2*JMAX + 4) * Y]
        vldr    COEF3, [PCOEF, #X + (3*JMAX + 4) * Y]
        vmla.f  ACCUM0, COEF0, IN4      @ vector = vector * scalar
        vldr    COEF4, [PCOEF, #X + (0*JMAX + 5) * Y]
        vldr    COEF5, [PCOEF, #X + (1*JMAX + 5) * Y]
        vldr    COEF6, [PCOEF, #X + (2*JMAX + 5) * Y]
        vldr    COEF7, [PCOEF, #X + (3*JMAX + 5) * Y]
        vmla.f  ACCUM4, COEF4, IN5      @ vector = vector * scalar
        vldr    COEF0, [PCOEF, #X + (0*JMAX + 6) * Y]
        vldr    COEF1, [PCOEF, #X + (1*JMAX + 6) * Y]
        vldr    COEF2, [PCOEF, #X + (2*JMAX + 6) * Y]
        vldr    COEF3, [PCOEF, #X + (3*JMAX + 6) * Y]
        vmla.f  ACCUM0, COEF0, IN6      @ vector = vector * scalar
        vldr    COEF4, [PCOEF, #X + (0*JMAX + 7) * Y]
        vldr    COEF5, [PCOEF, #X + (1*JMAX + 7) * Y]
        vldr    COEF6, [PCOEF, #X + (2*JMAX + 7) * Y]
        vldr    COEF7, [PCOEF, #X + (3*JMAX + 7) * Y]
        vmla.f  ACCUM4, COEF4, IN7      @ vector = vector * scalar
  .endif
 .endif
.endm

/*
 * dca_lfe_fir decifactor
 *
 * Body of the LFE FIR for one decimation factor (32 or 64). Expands to a
 * complete function tail: it saves the VFP registers it uses, runs an "up"
 * pass and then a "down" pass over the coefficient table, restores the VFP
 * registers and the caller's FPSCR, and returns with bx lr.
 *
 * On entry IN0-IN3 are already loaded, s0 holds the scale, OLDFPSCR holds
 * the FPSCR value to restore, and FPSCR has been switched to the
 * short-vector mode set up by the invoking function.
 *
 * Each pass emits decifactor/4 groups of four outputs: one head-only step,
 * decifactor/4 - 1 combined tail+head steps, and one tail-only step.
 */
.macro  dca_lfe_fir  decifactor
 .if \decifactor == 32
  .set JMAX, 8
        vpush   {s16-s31}
        vmov    SCALE32, s0             @ duplicate scalar across vector
        /* s0-s3 are free now that the scale has been copied, so they can
         * take input samples 4-7. */
        vldr    IN4, [PIN, #-4*4]
        vldr    IN5, [PIN, #-5*4]
        vldr    IN6, [PIN, #-6*4]
        vldr    IN7, [PIN, #-7*4]
 .else
  .set JMAX, 4
        vpush   {s16-s27}
 .endif

        /* Up pass: PCOEF advances one 4*JMAX-word block per step. */
        mov     COUNTER, #\decifactor/4 - 1
        inner_loop  \decifactor, up,, head
1:      add     PCOEF, PCOEF, #4*JMAX*4
        subs    COUNTER, COUNTER, #1
        /* bne below uses the flags set by subs above; the macro expansion
         * in between contains only VFP load/store/arithmetic
         * instructions. */
        inner_loop  \decifactor, up, tail, head
        bne     1b
        inner_loop  \decifactor, up, tail

        /* Down pass: starts on the block PCOEF was left pointing at and
         * walks back, reading each block in reverse. */
        mov     COUNTER, #\decifactor/4 - 1
        inner_loop  \decifactor, down,, head
1:      sub     PCOEF, PCOEF, #4*JMAX*4
        subs    COUNTER, COUNTER, #1
        inner_loop  \decifactor, down, tail, head
        bne     1b
        inner_loop  \decifactor, down, tail

 .if \decifactor == 32
        vpop    {s16-s31}
 .else
        vpop    {s16-s27}
 .endif
        fmxr    FPSCR, OLDFPSCR
        bx      lr
.endm

/* void ff_dca_lfe_fir_vfp(float *out, const float *in, const float *coefs,
 *                         int decifactor, float scale)
 *
 * Dispatches to the decifactor == 32 variant of dca_lfe_fir when a4 is 32
 * and to the 64 variant for every other value; no other validation of
 * decifactor is performed here.
 *
 * The caller's FPSCR is saved in OLDFPSCR and restored by the dca_lfe_fir
 * expansion before it returns.
 */
function ff_dca_lfe_fir_vfp, export=1
        /* Test decifactor first: the next instruction overwrites a4
         * (DECIFACTOR and OLDFPSCR are the same register). The Z flag
         * must then survive until the beq below. */
        teq     DECIFACTOR, #32
        fmrx    OLDFPSCR, FPSCR
        ldr     ip, =0x03030000         @ RunFast mode, short vectors of length 4, stride 1
        fmxr    FPSCR, ip
        /* NOVFP-prefixed lines are conditional on a macro from asm.S;
         * here scale is fetched from the first stack argument word into
         * s0 - presumably the softfp calling convention, confirm in
         * asm.S. */
NOVFP   vldr    s0, [sp]
        vldr    IN0, [PIN, #-0*4]
        vldr    IN1, [PIN, #-1*4]
        vldr    IN2, [PIN, #-2*4]
        vldr    IN3, [PIN, #-3*4]
        beq     32f
64:     dca_lfe_fir  64
        /* Emit the literal pool here, between the two expansions, each of
         * which ends in bx lr. */
        .ltorg
32:     dca_lfe_fir  32
endfunc

/* Release the register aliases used by ff_dca_lfe_fir_vfp so that the names
 * can be redefined for the next function. */
        .unreq  POUT
        .unreq  PIN
        .unreq  PCOEF
        .unreq  DECIFACTOR
        .unreq  OLDFPSCR
        .unreq  COUNTER

        .unreq  SCALE32
        .unreq  SCALE64
        .unreq  IN0
        .unreq  IN1
        .unreq  IN2
        .unreq  IN3
        .unreq  IN4
        .unreq  IN5
        .unreq  IN6
        .unreq  IN7
        .unreq  COEF0
        .unreq  COEF1
        .unreq  COEF2
        .unreq  COEF3
        .unreq  COEF4
        .unreq  COEF5
        .unreq  COEF6
        .unreq  COEF7
        .unreq  ACCUM0
        .unreq  ACCUM4
        .unreq  POST0
        .unreq  POST1
        .unreq  POST2
        .unreq  POST3

/*
 * Register roles for ff_dca_qmf_32_subbands_vfp.
 *
 * a1/a2 are the first two arguments (samples_in, sb_act). a3 is reused to
 * hold the saved FPSCR. IMDCT names a4, the fourth argument; the function
 * body does not reference the alias and instead reloads the pointer from
 * its saved stack slot before each call. v1-v5 are callee-saved working
 * registers.
 */
IN      .req    a1
SBACT   .req    a2
OLDFPSCR .req   a3
IMDCT   .req    a4
WINDOW  .req    v1
OUT     .req    v2
BUF     .req    v3
SCALEINT .req   v4 @ only used in softfp case
COUNT   .req    v5

SCALE   .req    s0

/* Stack layout differs in softfp and hardfp cases:
 *
 * hardfp
 *      fp -> 6 arg words saved by caller
 *            a3,a4,v1-v3,v5,fp,lr on entry (a3 just to pad to 8 bytes)
 *            s16-s23 on entry
 *            align 16
 *     buf -> 8*32*4 bytes buffer
 *            s0 on entry
 *      sp -> 3 arg words for callee
 *
 * softfp
 *      fp -> 7 arg words saved by caller
 *            a4,v1-v5,fp,lr on entry
 *            s16-s23 on entry
 *            align 16
 *     buf -> 8*32*4 bytes buffer
 *      sp -> 4 arg words for callee
 */

/* void ff_dca_qmf_32_subbands_vfp(float samples_in[32][8], int sb_act,
 *                                 SynthFilterContext *synth, FFTContext *imdct,
 *                                 float (*synth_buf_ptr)[512],
 *                                 int *synth_buf_offset, float (*synth_buf2)[32],
 *                                 const float (*window)[512], float *samples_out,
 *                                 float (*raXin)[32], float scale);
 *
 * Phase 1 copies the first sb_act rows of samples_in (8 floats each) into a
 * 16-byte-aligned 8 x 32 float buffer on the stack, transposed so that
 * input row i becomes column i of every buffer row. Rows 0 and 3 of each
 * group of four input rows are negated on the way, and columns from sb_act
 * up to 31 are filled with zero.
 *
 * Phase 2 calls ff_synth_filter_float_vfp eight times, once per buffer row,
 * advancing the output pointer by 32 floats per call.
 *
 * VFP / NOVFP / A / T line prefixes are conditional-assembly macros from
 * asm.S (presumably hardfp / softfp and ARM / Thumb selection - confirm
 * there).
 */
function ff_dca_qmf_32_subbands_vfp, export=1
VFP     push    {a3-a4,v1-v3,v5,fp,lr}
NOVFP   push    {a4,v1-v5,fp,lr}
        /* Eight words were pushed either way, so fp now addresses the
         * stack arguments supplied by the caller. */
        add     fp, sp, #8*4
        vpush   {s16-s23}
        @ The buffer pointed at by raXin isn't big enough for us to do a
        @ complete matrix transposition as we want to, so allocate an
        @ alternative buffer from the stack. Align to 4 words for speed.
        sub     BUF, sp, #8*32*4
        bic     BUF, BUF, #15
        mov     sp, BUF
        ldr     lr, =0x03330000         @ RunFast mode, short vectors of length 4, stride 2
        fmrx    OLDFPSCR, FPSCR
        fmxr    FPSCR, lr
        @ COUNT is used to count down 2 things at once:
        @ bits 0-4 are the number of word pairs remaining in the output row
        @ bits 5-31 are the number of words to copy (with possible negation)
        @ from the source matrix before we start zeroing the remainder
        mov     COUNT, #(-4 << 5) + 16
        adds    COUNT, COUNT, SBACT, lsl #5
        bmi     2f
1:
        /* Main loop: four input rows per iteration. Even-numbered s
         * registers take row 0 (then row 2), odd-numbered ones row 1
         * (then row 3), so each d register holds a pair of adjacent
         * output columns. */
        vldr    s8,  [IN, #(0*8+0)*4]
        vldr    s10, [IN, #(0*8+1)*4]
        vldr    s12, [IN, #(0*8+2)*4]
        vldr    s14, [IN, #(0*8+3)*4]
        vldr    s16, [IN, #(0*8+4)*4]
        vldr    s18, [IN, #(0*8+5)*4]
        vldr    s20, [IN, #(0*8+6)*4]
        vldr    s22, [IN, #(0*8+7)*4]
        /* With the length-4 / stride-2 vector mode selected above, this
         * is expected to negate s8, s10, s12 and s14 together. */
        vneg.f  s8, s8
        vldr    s9,  [IN, #(1*8+0)*4]
        vldr    s11, [IN, #(1*8+1)*4]
        vldr    s13, [IN, #(1*8+2)*4]
        vldr    s15, [IN, #(1*8+3)*4]
        vneg.f  s16, s16
        vldr    s17, [IN, #(1*8+4)*4]
        vldr    s19, [IN, #(1*8+5)*4]
        vldr    s21, [IN, #(1*8+6)*4]
        vldr    s23, [IN, #(1*8+7)*4]
        vstr    d4,  [BUF, #(0*32+0)*4]
        vstr    d5,  [BUF, #(1*32+0)*4]
        vstr    d6,  [BUF, #(2*32+0)*4]
        vstr    d7,  [BUF, #(3*32+0)*4]
        vstr    d8,  [BUF, #(4*32+0)*4]
        vstr    d9,  [BUF, #(5*32+0)*4]
        vstr    d10, [BUF, #(6*32+0)*4]
        vstr    d11, [BUF, #(7*32+0)*4]
        vldr    s9,  [IN, #(3*8+0)*4]
        vldr    s11, [IN, #(3*8+1)*4]
        vldr    s13, [IN, #(3*8+2)*4]
        vldr    s15, [IN, #(3*8+3)*4]
        vldr    s17, [IN, #(3*8+4)*4]
        vldr    s19, [IN, #(3*8+5)*4]
        vldr    s21, [IN, #(3*8+6)*4]
        vldr    s23, [IN, #(3*8+7)*4]
        vneg.f  s9, s9
        vldr    s8,  [IN, #(2*8+0)*4]
        vldr    s10, [IN, #(2*8+1)*4]
        vldr    s12, [IN, #(2*8+2)*4]
        vldr    s14, [IN, #(2*8+3)*4]
        vneg.f  s17, s17
        vldr    s16, [IN, #(2*8+4)*4]
        vldr    s18, [IN, #(2*8+5)*4]
        vldr    s20, [IN, #(2*8+6)*4]
        vldr    s22, [IN, #(2*8+7)*4]
        vstr    d4,  [BUF, #(0*32+2)*4]
        vstr    d5,  [BUF, #(1*32+2)*4]
        vstr    d6,  [BUF, #(2*32+2)*4]
        vstr    d7,  [BUF, #(3*32+2)*4]
        vstr    d8,  [BUF, #(4*32+2)*4]
        vstr    d9,  [BUF, #(5*32+2)*4]
        vstr    d10, [BUF, #(6*32+2)*4]
        vstr    d11, [BUF, #(7*32+2)*4]
        add     IN, IN, #4*8*4
        add     BUF, BUF, #4*4
        /* Four source rows consumed, two word pairs written per row. */
        subs    COUNT, COUNT, #(4 << 5) + 2
        bpl     1b
2:      @ Now deal with trailing < 4 samples
        adds    COUNT, COUNT, #3 << 5
        bmi     4f      @ sb_act was a multiple of 4
        bics    lr, COUNT, #0x1F
        bne     3f
        @ sb_act was n*4+1
        /* One real row paired with a zero column. */
        vldr    s8,  [IN, #(0*8+0)*4]
        vldr    s10, [IN, #(0*8+1)*4]
        vldr    s12, [IN, #(0*8+2)*4]
        vldr    s14, [IN, #(0*8+3)*4]
        vldr    s16, [IN, #(0*8+4)*4]
        vldr    s18, [IN, #(0*8+5)*4]
        vldr    s20, [IN, #(0*8+6)*4]
        vldr    s22, [IN, #(0*8+7)*4]
        vneg.f  s8, s8
        vldr    s9,  zero
        vldr    s11, zero
        vldr    s13, zero
        vldr    s15, zero
        vneg.f  s16, s16
        vldr    s17, zero
        vldr    s19, zero
        vldr    s21, zero
        vldr    s23, zero
        vstr    d4,  [BUF, #(0*32+0)*4]
        vstr    d5,  [BUF, #(1*32+0)*4]
        vstr    d6,  [BUF, #(2*32+0)*4]
        vstr    d7,  [BUF, #(3*32+0)*4]
        vstr    d8,  [BUF, #(4*32+0)*4]
        vstr    d9,  [BUF, #(5*32+0)*4]
        vstr    d10, [BUF, #(6*32+0)*4]
        vstr    d11, [BUF, #(7*32+0)*4]
        add     BUF, BUF, #2*4
        sub     COUNT, COUNT, #1
        b       4f
3:      @ sb_act was n*4+2 or n*4+3, so do the first 2
        vldr    s8,  [IN, #(0*8+0)*4]
        vldr    s10, [IN, #(0*8+1)*4]
        vldr    s12, [IN, #(0*8+2)*4]
        vldr    s14, [IN, #(0*8+3)*4]
        vldr    s16, [IN, #(0*8+4)*4]
        vldr    s18, [IN, #(0*8+5)*4]
        vldr    s20, [IN, #(0*8+6)*4]
        vldr    s22, [IN, #(0*8+7)*4]
        vneg.f  s8, s8
        vldr    s9,  [IN, #(1*8+0)*4]
        vldr    s11, [IN, #(1*8+1)*4]
        vldr    s13, [IN, #(1*8+2)*4]
        vldr    s15, [IN, #(1*8+3)*4]
        vneg.f  s16, s16
        vldr    s17, [IN, #(1*8+4)*4]
        vldr    s19, [IN, #(1*8+5)*4]
        vldr    s21, [IN, #(1*8+6)*4]
        vldr    s23, [IN, #(1*8+7)*4]
        vstr    d4,  [BUF, #(0*32+0)*4]
        vstr    d5,  [BUF, #(1*32+0)*4]
        vstr    d6,  [BUF, #(2*32+0)*4]
        vstr    d7,  [BUF, #(3*32+0)*4]
        vstr    d8,  [BUF, #(4*32+0)*4]
        vstr    d9,  [BUF, #(5*32+0)*4]
        vstr    d10, [BUF, #(6*32+0)*4]
        vstr    d11, [BUF, #(7*32+0)*4]
        add     BUF, BUF, #2*4
        sub     COUNT, COUNT, #(2 << 5) + 1
        bics    lr, COUNT, #0x1F
        bne     4f
        @ sb_act was n*4+3
        /* Third row of the group: not negated, paired with zeros. */
        vldr    s8,  [IN, #(2*8+0)*4]
        vldr    s10, [IN, #(2*8+1)*4]
        vldr    s12, [IN, #(2*8+2)*4]
        vldr    s14, [IN, #(2*8+3)*4]
        vldr    s16, [IN, #(2*8+4)*4]
        vldr    s18, [IN, #(2*8+5)*4]
        vldr    s20, [IN, #(2*8+6)*4]
        vldr    s22, [IN, #(2*8+7)*4]
        vldr    s9,  zero
        vldr    s11, zero
        vldr    s13, zero
        vldr    s15, zero
        vldr    s17, zero
        vldr    s19, zero
        vldr    s21, zero
        vldr    s23, zero
        vstr    d4,  [BUF, #(0*32+0)*4]
        vstr    d5,  [BUF, #(1*32+0)*4]
        vstr    d6,  [BUF, #(2*32+0)*4]
        vstr    d7,  [BUF, #(3*32+0)*4]
        vstr    d8,  [BUF, #(4*32+0)*4]
        vstr    d9,  [BUF, #(5*32+0)*4]
        vstr    d10, [BUF, #(6*32+0)*4]
        vstr    d11, [BUF, #(7*32+0)*4]
        add     BUF, BUF, #2*4
        sub     COUNT, COUNT, #1
4:      @ Now fill the remainder with 0
        vldr    s8, zero
        vldr    s9, zero
        /* Keep only the word-pair counter held in bits 0-4. */
        ands    COUNT, COUNT, #0x1F
        beq     6f
5:      vstr    d4, [BUF, #(0*32+0)*4]
        vstr    d4, [BUF, #(1*32+0)*4]
        vstr    d4, [BUF, #(2*32+0)*4]
        vstr    d4, [BUF, #(3*32+0)*4]
        vstr    d4, [BUF, #(4*32+0)*4]
        vstr    d4, [BUF, #(5*32+0)*4]
        vstr    d4, [BUF, #(6*32+0)*4]
        vstr    d4, [BUF, #(7*32+0)*4]
        add     BUF, BUF, #2*4
        subs    COUNT, COUNT, #1
        bne     5b
6:
        /* Restore the FPSCR saved on entry before calling out. */
        fmxr    FPSCR, OLDFPSCR
        ldr     WINDOW, [fp, #3*4]
        ldr     OUT, [fp, #4*4]
        /* BUF advanced by 32 words while filling; rewind to row 0. */
        sub     BUF, BUF, #32*4
NOVFP   ldr     SCALEINT, [fp, #6*4]
        mov     COUNT, #8
VFP     vpush   {SCALE}
VFP     sub     sp, sp, #3*4
NOVFP   sub     sp, sp, #4*4
7:
        /* Rebuild the callee arguments on every pass: a1 from the saved
         * a4 slot, a2-a4 from the first three caller stack words, the
         * rest in the outgoing area at sp. */
VFP     ldr     a1, [fp, #-7*4]         @ imdct
NOVFP   ldr     a1, [fp, #-8*4]
        ldmia   fp, {a2-a4}
VFP     stmia   sp, {WINDOW, OUT, BUF}
NOVFP   stmia   sp, {WINDOW, OUT, BUF, SCALEINT}
        /* Reload scale from the copy pushed before the loop. */
VFP     vldr    SCALE, [sp, #3*4]
        bl      X(ff_synth_filter_float_vfp)
        add     OUT, OUT, #32*4
        add     BUF, BUF, #32*4
        subs    COUNT, COUNT, #1
        bne     7b

        /* Drop the buffer and outgoing arguments: point sp at the saved
         * s16-s23, which sit 8 words below the 8 saved core registers. */
A       sub     sp, fp, #(8+8)*4
T       sub     fp, fp, #(8+8)*4
T       mov     sp, fp
        vpop    {s16-s23}
VFP     pop     {a3-a4,v1-v3,v5,fp,pc}
NOVFP   pop     {a4,v1-v5,fp,pc}
endfunc

/* Release the register aliases used by ff_dca_qmf_32_subbands_vfp. */
        .unreq  IN
        .unreq  SBACT
        .unreq  OLDFPSCR
        .unreq  IMDCT
        .unreq  WINDOW
        .unreq  OUT
        .unreq  BUF
        .unreq  SCALEINT
        .unreq  COUNT

        .unreq  SCALE

/* Word-aligned zero constant, loaded with PC-relative "vldr sN, zero" by
 * ff_dca_qmf_32_subbands_vfp when padding the transposed buffer. */
        .align 2
zero:   .word   0
Reference in New Issue
Block a user