FHT: SSE_patch_1

git-svn-id: svn://kolibrios.org@2215 a494cfbc-eb01-0410-851d-a64ba20cac60
This commit is contained in:
Artem Jerdev (art_zh) 2011-09-20 15:34:33 +00:00
parent 9e5b90e354
commit 39a79feb29
3 changed files with 41 additions and 139 deletions

View File

@ -25,7 +25,7 @@ use32
include '../../macros.inc' include '../../macros.inc'
include '../debug.inc' include '../debug.inc'
include 'FHT4i.inc' include 'fht4code.asm'
START: ; start of execution START: ; start of execution

View File

@ -185,6 +185,45 @@ step1:
jnz .loop jnz .loop
ret ret
;=================================================================
; SSE3 version: Step1
;
; Processes an array of doubles in place, four elements (32 bytes)
; per loop iteration, using the SSE3 instructions movddup/addsubpd.
;
; In:    [esp+4] = element count; it is multiplied by 8 and added to
;                  the data pointer to form the end pointer
;        [esp+8] = pointer to the data; accessed with movapd stores,
;                  so it has to be 16-byte aligned
; Out:   each group {f0,f1,f2,f3} is overwritten with
;            [+0]  = t1+t3      [+8]  = t1-t3
;            [+16] = t2+t4      [+24] = t2-t4
;        where t1 = f0+f1, t2 = f0-f1, t3 = f2+f3, t4 = f2-f3
; Clobb: ebx, esi, xmm0-xmm3, flags (ebx/esi are not saved/restored)
;        Returns with a plain ret; the stack arguments are not popped.
;
; Register comments below are written as "high qword ; low qword".
; addsubpd subtracts in the low lane and adds in the high lane;
; movddup broadcasts the LOW qword of its source to both lanes.
;
; The loop body runs before the end test and exits only on exact
; equality (jnz), so the count must be a non-zero multiple of 4.
;
; NOTE(review): confirm that the store order listed above matches what
; the scalar step1 produces - that routine is not visible here.
;==========================
align 4
step1_sse:
mov ebx, [esp+8]
mov esi, [esp+4]
shl esi, 3
add esi, ebx
.loop:
movddup xmm0, [ebx] ; xmm0: f0 ; f0
movddup xmm1, [ebx+8] ; xmm1: f1 ; f1
addsubpd xmm0, xmm1 ; xmm0: t1 ; t2 ( + - )
movddup xmm1, [ebx+16] ; xmm1: f2 ; f2
movddup xmm2, [ebx+24] ; xmm2: f3 ; f3
addsubpd xmm1, xmm2 ; xmm1: t3 ; t4 ( + - )
movddup xmm2, xmm0 ; xmm2: t2 ; t2
movddup xmm3, xmm1 ; xmm3: t4 ; t4
addsubpd xmm2, xmm3 ; xmm2: 2+4; 2-4
shufpd xmm2, xmm2, 1 ; xmm2: 2-4; 2+4
; all four inputs are already in registers, so storing now is safe
movapd [ebx+16], xmm2
shufpd xmm0, xmm0, 1 ; xmm0: t2 ; t1
shufpd xmm1, xmm1, 1 ; xmm1: t4 ; t3
movddup xmm2, xmm0 ; xmm2: t1 ; t1
movddup xmm3, xmm1 ; xmm3: t3 ; t3
addsubpd xmm2, xmm3 ; xmm2: 1+3; 1-3
shufpd xmm2, xmm2, 1 ; xmm2: 1-3; 1+3
movapd [ebx], xmm2
add ebx, 32
cmp ebx, esi
jnz .loop
ret
; local stack definitions ; local stack definitions
;=========================================================================== ;===========================================================================
_t0 equ dword [esp] _t0 equ dword [esp]

View File

@ -1,137 +0,0 @@
; debug_print str   (short alias: dps)
; Embeds the zero-terminated string <str> directly in the code stream,
; jumps over it, and passes its address in edx to debug_outstr.
; All general-purpose registers and the flags are preserved.
macro debug_print str
{
        local   ..text, ..resume
        jmp     ..resume                ; step over the inline string bytes
  ..text db str, 0
  ..resume:
        pushf
        pushad
        mov     edx, ..text             ; edx -> string for debug_outstr
        call    debug_outstr
        popad
        popf
}
dps fix debug_print
; debug_print_dec arg   (short alias: dpd)
; Passes <arg> in eax to debug_outdec.
; All general-purpose registers and the flags are preserved.
; <arg> is read after pushf/pushad, i.e. with esp already lowered.
macro debug_print_dec arg
{
        pushf
        pushad
  if arg eq eax
        ; value is already in eax - nothing to load
  else
        mov     eax, arg
  end if
        call    debug_outdec
        popad
        popf
}
dpd fix debug_print_dec
;---------------------------------
debug_outdec:
; Sends the unsigned decimal representation of eax to debug_outchar,
; most significant digit first.
;
; In:    eax = number
; Clobb: eax, ecx, edx, flags
;
; Digits are produced least-significant first by repeated division by
; 10 and parked on the stack above a sentinel; popping them back gives
; the printing order.  The sentinel is -'0', so adding '0' to it yields
; zero, which no real digit ('0'..'9') can produce.
        push    10
        pop     ecx                     ; ecx = 10, the divisor
        push    -'0'                    ; end-of-digits sentinel
.l0:
        xor     edx, edx                ; edx:eax / ecx needs edx cleared
        div     ecx                     ; eax = quotient, edx = next digit
        push    edx
        test    eax, eax
        jnz     .l0
.l1:
        pop     eax
        add     al, '0'                 ; digit -> ASCII; sentinel -> 0
        jz      .done                   ; stop BEFORE output: do not emit a NUL byte
        call    debug_outchar
        jmp     .l1
.done:
        ret
;---------------------------------
debug_outchar:
; Outputs one character through system call 63, subfunction 1.
;
; In:    al = character
; All general-purpose registers and the flags are preserved.
        pushf
        pushad
        mov     cl, al                  ; the call takes the byte in cl
        mcall   63, 1                   ; eax = 63, ebx = 1
        popad
        popf
        ret
debug_outstr:
; Outputs a zero-terminated string, one byte per system call
; (function 63, subfunction 1).
;
; In:    edx -> string
; Out:   edx -> the terminating zero byte
; Clobb: eax, ebx, cl, flags
        mov     eax, 63
        mov     ebx, 1
        jmp     .fetch
.emit:
        mcall                           ; output the byte in cl
        inc     edx
.fetch:
        mov     cl, [edx]
        test    cl, cl
        jnz     .emit                   ; zero byte ends the string
        ret
_debug_crlf db 0x0D, 0x0A, 0            ; CR, LF, terminating zero

; newline
; Passes the CR/LF string above to debug_outstr.
; All general-purpose registers and the flags are preserved.
macro newline
{
        pushf
        pushad
        mov     edx, _debug_crlf
        call    debug_outstr
        popad
        popf
}
; print message
; Outputs <message> followed by a line break.
macro print message
{
        debug_print message
        newline
}
; pregs
; Dumps eax, ebx, ecx and edx in decimal, two registers per line.
macro pregs
{
        debug_print     "EAX: "
        debug_print_dec eax
        debug_print     " EBX: "
        debug_print_dec ebx
        newline
        debug_print     "ECX: "
        debug_print_dec ecx
        debug_print     " EDX: "
        debug_print_dec edx
        newline
}
; debug_print_hex arg   (short alias: dph)
; Passes <arg> in eax to debug_outhex.
; All general-purpose registers and the flags are preserved.
; <arg> is read after pushf/pushad, i.e. with esp already lowered.
macro debug_print_hex arg
{
        pushf
        pushad
  if arg eq eax
        ; value is already in eax - nothing to load
  else
        mov     eax, arg
  end if
        call    debug_outhex
        popad
        popf
}
dph fix debug_print_hex
debug_outhex:
; Outputs eax as eight upper-case hexadecimal digits, most significant
; nibble first, one system call (function 63, subfunction 1) per digit.
;
; In:    eax = number
; Out:   eax unchanged (eight 4-bit rotations make a full turn)
; Clobb: ecx, edx, flags
        mov     edx, 8                  ; digits left to print
.next_digit:
        rol     eax, 4                  ; bring the next nibble into bits 0..3
        mov     ecx, eax
        and     ecx, 0x0f               ; ecx = nibble value, 0..15
        mov     cl, [__hexdigits + ecx] ; cl = its ASCII digit
        pushad                          ; the call setup overwrites eax/ebx
        mcall   63, 1
        popad
        dec     edx
        jnz     .next_digit
        ret

; nibble value -> ASCII digit lookup table
__hexdigits:
        db '0123456789ABCDEF'