diff --git a/programs/other/fft/FHT4A.asm b/programs/other/fft/FHT4A.asm
index 43962aa2ce..5b7d3dbeb6 100644
--- a/programs/other/fft/FHT4A.asm
+++ b/programs/other/fft/FHT4A.asm
@@ -25,7 +25,7 @@ use32
 include '../../macros.inc'
 include '../debug.inc'
-include 'FHT4i.inc'
+include 'fht4code.asm'
 START: ; start of execution
diff --git a/programs/other/fft/FHT4i.inc b/programs/other/fft/fht4code.asm
similarity index 93%
rename from programs/other/fft/FHT4i.inc
rename to programs/other/fft/fht4code.asm
index ec1984ec3b..746a8285ba 100644
--- a/programs/other/fft/FHT4i.inc
+++ b/programs/other/fft/fht4code.asm
@@ -183,7 +183,46 @@ step1:
     add ebx, 32
     cmp ebx, esi
     jnz .loop
-ret
+ret
+
+;=================================================================
+; SSE3 version: Step1 -- in-place sum/difference pass over groups of 4 doubles {f0,f1,f2,f3} (uses movddup / addsubpd)
+; in: [esp+4] = number of doubles, [esp+8] = data pointer; changes ebx, esi, xmm0-xmm3, flags; register notes below read "high qword ; low qword"
+;==========================
+
+align 4
+step1_sse:
+    mov ebx, [esp+8]            ; ebx = address of the first double
+    mov esi, [esp+4]            ; esi = number of doubles
+    shl esi, 3                  ; ... times 8 bytes per double
+    add esi, ebx                ; esi = end address (first byte past the data)
+
+.loop:
+    movddup xmm0, [ebx]         ; xmm0: f0 ; f0
+    movddup xmm1, [ebx+8]       ; xmm1: f1 ; f1
+    addsubpd xmm0, xmm1         ; xmm0: t1 ; t2 ( + - )   t1 = f0+f1 (high), t2 = f0-f1 (low)
+    movddup xmm1, [ebx+16]      ; xmm1: f2 ; f2
+    movddup xmm2, [ebx+24]      ; xmm2: f3 ; f3
+    addsubpd xmm1, xmm2         ; xmm1: t3 ; t4 ( + - )   t3 = f2+f3 (high), t4 = f2-f3 (low)
+
+    movddup xmm2, xmm0          ; xmm2: t2 ; t2   (movddup duplicates the low qword)
+    movddup xmm3, xmm1          ; xmm3: t4 ; t4
+    addsubpd xmm2, xmm3         ; xmm2: 2+4; 2-4
+    shufpd xmm2, xmm2, 1        ; xmm2: 2-4; 2+4  (swap halves so the sum lands at the lower address)
+    movapd [ebx+16], xmm2       ; [ebx+16] = t2+t4, [ebx+24] = t2-t4 (movapd: address must be 16-byte aligned)
+
+    shufpd xmm0, xmm0, 1        ; xmm0: t2 ; t1   (bring t1 into the low half for movddup)
+    shufpd xmm1, xmm1, 1        ; xmm1: t4 ; t3   (bring t3 into the low half for movddup)
+    movddup xmm2, xmm0          ; xmm2: t1 ; t1
+    movddup xmm3, xmm1          ; xmm3: t3 ; t3
+    addsubpd xmm2, xmm3         ; xmm2: 1+3; 1-3
+    shufpd xmm2, xmm2, 1        ; xmm2: 1-3; 1+3
+    movapd [ebx], xmm2          ; [ebx] = t1+t3, [ebx+8] = t1-t3
+
+    add ebx, 32                 ; advance to the next group of 4 doubles
+    cmp ebx, esi
+    jnz .loop                   ; leaves only when ebx == esi exactly: count is expected to be a non-zero multiple of 4
+ret
 ; local stack definitions
 ;===========================================================================
diff --git a/programs/other/rtfread/trunk/debug.inc b/programs/other/rtfread/trunk/debug.inc
deleted file mode 100644
index b1bb13cd13..0000000000
--- a/programs/other/rtfread/trunk/debug.inc
+++ /dev/null
@@ -1,137 +0,0 @@
-macro debug_print str
-{
- 
local ..string, ..label         ; label names made unique for every expansion of the macro
-
-  jmp ..label                   ; jump over the inline string bytes
-  ..string db str,0             ; the message, zero-terminated
-  ..label:
-
-  pushf                         ; keep flags and all general registers intact around the output call
-  pushad
-  mov edx,..string              ; edx = address of the zero-terminated message
-  call debug_outstr
-  popad
-  popf
-}
-
-dps fix debug_print
-
-macro debug_print_dec arg
-{
-  pushf
-  pushad
-  if ~arg eq eax                ; no load needed when the argument already is eax
-    mov eax,arg
-  end if
-  call debug_outdec
-  popad
-  popf
-}
-
-dpd fix debug_print_dec
-
-;--------------------------------- (the trailing ;N notes look like instruction sizes in bytes -- TODO confirm)
-debug_outdec: ;(eax - num, edi-str) -- emits eax as unsigned decimal; edi is not referenced by this routine
-  push 10 ;2
-  pop ecx ;1                    ecx = 10, the divisor
-  push -'0' ;2                  end marker: turns into 0 after the "add al,'0'" below
-  .l0:
-  xor edx,edx ;2                edx:eax = eax for the unsigned divide
-  div ecx ;2                    eax = quotient, edx = next digit (least significant first)
-  push edx ;1                   digits are stacked so they pop most significant first
-  test eax,eax ;2
-  jnz .l0 ;2                    repeat until the quotient is 0
-  .l1:
-  pop eax ;1
-  add al,'0' ;2                 digit -> ASCII; ZF is set only for the end marker
-  call debug_outchar ; stosb    (note presumably left from a buffer-writing variant); debug_outchar restores flags; the end marker (al = 0) is emitted too
-  jnz .l1 ;2                    tests the ZF produced by the add above
-  ret ;1
-;---------------------------------
-
-debug_outchar: ; al - char      all registers and flags are preserved
-  pushf
-  pushad
-  mov cl,al                     ; character goes in cl
-  mov eax,63                    ; system function 63, subfunction 1 -- presumably "write byte to the debug board"; confirm in the syscall docs
-  mov ebx,1
-  mcall
-  popad
-  popf
-ret
-
-debug_outstr:                   ; in: edx = zero-terminated string; sets eax, ebx, cl and advances edx (callers wrap it in pushad/popad)
-  mov eax,63
-  mov ebx,1
-  @@:
-  mov cl,[edx]                  ; next character
-  test cl,cl
-  jz @f                         ; stop at the terminating zero
-  mcall                         ; system call with eax = 63, ebx = 1, cl = character
-  inc edx
-  jmp @b
-  @@:
-  ret
-
-_debug_crlf db 13, 10, 0
-
-macro newline
-{
-  pushf
-  pushad
-  mov edx, _debug_crlf          ; CR, LF, zero terminator
-  call debug_outstr
-  popad
-  popf
-}
-
-macro print message
-{
-  dps message
-  newline
-}
-
-macro pregs
-{
-  dps "EAX: "
-  dpd eax
-  dps " EBX: "
-  dpd ebx
-  newline
-  dps "ECX: "
-  dpd ecx
-  dps " EDX: "
-  dpd edx
-  newline
-}
-
-macro debug_print_hex arg
-{
-  pushf
-  pushad
-  if ~arg eq eax                ; no load needed when the argument already is eax
-    mov eax, arg
-  end if
-  call debug_outhex
-  popad
-  popf
-}
-dph fix debug_print_hex
-
-debug_outhex:
-  ; eax - number, emitted as 8 hex digits, most significant first; ecx and edx are changed
-  mov edx, 8                    ; digit counter
-  .new_char:
-  rol eax, 4                    ; rotate the next nibble into bits 0..3 (8 rotations bring eax back to its starting value)
-  movzx ecx, al
-  and cl, 0x0f                  ; ecx = nibble value 0..15
-  mov cl, [__hexdigits + ecx]   ; cl = ASCII hex digit
-  pushad                        ; keep eax (the number) and edx (the counter) across the system call
-  mcall 63, 1                   ; presumably loads eax = 63, ebx = 1 before the call (macro defined outside this file); cl = character
-  popad
-  dec edx
-  jnz .new_char
-ret
-
-__hexdigits:
-  db '0123456789ABCDEF'
\ No newline at end of file