an experimental kernel with a mad syscall and FHT inside

git-svn-id: svn://kolibrios.org@1641 a494cfbc-eb01-0410-851d-a64ba20cac60
2010-10-03 23:11:42 +00:00 · 2010-10-03 23:11:42 +00:00 · 112a3665cb
commit 112a3665cb
parent abbc09c677
5 changed files with 661 additions and 479 deletions
--- a/kernel/branches/Kolibri-A/trunk/bus/pci/PCIe.inc
+++ b/kernel/branches/Kolibri-A/trunk/bus/pci/PCIe.inc
@ -76,9 +76,6 @@ pci_ext_config:
 	shl	eax, 8
 	test	eax, 0x000F0000 	; MMIO Base must be bus0-aligned
 	jnz	.no_pcie_cfg
 ;       -- it looks like a true PCIe config space;
 	ret	; <<<<<<<<<<< OK >>>>>>>>>>>
 .no_pcie_cfg:
@ -92,6 +89,7 @@ pci_ext_config:
 .pcie_failed:
 	mov	esi, boot_pcie_fail
 	call	boot_log
 	xor	eax, eax
 	ret	; <<<<<<<<< FAILURE >>>>>>>>>
--- a/kernel/branches/Kolibri-A/trunk/core/syscall.inc
+++ b/kernel/branches/Kolibri-A/trunk/core/syscall.inc
@ -21,37 +21,6 @@ cross_order:
 	call	dword [servetable+edi*4]
 	ret
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;                                                            ;;
 ;;                     SYSENTER ENTRY                         ;;
 ;;                                  (not used on AMD systems) ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;align 32
 ;sysenter_entry:
 ;        ; Íàñòðàèâàåì ñòåê
 ;        mov     esp, [ss:tss._esp0]
 ;        sti
 ;        push    ebp                     ; save app esp + 4
 ;        mov     ebp, [ebp]              ; ebp - original ebp
 ;        ;------------------
 ;        pushad
 ;        cld
 ;
 ;        movzx   eax, al
 ;        call    dword [servetable2 + eax * 4]
 ;       popad
 ;       ;------------------
 ;       xchg    ecx, [ss:esp]           ; â âåðøèí ñòåêà - app ecx, ecx - app esp + 4
 ;        sub     ecx, 4
 ;        xchg    edx, [ecx]              ; edx - return point, & save original edx
 ;        push    edx
 ;        mov     edx, [ss:esp + 4]
 ;        mov     [ecx + 4], edx          ; save original ecx
 ;        pop     edx
 ;        sysexit
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;                                                            ;;
 ;;                   SYSTEM CALL ENTRY                        ;;
@ -69,17 +38,17 @@ i40:
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;;                                                            ;;
-;;                     SYSCALL ENTRY                          ;;
+;;               SYSCALL ENTRY   --    NEW  !!!               ;;
 ;;                                                            ;;
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 align 32
 syscall_entry:
-;       push    ecx
+;        sti
-	  sti
+	push ecx
 	and   eax, 3
 	call	dword [servetable3 + eax * 4]
-
+	pop	ecx
 ;      pop     ecx
 	sysret
 iglobal
@ -89,7 +58,6 @@ iglobal
  align 4
  servetable:
      dd socket 		 ; 53-Socket interface
      dd 0
      dd 0
@ -187,8 +155,8 @@ iglobal
 align 4
 servetable3:
-	dd 	FFT4	; 0
+	dd	FHT_4	 ; 0
-	dd 	FFT4	; 1
+	dd	FHT_4	 ; 1
 	dd	paleholder	; 2
 	dd	sys_end ; last
--- a/kernel/branches/Kolibri-A/trunk/kernel32.inc
+++ b/kernel/branches/Kolibri-A/trunk/kernel32.inc
@ -16,30 +16,6 @@
 $Revision$
 ;struc db [a] { common . db a
 ;  if ~used .
 ;    display 'not used db: ',`.,13,10
 ;  end if }
 ;struc dw [a] { common . dw a
 ;  if ~used .
 ;    display 'not used dw: ',`.,13,10
 ;  end if }
 ;struc dd [a] { common . dd a
 ;  if ~used .
 ;    display 'not used dd: ',`.,13,10
 ;  end if }
 ;struc dp [a] { common . dp a
 ;  if ~used .
 ;    display 'not used dp: ',`.,13,10
 ;  end if }
 ;struc dq [a] { common . dq a
 ;  if ~used .
 ;    display 'not used dq: ',`.,13,10
 ;  end if }
 ;struc dt [a] { common . dt a
 ;  if ~used .
 ;    display 'not used dt: ',`.,13,10
 ;  end if }
 struc POINT {
  .x dd ?
@ -244,7 +220,7 @@ include "fs/ext2.inc"     ; read / write for ext2 filesystem
 ; sound
 include "sound/playnote.inc"	; player Note for Speaker PC
-include "sound/FFT.inc" 	; fast Fourier transform routines
+include "sound/FHT.inc" 	; fast Fourier transform routines
 ; display
@ -311,3 +287,4 @@ include "core/ext_lib.inc"
 ; list of external functions
 include "imports.inc"
--- a/kernel/branches/Kolibri-A/trunk/sound/FHT.INC
+++ b/kernel/branches/Kolibri-A/trunk/sound/FHT.INC
@ -5,62 +5,51 @@
 ; free KolibriOS version - not to be ported to other OSes
 ; ==========================================================
 Power_of_4	equ	5
 NumPoints	equ	1024
 N_2		equ	NumPoints / 2
 N_4		equ	NumPoints / 4
 ;=================================================================
 ; global constants
 align 8
-_root		dq	1.41421356237309504880169	; = sqrt(2)
+fht_r	   dq	   1.41421356237309504880169	   ; = sqrt(2)
-_root2	dq	0.70710678118654752440084	; = sqrt(2)/2
+fht_r2	   dq	   0.70710678118654752440084	   ; = sqrt(2)/2
-_c1		dq	0.92387953251128675612818	; = cos(pi/8)
+fht_c1	   dq	   0.92387953251128675612818	   ; = cos(pi/8)
-_s1		dq	0.38268343236508977172846	; = sin(pi/8)
+fht_s1	   dq	   0.38268343236508977172846	   ; = sin(pi/8)
 _dx		dq	0.00613592315154296875		; pi/512
 ;[_CosTable]	dd	0 ; N_2 elements
 ;[_SinTable]	dd	0 ; N_2 elements
 ;=================================================================
 ; parameter1:
 ; -- reg  dl (bits[3:0])   = Power_of_4
 ; -- reg edx && (-16) = 4k-aligned data array address
 ; returns:
 ; -- edx = Power_of_4
 ; -- ecx = N
 ; destroys:
 ; -- eax, ebx, ecx, edx, esi
 ;; ==========================
 align 4
 MakeSinCosTable:
 	mov	ebx, [_Sines]
 	mov	ecx, [_Cosins]
 	xor	eax,  eax
 	fld	[_dx]			; st : dx
 	fldz				; st : 0, dx
 .loop:
 	fld	st0			; st : x, x, dx
 	FSINCOS 			; st : cos, sin, x, dx
 	fstp	qword [ecx+eax*8]	; st : sin, x, dx
 	fstp	qword [ebx+eax*8]	; st : x, dx
 	fadd	st0, st1		; st : x+dx, dx
 	inc	eax
 	cmp	eax, N_2
 	jne	.loop
 	fstp	st0			; st : dx
 	fstp	st0			; st : <empty>
 	ret
 ; ================================================================
 align 4
 BitInvert:
-	mov	esi, [x]	; array of qwords
+	mov	esi, edx
 	and	esi, 0xFFFFFFF0
 	and	edx, 0x0F
 	push	edx
 	mov	cl, dl
 	xor	eax, eax
 	inc	eax
 	shl	eax, cl
 	shl	eax, cl
 	push	eax
 	xor	ecx, ecx		; index term
 align 4
 .newterm:
 	inc	ecx
-	cmp	ecx, NumPoints
+	cmp	ecx, [esp]		; N
 	jge	.done
 	xor	eax, eax
 	mov	edx, ecx
 	xor	bl, bl
-
+align 4
 .do_invert:
 	inc	bl
-	cmp	bl, Power_of_4
+	cmp	bl, byte[esp+4] ; Power_of_4
 	jg	.switch
 	mov	bh, dl
@ -69,6 +58,7 @@ BitInvert:
 	or	al, bh
 	shr	edx, 2
 	jmp	.do_invert
 align 8
 .switch:
 	cmp	eax, ecx
@ -80,17 +70,32 @@ BitInvert:
 	fstp	qword [esi+ecx*8]
 	jmp	.newterm
 align 4
 .done:
 	pop	ecx
 	pop	edx
 	ret
 ;=================================================================
 ;=================================================================
 ; stdcall parameters:
 ; -- [esp+4]  = N
 ; -- [esp+8]  = 4k-aligned data array  address
 ; returns:
 ; -- nothing
 ; destroys:
 ; -- ebx, esi
 ;; ==========================
 align 4
 step1:
-	mov	esi, [x]
+	mov	ebx, [esp+8]
-	mov	ebx, esi
+	mov	esi, [esp+4]
-	add	esi, NumPoints*8
+	shl	esi, 3
 	add	esi, ebx
 align 4
 .loop:
 	fld	qword[ebx]
 	fld	qword[ebx+8]
@ -119,19 +124,65 @@ step1:
 	add	ebx, 32
 	cmp	ebx, esi
 	jnz	.loop
 ret
-
+;       local stack definitions
 ;
 ;===========================================================================
 step2:				; Step2
 ;===========================================================================
 _t0	equ	dword [esp]
 _t1	equ	dword[esp+4]
 _t2	equ	dword[esp+8]
 _t3	equ	dword[esp+12]
 _t4	equ	dword[esp+16]
 _t5	equ	dword[esp+20]
 _t6	equ	dword[esp+24]
 _t7	equ	dword[esp+28]
 _t8	equ	dword[esp+32]
 _t9	equ	dword[esp+36]
-	mov	eax, [_f]
+_l1   equ	dword[esp+40]
-	mov	ebx, eax
+_l2   equ	dword[esp+44]
-	add	eax, NumPoints*8
+_l3   equ	dword[esp+48]
 _l4   equ	dword[esp+52]
 _l5   equ	dword[esp+56]
 _l6   equ	dword[esp+60]
 _l7   equ	dword[esp+64]
 _l8   equ	dword[esp+68]
 _l9   equ	dword[esp+72]
 _l0   equ	dword[esp+76]
 _d1   equ	dword[esp+80]
 _d2   equ	dword[esp+84]
 _d3   equ	dword[esp+88]
 _d4   equ	dword[esp+92]
 _d5   equ	dword[esp+96]
 _d6   equ	dword[esp+100]
 _j5   equ	dword[esp+104]
 _jj   equ	dword[esp+108]
 _end_of_array	equ	dword[esp+112]
 _step		equ	word [esp+116]
 ;=================================================================
 ; cdecl parameters:
 ; -- [ebp+8]   = N
 ; -- [ebp+12]  = 4k-aligned data array  address
 ; returns:
 ; -- nothing
 ; destroys:
 ; -- eax, ebx
 ; locals:
 ; -- 10 stack-located dwords (_t0 ... _t9)
 ;; ==========================
 align 4
 step2:
 	push	ebp
 	mov	ebp, esp
 	sub	esp, 40
 	mov	ebx, [ebp+12]
 	mov	eax, [ebp+ 8]
 	shl	eax, 3
 	add	eax, ebx
 align 4
 .loop_i:
 ; -- quad subelements  +0, +4, +8 and +12 (simpliest operations)
@ -163,7 +214,7 @@ step2:				; Step2
 ; -- even subelements  +2, +6, +10 and +14 (2 multiplications needed)
 	fld	qword[ebx+8*2]
 	fld	qword[ebx+8*6]
-	fld	[_root]
+	fld	[fht_r]
 	fmul	st1, st0	; st : r, t2, t1
 	fld	qword[ebx+8*10]
 	fxch	st1		; st : r, t3, t2, t1
@ -194,20 +245,20 @@ step2:				; Step2
 	fsub	st0, st1
 	fxch	st1
 	faddp	st2, st0	; st : (f[l3]-f[l7]), (f[l3]+f[l7])
-	fld	[_root2]
+	fld	[fht_r2]
 	fmul	st2, st0
 	fmulp	st1, st0	; st : t9, t6
 	fld	qword[ebx+8*3]
 	fld	st0
 	fadd	st0, st2	; st : t1, f[l5], t9, t6
-	fstp	[_t1]
+	fstp	_t1
 	fsub	st0, st1
-	fstp	[_t2]
+	fstp	_t2
-	fstp	[_t9]	; (t9 never used)
+	fstp	_t9	; (t9 never used)
-	fstp	[_t6]		; st : <empty>
+	fstp	_t6		; st : <empty>
-	fld	[_c1]
+	fld	[fht_c1]
-	fld	[_s1]
+	fld	[fht_s1]
 	fld	qword[ebx+8*5]
 	fld	qword[ebx+8*7]
 	fld	st3		; st: c1, f[l6], f[l2], s1, c1
@ -215,13 +266,13 @@ step2:				; Step2
 	fld	st1		; st: f_6, f_2*c, f_6, f_2, s, c
 	fmul	st0, st4	; st: f_6*s, f_2*c, f_6, f_2, s, c
 	faddp	st1, st0	; st: t5, f_6, f_2, s, c
-	fstp	[_t5]		; st: f_6, f_2, s, c
+	fstp	_t5		; st: f_6, f_2, s, c
 	fld	st3		; st: c, f_6, f_2, s, c
 	fmul	st0, st1
 	fld	st3
 	fmul	st0, st3	; st: f_2*s, f_6*c, f_6, f_2, s, c
 	fsubp	st1, st0	; st: t8, f_6, f_2, s, c
-	fstp	[_t8]		; st: f_6, f_2, s, c
+	fstp	_t8		; st: f_6, f_2, s, c
 	fstp	st0		; st: f_2, s, c
 	fstp	st0		; st: s, c
@ -232,51 +283,51 @@ step2:				; Step2
 	fld	st3
 	fmul	st0, st3	; st: f_4*s, f_8*c, f_8, f_4, s, c
 	faddp	st1, st0	; st: t7, f_8, f_4, s, c
-	fld	[_t5]		; st: t5, t7, f_8, f_4, s, c
+	fld	_t5		; st: t5, t7, f_8, f_4, s, c
 	fsub	st0, st1	; st: t4, t7, f_8, f_4, s, c
-	fstp	[_t4]
+	fstp	_t4
-	fstp	[_t7]		; st: f_8, f_4, s, c
+	fstp	_t7		; st: f_8, f_4, s, c
 	fld	st3		; st: c, f_8, f_4, s, c
 	fmul	st0, st2
 	fld	st3
 	fmul	st0, st2	; st: f_8*s, f_4*c, f_8, f_4, s, c
 	fsubp	st1, st0	; st:-t0, f_8, f_4, s, c
 	fchs
-	fld	[_t8]
+	fld	_t8
 	fchs			; st:-t8, t0, f_8, f_4, s, c
 	fsub	st0, st1	; st: t3, t0, f_8, f_4, s, c
-	fstp	[_t3]
+	fstp	_t3
-	fstp	[_t0]		; st: f_8, f_4, s, c
+	fstp	_t0		; st: f_8, f_4, s, c
 	fstp	st0		; st: f_4, s, c
 	fstp	st0		; st: s, c
 	fstp	st0		; st: c
 	fstp	st0		; st: <empty>
-	fld	[_t1]
+	fld	_t1
-	fld	[_t4]
+	fld	_t4
 	fld	st1
 	fsub	st0, st1
 	fstp	qword[ebx+8*11] ; f[l7] = t1-t4
 	faddp	st1, st0
 	fstp	qword[ebx+8*3]	; f[l5] = t1+t4
-	fld	[_t2]
+	fld	_t2
-	fld	[_t3]
+	fld	_t3
 	fld	st1
 	fsub	st0, st1
 	fstp	qword[ebx+8*15] ; f[l8]
 	faddp	st1, st0
 	fstp	qword[ebx+8*7]	; f[l6]
-	fld	[_t6]
+	fld	_t6
 	fld	qword[ebx+8]
 	fld	st1
 	fsub	st0, st1
 	fxch	st1
 	faddp	st2, st0	; st : t2, t1
-	fld	[_t8]
+	fld	_t8
-	fsub	[_t0]
+	fsub	_t0
-	fld	[_t5]
+	fld	_t5
-	fadd	[_t7]		; st : t4, t3, t2, t1
+	fadd	_t7		; st : t4, t3, t2, t1
 	fld	st3
 	fsub	st0, st1
@ -294,36 +345,42 @@ step2:				; Step2
 	cmp	ebx, eax
 	jb	.loop_i
 	mov	esp, ebp
 	pop	ebp
 ret
 align 8 	; shared local vars
 _t0	dq	0
 _t1	dq	0
 _t2	dq	0
 _t3	dq	0
 _t4	dq	0
 _t5	dq	0
 _t6	dq	0
 _t7	dq	0
 _t8	dq	0
 _t9	dq	0
-;===================================================================
+;=================================================================
 ; cdecl parameters:
 ; -- [ebp+8]   = N
 ; -- [ebp+12]  = p
 ; -- [ebp+16]  = 4k-aligned data array  address
 ; -- [ebp+20]  = 4k-aligned SinCosTable address
 ; returns:
 ; -- nothing
 ; destroys:
 ; -- all GPRegs
 ; locals:
 ; -- 120 stack-located dwords (_t0 ... _t9, _l0..._step)
 ;; ==========================
 align 4
 step3:
-;===================================================================
+	push	ebp
-
+	mov	ebp, esp
 	sub	esp, 120
 ; 283  : {
 ; 293  :   for (l=3; l<=p; l++)
 	mov	cx, 0x0200
 align 4
 .newstep:
 	inc	ch
-	cmp	ch, Power_of_4
+	cmp	ch, byte[ebp+12]
 	jg	.done
-	mov	[.step], cx
+	mov	_step, cx
 ; 294  :   {
 ; 295  :     d1 = 1 << (l + l - 3);
@ -333,61 +390,63 @@ step3:
 	sub	cl, 3
 	mov	edx, 1
 	shl	edx, cl
-	mov	[.d1], edx
+	mov	_d1, edx
 ; 296  :     d2 = d1 << 1;
 	shl	edx, 1
-	mov	[.d2], edx
+	mov	_d2, edx
 	mov	eax, edx
 ; 297  :     d3 = d2 << 1;
 	shl	edx, 1
-	mov	[.d3], edx
+	mov	_d3, edx
 ; 298  :     d4 = d2 + d3;
 	add	eax, edx
-	mov	[.d4], eax
+	mov	_d4, eax
 ; 299  :     d5 = d3 << 1;
 	shl	edx, 1
-	mov	[.d5], edx
+	mov	_d5, edx
 	shl	edx, 3
-	mov	[.d6], edx	; d6 = d5*8 to simplify index operations
+	mov	_d6, edx	; d6 = d5*8 to simplify index operations
 ; 339  :         j5 = N / d5;   ; moved out of internal loop
-	mov	cl, Power_of_4
+	mov	cl, [ebp+12]
 	sub	cl, ch
 	add	cl, cl
 	mov	edx, 1
 	shl	edx, cl
-	mov	[.j5], edx
+	mov	_j5, edx
 ; 300  :
 ; 301  :     for (j=0; j<N; j+=d5)
-	mov	esi, [_f]
+	mov	ebx, [ebp+16]
-	mov	ebx, esi
+	mov	esi, [ebp+8]
-	add	esi, NumPoints*8
+	shl	esi, 3
-	mov	[.end_of_array], esi
+	add	esi, ebx
 	mov	_end_of_array, esi
 align 4
 .next_j:
 ; {
 ; t1 = f[j] + f[j+d2];
-	mov	eax, [.d2]
+	mov	eax, _d2
 	fld	qword[ebx]
 	fld	qword[ebx+eax*8]
 	fld	st1
 	fadd	st0, st1
-	fstp	[_t1]
+	fstp	_t1
 ; t2 = f[j] - f[j+d2];
 	fsubp	st1, st0
-	fstp	[_t2]
+	fstp	_t2
 ; t3 = f[j+d3] + f[j+d4];
-	mov	edi, [.d3]
+	mov	edi, _d3
 	fld	qword[ebx+edi*8]
-	mov	edx, [.d4]
+	mov	edx, _d4
 	fld	qword[ebx+edx*8]
 	fld	st1
 	fsub	st0, st1		; st : t4, f4, f3
@ -398,7 +457,7 @@ step3:
 ; f[j+d4] = t2 - t4;
 ; f[j+d3] = t2 + t4;
-	fld	[_t2]
+	fld	_t2
 	fld	st0
 	fsub	st0, st2		; st : f4, t2, t4, t3
 	fstp	qword[ebx+edx*8]	; st : t2, t4, t3
@ -407,7 +466,7 @@ step3:
 ; f[j+d2] = t1 - t3;
 ; f[j]    = t1 + t3;
-	fld	[_t1]
+	fld	_t1
 	fst	st1
 	fsub	st0, st2		; st : f2, t1, t3
 	fstp	qword[ebx+eax*8]	; st : t1, t3
@ -416,7 +475,7 @@ step3:
 	fstp	st0
 ; jj = j + d1;     / ??
-	mov	edi, [.d1]
+	mov	edi, _d1
 	shl	edi, 3		; = d1*8
 	mov	edx, edi
 	mov	eax, edi
@ -432,7 +491,7 @@ step3:
 ; t2 = f[jj+d2] * r;
 	fld	qword [edi+eax]
-	fld	[_root]
+	fld	[fht_r]
 	fmul	st1, st0	; st : r,  t2, t3, t1
 ; t4 = f[jj+d4] * r
 	fmul	qword [edx+eax] ; st : t4, t2, t3, t1
@ -461,58 +520,61 @@ step3:
 ; for (k=1; k<d1; k++)
 	xor	ecx, ecx	; ecx = k
-	mov	[.jj], ecx
+	mov	_jj, ecx
 align 4
 .next_k:
 	inc	ecx
-	cmp	ecx, [.d1]
+	cmp	ecx, _d1
 	jge	.done_k
 ; {
-	mov	eax, [.d2]	; the sector increment
+	mov	eax, _d2	; the sector increment
 ; l1 = j  + k;
 	mov	edx, ecx
-	mov	[.l1], edx	; [ebx+edx*8] --> f[j+k]
+	mov	_l1, edx	; [ebx+edx*8] --> f[j+k]
 ; l2 = l1 + d2;
 	add	edx, eax
-	mov	[.l2], edx
+	mov	_l2, edx
 ; l3 = l1 + d3;
 	add	edx, eax
-	mov	[.l3], edx
+	mov	_l3, edx
 ; l4 = l1 + d4;
 	add	edx, eax
-	mov	[.l4], edx
+	mov	_l4, edx
 ; l5 = j  + d2 - k;
 	mov	edx, eax
 	sub	edx, ecx
-	mov	[.l5], edx
+	mov	_l5, edx
 ; l6 = l5 + d2;
 	add	edx, eax
-	mov	[.l6], edx
+	mov	_l6, edx
 ; l7 = l5 + d3;
 	add	edx, eax
-	mov	[.l7], edx
+	mov	_l7, edx
 ; l8 = l5 + d4;
 	add	edx, eax
-	mov	[.l8], edx
+	mov	_l8, edx
 ; 340  :         j5 *= k;       // add-substituted multiplication
-	mov	eax, [.jj]
+	mov	eax, _jj
-	add	eax, [.j5]
+	add	eax, _j5
-	mov	[.jj], eax
+	mov	_jj, eax
 ; c1 = C[jj];
 ; s1 = S[jj];
-	mov	edi, [_Cosins]
+	mov	edi, [ebp+20]
 	fld	qword[edi+eax*8]
-	mov	esi, [_Sines]
+	mov	esi, [ebp+8]
 	shl	esi, 2
 	add	esi, edi
 	fld	qword[esi+eax*8]	; st : s1, c1
 ; t5 = f[l2] * c1 + f[l6] * s1;
 ; t8 = f[l6] * c1 - f[l2] * s1;
-	mov	edx, [.l6]
+	mov	edx, _l6
 	fld	qword[ebx+edx*8]
-	mov	edx, [.l2]
+	mov	edx, _l2
 	fld	st0
 	fmul	st0, st2
 	fxch	st1
@ -521,10 +583,10 @@ step3:
 	fmul	st4, st0
 	fmulp	st3, st0		; st : f[l6]*c, f[l6]*s, f[l2]*s, f[l2]*c
 	fsub	st0, st2		; st :   t8,    f[l6]*s, f[l2]*s, f[l2]*c
-	fstp	[_t8]
+	fstp	_t8
 	faddp	st2, st0		; st :  f[l2]*s, t5
 	fstp	st0			; st :  t5
-	fstp	[_t5]			; st :  <empty>
+	fstp	_t5			; st :  <empty>
 ; c2 = C[2*jj];
 ; s2 = S[2*jj];
@ -534,9 +596,9 @@ step3:
 ; t6 = f[l3] * c2 + f[l7] * s2;
 ; t9 = f[l7] * c2 - f[l3] * s2;
-	mov	edx, [.l7]
+	mov	edx, _l7
 	fld	qword[ebx+edx*8]
-	mov	edx, [.l3]
+	mov	edx, _l3
 	fld	st0
 	fmul	st0, st2
 	fxch	st1
@ -545,22 +607,22 @@ step3:
 	fmul	st4, st0
 	fmulp	st3, st0		; st : f[l7]*c, f[l7]*s, f[l3]*s, f[l3]*c
 	fsub	st0, st2		; st :   t9,    f[l7]*s, f[l3]*s, f[l3]*c
-	fstp	[_t9]
+	fstp	_t9
 	faddp	st2, st0		; st :  f[l2]*s, t6
 	fstp	st0			; st :  t6
-	fstp	[_t6]			; st :  <empty>
+	fstp	_t6			; st :  <empty>
 ; c3 = C[3*jj];
 ; s3 = S[3*jj];
-	add	eax, [.jj]
+	add	eax, _jj
 	fld	qword[edi+eax*8]
 	fld	qword[esi+eax*8]	; st : s3, c3
 ; t7 = f[l4] * c3 + f[l8] * s3;
 ; t0 = f[l8] * c3 - f[l4] * s3;
-	mov	edx, [.l8]
+	mov	edx, _l8
 	fld	qword[ebx+edx*8]
-	mov	edx, [.l4]
+	mov	edx, _l4
 	fld	st0
 	fmul	st0, st2
 	fxch	st1
@ -569,192 +631,162 @@ step3:
 	fmul	st4, st0
 	fmulp	st3, st0		; st : f[l8]*c, f[l8]*s, f[l4]*s, f[l4]*c
 	fsub	st0, st2		; st :   t9,    f[l8]*s, f[l4]*s, f[l4]*c
-	fstp	[_t0]
+	fstp	_t0
 	faddp	st2, st0		; st : f[l2]*s, t7
 	fstp	st0			; st :  t7
-	fstp	[_t7]			; st :  <empty>
+	fstp	_t7			; st :  <empty>
 ; t1 = f[l5] - t9;
 ; t2 = f[l5] + t9;
-	mov	eax, [.l5]
+	mov	eax, _l5
 	fld	qword [ebx+eax*8]
-	fld	[_t9]
+	fld	_t9
 	fld	st0
 	fadd	st0, st2
-	fstp	[_t2]
+	fstp	_t2
 	fsubp	st1, st0
-	fstp	[_t1]
+	fstp	_t1
 ; t3 = - t8  - t0;
-	fld	[_t8]
+	fld	_t8
-	fadd	[_t0]
+	fadd	_t0
 	fchs
-	fstp	[_t3]
+	fstp	_t3
 ; t4 =   t5  - t7;
-	fld	[_t5]
+	fld	_t5
-	fsub	[_t7]
+	fsub	_t7
-	fstp	[_t4]
+	fstp	_t4
 ; f[l5] = t1 + t4;
-	fld	[_t1]
+	fld	_t1
-	fld	[_t4]
+	fld	_t4
 	fld	st0
 	fadd	st0, st2
 	fstp	qword [ebx+eax*8]
 ; f[l7] = t1 - t4;
-	mov	eax, [.l7]
+	mov	eax, _l7
 	fsubp	st1, st0
 	fstp	qword [ebx+eax*8]
 ; f[l6] = t2 + t3;
-	mov	eax, [.l6]
+	mov	eax, _l6
-	fld	[_t2]
+	fld	_t2
-	fld	[_t3]
+	fld	_t3
 	fld	st0
 	fadd	st0, st2
 	fstp	qword [ebx+eax*8]
 ; f[l8] = t2 - t3;
-	mov	eax, [.l8]
+	mov	eax, _l8
 	fsubp	st1, st0
 	fstp	qword [ebx+eax*8]
 ; t1 = f[l1] + t6;
-	mov	eax, [.l1]
+	mov	eax, _l1
 	fld	qword [ebx+eax*8]
-	fld	[_t6]
+	fld	_t6
 	fld	st0
 	fadd	st0, st2
-	fstp	[_t1]
+	fstp	_t1
 ; t2 = f[l1] - t6;
 	fsubp	st1, st0
-	fstp	[_t2]
+	fstp	_t2
 ; t3 =    t8 - t0;
-	fld	[_t8]
+	fld	_t8
-	fsub	[_t0]
+	fsub	_t0
-	fstp	[_t3]
+	fstp	_t3
 ; t4 =    t5 + t7;
-	fld	[_t5]
+	fld	_t5
-	fadd	[_t7]
+	fadd	_t7
-	fstp	[_t4]
+	fstp	_t4
 ; f[l1] = t1 + t4;
-	mov	eax, [.l1]
+	mov	eax, _l1
-	fld	[_t1]
+	fld	_t1
-	fld	[_t4]
+	fld	_t4
      fld     st0
 	fadd	st0, st2
 	fstp	qword [ebx+eax*8]
 ; f[l3] = t1 - t4;
-	mov	eax, [.l3]
+	mov	eax, _l3
 	fsubp	st1, st0
 	fstp	qword [ebx+eax*8]
 ; f[l2] = t2 + t3;
-	mov	eax, [.l2]
+	mov	eax, _l2
-	fld	[_t2]
+	fld	_t2
-	fld	[_t3]
+	fld	_t3
 	fld	st0
 	fadd	st0, st2
 	fstp	qword [ebx+eax*8]
 ; f[l4] = t2 - t3;
-	mov	eax, [.l4]
+	mov	eax, _l4
 	fsubp	st1, st0
 	fstp	qword [ebx+eax*8]
 ; 374  :       }
 	jmp	.next_k
 align 4
 .done_k:
 ; 375  :     }
-	add	ebx, [.d6]	; d6 = d5*8
+	add	ebx, _d6	; d6 = d5*8
-	cmp	ebx, [.end_of_array]
+	cmp	ebx, _end_of_array
 	jb	.next_j
 ; 376  :   }
-	mov	cx, [.step]
+	mov	cx, _step
 	jmp	.newstep
 .done:
-
+	mov	esp, ebp
 	pop	ebp
 ; 377  : }
 	ret
 align 4
 .l1   dd	0
 .l2   dd	0
 .l3   dd	0
 .l4   dd	0
 .l5   dd	0
 .l6   dd	0
 .l7   dd	0
 .l8   dd	0
 .l9   dd	0
 .l0   dd	0
 .d1   dd	0
 .d2   dd	0
 .d3   dd	0
 .d4   dd	0
 .d5   dd	0
 .d6   dd	0
 .j5   dd	0
 .jj   dd	0
 .end_of_array	dd	  0
 .step dw	0
 align 8
 		;=========== Step3 ends here ===========
-
+; =================================================================
 ;=================================================================
-;	syscall entry
+; parameters:
-;
+; -- [ebp+12]   = N
-_f  dd ?
+; -- [ebp+16]  = p
-_N  dd 1024	; number of points
+; -- [ebp+20]  = 4k-aligned data array  address
 ; -- [ebp+24]  = 4k-aligned SinCosTable address
 ; returns:
 ; -- nothing
 ; destroys:
 ; -- all GPRegs
 ;; ==========================
-_a		dd ?		; initial   data array
+align 4
 x      	dd 0    	; tranformed (float) data array
 _Cosins 	dd 0
 _Sines	dd 0
-FFT4:
+FHT_4:
-	or	al, al
+	push	ebp
-	jnz	.trans
+	mov	ebp, esp
-	mov	cl, Power_of_4
+
-	mov	eax, 1
+	mov	edx, [ebp+20]	; a
-	shl	eax, cl
+	mov	dl, byte[ebp+16]
 	shl	eax, cl
 	mov	[_N], eax
 	shl	eax, 2	; size of Sine table in bytes
 	add	eax, ebx
 	mov 	[_Sines], ebx
 	mov	[_Cosins], eax
 	cpuid
 	rdtsc
 	mov	[.time], eax
 	call	MakeSinCosTable
 	cpuid
 	rdtsc
 	sub	eax, [.time]
 	ret
 .trans:
 	mov	[x], 	ebx
 	mov	[_f], ebx
 	cli	;-----
 	cpuid
 	rdtsc
 	mov	[.time], eax
 	call BitInvert
-	call	step1
+	push	dword[ebp+20]	; a
-	call	step2
+	push	ecx		; N
 	call	step1		; 4-point transform
 	cmp	cl, 1
 	jz	.done
 	call	step2		; 16-point transform
 	cmp	byte[ebp+16],1	; p = 2 ?
 	jz	.done
 	pop	edx		; N
 	pop	ecx		; a
 	push	dword[ebp+24]	; t
 	push	ecx
 	push	dword[ebp+16]	; p
 	push	edx		; N
 	call	step3
-	cpuid
+.done:
-	rdtsc
+	mov	esp, ebp
-	sti	;----
+	pop	ebp
 	sub	eax, [.time]
 	ret
-.time	dd	0
+ret
--- a/kernel/branches/Kolibri-A/utilities/FFT/FHT4B.ASM
+++ b/kernel/branches/Kolibri-A/utilities/FFT/FHT4B.ASM
@ -0,0 +1,207 @@
 ;========================================================================
 ;=                                                                      =
 ;=         Fast Hartley Transform  routine demo for KolibriOS           =
 ;=                                                                      =
 ;=       Copyright (C) 2010, Artem Jerdev <kolibri@jerdev.co.uk>        =
 ;=                                                                      =
 ;=             refer to wiki.kolibtios.org for all details              =
 ;=                                                                      =
 ;========================================================================
 use32
 	       org    0x0
 	       db     'MENUET01'	      ; 8 byte id
 	       dd     0x01		      ; header version
 	       dd     START		      ; start of code
 	       dd     I_END		      ; size of image
 	       dd     0x100000		      ; memory for app
 	       dd     0xbfffc		      ; esp
 	       dd     0x0 , 0x0 	      ; I_Param , I_Icon
 include 'macros.inc'
 include 'debug.inc'
 include 'FHT4i.inc'
 START:				; start of execution
     call main
    mov  eax,-1 		; close this program
    int  0x40
 ;=============================================================
 ;Func: calculates a simple function
 ;      ff = (int)(500*exp(-t) * cos (2.5*t))
 ;               uses: eax, ebx
 ;------------
 Func:
 ; 9    : {
 ; 10   :   double x,t;
 ; 11   :   int f;
 ; 12   :
 ; 13   :   x = (i < N2) ? i : i - NUM_POINTS;
 	mov	eax, [ii]
 	cmp	eax, 512
 	jge	.index_negative
 	jmp	.index_correct
 .index_negative:
 	sub	eax, 1024
 .index_correct:
 	mov	[temp], eax
 ;        fild    [temp]
 ; 14   :   t = x / 16.0;
 ; f2xm1 argument (abs) must be less than 1, so
 	mov	[t_mod], eax
 	and	[t_mod], 0x0F	; x % 16
 	shr	eax, 4		; x / 16
 	mov	[t_div], eax
 	fild	[temp]
 ; 15   :   if (t<0) t = -t;
 	fabs
 exp_ok:
 ; 16   :   f = (int)(512*2^(-t) * cos (2.5*t));
 	fchs
 	f2xm1
 	fmul	[f500]
 	fstp	[tv93]
 	fld	[f2_5]
 	fmul	[tt]
 	fcos
 	fmul	[tv93]
 	fstp	[tt]
 	mov	bx, word[tt+6]
 	shr	bx,4
 	and	bx,0x07FF
 	add	ax,bx
 	shl	ax,4
 	and	word[tt+6], 0x800F
 	or	word[tt+6], ax
 	fld	[tt]
 	fstp   [ff]
 ; 17   :   return  f;
 ; 18   : }
 	ret
 ;---------------------------------------------------------
 ;       test data filler
 ;
 ;       uses eax, ebx, ecx
 FillData:
 ; 29   :    for (i=0; i<NUM_POINTS; i++)
 ; here : ecx = i
 	xor	ecx, ecx
 .funcloop:
 ; 30   :    {
 ; 31   :       ia[i] = Func(i);
 	mov	[ii], ecx
 	call	Func
 	fld	[ff]
 	fstp	qword [edx+ecx*8]
 ; 32   :    }
 	inc	ecx
 	cmp	ecx, [_in]	   ; ecx == N ?
 	jne	.funcloop
 	ret
 ;====================================================================
 ; main
 ;====================================================================
 align 4
 _ia	dd 0
 _ii	dd 0
 _ip	dd 0
 _in	dd 0
 _it	dd 0
 ;-----------------
 main:
 	 mov	eax, 68
 	 mov	ebx, 11
 	 int	0x40
 	fninit
 	 mov	cl,   2 	; power of 4
 	 mov	byte[_ip], cl
 	 mov	eax, 1
 	 shl	eax, cl
 	 shl	eax, cl
 	 mov	[_in], eax
 	 mov	dl, cl
 	 call	CreateSinCosTable
 	 mov	[_it], edx
 	 mov	ecx, [_in]
 	 shl	ecx, 3
 	 mov	ebx, 12
 	 mov	eax, 68
 	 int	0x40
 	 mov	[_ia], eax
 	 mov	edx, eax
 	call FillData
 	cpuid
 	rdtsc
 	mov	[t_0], eax
 	push	[_it]
 	push	[_ia]
 	push	[_ip]
 	push	[_in]
 ;        call    FHT_4
 	xor	eax, eax
 	syscall
 	add	esp, 16
 	cpuid
 	rdtsc
 	mov	[t_1], eax
 	sub	eax, [t_0]
 	debug_print_hex eax
 	print	'<- fht time'
 	mov	edx, [_it]
 	call	DestroySinCosTable
 	mov	ecx, [_ia]
 	mov	ebx, 13
 	mov	eax, 68
 	int	0x40
 	ret
 ; ========================================================
 ; static data
 ;----------------
 align 8
 ;f18     dq 0x4032000000000000
 f256	dq 256.01f
 f14_2	dq 14.2f
 f500	dq 0x407f400000000000
 f2_5	dq 0x4004000000000000
 tt	dq ?
 tv93	dq ?
 t_div	dd ?
 t_mod	dd ?
 temp	dd ?
 ff	dq ?	; return value (int)
 ii	dd ?	; argument (int) = array index
 t_1	dd ?
 t_0	dd ?
 fcontrol dw  0x0037f
 title	db ' Fast Hartley Transform Test - A.Jerdev 2010'
 I_END: