From 112a3665cb6587c0b03a5d82f23f07dc740c3a02 Mon Sep 17 00:00:00 2001 From: "Artem Jerdev (art_zh)" Date: Sun, 3 Oct 2010 23:11:42 +0000 Subject: [PATCH] an experimental kernel with a mad syscall and FHT inside git-svn-id: svn://kolibrios.org@1641 a494cfbc-eb01-0410-851d-a64ba20cac60 --- .../branches/Kolibri-A/trunk/bus/pci/PCIe.inc | 4 +- .../branches/Kolibri-A/trunk/core/syscall.inc | 228 ++++---- kernel/branches/Kolibri-A/trunk/kernel32.inc | 163 +++--- .../trunk/sound/{FFT.inc => FHT.INC} | 538 ++++++++++-------- .../Kolibri-A/utilities/FFT/FHT4B.ASM | 207 +++++++ 5 files changed, 661 insertions(+), 479 deletions(-) rename kernel/branches/Kolibri-A/trunk/sound/{FFT.inc => FHT.INC} (65%) create mode 100644 kernel/branches/Kolibri-A/utilities/FFT/FHT4B.ASM diff --git a/kernel/branches/Kolibri-A/trunk/bus/pci/PCIe.inc b/kernel/branches/Kolibri-A/trunk/bus/pci/PCIe.inc index 323934ced8..72d2ed33c4 100644 --- a/kernel/branches/Kolibri-A/trunk/bus/pci/PCIe.inc +++ b/kernel/branches/Kolibri-A/trunk/bus/pci/PCIe.inc @@ -76,9 +76,6 @@ pci_ext_config: shl eax, 8 test eax, 0x000F0000 ; MMIO Base must be bus0-aligned jnz .no_pcie_cfg - -; -- it looks like a true PCIe config space; - ret ; <<<<<<<<<<< OK >>>>>>>>>>> .no_pcie_cfg: @@ -92,6 +89,7 @@ pci_ext_config: .pcie_failed: mov esi, boot_pcie_fail call boot_log + xor eax, eax ret ; <<<<<<<<< FAILURE >>>>>>>>> diff --git a/kernel/branches/Kolibri-A/trunk/core/syscall.inc b/kernel/branches/Kolibri-A/trunk/core/syscall.inc index caad5d8a44..a65f85bad2 100644 --- a/kernel/branches/Kolibri-A/trunk/core/syscall.inc +++ b/kernel/branches/Kolibri-A/trunk/core/syscall.inc @@ -1,4 +1,4 @@ -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; ;; Copyright (C) KolibriOS team 2004-2007. All rights reserved. ;; ;; Distributed under terms of the GNU General Public License ;; @@ -10,47 +10,16 @@ $Revision$ ; Old style system call converter align 16 cross_order: - ; load all registers in crossed order - mov eax, ebx - mov ebx, ecx - mov ecx, edx - mov edx, esi - mov esi, edi - movzx edi, byte[esp+28 + 4] + ; load all registers in crossed order + mov eax, ebx + mov ebx, ecx + mov ecx, edx + mov edx, esi + mov esi, edi + movzx edi, byte[esp+28 + 4] sub edi, 53 ; all zeroes before - call dword [servetable+edi*4] - ret - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; ;; -;; SYSENTER ENTRY ;; -;; (not used on AMD systems) ;; -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; - -;align 32 -;sysenter_entry: -; ; Настраиваем стек -; mov esp, [ss:tss._esp0] -; sti -; push ebp ; save app esp + 4 -; mov ebp, [ebp] ; ebp - original ebp -; ;------------------ -; pushad -; cld -; -; movzx eax, al -; call dword [servetable2 + eax * 4] - -; popad -; ;------------------ -; xchg ecx, [ss:esp] ; в вершин стека - app ecx, ecx - app esp + 4 -; sub ecx, 4 -; xchg edx, [ecx] ; edx - return point, & save original edx -; push edx -; mov edx, [ss:esp + 4] -; mov [ecx + 4], edx ; save original ecx -; pop edx -; sysexit + call dword [servetable+edi*4] + ret ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; @@ -60,27 +29,27 @@ cross_order: align 16 i40: - pushad - cld - and eax, 0x07F - call dword [servetable2 + eax * 4] - popad - iretd + pushad + cld + and eax, 0x07F + call dword [servetable2 + eax * 4] + popad + iretd ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; ;; -;; SYSCALL ENTRY ;; +;; SYSCALL ENTRY -- NEW !!! ;; ;; ;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; + align 32 syscall_entry: -; push ecx - sti - and eax, 3 - call dword [servetable3 + eax * 4] - - ; pop ecx - sysret +; sti + push ecx + and eax, 3 + call dword [servetable3 + eax * 4] + pop ecx + sysret iglobal ;;;;;;;;;;;;;;;;;;;;;;;;;;;; @@ -89,18 +58,17 @@ iglobal align 4 servetable: - - dd socket ; 53-Socket interface + dd socket ; 53-Socket interface dd 0 dd 0 dd 0 dd 0 - dd file_system ; 58-Common file system interface + dd file_system ; 58-Common file system interface dd 0 dd 0 dd 0 - dd sys_pci ; 62-PCI functions - dd sys_msg_board ; 63-System message board + dd sys_pci ; 62-PCI functions + dd sys_msg_board ; 63-System message board ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; NEW SYSTEM FUNCTIONS TABLE ;; @@ -108,88 +76,88 @@ iglobal align 4 servetable2: - dd syscall_draw_window ; 0-DrawWindow - dd syscall_setpixel ; 1-SetPixel - dd sys_getkey ; 2-GetKey - dd sys_clock ; 3-GetTime - dd syscall_writetext ; 4-WriteText - dd delay_hs ; 5-DelayHs + dd syscall_draw_window ; 0-DrawWindow + dd syscall_setpixel ; 1-SetPixel + dd sys_getkey ; 2-GetKey + dd sys_clock ; 3-GetTime + dd syscall_writetext ; 4-WriteText + dd delay_hs ; 5-DelayHs dd syscall_openramdiskfile ; 6-OpenRamdiskFile - dd syscall_putimage ; 7-PutImage - dd syscall_button ; 8-DefineButton - dd sys_cpuusage ; 9-GetProcessInfo - dd sys_waitforevent ; 10-WaitForEvent - dd sys_getevent ; 11-CheckForEvent - dd sys_redrawstat ; 12-BeginDraw and EndDraw - dd syscall_drawrect ; 13-DrawRect - dd syscall_getscreensize ; 14-GetScreenSize - dd sys_background ; 15-bgr - dd sys_cachetodiskette ; 16-FlushFloppyCache - dd sys_getbutton ; 17-GetButton - dd sys_system ; 18-System Services - dd paleholder ; 19-reserved - dd sys_midi ; 20-ResetMidi and OutputMidi - dd sys_setup ; 21-SetMidiBase,SetKeymap,SetShiftKeymap,. - dd sys_settime ; 22-setting date,time,clock and alarm-clock + dd syscall_putimage ; 7-PutImage + dd syscall_button ; 8-DefineButton + dd sys_cpuusage ; 9-GetProcessInfo + dd sys_waitforevent ; 10-WaitForEvent + dd sys_getevent ; 11-CheckForEvent + dd sys_redrawstat ; 12-BeginDraw and EndDraw + dd syscall_drawrect ; 13-DrawRect + dd syscall_getscreensize ; 14-GetScreenSize + dd sys_background ; 15-bgr + dd sys_cachetodiskette ; 16-FlushFloppyCache + dd sys_getbutton ; 17-GetButton + dd sys_system ; 18-System Services + dd paleholder ; 19-reserved + dd sys_midi ; 20-ResetMidi and OutputMidi + dd sys_setup ; 21-SetMidiBase,SetKeymap,SetShiftKeymap,. + dd sys_settime ; 22-setting date,time,clock and alarm-clock dd sys_wait_event_timeout ; 23-TimeOutWaitForEvent - dd syscall_cdaudio ; 24-PlayCdTrack,StopCd and GetCdPlaylist - dd undefined_syscall ; 25-reserved - dd sys_getsetup ; 26-GetMidiBase,GetKeymap,GetShiftKeymap,. - dd undefined_syscall ; 27-reserved - dd undefined_syscall ; 28-reserved - dd sys_date ; 29-GetDate - dd sys_current_directory ; 30-Get/SetCurrentDirectory - dd undefined_syscall ; 31-reserved - dd undefined_syscall ; 32-reserved - dd undefined_syscall ; 33-reserved - dd undefined_syscall ; 34-reserved - dd syscall_getpixel ; 35-GetPixel - dd syscall_getarea ; 36-GetArea - dd readmousepos ; 37-GetMousePosition_ScreenRelative,. - dd syscall_drawline ; 38-DrawLine - dd sys_getbackground ; 39-GetBackgroundSize,ReadBgrData,. - dd set_app_param ; 40-WantEvents - dd syscall_getirqowner ; 41-GetIrqOwner - dd get_irq_data ; 42-ReadIrqData - dd sys_outport ; 43-SendDeviceData - dd sys_programirq ; 44-ProgramIrqs - dd reserve_free_irq ; 45-ReserveIrq and FreeIrq + dd syscall_cdaudio ; 24-PlayCdTrack,StopCd and GetCdPlaylist + dd undefined_syscall ; 25-reserved + dd sys_getsetup ; 26-GetMidiBase,GetKeymap,GetShiftKeymap,. + dd undefined_syscall ; 27-reserved + dd undefined_syscall ; 28-reserved + dd sys_date ; 29-GetDate + dd sys_current_directory ; 30-Get/SetCurrentDirectory + dd undefined_syscall ; 31-reserved + dd undefined_syscall ; 32-reserved + dd undefined_syscall ; 33-reserved + dd undefined_syscall ; 34-reserved + dd syscall_getpixel ; 35-GetPixel + dd syscall_getarea ; 36-GetArea + dd readmousepos ; 37-GetMousePosition_ScreenRelative,. + dd syscall_drawline ; 38-DrawLine + dd sys_getbackground ; 39-GetBackgroundSize,ReadBgrData,. + dd set_app_param ; 40-WantEvents + dd syscall_getirqowner ; 41-GetIrqOwner + dd get_irq_data ; 42-ReadIrqData + dd sys_outport ; 43-SendDeviceData + dd sys_programirq ; 44-ProgramIrqs + dd reserve_free_irq ; 45-ReserveIrq and FreeIrq dd syscall_reserveportarea ; 46-ReservePortArea and FreePortArea - dd display_number ; 47-WriteNum + dd display_number ; 47-WriteNum dd syscall_display_settings ; 48-SetRedrawType and SetButtonType dd sys_apm ; 49-Advanced Power Management (APM) dd syscall_set_window_shape ; 50-Window shape & scale - dd syscall_threads ; 51-Threads - dd stack_driver_stat ; 52-Stack driver status - dd cross_order ; 53-Socket interface - dd undefined_syscall ; 54-reserved + dd syscall_threads ; 51-Threads + dd stack_driver_stat ; 52-Stack driver status + dd cross_order ; 53-Socket interface + dd undefined_syscall ; 54-reserved dd sound_interface ; 55-Sound interface - dd undefined_syscall ; 56-reserved - dd sys_pcibios ; 57-PCI BIOS32 - dd cross_order ; 58-Common file system interface - dd undefined_syscall ; 59-reserved + dd undefined_syscall ; 56-reserved + dd sys_pcibios ; 57-PCI BIOS32 + dd cross_order ; 58-Common file system interface + dd undefined_syscall ; 59-reserved dd sys_IPC ; 60-Inter Process Communication - dd sys_gs ; 61-Direct graphics access - dd cross_order ; 62-PCI functions - dd cross_order ; 63-System message board - dd sys_resize_app_memory ; 64-Resize application memory usage - dd sys_putimage_palette ; 65-PutImagePalette - dd sys_process_def ; 66-Process definitions - keyboard - dd syscall_move_window ; 67-Window move or resize + dd sys_gs ; 61-Direct graphics access + dd cross_order ; 62-PCI functions + dd cross_order ; 63-System message board + dd sys_resize_app_memory ; 64-Resize application memory usage + dd sys_putimage_palette ; 65-PutImagePalette + dd sys_process_def ; 66-Process definitions - keyboard + dd syscall_move_window ; 67-Window move or resize dd f68 ; 68-Some internal services - dd sys_debug_services ; 69-Debug - dd file_system_lfn ; 70-Common file system interface, version 2 + dd sys_debug_services ; 69-Debug + dd file_system_lfn ; 70-Common file system interface, version 2 dd syscall_window_settings ; 71-Window settings - dd sys_sendwindowmsg ; 72-Send window message - times 127 - ( ($-servetable2) /4 ) dd undefined_syscall - dd sys_end ; -1-end application + dd sys_sendwindowmsg ; 72-Send window message + times 127 - ( ($-servetable2) /4 ) dd undefined_syscall + dd sys_end ; -1-end application align 4 servetable3: - - dd FFT4 ; 0 - dd FFT4 ; 1 - dd paleholder ; 2 - dd sys_end ; last + + dd FHT_4 ; 0 + dd FHT_4 ; 1 + dd paleholder ; 2 + dd sys_end ; last endg diff --git a/kernel/branches/Kolibri-A/trunk/kernel32.inc b/kernel/branches/Kolibri-A/trunk/kernel32.inc index 9bdd43c726..dbcdf360e6 100644 --- a/kernel/branches/Kolibri-A/trunk/kernel32.inc +++ b/kernel/branches/Kolibri-A/trunk/kernel32.inc @@ -16,30 +16,6 @@ $Revision$ -;struc db [a] { common . db a -; if ~used . -; display 'not used db: ',`.,13,10 -; end if } -;struc dw [a] { common . dw a -; if ~used . -; display 'not used dw: ',`.,13,10 -; end if } -;struc dd [a] { common . dd a -; if ~used . -; display 'not used dd: ',`.,13,10 -; end if } -;struc dp [a] { common . dp a -; if ~used . -; display 'not used dp: ',`.,13,10 -; end if } -;struc dq [a] { common . dq a -; if ~used . -; display 'not used dq: ',`.,13,10 -; end if } -;struc dt [a] { common . dt a -; if ~used . -; display 'not used dt: ',`.,13,10 -; end if } struc POINT { .x dd ? @@ -52,7 +28,7 @@ end virtual struc RECT { .left dd ? - .top dd ? + .top dd ? .right dd ? .bottom dd ? .sizeof: @@ -63,7 +39,7 @@ end virtual struc BOX { .left dd ? - .top dd ? + .top dd ? .width dd ? .height dd ? .sizeof: @@ -75,17 +51,17 @@ end virtual struc DISPMODE { .width rw 1 .height rw 1 - .bpp rw 1 + .bpp rw 1 .freq rw 1 } ; constants definition -WSTATE_NORMAL = 00000000b +WSTATE_NORMAL = 00000000b WSTATE_MAXIMIZED = 00000001b WSTATE_MINIMIZED = 00000010b WSTATE_ROLLEDUP = 00000100b -WSTATE_REDRAW = 00000001b +WSTATE_REDRAW = 00000001b WSTATE_WNDDRAWN = 00000010b WSTYLE_HASCAPTION = 00010000b @@ -94,13 +70,13 @@ WSTYLE_CLIENTRELATIVE = 00100000b struc TASKDATA { .event_mask dd ? - .pid dd ? - dw ? + .pid dd ? + dw ? .state db ? - db ? - dw ? + db ? + dw ? .wnd_number db ? - db ? + db ? .mem_start dd ? .counter_sum dd ? .counter_add dd ? @@ -110,24 +86,24 @@ virtual at 0 TASKDATA TASKDATA end virtual -TSTATE_RUNNING = 0 +TSTATE_RUNNING = 0 TSTATE_RUN_SUSPENDED = 1 TSTATE_WAIT_SUSPENDED = 2 -TSTATE_ZOMBIE = 3 +TSTATE_ZOMBIE = 3 TSTATE_TERMINATING = 4 -TSTATE_WAITING = 5 -TSTATE_FREE = 9 +TSTATE_WAITING = 5 +TSTATE_FREE = 9 ; structures definition struc WDATA { - .box BOX - .cl_workarea dd ? - .cl_titlebar dd ? - .cl_frames dd ? - .reserved db ? - .fl_wstate db ? - .fl_wdrawn db ? - .fl_redraw db ? + .box BOX + .cl_workarea dd ? + .cl_titlebar dd ? + .cl_frames dd ? + .reserved db ? + .fl_wstate db ? + .fl_wdrawn db ? + .fl_redraw db ? .sizeof: } virtual at 0 @@ -137,47 +113,47 @@ label WDATA.fl_wstyle byte at WDATA.cl_workarea + 3 struc APPDATA { - .app_name db 11 dup(?) - db 5 dup(?) + .app_name db 11 dup(?) + db 5 dup(?) - .fpu_state dd ? ;+16 - .ev_count_ dd ? ;unused ;+20 - .exc_handler dd ? ;+24 - .except_mask dd ? ;+28 - .pl0_stack dd ? ;unused ;+32 - .heap_base dd ? ;+36 - .heap_top dd ? ;+40 - .cursor dd ? ;+44 - .fd_ev dd ? ;+48 - .bk_ev dd ? ;+52 - .fd_obj dd ? ;+56 - .bk_obj dd ? ;+60 - .saved_esp dd ? ;+64 - .io_map rd 2 ;+68 - .dbg_state dd ? ;+76 - .cur_dir dd ? ;+80 - .wait_timeout dd ? ;+84 - .saved_esp0 dd ? ;+88 - .wait_begin dd ? ;+92 +++ - .wait_test dd ? ;+96 +++ - .wait_param dd ? ;+100 +++ - .tls_base dd ? ;+104 - .dlls_list_ptr dd ? ;+108 - db 16 dup(?) ;+112 + .fpu_state dd ? ;+16 + .ev_count_ dd ? ;unused ;+20 + .exc_handler dd ? ;+24 + .except_mask dd ? ;+28 + .pl0_stack dd ? ;unused ;+32 + .heap_base dd ? ;+36 + .heap_top dd ? ;+40 + .cursor dd ? ;+44 + .fd_ev dd ? ;+48 + .bk_ev dd ? ;+52 + .fd_obj dd ? ;+56 + .bk_obj dd ? ;+60 + .saved_esp dd ? ;+64 + .io_map rd 2 ;+68 + .dbg_state dd ? ;+76 + .cur_dir dd ? ;+80 + .wait_timeout dd ? ;+84 + .saved_esp0 dd ? ;+88 + .wait_begin dd ? ;+92 +++ + .wait_test dd ? ;+96 +++ + .wait_param dd ? ;+100 +++ + .tls_base dd ? ;+104 + .dlls_list_ptr dd ? ;+108 + db 16 dup(?) ;+112 - .wnd_shape dd ? ;+128 - .wnd_shape_scale dd ? ;+132 - dd ? ;+136 - .mem_size dd ? ;+140 - .saved_box BOX - .ipc_start dd ? - .ipc_size dd ? - .event_mask dd ? + .wnd_shape dd ? ;+128 + .wnd_shape_scale dd ? ;+132 + dd ? ;+136 + .mem_size dd ? ;+140 + .saved_box BOX + .ipc_start dd ? + .ipc_size dd ? + .event_mask dd ? .debugger_slot dd ? - dd ? + dd ? .keyboard_mode db ? - db 3 dup(?) - .dir_table dd ? + db 3 dup(?) + .dir_table dd ? .dbg_event_mem dd ? .dbg_regs: .dbg_regs.dr0 dd ? @@ -185,7 +161,7 @@ struc APPDATA .dbg_regs.dr2 dd ? .dbg_regs.dr3 dd ? .dbg_regs.dr7 dd ? - .wnd_caption dd ? + .wnd_caption dd ? .wnd_clientbox BOX } virtual at 0 @@ -211,7 +187,7 @@ include "core/sync.inc" ; macros for synhronization objects include "core/sys32.inc" ; process management include "core/sched.inc" ; process scheduling include "core/syscall.inc" ; system call -include "core/fpu.inc" ; all fpu/sse support +include "core/fpu.inc" ; all fpu/sse support include "core/memory.inc" include "core/heap.inc" ; kernel and app heap include "core/malloc.inc" ; small kernel heap @@ -220,7 +196,7 @@ include "core/dll.inc" include "core/peload.inc" ; include "core/exports.inc" include "core/string.inc" -include "core/v86.inc" ; virtual-8086 manager +include "core/v86.inc" ; virtual-8086 manager ; GUI stuff include "gui/window.inc" @@ -232,19 +208,19 @@ include "gui/button.inc" ; file system -include "fs/fs.inc" ; syscall -include "fs/fat32.inc" ; read / write for fat32 filesystem -include "fs/ntfs.inc" ; read / write for ntfs filesystem -include "fs/fat12.inc" ; read / write for fat12 filesystem +include "fs/fs.inc" ; syscall +include "fs/fat32.inc" ; read / write for fat32 filesystem +include "fs/ntfs.inc" ; read / write for ntfs filesystem +include "fs/fat12.inc" ; read / write for fat12 filesystem include "blkdev/rd.inc" ; ramdisk read /write include "fs/fs_lfn.inc" ; syscall, version 2 include "fs/iso9660.inc" ; read for iso9660 filesystem CD -include "fs/ext2.inc" ; read / write for ext2 filesystem +include "fs/ext2.inc" ; read / write for ext2 filesystem ; sound -include "sound/playnote.inc" ; player Note for Speaker PC -include "sound/FFT.inc" ; fast Fourier transform routines +include "sound/playnote.inc" ; player Note for Speaker PC +include "sound/FHT.inc" ; fast Fourier transform routines ; display @@ -311,3 +287,4 @@ include "core/ext_lib.inc" ; list of external functions include "imports.inc" + diff --git a/kernel/branches/Kolibri-A/trunk/sound/FFT.inc b/kernel/branches/Kolibri-A/trunk/sound/FHT.INC similarity index 65% rename from kernel/branches/Kolibri-A/trunk/sound/FFT.inc rename to kernel/branches/Kolibri-A/trunk/sound/FHT.INC index a3fd33373c..9115ce9e38 100644 --- a/kernel/branches/Kolibri-A/trunk/sound/FFT.inc +++ b/kernel/branches/Kolibri-A/trunk/sound/FHT.INC @@ -5,62 +5,51 @@ ; free KolibriOS version - not to be ported to other OSes ; ========================================================== -Power_of_4 equ 5 -NumPoints equ 1024 -N_2 equ NumPoints / 2 -N_4 equ NumPoints / 4 -;================================================================= ; global constants align 8 -_root dq 1.41421356237309504880169 ; = sqrt(2) -_root2 dq 0.70710678118654752440084 ; = sqrt(2)/2 -_c1 dq 0.92387953251128675612818 ; = cos(pi/8) -_s1 dq 0.38268343236508977172846 ; = sin(pi/8) -_dx dq 0.00613592315154296875 ; pi/512 +fht_r dq 1.41421356237309504880169 ; = sqrt(2) +fht_r2 dq 0.70710678118654752440084 ; = sqrt(2)/2 +fht_c1 dq 0.92387953251128675612818 ; = cos(pi/8) +fht_s1 dq 0.38268343236508977172846 ; = sin(pi/8) -;[_CosTable] dd 0 ; N_2 elements -;[_SinTable] dd 0 ; N_2 elements +;================================================================= +; parameter1: +; -- reg dl (bits[3:0]) = Power_of_4 +; -- reg edx && (-16) = 4k-aligned data array address +; returns: +; -- edx = Power_of_4 +; -- ecx = N +; destroys: +; -- eax, ebx, ecx, edx, esi ;; ========================== align 4 -MakeSinCosTable: - mov ebx, [_Sines] - mov ecx, [_Cosins] - xor eax, eax - fld [_dx] ; st : dx - fldz ; st : 0, dx -.loop: - fld st0 ; st : x, x, dx - FSINCOS ; st : cos, sin, x, dx - fstp qword [ecx+eax*8] ; st : sin, x, dx - fstp qword [ebx+eax*8] ; st : x, dx - fadd st0, st1 ; st : x+dx, dx - - inc eax - cmp eax, N_2 - jne .loop - fstp st0 ; st : dx - fstp st0 ; st : - ret - -; ================================================================ -align 4 BitInvert: - mov esi, [x] ; array of qwords - xor ecx, ecx ; index term + mov esi, edx + and esi, 0xFFFFFFF0 + and edx, 0x0F + push edx + mov cl, dl + xor eax, eax + inc eax + shl eax, cl + shl eax, cl + push eax + xor ecx, ecx ; index term +align 4 .newterm: inc ecx - cmp ecx, NumPoints + cmp ecx, [esp] ; N jge .done xor eax, eax mov edx, ecx xor bl, bl - +align 4 .do_invert: inc bl - cmp bl, Power_of_4 + cmp bl, byte[esp+4] ; Power_of_4 jg .switch mov bh, dl @@ -69,6 +58,7 @@ BitInvert: or al, bh shr edx, 2 jmp .do_invert +align 8 .switch: cmp eax, ecx @@ -80,17 +70,32 @@ BitInvert: fstp qword [esi+ecx*8] jmp .newterm +align 4 .done: + pop ecx + pop edx ret ;================================================================= + + +;================================================================= +; stdcall parameters: +; -- [esp+4] = N +; -- [esp+8] = 4k-aligned data array address +; returns: +; -- nothing +; destroys: +; -- ebx, esi +;; ========================== align 4 - step1: - mov esi, [x] - mov ebx, esi - add esi, NumPoints*8 + mov ebx, [esp+8] + mov esi, [esp+4] + shl esi, 3 + add esi, ebx +align 4 .loop: fld qword[ebx] fld qword[ebx+8] @@ -119,19 +124,65 @@ step1: add ebx, 32 cmp ebx, esi jnz .loop +ret - ret - - -; -;=========================================================================== -step2: ; Step2 +; local stack definitions ;=========================================================================== +_t0 equ dword [esp] +_t1 equ dword[esp+4] +_t2 equ dword[esp+8] +_t3 equ dword[esp+12] +_t4 equ dword[esp+16] +_t5 equ dword[esp+20] +_t6 equ dword[esp+24] +_t7 equ dword[esp+28] +_t8 equ dword[esp+32] +_t9 equ dword[esp+36] - mov eax, [_f] - mov ebx, eax - add eax, NumPoints*8 +_l1 equ dword[esp+40] +_l2 equ dword[esp+44] +_l3 equ dword[esp+48] +_l4 equ dword[esp+52] +_l5 equ dword[esp+56] +_l6 equ dword[esp+60] +_l7 equ dword[esp+64] +_l8 equ dword[esp+68] +_l9 equ dword[esp+72] +_l0 equ dword[esp+76] +_d1 equ dword[esp+80] +_d2 equ dword[esp+84] +_d3 equ dword[esp+88] +_d4 equ dword[esp+92] +_d5 equ dword[esp+96] +_d6 equ dword[esp+100] +_j5 equ dword[esp+104] +_jj equ dword[esp+108] +_end_of_array equ dword[esp+112] +_step equ word [esp+116] + +;================================================================= +; cdecl parameters: +; -- [ebp+8] = N +; -- [ebp+12] = 4k-aligned data array address +; returns: +; -- nothing +; destroys: +; -- eax, ebx +; locals: +; -- 10 stack-located dwords (_t0 ... _t9) +;; ========================== +align 4 +step2: + push ebp + mov ebp, esp + sub esp, 40 + mov ebx, [ebp+12] + mov eax, [ebp+ 8] + shl eax, 3 + add eax, ebx + +align 4 .loop_i: ; -- quad subelements +0, +4, +8 and +12 (simpliest operations) @@ -163,7 +214,7 @@ step2: ; Step2 ; -- even subelements +2, +6, +10 and +14 (2 multiplications needed) fld qword[ebx+8*2] fld qword[ebx+8*6] - fld [_root] + fld [fht_r] fmul st1, st0 ; st : r, t2, t1 fld qword[ebx+8*10] fxch st1 ; st : r, t3, t2, t1 @@ -194,20 +245,20 @@ step2: ; Step2 fsub st0, st1 fxch st1 faddp st2, st0 ; st : (f[l3]-f[l7]), (f[l3]+f[l7]) - fld [_root2] + fld [fht_r2] fmul st2, st0 fmulp st1, st0 ; st : t9, t6 fld qword[ebx+8*3] fld st0 fadd st0, st2 ; st : t1, f[l5], t9, t6 - fstp [_t1] + fstp _t1 fsub st0, st1 - fstp [_t2] - fstp [_t9] ; (t9 never used) - fstp [_t6] ; st : + fstp _t2 + fstp _t9 ; (t9 never used) + fstp _t6 ; st : - fld [_c1] - fld [_s1] + fld [fht_c1] + fld [fht_s1] fld qword[ebx+8*5] fld qword[ebx+8*7] fld st3 ; st: c1, f[l6], f[l2], s1, c1 @@ -215,13 +266,13 @@ step2: ; Step2 fld st1 ; st: f_6, f_2*c, f_6, f_2, s, c fmul st0, st4 ; st: f_6*s, f_2*c, f_6, f_2, s, c faddp st1, st0 ; st: t5, f_6, f_2, s, c - fstp [_t5] ; st: f_6, f_2, s, c + fstp _t5 ; st: f_6, f_2, s, c fld st3 ; st: c, f_6, f_2, s, c fmul st0, st1 fld st3 fmul st0, st3 ; st: f_2*s, f_6*c, f_6, f_2, s, c fsubp st1, st0 ; st: t8, f_6, f_2, s, c - fstp [_t8] ; st: f_6, f_2, s, c + fstp _t8 ; st: f_6, f_2, s, c fstp st0 ; st: f_2, s, c fstp st0 ; st: s, c @@ -232,51 +283,51 @@ step2: ; Step2 fld st3 fmul st0, st3 ; st: f_4*s, f_8*c, f_8, f_4, s, c faddp st1, st0 ; st: t7, f_8, f_4, s, c - fld [_t5] ; st: t5, t7, f_8, f_4, s, c + fld _t5 ; st: t5, t7, f_8, f_4, s, c fsub st0, st1 ; st: t4, t7, f_8, f_4, s, c - fstp [_t4] - fstp [_t7] ; st: f_8, f_4, s, c + fstp _t4 + fstp _t7 ; st: f_8, f_4, s, c fld st3 ; st: c, f_8, f_4, s, c fmul st0, st2 fld st3 fmul st0, st2 ; st: f_8*s, f_4*c, f_8, f_4, s, c fsubp st1, st0 ; st:-t0, f_8, f_4, s, c fchs - fld [_t8] + fld _t8 fchs ; st:-t8, t0, f_8, f_4, s, c fsub st0, st1 ; st: t3, t0, f_8, f_4, s, c - fstp [_t3] - fstp [_t0] ; st: f_8, f_4, s, c + fstp _t3 + fstp _t0 ; st: f_8, f_4, s, c fstp st0 ; st: f_4, s, c fstp st0 ; st: s, c fstp st0 ; st: c fstp st0 ; st: - fld [_t1] - fld [_t4] + fld _t1 + fld _t4 fld st1 fsub st0, st1 fstp qword[ebx+8*11] ; f[l7] = t1-t4 faddp st1, st0 fstp qword[ebx+8*3] ; f[l5] = t1+t4 - fld [_t2] - fld [_t3] + fld _t2 + fld _t3 fld st1 fsub st0, st1 fstp qword[ebx+8*15] ; f[l8] faddp st1, st0 fstp qword[ebx+8*7] ; f[l6] - fld [_t6] + fld _t6 fld qword[ebx+8] fld st1 fsub st0, st1 fxch st1 faddp st2, st0 ; st : t2, t1 - fld [_t8] - fsub [_t0] - fld [_t5] - fadd [_t7] ; st : t4, t3, t2, t1 + fld _t8 + fsub _t0 + fld _t5 + fadd _t7 ; st : t4, t3, t2, t1 fld st3 fsub st0, st1 @@ -294,36 +345,42 @@ step2: ; Step2 cmp ebx, eax jb .loop_i - ret - -align 8 ; shared local vars -_t0 dq 0 -_t1 dq 0 -_t2 dq 0 -_t3 dq 0 -_t4 dq 0 -_t5 dq 0 -_t6 dq 0 -_t7 dq 0 -_t8 dq 0 -_t9 dq 0 + mov esp, ebp + pop ebp +ret -;=================================================================== + +;================================================================= +; cdecl parameters: +; -- [ebp+8] = N +; -- [ebp+12] = p +; -- [ebp+16] = 4k-aligned data array address +; -- [ebp+20] = 4k-aligned SinCosTable address +; returns: +; -- nothing +; destroys: +; -- all GPRegs +; locals: +; -- 120 stack-located dwords (_t0 ... _t9, _l0..._step) +;; ========================== +align 4 step3: -;=================================================================== - + push ebp + mov ebp, esp + sub esp, 120 ; 283 : { ; 293 : for (l=3; l<=p; l++) mov cx, 0x0200 +align 4 .newstep: inc ch - cmp ch, Power_of_4 + cmp ch, byte[ebp+12] jg .done - mov [.step], cx + mov _step, cx ; 294 : { ; 295 : d1 = 1 << (l + l - 3); @@ -333,61 +390,63 @@ step3: sub cl, 3 mov edx, 1 shl edx, cl - mov [.d1], edx + mov _d1, edx ; 296 : d2 = d1 << 1; shl edx, 1 - mov [.d2], edx + mov _d2, edx mov eax, edx ; 297 : d3 = d2 << 1; shl edx, 1 - mov [.d3], edx + mov _d3, edx ; 298 : d4 = d2 + d3; add eax, edx - mov [.d4], eax + mov _d4, eax ; 299 : d5 = d3 << 1; shl edx, 1 - mov [.d5], edx + mov _d5, edx shl edx, 3 - mov [.d6], edx ; d6 = d5*8 to simplify index operations + mov _d6, edx ; d6 = d5*8 to simplify index operations ; 339 : j5 = N / d5; ; moved out of internal loop - mov cl, Power_of_4 + mov cl, [ebp+12] sub cl, ch add cl, cl mov edx, 1 shl edx, cl - mov [.j5], edx + mov _j5, edx ; 300 : ; 301 : for (j=0; j f[j+k] + mov _l1, edx ; [ebx+edx*8] --> f[j+k] ; l2 = l1 + d2; add edx, eax - mov [.l2], edx + mov _l2, edx ; l3 = l1 + d3; add edx, eax - mov [.l3], edx + mov _l3, edx ; l4 = l1 + d4; add edx, eax - mov [.l4], edx + mov _l4, edx ; l5 = j + d2 - k; mov edx, eax sub edx, ecx - mov [.l5], edx + mov _l5, edx ; l6 = l5 + d2; add edx, eax - mov [.l6], edx + mov _l6, edx ; l7 = l5 + d3; add edx, eax - mov [.l7], edx + mov _l7, edx ; l8 = l5 + d4; add edx, eax - mov [.l8], edx + mov _l8, edx ; 340 : j5 *= k; // add-substituted multiplication - mov eax, [.jj] - add eax, [.j5] - mov [.jj], eax + mov eax, _jj + add eax, _j5 + mov _jj, eax ; c1 = C[jj]; ; s1 = S[jj]; - mov edi, [_Cosins] + mov edi, [ebp+20] fld qword[edi+eax*8] - mov esi, [_Sines] + mov esi, [ebp+8] + shl esi, 2 + add esi, edi fld qword[esi+eax*8] ; st : s1, c1 ; t5 = f[l2] * c1 + f[l6] * s1; ; t8 = f[l6] * c1 - f[l2] * s1; - mov edx, [.l6] + mov edx, _l6 fld qword[ebx+edx*8] - mov edx, [.l2] + mov edx, _l2 fld st0 fmul st0, st2 fxch st1 @@ -521,10 +583,10 @@ step3: fmul st4, st0 fmulp st3, st0 ; st : f[l6]*c, f[l6]*s, f[l2]*s, f[l2]*c fsub st0, st2 ; st : t8, f[l6]*s, f[l2]*s, f[l2]*c - fstp [_t8] + fstp _t8 faddp st2, st0 ; st : f[l2]*s, t5 fstp st0 ; st : t5 - fstp [_t5] ; st : + fstp _t5 ; st : ; c2 = C[2*jj]; ; s2 = S[2*jj]; @@ -534,9 +596,9 @@ step3: ; t6 = f[l3] * c2 + f[l7] * s2; ; t9 = f[l7] * c2 - f[l3] * s2; - mov edx, [.l7] + mov edx, _l7 fld qword[ebx+edx*8] - mov edx, [.l3] + mov edx, _l3 fld st0 fmul st0, st2 fxch st1 @@ -545,22 +607,22 @@ step3: fmul st4, st0 fmulp st3, st0 ; st : f[l7]*c, f[l7]*s, f[l3]*s, f[l3]*c fsub st0, st2 ; st : t9, f[l7]*s, f[l3]*s, f[l3]*c - fstp [_t9] + fstp _t9 faddp st2, st0 ; st : f[l2]*s, t6 fstp st0 ; st : t6 - fstp [_t6] ; st : + fstp _t6 ; st : ; c3 = C[3*jj]; ; s3 = S[3*jj]; - add eax, [.jj] + add eax, _jj fld qword[edi+eax*8] fld qword[esi+eax*8] ; st : s3, c3 ; t7 = f[l4] * c3 + f[l8] * s3; ; t0 = f[l8] * c3 - f[l4] * s3; - mov edx, [.l8] + mov edx, _l8 fld qword[ebx+edx*8] - mov edx, [.l4] + mov edx, _l4 fld st0 fmul st0, st2 fxch st1 @@ -569,192 +631,162 @@ step3: fmul st4, st0 fmulp st3, st0 ; st : f[l8]*c, f[l8]*s, f[l4]*s, f[l4]*c fsub st0, st2 ; st : t9, f[l8]*s, f[l4]*s, f[l4]*c - fstp [_t0] + fstp _t0 faddp st2, st0 ; st : f[l2]*s, t7 fstp st0 ; st : t7 - fstp [_t7] ; st : + fstp _t7 ; st : ; t1 = f[l5] - t9; ; t2 = f[l5] + t9; - mov eax, [.l5] + mov eax, _l5 fld qword [ebx+eax*8] - fld [_t9] + fld _t9 fld st0 fadd st0, st2 - fstp [_t2] + fstp _t2 fsubp st1, st0 - fstp [_t1] + fstp _t1 ; t3 = - t8 - t0; - fld [_t8] - fadd [_t0] + fld _t8 + fadd _t0 fchs - fstp [_t3] + fstp _t3 ; t4 = t5 - t7; - fld [_t5] - fsub [_t7] - fstp [_t4] + fld _t5 + fsub _t7 + fstp _t4 ; f[l5] = t1 + t4; - fld [_t1] - fld [_t4] + fld _t1 + fld _t4 fld st0 fadd st0, st2 fstp qword [ebx+eax*8] ; f[l7] = t1 - t4; - mov eax, [.l7] + mov eax, _l7 fsubp st1, st0 fstp qword [ebx+eax*8] ; f[l6] = t2 + t3; - mov eax, [.l6] - fld [_t2] - fld [_t3] + mov eax, _l6 + fld _t2 + fld _t3 fld st0 fadd st0, st2 fstp qword [ebx+eax*8] ; f[l8] = t2 - t3; - mov eax, [.l8] + mov eax, _l8 fsubp st1, st0 fstp qword [ebx+eax*8] ; t1 = f[l1] + t6; - mov eax, [.l1] + mov eax, _l1 fld qword [ebx+eax*8] - fld [_t6] + fld _t6 fld st0 fadd st0, st2 - fstp [_t1] + fstp _t1 ; t2 = f[l1] - t6; fsubp st1, st0 - fstp [_t2] + fstp _t2 ; t3 = t8 - t0; - fld [_t8] - fsub [_t0] - fstp [_t3] + fld _t8 + fsub _t0 + fstp _t3 ; t4 = t5 + t7; - fld [_t5] - fadd [_t7] - fstp [_t4] + fld _t5 + fadd _t7 + fstp _t4 ; f[l1] = t1 + t4; - mov eax, [.l1] - fld [_t1] - fld [_t4] + mov eax, _l1 + fld _t1 + fld _t4 fld st0 fadd st0, st2 fstp qword [ebx+eax*8] ; f[l3] = t1 - t4; - mov eax, [.l3] + mov eax, _l3 fsubp st1, st0 fstp qword [ebx+eax*8] ; f[l2] = t2 + t3; - mov eax, [.l2] - fld [_t2] - fld [_t3] + mov eax, _l2 + fld _t2 + fld _t3 fld st0 fadd st0, st2 fstp qword [ebx+eax*8] ; f[l4] = t2 - t3; - mov eax, [.l4] + mov eax, _l4 fsubp st1, st0 fstp qword [ebx+eax*8] ; 374 : } jmp .next_k +align 4 .done_k: ; 375 : } - add ebx, [.d6] ; d6 = d5*8 - cmp ebx, [.end_of_array] + add ebx, _d6 ; d6 = d5*8 + cmp ebx, _end_of_array jb .next_j ; 376 : } - mov cx, [.step] + mov cx, _step jmp .newstep .done: - + mov esp, ebp + pop ebp ; 377 : } ret -align 4 -.l1 dd 0 -.l2 dd 0 -.l3 dd 0 -.l4 dd 0 -.l5 dd 0 -.l6 dd 0 -.l7 dd 0 -.l8 dd 0 -.l9 dd 0 -.l0 dd 0 -.d1 dd 0 -.d2 dd 0 -.d3 dd 0 -.d4 dd 0 -.d5 dd 0 -.d6 dd 0 -.j5 dd 0 -.jj dd 0 -.end_of_array dd 0 -.step dw 0 - -align 8 ;=========== Step3 ends here =========== - - - ; ================================================================= -; syscall entry -; -_f dd ? -_N dd 1024 ; number of points -_a dd ? ; initial data array -x dd 0 ; tranformed (float) data array -_Cosins dd 0 -_Sines dd 0 +;================================================================= +; parameters: +; -- [ebp+12] = N +; -- [ebp+16] = p +; -- [ebp+20] = 4k-aligned data array address +; -- [ebp+24] = 4k-aligned SinCosTable address +; returns: +; -- nothing +; destroys: +; -- all GPRegs +;; ========================== -FFT4: - or al, al - jnz .trans - mov cl, Power_of_4 - mov eax, 1 - shl eax, cl - shl eax, cl - mov [_N], eax - shl eax, 2 ; size of Sine table in bytes - add eax, ebx - mov [_Sines], ebx - mov [_Cosins], eax - cpuid - rdtsc - mov [.time], eax - call MakeSinCosTable - cpuid - rdtsc - sub eax, [.time] - ret -.trans: - mov [x], ebx - mov [_f], ebx - cli ;----- - cpuid - rdtsc - mov [.time], eax +align 4 + +FHT_4: + push ebp + mov ebp, esp + + mov edx, [ebp+20] ; a + mov dl, byte[ebp+16] call BitInvert - call step1 - call step2 + push dword[ebp+20] ; a + push ecx ; N + call step1 ; 4-point transform + cmp cl, 1 + jz .done + call step2 ; 16-point transform + cmp byte[ebp+16],1 ; p = 2 ? + jz .done + pop edx ; N + pop ecx ; a + push dword[ebp+24] ; t + push ecx + push dword[ebp+16] ; p + push edx ; N call step3 - cpuid - rdtsc - sti ;---- - sub eax, [.time] - ret +.done: + mov esp, ebp + pop ebp -.time dd 0 +ret diff --git a/kernel/branches/Kolibri-A/utilities/FFT/FHT4B.ASM b/kernel/branches/Kolibri-A/utilities/FFT/FHT4B.ASM new file mode 100644 index 0000000000..2326c871ab --- /dev/null +++ b/kernel/branches/Kolibri-A/utilities/FFT/FHT4B.ASM @@ -0,0 +1,207 @@ +;======================================================================== +;= = +;= Fast Hartley Transform routine demo for KolibriOS = +;= = +;= Copyright (C) 2010, Artem Jerdev = +;= = +;= refer to wiki.kolibtios.org for all details = +;= = +;======================================================================== + + + + +use32 + + org 0x0 + + db 'MENUET01' ; 8 byte id + dd 0x01 ; header version + dd START ; start of code + dd I_END ; size of image + dd 0x100000 ; memory for app + dd 0xbfffc ; esp + dd 0x0 , 0x0 ; I_Param , I_Icon + + +include 'macros.inc' +include 'debug.inc' +include 'FHT4i.inc' + + +START: ; start of execution + + call main + + + mov eax,-1 ; close this program + int 0x40 + + +;============================================================= +;Func: calculates a simple function +; ff = (int)(500*exp(-t) * cos (2.5*t)) +; uses: eax, ebx +;------------ +Func: + +; 9 : { + +; 10 : double x,t; +; 11 : int f; +; 12 : +; 13 : x = (i < N2) ? i : i - NUM_POINTS; + mov eax, [ii] + cmp eax, 512 + jge .index_negative + jmp .index_correct +.index_negative: + sub eax, 1024 +.index_correct: + mov [temp], eax +; fild [temp] + +; 14 : t = x / 16.0; +; f2xm1 argument (abs) must be less than 1, so + mov [t_mod], eax + and [t_mod], 0x0F ; x % 16 + shr eax, 4 ; x / 16 + mov [t_div], eax + fild [temp] + +; 15 : if (t<0) t = -t; + fabs +exp_ok: +; 16 : f = (int)(512*2^(-t) * cos (2.5*t)); + fchs + f2xm1 + fmul [f500] + fstp [tv93] + fld [f2_5] + fmul [tt] + fcos + fmul [tv93] + fstp [tt] + mov bx, word[tt+6] + shr bx,4 + and bx,0x07FF + add ax,bx + shl ax,4 + and word[tt+6], 0x800F + or word[tt+6], ax + fld [tt] + fstp [ff] + +; 17 : return f; +; 18 : } + ret +;--------------------------------------------------------- +; test data filler +; +; uses eax, ebx, ecx +FillData: +; 29 : for (i=0; i