an experimental kernel with a mad syscall and FHT inside

git-svn-id: svn://kolibrios.org@1641 a494cfbc-eb01-0410-851d-a64ba20cac60
This commit is contained in:
Artem Jerdev (art_zh) 2010-10-03 23:11:42 +00:00
parent abbc09c677
commit 112a3665cb
5 changed files with 661 additions and 479 deletions

View File

@ -76,9 +76,6 @@ pci_ext_config:
shl eax, 8
test eax, 0x000F0000 ; MMIO Base must be bus0-aligned
jnz .no_pcie_cfg
; -- it looks like a true PCIe config space;
ret ; <<<<<<<<<<< OK >>>>>>>>>>>
.no_pcie_cfg:
@ -92,6 +89,7 @@ pci_ext_config:
.pcie_failed:
mov esi, boot_pcie_fail
call boot_log
xor eax, eax
ret ; <<<<<<<<< FAILURE >>>>>>>>>

View File

@ -1,4 +1,4 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ;;
;; Copyright (C) KolibriOS team 2004-2007. All rights reserved. ;;
;; Distributed under terms of the GNU General Public License ;;
@ -10,47 +10,16 @@ $Revision$
; Old style system call converter
align 16
cross_order:
; load all registers in crossed order
mov eax, ebx
mov ebx, ecx
mov ecx, edx
mov edx, esi
mov esi, edi
movzx edi, byte[esp+28 + 4]
; load all registers in crossed order
mov eax, ebx
mov ebx, ecx
mov ecx, edx
mov edx, esi
mov esi, edi
movzx edi, byte[esp+28 + 4]
sub edi, 53 ; all zeroes before
call dword [servetable+edi*4]
ret
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ;;
;; SYSENTER ENTRY ;;
;; (not used on AMD systems) ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;align 32
;sysenter_entry:
; ; Íàñòðàèâàåì ñòåê
; mov esp, [ss:tss._esp0]
; sti
; push ebp ; save app esp + 4
; mov ebp, [ebp] ; ebp - original ebp
; ;------------------
; pushad
; cld
;
; movzx eax, al
; call dword [servetable2 + eax * 4]
; popad
; ;------------------
; xchg ecx, [ss:esp] ; â âåðøèí ñòåêà - app ecx, ecx - app esp + 4
; sub ecx, 4
; xchg edx, [ecx] ; edx - return point, & save original edx
; push edx
; mov edx, [ss:esp + 4]
; mov [ecx + 4], edx ; save original ecx
; pop edx
; sysexit
call dword [servetable+edi*4]
ret
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ;;
@ -60,27 +29,27 @@ cross_order:
align 16
i40:
pushad
cld
and eax, 0x07F
call dword [servetable2 + eax * 4]
popad
iretd
pushad
cld
and eax, 0x07F
call dword [servetable2 + eax * 4]
popad
iretd
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ;;
;; SYSCALL ENTRY ;;
;; SYSCALL ENTRY -- NEW !!! ;;
;; ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
align 32
syscall_entry:
; push ecx
sti
and eax, 3
call dword [servetable3 + eax * 4]
; pop ecx
sysret
; sti
push ecx
and eax, 3
call dword [servetable3 + eax * 4]
pop ecx
sysret
iglobal
;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@ -89,18 +58,17 @@ iglobal
align 4
servetable:
dd socket ; 53-Socket interface
dd socket ; 53-Socket interface
dd 0
dd 0
dd 0
dd 0
dd file_system ; 58-Common file system interface
dd file_system ; 58-Common file system interface
dd 0
dd 0
dd 0
dd sys_pci ; 62-PCI functions
dd sys_msg_board ; 63-System message board
dd sys_pci ; 62-PCI functions
dd sys_msg_board ; 63-System message board
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; NEW SYSTEM FUNCTIONS TABLE ;;
@ -108,88 +76,88 @@ iglobal
align 4
servetable2:
dd syscall_draw_window ; 0-DrawWindow
dd syscall_setpixel ; 1-SetPixel
dd sys_getkey ; 2-GetKey
dd sys_clock ; 3-GetTime
dd syscall_writetext ; 4-WriteText
dd delay_hs ; 5-DelayHs
dd syscall_draw_window ; 0-DrawWindow
dd syscall_setpixel ; 1-SetPixel
dd sys_getkey ; 2-GetKey
dd sys_clock ; 3-GetTime
dd syscall_writetext ; 4-WriteText
dd delay_hs ; 5-DelayHs
dd syscall_openramdiskfile ; 6-OpenRamdiskFile
dd syscall_putimage ; 7-PutImage
dd syscall_button ; 8-DefineButton
dd sys_cpuusage ; 9-GetProcessInfo
dd sys_waitforevent ; 10-WaitForEvent
dd sys_getevent ; 11-CheckForEvent
dd sys_redrawstat ; 12-BeginDraw and EndDraw
dd syscall_drawrect ; 13-DrawRect
dd syscall_getscreensize ; 14-GetScreenSize
dd sys_background ; 15-bgr
dd sys_cachetodiskette ; 16-FlushFloppyCache
dd sys_getbutton ; 17-GetButton
dd sys_system ; 18-System Services
dd paleholder ; 19-reserved
dd sys_midi ; 20-ResetMidi and OutputMidi
dd sys_setup ; 21-SetMidiBase,SetKeymap,SetShiftKeymap,.
dd sys_settime ; 22-setting date,time,clock and alarm-clock
dd syscall_putimage ; 7-PutImage
dd syscall_button ; 8-DefineButton
dd sys_cpuusage ; 9-GetProcessInfo
dd sys_waitforevent ; 10-WaitForEvent
dd sys_getevent ; 11-CheckForEvent
dd sys_redrawstat ; 12-BeginDraw and EndDraw
dd syscall_drawrect ; 13-DrawRect
dd syscall_getscreensize ; 14-GetScreenSize
dd sys_background ; 15-bgr
dd sys_cachetodiskette ; 16-FlushFloppyCache
dd sys_getbutton ; 17-GetButton
dd sys_system ; 18-System Services
dd paleholder ; 19-reserved
dd sys_midi ; 20-ResetMidi and OutputMidi
dd sys_setup ; 21-SetMidiBase,SetKeymap,SetShiftKeymap,.
dd sys_settime ; 22-setting date,time,clock and alarm-clock
dd sys_wait_event_timeout ; 23-TimeOutWaitForEvent
dd syscall_cdaudio ; 24-PlayCdTrack,StopCd and GetCdPlaylist
dd undefined_syscall ; 25-reserved
dd sys_getsetup ; 26-GetMidiBase,GetKeymap,GetShiftKeymap,.
dd undefined_syscall ; 27-reserved
dd undefined_syscall ; 28-reserved
dd sys_date ; 29-GetDate
dd sys_current_directory ; 30-Get/SetCurrentDirectory
dd undefined_syscall ; 31-reserved
dd undefined_syscall ; 32-reserved
dd undefined_syscall ; 33-reserved
dd undefined_syscall ; 34-reserved
dd syscall_getpixel ; 35-GetPixel
dd syscall_getarea ; 36-GetArea
dd readmousepos ; 37-GetMousePosition_ScreenRelative,.
dd syscall_drawline ; 38-DrawLine
dd sys_getbackground ; 39-GetBackgroundSize,ReadBgrData,.
dd set_app_param ; 40-WantEvents
dd syscall_getirqowner ; 41-GetIrqOwner
dd get_irq_data ; 42-ReadIrqData
dd sys_outport ; 43-SendDeviceData
dd sys_programirq ; 44-ProgramIrqs
dd reserve_free_irq ; 45-ReserveIrq and FreeIrq
dd syscall_cdaudio ; 24-PlayCdTrack,StopCd and GetCdPlaylist
dd undefined_syscall ; 25-reserved
dd sys_getsetup ; 26-GetMidiBase,GetKeymap,GetShiftKeymap,.
dd undefined_syscall ; 27-reserved
dd undefined_syscall ; 28-reserved
dd sys_date ; 29-GetDate
dd sys_current_directory ; 30-Get/SetCurrentDirectory
dd undefined_syscall ; 31-reserved
dd undefined_syscall ; 32-reserved
dd undefined_syscall ; 33-reserved
dd undefined_syscall ; 34-reserved
dd syscall_getpixel ; 35-GetPixel
dd syscall_getarea ; 36-GetArea
dd readmousepos ; 37-GetMousePosition_ScreenRelative,.
dd syscall_drawline ; 38-DrawLine
dd sys_getbackground ; 39-GetBackgroundSize,ReadBgrData,.
dd set_app_param ; 40-WantEvents
dd syscall_getirqowner ; 41-GetIrqOwner
dd get_irq_data ; 42-ReadIrqData
dd sys_outport ; 43-SendDeviceData
dd sys_programirq ; 44-ProgramIrqs
dd reserve_free_irq ; 45-ReserveIrq and FreeIrq
dd syscall_reserveportarea ; 46-ReservePortArea and FreePortArea
dd display_number ; 47-WriteNum
dd display_number ; 47-WriteNum
dd syscall_display_settings ; 48-SetRedrawType and SetButtonType
dd sys_apm ; 49-Advanced Power Management (APM)
dd syscall_set_window_shape ; 50-Window shape & scale
dd syscall_threads ; 51-Threads
dd stack_driver_stat ; 52-Stack driver status
dd cross_order ; 53-Socket interface
dd undefined_syscall ; 54-reserved
dd syscall_threads ; 51-Threads
dd stack_driver_stat ; 52-Stack driver status
dd cross_order ; 53-Socket interface
dd undefined_syscall ; 54-reserved
dd sound_interface ; 55-Sound interface
dd undefined_syscall ; 56-reserved
dd sys_pcibios ; 57-PCI BIOS32
dd cross_order ; 58-Common file system interface
dd undefined_syscall ; 59-reserved
dd undefined_syscall ; 56-reserved
dd sys_pcibios ; 57-PCI BIOS32
dd cross_order ; 58-Common file system interface
dd undefined_syscall ; 59-reserved
dd sys_IPC ; 60-Inter Process Communication
dd sys_gs ; 61-Direct graphics access
dd cross_order ; 62-PCI functions
dd cross_order ; 63-System message board
dd sys_resize_app_memory ; 64-Resize application memory usage
dd sys_putimage_palette ; 65-PutImagePalette
dd sys_process_def ; 66-Process definitions - keyboard
dd syscall_move_window ; 67-Window move or resize
dd sys_gs ; 61-Direct graphics access
dd cross_order ; 62-PCI functions
dd cross_order ; 63-System message board
dd sys_resize_app_memory ; 64-Resize application memory usage
dd sys_putimage_palette ; 65-PutImagePalette
dd sys_process_def ; 66-Process definitions - keyboard
dd syscall_move_window ; 67-Window move or resize
dd f68 ; 68-Some internal services
dd sys_debug_services ; 69-Debug
dd file_system_lfn ; 70-Common file system interface, version 2
dd sys_debug_services ; 69-Debug
dd file_system_lfn ; 70-Common file system interface, version 2
dd syscall_window_settings ; 71-Window settings
dd sys_sendwindowmsg ; 72-Send window message
times 127 - ( ($-servetable2) /4 ) dd undefined_syscall
dd sys_end ; -1-end application
dd sys_sendwindowmsg ; 72-Send window message
times 127 - ( ($-servetable2) /4 ) dd undefined_syscall
dd sys_end ; -1-end application
align 4
servetable3:
dd FFT4 ; 0
dd FFT4 ; 1
dd paleholder ; 2
dd sys_end ; last
dd FHT_4 ; 0
dd FHT_4 ; 1
dd paleholder ; 2
dd sys_end ; last
endg

View File

@ -16,30 +16,6 @@
$Revision$
;struc db [a] { common . db a
; if ~used .
; display 'not used db: ',`.,13,10
; end if }
;struc dw [a] { common . dw a
; if ~used .
; display 'not used dw: ',`.,13,10
; end if }
;struc dd [a] { common . dd a
; if ~used .
; display 'not used dd: ',`.,13,10
; end if }
;struc dp [a] { common . dp a
; if ~used .
; display 'not used dp: ',`.,13,10
; end if }
;struc dq [a] { common . dq a
; if ~used .
; display 'not used dq: ',`.,13,10
; end if }
;struc dt [a] { common . dt a
; if ~used .
; display 'not used dt: ',`.,13,10
; end if }
struc POINT {
.x dd ?
@ -52,7 +28,7 @@ end virtual
struc RECT {
.left dd ?
.top dd ?
.top dd ?
.right dd ?
.bottom dd ?
.sizeof:
@ -63,7 +39,7 @@ end virtual
struc BOX {
.left dd ?
.top dd ?
.top dd ?
.width dd ?
.height dd ?
.sizeof:
@ -75,17 +51,17 @@ end virtual
struc DISPMODE {
.width rw 1
.height rw 1
.bpp rw 1
.bpp rw 1
.freq rw 1
}
; constants definition
WSTATE_NORMAL = 00000000b
WSTATE_NORMAL = 00000000b
WSTATE_MAXIMIZED = 00000001b
WSTATE_MINIMIZED = 00000010b
WSTATE_ROLLEDUP = 00000100b
WSTATE_REDRAW = 00000001b
WSTATE_REDRAW = 00000001b
WSTATE_WNDDRAWN = 00000010b
WSTYLE_HASCAPTION = 00010000b
@ -94,13 +70,13 @@ WSTYLE_CLIENTRELATIVE = 00100000b
struc TASKDATA
{
.event_mask dd ?
.pid dd ?
dw ?
.pid dd ?
dw ?
.state db ?
db ?
dw ?
db ?
dw ?
.wnd_number db ?
db ?
db ?
.mem_start dd ?
.counter_sum dd ?
.counter_add dd ?
@ -110,24 +86,24 @@ virtual at 0
TASKDATA TASKDATA
end virtual
TSTATE_RUNNING = 0
TSTATE_RUNNING = 0
TSTATE_RUN_SUSPENDED = 1
TSTATE_WAIT_SUSPENDED = 2
TSTATE_ZOMBIE = 3
TSTATE_ZOMBIE = 3
TSTATE_TERMINATING = 4
TSTATE_WAITING = 5
TSTATE_FREE = 9
TSTATE_WAITING = 5
TSTATE_FREE = 9
; structures definition
struc WDATA {
.box BOX
.cl_workarea dd ?
.cl_titlebar dd ?
.cl_frames dd ?
.reserved db ?
.fl_wstate db ?
.fl_wdrawn db ?
.fl_redraw db ?
.box BOX
.cl_workarea dd ?
.cl_titlebar dd ?
.cl_frames dd ?
.reserved db ?
.fl_wstate db ?
.fl_wdrawn db ?
.fl_redraw db ?
.sizeof:
}
virtual at 0
@ -137,47 +113,47 @@ label WDATA.fl_wstyle byte at WDATA.cl_workarea + 3
struc APPDATA
{
.app_name db 11 dup(?)
db 5 dup(?)
.app_name db 11 dup(?)
db 5 dup(?)
.fpu_state dd ? ;+16
.ev_count_ dd ? ;unused ;+20
.exc_handler dd ? ;+24
.except_mask dd ? ;+28
.pl0_stack dd ? ;unused ;+32
.heap_base dd ? ;+36
.heap_top dd ? ;+40
.cursor dd ? ;+44
.fd_ev dd ? ;+48
.bk_ev dd ? ;+52
.fd_obj dd ? ;+56
.bk_obj dd ? ;+60
.saved_esp dd ? ;+64
.io_map rd 2 ;+68
.dbg_state dd ? ;+76
.cur_dir dd ? ;+80
.wait_timeout dd ? ;+84
.saved_esp0 dd ? ;+88
.wait_begin dd ? ;+92 +++
.wait_test dd ? ;+96 +++
.wait_param dd ? ;+100 +++
.tls_base dd ? ;+104
.dlls_list_ptr dd ? ;+108
db 16 dup(?) ;+112
.fpu_state dd ? ;+16
.ev_count_ dd ? ;unused ;+20
.exc_handler dd ? ;+24
.except_mask dd ? ;+28
.pl0_stack dd ? ;unused ;+32
.heap_base dd ? ;+36
.heap_top dd ? ;+40
.cursor dd ? ;+44
.fd_ev dd ? ;+48
.bk_ev dd ? ;+52
.fd_obj dd ? ;+56
.bk_obj dd ? ;+60
.saved_esp dd ? ;+64
.io_map rd 2 ;+68
.dbg_state dd ? ;+76
.cur_dir dd ? ;+80
.wait_timeout dd ? ;+84
.saved_esp0 dd ? ;+88
.wait_begin dd ? ;+92 +++
.wait_test dd ? ;+96 +++
.wait_param dd ? ;+100 +++
.tls_base dd ? ;+104
.dlls_list_ptr dd ? ;+108
db 16 dup(?) ;+112
.wnd_shape dd ? ;+128
.wnd_shape_scale dd ? ;+132
dd ? ;+136
.mem_size dd ? ;+140
.saved_box BOX
.ipc_start dd ?
.ipc_size dd ?
.event_mask dd ?
.wnd_shape dd ? ;+128
.wnd_shape_scale dd ? ;+132
dd ? ;+136
.mem_size dd ? ;+140
.saved_box BOX
.ipc_start dd ?
.ipc_size dd ?
.event_mask dd ?
.debugger_slot dd ?
dd ?
dd ?
.keyboard_mode db ?
db 3 dup(?)
.dir_table dd ?
db 3 dup(?)
.dir_table dd ?
.dbg_event_mem dd ?
.dbg_regs:
.dbg_regs.dr0 dd ?
@ -185,7 +161,7 @@ struc APPDATA
.dbg_regs.dr2 dd ?
.dbg_regs.dr3 dd ?
.dbg_regs.dr7 dd ?
.wnd_caption dd ?
.wnd_caption dd ?
.wnd_clientbox BOX
}
virtual at 0
@ -211,7 +187,7 @@ include "core/sync.inc" ; macros for synhronization objects
include "core/sys32.inc" ; process management
include "core/sched.inc" ; process scheduling
include "core/syscall.inc" ; system call
include "core/fpu.inc" ; all fpu/sse support
include "core/fpu.inc" ; all fpu/sse support
include "core/memory.inc"
include "core/heap.inc" ; kernel and app heap
include "core/malloc.inc" ; small kernel heap
@ -220,7 +196,7 @@ include "core/dll.inc"
include "core/peload.inc" ;
include "core/exports.inc"
include "core/string.inc"
include "core/v86.inc" ; virtual-8086 manager
include "core/v86.inc" ; virtual-8086 manager
; GUI stuff
include "gui/window.inc"
@ -232,19 +208,19 @@ include "gui/button.inc"
; file system
include "fs/fs.inc" ; syscall
include "fs/fat32.inc" ; read / write for fat32 filesystem
include "fs/ntfs.inc" ; read / write for ntfs filesystem
include "fs/fat12.inc" ; read / write for fat12 filesystem
include "fs/fs.inc" ; syscall
include "fs/fat32.inc" ; read / write for fat32 filesystem
include "fs/ntfs.inc" ; read / write for ntfs filesystem
include "fs/fat12.inc" ; read / write for fat12 filesystem
include "blkdev/rd.inc" ; ramdisk read /write
include "fs/fs_lfn.inc" ; syscall, version 2
include "fs/iso9660.inc" ; read for iso9660 filesystem CD
include "fs/ext2.inc" ; read / write for ext2 filesystem
include "fs/ext2.inc" ; read / write for ext2 filesystem
; sound
include "sound/playnote.inc" ; player Note for Speaker PC
include "sound/FFT.inc" ; fast Fourier transform routines
include "sound/playnote.inc" ; player Note for Speaker PC
include "sound/FHT.inc" ; fast Fourier transform routines
; display
@ -311,3 +287,4 @@ include "core/ext_lib.inc"
; list of external functions
include "imports.inc"

View File

@ -5,62 +5,51 @@
; free KolibriOS version - not to be ported to other OSes
; ==========================================================
Power_of_4 equ 5
NumPoints equ 1024
N_2 equ NumPoints / 2
N_4 equ NumPoints / 4
;=================================================================
; global constants
align 8
_root dq 1.41421356237309504880169 ; = sqrt(2)
_root2 dq 0.70710678118654752440084 ; = sqrt(2)/2
_c1 dq 0.92387953251128675612818 ; = cos(pi/8)
_s1 dq 0.38268343236508977172846 ; = sin(pi/8)
_dx dq 0.00613592315154296875 ; pi/512
fht_r dq 1.41421356237309504880169 ; = sqrt(2)
fht_r2 dq 0.70710678118654752440084 ; = sqrt(2)/2
fht_c1 dq 0.92387953251128675612818 ; = cos(pi/8)
fht_s1 dq 0.38268343236508977172846 ; = sin(pi/8)
;[_CosTable] dd 0 ; N_2 elements
;[_SinTable] dd 0 ; N_2 elements
;=================================================================
; parameter1:
; -- reg dl (bits[3:0]) = Power_of_4
; -- reg edx && (-16) = 4k-aligned data array address
; returns:
; -- edx = Power_of_4
; -- ecx = N
; destroys:
; -- eax, ebx, ecx, edx, esi
;; ==========================
align 4
MakeSinCosTable:
mov ebx, [_Sines]
mov ecx, [_Cosins]
xor eax, eax
fld [_dx] ; st : dx
fldz ; st : 0, dx
.loop:
fld st0 ; st : x, x, dx
FSINCOS ; st : cos, sin, x, dx
fstp qword [ecx+eax*8] ; st : sin, x, dx
fstp qword [ebx+eax*8] ; st : x, dx
fadd st0, st1 ; st : x+dx, dx
inc eax
cmp eax, N_2
jne .loop
fstp st0 ; st : dx
fstp st0 ; st : <empty>
ret
; ================================================================
align 4
BitInvert:
mov esi, [x] ; array of qwords
xor ecx, ecx ; index term
mov esi, edx
and esi, 0xFFFFFFF0
and edx, 0x0F
push edx
mov cl, dl
xor eax, eax
inc eax
shl eax, cl
shl eax, cl
push eax
xor ecx, ecx ; index term
align 4
.newterm:
inc ecx
cmp ecx, NumPoints
cmp ecx, [esp] ; N
jge .done
xor eax, eax
mov edx, ecx
xor bl, bl
align 4
.do_invert:
inc bl
cmp bl, Power_of_4
cmp bl, byte[esp+4] ; Power_of_4
jg .switch
mov bh, dl
@ -69,6 +58,7 @@ BitInvert:
or al, bh
shr edx, 2
jmp .do_invert
align 8
.switch:
cmp eax, ecx
@ -80,17 +70,32 @@ BitInvert:
fstp qword [esi+ecx*8]
jmp .newterm
align 4
.done:
pop ecx
pop edx
ret
;=================================================================
;=================================================================
; stdcall parameters:
; -- [esp+4] = N
; -- [esp+8] = 4k-aligned data array address
; returns:
; -- nothing
; destroys:
; -- ebx, esi
;; ==========================
align 4
step1:
mov esi, [x]
mov ebx, esi
add esi, NumPoints*8
mov ebx, [esp+8]
mov esi, [esp+4]
shl esi, 3
add esi, ebx
align 4
.loop:
fld qword[ebx]
fld qword[ebx+8]
@ -119,19 +124,65 @@ step1:
add ebx, 32
cmp ebx, esi
jnz .loop
ret
ret
;
;===========================================================================
step2: ; Step2
; local stack definitions
;===========================================================================
_t0 equ dword [esp]
_t1 equ dword[esp+4]
_t2 equ dword[esp+8]
_t3 equ dword[esp+12]
_t4 equ dword[esp+16]
_t5 equ dword[esp+20]
_t6 equ dword[esp+24]
_t7 equ dword[esp+28]
_t8 equ dword[esp+32]
_t9 equ dword[esp+36]
mov eax, [_f]
mov ebx, eax
add eax, NumPoints*8
_l1 equ dword[esp+40]
_l2 equ dword[esp+44]
_l3 equ dword[esp+48]
_l4 equ dword[esp+52]
_l5 equ dword[esp+56]
_l6 equ dword[esp+60]
_l7 equ dword[esp+64]
_l8 equ dword[esp+68]
_l9 equ dword[esp+72]
_l0 equ dword[esp+76]
_d1 equ dword[esp+80]
_d2 equ dword[esp+84]
_d3 equ dword[esp+88]
_d4 equ dword[esp+92]
_d5 equ dword[esp+96]
_d6 equ dword[esp+100]
_j5 equ dword[esp+104]
_jj equ dword[esp+108]
_end_of_array equ dword[esp+112]
_step equ word [esp+116]
;=================================================================
; cdecl parameters:
; -- [ebp+8] = N
; -- [ebp+12] = 4k-aligned data array address
; returns:
; -- nothing
; destroys:
; -- eax, ebx
; locals:
; -- 10 stack-located dwords (_t0 ... _t9)
;; ==========================
align 4
step2:
push ebp
mov ebp, esp
sub esp, 40
mov ebx, [ebp+12]
mov eax, [ebp+ 8]
shl eax, 3
add eax, ebx
align 4
.loop_i:
; -- quad subelements +0, +4, +8 and +12 (simpliest operations)
@ -163,7 +214,7 @@ step2: ; Step2
; -- even subelements +2, +6, +10 and +14 (2 multiplications needed)
fld qword[ebx+8*2]
fld qword[ebx+8*6]
fld [_root]
fld [fht_r]
fmul st1, st0 ; st : r, t2, t1
fld qword[ebx+8*10]
fxch st1 ; st : r, t3, t2, t1
@ -194,20 +245,20 @@ step2: ; Step2
fsub st0, st1
fxch st1
faddp st2, st0 ; st : (f[l3]-f[l7]), (f[l3]+f[l7])
fld [_root2]
fld [fht_r2]
fmul st2, st0
fmulp st1, st0 ; st : t9, t6
fld qword[ebx+8*3]
fld st0
fadd st0, st2 ; st : t1, f[l5], t9, t6
fstp [_t1]
fstp _t1
fsub st0, st1
fstp [_t2]
fstp [_t9] ; (t9 never used)
fstp [_t6] ; st : <empty>
fstp _t2
fstp _t9 ; (t9 never used)
fstp _t6 ; st : <empty>
fld [_c1]
fld [_s1]
fld [fht_c1]
fld [fht_s1]
fld qword[ebx+8*5]
fld qword[ebx+8*7]
fld st3 ; st: c1, f[l6], f[l2], s1, c1
@ -215,13 +266,13 @@ step2: ; Step2
fld st1 ; st: f_6, f_2*c, f_6, f_2, s, c
fmul st0, st4 ; st: f_6*s, f_2*c, f_6, f_2, s, c
faddp st1, st0 ; st: t5, f_6, f_2, s, c
fstp [_t5] ; st: f_6, f_2, s, c
fstp _t5 ; st: f_6, f_2, s, c
fld st3 ; st: c, f_6, f_2, s, c
fmul st0, st1
fld st3
fmul st0, st3 ; st: f_2*s, f_6*c, f_6, f_2, s, c
fsubp st1, st0 ; st: t8, f_6, f_2, s, c
fstp [_t8] ; st: f_6, f_2, s, c
fstp _t8 ; st: f_6, f_2, s, c
fstp st0 ; st: f_2, s, c
fstp st0 ; st: s, c
@ -232,51 +283,51 @@ step2: ; Step2
fld st3
fmul st0, st3 ; st: f_4*s, f_8*c, f_8, f_4, s, c
faddp st1, st0 ; st: t7, f_8, f_4, s, c
fld [_t5] ; st: t5, t7, f_8, f_4, s, c
fld _t5 ; st: t5, t7, f_8, f_4, s, c
fsub st0, st1 ; st: t4, t7, f_8, f_4, s, c
fstp [_t4]
fstp [_t7] ; st: f_8, f_4, s, c
fstp _t4
fstp _t7 ; st: f_8, f_4, s, c
fld st3 ; st: c, f_8, f_4, s, c
fmul st0, st2
fld st3
fmul st0, st2 ; st: f_8*s, f_4*c, f_8, f_4, s, c
fsubp st1, st0 ; st:-t0, f_8, f_4, s, c
fchs
fld [_t8]
fld _t8
fchs ; st:-t8, t0, f_8, f_4, s, c
fsub st0, st1 ; st: t3, t0, f_8, f_4, s, c
fstp [_t3]
fstp [_t0] ; st: f_8, f_4, s, c
fstp _t3
fstp _t0 ; st: f_8, f_4, s, c
fstp st0 ; st: f_4, s, c
fstp st0 ; st: s, c
fstp st0 ; st: c
fstp st0 ; st: <empty>
fld [_t1]
fld [_t4]
fld _t1
fld _t4
fld st1
fsub st0, st1
fstp qword[ebx+8*11] ; f[l7] = t1-t4
faddp st1, st0
fstp qword[ebx+8*3] ; f[l5] = t1+t4
fld [_t2]
fld [_t3]
fld _t2
fld _t3
fld st1
fsub st0, st1
fstp qword[ebx+8*15] ; f[l8]
faddp st1, st0
fstp qword[ebx+8*7] ; f[l6]
fld [_t6]
fld _t6
fld qword[ebx+8]
fld st1
fsub st0, st1
fxch st1
faddp st2, st0 ; st : t2, t1
fld [_t8]
fsub [_t0]
fld [_t5]
fadd [_t7] ; st : t4, t3, t2, t1
fld _t8
fsub _t0
fld _t5
fadd _t7 ; st : t4, t3, t2, t1
fld st3
fsub st0, st1
@ -294,36 +345,42 @@ step2: ; Step2
cmp ebx, eax
jb .loop_i
ret
align 8 ; shared local vars
_t0 dq 0
_t1 dq 0
_t2 dq 0
_t3 dq 0
_t4 dq 0
_t5 dq 0
_t6 dq 0
_t7 dq 0
_t8 dq 0
_t9 dq 0
mov esp, ebp
pop ebp
ret
;===================================================================
;=================================================================
; cdecl parameters:
; -- [ebp+8] = N
; -- [ebp+12] = p
; -- [ebp+16] = 4k-aligned data array address
; -- [ebp+20] = 4k-aligned SinCosTable address
; returns:
; -- nothing
; destroys:
; -- all GPRegs
; locals:
; -- 120 stack-located dwords (_t0 ... _t9, _l0..._step)
;; ==========================
align 4
step3:
;===================================================================
push ebp
mov ebp, esp
sub esp, 120
; 283 : {
; 293 : for (l=3; l<=p; l++)
mov cx, 0x0200
align 4
.newstep:
inc ch
cmp ch, Power_of_4
cmp ch, byte[ebp+12]
jg .done
mov [.step], cx
mov _step, cx
; 294 : {
; 295 : d1 = 1 << (l + l - 3);
@ -333,61 +390,63 @@ step3:
sub cl, 3
mov edx, 1
shl edx, cl
mov [.d1], edx
mov _d1, edx
; 296 : d2 = d1 << 1;
shl edx, 1
mov [.d2], edx
mov _d2, edx
mov eax, edx
; 297 : d3 = d2 << 1;
shl edx, 1
mov [.d3], edx
mov _d3, edx
; 298 : d4 = d2 + d3;
add eax, edx
mov [.d4], eax
mov _d4, eax
; 299 : d5 = d3 << 1;
shl edx, 1
mov [.d5], edx
mov _d5, edx
shl edx, 3
mov [.d6], edx ; d6 = d5*8 to simplify index operations
mov _d6, edx ; d6 = d5*8 to simplify index operations
; 339 : j5 = N / d5; ; moved out of internal loop
mov cl, Power_of_4
mov cl, [ebp+12]
sub cl, ch
add cl, cl
mov edx, 1
shl edx, cl
mov [.j5], edx
mov _j5, edx
; 300 :
; 301 : for (j=0; j<N; j+=d5)
mov esi, [_f]
mov ebx, esi
add esi, NumPoints*8
mov [.end_of_array], esi
mov ebx, [ebp+16]
mov esi, [ebp+8]
shl esi, 3
add esi, ebx
mov _end_of_array, esi
align 4
.next_j:
; {
; t1 = f[j] + f[j+d2];
mov eax, [.d2]
mov eax, _d2
fld qword[ebx]
fld qword[ebx+eax*8]
fld st1
fadd st0, st1
fstp [_t1]
fstp _t1
; t2 = f[j] - f[j+d2];
fsubp st1, st0
fstp [_t2]
fstp _t2
; t3 = f[j+d3] + f[j+d4];
mov edi, [.d3]
mov edi, _d3
fld qword[ebx+edi*8]
mov edx, [.d4]
mov edx, _d4
fld qword[ebx+edx*8]
fld st1
fsub st0, st1 ; st : t4, f4, f3
@ -398,7 +457,7 @@ step3:
; f[j+d4] = t2 - t4;
; f[j+d3] = t2 + t4;
fld [_t2]
fld _t2
fld st0
fsub st0, st2 ; st : f4, t2, t4, t3
fstp qword[ebx+edx*8] ; st : t2, t4, t3
@ -407,7 +466,7 @@ step3:
; f[j+d2] = t1 - t3;
; f[j] = t1 + t3;
fld [_t1]
fld _t1
fst st1
fsub st0, st2 ; st : f2, t1, t3
fstp qword[ebx+eax*8] ; st : t1, t3
@ -416,7 +475,7 @@ step3:
fstp st0
; jj = j + d1; / ??
mov edi, [.d1]
mov edi, _d1
shl edi, 3 ; = d1*8
mov edx, edi
mov eax, edi
@ -432,7 +491,7 @@ step3:
; t2 = f[jj+d2] * r;
fld qword [edi+eax]
fld [_root]
fld [fht_r]
fmul st1, st0 ; st : r, t2, t3, t1
; t4 = f[jj+d4] * r
fmul qword [edx+eax] ; st : t4, t2, t3, t1
@ -461,58 +520,61 @@ step3:
; for (k=1; k<d1; k++)
xor ecx, ecx ; ecx = k
mov [.jj], ecx
mov _jj, ecx
align 4
.next_k:
inc ecx
cmp ecx, [.d1]
cmp ecx, _d1
jge .done_k
; {
mov eax, [.d2] ; the sector increment
mov eax, _d2 ; the sector increment
; l1 = j + k;
mov edx, ecx
mov [.l1], edx ; [ebx+edx*8] --> f[j+k]
mov _l1, edx ; [ebx+edx*8] --> f[j+k]
; l2 = l1 + d2;
add edx, eax
mov [.l2], edx
mov _l2, edx
; l3 = l1 + d3;
add edx, eax
mov [.l3], edx
mov _l3, edx
; l4 = l1 + d4;
add edx, eax
mov [.l4], edx
mov _l4, edx
; l5 = j + d2 - k;
mov edx, eax
sub edx, ecx
mov [.l5], edx
mov _l5, edx
; l6 = l5 + d2;
add edx, eax
mov [.l6], edx
mov _l6, edx
; l7 = l5 + d3;
add edx, eax
mov [.l7], edx
mov _l7, edx
; l8 = l5 + d4;
add edx, eax
mov [.l8], edx
mov _l8, edx
; 340 : j5 *= k; // add-substituted multiplication
mov eax, [.jj]
add eax, [.j5]
mov [.jj], eax
mov eax, _jj
add eax, _j5
mov _jj, eax
; c1 = C[jj];
; s1 = S[jj];
mov edi, [_Cosins]
mov edi, [ebp+20]
fld qword[edi+eax*8]
mov esi, [_Sines]
mov esi, [ebp+8]
shl esi, 2
add esi, edi
fld qword[esi+eax*8] ; st : s1, c1
; t5 = f[l2] * c1 + f[l6] * s1;
; t8 = f[l6] * c1 - f[l2] * s1;
mov edx, [.l6]
mov edx, _l6
fld qword[ebx+edx*8]
mov edx, [.l2]
mov edx, _l2
fld st0
fmul st0, st2
fxch st1
@ -521,10 +583,10 @@ step3:
fmul st4, st0
fmulp st3, st0 ; st : f[l6]*c, f[l6]*s, f[l2]*s, f[l2]*c
fsub st0, st2 ; st : t8, f[l6]*s, f[l2]*s, f[l2]*c
fstp [_t8]
fstp _t8
faddp st2, st0 ; st : f[l2]*s, t5
fstp st0 ; st : t5
fstp [_t5] ; st : <empty>
fstp _t5 ; st : <empty>
; c2 = C[2*jj];
; s2 = S[2*jj];
@ -534,9 +596,9 @@ step3:
; t6 = f[l3] * c2 + f[l7] * s2;
; t9 = f[l7] * c2 - f[l3] * s2;
mov edx, [.l7]
mov edx, _l7
fld qword[ebx+edx*8]
mov edx, [.l3]
mov edx, _l3
fld st0
fmul st0, st2
fxch st1
@ -545,22 +607,22 @@ step3:
fmul st4, st0
fmulp st3, st0 ; st : f[l7]*c, f[l7]*s, f[l3]*s, f[l3]*c
fsub st0, st2 ; st : t9, f[l7]*s, f[l3]*s, f[l3]*c
fstp [_t9]
fstp _t9
faddp st2, st0 ; st : f[l2]*s, t6
fstp st0 ; st : t6
fstp [_t6] ; st : <empty>
fstp _t6 ; st : <empty>
; c3 = C[3*jj];
; s3 = S[3*jj];
add eax, [.jj]
add eax, _jj
fld qword[edi+eax*8]
fld qword[esi+eax*8] ; st : s3, c3
; t7 = f[l4] * c3 + f[l8] * s3;
; t0 = f[l8] * c3 - f[l4] * s3;
mov edx, [.l8]
mov edx, _l8
fld qword[ebx+edx*8]
mov edx, [.l4]
mov edx, _l4
fld st0
fmul st0, st2
fxch st1
@ -569,192 +631,162 @@ step3:
fmul st4, st0
fmulp st3, st0 ; st : f[l8]*c, f[l8]*s, f[l4]*s, f[l4]*c
fsub st0, st2 ; st : t9, f[l8]*s, f[l4]*s, f[l4]*c
fstp [_t0]
fstp _t0
faddp st2, st0 ; st : f[l2]*s, t7
fstp st0 ; st : t7
fstp [_t7] ; st : <empty>
fstp _t7 ; st : <empty>
; t1 = f[l5] - t9;
; t2 = f[l5] + t9;
mov eax, [.l5]
mov eax, _l5
fld qword [ebx+eax*8]
fld [_t9]
fld _t9
fld st0
fadd st0, st2
fstp [_t2]
fstp _t2
fsubp st1, st0
fstp [_t1]
fstp _t1
; t3 = - t8 - t0;
fld [_t8]
fadd [_t0]
fld _t8
fadd _t0
fchs
fstp [_t3]
fstp _t3
; t4 = t5 - t7;
fld [_t5]
fsub [_t7]
fstp [_t4]
fld _t5
fsub _t7
fstp _t4
; f[l5] = t1 + t4;
fld [_t1]
fld [_t4]
fld _t1
fld _t4
fld st0
fadd st0, st2
fstp qword [ebx+eax*8]
; f[l7] = t1 - t4;
mov eax, [.l7]
mov eax, _l7
fsubp st1, st0
fstp qword [ebx+eax*8]
; f[l6] = t2 + t3;
mov eax, [.l6]
fld [_t2]
fld [_t3]
mov eax, _l6
fld _t2
fld _t3
fld st0
fadd st0, st2
fstp qword [ebx+eax*8]
; f[l8] = t2 - t3;
mov eax, [.l8]
mov eax, _l8
fsubp st1, st0
fstp qword [ebx+eax*8]
; t1 = f[l1] + t6;
mov eax, [.l1]
mov eax, _l1
fld qword [ebx+eax*8]
fld [_t6]
fld _t6
fld st0
fadd st0, st2
fstp [_t1]
fstp _t1
; t2 = f[l1] - t6;
fsubp st1, st0
fstp [_t2]
fstp _t2
; t3 = t8 - t0;
fld [_t8]
fsub [_t0]
fstp [_t3]
fld _t8
fsub _t0
fstp _t3
; t4 = t5 + t7;
fld [_t5]
fadd [_t7]
fstp [_t4]
fld _t5
fadd _t7
fstp _t4
; f[l1] = t1 + t4;
mov eax, [.l1]
fld [_t1]
fld [_t4]
mov eax, _l1
fld _t1
fld _t4
fld st0
fadd st0, st2
fstp qword [ebx+eax*8]
; f[l3] = t1 - t4;
mov eax, [.l3]
mov eax, _l3
fsubp st1, st0
fstp qword [ebx+eax*8]
; f[l2] = t2 + t3;
mov eax, [.l2]
fld [_t2]
fld [_t3]
mov eax, _l2
fld _t2
fld _t3
fld st0
fadd st0, st2
fstp qword [ebx+eax*8]
; f[l4] = t2 - t3;
mov eax, [.l4]
mov eax, _l4
fsubp st1, st0
fstp qword [ebx+eax*8]
; 374 : }
jmp .next_k
align 4
.done_k:
; 375 : }
add ebx, [.d6] ; d6 = d5*8
cmp ebx, [.end_of_array]
add ebx, _d6 ; d6 = d5*8
cmp ebx, _end_of_array
jb .next_j
; 376 : }
mov cx, [.step]
mov cx, _step
jmp .newstep
.done:
mov esp, ebp
pop ebp
; 377 : }
ret
align 4
.l1 dd 0
.l2 dd 0
.l3 dd 0
.l4 dd 0
.l5 dd 0
.l6 dd 0
.l7 dd 0
.l8 dd 0
.l9 dd 0
.l0 dd 0
.d1 dd 0
.d2 dd 0
.d3 dd 0
.d4 dd 0
.d5 dd 0
.d6 dd 0
.j5 dd 0
.jj dd 0
.end_of_array dd 0
.step dw 0
align 8
;=========== Step3 ends here ===========
; =================================================================
; syscall entry
;
_f dd ?
_N dd 1024 ; number of points
_a dd ? ; initial data array
x dd 0 ; tranformed (float) data array
_Cosins dd 0
_Sines dd 0
;=================================================================
; parameters:
; -- [ebp+12] = N
; -- [ebp+16] = p
; -- [ebp+20] = 4k-aligned data array address
; -- [ebp+24] = 4k-aligned SinCosTable address
; returns:
; -- nothing
; destroys:
; -- all GPRegs
;; ==========================
FFT4:
or al, al
jnz .trans
mov cl, Power_of_4
mov eax, 1
shl eax, cl
shl eax, cl
mov [_N], eax
shl eax, 2 ; size of Sine table in bytes
add eax, ebx
mov [_Sines], ebx
mov [_Cosins], eax
cpuid
rdtsc
mov [.time], eax
call MakeSinCosTable
cpuid
rdtsc
sub eax, [.time]
ret
.trans:
mov [x], ebx
mov [_f], ebx
cli ;-----
cpuid
rdtsc
mov [.time], eax
align 4
FHT_4:
push ebp
mov ebp, esp
mov edx, [ebp+20] ; a
mov dl, byte[ebp+16]
call BitInvert
call step1
call step2
push dword[ebp+20] ; a
push ecx ; N
call step1 ; 4-point transform
cmp cl, 1
jz .done
call step2 ; 16-point transform
cmp byte[ebp+16],1 ; p = 2 ?
jz .done
pop edx ; N
pop ecx ; a
push dword[ebp+24] ; t
push ecx
push dword[ebp+16] ; p
push edx ; N
call step3
cpuid
rdtsc
sti ;----
sub eax, [.time]
ret
.done:
mov esp, ebp
pop ebp
.time dd 0
ret

View File

@ -0,0 +1,207 @@
;========================================================================
;= =
;= Fast Hartley Transform routine demo for KolibriOS =
;= =
;= Copyright (C) 2010, Artem Jerdev <kolibri@jerdev.co.uk> =
;= =
;= refer to wiki.kolibtios.org for all details =
;= =
;========================================================================
use32
org 0x0
db 'MENUET01' ; 8 byte id
dd 0x01 ; header version
dd START ; start of code
dd I_END ; size of image
dd 0x100000 ; memory for app
dd 0xbfffc ; esp
dd 0x0 , 0x0 ; I_Param , I_Icon
include 'macros.inc'
include 'debug.inc'
include 'FHT4i.inc'
START: ; start of execution
call main
mov eax,-1 ; close this program
int 0x40
;=============================================================
;Func: calculates a simple function
; ff = (int)(500*exp(-t) * cos (2.5*t))
; uses: eax, ebx
;------------
Func:
; 9 : {
; 10 : double x,t;
; 11 : int f;
; 12 :
; 13 : x = (i < N2) ? i : i - NUM_POINTS;
mov eax, [ii]
cmp eax, 512
jge .index_negative
jmp .index_correct
.index_negative:
sub eax, 1024
.index_correct:
mov [temp], eax
; fild [temp]
; 14 : t = x / 16.0;
; f2xm1 argument (abs) must be less than 1, so
mov [t_mod], eax
and [t_mod], 0x0F ; x % 16
shr eax, 4 ; x / 16
mov [t_div], eax
fild [temp]
; 15 : if (t<0) t = -t;
fabs
exp_ok:
; 16 : f = (int)(512*2^(-t) * cos (2.5*t));
fchs
f2xm1
fmul [f500]
fstp [tv93]
fld [f2_5]
fmul [tt]
fcos
fmul [tv93]
fstp [tt]
mov bx, word[tt+6]
shr bx,4
and bx,0x07FF
add ax,bx
shl ax,4
and word[tt+6], 0x800F
or word[tt+6], ax
fld [tt]
fstp [ff]
; 17 : return f;
; 18 : }
ret
;---------------------------------------------------------
; test data filler
;
; uses eax, ebx, ecx
FillData:
; 29 : for (i=0; i<NUM_POINTS; i++)
; here : ecx = i
xor ecx, ecx
.funcloop:
; 30 : {
; 31 : ia[i] = Func(i);
mov [ii], ecx
call Func
fld [ff]
fstp qword [edx+ecx*8]
; 32 : }
inc ecx
cmp ecx, [_in] ; ecx == N ?
jne .funcloop
ret
;====================================================================
; main
;====================================================================
align 4
_ia dd 0
_ii dd 0
_ip dd 0
_in dd 0
_it dd 0
;-----------------
main:
mov eax, 68
mov ebx, 11
int 0x40
fninit
mov cl, 2 ; power of 4
mov byte[_ip], cl
mov eax, 1
shl eax, cl
shl eax, cl
mov [_in], eax
mov dl, cl
call CreateSinCosTable
mov [_it], edx
mov ecx, [_in]
shl ecx, 3
mov ebx, 12
mov eax, 68
int 0x40
mov [_ia], eax
mov edx, eax
call FillData
cpuid
rdtsc
mov [t_0], eax
push [_it]
push [_ia]
push [_ip]
push [_in]
; call FHT_4
xor eax, eax
syscall
add esp, 16
cpuid
rdtsc
mov [t_1], eax
sub eax, [t_0]
debug_print_hex eax
print '<- fht time'
mov edx, [_it]
call DestroySinCosTable
mov ecx, [_ia]
mov ebx, 13
mov eax, 68
int 0x40
ret
; ========================================================
; static data
;----------------
align 8
;f18 dq 0x4032000000000000
f256 dq 256.01f
f14_2 dq 14.2f
f500 dq 0x407f400000000000
f2_5 dq 0x4004000000000000
tt dq ?
tv93 dq ?
t_div dd ?
t_mod dd ?
temp dd ?
ff dq ? ; return value (int)
ii dd ? ; argument (int) = array index
t_1 dd ?
t_0 dd ?
fcontrol dw 0x0037f
title db ' Fast Hartley Transform Test - A.Jerdev 2010'
I_END: