VESA 32bpp optimisation

git-svn-id: svn://kolibrios.org@1707 a494cfbc-eb01-0410-851d-a64ba20cac60
This commit is contained in:
Artem Jerdev (art_zh) 2010-11-23 10:44:43 +00:00
parent 3d0324ed25
commit 1feef39077
4 changed files with 486 additions and 605 deletions

View File

@ -154,7 +154,7 @@ drm:
add eax,ecx ; save picture under mouse
add ebx,edx
push ecx
call getpixel
call get_pixel
mov [COLOR_TEMP],ecx
pop ecx
mov eax,edx

View File

@ -379,8 +379,8 @@ high_code:
; === EGA, VGA & Vesa 1.2 modes not supported ===
setvesa20:
v20ga32:
mov [PUTPIXEL],dword Vesa20_putpixel32
mov [GETPIXEL],dword Vesa20_getpixel32
mov [PUTPIXEL],dword put_pixel
mov [GETPIXEL],dword get_pixel
; -------- Fast System Call init ----------
.SEnP:

View File

@ -143,14 +143,14 @@
;
; 0x80010000 -> 6CBFF kernel, 32-bit run-time code (up to 371 Kb)
; in the current version:
; -> 01726 16-bit code end
; -> 01828 16-bit data end
; -> 11828 32-bit code start
; -> 2E19E 32-bit code end
; -> 314F8..end_of_kernel zero-filled zone after preinit_mem
; -> 34DFB uninitialized globals start
; -> 3CFEA end_of_kernel
; -> 3D000 not used (190k)
; -> 00B37 16-bit code end
; -> 00C40 16-bit data end
; -> 10C40 32-bit code start
; -> 2D582 32-bit code end
; -> 30918..end_of_kernel zero-filled zone after preinit_mem
; -> 3421B uninitialized globals start
; -> 3C40A end_of_kernel
; -> 3D000 not used (194k)
; 0x80050000 -> 090000 zero-filled zone after preinit_mem
; 0x8006CC00 -> 6DBFF stack at boot time (4Kb)
;

View File

@ -20,17 +20,6 @@
$Revision$
; If you're planning to write your own video driver I suggest
; you replace the VESA12.INC file and see those instructions.
;Screen_Max_X equ 0xfe00
;Screen_Max_Y equ 0xfe04
;BytesPerScanLine equ 0xfe08
;LFBAddress equ 0xfe80
;ScreenBPP equ 0xfbf1
;*************************************************
; getpixel
;
@ -41,26 +30,12 @@ $Revision$
; ret:
; ecx = 00 RR GG BB
getpixel:
; Read a pixel through the driver's GETPIXEL pointer.
; The result comes back in ecx (00 RR GG BB); eax, ebx, edx and edi are
; saved here because the routine behind the pointer may overwrite them.
        push    eax
        push    ebx
        push    edx
        push    edi
        call    dword [GETPIXEL]
        pop     edi
        pop     edx
        pop     ebx
        pop     eax
        ret
Vesa20_getpixel24:
; 24bpp pixel read from the linear frame buffer.
; in:  eax = x
;      ebx = y
; out: ecx = 00 RR GG BB
;      ebx = y * BytesPerScanLine, edi = byte offset of the pixel
        lea     edi, [eax+eax*2]            ; edi = x*3
        imul    ebx, [BytesPerScanLine]     ; ebx = y * bytes per scan line
        add     edi, ebx                    ; edi = x*3 + y*BytesPerScanLine
        mov     ecx, [LFB_BASE+edi]         ; dword load, top byte is not part of the pixel
        and     ecx, 0x00FFFFFF
        ret
Vesa20_getpixel32:
imul ebx, [BytesPerScanLine] ; ebx = y * y multiplier
lea edi, [ebx+eax*4] ; edi = x*4+(y*y multiplier)
mov ecx, [LFB_BASE+edi]
get_pixel:
; 32bpp pixel read from the linear frame buffer.
; in:  eax = x
;      ebx = y
; out: ecx = 00 RR GG BB
; Only ecx is written, so no register has to be saved around the call.
        mov     ecx, ebx
        imul    ecx, [BytesPerScanLine]     ; ecx = y * bytes per scan line
        mov     ecx, [LFB_BASE+ecx+eax*4]   ; pixel at x*4 + y*BytesPerScanLine
        and     ecx, 0x00FFFFFF             ; drop the unused top byte
        ret
@ -129,10 +104,7 @@ vesa20_putimage:
mov [putimg.abs_cy], ebx
; real_sx = MIN(wnd_sx-image_cx, image_sx);
mov ebx, [eax-twdw + WDATA.box.width] ; ebx = wnd_sx
; \begin{diamond}[20.08.2006]
; note that WDATA.box.width is one pixel less than real window x-size
inc ebx
; \end{diamond}[20.08.2006]
inc ebx ; WDATA.box.width is one pixel less than real window x-size
sub ebx, [putimg.image_cx]
ja @f
add esp, putimg.stack_data
@ -146,9 +118,7 @@ vesa20_putimage:
mov [putimg.real_sx], ebx
; init real_sy
mov ebx, [eax-twdw + WDATA.box.height] ; ebx = wnd_sy
; \begin{diamond}[20.08.2006]
inc ebx
; \end{diamond}[20.08.2006]
sub ebx, [putimg.image_cy]
ja @f
add esp, putimg.stack_data
@ -164,8 +134,6 @@ vesa20_putimage:
mov eax, [putimg.image_sx]
mov ecx, [putimg.real_sx]
sub eax, ecx
;; imul eax, [putimg.source_bpp]
; lea eax, [eax + eax * 2]
call esi
add eax, [putimg.arg_0]
mov [putimg.line_increment], eax
@ -176,9 +144,8 @@ vesa20_putimage:
mov [putimg.winmap_newline], eax
; screen new line increment
mov eax, [BytesPerScanLine]
movzx ebx, byte [ScreenBPP]
shr ebx, 3
imul ecx, ebx
shl ecx, 1
shl ecx, 1
sub eax, ecx
mov [putimg.screen_newline], eax
; pointer to image
@ -187,9 +154,8 @@ vesa20_putimage:
mov edx, [putimg.abs_cy]
imul edx, [BytesPerScanLine]
mov eax, [putimg.abs_cx]
movzx ebx, byte [ScreenBPP]
shr ebx, 3
imul eax, ebx
shl eax, 1
shl eax, 1
add edx, eax
; pointer to pixel map
mov eax, [putimg.abs_cy]
@ -200,59 +166,12 @@ vesa20_putimage:
xchg eax, ebp
; get process number
mov ebx, [CURRENT_TASK]
cmp byte [ScreenBPP], 32
je put_image_end_32
;put_image_end_24:
mov edi, [putimg.real_sy]
align 4
.new_line:
mov ecx, [putimg.real_sx]
; push ebp edx
align 4
.new_x:
push [putimg.edi]
mov eax, [putimg.ebp+4]
call eax
cmp [ebp], bl
jne .skip
; mov eax, [esi] ; eax = RRBBGGRR
mov [LFB_BASE+edx], ax
shr eax, 16
mov [LFB_BASE+edx+2], al
.skip:
; add esi, 3 ;[putimg.source_bpp]
add edx, 3
inc ebp
dec ecx
jnz .new_x
; pop edx ebp
add esi, [putimg.line_increment]
add edx, [putimg.screen_newline] ;[BytesPerScanLine]
add ebp, [putimg.winmap_newline] ;[Screen_Max_X]
; inc ebp
cmp [putimg.ebp], putimage_get1bpp
jz .correct
cmp [putimg.ebp], putimage_get2bpp
jz .correct
cmp [putimg.ebp], putimage_get4bpp
jnz @f
.correct:
mov eax, [putimg.edi]
mov byte [eax], 80h
@@:
dec edi
jnz .new_line
.finish:
add esp, putimg.stack_data
popad
ret
put_image_end_32:
mov edi, [putimg.real_sy]
align 4
.new_line:
mov ecx, [putimg.real_sx]
; push ebp edx
align 4
.new_x:
push [putimg.edi]
@ -260,19 +179,15 @@ align 4
call eax
cmp [ebp], bl
jne .skip
; mov eax, [esi] ; ecx = RRBBGGRR
mov [LFB_BASE+edx], eax
.skip:
; add esi, [putimg.source_bpp]
add edx, 4
inc ebp
dec ecx
jnz .new_x
; pop edx ebp
add esi, [putimg.line_increment]
add edx, [putimg.screen_newline] ;[BytesPerScanLine]
add ebp, [putimg.winmap_newline] ;[Screen_Max_X]
; inc ebp
cmp [putimg.ebp], putimage_get1bpp
jz .correct
cmp [putimg.ebp], putimage_get2bpp
@ -288,11 +203,8 @@ align 4
.finish:
add esp, putimg.stack_data
popad
; call VGA__putimage
; mov [EGA_counter],1
ret
;*************************************************
align 4
__sys_putpixel:
@ -302,55 +214,44 @@ __sys_putpixel:
; ecx = ?? RR GG BB ; 0x01000000 negation
; edi = 0x00000001 force
;;; mov [novesachecksum], dword 0
pushad
cmp [Screen_Max_X], eax
jb .exit
cmp [Screen_Max_Y], ebx
jb .exit
.check_forced:
test edi,1 ; force ?
jnz .forced
jnz .checked
; not forced:
push eax
.not_forced:
push edx
mov edx,[_display.width] ; screen x size
imul edx, ebx
add eax, [_WinMapAddress]
add edx, [_WinMapAddress]
movzx edx, byte [eax+edx]
cmp edx, [CURRENT_TASK]
pop eax
pop edx
jne .exit
.forced:
; check if negation
; OK to set pixel
.checked:
push ebx
imul ebx, [BytesPerScanLine]
lea ebx, [ebx+eax*4]
test ecx,0x01000000
jz .noneg
call getpixel
mov ecx, [LFB_BASE+ebx]
not ecx
mov [esp+32-8],ecx
and ecx, 0x01FFFFFF
.noneg:
; OK to set pixel
call dword [PUTPIXEL] ; call the real put_pixel function
mov [LFB_BASE+ebx], ecx
pop ebx
.exit:
popad
ret
align 4
Vesa20_putpixel24:
; Store one 24bpp pixel into the linear frame buffer.
; eax = x
; ebx = y
; The color is NOT taken from ecx: it is loaded from the stack slot
; [esp+32-8+4]. __sys_putpixel writes the (possibly inverted) color to
; [esp+32-8] after its pushad; the extra +4 presumably skips the return
; address pushed by the call that reaches this routine.
; NOTE(review): this only works when entered with that exact stack layout.
; Overwrites eax, ebx and edi.
imul ebx, [BytesPerScanLine] ; ebx = y * bytes per scan line
lea edi, [eax+eax*2] ; edi = x*3
mov eax, [esp+32-8+4] ; eax = color from the caller's stack frame
mov [LFB_BASE+ebx+edi], ax ; low two bytes of the color
shr eax, 16
mov [LFB_BASE+ebx+edi+2], al ; third byte of the color
ret
align 4
Vesa20_putpixel32:
put_pixel: ; left for compatibility with Vesa20_putpixel32
; eax = x
; ebx = y
imul ebx, [BytesPerScanLine] ; ebx = y * y multiplier
@ -359,6 +260,7 @@ Vesa20_putpixel32:
mov [LFB_BASE+edi], eax
ret
;*************************************************
;align 4
@ -492,50 +394,117 @@ align 4
.exit:
add esp, 6*4
popa
; dec [mouse_pause]
call [draw_pointer]
ret
align 4
hline:
; ------------ draw a horizontal line -------------
; Writes straight into the 32bpp linear frame buffer.
; in:
;   eax = x1
;   edx = x2        (x1 and x2 may be passed in either order)
;   ebx = y
;   ecx = color     (bit 24 set = invert the pixel already on screen)
;   edi = force ?   (bit 0 set = skip the window-map ownership test)
; out:
;   eax, ebx, edx, esi, ebp are preserved
;   ecx is overwritten with inverted screen data when bit 24 is set
;
; Screen_Max_X / Screen_Max_Y are treated as the last valid column / row,
; the same way __sys_putpixel treats them, so the range tests are unsigned
; "above" tests: a coordinate equal to the maximum is still drawn, and a
; negative coordinate is never used to form an address.
        cmp     ebx, [Screen_Max_Y]
        ja      .out                    ; row is off-screen (also catches y < 0)
        push    eax ebp esi ebx edx
        mov     ebp, [_display.width]
        imul    ebp, ebx
        add     ebp, [_WinMapAddress]   ; ebp -> window-map row for y
        cmp     edx, eax                ; order the ends so that eax <= edx
        jge     .sorted
        xchg    eax, edx
.sorted:
        test    edx, edx
        js      .exit                   ; whole line lies left of column 0
        test    eax, eax
        jns     .left_ok
        xor     eax, eax                ; clip the left end to column 0
.left_ok:
        cmp     eax, [Screen_Max_X]
        ja      .exit                   ; whole line lies right of the screen
        imul    ebx, [BytesPerScanLine]
        add     ebx, LFB_BASE           ; ebx -> first byte of frame-buffer row y
        cmp     edx, [Screen_Max_X]
        jbe     .draw
        mov     edx, [Screen_Max_X]     ; clip the right end to the last column
.draw:                                  ; -- the line ---
        test    edi, 1                  ; forced ?
        jnz     .checked
        ; check whether this pixel is covered by another window
        movzx   esi, byte [ebp+eax]
        cmp     esi, [CURRENT_TASK]
        jne     .nextpixel
.checked:
        test    ecx, 0x01000000
        jz      .noneg
        mov     ecx, [ebx+eax*4]
        not     ecx
        and     ecx, 0x01FFFFFF         ; keep bit[24] high so the next pixel is inverted too
.noneg:
        mov     [ebx+eax*4], ecx
.nextpixel:
        inc     eax
        cmp     eax, edx
        jle     .draw
.exit:
        pop     edx ebx esi ebp eax
.out:
        ret
align 4
vline:
; --------- draw a vertical line ------------
; Writes straight into the 32bpp linear frame buffer.
; in:
;   eax = x
;   ebx = y1
;   edx = y2        (y1 and y2 may be passed in either order)
;   ecx = color     (bit 24 set = invert the pixel already on screen)
;   edi = force ?   (bit 0 set = skip the window-map ownership test)
; out:
;   eax, ebx, edx, esi, ebp are preserved
;   ecx is overwritten with inverted screen data when bit 24 is set
;
; Screen_Max_X / Screen_Max_Y are treated as the last valid column / row,
; the same way __sys_putpixel treats them, so the range tests are unsigned
; "above" tests. The ends are ordered and clipped BEFORE the window-map and
; frame-buffer pointers are computed, so both pointers always start at the
; top row of the line, whichever order y1 and y2 were passed in.
        cmp     eax, [Screen_Max_X]
        ja      .out                    ; column is off-screen (also catches x < 0)
        push    eax ebp esi ebx edx
        cmp     edx, ebx                ; order the ends so that ebx <= edx
        jge     .sorted
        xchg    ebx, edx
.sorted:
        test    edx, edx
        js      .exit                   ; whole line lies above row 0
        test    ebx, ebx
        jns     .top_ok
        xor     ebx, ebx                ; clip the top end to row 0
.top_ok:
        cmp     ebx, [Screen_Max_Y]
        ja      .exit                   ; whole line lies below the screen
        mov     ebp, [_display.width]
        imul    ebp, ebx
        add     ebp, [_WinMapAddress]
        add     ebp, eax                ; ebp -> window-map byte of (x, top row)
        mov     esi, [BytesPerScanLine]
        imul    esi, ebx
        lea     eax, [esi+eax*4+LFB_BASE] ; eax -> frame-buffer dword of (x, top row)
        cmp     edx, [Screen_Max_Y]
        jbe     .draw
        mov     edx, [Screen_Max_Y]     ; clip the bottom end to the last row
.draw:                                  ; (vertical line itself)
        test    edi, 1                  ; forced ?
        jnz     .checked
        ; check whether this pixel is covered by another window
        movzx   esi, byte [ebp]
        cmp     esi, [CURRENT_TASK]
        jne     .nextpixel
.checked:
        test    ecx, 0x01000000
        jz      .noneg
        mov     ecx, [eax]
        not     ecx
        and     ecx, 0x01FFFFFF         ; keep bit[24] high so the next pixel is inverted too
.noneg:
        mov     [eax], ecx
.nextpixel:
        add     eax, [BytesPerScanLine] ; one row down in the frame buffer
        add     ebp, [_display.width]   ; one row down in the window map
        inc     ebx
        cmp     ebx, edx
        jle     .draw
.exit:
        pop     edx ebx esi ebp eax
.out:
        ret
@ -570,9 +539,9 @@ vesa20_drawbar:
sub esp, drbar.stack_data
mov [drbar.color], edi
sub edx, ebx
jle .exit ;// mike.dld, 2005-01-29
jle .exit
sub ecx, eax
jle .exit ;// mike.dld, 2005-01-29
jle .exit
mov [drbar.bar_sy], edx
mov [drbar.bar_sx], ecx
mov [drbar.bar_cx], eax
@ -584,13 +553,11 @@ vesa20_drawbar:
mov [drbar.abs_cy], ebx
; real_sx = MIN(wnd_sx-bar_cx, bar_sx);
mov ebx, [edi-twdw + WDATA.box.width] ; ebx = wnd_sx
; \begin{diamond}[20.08.2006]
; note that WDATA.box.width is one pixel less than real window x-size
inc ebx
; \end{diamond}[20.08.2006]
sub ebx, [drbar.bar_cx]
ja @f
.exit: ;// mike.dld, 2005-01-29
.exit:
add esp, drbar.stack_data
popad
xor eax, eax
@ -604,9 +571,7 @@ vesa20_drawbar:
mov [drbar.real_sx], ebx
; real_sy = MIN(wnd_sy-bar_cy, bar_sy);
mov ebx, [edi-twdw + WDATA.box.height] ; ebx = wnd_sy
; \begin{diamond}[20.08.2006]
inc ebx
; \end{diamond}
sub ebx, [drbar.bar_cy]
ja @f
add esp, drbar.stack_data
@ -627,9 +592,8 @@ vesa20_drawbar:
mov [drbar.line_inc_map], eax
; line_inc_scr
mov eax, [drbar.real_sx]
movzx ebx, byte [ScreenBPP]
shr ebx, 3
imul eax, ebx
shl eax, 1
shl eax, 1
neg eax
add eax, [BytesPerScanLine]
mov [drbar.line_inc_scr], eax
@ -637,9 +601,8 @@ vesa20_drawbar:
mov edx, [drbar.abs_cy]
imul edx, [BytesPerScanLine]
mov eax, [drbar.abs_cx]
; movzx ebx, byte [ScreenBPP]
; shr ebx, 3
imul eax, ebx
shl eax, 1
shl eax, 1
add edx, eax
; pointer to pixel map
mov eax, [drbar.abs_cy]
@ -650,12 +613,8 @@ vesa20_drawbar:
xchg eax, ebp
; get process number
mov ebx, [CURRENT_TASK]
cmp byte [ScreenBPP], 24
jne draw_bar_end_32
draw_bar_end_24:
mov eax, [drbar.color] ;; BBGGRR00
mov bh, al ;; bh = BB
shr eax, 8 ;; eax = RRGG
draw_bar_end_32:
; eax - color high RRGG
; bl - process num
; bh - color low BB
@ -663,42 +622,6 @@ draw_bar_end_24:
; edx - pointer to screen
; esi - counter
; edi - counter
mov esi, [drbar.real_sy]
align 4
.new_y:
mov edi, [drbar.real_sx]
align 4
.new_x:
cmp byte [ebp], bl
jne .skip
mov [LFB_BASE+edx], bh
mov [LFB_BASE+edx + 1], ax
.skip:
; add pixel
add edx, 3
inc ebp
dec edi
jnz .new_x
; add line
add edx, [drbar.line_inc_scr]
add ebp, [drbar.line_inc_map]
; <Ivan 15.10.04> drawing gradient bars
test eax, 0x00800000
jz @f
test bh, bh
jz @f
dec bh
@@:
; </Ivan 15.10.04>
dec esi
jnz .new_y
add esp, drbar.stack_data
popad
xor eax, eax
ret
draw_bar_end_32:
mov eax, [drbar.color] ;; BBGGRR00
mov esi, [drbar.real_sy]
align 4
@ -731,8 +654,10 @@ align 4
jnz .new_y
add esp, drbar.stack_data
popad
xor eax, eax
ret
align 4
vesa20_drawbackground_tiled:
call [_display.disable_mouse]
@ -749,10 +674,7 @@ dp2:
add ebp, eax
add ebp, eax
add ebp, eax
cmp [ScreenBPP], byte 24 ; 24 or 32 bpp ? - x size
jz @f
add ebp, eax
@@:
add ebp, LFB_BASE
; ebp:=Y*BytesPerScanLine+X*BytesPerPixel+AddrLFB
call calculate_edi
@ -796,9 +718,7 @@ nbgp:
add esi, 3
add edi, 3
@@:
cmp [ScreenBPP], byte 25 ; 24 or 32 bpp?
sbb edi, -1 ; +1 for 32 bpp
; I do not use 'inc eax' because this is slightly slower than 'add eax,1'
inc edi ; +1 for 32 bpp
add ebp, edx
add eax, edx
cmp eax, [draw_data+32+RECT.right]
@ -855,10 +775,8 @@ vesa20_drawbackground_stretch:
add ebp, eax
add ebp, eax
add ebp, eax
cmp [ScreenBPP], byte 24 ; 24 or 32 bpp ? - x size
jz @f
add ebp, eax
@@:
; ebp:=Y*BytesPerScanLine+X*BytesPerPixel+AddrLFB
call calculate_edi
xchg edi, ebp
@ -941,11 +859,10 @@ sdp3a:
mov [LFB_BASE+edi+2], al
snbgp:
cmp [ScreenBPP], byte 25
sbb edi, -4
add ebp, 1
add edi, 4
inc ebp
mov eax, [esp+20]
add eax, 1
inc eax
mov [esp+20], eax
add esi, 4
cmp eax, [draw_data+32+RECT.right]
@ -953,7 +870,7 @@ snbgp:
sdp4:
; next y
mov ebx, [esp+24]
add ebx, 1
inc ebx
mov [esp+24], ebx
cmp ebx, [draw_data+32+RECT.bottom]
ja sdpdone
@ -961,14 +878,11 @@ sdp4:
sub eax, [draw_data+32+RECT.left]
sub ebp, eax
add ebp, [Screen_Max_X]
add ebp, 1
inc ebp
sub edi, eax
sub edi, eax
sub edi, eax
cmp [ScreenBPP], byte 24
jz @f
sub edi, eax
@@:
add edi, [BytesPerScanLine]
; restore ecx,edx; advance esi to next background line
mov eax, [esp+28]
@ -1017,7 +931,7 @@ smooth_line:
@@:
stosd
mov eax, [esp+20+8]
add eax, 1
inc eax
mov [esp+20+8], eax
cmp eax, [draw_data+32+RECT.right]
ja @f
@ -1035,38 +949,6 @@ smooth_line:
align 16
overlapping_of_points:
if 0
; this version of procedure works, but is slower than next version
push ecx edx
mov edx, eax
push esi
shr ecx, 24
mov esi, ecx
mov ecx, ebx
movzx ebx, dl
movzx eax, cl
sub eax, ebx
movzx ebx, dh
imul eax, esi
add dl, ah
movzx eax, ch
sub eax, ebx
imul eax, esi
add dh, ah
ror ecx, 16
ror edx, 16
movzx eax, cl
movzx ebx, dl
sub eax, ebx
imul eax, esi
pop esi
add dl, ah
mov eax, edx
pop edx
ror eax, 16
pop ecx
ret
else
push ecx edx
mov edx, eax
push esi
@ -1094,7 +976,6 @@ else
ror eax, 16
pop ecx
ret
end if
iglobal
align 4