turbocat ea1a60faa3 Upload DGEN port source
git-svn-id: svn://kolibrios.org@9837 a494cfbc-eb01-0410-851d-a64ba20cac60
2022-06-15 18:25:17 +00:00

1095 lines
16 KiB
NASM

; 32-bit x86 code, NASM syntax.
bits 32
section .text
; Stack-argument aliases.  These are single-line macros, so the bare words
; which, line, where, vram, reg and highpal are rewritten to the memory
; operands below wherever they appear as tokens in the code that follows.
;
; which/line/where are valid when ebp == esp immediately after a PUSHAD at
; function entry: 32 bytes of saved registers + 4 bytes of return address
; put the first stack argument at [ebp+36].
%define which [ebp+36] ; int which
%define line [ebp+40] ; int line
%define where [ebp+44] ; unsigned char *where
; vram/reg/highpal are valid when ebp == esp after exactly five dword pushes
; at function entry: 20 bytes of saved registers + 4 bytes of return address
; put the first stack argument at [ebp+24].
%define vram [ebp+24] ; unsigned char *vram
%define reg [ebp+28] ; unsigned char reg[0x20]
%define highpal [ebp+32] ; unsigned int *highpal
; cache_align emits NOPs up to the next multiple of 32 bytes, measured from
; the start of the current section ($$).  ($$-$) & 31 is the negated section
; offset modulo 32, i.e. the number of padding bytes needed (0 when already
; aligned).  The 4- and 8-byte variants are kept commented out for reference.
;%define cache_align times ($$-$) & 3 nop ; Align to 4-byte boundary
;%define cache_align times ($$-$) & 7 nop ; Align to 8-byte boundary
%define cache_align times ($$-$) & 31 nop ; Align to 32-byte boundary
; Entry points exported to the linker.
global asm_tiles_init
global drawtile1_solid
global drawtile1
global drawtile2_solid
global drawtile2
global drawtile3_solid
global drawtile3
global drawtile4_solid
global drawtile4
; Neat utility macro
; triple_xor a, b -- exchange the contents of the two operands without a
; temporary, using three XORs.
; Side effects: the flags are rewritten by the XORs.
; Caveat: if both parameters name the same register the first XOR clears it,
; so the result is zero rather than the original value.
; No invocation of this macro appears in the code visible in this file.
%macro triple_xor 2
xor %1, %2 ; Triple XOR for a neat register exchange ;)
xor %2, %1
xor %1, %2
%endmacro
; blit_pixel1 mask [, shift] -- emit one opaque 8bpp pixel.
;   mask  : selects one 4-bit pixel out of the packed row held in ebx
;   shift : optional right shift that moves the selected nibble to bits 0-3
; In:    ebx = packed pixel row, esi -> dword that is OR-ed onto the pixel
;        value (only the low byte of the result is stored), edi -> output
; Out:   one byte written at [edi]; edi advanced by 1
; Clobb: eax, flags
%macro blit_pixel1 1-*                  ; 8bpp blitting, solid
        mov     eax, %1
        and     eax, ebx                ; isolate the requested nibble
%if %0 >= 2
        shr     eax, %2                 ; bring it down to bits 0-3
%endif
        or      eax, [esi]              ; merge with the value at [esi]
        mov     [edi], al
        inc     edi
%endmacro
; blit_pixel1_trans mask [, shift] -- emit one 8bpp pixel, leaving the
; destination byte untouched when the selected nibble is zero.
;   mask  : selects one 4-bit pixel out of the packed row held in ebx
;   shift : optional right shift that moves the selected nibble to bits 0-3
; In:    ebx = packed pixel row, esi -> dword that is OR-ed onto the pixel
;        value (only the low byte of the result is stored), edi -> output
; Out:   at most one byte written at [edi]; edi always advanced by 1
; Clobb: eax, flags
%macro blit_pixel1_trans 1-*            ; 8bpp blitting, transparent
        mov     eax, %1
        and     eax, ebx                ; isolate the requested nibble
        jz      %%skip                  ; nibble 0 -> nothing is drawn
%if %0 >= 2
        shr     eax, %2                 ; bring it down to bits 0-3
%endif
        or      eax, [esi]              ; merge with the value at [esi]
        mov     [edi], al
%%skip:
        inc     edi
%endmacro
; blit_pixel2 mask [, shift] -- emit one opaque 16bpp pixel.
;   mask  : selects one 4-bit pixel out of the packed row held in ebx
;   shift : optional right shift that moves the selected nibble to bits 0-3
; In:    ebx = packed pixel row, esi -> table of dword entries indexed by
;        the pixel value (low 16 bits of the entry are stored), edi -> output
; Out:   two bytes written at [edi]; edi advanced by 2
; Clobb: eax, edx (left pointing at the table entry), flags
%macro blit_pixel2 1-*                  ; 16bpp blitting, solid
        mov     eax, %1
        and     eax, ebx                ; isolate the requested nibble
%if %0 >= 2
        shr     eax, %2                 ; bring it down to bits 0-3
%endif
        lea     edx, [esi + eax*4]      ; edx -> table entry for this pixel
        mov     eax, [edx]
        mov     [edi], ax
        add     edi, byte 2
%endmacro
; blit_pixel2_trans mask [, shift] -- emit one 16bpp pixel, leaving the
; destination untouched when the selected nibble is zero.
;   mask  : selects one 4-bit pixel out of the packed row held in ebx
;   shift : optional right shift that moves the selected nibble to bits 0-3
; In:    ebx = packed pixel row, esi -> table of dword entries indexed by
;        the pixel value (low 16 bits of the entry are stored), edi -> output
; Out:   at most two bytes written at [edi]; edi always advanced by 2
; Clobb: eax, flags; edx only when a pixel is drawn
%macro blit_pixel2_trans 1-*            ; 16bpp blitting, transparent
        mov     eax, %1
        and     eax, ebx                ; isolate the requested nibble
        jz      %%skip                  ; nibble 0 -> nothing is drawn
%if %0 >= 2
        shr     eax, %2                 ; bring it down to bits 0-3
%endif
        lea     edx, [esi + eax*4]      ; edx -> table entry for this pixel
        mov     eax, [edx]
        mov     [edi], ax
%%skip:
        add     edi, byte 2
%endmacro
; blit_pixel3 mask [, shift] -- emit one opaque 24bpp pixel.
;   first parameter  : mask selecting one 4-bit pixel of the row held in ebx
;   second parameter : optional right shift moving that nibble to bits 0-3
; In:    ebx = packed pixel row, esi -> table of dword entries indexed by
;        the pixel value, edi -> output
; Out:   three bytes written at [edi]; edi advanced by 3
; Clobb: eax, edx (left pointing at the table entry), flags
; Byte order written: entry byte 1, entry byte 2, then entry byte 0.
; NOTE(review): that is a rotated order relative to the entry's layout in
; memory; confirm it matches how the 24bpp table is built by the caller.
%macro blit_pixel3 1-* ; 24bpp blitting, solid
mov eax, ebx
and eax, %1
%if %0 > 1
shr eax, byte %2
%endif
lea edx, [esi+eax*4+1] ; edx -> byte 1 of the table entry
mov ax, word [edx] ; entry bytes 1 and 2
mov word [edi], ax
add edi, 2
dec edx ; back to byte 0 of the entry
mov al, byte [edx]
mov byte [edi], al
inc edi
%endmacro
; blit_pixel3_trans mask [, shift] -- emit one 24bpp pixel, leaving the
; destination untouched when the selected nibble is zero.
;   first parameter  : mask selecting one 4-bit pixel of the row held in ebx
;   second parameter : optional right shift moving that nibble to bits 0-3
; In:    ebx = packed pixel row, esi -> table of dword entries indexed by
;        the pixel value, edi -> output
; Out:   at most three bytes written at [edi]; edi always advanced by 3
; Clobb: eax, flags; edx only when a pixel is drawn
; Byte order written: entry byte 1, entry byte 2, then entry byte 0.
; NOTE(review): that is a rotated order relative to the entry's layout in
; memory; confirm it matches how the 24bpp table is built by the caller.
%macro blit_pixel3_trans 1-* ; 24bpp blitting, transparent
mov eax, ebx
and eax, %1
jz %%trans ; nibble 0 -> skip the stores, only advance edi
%if %0 > 1
shr eax, byte %2
%endif
lea edx, [esi+eax*4+1] ; edx -> byte 1 of the table entry
mov ax, word [edx] ; entry bytes 1 and 2
mov word [edi], ax
add edi, 2
dec edx ; back to byte 0 of the entry
mov al, byte [edx]
mov byte [edi], al
inc edi
jmp %%next
%%trans:
add edi, byte 3
%%next:
%endmacro
; blit_pixel4 mask [, shift] -- emit one opaque 32bpp pixel.
;   mask  : selects one 4-bit pixel out of the packed row held in ebx
;   shift : optional right shift that moves the selected nibble to bits 0-3
; In:    ebx = packed pixel row, esi -> table of dword entries indexed by
;        the pixel value, edi -> output
; Out:   four bytes written at [edi]; edi advanced by 4
; Clobb: eax, edx (left pointing at the table entry), flags
%macro blit_pixel4 1-*                  ; 32bpp blitting, solid
        mov     eax, %1
        and     eax, ebx                ; isolate the requested nibble
%if %0 >= 2
        shr     eax, %2                 ; bring it down to bits 0-3
%endif
        lea     edx, [esi + eax*4]      ; edx -> table entry for this pixel
        mov     eax, [edx]
        mov     [edi], eax
        add     edi, byte 4
%endmacro
; blit_pixel4_trans mask [, shift] -- emit one 32bpp pixel, leaving the
; destination untouched when the selected nibble is zero.
;   mask  : selects one 4-bit pixel out of the packed row held in ebx
;   shift : optional right shift that moves the selected nibble to bits 0-3
; In:    ebx = packed pixel row, esi -> table of dword entries indexed by
;        the pixel value, edi -> output
; Out:   at most four bytes written at [edi]; edi always advanced by 4
; Clobb: eax, flags; edx only when a pixel is drawn
%macro blit_pixel4_trans 1-*            ; 32bpp blitting, transparent
        mov     eax, %1
        and     eax, ebx                ; isolate the requested nibble
        jz      %%skip                  ; nibble 0 -> nothing is drawn
%if %0 >= 2
        shr     eax, %2                 ; bring it down to bits 0-3
%endif
        lea     edx, [esi + eax*4]      ; edx -> table entry for this pixel
        mov     eax, [edx]
        mov     [edi], eax
%%skip:
        add     edi, byte 4
%endmacro
; ----------------------------------------
; int _asm_tiles_init
; (unsigned char *vram, unsigned char *reg, unsigned int *highpal)
;
; Records the three pointer arguments in __vram, __reg and __highpal so the
; drawtile routines can find them later.
; In:    three dword arguments on the stack (first argument just above the
;        return address)
; Out:   nothing is returned -- eax leaves with the value it had on entry
; Clobb: none (eax is saved and restored; no flag-modifying instruction is
;        used)
; ----------------------------------------
cache_align
asm_tiles_init:
        push    eax                     ; the only scratch register needed
        ; After the push: [esp] = saved eax, [esp+4] = return address,
        ; [esp+8] / [esp+12] / [esp+16] = first / second / third argument.
        mov     eax, [esp+8]
        mov     [__vram], eax
        mov     eax, [esp+12]
        mov     [__reg], eax
        mov     eax, [esp+16]
        mov     [__highpal], eax
        pop     eax
        ret
cache_align
; ----------------------------------------
; int _drawtile1_solid
; (int which, int line, unsigned char *where)
;
; Draws one 8-pixel row of a tile as 8bpp, writing all eight bytes.
;   which : tile descriptor -- bits 0-10 pattern number, 0x800 horizontal
;           flip, 0x1000 vertical flip, (which >> 7) & 0xc0 byte offset
;           into the table at __highpal
;   line  : row inside the tile (XOR-ed with 7 when vertically flipped)
;   where : destination; 8 bytes are written
; Every register is restored by POPAD, so no value is returned in eax.
; ----------------------------------------
cache_align
drawtile1_solid:
        pushad
        mov     ebp, esp                ; which/line/where are addressed off this frame
.setup:
.get_pal:
        mov     ebx, which
        mov     esi, [__highpal]
        mov     eax, ebx
        shr     eax, byte 7
        and     eax, 0xc0
        add     esi, eax                ; esi -> dword OR-ed onto every pixel
        push    esi                     ; esi is reused for reg/vram below
.check_y_flip:
        mov     eax, ebx
        xor     ecx, ecx
        mov     edx, line
        test    eax, 0x1000
        jz      .check_interlace
.y_flipped:
        xor     edx, byte 7
cache_align
.check_interlace:
        mov     esi, [__reg]
        mov     cl, [esi+12]
        mov     esi, [__vram]
        and     eax, 0x7ff              ; pattern number
        test    cl, byte 0x2
        jz      .no_interlace
.interlace:
        lea     edx, [edx*8]            ; 8 bytes per row
        shl     eax, 6                  ; 64 bytes per pattern
        jmp     .check_x_flip
cache_align
.no_interlace:
        lea     edx, [edx*4]            ; 4 bytes per row
        shl     eax, 5                  ; 32 bytes per pattern
cache_align
.check_x_flip:
        add     eax, edx
        mov     edi, where
        lea     esi, [esi+eax]
        mov     ebx, [esi]              ; ebx = the row's eight packed pixels
        pop     esi                     ; back to the palette pointer
        mov     eax, which
        test    eax, 0x800
        jz      near .x_not_flipped
.x_flipped:
        blit_pixel1 0x0f000000, 24      ; pixel 8
        blit_pixel1 0xf0000000, 28      ; ..... 7
        blit_pixel1 0x000f0000, 16      ; ..... 6
        blit_pixel1 0x00f00000, 20      ; ..... 5
        blit_pixel1 0x00000f00, 8       ; ..... 4
        blit_pixel1 0x0000f000, 12      ; ..... 3
        blit_pixel1 0x0000000f          ; ..... 2
        blit_pixel1 0x000000f0, 4       ; ..... 1
        jmp     .cleanup
cache_align
.x_not_flipped:
        blit_pixel1 0x000000f0, 4       ; pixel 1
        blit_pixel1 0x0000000f          ; ..... 2
        blit_pixel1 0x0000f000, 12      ; ..... 3
        blit_pixel1 0x00000f00, 8       ; ..... 4
        blit_pixel1 0x00f00000, 20      ; ..... 5
        blit_pixel1 0x000f0000, 16      ; ..... 6
        blit_pixel1 0xf0000000, 28      ; ..... 7
        blit_pixel1 0x0f000000, 24      ; ..... 8
cache_align
.cleanup:
        popad
        ret
cache_align
; ----------------------------------------
; drawtile1 (int which, int line, unsigned char *where)
;
; Draws one 8-pixel row of a tile as 8bpp; pixels whose value is 0 leave the
; destination byte unchanged.
;   which : tile descriptor -- bits 0-10 pattern number, 0x800 horizontal
;           flip, 0x1000 vertical flip, (which >> 7) & 0xc0 byte offset
;           into the table at __highpal
;   line  : row inside the tile (XOR-ed with 7 when vertically flipped)
;   where : destination; covers 8 bytes
; Every register is restored by POPAD, so no value is returned in eax.
; ----------------------------------------
drawtile1:
        pushad
        mov     ebp, esp                ; which/line/where are addressed off this frame

        ; ---- locate and load the pattern row ----
        mov     ecx, which              ; ecx keeps the descriptor for both flip tests
        mov     edx, line
        test    ecx, 0x1000             ; vertical flip?
        jz      .row_chosen
        xor     edx, byte 7
.row_chosen:
        mov     eax, ecx
        and     eax, 0x7ff              ; pattern number
        mov     esi, [__reg]
        test    byte [esi+12], 0x2      ; bit 1 of reg[12] picks the larger layout
        jz      .small_pattern
        shl     edx, 3                  ; 8 bytes per row
        shl     eax, 6                  ; 64 bytes per pattern
        jmp     .fetch_row
.small_pattern:
        shl     edx, 2                  ; 4 bytes per row
        shl     eax, 5                  ; 32 bytes per pattern
.fetch_row:
        add     eax, edx
        mov     esi, [__vram]
        mov     ebx, [esi + eax]        ; ebx = the row's eight packed pixels

        ; ---- palette pointer and destination ----
        mov     esi, ecx
        shr     esi, 7
        and     esi, 0xc0
        add     esi, [__highpal]        ; esi -> dword OR-ed onto every pixel
        mov     edi, where

        test    ebx, ebx
        jz      near .cleanup           ; Don't waste time if the tile is blank!
        test    ecx, 0x800              ; horizontal flip?
        jz      near .x_not_flipped
.x_flipped:
        blit_pixel1_trans 0x0f000000, 24 ; pixel 8
        blit_pixel1_trans 0xf0000000, 28 ; ..... 7
        blit_pixel1_trans 0x000f0000, 16 ; ..... 6
        blit_pixel1_trans 0x00f00000, 20 ; ..... 5
        blit_pixel1_trans 0x00000f00, 8  ; ..... 4
        blit_pixel1_trans 0x0000f000, 12 ; ..... 3
        blit_pixel1_trans 0x0000000f     ; ..... 2
        blit_pixel1_trans 0x000000f0, 4  ; ..... 1
        jmp     .cleanup
cache_align
.x_not_flipped:
        blit_pixel1_trans 0x000000f0, 4  ; pixel 1
        blit_pixel1_trans 0x0000000f     ; ..... 2
        blit_pixel1_trans 0x0000f000, 12 ; ..... 3
        blit_pixel1_trans 0x00000f00, 8  ; ..... 4
        blit_pixel1_trans 0x00f00000, 20 ; ..... 5
        blit_pixel1_trans 0x000f0000, 16 ; ..... 6
        blit_pixel1_trans 0xf0000000, 28 ; ..... 7
        blit_pixel1_trans 0x0f000000, 24 ; ..... 8
cache_align
.cleanup:
        popad
        ret
cache_align
; ----------------------------------------
; drawtile2_solid (int which, int line, unsigned char *where)
;
; Draws one 8-pixel row of a tile as 16bpp, writing all eight pixels.
;   which : tile descriptor -- bits 0-10 pattern number, 0x800 horizontal
;           flip, 0x1000 vertical flip, (which >> 7) & 0xc0 byte offset of
;           the palette line inside the table at __highpal
;   line  : row inside the tile (XOR-ed with 7 when vertically flipped)
;   where : destination; 16 bytes are written
; While the row is drawn, entry 0 of the selected palette line is replaced
; by the table entry indexed by the low 6 bits of reg[7]; the original entry
; is put back in .cleanup.  The table is therefore modified for the duration
; of the call.
; NOTE(review): reg[7] is presumably the background-colour register --
; confirm against the caller.
; Every register is restored by POPAD, so no value is returned in eax.
; ----------------------------------------
cache_align
drawtile2_solid:
        pushad
        mov     ebp, esp                ; which/line/where are addressed off this frame
.setup:
.get_pal:
        mov     ebx, which
        mov     esi, [__highpal]
        mov     ecx, esi                ; ecx = start of the whole table
        mov     eax, ebx
        shr     eax, byte 7
        and     eax, 0xc0
        add     esi, eax                ; esi -> this tile's palette line
        ; -
        mov     edi, [__reg]
        movzx   edx, byte [edi + 7]     ; only reg[7] is needed, so load one byte
        push    dword [esi]             ; remember entry 0 of the line
        and     edx, 0x3f
        mov     eax, [ecx + edx*4]
        mov     [esi], eax              ; entry 0 := table[reg[7] & 0x3f]
        ; -
        push    esi                     ; esi is reused for reg/vram below
.check_y_flip:
        mov     eax, ebx
        xor     ecx, ecx
        mov     edx, line
        test    eax, 0x1000
        jz      .check_interlace
.y_flipped:
        xor     edx, byte 7
cache_align
.check_interlace:
        mov     esi, [__reg]
        mov     cl, [esi+12]
        mov     esi, [__vram]
        and     eax, 0x7ff              ; pattern number
        test    cl, byte 0x2
        jz      .no_interlace
.interlace:
        lea     edx, [edx*8]            ; 8 bytes per row
        shl     eax, 6                  ; 64 bytes per pattern
        jmp     .check_x_flip
cache_align
.no_interlace:
        lea     edx, [edx*4]            ; 4 bytes per row
        shl     eax, 5                  ; 32 bytes per pattern
cache_align
.check_x_flip:
        add     eax, edx
        mov     edi, where
        lea     esi, [esi+eax]
        mov     ebx, [esi]              ; ebx = the row's eight packed pixels
        pop     esi                     ; back to the palette line
        mov     eax, which
        test    eax, 0x800
        jz      near .x_not_flipped
.x_flipped:
        blit_pixel2 0x0f000000, 24      ; pixel 8
        blit_pixel2 0xf0000000, 28      ; ..... 7
        blit_pixel2 0x000f0000, 16      ; ..... 6
        blit_pixel2 0x00f00000, 20      ; ..... 5
        blit_pixel2 0x00000f00, 8       ; ..... 4
        blit_pixel2 0x0000f000, 12      ; ..... 3
        blit_pixel2 0x0000000f          ; ..... 2
        blit_pixel2 0x000000f0, 4       ; ..... 1
        jmp     .cleanup
cache_align
.x_not_flipped:
        blit_pixel2 0x000000f0, 4       ; pixel 1
        blit_pixel2 0x0000000f          ; ..... 2
        blit_pixel2 0x0000f000, 12      ; ..... 3
        blit_pixel2 0x00000f00, 8       ; ..... 4
        blit_pixel2 0x00f00000, 20      ; ..... 5
        blit_pixel2 0x000f0000, 16      ; ..... 6
        blit_pixel2 0xf0000000, 28      ; ..... 7
        blit_pixel2 0x0f000000, 24      ; ..... 8
cache_align
.cleanup:
        pop     dword [esi]             ; restore entry 0 of the palette line
        popad
        ret
cache_align
; ----------------------------------------
; drawtile2 (int which, int line, unsigned char *where)
;
; Draws one 8-pixel row of a tile as 16bpp; pixels whose value is 0 leave
; the destination unchanged.
;   which : tile descriptor -- bits 0-10 pattern number, 0x800 horizontal
;           flip, 0x1000 vertical flip, (which >> 7) & 0xc0 byte offset of
;           the palette line inside the table at __highpal
;   line  : row inside the tile (XOR-ed with 7 when vertically flipped)
;   where : destination; covers 16 bytes
; Every register is restored by POPAD, so no value is returned in eax.
; ----------------------------------------
cache_align
drawtile2:
        pushad
        mov     ebp, esp                ; which/line/where are addressed off this frame

        ; ---- locate and load the pattern row ----
        mov     ecx, which              ; ecx keeps the descriptor for both flip tests
        mov     edx, line
        test    ecx, 0x1000             ; vertical flip?
        jz      .row_chosen
        xor     edx, byte 7
.row_chosen:
        mov     eax, ecx
        and     eax, 0x7ff              ; pattern number
        mov     esi, [__reg]
        test    byte [esi+12], 0x2      ; bit 1 of reg[12] picks the larger layout
        jz      .small_pattern
        shl     edx, 3                  ; 8 bytes per row
        shl     eax, 6                  ; 64 bytes per pattern
        jmp     .fetch_row
.small_pattern:
        shl     edx, 2                  ; 4 bytes per row
        shl     eax, 5                  ; 32 bytes per pattern
.fetch_row:
        add     eax, edx
        mov     esi, [__vram]
        mov     ebx, [esi + eax]        ; ebx = the row's eight packed pixels

        ; ---- palette pointer and destination ----
        mov     esi, ecx
        shr     esi, 7
        and     esi, 0xc0
        add     esi, [__highpal]        ; esi -> this tile's palette line
        mov     edi, where

        test    ebx, ebx
        jz      near .cleanup           ; Don't waste time if the tile is blank!
        test    ecx, 0x800              ; horizontal flip?
        jz      near .x_not_flipped
.x_flipped:
        blit_pixel2_trans 0x0f000000, 24 ; pixel 8
        blit_pixel2_trans 0xf0000000, 28 ; ..... 7
        blit_pixel2_trans 0x000f0000, 16 ; ..... 6
        blit_pixel2_trans 0x00f00000, 20 ; ..... 5
        blit_pixel2_trans 0x00000f00, 8  ; ..... 4
        blit_pixel2_trans 0x0000f000, 12 ; ..... 3
        blit_pixel2_trans 0x0000000f     ; ..... 2
        blit_pixel2_trans 0x000000f0, 4  ; ..... 1
        jmp     .cleanup
cache_align
.x_not_flipped:
        blit_pixel2_trans 0x000000f0, 4  ; pixel 1
        blit_pixel2_trans 0x0000000f     ; ..... 2
        blit_pixel2_trans 0x0000f000, 12 ; ..... 3
        blit_pixel2_trans 0x00000f00, 8  ; ..... 4
        blit_pixel2_trans 0x00f00000, 20 ; ..... 5
        blit_pixel2_trans 0x000f0000, 16 ; ..... 6
        blit_pixel2_trans 0xf0000000, 28 ; ..... 7
        blit_pixel2_trans 0x0f000000, 24 ; ..... 8
cache_align
.cleanup:
        popad
        ret
cache_align
; ----------------------------------------
; drawtile3_solid (int which, int line, unsigned char *where)
;
; Draws one 8-pixel row of a tile as 24bpp, writing all eight pixels.
;   which : tile descriptor -- bits 0-10 pattern number, 0x800 horizontal
;           flip, 0x1000 vertical flip, (which >> 7) & 0xc0 byte offset of
;           the palette line inside the table at __highpal
;   line  : row inside the tile (XOR-ed with 7 when vertically flipped)
;   where : destination; 24 bytes are written
; While the row is drawn, entry 0 of the selected palette line is replaced
; by the table entry indexed by the low 6 bits of reg[7]; the original entry
; is put back in .cleanup.  The table is therefore modified for the duration
; of the call.
; NOTE(review): reg[7] is presumably the background-colour register --
; confirm against the caller.
; Every register is restored by POPAD, so no value is returned in eax.
; ----------------------------------------
drawtile3_solid:
        pushad
        mov     ebp, esp                ; which/line/where are addressed off this frame
.setup:
.get_pal:
        mov     ebx, which
        mov     esi, [__highpal]
        mov     ecx, esi                ; ecx = start of the whole table
        mov     eax, ebx
        shr     eax, byte 7
        and     eax, 0xc0
        add     esi, eax                ; esi -> this tile's palette line
        ; -
        mov     edi, [__reg]
        movzx   edx, byte [edi + 7]     ; only reg[7] is needed, so load one byte
        push    dword [esi]             ; remember entry 0 of the line
        and     edx, 0x3f
        mov     eax, [ecx + edx*4]
        mov     [esi], eax              ; entry 0 := table[reg[7] & 0x3f]
        ; -
        push    esi                     ; esi is reused for reg/vram below
.check_y_flip:
        mov     eax, ebx
        xor     ecx, ecx
        mov     edx, line
        test    eax, 0x1000
        jz      .check_interlace
.y_flipped:
        xor     edx, byte 7
cache_align
.check_interlace:
        mov     esi, [__reg]
        mov     cl, [esi+12]
        mov     esi, [__vram]
        and     eax, 0x7ff              ; pattern number
        test    cl, byte 0x2
        jz      .no_interlace
.interlace:
        lea     edx, [edx*8]            ; 8 bytes per row
        shl     eax, 6                  ; 64 bytes per pattern
        jmp     .check_x_flip
cache_align
.no_interlace:
        lea     edx, [edx*4]            ; 4 bytes per row
        shl     eax, 5                  ; 32 bytes per pattern
cache_align
.check_x_flip:
        add     eax, edx
        mov     edi, where
        lea     esi, [esi+eax]
        mov     ebx, [esi]              ; ebx = the row's eight packed pixels
        pop     esi                     ; back to the palette line
        mov     eax, which
        test    eax, 0x800
        jz      near .x_not_flipped
.x_flipped:
        blit_pixel3 0x0f000000, 24      ; pixel 8
        blit_pixel3 0xf0000000, 28      ; ..... 7
        blit_pixel3 0x000f0000, 16      ; ..... 6
        blit_pixel3 0x00f00000, 20      ; ..... 5
        blit_pixel3 0x00000f00, 8       ; ..... 4
        blit_pixel3 0x0000f000, 12      ; ..... 3
        blit_pixel3 0x0000000f          ; ..... 2
        blit_pixel3 0x000000f0, 4       ; ..... 1
        jmp     .cleanup
cache_align
.x_not_flipped:
        blit_pixel3 0x000000f0, 4       ; pixel 1
        blit_pixel3 0x0000000f          ; ..... 2
        blit_pixel3 0x0000f000, 12      ; ..... 3
        blit_pixel3 0x00000f00, 8       ; ..... 4
        blit_pixel3 0x00f00000, 20      ; ..... 5
        blit_pixel3 0x000f0000, 16      ; ..... 6
        blit_pixel3 0xf0000000, 28      ; ..... 7
        blit_pixel3 0x0f000000, 24      ; ..... 8
cache_align
.cleanup:
        pop     dword [esi]             ; restore entry 0 of the palette line
        popad
        ret
cache_align
; ----------------------------------------
; drawtile3 (int which, int line, unsigned char *where)
;
; Draws one 8-pixel row of a tile as 24bpp; pixels whose value is 0 leave
; the destination unchanged.
;   which : tile descriptor -- bits 0-10 pattern number, 0x800 horizontal
;           flip, 0x1000 vertical flip, (which >> 7) & 0xc0 byte offset of
;           the palette line inside the table at __highpal
;   line  : row inside the tile (XOR-ed with 7 when vertically flipped)
;   where : destination; covers 24 bytes
; Every register is restored by POPAD, so no value is returned in eax.
; ----------------------------------------
drawtile3:
        pushad
        mov     ebp, esp                ; which/line/where are addressed off this frame

        ; ---- locate and load the pattern row ----
        mov     ecx, which              ; ecx keeps the descriptor for both flip tests
        mov     edx, line
        test    ecx, 0x1000             ; vertical flip?
        jz      .row_chosen
        xor     edx, byte 7
.row_chosen:
        mov     eax, ecx
        and     eax, 0x7ff              ; pattern number
        mov     esi, [__reg]
        test    byte [esi+12], 0x2      ; bit 1 of reg[12] picks the larger layout
        jz      .small_pattern
        shl     edx, 3                  ; 8 bytes per row
        shl     eax, 6                  ; 64 bytes per pattern
        jmp     .fetch_row
.small_pattern:
        shl     edx, 2                  ; 4 bytes per row
        shl     eax, 5                  ; 32 bytes per pattern
.fetch_row:
        add     eax, edx
        mov     esi, [__vram]
        mov     ebx, [esi + eax]        ; ebx = the row's eight packed pixels

        ; ---- palette pointer and destination ----
        mov     esi, ecx
        shr     esi, 7
        and     esi, 0xc0
        add     esi, [__highpal]        ; esi -> this tile's palette line
        mov     edi, where

        test    ebx, ebx
        jz      near .cleanup           ; Don't waste time if the tile is blank!
        test    ecx, 0x800              ; horizontal flip?
        jz      near .x_not_flipped
.x_flipped:
        blit_pixel3_trans 0x0f000000, 24 ; pixel 8
        blit_pixel3_trans 0xf0000000, 28 ; ..... 7
        blit_pixel3_trans 0x000f0000, 16 ; ..... 6
        blit_pixel3_trans 0x00f00000, 20 ; ..... 5
        blit_pixel3_trans 0x00000f00, 8  ; ..... 4
        blit_pixel3_trans 0x0000f000, 12 ; ..... 3
        blit_pixel3_trans 0x0000000f     ; ..... 2
        blit_pixel3_trans 0x000000f0, 4  ; ..... 1
        jmp     .cleanup
cache_align
.x_not_flipped:
        blit_pixel3_trans 0x000000f0, 4  ; pixel 1
        blit_pixel3_trans 0x0000000f     ; ..... 2
        blit_pixel3_trans 0x0000f000, 12 ; ..... 3
        blit_pixel3_trans 0x00000f00, 8  ; ..... 4
        blit_pixel3_trans 0x00f00000, 20 ; ..... 5
        blit_pixel3_trans 0x000f0000, 16 ; ..... 6
        blit_pixel3_trans 0xf0000000, 28 ; ..... 7
        blit_pixel3_trans 0x0f000000, 24 ; ..... 8
cache_align
.cleanup:
        popad
        ret
cache_align
; ----------------------------------------
; drawtile4_solid (int which, int line, unsigned char *where)
;
; Draws one 8-pixel row of a tile as 32bpp, writing all eight pixels.
;   which : tile descriptor -- bits 0-10 pattern number, 0x800 horizontal
;           flip, 0x1000 vertical flip, (which >> 7) & 0xc0 byte offset of
;           the palette line inside the table at __highpal
;   line  : row inside the tile (XOR-ed with 7 when vertically flipped)
;   where : destination; 32 bytes are written
; While the row is drawn, entry 0 of the selected palette line is replaced
; by the table entry indexed by the low 6 bits of reg[7]; the original entry
; is put back in .cleanup.  The table is therefore modified for the duration
; of the call.
; NOTE(review): reg[7] is presumably the background-colour register --
; confirm against the caller.
; Every register is restored by POPAD, so no value is returned in eax.
; ----------------------------------------
drawtile4_solid:
        pushad
        mov     ebp, esp                ; which/line/where are addressed off this frame
.setup:
.get_pal:
        mov     ebx, which
        mov     esi, [__highpal]
        mov     ecx, esi                ; ecx = start of the whole table
        mov     eax, ebx
        shr     eax, byte 7
        and     eax, 0xc0
        add     esi, eax                ; esi -> this tile's palette line
        ; -
        mov     edi, [__reg]
        movzx   edx, byte [edi + 7]     ; only reg[7] is needed, so load one byte
        push    dword [esi]             ; remember entry 0 of the line
        and     edx, 0x3f
        mov     eax, [ecx + edx*4]
        mov     [esi], eax              ; entry 0 := table[reg[7] & 0x3f]
        ; -
        push    esi                     ; esi is reused for reg/vram below
.check_y_flip:
        mov     eax, ebx
        xor     ecx, ecx
        mov     edx, line
        test    eax, 0x1000
        jz      .check_interlace
.y_flipped:
        xor     edx, byte 7
cache_align
.check_interlace:
        mov     esi, [__reg]
        mov     cl, [esi+12]
        mov     esi, [__vram]
        and     eax, 0x7ff              ; pattern number
        test    cl, byte 0x2
        jz      .no_interlace
.interlace:
        lea     edx, [edx*8]            ; 8 bytes per row
        shl     eax, 6                  ; 64 bytes per pattern
        jmp     .check_x_flip
cache_align
.no_interlace:
        lea     edx, [edx*4]            ; 4 bytes per row
        shl     eax, 5                  ; 32 bytes per pattern
cache_align
.check_x_flip:
        add     eax, edx
        mov     edi, where
        lea     esi, [esi+eax]
        mov     ebx, [esi]              ; ebx = the row's eight packed pixels
        pop     esi                     ; back to the palette line
        mov     eax, which
        test    eax, 0x800
        jz      near .x_not_flipped
.x_flipped:
        blit_pixel4 0x0f000000, 24      ; pixel 8
        blit_pixel4 0xf0000000, 28      ; ..... 7
        blit_pixel4 0x000f0000, 16      ; ..... 6
        blit_pixel4 0x00f00000, 20      ; ..... 5
        blit_pixel4 0x00000f00, 8       ; ..... 4
        blit_pixel4 0x0000f000, 12      ; ..... 3
        blit_pixel4 0x0000000f          ; ..... 2
        blit_pixel4 0x000000f0, 4       ; ..... 1
        jmp     .cleanup
cache_align
.x_not_flipped:
        blit_pixel4 0x000000f0, 4       ; pixel 1
        blit_pixel4 0x0000000f          ; ..... 2
        blit_pixel4 0x0000f000, 12      ; ..... 3
        blit_pixel4 0x00000f00, 8       ; ..... 4
        blit_pixel4 0x00f00000, 20      ; ..... 5
        blit_pixel4 0x000f0000, 16      ; ..... 6
        blit_pixel4 0xf0000000, 28      ; ..... 7
        blit_pixel4 0x0f000000, 24      ; ..... 8
cache_align
.cleanup:
        pop     dword [esi]             ; restore entry 0 of the palette line
        popad
        ret
cache_align
; ----------------------------------------
; drawtile4 (int which, int line, unsigned char *where)
;
; Draws one 8-pixel row of a tile as 32bpp; pixels whose value is 0 leave
; the destination unchanged.
;   which : tile descriptor -- bits 0-10 pattern number, 0x800 horizontal
;           flip, 0x1000 vertical flip, (which >> 7) & 0xc0 byte offset of
;           the palette line inside the table at __highpal
;   line  : row inside the tile (XOR-ed with 7 when vertically flipped)
;   where : destination; covers 32 bytes
; Every register is restored by POPAD, so no value is returned in eax.
; ----------------------------------------
drawtile4:
        pushad
        mov     ebp, esp                ; which/line/where are addressed off this frame

        ; ---- locate and load the pattern row ----
        mov     ecx, which              ; ecx keeps the descriptor for both flip tests
        mov     edx, line
        test    ecx, 0x1000             ; vertical flip?
        jz      .row_chosen
        xor     edx, byte 7
.row_chosen:
        mov     eax, ecx
        and     eax, 0x7ff              ; pattern number
        mov     esi, [__reg]
        test    byte [esi+12], 0x2      ; bit 1 of reg[12] picks the larger layout
        jz      .small_pattern
        shl     edx, 3                  ; 8 bytes per row
        shl     eax, 6                  ; 64 bytes per pattern
        jmp     .fetch_row
.small_pattern:
        shl     edx, 2                  ; 4 bytes per row
        shl     eax, 5                  ; 32 bytes per pattern
.fetch_row:
        add     eax, edx
        mov     esi, [__vram]
        mov     ebx, [esi + eax]        ; ebx = the row's eight packed pixels

        ; ---- palette pointer and destination ----
        mov     esi, ecx
        shr     esi, 7
        and     esi, 0xc0
        add     esi, [__highpal]        ; esi -> this tile's palette line
        mov     edi, where

        test    ebx, ebx
        jz      near .cleanup           ; Don't waste time if the tile is blank!
        test    ecx, 0x800              ; horizontal flip?
        jz      near .x_not_flipped
.x_flipped:
        blit_pixel4_trans 0x0f000000, 24 ; pixel 8
        blit_pixel4_trans 0xf0000000, 28 ; ..... 7
        blit_pixel4_trans 0x000f0000, 16 ; ..... 6
        blit_pixel4_trans 0x00f00000, 20 ; ..... 5
        blit_pixel4_trans 0x00000f00, 8  ; ..... 4
        blit_pixel4_trans 0x0000f000, 12 ; ..... 3
        blit_pixel4_trans 0x0000000f     ; ..... 2
        blit_pixel4_trans 0x000000f0, 4  ; ..... 1
        jmp     .cleanup
cache_align
.x_not_flipped:
        blit_pixel4_trans 0x000000f0, 4  ; pixel 1
        blit_pixel4_trans 0x0000000f     ; ..... 2
        blit_pixel4_trans 0x0000f000, 12 ; ..... 3
        blit_pixel4_trans 0x00000f00, 8  ; ..... 4
        blit_pixel4_trans 0x00f00000, 20 ; ..... 5
        blit_pixel4_trans 0x000f0000, 16 ; ..... 6
        blit_pixel4_trans 0xf0000000, 28 ; ..... 7
        blit_pixel4_trans 0x0f000000, 24 ; ..... 8
cache_align
.cleanup:
        popad
        ret
cache_align
; Module state: three pointers, initially null, written by asm_tiles_init
; and read by every drawtile routine.
section .data
__vram dd 0 ; base of the pattern data the tile rows are fetched from
__reg dd 0 ; register array; bytes 7 and 12 are the ones read in this file
__highpal dd 0 ; palette table indexed by the blit macros
; ----------------------------------------
; When NASM_STACK_NOEXEC is defined, emit an empty .note.GNU-stack section.
; NOTE(review): this is the usual ELF marker that the object does not need
; an executable stack -- confirm the build defines it where required.
%ifdef NASM_STACK_NOEXEC
section .note.GNU-stack noalloc noexec nowrite progbits
%endif