SDL for NewLibc:

- Updated the Hermes library (from SDL 1.2.15) to fix the SDL_Flip crash when using bpp = 32.
- Updated the example.


git-svn-id: svn://kolibrios.org@9172 a494cfbc-eb01-0410-851d-a64ba20cac60
This commit is contained in:
turbocat 2021-09-01 23:14:36 +00:00
parent 2a2b320fa0
commit 20cc0935c0
11 changed files with 473 additions and 408 deletions

View File

@ -27,7 +27,7 @@ FOLDERS = {
for i,v in ipairs(FOLDERS) do
compile_gcc(v .. "*.c", v .. "%B.o")
tup.append_table(OBJS,
tup.foreach_rule(v .. "*.asm", "nasm -f coff -o %o %f", v .. "%B.o")
tup.foreach_rule(v .. "*.asm", "nasm -i hermes -f coff -o %o %f", v .. "%B.o")
)
end
tup.rule(OBJS, "kos32-ar rcs %o %f", {"../../../lib/libSDLn.a", "../../../lib/<libSDLn>"})

View File

@ -6,7 +6,6 @@
Please refer to the file COPYING.LIB contained in the distribution for
licensing conditions
*/
#ifndef __HERMES_HEAD_MMX__
#define __HERMES_HEAD_MMX__
@ -45,26 +44,24 @@ void ConvertMMXp32_16RGB555();
/* Fix the underscore business with ELF compilers */
#if defined(__ELF__) && defined(__GNUC__)
#if (defined(__ELF__) && defined(__GNUC__)) || defined(__SUNPRO_C)
#ifdef __cplusplus
extern "C" {
#endif
void ConvertMMX(HermesConverterInterface *) __attribute__ ((alias ("_ConvertMMX")));
#if 0
void ClearMMX_32(HermesClearInterface *) __attribute__ ((alias ("_ClearMMX_32")));
void ClearMMX_24(HermesClearInterface *) __attribute__ ((alias ("_ClearMMX_24")));
void ClearMMX_16(HermesClearInterface *) __attribute__ ((alias ("_ClearMMX_16")));
void ClearMMX_8(HermesClearInterface *) __attribute__ ((alias ("_ClearMMX_8")));
extern void _ConvertMMX(HermesConverterInterface *);
extern void _ConvertMMXpII32_24RGB888();
extern void _ConvertMMXpII32_16RGB565();
extern void _ConvertMMXpII32_16BGR565();
extern void _ConvertMMXpII32_16RGB555();
extern void _ConvertMMXpII32_16BGR555();
void ConvertMMXp32_16RGB555() __attribute__ ((alias ("_ConvertMMXp32_16RGB555")));
#endif
void ConvertMMXpII32_24RGB888() __attribute__ ((alias ("_ConvertMMXpII32_24RGB888")));
void ConvertMMXpII32_16RGB565() __attribute__ ((alias ("_ConvertMMXpII32_16RGB565")));
void ConvertMMXpII32_16BGR565() __attribute__ ((alias ("_ConvertMMXpII32_16BGR565")));
void ConvertMMXpII32_16RGB555() __attribute__ ((alias ("_ConvertMMXpII32_16RGB555")));
void ConvertMMXpII32_16BGR555() __attribute__ ((alias ("_ConvertMMXpII32_16BGR555")));
#define ConvertMMX _ConvertMMX
#define ConvertMMXpII32_24RGB888 _ConvertMMXpII32_24RGB888
#define ConvertMMXpII32_16RGB565 _ConvertMMXpII32_16RGB565
#define ConvertMMXpII32_16BGR565 _ConvertMMXpII32_16BGR565
#define ConvertMMXpII32_16RGB555 _ConvertMMXpII32_16RGB555
#define ConvertMMXpII32_16BGR555 _ConvertMMXpII32_16BGR555
#ifdef __cplusplus
}

View File

@ -60,10 +60,10 @@ void ConvertX86pI8_32();
void ConvertX86pI8_24();
void ConvertX86pI8_16();
extern int32 ConvertX86p16_32RGB888_LUT_X86[512];
extern int32 ConvertX86p16_32BGR888_LUT_X86[512];
extern int32 ConvertX86p16_32RGBA888_LUT_X86[512];
extern int32 ConvertX86p16_32BGRA888_LUT_X86[512];
extern int ConvertX86p16_32RGB888_LUT_X86[512];
extern int ConvertX86p16_32BGR888_LUT_X86[512];
extern int ConvertX86p16_32RGBA888_LUT_X86[512];
extern int ConvertX86p16_32BGRA888_LUT_X86[512];
#ifdef __cplusplus
}
@ -74,61 +74,52 @@ extern int32 ConvertX86p16_32BGRA888_LUT_X86[512];
/* Now fix up the ELF underscore problem */
#if defined(__ELF__) && defined(__GNUC__)
#if (defined(__ELF__) && defined(__GNUC__)) || defined(__SUNPRO_C)
#ifdef __cplusplus
extern "C" {
#endif
int Hermes_X86_CPU() __attribute__ ((alias ("_Hermes_X86_CPU")));
extern int _Hermes_X86_CPU();
void ConvertX86(HermesConverterInterface *) __attribute__ ((alias ("_ConvertX86")));
extern void _ConvertX86(HermesConverterInterface *);
#if 0
void ClearX86_32(HermesClearInterface *) __attribute__ ((alias ("_ClearX86_32")));
void ClearX86_24(HermesClearInterface *) __attribute__ ((alias ("_ClearX86_24")));
void ClearX86_16(HermesClearInterface *) __attribute__ ((alias ("_ClearX86_16")));
void ClearX86_8(HermesClearInterface *) __attribute__ ((alias ("_ClearX86_8")));
#endif
extern void _ConvertX86p32_32BGR888();
extern void _ConvertX86p32_32RGBA888();
extern void _ConvertX86p32_32BGRA888();
extern void _ConvertX86p32_24RGB888();
extern void _ConvertX86p32_24BGR888();
extern void _ConvertX86p32_16RGB565();
extern void _ConvertX86p32_16BGR565();
extern void _ConvertX86p32_16RGB555();
extern void _ConvertX86p32_16BGR555();
extern void _ConvertX86p32_8RGB332();
void ConvertX86p32_32BGR888() __attribute__ ((alias ("_ConvertX86p32_32BGR888")));
void ConvertX86p32_32RGBA888() __attribute__ ((alias ("_ConvertX86p32_32RGBA888")));
void ConvertX86p32_32BGRA888() __attribute__ ((alias ("_ConvertX86p32_32BGRA888")));
void ConvertX86p32_24RGB888() __attribute__ ((alias ("_ConvertX86p32_24RGB888")));
void ConvertX86p32_24BGR888() __attribute__ ((alias ("_ConvertX86p32_24BGR888")));
void ConvertX86p32_16RGB565() __attribute__ ((alias ("_ConvertX86p32_16RGB565")));
void ConvertX86p32_16BGR565() __attribute__ ((alias ("_ConvertX86p32_16BGR565")));
void ConvertX86p32_16RGB555() __attribute__ ((alias ("_ConvertX86p32_16RGB555")));
void ConvertX86p32_16BGR555() __attribute__ ((alias ("_ConvertX86p32_16BGR555")));
void ConvertX86p32_8RGB332() __attribute__ ((alias ("_ConvertX86p32_8RGB332")));
extern void _ConvertX86p16_16BGR565();
extern void _ConvertX86p16_16RGB555();
extern void _ConvertX86p16_16BGR555();
extern void _ConvertX86p16_8RGB332();
#if 0
void ConvertX86p16_32RGB888() __attribute__ ((alias ("_ConvertX86p16_32RGB888")));
void ConvertX86p16_32BGR888() __attribute__ ((alias ("_ConvertX86p16_32BGR888")));
void ConvertX86p16_32RGBA888() __attribute__ ((alias ("_ConvertX86p16_32RGBA888")));
void ConvertX86p16_32BGRA888() __attribute__ ((alias ("_ConvertX86p16_32BGRA888")));
void ConvertX86p16_24RGB888() __attribute__ ((alias ("_ConvertX86p16_24RGB888")));
void ConvertX86p16_24BGR888() __attribute__ ((alias ("_ConvertX86p16_24BGR888")));
#endif
void ConvertX86p16_16BGR565() __attribute__ ((alias ("_ConvertX86p16_16BGR565")));
void ConvertX86p16_16RGB555() __attribute__ ((alias ("_ConvertX86p16_16RGB555")));
void ConvertX86p16_16BGR555() __attribute__ ((alias ("_ConvertX86p16_16BGR555")));
void ConvertX86p16_8RGB332() __attribute__ ((alias ("_ConvertX86p16_8RGB332")));
#if 0
void CopyX86p_4byte() __attribute__ ((alias ("_CopyX86p_4byte")));
void CopyX86p_3byte() __attribute__ ((alias ("_CopyX86p_3byte")));
void CopyX86p_2byte() __attribute__ ((alias ("_CopyX86p_2byte")));
void CopyX86p_1byte() __attribute__ ((alias ("_CopyX86p_1byte")));
#define Hermes_X86_CPU _Hermes_X86_CPU
void ConvertX86pI8_32() __attribute__ ((alias ("_ConvertX86pI8_32")));
void ConvertX86pI8_24() __attribute__ ((alias ("_ConvertX86pI8_24")));
void ConvertX86pI8_16() __attribute__ ((alias ("_ConvertX86pI8_16")));
#define ConvertX86 _ConvertX86
#define ConvertX86p32_32BGR888 _ConvertX86p32_32BGR888
#define ConvertX86p32_32RGBA888 _ConvertX86p32_32RGBA888
#define ConvertX86p32_32BGRA888 _ConvertX86p32_32BGRA888
#define ConvertX86p32_24RGB888 _ConvertX86p32_24RGB888
#define ConvertX86p32_24BGR888 _ConvertX86p32_24BGR888
#define ConvertX86p32_16RGB565 _ConvertX86p32_16RGB565
#define ConvertX86p32_16BGR565 _ConvertX86p32_16BGR565
#define ConvertX86p32_16RGB555 _ConvertX86p32_16RGB555
#define ConvertX86p32_16BGR555 _ConvertX86p32_16BGR555
#define ConvertX86p32_8RGB332 _ConvertX86p32_8RGB332
#define ConvertX86p16_16BGR565 _ConvertX86p16_16BGR565
#define ConvertX86p16_16RGB555 _ConvertX86p16_16RGB555
#define ConvertX86p16_16BGR555 _ConvertX86p16_16BGR555
#define ConvertX86p16_8RGB332 _ConvertX86p16_8RGB332
extern int32 ConvertX86p16_32RGB888_LUT_X86[512] __attribute__ ((alias ("_ConvertX86p16_32RGB888_LUT_X86")));
extern int32 ConvertX86p16_32BGR888_LUT_X86[512] __attribute__ ((alias ("_ConvertX86p16_32BGR888_LUT_X86")));
extern int32 ConvertX86p16_32RGBA888_LUT_X86[512] __attribute__ ((alias ("_ConvertX86p16_32RGBA888_LUT_X86")));
extern int32 ConvertX86p16_32BGRA888_LUT_X86[512] __attribute__ ((alias ("_ConvertX86p16_32BGRA888_LUT_X86")));
#endif
#ifdef __cplusplus
}

View File

@ -0,0 +1,15 @@
; Some common macros for hermes nasm code
; SDL_FUNC <symbol>
;   Exports <symbol> from the object file being assembled.
;   With HIDDEN_VISIBILITY defined at assembly time the symbol is declared
;   as a function with hidden visibility; without it a plain GLOBAL
;   directive is emitted.
%macro SDL_FUNC 1
  %ifndef HIDDEN_VISIBILITY
    GLOBAL %1
  %else
    GLOBAL %1:function hidden
  %endif
%endmacro
%ifdef __OS2__
; declare segments with proper attributes for OS/2 386 builds:
; Nothing below is emitted unless __OS2__ is defined at assembly time.
; The data segment is declared first and the code segment second, so .text
; is the segment most recently declared when the including file continues.
SEGMENT .data CLASS=DATA ALIGN=16 USE32 FLAT
SEGMENT .text CLASS=CODE ALIGN=16 USE32 FLAT
%endif

View File

@ -9,9 +9,9 @@
BITS 32
GLOBAL _ConvertMMX
GLOBAL _mmxreturn
%include "common.inc"
SDL_FUNC _ConvertMMX
SECTION .text
@ -50,9 +50,8 @@ _ConvertMMX:
y_loop:
mov ecx,[ebp+4]
jmp [ebp+32]
call [ebp+32]
_mmxreturn:
add esi,[ebp+12]
add edi,[ebp+28]
@ -70,5 +69,6 @@ endconvert:
ret
%ifidn __OUTPUT_FORMAT__,elf32
section .note.GNU-stack noalloc noexec nowrite progbits
%endif

View File

@ -20,40 +20,51 @@
BITS 32
%include "common.inc"
GLOBAL _ConvertMMXpII32_24RGB888
GLOBAL _ConvertMMXpII32_16RGB565
GLOBAL _ConvertMMXpII32_16BGR565
GLOBAL _ConvertMMXpII32_16RGB555
GLOBAL _ConvertMMXpII32_16BGR555
SDL_FUNC _ConvertMMXpII32_24RGB888
SDL_FUNC _ConvertMMXpII32_16RGB565
SDL_FUNC _ConvertMMXpII32_16BGR565
SDL_FUNC _ConvertMMXpII32_16RGB555
SDL_FUNC _ConvertMMXpII32_16BGR555
EXTERN _mmxreturn
SECTION .data
ALIGN 8
;; Macros for conversion routines
;; Constants for conversion routines
%macro _push_immq_mask 1
push dword %1
push dword %1
%endmacro
mmx32_rgb888_mask dd 00ffffffh,00ffffffh
%macro load_immq 2
_push_immq_mask %2
movq %1, [esp]
%endmacro
mmx32_rgb565_b dd 000000f8h, 000000f8h
mmx32_rgb565_g dd 0000fc00h, 0000fc00h
mmx32_rgb565_r dd 00f80000h, 00f80000h
%macro pand_immq 2
_push_immq_mask %2
pand %1, [esp]
%endmacro
mmx32_rgb555_rb dd 00f800f8h,00f800f8h
mmx32_rgb555_g dd 0000f800h,0000f800h
mmx32_rgb555_mul dd 20000008h,20000008h
mmx32_bgr555_mul dd 00082000h,00082000h
%define CLEANUP_IMMQ_LOADS(num) \
add esp, byte 8 * num
%define mmx32_rgb888_mask 00ffffffh
%define mmx32_rgb565_b 000000f8h
%define mmx32_rgb565_g 0000fc00h
%define mmx32_rgb565_r 00f80000h
%define mmx32_rgb555_rb 00f800f8h
%define mmx32_rgb555_g 0000f800h
%define mmx32_rgb555_mul 20000008h
%define mmx32_bgr555_mul 00082000h
SECTION .text
_ConvertMMXpII32_24RGB888:
; set up mm6 as the mask, mm7 as zero
movq mm6, qword [mmx32_rgb888_mask]
load_immq mm6, mmx32_rgb888_mask
CLEANUP_IMMQ_LOADS(1)
pxor mm7, mm7
mov edx, ecx ; save ecx
@ -108,16 +119,17 @@ _ConvertMMXpII32_24RGB888:
dec ecx
jnz .L3
.L4:
jmp _mmxreturn
retn
_ConvertMMXpII32_16RGB565:
; set up masks
movq mm5, [mmx32_rgb565_b]
movq mm6, [mmx32_rgb565_g]
movq mm7, [mmx32_rgb565_r]
load_immq mm5, mmx32_rgb565_b
load_immq mm6, mmx32_rgb565_g
load_immq mm7, mmx32_rgb565_r
CLEANUP_IMMQ_LOADS(3)
mov edx, ecx
shr ecx, 2
@ -176,14 +188,15 @@ _ConvertMMXpII32_16RGB565:
jnz .L3
.L4:
jmp _mmxreturn
retn
_ConvertMMXpII32_16BGR565:
movq mm5, [mmx32_rgb565_r]
movq mm6, [mmx32_rgb565_g]
movq mm7, [mmx32_rgb565_b]
load_immq mm5, mmx32_rgb565_r
load_immq mm6, mmx32_rgb565_g
load_immq mm7, mmx32_rgb565_b
CLEANUP_IMMQ_LOADS(3)
mov edx, ecx
shr ecx, 2
@ -245,7 +258,7 @@ _ConvertMMXpII32_16BGR565:
jnz .L3
.L4:
jmp _mmxreturn
retn
_ConvertMMXpII32_16BGR555:
@ -253,7 +266,7 @@ _ConvertMMXpII32_16BGR555:
; except it uses a different multiplier for the pmaddwd
; instruction. cool huh.
movq mm7, qword [mmx32_bgr555_mul]
load_immq mm7, mmx32_bgr555_mul
jmp _convert_bgr555_cheat
; This is the same as the Intel version.. they obviously went to
@ -263,15 +276,16 @@ _ConvertMMXpII32_16BGR555:
; (I think) a more accurate name..
_ConvertMMXpII32_16RGB555:
movq mm7,qword [mmx32_rgb555_mul]
load_immq mm7, mmx32_rgb555_mul
_convert_bgr555_cheat:
movq mm6,qword [mmx32_rgb555_g]
load_immq mm6, mmx32_rgb555_g
CLEANUP_IMMQ_LOADS(2)
mov edx,ecx ; Save ecx
and ecx,BYTE 0fffffff8h ; clear lower three bits
and ecx,DWORD 0fffffff8h ; clear lower three bits
jnz .L_OK
jmp .L2
jmp near .L2
.L_OK:
@ -280,12 +294,14 @@ _convert_bgr555_cheat:
movq mm0,[esi]
movq mm3,mm2
pand mm3,qword [mmx32_rgb555_rb]
pand_immq mm3, mmx32_rgb555_rb
movq mm1,mm0
pand mm1,qword [mmx32_rgb555_rb]
pand_immq mm1, mmx32_rgb555_rb
pmaddwd mm3,mm7
CLEANUP_IMMQ_LOADS(2)
pmaddwd mm1,mm7
pand mm2,mm6
@ -302,13 +318,13 @@ _convert_bgr555_cheat:
movq mm0,mm4
psrld mm1,6
pand mm0,qword [mmx32_rgb555_rb]
pand_immq mm0, mmx32_rgb555_rb
packssdw mm1,mm3
movq mm3,mm5
pmaddwd mm0,mm7
pand mm3,qword [mmx32_rgb555_rb]
pand_immq mm3, mmx32_rgb555_rb
pand mm4,mm6
movq [edi],mm1
@ -329,12 +345,14 @@ _convert_bgr555_cheat:
movq mm3,mm2
movq mm1,mm0
pand mm3,qword [mmx32_rgb555_rb]
pand_immq mm3, mmx32_rgb555_rb
packssdw mm5,mm4
pand mm1,qword [mmx32_rgb555_rb]
pand_immq mm1, mmx32_rgb555_rb
pand mm2,mm6
CLEANUP_IMMQ_LOADS(4)
movq [edi+8],mm5
pmaddwd mm3,mm7
@ -380,7 +398,8 @@ _convert_bgr555_cheat:
jnz .L3
.L4:
jmp _mmxreturn
retn
%ifidn __OUTPUT_FORMAT__,elf32
section .note.GNU-stack noalloc noexec nowrite progbits
%endif

View File

@ -1,126 +1,123 @@
;
; x86 format converters for HERMES
; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at)
; This source code is licensed under the GNU LGPL
;
; Please refer to the file COPYING.LIB contained in the distribution for
; licensing conditions
;
; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission
;
BITS 32
GLOBAL _ConvertX86
GLOBAL _x86return
GLOBAL _Hermes_X86_CPU
SECTION .data
cpu_flags dd 0
SECTION .text
;; _ConvertX86:
;; Row-by-row driver for the x86 pixel-format converters.
;; Takes one stack argument, a ConverterInfo*, read from [EBP+8] once the
;; frame is set up ([ESP+4] at the moment of the call).
;; --------------------------------------------------------------------------
;; ConverterInfo (ebp+..)
;; 0: void *s_pixels
;; 4: int s_width
;; 8: int s_height
;; 12: int s_add
;; 16: void *d_pixels
;; 20: int d_width
;; 24: int d_height
;; 28: int d_add
;; 32: void (*converter_function)()
;; 36: int32 *lookup
;;
;; For every row the converter is entered with ESI = source, EDI = destination
;; and ECX = s_width.  In this variant the converter is entered with JMP and
;; comes back by jumping to the global label _x86return, so it must leave ESP
;; balanced and EBP untouched (EBP holds the ConverterInfo pointer).
;; Side effect: s_height inside the caller's structure is counted down to 0.
;; All general purpose registers are preserved for the caller.
_ConvertX86:
    push ebp
    mov ebp,esp

    ; Save the registers used by the blitters, necessary for optimized code
    pusha

    mov eax,[ebp+8]

    ; Nothing to do for an empty surface.  Both exits go through the common
    ; epilogue below so that the PUSHA/PUSH EBP frame is always unwound.
    cmp dword [eax+4],BYTE 0        ; s_width == 0 ?
    je endconvert
    ; A zero row count would make the DEC/JNZ pair below wrap around and
    ; run the converter roughly 2^32 times.
    cmp dword [eax+8],BYTE 0        ; s_height == 0 ?
    je endconvert

    mov ebp,eax                     ; EBP now addresses the ConverterInfo

    mov esi,[ebp+0]                 ; s_pixels
    mov edi,[ebp+16]                ; d_pixels

y_loop:
    mov ecx,[ebp+4]                 ; pixels per row

    jmp [ebp+32]                    ; converter_function

_x86return:
    add esi,[ebp+12]                ; s_add
    add edi,[ebp+28]                ; d_add

    dec dword [ebp+8]               ; one row done (modifies s_height in place)
    jnz y_loop

endconvert:
    ; Restore the registers used by the blitters, necessary for optimized code
    popa

    pop ebp
    ret
;; Hermes_X86_CPU returns the CPUID flags in eax
;; (the EDX feature word of CPUID function 1), or 0 when the EFLAGS probes
;; below show that CPUID cannot be used.  ECX is clobbered.
_Hermes_X86_CPU:
    ; Probe 1: try to toggle EFLAGS bit 18 (040000h).
    pushfd
    pop eax
    mov ecx,eax                     ; ECX = original EFLAGS

    xor eax,040000h
    push eax
    popfd
    pushfd

    pop eax
    xor eax,ecx                     ; EAX = bits that actually changed
    jz .L1                          ; Processor is 386 (EAX is 0 here)

    push ecx                        ; put the original EFLAGS back
    popfd

    ; Probe 2: try to toggle EFLAGS bit 21 (200000h); if it sticks,
    ; the CPUID instruction is available.
    mov eax,ecx
    xor eax,200000h

    push eax
    popfd
    pushfd

    pop eax
    xor eax,ecx
    je .L1                          ; no CPUID (EAX is 0 here)

    ; CPUID overwrites EAX, EBX, ECX and EDX; keep the caller's registers
    ; intact and pass the result through cpu_flags.
    pusha

    mov eax,1
    cpuid

    mov [cpu_flags],edx

    popa

    mov eax,[cpu_flags]

.L1:
    ; EAX already holds the return value on every path.  Clearing it here
    ; (as the code used to do) threw the CPUID result away, so the function
    ; always reported "no features".
    ret
;
; x86 format converters for HERMES
; Some routines Copyright (c) 1998 Christian Nentwich (brn@eleet.mcb.at)
; This source code is licensed under the GNU LGPL
;
; Please refer to the file COPYING.LIB contained in the distribution for
; licensing conditions
;
; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission
;
BITS 32
%include "common.inc"
SDL_FUNC _ConvertX86
SDL_FUNC _Hermes_X86_CPU
SECTION .data
cpu_flags dd 0
SECTION .text
;; _ConvertX86:
;; Row-by-row driver for the x86 pixel-format converters.
;; Takes one stack argument, a ConverterInfo*, read from [EBP+8] once the
;; frame is set up ([ESP+4] at the moment of the call).
;; --------------------------------------------------------------------------
;; ConverterInfo (ebp+..)
;; 0: void *s_pixels
;; 4: int s_width
;; 8: int s_height
;; 12: int s_add
;; 16: void *d_pixels
;; 20: int d_width
;; 24: int d_height
;; 28: int d_add
;; 32: void (*converter_function)()
;; 36: int32 *lookup
;;
;; For every row the converter is CALLed with ESI = source, EDI = destination
;; and ECX = s_width.  The loop relies on EBP still addressing the
;; ConverterInfo when the converter returns.
;; Side effect: s_height inside the caller's structure is counted down to 0.
;; All general purpose registers are preserved for the caller.
_ConvertX86:
    push ebp
    mov ebp,esp

    ; Save the registers used by the blitters, necessary for optimized code
    pusha

    mov eax,[ebp+8]

    ; Nothing to do for an empty surface.  Both exits go through the common
    ; epilogue below so that the PUSHA/PUSH EBP frame is always unwound.
    cmp dword [eax+4],BYTE 0        ; s_width == 0 ?
    je endconvert
    ; A zero row count would make the DEC/JNZ pair below wrap around and
    ; run the converter roughly 2^32 times.
    cmp dword [eax+8],BYTE 0        ; s_height == 0 ?
    je endconvert

    mov ebp,eax                     ; EBP now addresses the ConverterInfo

    mov esi,[ebp+0]                 ; s_pixels
    mov edi,[ebp+16]                ; d_pixels

y_loop:
    mov ecx,[ebp+4]                 ; pixels per row

    call [ebp+32]                   ; converter_function

    add esi,[ebp+12]                ; s_add
    add edi,[ebp+28]                ; d_add

    dec dword [ebp+8]               ; one row done (modifies s_height in place)
    jnz y_loop

endconvert:
    ; Restore the registers used by the blitters, necessary for optimized code
    popa

    pop ebp
    ret
;; _Hermes_X86_CPU:
;; Returns in EAX the EDX feature word of CPUID function 1, or 0 when the
;; EFLAGS probes below show that CPUID cannot be used.  ECX is clobbered.
_Hermes_X86_CPU:
; Probe 1: try to toggle EFLAGS bit 18 (040000h).  ECX keeps the original
; EFLAGS value for the comparisons and for the restore further down.
pushfd
pop eax
mov ecx,eax
xor eax,040000h
push eax
popfd
pushfd
pop eax
; EAX = bits that actually changed; zero means the bit could not be toggled.
xor eax,ecx
jz .L1 ; Processor is 386
; Put the original EFLAGS back before the second probe.
push ecx
popfd
; Probe 2: try to toggle EFLAGS bit 21 (200000h).  If the bit cannot be
; changed, CPUID is skipped and EAX (zero from the XOR) is returned as is.
; Note that EFLAGS is not written back to the saved value after this probe.
mov eax,ecx
xor eax,200000h
push eax
popfd
pushfd
pop eax
xor eax,ecx
je .L1
; CPUID overwrites EAX, EBX, ECX and EDX, so all registers are saved around
; it and the result is carried across POPA through the cpu_flags variable.
pusha
mov eax,1
cpuid
mov [cpu_flags],edx
popa
mov eax,[cpu_flags]
.L1:
ret
%ifidn __OUTPUT_FORMAT__,elf32
section .note.GNU-stack noalloc noexec nowrite progbits
%endif

View File

@ -10,28 +10,19 @@
; Used with permission.
;
BITS 32
GLOBAL _ConvertX86p16_32RGB888
GLOBAL _ConvertX86p16_32BGR888
GLOBAL _ConvertX86p16_32RGBA888
GLOBAL _ConvertX86p16_32BGRA888
GLOBAL _ConvertX86p16_24RGB888
GLOBAL _ConvertX86p16_24BGR888
GLOBAL _ConvertX86p16_16BGR565
GLOBAL _ConvertX86p16_16RGB555
GLOBAL _ConvertX86p16_16BGR555
GLOBAL _ConvertX86p16_8RGB332
%include "common.inc"
SDL_FUNC _ConvertX86p16_16BGR565
SDL_FUNC _ConvertX86p16_16RGB555
SDL_FUNC _ConvertX86p16_16BGR555
SDL_FUNC _ConvertX86p16_8RGB332
EXTERN _ConvertX86
EXTERN _x86return
SECTION .text
_ConvertX86p16_16BGR565:
; check short
@ -39,7 +30,7 @@ _ConvertX86p16_16BGR565:
ja .L3
.L1 ; short loop
.L1: ; short loop
mov al,[esi]
mov ah,[esi+1]
mov ebx,eax
@ -56,10 +47,10 @@ _ConvertX86p16_16BGR565:
add edi,BYTE 2
dec ecx
jnz .L1
.L2
jmp _x86return
.L2:
retn
.L3 ; head
.L3: ; head
mov eax,edi
and eax,BYTE 11b
jz .L4
@ -79,7 +70,7 @@ _ConvertX86p16_16BGR565:
add edi,BYTE 2
dec ecx
.L4 ; save count
.L4: ; save count
push ecx
; unroll twice
@ -93,8 +84,8 @@ _ConvertX86p16_16BGR565:
neg ecx
jmp SHORT .L6
.L5 mov [edi+ecx*4-4],eax
.L6 mov eax,[esi+ecx*4]
.L5: mov [edi+ecx*4-4],eax
.L6: mov eax,[esi+ecx*4]
mov ebx,[esi+ecx*4]
and eax,07E007E0h
@ -134,8 +125,8 @@ _ConvertX86p16_16BGR565:
add esi,BYTE 2
add edi,BYTE 2
.L7
jmp _x86return
.L7:
retn
@ -149,7 +140,7 @@ _ConvertX86p16_16RGB555:
ja .L3
.L1 ; short loop
.L1: ; short loop
mov al,[esi]
mov ah,[esi+1]
mov ebx,eax
@ -163,10 +154,10 @@ _ConvertX86p16_16RGB555:
add edi,BYTE 2
dec ecx
jnz .L1
.L2
jmp _x86return
.L2:
retn
.L3 ; head
.L3: ; head
mov eax,edi
and eax,BYTE 11b
jz .L4
@ -183,7 +174,7 @@ _ConvertX86p16_16RGB555:
add edi,BYTE 2
dec ecx
.L4 ; save ebp
.L4: ; save ebp
push ebp
; save count
@ -200,7 +191,7 @@ _ConvertX86p16_16RGB555:
xor ebp,ebp
sub ebp,ecx
.L5 mov eax,[esi+ebp*8] ; agi?
.L5: mov eax,[esi+ebp*8] ; agi?
mov ecx,[esi+ebp*8+4]
mov ebx,eax
@ -226,7 +217,7 @@ _ConvertX86p16_16RGB555:
; tail
pop ecx
.L6 and ecx,BYTE 11b
.L6: and ecx,BYTE 11b
jz .L7
mov al,[esi]
mov ah,[esi+1]
@ -242,8 +233,8 @@ _ConvertX86p16_16RGB555:
dec ecx
jmp SHORT .L6
.L7 pop ebp
jmp _x86return
.L7: pop ebp
retn
@ -257,7 +248,7 @@ _ConvertX86p16_16BGR555:
ja .L3
.L1 ; short loop
.L1: ; short loop
mov al,[esi]
mov ah,[esi+1]
mov ebx,eax
@ -276,10 +267,10 @@ _ConvertX86p16_16BGR555:
add edi,BYTE 2
dec ecx
jnz .L1
.L2
jmp _x86return
.L2:
retn
.L3 ; head
.L3: ; head
mov eax,edi
and eax,BYTE 11b
jz .L4
@ -301,7 +292,7 @@ _ConvertX86p16_16BGR555:
add edi,BYTE 2
dec ecx
.L4 ; save count
.L4: ; save count
push ecx
; unroll twice
@ -315,8 +306,8 @@ _ConvertX86p16_16BGR555:
neg ecx
jmp SHORT .L6
.L5 mov [edi+ecx*4-4],eax
.L6 mov eax,[esi+ecx*4]
.L5: mov [edi+ecx*4-4],eax
.L6: mov eax,[esi+ecx*4]
shr eax,1
mov ebx,[esi+ecx*4]
@ -360,8 +351,8 @@ _ConvertX86p16_16BGR555:
add esi,BYTE 2
add edi,BYTE 2
.L7
jmp _x86return
.L7:
retn
@ -375,7 +366,7 @@ _ConvertX86p16_8RGB332:
ja .L3
.L1 ; short loop
.L1: ; short loop
mov al,[esi+0]
mov ah,[esi+1]
mov ebx,eax
@ -393,10 +384,10 @@ _ConvertX86p16_8RGB332:
inc edi
dec ecx
jnz .L1
.L2
jmp _x86return
.L2:
retn
.L3 mov eax,edi
.L3: mov eax,edi
and eax,BYTE 11b
jz .L4
mov al,[esi+0]
@ -417,7 +408,7 @@ _ConvertX86p16_8RGB332:
dec ecx
jmp SHORT .L3
.L4 ; save ebp
.L4: ; save ebp
push ebp
; save count
@ -431,7 +422,7 @@ _ConvertX86p16_8RGB332:
mov bl,[esi+1]
mov dh,[esi+2]
.L5 shl edx,16
.L5: shl edx,16
mov bh,[esi+3]
shl ebx,16
@ -472,7 +463,7 @@ _ConvertX86p16_8RGB332:
and ecx,BYTE 11b
jz .L7
.L6 ; tail
.L6: ; tail
mov al,[esi+0]
mov ah,[esi+1]
mov ebx,eax
@ -491,6 +482,9 @@ _ConvertX86p16_8RGB332:
dec ecx
jnz .L6
.L7 pop ebp
jmp _x86return
.L7: pop ebp
retn
%ifidn __OUTPUT_FORMAT__,elf32
section .note.GNU-stack noalloc noexec nowrite progbits
%endif

View File

@ -9,25 +9,23 @@
; Most routines are (c) Glenn Fiedler (ptc@gaffer.org), used with permission
;
BITS 32
GLOBAL _ConvertX86p32_32BGR888
GLOBAL _ConvertX86p32_32RGBA888
GLOBAL _ConvertX86p32_32BGRA888
GLOBAL _ConvertX86p32_24RGB888
GLOBAL _ConvertX86p32_24BGR888
GLOBAL _ConvertX86p32_16RGB565
GLOBAL _ConvertX86p32_16BGR565
GLOBAL _ConvertX86p32_16RGB555
GLOBAL _ConvertX86p32_16BGR555
GLOBAL _ConvertX86p32_8RGB332
%include "common.inc"
SDL_FUNC _ConvertX86p32_32BGR888
SDL_FUNC _ConvertX86p32_32RGBA888
SDL_FUNC _ConvertX86p32_32BGRA888
SDL_FUNC _ConvertX86p32_24RGB888
SDL_FUNC _ConvertX86p32_24BGR888
SDL_FUNC _ConvertX86p32_16RGB565
SDL_FUNC _ConvertX86p32_16BGR565
SDL_FUNC _ConvertX86p32_16RGB555
SDL_FUNC _ConvertX86p32_16BGR555
SDL_FUNC _ConvertX86p32_8RGB332
EXTERN _x86return
SECTION .text
;; _Convert_*
;; Paramters:
;; ESI = source
@ -43,7 +41,7 @@ _ConvertX86p32_32BGR888:
cmp ecx,BYTE 32
ja .L3
.L1 ; short loop
.L1: ; short loop
mov edx,[esi]
bswap edx
ror edx,8
@ -52,10 +50,10 @@ _ConvertX86p32_32BGR888:
add edi,BYTE 4
dec ecx
jnz .L1
.L2
jmp _x86return
.L2:
retn
.L3 ; save ebp
.L3: ; save ebp
push ebp
; unroll four times
@ -65,7 +63,7 @@ _ConvertX86p32_32BGR888:
; save count
push ecx
.L4 mov eax,[esi]
.L4: mov eax,[esi]
mov ebx,[esi+4]
bswap eax
@ -102,7 +100,7 @@ _ConvertX86p32_32BGR888:
and ecx,BYTE 11b
jz .L6
.L5 ; tail loop
.L5: ; tail loop
mov edx,[esi]
bswap edx
ror edx,8
@ -112,8 +110,8 @@ _ConvertX86p32_32BGR888:
dec ecx
jnz .L5
.L6 pop ebp
jmp _x86return
.L6: pop ebp
retn
@ -124,7 +122,7 @@ _ConvertX86p32_32RGBA888:
cmp ecx,BYTE 32
ja .L3
.L1 ; short loop
.L1: ; short loop
mov edx,[esi]
rol edx,8
mov [edi],edx
@ -132,10 +130,10 @@ _ConvertX86p32_32RGBA888:
add edi,BYTE 4
dec ecx
jnz .L1
.L2
jmp _x86return
.L2:
retn
.L3 ; save ebp
.L3: ; save ebp
push ebp
; unroll four times
@ -145,7 +143,7 @@ _ConvertX86p32_32RGBA888:
; save count
push ecx
.L4 mov eax,[esi]
.L4: mov eax,[esi]
mov ebx,[esi+4]
rol eax,8
@ -174,7 +172,7 @@ _ConvertX86p32_32RGBA888:
and ecx,BYTE 11b
jz .L6
.L5 ; tail loop
.L5: ; tail loop
mov edx,[esi]
rol edx,8
mov [edi],edx
@ -183,8 +181,8 @@ _ConvertX86p32_32RGBA888:
dec ecx
jnz .L5
.L6 pop ebp
jmp _x86return
.L6: pop ebp
retn
@ -195,7 +193,7 @@ _ConvertX86p32_32BGRA888:
cmp ecx,BYTE 32
ja .L3
.L1 ; short loop
.L1: ; short loop
mov edx,[esi]
bswap edx
mov [edi],edx
@ -203,10 +201,10 @@ _ConvertX86p32_32BGRA888:
add edi,BYTE 4
dec ecx
jnz .L1
.L2
jmp _x86return
.L2:
retn
.L3 ; save ebp
.L3: ; save ebp
push ebp
; unroll four times
@ -216,7 +214,7 @@ _ConvertX86p32_32BGRA888:
; save count
push ecx
.L4 mov eax,[esi]
.L4: mov eax,[esi]
mov ebx,[esi+4]
mov ecx,[esi+8]
@ -247,7 +245,7 @@ _ConvertX86p32_32BGRA888:
and ecx,BYTE 11b
jz .L6
.L5 ; tail loop
.L5: ; tail loop
mov edx,[esi]
bswap edx
mov [edi],edx
@ -256,8 +254,8 @@ _ConvertX86p32_32BGRA888:
dec ecx
jnz .L5
.L6 pop ebp
jmp _x86return
.L6: pop ebp
retn
@ -270,7 +268,7 @@ _ConvertX86p32_24RGB888:
cmp ecx,BYTE 32
ja .L3
.L1 ; short loop
.L1: ; short loop
mov al,[esi]
mov bl,[esi+1]
mov dl,[esi+2]
@ -281,10 +279,10 @@ _ConvertX86p32_24RGB888:
add edi,BYTE 3
dec ecx
jnz .L1
.L2
jmp _x86return
.L2:
retn
.L3 ; head
.L3: ; head
mov edx,edi
and edx,BYTE 11b
jz .L4
@ -299,7 +297,7 @@ _ConvertX86p32_24RGB888:
dec ecx
jmp SHORT .L3
.L4 ; unroll 4 times
.L4: ; unroll 4 times
push ebp
mov ebp,ecx
shr ebp,2
@ -307,7 +305,7 @@ _ConvertX86p32_24RGB888:
; save count
push ecx
.L5 mov eax,[esi] ; first dword eax = [A][R][G][B]
.L5: mov eax,[esi] ; first dword eax = [A][R][G][B]
mov ebx,[esi+4] ; second dword ebx = [a][r][g][b]
shl eax,8 ; eax = [R][G][B][.]
@ -341,7 +339,7 @@ _ConvertX86p32_24RGB888:
and ecx,BYTE 11b
jz .L7
.L6 ; tail loop
.L6: ; tail loop
mov al,[esi]
mov bl,[esi+1]
mov dl,[esi+2]
@ -353,8 +351,8 @@ _ConvertX86p32_24RGB888:
dec ecx
jnz .L6
.L7 pop ebp
jmp _x86return
.L7: pop ebp
retn
@ -367,8 +365,7 @@ _ConvertX86p32_24BGR888:
cmp ecx,BYTE 32
ja .L3
.L1 ; short loop
.L1: ; short loop
mov dl,[esi]
mov bl,[esi+1]
mov al,[esi+2]
@ -379,10 +376,10 @@ _ConvertX86p32_24BGR888:
add edi,BYTE 3
dec ecx
jnz .L1
.L2
jmp _x86return
.L2:
retn
.L3 ; head
.L3: ; head
mov edx,edi
and edx,BYTE 11b
jz .L4
@ -397,7 +394,7 @@ _ConvertX86p32_24BGR888:
dec ecx
jmp SHORT .L3
.L4 ; unroll 4 times
.L4: ; unroll 4 times
push ebp
mov ebp,ecx
shr ebp,2
@ -405,10 +402,10 @@ _ConvertX86p32_24BGR888:
; save count
push ecx
.L5
.L5:
mov eax,[esi] ; first dword eax = [A][R][G][B]
mov ebx,[esi+4] ; second dword ebx = [a][r][g][b]
bswap eax ; eax = [B][G][R][A]
bswap ebx ; ebx = [b][g][r][a]
@ -441,7 +438,7 @@ _ConvertX86p32_24BGR888:
and ecx,BYTE 11b
jz .L7
.L6 ; tail loop
.L6: ; tail loop
mov dl,[esi]
mov bl,[esi+1]
mov al,[esi+2]
@ -453,9 +450,9 @@ _ConvertX86p32_24BGR888:
dec ecx
jnz .L6
.L7
.L7:
pop ebp
jmp _x86return
retn
@ -467,7 +464,7 @@ _ConvertX86p32_16RGB565:
cmp ecx,BYTE 16
ja .L3
.L1 ; short loop
.L1: ; short loop
mov bl,[esi+0] ; blue
mov al,[esi+1] ; green
mov ah,[esi+2] ; red
@ -484,10 +481,10 @@ _ConvertX86p32_16RGB565:
jnz .L1
.L2: ; End of short loop
jmp _x86return
retn
.L3 ; head
.L3: ; head
mov ebx,edi
and ebx,BYTE 11b
jz .L4
@ -570,7 +567,7 @@ _ConvertX86p32_16RGB565:
add edi,BYTE 2
.L7:
jmp _x86return
retn
@ -583,7 +580,7 @@ _ConvertX86p32_16BGR565:
cmp ecx,BYTE 16
ja .L3
.L1 ; short loop
.L1: ; short loop
mov ah,[esi+0] ; blue
mov al,[esi+1] ; green
mov bl,[esi+2] ; red
@ -598,10 +595,10 @@ _ConvertX86p32_16BGR565:
add edi,BYTE 2
dec ecx
jnz .L1
.L2
jmp _x86return
.L2:
retn
.L3 ; head
.L3: ; head
mov ebx,edi
and ebx,BYTE 11b
jz .L4
@ -619,7 +616,7 @@ _ConvertX86p32_16BGR565:
add edi,BYTE 2
dec ecx
.L4 ; save count
.L4: ; save count
push ecx
; unroll twice
@ -633,9 +630,9 @@ _ConvertX86p32_16BGR565:
neg ecx
jmp SHORT .L6
.L5
.L5:
mov [edi+ecx*4-4],eax
.L6
.L6:
mov edx,[esi+ecx*8+4]
mov bh,[esi+ecx*8+4]
@ -683,8 +680,8 @@ _ConvertX86p32_16BGR565:
add esi,BYTE 4
add edi,BYTE 2
.L7
jmp _x86return
.L7:
retn
@ -697,7 +694,7 @@ _ConvertX86p32_16RGB555:
cmp ecx,BYTE 16
ja .L3
.L1 ; short loop
.L1: ; short loop
mov bl,[esi+0] ; blue
mov al,[esi+1] ; green
mov ah,[esi+2] ; red
@ -712,10 +709,10 @@ _ConvertX86p32_16RGB555:
add edi,BYTE 2
dec ecx
jnz .L1
.L2
jmp _x86return
.L2:
retn
.L3 ; head
.L3: ; head
mov ebx,edi
and ebx,BYTE 11b
jz .L4
@ -733,7 +730,7 @@ _ConvertX86p32_16RGB555:
add edi,BYTE 2
dec ecx
.L4 ; save count
.L4: ; save count
push ecx
; unroll twice
@ -747,9 +744,9 @@ _ConvertX86p32_16RGB555:
neg ecx
jmp SHORT .L6
.L5
.L5:
mov [edi+ecx*4-4],eax
.L6
.L6:
mov eax,[esi+ecx*8]
shr ah,3
@ -794,8 +791,8 @@ _ConvertX86p32_16RGB555:
add esi,BYTE 4
add edi,BYTE 2
.L7
jmp _x86return
.L7:
retn
@ -809,7 +806,7 @@ _ConvertX86p32_16BGR555:
ja .L3
.L1 ; short loop
.L1: ; short loop
mov ah,[esi+0] ; blue
mov al,[esi+1] ; green
mov bl,[esi+2] ; red
@ -824,10 +821,10 @@ _ConvertX86p32_16BGR555:
add edi,BYTE 2
dec ecx
jnz .L1
.L2
jmp _x86return
.L2:
retn
.L3 ; head
.L3: ; head
mov ebx,edi
and ebx,BYTE 11b
jz .L4
@ -845,7 +842,7 @@ _ConvertX86p32_16BGR555:
add edi,BYTE 2
dec ecx
.L4 ; save count
.L4: ; save count
push ecx
; unroll twice
@ -859,9 +856,9 @@ _ConvertX86p32_16BGR555:
neg ecx
jmp SHORT .L6
.L5
.L5:
mov [edi+ecx*4-4],eax
.L6
.L6:
mov edx,[esi+ecx*8+4]
mov bh,[esi+ecx*8+4]
@ -909,8 +906,8 @@ _ConvertX86p32_16BGR555:
add esi,BYTE 4
add edi,BYTE 2
.L7
jmp _x86return
.L7:
retn
@ -922,7 +919,7 @@ _ConvertX86p32_16BGR555:
_ConvertX86p32_8RGB332:
.L_ALIGNED
.L_ALIGNED:
push ecx
shr ecx,2 ; We will draw 4 pixels at once
@ -1040,4 +1037,8 @@ _ConvertX86p32_8RGB332:
jnz .L3
.L4:
jmp _x86return
retn
%ifidn __OUTPUT_FORMAT__,elf32
section .note.GNU-stack noalloc noexec nowrite progbits
%endif

View File

@ -14,8 +14,8 @@ default: fire
fire: $(OBJECTS) Makefile
$(CC) $(CFLAGS) $(INCLUDES) -o sdltest.o sdltest.c
$(CC) $(CFLAGS) $(INCLUDES) -o testbitmap.o testbitmap.c
$(LD) $(LDFLAGS) $(LIBPATH) --subsystem native -o sdltest sdltest.o -lgcc -lSDL -lc.dll -lc -lsound
$(LD) $(LDFLAGS) $(LIBPATH) --subsystem native -o testbitmap testbitmap.o -lgcc -lSDL -lc.dll -lc -lsound
$(LD) $(LDFLAGS) $(LIBPATH) --subsystem native -o sdltest sdltest.o -lgcc -lSDLn -lc.dll -lsound
$(LD) $(LDFLAGS) $(LIBPATH) --subsystem native -o testbitmap testbitmap.o -lgcc -lSDLn -lc.dll -lsound
kos32-strip -s sdltest -o sdltest
kos32-strip -s testbitmap -o testbitmap
objcopy testbitmap -O binary

View File

@ -1,28 +1,79 @@
#include "SDL.h"
#include <stdlib.h>
SDL_Surface* screen;
static int done = 0;
int main()
#define WIDTH 640
#define HEIGHT 480
#define BPP 4
#define DEPTH 32
void setpixel(SDL_Surface *screen, int x, int y, Uint8 r, Uint8 g, Uint8 b)
{
SDL_Event event;
if(SDL_Init(SDL_INIT_VIDEO) < 0) exit(0);
atexit(SDL_Quit);
screen = SDL_SetVideoMode(320, 200, 8, SDL_SWSURFACE);
while(!done)
{
while(SDL_PollEvent(&event))
{
switch(event.type)
{
case SDL_KEYDOWN:
case SDL_QUIT:
done=1;
break;
default:
break;
}
}
}
Uint32 *pixmem32;
Uint32 colour;
colour = SDL_MapRGB( screen->format, r, g, b );
pixmem32 = (Uint32*) screen->pixels + y + x;
*pixmem32 = colour;
}
/*
 * Paint one frame of the colour test pattern onto `screen` and present it.
 *
 * screen: surface to draw on; it is locked for the duration of the pixel
 *         writes when SDL_MUSTLOCK() says so.
 * h:      per-frame offset mixed into all three colour channels.
 *
 * Returns without drawing or flipping if the surface cannot be locked.
 */
void DrawScreen(SDL_Surface* screen, int h)
{
    int row, col;

    if (SDL_MUSTLOCK(screen) && SDL_LockSurface(screen) < 0)
        return;

    for (row = 0; row < screen->h; ++row) {
        /* Start of this row, counted in BPP-sized pixels rather than bytes. */
        int row_offset = row * screen->pitch / BPP;

        for (col = 0; col < screen->w; ++col) {
            setpixel(screen, col, row_offset,
                     (col*col)/256 + 3*row + h,
                     (row*row)/256 + col + h,
                     h);
        }
    }

    if (SDL_MUSTLOCK(screen))
        SDL_UnlockSurface(screen);

    SDL_Flip(screen);
}
/*
 * Example entry point: opens a WIDTH x HEIGHT, DEPTH-bit fullscreen video
 * mode and redraws the test pattern every iteration until a key is pressed
 * or a quit event arrives.
 *
 * Returns 1 if SDL or the video mode cannot be initialised, 0 otherwise.
 * argc/argv are not used.
 */
int main(int argc, char* argv[])
{
    SDL_Surface *screen;
    SDL_Event event;
    int quit = 0;
    int frame = 0;

    if (SDL_Init(SDL_INIT_VIDEO) < 0)
        return 1;

    screen = SDL_SetVideoMode(WIDTH, HEIGHT, DEPTH, SDL_FULLSCREEN|SDL_HWSURFACE);
    if (!screen) {
        SDL_Quit();
        return 1;
    }

    while (!quit) {
        DrawScreen(screen, frame++);

        /* Drain the whole event queue before drawing the next frame. */
        while (SDL_PollEvent(&event)) {
            if (event.type == SDL_QUIT || event.type == SDL_KEYDOWN)
                quit = 1;
        }
    }

    SDL_Quit();
    return 0;
}