- Added libunicode.asm to parse UTF-8 strings. - Implemented count_utf8_codepoints to skip continuation bytes. - Implemented count_utf8_graphemes to handle ZWJ (E2 80 8D) and combining marks (CC/CD). - Added console.asm to the examples folder to test and print the results. - Submitted for GSoC qualification task.
71 lines
1.5 KiB
NASM
71 lines
1.5 KiB
NASM
format binary as ""                     ; flat binary image, no file extension
use32                                   ; 32-bit code
org 0x0                                 ; image is addressed from offset 0

;-----------------------------------------------------------------------
; MENUET01 executable header: 8-byte signature followed by seven dwords.
;-----------------------------------------------------------------------
db 'MENUET01'                           ; signature
dd 1                                    ; header version
dd START                                ; entry point
dd I_END                                ; end of the initialised image
dd 0x100000                             ; 1MB Memory
dd 0x100000                             ; Stack pointer
dd 0                                    ; parameters buffer pointer (none)
dd 0                                    ; path buffer pointer (none)
|
||
|
||
include '../../../../proc32.inc'
|
||
include '../../../../macros.inc'
|
||
include '../../../../dll.inc'
|
||
include '../libunicode.asm'
|
||
|
||
;-----------------------------------------------------------------------
; START - program entry point (referenced from the MENUET01 header).
;
; Loads console.obj, opens a console window and prints the number of
; code points and grapheme clusters that libunicode reports for
; test_combo, then leaves the console and terminates the process.
;
; Conventions as used below:
;   con_init / con_write_asciiz / con_exit  - stdcall, callee pops args
;   con_printf                              - cdecl (variadic), caller pops
;   count_utf8_codepoints / _graphemes      - in:  eax = zero-terminated
;                                                  UTF-8 string
;                                             out: eax = count
;-----------------------------------------------------------------------
START:
        stdcall dll.Load, import_table
        test    eax, eax
        jnz     EXIT                    ; non-zero = library load failed, nothing to print to

        ; con_init(wnd_width, wnd_height, scr_width, scr_height, title)
        ; Arguments are pushed right-to-left; -1 requests the default size.
        push    window_title
        push    -1
        push    -1
        push    -1
        push    -1
        call    [con_init]

        push    my_text
        call    [con_write_asciiz]

        ; --- code points: test_combo encodes 7 of them ---
        mov     eax, test_combo
        call    count_utf8_codepoints

        push    eax                     ; %d
        push    fmt_codepoints
        call    [con_printf]
        add     esp, 8                  ; cdecl: drop the two arguments

        ; --- graphemes: A, П, the ZWJ emoji sequence and e+U+0301 ---
        mov     eax, test_combo
        call    count_utf8_graphemes

        push    eax                     ; %d
        push    fmt_graphemes
        call    [con_printf]
        add     esp, 8                  ; cdecl: drop the two arguments

        push    0                       ; 0 = do not force the window closed
        call    [con_exit]

EXIT:
        mcall   -1                      ; Exit cleanly
|
||
|
||
; DATA SECTION

; --- console strings (zero-terminated; 10 = line feed) ---
window_title    db 'Debug Console', 0
my_text         db 'Console loaded successfully!', 10, 0
fmt_codepoints  db "Total Codepoints: %d", 10, 0
fmt_graphemes   db "Total Graphemes: %d", 10, 0

; --- UTF-8 test vectors ---
; unitxt and test_tech are not referenced by the code visible in this file.
unitxt          db 'AП👨👩👦qwerty', 0
test_tech       db 'cafe', 0xCC, 0x81, 0        ; "cafe" + U+0301 combining acute

; test_combo, one code point per line:
test_combo      db 'A'                          ; U+0041
                db 0xD0, 0x9F                   ; U+041F  Cyrillic capital Pe
                db 0xF0, 0x9F, 0x91, 0xA9       ; U+1F469 woman
                db 0xE2, 0x80, 0x8D             ; U+200D  zero width joiner
                db 0xF0, 0x9F, 0x92, 0xBB       ; U+1F4BB personal computer
                db 'e'                          ; U+0065
                db 0xCC, 0x81                   ; U+0301  combining acute accent
                db 0                            ; terminator

; --- import table consumed by dll.Load ---
align 4
import_table:
library console, '/sys/lib/console.obj'

import  console, \
        con_init,         'con_init', \
        con_write_asciiz, 'con_write_asciiz', \
        con_exit,         'con_exit', \
        con_printf,       'con_printf'

I_END:
|