files
libstr/libstr.asm
2025-06-05 00:30:09 +05:00

170 lines
3.9 KiB
NASM

; Assemble this file into an MS COFF object.
format MS COFF
; Publish the @EXPORT label (the export table at the end of the file)
; as the public symbol 'EXPORTS'.
public @EXPORT as 'EXPORTS'
; Project-wide include files. Their contents are not visible here; presumably
; they supply the proc/endp and export macros used below - TODO confirm.
include '../../../../proc32.inc'
include '../../../../macros.inc'
; Code section: readable, aligned on 16 bytes.
section '.flat' code readable align 16
proc libstr._.init
; Library entry point (called after library load).
; Remembers the four service routines handed over in registers so they
; are available through the mem.* / dll.load variables.
;
;> eax = memory allocation routine <mem.alloc*>
;> ebx = memory freeing routine <mem.free*>
;> ecx = memory reallocation routine <mem.realloc*>
;> edx = library loading routine <dll.load*>
;
;< eax = 1 (fail) / 0 (ok) (library initialization result);
;        this implementation always reports 0
        mov     [dll.load], edx
        mov     [mem.realloc], ecx
        mov     [mem.free], ebx
        mov     [mem.alloc], eax
        xor     eax, eax                ; report success
        ret
endp
proc str.len _string
; Returns the length of the string in bytes
;
; > _string = pointer to null-terminated string
;
; < eax = string length without null-char
;
; edi is preserved; ecx is clobbered and the direction flag is left clear.
        push    edi
        mov     edi, [_string]
        xor     eax, eax                ; al = 0: the byte scasb looks for
        or      ecx, -1                 ; no upper bound on the scan
        cld
        repne   scasb                   ; ecx decremented once per byte examined,
                                        ; terminator included
        not     ecx                     ; ecx = length + 1
        dec     ecx                     ; drop the terminator from the count
        mov     eax, ecx
        pop     edi
        ret
endp
proc str.utf8.len _string
; Returns the number of Unicode characters
;
; > _string = pointer to utf-8 coded Unicode string (null-terminated)
;
; < eax = Unicode characters count
;
; Every byte that is not a continuation byte (10xxxxxxb) counts as the start
; of one character. The string is not validated here.
; esi is preserved; ecx is clobbered and the direction flag is left clear.
        push    esi
        mov     esi, [_string]
        xor     ecx, ecx                ; ecx = characters counted so far
        cld                             ; lodsb must walk forward, as in the sibling routines
  .scan:
        lodsb
        test    al, al
        jz      .end                    ; terminator reached
        and     al, 0xc0                ; keep the two top bits
        cmp     al, 0x80                ; 10xxxxxxb = continuation byte
        je      .scan                   ; belongs to a character already counted
        inc     ecx
        jmp     .scan
  .end:
        mov     eax, ecx
        pop     esi
        ret
endp
proc str.utf8.validate _string
; Validates a UTF-8 encoded string and returns its size in bytes
;
; > _string = pointer to utf-8 coded Unicode string (null-terminated)
;
; < eax = size in bytes (terminator not counted),
;         0 if an invalid sequence is found
;
; Note: an empty string also yields 0, so the two cases cannot be told
; apart from the result alone.
; esi and edi are preserved; ecx and edx are clobbered by utf8_num_bytes.
        push    esi edi
        mov     esi, [_string]
        xor     edi, edi                ; edi = bytes accepted so far
  .next:
        cmp     byte [esi], 0
        je      .done                   ; terminator: whole string accepted
        call    utf8_num_bytes          ; eax = size of this character, esi advanced past it
        test    eax, eax
        jz      .bad
        add     edi, eax
        jmp     .next
  .bad:
        xor     edi, edi                ; invalid sequence: report 0
  .done:
        mov     eax, edi
        pop     edi esi
        ret
endp
proc str.utf8.num_bytes uses esi, _string
; Returns size of utf-8 sequence
;
; > _string = pointer to utf-8 coded Unicode string
;
; < eax = size in bytes 1 to 4 if sequence is valid, 0 otherwise
;
; Stack-argument wrapper around utf8_num_bytes, which takes its pointer
; in esi. esi is saved and restored by the proc frame ("uses esi").
        mov     esi, [_string]
        call    utf8_num_bytes
        ret
endp
proc utf8_num_bytes
; Measures and validates the UTF-8 sequence at esi.
;
; > esi = pointer to string
;
; < eax = size in bytes of the first character, 0 if the sequence is invalid
; < esi = pointer to the next character if eax > 0, otherwise undefined
;
; Clobbers ecx, edx and flags; leaves the direction flag clear.
;
; Only well-formed sequences are accepted:
;   00        -> invalid (string terminator is not a character)
;   01..7F    -> 1 byte
;   80..C1    -> invalid (stray continuation byte / overlong 2-byte form)
;   C2..DF    -> 2 bytes
;   E0..EF    -> 3 bytes (E0: second byte A0..BF, no overlong forms;
;                         ED: second byte 80..9F, no surrogates)
;   F0..F4    -> 4 bytes (F0: second byte 90..BF, no overlong forms;
;                         F4: second byte 80..8F, nothing above U+10FFFF)
;   F5..FF    -> invalid
; Reading stops at the first offending byte, so a terminator inside a
; truncated sequence is never stepped over.
        mov     ecx, 1                  ; ecx = sequence length being established
        cld
        lodsb                           ; al = lead byte
        test    al, al
        jz      .invalid
        cmp     al, 0x80
        jb      .valid                  ; 0xxxxxxxb: single byte
        cmp     al, 0xc2
        jb      .invalid                ; continuation byte or overlong C0/C1
        cmp     al, 0xf4
        ja      .invalid                ; would encode beyond U+10FFFF

        ; dl..dh = allowed range of the second byte (default 80..BF)
        mov     dl, 0x80
        mov     dh, 0xbf
        inc     ecx                     ; at least 2 bytes
        cmp     al, 0xe0
        jb      .second                 ; C2..DF: 2-byte sequence
        inc     ecx                     ; at least 3 bytes
        cmp     al, 0xf0
        jae     .lead4

        ; E0..EF: 3-byte sequence
        cmp     al, 0xe0
        jne     .not_e0
        mov     dl, 0xa0                ; E0 80..9F would be overlong
  .not_e0:
        cmp     al, 0xed
        jne     .second
        mov     dh, 0x9f                ; ED A0..BF would be a surrogate
        jmp     .second

  .lead4:
        ; F0..F4: 4-byte sequence
        inc     ecx
        cmp     al, 0xf0
        jne     .not_f0
        mov     dl, 0x90                ; F0 80..8F would be overlong
  .not_f0:
        cmp     al, 0xf4
        jne     .second
        mov     dh, 0x8f                ; F4 90..BF would exceed U+10FFFF

  .second:
        lodsb                           ; second byte, checked against dl..dh
        cmp     al, dl
        jb      .invalid                ; also catches the terminator
        cmp     al, dh
        ja      .invalid
        mov     edx, ecx
        sub     edx, 2                  ; edx = continuation bytes still expected
        jz      .valid

  .tail:
        lodsb
        and     al, 0xc0                ; keep the two top bits
        cmp     al, 0x80                ; must be 10xxxxxxb
        jne     .invalid                ; also catches the terminator
        dec     edx
        jnz     .tail

  .valid:
        mov     eax, ecx
        ret
  .invalid:
        xor     eax, eax
        ret
endp
; Writable data section, aligned on 16 bytes.
section '.data' data readable writable align 16
; Routine pointers received in eax/ebx/ecx/edx and stored by libstr._.init.
mem.alloc dd ?
mem.free dd ?
mem.realloc dd ?
dll.load dd ?
align 16
; Export table, published as 'EXPORTS' at the top of the file.
; Each entry pairs a value (a routine address, or the constant 0x00010000
; for 'version') with the name it is exported under.
@EXPORT:
export \
libstr._.init, 'lib_init', \
0x00010000, 'version', \
str.len, 'str_len', \
str.utf8.len, 'str_utf8_len', \
str.utf8.num_bytes, 'str_utf8_num_bytes',\
str.utf8.validate, 'str_utf8_validate'