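; NOTE: the source viewer's metadata ("169 lines, 3.9 KiB, NASM") was captured with this
; file; it is not part of the program. The code below is written in flat assembler
; (FASM) syntax, not NASM.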
; Output format: Microsoft COFF object file.
format MS COFF
|
|
|
|
; Publish the @EXPORT label (the export table defined at the end of this file)
; under the external name 'EXPORTS'.
public @EXPORT as 'EXPORTS'
|
|
|
|
; Shared macro packages. Presumably proc32.inc supplies the proc/endp/ret
; procedure macros used below - TODO confirm against the include file.
include '../../../../proc32.inc'
|
|
; Presumably macros.inc supplies the `export` table macro and the multi-operand
; push/pop forms used below - TODO confirm against the include file.
include '../../../../macros.inc'
|
|
|
|
; Code section, readable, aligned to 16 bytes; all procedures follow.
section '.flat' code readable align 16
|
|
|
|
proc libstr._.init
; Library entry point (called after library load).
; Records the four service routines handed over in registers so that the
; rest of the library can reach them through the mem.* / dll.* variables.
;
; > eax = memory allocation routine <mem.alloc*>
; > ebx = memory freeing routine <mem.free*>
; > ecx = memory reallocation routine <mem.realloc*>
; > edx = library loading routine <dll.load*>
;
; < eax = library initialization result; this implementation cannot fail
;         and always returns 0 (ok)

        ; The stores are independent of each other; none of the pointers
        ; is checked or called here.
        mov     [dll.load], edx
        mov     [mem.realloc], ecx
        mov     [mem.free], ebx
        mov     [mem.alloc], eax

        xor     eax, eax                ; report success
        ret
endp
|
|
|
|
proc str.len _string
; Returns the length of the string in bytes
;
; > _string = pointer to null-terminated string
;
; < eax = string length without null-char
;
; edi is saved and restored; ecx is used as the scan counter.

        push    edi

        mov     edi, [_string]          ; edi = scan position
        xor     eax, eax                ; al = 0, the byte scasb searches for
        mov     ecx, -1                 ; effectively unlimited scan budget
        cld                             ; scan towards higher addresses
        repne   scasb                   ; stops just past the terminator

        ; ecx started at -1 and was decremented once per byte examined,
        ; terminator included, so ecx = -(len + 2) here.
        not     ecx                     ; ecx = len + 1
        dec     ecx                     ; ecx = len
        mov     eax, ecx

        pop     edi
        ret
endp
|
|
|
|
proc str.utf8.len _string
; Returns the number of Unicode characters
;
; > _string = pointer to utf-8 coded Unicode string (null-terminated)
;
; < eax = Unicode characters count
;
; The count is the number of bytes before the terminator that are NOT
; continuation bytes (10xxxxxxb). The string is not validated: every lead
; or ASCII byte counts as one character, whatever follows it.
; esi is saved and restored; ecx is used as the counter.

        push    esi

        mov     esi, [_string]
        xor     ecx, ecx                ; ecx = characters counted so far
        cld                             ; lodsb must walk forward, as in the
                                        ; other string routines of this file
  .next_byte:
        lodsb                           ; al = next byte, esi advances
        test    al, al
        jz      .done                   ; terminator reached

        and     al, 0xc0                ; keep the two top bits
        cmp     al, 0x80                ; 10xxxxxxb = continuation byte
        je      .next_byte              ; belongs to a character already counted

        inc     ecx                     ; ASCII or lead byte: a new character
        jmp     .next_byte

  .done:
        mov     eax, ecx
        pop     esi
        ret
endp
|
|
|
|
proc str.utf8.validate _string
; Validates a UTF-8 encoded string and returns its size in bytes
;
; > _string = pointer to utf-8 coded Unicode string (null-terminated)
;
; < eax = size in bytes (terminator not counted), or 0 if utf8_num_bytes
;         rejects any sequence in the string. An empty string also yields 0.
;
; esi and edi are saved and restored. utf8_num_bytes is called once per
; character; it advances esi and returns the character size in eax.

        push    esi
        push    edi

        xor     edi, edi                ; edi = bytes accepted so far
        mov     esi, [_string]          ; esi = start of the next character

  .next_char:
        cmp     byte [esi], 0
        je      .finished               ; reached the terminator: all valid

        call    utf8_num_bytes          ; eax = size of this character, 0 = bad
        test    eax, eax
        jz      .return                 ; invalid: eax is already the 0 result

        add     edi, eax
        jmp     .next_char

  .finished:
        mov     eax, edi                ; total size of the validated string
  .return:
        pop     edi
        pop     esi
        ret
endp
|
|
|
|
proc str.utf8.num_bytes, _string
; Returns size of utf-8 sequence
;
; Stack-argument wrapper around the internal utf8_num_bytes helper, which
; takes its input in esi.
;
; > _string = pointer to utf-8 coded Unicode string
;
; < eax = size in bytes 1 to 4 if sequence is valid, 0 otherwise

        push    esi                     ; keep the caller's esi: the helper
                                        ; reads through esi and advances it
        mov     esi, [_string]
        call    utf8_num_bytes          ; result is left in eax
        pop     esi
        ret
endp
|
|
|
|
|
|
proc utf8_num_bytes
; Measures and checks the UTF-8 sequence at esi (internal helper with a
; register interface, not exported).
;
; > esi = pointer to string
;
; < eax = size in bytes of the first character (1..4), 0 if the sequence is invalid
; < esi = pointer to the next character if eax > 0, otherwise undefined
;
; Clobbers ecx, edx and flags; clears the direction flag.
;
; Accepted byte sequences (well-formed UTF-8 as defined by RFC 3629):
;   01..7F
;   C2..DF  80..BF
;   E0      A0..BF  80..BF            (80..9F would be an overlong encoding)
;   E1..EC  80..BF  80..BF
;   ED      80..9F  80..BF            (A0..BF would encode a surrogate)
;   EE..EF  80..BF  80..BF
;   F0      90..BF  80..BF  80..BF    (80..8F would be an overlong encoding)
;   F1..F3  80..BF  80..BF  80..BF
;   F4      80..8F  80..BF  80..BF    (90..BF would exceed U+10FFFF)
; Everything else is rejected: stray continuation bytes, the always-overlong
; leads C0/C1 and the leads F5..FF. A zero byte is the string terminator and
; never part of a character, so the scan stops at it and cannot run past the
; end of the string.

        cld
        lodsb                           ; al = lead byte
        test    al, al
        jz      .invalid                ; empty string: no character to measure

        mov     ecx, 1                  ; ecx = sequence length
        cmp     al, 0x80
        jb      .valid                  ; 0xxxxxxxb: single-byte character

        ; The lead byte decides the length first, so a bad lead byte is
        ; rejected without consuming any following bytes.
        mov     dl, 0x80                ; dl..dh = allowed range of the 2nd byte
        mov     dh, 0xbf
        cmp     al, 0xc2
        jb      .invalid                ; 80..BF stray continuation, C0/C1 overlong

        inc     ecx                     ; length 2
        cmp     al, 0xe0
        jb      .second                 ; C2..DF

        inc     ecx                     ; length 3
        cmp     al, 0xf0
        jae     .lead4
        cmp     al, 0xe0
        jne     .not_e0
        mov     dl, 0xa0                ; E0: 2nd byte below A0 is overlong
  .not_e0:
        cmp     al, 0xed
        jne     .second
        mov     dh, 0x9f                ; ED: 2nd byte above 9F is a surrogate
        jmp     .second

  .lead4:
        inc     ecx                     ; length 4
        cmp     al, 0xf4
        ja      .invalid                ; F5..FF never start a character
        jne     .not_f4                 ; flags still come from the cmp above
        mov     dh, 0x8f                ; F4: 2nd byte above 8F exceeds U+10FFFF
  .not_f4:
        cmp     al, 0xf0
        jne     .second
        mov     dl, 0x90                ; F0: 2nd byte below 90 is overlong

  .second:
        lodsb                           ; al = 2nd byte (lead byte no longer needed)
        cmp     al, dl
        jb      .invalid                ; also catches the zero terminator
        cmp     al, dh
        ja      .invalid

        lea     edx, [ecx - 2]          ; edx = continuation bytes still expected
  .tail:
        test    edx, edx
        jz      .valid
        lodsb
        and     al, 0xc0                ; keep the two top bits
        cmp     al, 0x80                ; must be 10xxxxxxb
        jne     .invalid                ; includes the zero terminator
        dec     edx
        jmp     .tail

  .valid:
        mov     eax, ecx
        ret

  .invalid:
        xor     eax, eax
        ret
endp
|
|
|
|
|
|
section '.data' data readable writable align 16

; Service routine pointers; libstr._.init stores eax/ebx/ecx/edx here.
; Each is one uninitialized dword.
mem.alloc       rd 1
mem.free        rd 1
mem.realloc     rd 1
dll.load        rd 1

; Export table, published through the @EXPORT label.
; Each pair is <value or procedure>, <exported name>.
align 16
@EXPORT:

export                                            \
        libstr._.init,      'lib_init',           \
        0x00010000,         'version',            \
        str.len,            'str_len',            \
        str.utf8.len,       'str_utf8_len',       \
        str.utf8.num_bytes, 'str_utf8_num_bytes', \
        str.utf8.validate,  'str_utf8_validate'
|