Add initial version
This commit is contained in:
169
libstr.asm
Normal file
169
libstr.asm
Normal file
@@ -0,0 +1,169 @@
|
||||
format MS COFF
|
||||
|
||||
public @EXPORT as 'EXPORTS'
|
||||
|
||||
include '../../../../proc32.inc'
|
||||
include '../../../../macros.inc'
|
||||
|
||||
section '.flat' code readable align 16
|
||||
|
||||
proc libstr._.init
; Library entry point (called after library load).
; Records the four service routines received in registers so they can be
; reached later through the mem.* / dll.* variables.
;
;> eax = memory allocation routine <mem.alloc*>
;> ebx = memory freeing routine <mem.free*>
;> ecx = memory reallocation routine <mem.realloc*>
;> edx = library loading routine <dll.load*>
;
;< eax = 1 (fail) / 0 (ok) (library initialization result)
        mov     [dll.load], edx
        mov     [mem.realloc], ecx
        mov     [mem.free], ebx
        mov     [mem.alloc], eax        ; eax is stored before it is reused as the result
        xor     eax, eax                ; nothing here can fail: always report 0 (ok)
        ret
endp
|
||||
|
||||
proc str.len _string
; Measures a null-terminated string
;
; > _string = pointer to null-terminated string
;
; < eax = string length in bytes, terminating null-char not counted
        push    edi
        mov     edi, [_string]
        xor     eax, eax                ; al = 0, the byte scasb searches for
        mov     ecx, -1                 ; counter starts at its maximum: scan is unbounded
        cld                             ; scan towards higher addresses
        repne   scasb                   ; stops after the terminator: ecx = -(length + 2)
        not     ecx                     ; ecx = length + 1
        dec     ecx                     ; ecx = length
        mov     eax, ecx
        pop     edi
        ret
endp
|
||||
|
||||
proc str.utf8.len _string
; Returns the number of Unicode characters
;
; Counts every byte up to the terminating zero that is not a UTF-8
; continuation byte (10xxxxxxb). The string is not validated.
;
; > _string = pointer to utf-8 coded Unicode string
;
; < eax = Unicode characters count
        push    esi
        mov     esi, [_string]
        xor     ecx, ecx                ; ecx = characters counted so far
        cld                             ; lodsb must advance esi; do not rely on the caller's DF
  .scan:
        lodsb
        test    al, al
        jz      .end                    ; terminating zero reached
        and     al, 0xc0                ; keep the two top bits
        cmp     al, 0x80                ; 10xxxxxxb = continuation byte
        je      .scan                   ; continuation bytes do not start a character
        inc     ecx
        jmp     .scan
  .end:
        mov     eax, ecx
        pop     esi
        ret
endp
|
||||
|
||||
proc str.utf8.validate _string
; Validates a UTF-8 encoded string and returns its size in bytes
;
; > _string = pointer to utf-8 coded Unicode string
;
; < eax = size in bytes (terminating zero not counted),
;         0 if an invalid sequence was found
;
; NOTE: an empty string also yields 0, so callers cannot tell it apart
;       from an invalid one by the result alone.
; NOTE: earlier revisions of this header listed a `_max` length limit;
;       the procedure takes only `_string` and applies no limit.
        push    esi edi
        xor     edi, edi                ; edi = bytes validated so far
        mov     esi, [_string]
  .loop:
        cmp     byte [esi], 0
        je      .done                   ; terminator: whole string was valid
        call    utf8_num_bytes          ; eax = size of sequence at esi (0 = invalid), esi advanced
        test    eax, eax
        jz      .error
        add     edi, eax
        jmp     .loop
  .error:
        xor     edi, edi                ; report 0 for an invalid string
  .done:
        mov     eax, edi
        pop     edi esi
        ret
endp
|
||||
|
||||
proc str.utf8.num_bytes, _string
; Measures the utf-8 sequence found at the start of a string
; (stack-argument wrapper around utf8_num_bytes)
;
; > _string = pointer to utf-8 coded Unicode string
;
; < eax = sequence size in bytes, 1 to 4, when the sequence is valid;
;         0 otherwise
        push    esi                     ; utf8_num_bytes moves esi, keep the caller's value
        mov     esi, [_string]
        call    utf8_num_bytes
        pop     esi
        ret
endp
|
||||
|
||||
|
||||
proc utf8_num_bytes
; Measures and validates a single UTF-8 sequence (internal, register arguments)
;
; > esi = pointer to string
;
; < eax = size in bytes of the first character, 0 if the sequence is invalid
; < esi = pointer to the next character if eax > 0, otherwise undefined
;
; Modifies ecx and edx, clears the direction flag.
; A zero byte is never accepted, so reading stops at the string terminator.
;
; Only well-formed sequences are accepted:
;   00..7F is excluded at 00 (terminator), 01..7F       -> 1 byte
;   C2..DF  80..BF                                      -> 2 bytes
;   E0      A0..BF  80..BF                              -> 3 bytes
;   E1..EC  80..BF  80..BF                              -> 3 bytes
;   ED      80..9F  80..BF                              -> 3 bytes
;   EE..EF  80..BF  80..BF                              -> 3 bytes
;   F0      90..BF  80..BF  80..BF                      -> 4 bytes
;   F1..F3  80..BF  80..BF  80..BF                      -> 4 bytes
;   F4      80..8F  80..BF  80..BF                      -> 4 bytes
; Everything else (stray continuation bytes, overlong forms, surrogates,
; values above U+10FFFF, truncated sequences) is reported as invalid.
        cld
        mov     ecx, 1                  ; ecx = bytes consumed so far
        lodsb                           ; al = lead byte
        test    al, al
        jz      .invalid                ; the terminator is not a character
        test    al, 0x80
        jz      .valid                  ; 0xxxxxxxb: single-byte character
        cmp     al, 0xc2
        jb      .invalid                ; 80..BF stray continuation, C0/C1 always overlong
        cmp     al, 0xf4
        ja      .invalid                ; F5..FF would encode values above U+10FFFF
        mov     ah, al                  ; ah = lead byte, preserved across the next lodsb

        ; The allowed range of the second byte depends on the lead byte.
        mov     dl, 0x80                ; dl = lowest second byte allowed
        mov     dh, 0xbf                ; dh = highest second byte allowed
        cmp     ah, 0xe0
        jne     .not_e0
        mov     dl, 0xa0                ; E0 80..9F would be an overlong 3-byte form
  .not_e0:
        cmp     ah, 0xed
        jne     .not_ed
        mov     dh, 0x9f                ; ED A0..BF would encode a surrogate
  .not_ed:
        cmp     ah, 0xf0
        jne     .not_f0
        mov     dl, 0x90                ; F0 80..8F would be an overlong 4-byte form
  .not_f0:
        cmp     ah, 0xf4
        jne     .second
        mov     dh, 0x8f                ; F4 90..BF would exceed U+10FFFF
  .second:
        inc     ecx                     ; ecx = 2
        lodsb
        cmp     al, dl
        jb      .invalid                ; also rejects the terminator (0 < dl)
        cmp     al, dh
        ja      .invalid
        cmp     ah, 0xe0
        jb      .valid                  ; C2..DF: two-byte sequence complete

        inc     ecx                     ; ecx = 3
        lodsb
        and     al, 0xc0
        cmp     al, 0x80                ; must be 10xxxxxxb; rejects the terminator too
        jne     .invalid
        cmp     ah, 0xf0
        jb      .valid                  ; E0..EF: three-byte sequence complete

        inc     ecx                     ; ecx = 4
        lodsb
        and     al, 0xc0
        cmp     al, 0x80                ; must be 10xxxxxxb; rejects the terminator too
        jne     .invalid
  .valid:
        mov     eax, ecx
        ret
  .invalid:
        xor     eax, eax
        ret
endp
|
||||
|
||||
|
||||
section '.data' data readable writable align 16
|
||||
|
||||
; Service routine pointers, written by libstr._.init.
; Uninitialized until the library entry point has run.
mem.alloc       dd ?                    ; memory allocation routine   (from eax)
mem.free        dd ?                    ; memory freeing routine      (from ebx)
mem.realloc     dd ?                    ; memory reallocation routine (from ecx)
dll.load        dd ?                    ; library loading routine     (from edx)
|
||||
|
||||
align 16
|
||||
; Export table, published as 'EXPORTS' by the `public` directive at the
; top of the file. Each entry pairs an address (or value) with its
; exported name.
@EXPORT:

export \
        libstr._.init,          'lib_init',             \
        0x00010000,             'version',              \
        str.len,                'str_len',              \
        str.utf8.len,           'str_utf8_len',         \
        str.utf8.num_bytes,     'str_utf8_num_bytes',   \
        str.utf8.validate,      'str_utf8_validate'
|
||||
Reference in New Issue
Block a user