format MS COFF

public @EXPORT as 'EXPORTS'

include '../../../../proc32.inc'
include '../../../../macros.inc'

section '.flat' code readable align 16

;-----------------------------------------------------------------------------
proc libstr._.init
; Library entry point (called after library load)
;
;> eax = memory allocation routine
;> ebx = memory freeing routine
;> ecx = memory reallocation routine
;> edx = library loading routine
;
;< eax = 0 (ok); this routine only stores the four pointers and never
;        reports failure
        mov     [mem.alloc], eax
        mov     [mem.free], ebx
        mov     [mem.realloc], ecx
        mov     [dll.load], edx
        xor     eax, eax
        ret
endp

;-----------------------------------------------------------------------------
proc str.len _string
; Returns the length of the string in bytes
;
; > _string = pointer to null-terminated string
;
; < eax = string length without null-char
;
; Modifies ecx and flags; edi is saved and restored.
        push    edi
        or      ecx, -1                 ; ecx = 0xffffffff: no scan limit
        mov     edi, [_string]
        xor     eax, eax                ; al = 0, the byte to search for
        cld
        repnz scasb                     ; ecx drops by (length + 1)
        inc     ecx                     ; ecx = -(length + 1)
        not     ecx                     ; ecx = length
        mov     eax, ecx
        pop     edi
        ret
endp

;-----------------------------------------------------------------------------
proc str.utf8.len _string
; Returns the number of Unicode characters
;
; Every byte that is not a continuation byte (10xxxxxxb) is counted as the
; start of a character; the string is not validated.
;
; > _string = pointer to utf-8 coded Unicode string
;
; < eax = Unicode characters count
;
; Modifies ecx and flags; esi is saved and restored.
        push    esi
        mov     esi, [_string]
        xor     ecx, ecx                ; ecx = characters counted so far
        cld                             ; lodsb must walk forward, as in the
                                        ; other routines of this file
  .scan:
        lodsb
        test    al, al
        jz      .end
        and     al, 0xc0
        cmp     al, 0x80                ; 10xxxxxxb = continuation byte
        je      .scan                   ; belongs to the previous character
        inc     ecx
        jmp     .scan
  .end:
        mov     eax, ecx
        pop     esi
        ret
endp

;-----------------------------------------------------------------------------
proc str.utf8.validate _string
; Validates a UTF-8 encoded string and returns its size in bytes
;
; > _string = pointer to utf-8 coded Unicode string
;
; < eax = size in bytes without null-char, 0 if the string is invalid
;         (an empty string also yields 0)
;
; Modifies ecx, edx and flags (through utf8_num_bytes); esi and edi are
; saved and restored.
        push    esi edi
        xor     edi, edi                ; edi = bytes validated so far
        mov     esi, [_string]
  .loop:
        cmp     byte [esi], 0
        jz      .exit
        call    utf8_num_bytes          ; eax = sequence size, esi advanced
        test    eax, eax
        jz      .error
        add     edi, eax
        jmp     .loop
  .exit:
        mov     eax, edi
        pop     edi esi
        ret
  .error:
        xor     eax, eax
        pop     edi esi
        ret
endp

;-----------------------------------------------------------------------------
proc str.utf8.num_bytes, _string
; Returns size of utf-8 sequence
;
; > _string = pointer to utf-8 coded Unicode string
;
; < eax = size in bytes 1 to 4 if sequence is valid, 0 otherwise
;
; Modifies ecx, edx and flags (through utf8_num_bytes); esi is saved and
; restored.
        push    esi
        mov     esi, [_string]
        call    utf8_num_bytes
        pop     esi
        ret
endp

;-----------------------------------------------------------------------------
proc utf8_num_bytes
; Internal helper: measures the utf-8 sequence at esi.
;
; > esi = pointer to string
;
; < eax = size in bytes of the first character, 0 if the sequence is invalid
; < esi = pointer to the next character if eax > 0, otherwise undefined
;
; Modifies ecx, edx and flags.
;
; A null byte, a stray continuation byte, and the lead bytes 0xc0, 0xc1 and
; 0xf5..0xff (which never occur in UTF-8) are rejected.
; NOTE(review): overlong 3/4-byte forms (e0 80..9f, f0 80..8f), surrogates
; (ed a0..bf) and f4 90.. are still accepted, because only the top two bits
; of each continuation byte are inspected.
        mov     ecx, 1                  ; ecx = bytes consumed so far
        cld
        lodsb
        test    al, al
        jz      .invalid                ; empty string has no first character
        test    al, 0x80
        jz      .valid                  ; 0xxxxxxxb: single-byte character
        cmp     al, 0xc2
        jb      .invalid                ; 0x80..0xbf stray continuation byte,
                                        ; 0xc0/0xc1 overlong 2-byte lead
        cmp     al, 0xf4
        ja      .invalid                ; lead byte for code points > U+10FFFF
        mov     ah, al                  ; save original prefix
        mov     dh, 0xe0                ; initial mask:            11100000b
        mov     dl, 0xc0                ; initial expected prefix: 110xxxxxb
  .loop:
        inc     ecx
        lodsb
        test    al, al
        jz      .invalid                ; string ended inside the sequence
        and     al, 0xc0                ; keep the two top bits
        cmp     al, 0x80                ; 10xxxxxxb
        jne     .invalid                ; not a continuation byte
        mov     al, ah
        and     al, dh
        cmp     al, dl                  ; does the lead byte announce exactly
        je      .valid                  ; ecx bytes?
        cmp     ecx, 4
        jae     .invalid                ; defensive bound: never longer than 4
        sar     dl, 1                   ; expected prefix: 110xxxxxb -> 1110xxxxb
        sar     dh, 1                   ; mask: 11100000b -> 11110000b
        jmp     .loop
  .valid:
        mov     eax, ecx
        ret
  .invalid:
        xor     eax, eax
        ret
endp

;-----------------------------------------------------------------------------
section '.data' data readable writable align 16

; Routine pointers stored by libstr._.init
mem.alloc       dd ?
mem.free        dd ?
mem.realloc     dd ?
dll.load        dd ?

align 16
@EXPORT:

export                                                  \
        libstr._.init,          'lib_init',             \
        0x00010000,             'version',              \
        str.len,                'str_len',              \
        str.utf8.len,           'str_utf8_len',         \
        str.utf8.num_bytes,     'str_utf8_num_bytes',   \
        str.utf8.validate,      'str_utf8_validate'