forked from KolibriOS/kolibrios
1. Use the cinvoke macro instead of invoke for con_printf. 2. Rename `end:` to `P_END:`, because `end` is a FASM keyword. 3. Upgrade all functions in libunicode.asm to use the stdcall convention.
121 lines
3.0 KiB
NASM
121 lines
3.0 KiB
NASM
; libunicode -- KolibriOS Unicode Library
|
|
;
|
|
; Copyright (C) <2026> KolibriOS.org Team
|
|
; Author:
|
|
; 1. Swarnadeep Paul <swarnadeep@mail.com>
|
|
;
|
|
; This program is free software: you can redistribute it and/or modify it under
|
|
; the terms of the GNU General Public License as published by the Free Software
|
|
; Foundation, either version 2 of the License, or (at your option) any later
|
|
; version.
|
|
;
|
|
; This program is distributed in the hope that it will be useful, but WITHOUT
|
|
; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
|
|
; FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
|
|
;
|
|
; You should have received a copy of the GNU General Public License along with
|
|
; this program. If not, see <http://www.gnu.org/licenses/>.
|
|
|
|
format MS COFF
|
|
|
|
public @EXPORT as 'EXPORTS'
|
|
|
|
include "macros.inc"
|
|
include "proc32.inc"
|
|
|
|
section '.flat' code readable align 16
|
|
|
|
|
|
;=============================================================
; count_utf8_codepoints -- count codepoints in a UTF-8 string
;
; Walks the string one byte at a time and counts every byte
; that is NOT a UTF-8 continuation byte (bit pattern 10xxxxxx).
; The input is not validated; malformed sequences are simply
; counted by their non-continuation bytes.
;
; Parameters:
;       _str_ptr = pointer to the null terminated string
; Return values:
;       eax = total codepoints (0 for an empty string)
; Registers:
;       ebx is preserved (saved by `uses`); cl and the flags
;       are modified.
;=============================================================
proc count_utf8_codepoints uses ebx, _str_ptr
        mov     ebx, [_str_ptr]         ; ebx = read cursor
        xor     eax, eax                ; eax = running codepoint total
        jmp     .fetch

.advance:
        inc     ebx                     ; step to the next byte

.fetch:
        mov     cl, byte [ebx]
        test    cl, cl                  ; terminating zero byte?
        jz      .finished

        and     cl, 0xC0                ; keep the two top bits only
        cmp     cl, 0x80                ; 10xxxxxx = continuation byte
        je      .advance                ; continuation bytes are not counted

        inc     eax                     ; first byte of a codepoint
        jmp     .advance

.finished:
        ret                             ; result is already in eax
endp
|
|
|
|
;=============================================================
; count_utf8_graphemes -- approximate grapheme count of a
; UTF-8 string
;
; This is a lightweight heuristic, not a full implementation
; of Unicode text segmentation. A codepoint starts a new
; grapheme unless it is:
;   * a combining diacritical mark U+0300..U+036F
;     (encoded as 0xCC 0x80..0xBF or 0xCD 0x80..0xAF), or
;   * a ZERO WIDTH JOINER U+200D (0xE2 0x80 0x8D), or
;   * the first base codepoint after a ZWJ that itself follows
;     an already counted grapheme (it is merged into it).
; A ZWJ with nothing before it, a trailing ZWJ and repeated
; ZWJs never reduce the total, so the result cannot underflow.
; The input is not validated as UTF-8.
;
; Parameters:
;       _str_ptr = pointer to the null terminated string
; Return values:
;       eax = total grapheme count (0 for an empty string)
; Registers:
;       ebx and edx are preserved (saved by `uses`); cl and
;       the flags are modified.
;=============================================================
proc count_utf8_graphemes uses ebx edx, _str_ptr
        mov     eax, [_str_ptr]         ; eax = read cursor
        xor     ebx, ebx                ; ebx = graphemes counted so far
        xor     edx, edx                ; edx = 1 while a ZWJ waits to absorb
                                        ;       the next base codepoint

.read_loop:
        mov     cl, byte [eax]
        test    cl, cl                  ; terminating zero byte?
        jz      .done

        ; --- combining diacritical marks, U+0300..U+036F ---
        cmp     cl, 0xCC                ; 0xCC xx = U+0300..U+033F, all combining
        je      .skip_count

        cmp     cl, 0xCD                ; 0xCD xx = U+0340..U+037F
        jne     .check_zwj
        cmp     byte [eax+1], 0xAF      ; only up to 0xCD 0xAF (U+036F) combines
        jbe     .skip_count
        jmp     .count_base             ; U+0370..U+037F are ordinary characters

.check_zwj:
        ; --- zero width joiner, U+200D = 0xE2 0x80 0x8D ---
        ; A mismatch on any byte stops the comparison, so bytes past the
        ; terminating zero are never read.
        cmp     cl, 0xE2
        jne     .not_any_special
        cmp     byte [eax+1], 0x80
        jne     .not_any_special
        cmp     byte [eax+2], 0x8D
        jne     .not_any_special

        add     eax, 3                  ; consume the whole joiner
        test    ebx, ebx
        jz      .read_loop              ; nothing counted yet: nothing to join to
        mov     edx, 1                  ; next base codepoint joins the previous grapheme
        jmp     .read_loop

.not_any_special:
        and     cl, 0xC0                ; is this a continuation byte (10xxxxxx)?
        cmp     cl, 0x80
        je      .skip_count

.count_base:
        test    edx, edx                ; is a joiner waiting for this codepoint?
        jnz     .joined
        inc     ebx                     ; no: it starts a new grapheme
        jmp     .skip_count

.joined:
        xor     edx, edx                ; yes: merged into the previous grapheme

.skip_count:
        inc     eax
        jmp     .read_loop

.done:
        mov     eax, ebx
        ret
endp
|
|
|
|
; Export table. The @EXPORT label is the symbol made public as
; 'EXPORTS' near the top of this file. The `export` macro is given
; pairs of (procedure label, exported name string).
align 4
@EXPORT:

export  count_utf8_codepoints, "utf8.count_codepoints", \
        count_utf8_graphemes,  "utf8.count_graphemes"
|
|
|