; libunicode -- KolibriOS Unicode Library
;
; Copyright (C) <2026> KolibriOS.org Team
; Author:
;   1. Swarnadeep Paul
;
; This program is free software: you can redistribute it and/or modify it under
; the terms of the GNU General Public License as published by the Free Software
; Foundation, either version 2 of the License, or (at your option) any later
; version.
;
; This program is distributed in the hope that it will be useful, but WITHOUT
; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
; FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License along with
; this program. If not, see <https://www.gnu.org/licenses/>.

format MS COFF

public @EXPORT as 'EXPORTS'

include "macros.inc"
include "proc32.inc"

; --- UTF-8 byte patterns used below --------------------------------------
UTF8_CONT_MASK        = 0xC0    ; top two bits select the byte class
UTF8_CONT_TAG         = 0x80    ; 10xxxxxx = continuation byte
UTF8_COMBINING_LEAD_1 = 0xCC    ; CC 80..CC BF = U+0300..U+033F (all combining)
UTF8_COMBINING_LEAD_2 = 0xCD    ; CD 80..CD AF = U+0340..U+036F (combining)
UTF8_COMBINING_LAST_2 = 0xAF    ; CD B0..CD BF = U+0370..U+037F (NOT combining)
UTF8_ZWJ_BYTE_0       = 0xE2    ; U+200D ZERO WIDTH JOINER = E2 80 8D
UTF8_ZWJ_BYTE_1       = 0x80
UTF8_ZWJ_BYTE_2       = 0x8D

section '.flat' code readable align 16

;=============================================================
; CodePoint Counting function
;
; Walks the string one byte at a time and counts every byte
; that is not a UTF-8 continuation byte (10xxxxxx), i.e. every
; byte that starts a code point. The input is not validated.
;
; Parameters:
;   _str_ptr = pointer to the memory address of the null
;              terminated string
; Return values:
;   eax = total codepoints
; Registers:
;   ebx is preserved (saved through `uses`); cl and the flags
;   are overwritten.
;=============================================================
proc count_utf8_codepoints uses ebx, _str_ptr
        mov     eax, [_str_ptr]         ; eax = read cursor
        xor     ebx, ebx                ; ebx = code points seen so far
.next_byte:
        mov     cl, byte [eax]
        test    cl, cl                  ; terminating NUL?
        je      .finished
        and     cl, UTF8_CONT_MASK
        cmp     cl, UTF8_CONT_TAG
        je      .advance                ; continuation bytes never start a code point
        inc     ebx
.advance:
        inc     eax
        jmp     .next_byte
.finished:
        mov     eax, ebx
        ret
endp

;=============================================================
; Grapheme Counting function
;
; Heuristic count of user-perceived characters. It is NOT a
; full UAX #29 segmentation; only two cases are merged into
; the preceding character:
;   * combining diacritical marks U+0300..U+036F
;     (CC 80..CC BF and CD 80..CD AF) are not counted;
;   * U+200D ZERO WIDTH JOINER (E2 80 8D) is not counted and
;     makes the code point that follows it join the character
;     before it, so "A ZWJ B" counts as one.
; A joiner with nothing counted before it is ignored, and a
; joiner at the end of the string leaves the count untouched,
; so the result can never go below zero.
;
; Parameters:
;   _str_ptr = pointer to the memory address of the null
;              terminated string
; Return values:
;   eax = total grapheme count
; Registers:
;   ebx and edx are preserved (saved through `uses`); cl and
;   the flags are overwritten.
;=============================================================
proc count_utf8_graphemes uses ebx edx, _str_ptr
        mov     eax, [_str_ptr]         ; eax = read cursor
        xor     ebx, ebx                ; ebx = graphemes counted so far
        xor     edx, edx                ; edx = 1 while a joiner waits for its right-hand side
.read_loop:
        mov     cl, byte [eax]
        test    cl, cl                  ; terminating NUL?
        je      .done

        ; --- combining accent? -------------------------------------------
        cmp     cl, UTF8_COMBINING_LEAD_1
        je      .skip_count             ; every CC xx sequence is a combining mark
        cmp     cl, UTF8_COMBINING_LEAD_2
        jne     .check_joiner
        ; Lead byte CD: only second bytes 80..AF are combining marks.
        ; B0..BF encode U+0370..U+037F, which are ordinary characters.
        ; A NUL here (truncated input) also falls through and is then
        ; picked up by the loop's terminator test.
        cmp     byte [eax+1], UTF8_CONT_TAG
        jb      .not_any_special
        cmp     byte [eax+1], UTF8_COMBINING_LAST_2
        jbe     .skip_count
        jmp     .not_any_special

        ; --- zero width joiner? ------------------------------------------
.check_joiner:
        cmp     cl, UTF8_ZWJ_BYTE_0
        jne     .not_any_special
        cmp     byte [eax+1], UTF8_ZWJ_BYTE_1
        jne     .not_any_special        ; also stops here on a NUL, so [eax+2] is never read past the end
        cmp     byte [eax+2], UTF8_ZWJ_BYTE_2
        jne     .not_any_special
        add     eax, 3                  ; consume the whole joiner
        test    ebx, ebx
        jz      .read_loop              ; nothing counted yet: nothing to join to
        mov     edx, 1                  ; next code point belongs to the previous grapheme
        jmp     .read_loop

        ; --- ordinary byte -----------------------------------------------
.not_any_special:
        and     cl, UTF8_CONT_MASK      ; Is this a continuation byte
        cmp     cl, UTF8_CONT_TAG
        je      .skip_count
        test    edx, edx                ; start of a code point: joined to the previous one?
        jnz     .absorb
        inc     ebx
        jmp     .skip_count
.absorb:
        xor     edx, edx                ; joiner satisfied; do not count this code point
.skip_count:
        inc     eax
        jmp     .read_loop
.done:
        mov     eax, ebx
        ret
endp

align 4
@EXPORT:
export \
    count_utf8_codepoints, "utf8.count_codepoints", \
    count_utf8_graphemes,  "utf8.count_graphemes"