Files
kolibrios/programs/develop/libraries/libunicode/libunicode.asm
Swarnadeep Paul 4ff3024108 Convert initial libunicode into library structure
Edit the example file to reflect the changes
2026-03-18 02:15:50 +05:30

123 lines
3.0 KiB
NASM

; libunicode -- KolibriOS Unicode Library
;
; Copyright (C) <2026> KolibriOS.org Team
; Author:
; 1. Swarnadeep Paul <swarnadeep@mail.com>
;
; This program is free software: you can redistribute it and/or modify it under
; the terms of the GNU General Public License as published by the Free Software
; Foundation, either version 2 of the License, or (at your option) any later
; version.
;
; This program is distributed in the hope that it will be useful, but WITHOUT
; ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
; FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details.
;
; You should have received a copy of the GNU General Public License along with
; this program. If not, see <http://www.gnu.org/licenses/>.
; Assemble this file into an MS COFF object file.
format MS COFF
; Make the @EXPORT label (defined further down in this file) public under
; the symbol name 'EXPORTS'.
public @EXPORT as 'EXPORTS'
; NOTE(review): these includes presumably supply the `export` and
; `proc`/`endp` macros used below -- confirm against the include files.
include "macros.inc"
include "proc32.inc"
; Everything that follows is placed in one section named '.flat',
; declared as readable code with 16-byte alignment.
section '.flat' code readable align 16
;=============================================================
; count_utf8_codepoints -- count the code points in a UTF-8 string
;
; Every byte that is NOT a UTF-8 continuation byte (bit pattern
; 10xxxxxx) starts a new code point, so the result is the number
; of such bytes found before the terminating zero byte.
; The input is not validated: malformed sequences are counted
; with the same rule.
;
; Parameters:
;   eax = pointer to the null-terminated string
; Return values:
;   eax = total codepoints
; Registers:
;   ebx = running code point counter; saved and restored by the
;         `uses` clause, so the caller's value survives the call
;   ecx = scratch (cl holds the current byte); clobbered
;   flags are clobbered
;=============================================================
proc count_utf8_codepoints uses ebx
        xor     ebx, ebx                ; no code points seen yet
.read_loop:
        mov     cl, byte [eax]
        test    cl, cl                  ; zero byte terminates the string
        jz      .done
        and     cl, 0xC0                ; keep only the two top bits
        cmp     cl, 0x80                ; 10xxxxxx = continuation byte
        je      .skip_count             ; belongs to a code point already counted
        inc     ebx                     ; ASCII or lead byte: a new code point
.skip_count:
        inc     eax                     ; advance to the next byte
        jmp     .read_loop
.done:
        mov     eax, ebx                ; return the count in eax
        ret
endp
;=============================================================
; count_utf8_graphemes -- approximate count of user-perceived
; characters (graphemes) in a UTF-8 string
;
; Works like a code point count, except that:
;   * combining diacritical marks U+0300..U+036F (encoded as
;     CC 80..CC BF and CD 80..CD AF) are not counted, because
;     they attach to the preceding character;
;   * a ZERO WIDTH JOINER, U+200D (E2 80 8D), is not counted and
;     also glues the following code point onto the grapheme
;     before it, so "X ZWJ Y" counts as one grapheme.
; A ZWJ with nothing counted before it, or with nothing after
; it, has no effect on the total. The input is not validated.
; NOTE(review): other extending code points (e.g. variation
; selectors) are still counted as separate graphemes.
;
; Parameters:
;   eax = pointer to the null-terminated string
; Return values:
;   eax = total graphemes
; Registers:
;   ebx = running grapheme counter     (saved/restored by `uses`)
;   edx = 1 while a ZWJ is waiting to absorb the next code
;         point, else 0                (saved/restored by `uses`)
;   ecx = scratch (cl holds the current byte); clobbered
;   flags are clobbered
;=============================================================
proc count_utf8_graphemes uses ebx edx
        xor     ebx, ebx                ; no graphemes counted yet
        xor     edx, edx                ; no join pending
.read_loop:
        mov     cl, byte [eax]
        test    cl, cl                  ; zero byte terminates the string
        jz      .done

        ; --- combining diacritical marks -----------------------
        cmp     cl, 0xCC                ; CC 80..CC BF = U+0300..U+033F
        je      .skip_count
        cmp     cl, 0xCD
        jne     .check_zwj
        ; [eax] is non-zero, so [eax+1] is at worst the terminator.
        cmp     byte [eax+1], 0xAF      ; CD 80..CD AF = U+0340..U+036F
        jbe     .skip_count
        jmp     .ordinary               ; CD B0..CD BF = U+0370..U+037F: real letters

        ; --- zero width joiner (E2 80 8D) ----------------------
.check_zwj:
        cmp     cl, 0xE2
        jne     .ordinary
        cmp     byte [eax+1], 0x80      ; each compare only runs when the previous
        jne     .ordinary               ; byte was non-zero, so no read goes past
        cmp     byte [eax+2], 0x8D      ; the terminator
        jne     .ordinary
        add     eax, 3                  ; step over the whole joiner
        test    ebx, ebx
        jz      .read_loop              ; nothing counted yet: nothing to join onto
        mov     edx, 1                  ; next code point extends the current grapheme
        jmp     .read_loop

        ; --- everything else -----------------------------------
.ordinary:
        and     cl, 0xC0                ; keep only the two top bits
        cmp     cl, 0x80                ; 10xxxxxx = continuation byte
        je      .skip_count
        test    edx, edx                ; start of a code point: is it joined?
        jnz     .joined
        inc     ebx                     ; no: it begins a new grapheme
        jmp     .skip_count
.joined:
        xor     edx, edx                ; yes: absorbed by the previous grapheme
.skip_count:
        inc     eax                     ; advance to the next byte
        jmp     .read_loop
.done:
        mov     eax, ebx                ; return the count in eax
        ret
endp
; Export table. The @EXPORT label is the one made public under the name
; 'EXPORTS' by the `public` directive near the top of this file.
align 4
@EXPORT:
; Arguments are (procedure label, exported name string) pairs.
; NOTE(review): `export` is a macro that is not defined in this file
; (presumably it comes from macros.inc) -- confirm the table layout there.
export \
count_utf8_codepoints, "utf8.count_codepoints", \
count_utf8_graphemes, "utf8.count_graphemes"