;=============================================================
; count_utf8_codepoints
;   Counts the code points of a NUL-terminated UTF-8 string by
;   counting every byte that is not a continuation byte
;   (10xxxxxx).
;
; In:    eax = address of the NUL-terminated string
; Out:   eax = number of code points
; Clobb: ebx (code point counter), ecx (cl = current byte), flags
;   * Callers must not keep live values in ebx/ecx across the
;     call, they are overwritten.
;=============================================================
count_utf8_codepoints:
        xor     ebx, ebx                ; code point count = 0
.read_loop:
        mov     cl, byte [eax]
        test    cl, cl                  ; terminating NUL byte?
        jz      .done
        and     cl, 0xC0                ; keep the two marker bits
        cmp     cl, 0x80                ; 10xxxxxx = continuation byte
        je      .skip_count
        inc     ebx                     ; ASCII or lead byte starts a code point
.skip_count:
        inc     eax
        jmp     .read_loop
.done:
        mov     eax, ebx
        ret

;=============================================================
; count_utf8_gramphene  (sic: the label spelling is kept because
;                        callers reference it)
;   Approximates the number of graphemes in a NUL-terminated
;   UTF-8 string. It counts code points, except that:
;     * combining diacritical marks U+0300..U+036F
;       (bytes CC 80..BF and CD 80..AF) are not counted;
;     * a zero width joiner U+200D (bytes E2 80 8D) is not
;       counted and makes the code point that follows it part of
;       the preceding grapheme. A joiner with no grapheme before
;       it is ignored.
;
; In:    eax = address of the NUL-terminated string
; Out:   eax = number of graphemes (never negative)
; Clobb: ebx (grapheme counter),
;        ecx (cl = current byte, ch = 1 while a join is pending),
;        flags
;   * Callers must not keep live values in ebx/ecx across the
;     call, they are overwritten.
;=============================================================
count_utf8_gramphene:
        xor     ebx, ebx                ; grapheme count = 0
        xor     ecx, ecx                ; ch = 0: no join pending
.read_loop:
        mov     cl, byte [eax]
        test    cl, cl                  ; terminating NUL byte?
        jz      .done

        ; --- combining diacritical marks, U+0300..U+036F ---
        cmp     cl, 0xCC                ; CC 80..BF = U+0300..U+033F
        je      .skip_count
        cmp     cl, 0xCD
        jne     .check_zwj
        ; CD 80..AF = U+0340..U+036F (combining); CD B0..BF = U+0370..U+037F
        ; are ordinary Greek characters and must be counted. A NUL second
        ; byte (truncated input) takes the skip path and ends the loop on
        ; the next iteration, so nothing past the terminator is read.
        cmp     byte [eax+1], 0xB0
        jb      .skip_count
        jmp     .classify

.check_zwj:
        ; --- zero width joiner, U+200D = E2 80 8D ---
        ; Each byte is compared only after the previous one matched, so a
        ; NUL stops the comparison before reading beyond the string.
        cmp     cl, 0xE2
        jne     .classify
        cmp     byte [eax+1], 0x80
        jne     .classify
        cmp     byte [eax+2], 0x8D
        jne     .classify
        add     eax, 3                  ; consume the joiner itself
        test    ebx, ebx
        jz      .read_loop              ; nothing before it to join to: ignore
        mov     ch, 1                   ; next code point joins the previous grapheme
        jmp     .read_loop

.classify:
        and     cl, 0xC0                ; keep the two marker bits (ch untouched)
        cmp     cl, 0x80                ; 10xxxxxx = continuation byte
        je      .skip_count
        test    ch, ch                  ; joined to the previous grapheme?
        jnz     .joined
        inc     ebx                     ; start of a new grapheme
        jmp     .skip_count
.joined:
        xor     ch, ch                  ; join consumed by this code point
.skip_count:
        inc     eax
        jmp     .read_loop
.done:
        mov     eax, ebx
        ret