;=============================================================
; count_utf8_codepoints
;   Counts the code points of a NUL-terminated UTF-8 string.
;
;   In:    eax = address of the string
;   Out:   eax = number of code points
;   Clobb: ebx (running count), cl (current byte), flags
;          * Callers must not keep live values in these registers.
;
;   Every byte that is not a continuation byte (10xxxxxx) starts a
;   code point, so only those bytes are counted. The input is not
;   validated.
;=============================================================
count_utf8_codepoints:
        xor     ebx, ebx                ; count = 0
.next_byte:
        mov     cl, byte [eax]
        test    cl, cl                  ; terminating NUL?
        jz      .finished
        and     cl, 0xC0                ; keep the two top bits
        cmp     cl, 0x80                ; 10xxxxxx = continuation byte
        je      .advance
        inc     ebx                     ; ASCII or lead byte: new code point
.advance:
        inc     eax
        jmp     .next_byte
.finished:
        mov     eax, ebx
        ret

;=============================================================
; count_utf8_gramphene   (sic - the public symbol name is kept as is)
;   Approximates the number of graphemes of a NUL-terminated UTF-8
;   string. This is a simplified model, not full Unicode segmentation:
;     * combining diacritical marks U+0300..U+036F are not counted;
;     * a zero width joiner U+200D (E2 80 8D) is not counted and merges
;       the next counted code point into the preceding grapheme.
;
;   In:    eax = address of the string
;   Out:   eax = number of graphemes
;   Clobb: ebx (running count), cl (current byte),
;          ch (1 while a join is pending), flags
;          * Callers must not keep live values in these registers.
;=============================================================
count_utf8_gramphene:
        xor     ebx, ebx                ; count = 0
        xor     ch, ch                  ; no join pending
.next_byte:
        mov     cl, byte [eax]
        test    cl, cl                  ; terminating NUL?
        jz      .finished

        ; --- combining diacritical marks -----------------------------
        cmp     cl, 0xCC                ; CC 80..BF = U+0300..U+033F, all combining
        je      .advance
        cmp     cl, 0xCD
        jne     .check_zwj
        ; [eax] is non-zero here, so [eax+1] is at worst the terminator.
        cmp     byte [eax+1], 0xB0      ; CD 80..AF = U+0340..U+036F, combining
        jb      .advance
        jmp     .classify               ; CD B0..BF = U+0370..U+037F, ordinary characters

        ; --- zero width joiner: E2 80 8D -----------------------------
.check_zwj:
        cmp     cl, 0xE2
        jne     .classify
        cmp     byte [eax+1], 0x80      ; a mismatch (including NUL) stops the
        jne     .classify               ; comparison before [eax+2] is read
        cmp     byte [eax+2], 0x8D
        jne     .classify
        add     eax, 3                  ; consume the joiner itself
        test    ebx, ebx                ; nothing counted yet: nothing to join to
        jz      .next_byte
        mov     ch, 1                   ; next counted code point joins the previous one
        jmp     .next_byte

        ; --- everything else -----------------------------------------
.classify:
        and     cl, 0xC0                ; keep the two top bits
        cmp     cl, 0x80                ; 10xxxxxx = continuation byte
        je      .advance
        test    ch, ch                  ; join pending?
        jnz     .joined
        inc     ebx                     ; start of a new grapheme
        jmp     .advance
.joined:
        xor     ch, ch                  ; code point absorbed into previous grapheme
.advance:
        inc     eax
        jmp     .next_byte
.finished:
        mov     eax, ebx
        ret