Bugfixes in UTF-8 character counting for IRC client.

git-svn-id: svn://kolibrios.org@6596 a494cfbc-eb01-0410-851d-a64ba20cac60
This commit is contained in:
hidnplayr 2016-10-15 13:44:06 +00:00
parent 9a7d86dfc0
commit a8a15eb424
2 changed files with 11 additions and 19 deletions

View File

@ -13,7 +13,7 @@
;; ;; ;; ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
version equ '0.28' version equ '0.29'
; connection status ; connection status
STATUS_DISCONNECTED = 0 STATUS_DISCONNECTED = 0

View File

@ -23,9 +23,8 @@ text_insert_newlines: ; esi = ASCIIZ string
.more: .more:
dec ecx dec ecx
jz .end_of_line jz .end_of_line
.next_byte:
lodsb ; get one character of the string lodsb ; get one character of the string
test al, 0x80
jnz .multiball!
test al, al ; end of string? test al, al ; end of string?
jz .almost_done jz .almost_done
cmp al, ' ' ; it's a space! remember its position cmp al, ' ' ; it's a space! remember its position
@ -33,7 +32,12 @@ text_insert_newlines: ; esi = ASCIIZ string
cmp al, 13 ; we already inserted a newline once, make it a space again cmp al, 13 ; we already inserted a newline once, make it a space again
je .soft_nl je .soft_nl
cmp al, 10 ; it's a newline, continue onto the next line cmp al, 10 ; it's a newline, continue onto the next line
jne .more je .newline
and al, 0xc0 ; Is it a multi byte UTF8 char?
cmp al, 0x80
je .next_byte
jmp .more
.newline:
inc edx inc edx
jmp .next_line jmp .next_line
.soft_nl: .soft_nl:
@ -54,19 +58,6 @@ text_insert_newlines: ; esi = ASCIIZ string
ret ret
.multiball!:
add esi, 4
and al, 11111000b
cmp al, 11110000b
je .more
dec esi
and al, 11110000b
cmp al, 11100000b
je .more
dec esi
jmp .more
;---------------------------------- ;----------------------------------
; scan until next line is reached ; scan until next line is reached
@ -86,13 +77,14 @@ text_nextline:
je .done je .done
cmp al, 13 cmp al, 13
je .done je .done
and al, 0xc0
cmp al, 0x80
je .loop ; This byte is the second, third or fourth byte of a multi-byte UTF8 char
dec ecx dec ecx
jnz .loop jnz .loop
.done: .done:
ret ret
;---------------------------------- ;----------------------------------
; print string ; print string
; ;