; kolibrios/programs/encoding.inc

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ;;
;; Copyright (C) KolibriOS team 2013-2015. All rights reserved. ;;
;; Distributed under terms of the GNU General Public License ;;
;; ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Helper for fetch_utf8_char.
; Loads the byte at addrspace:pos, requires it to be a UTF-8 continuation
; byte (bit pattern 10xxxxxx) and appends its six payload bits to char.
; Assembly is aborted if the byte is not a continuation byte.
macro fetch_utf8_cont addrspace, pos, char
{
local b
        load b byte from addrspace:pos
        if (b and 0xC0) <> 0x80
                err Invalid UTF-8 string
        end if
        char = (char shl 6) + (b and 0x3F)
}
; Decode one UTF-8 character at assembly time.
;   addrspace - label of the addressing space that holds the UTF-8 bytes
;   offs      - assembly-time variable with the offset of the character;
;               on return it is advanced past the decoded character
;   char      - assembly-time variable that receives the code point
; Assembly is aborted with an error on malformed input: a stray or missing
; continuation byte, an overlong form, a UTF-16 surrogate, a value above
; 0x10FFFF, or a lead byte 0xF8..0xFF.
macro fetch_utf8_char addrspace, offs, char
{
local first_byte
        load first_byte byte from addrspace:offs
        if first_byte < 0x80
                ; single byte: 0xxxxxxx
                char = first_byte
                offs = offs + 1
        else if first_byte < 0xC2
                ; 0x80..0xBF is a continuation byte without a lead byte,
                ; 0xC0 and 0xC1 can only start overlong two-byte forms
                err Invalid UTF-8 string
        else if first_byte < 0xE0
                ; two bytes: 110xxxxx 10xxxxxx
                char = first_byte and 0x1F
                fetch_utf8_cont addrspace, offs + 1, char
                offs = offs + 2
        else if first_byte < 0xF0
                ; three bytes: 1110xxxx 10xxxxxx 10xxxxxx
                char = first_byte and 0xF
                fetch_utf8_cont addrspace, offs + 1, char
                fetch_utf8_cont addrspace, offs + 2, char
                if char < 0x800
                        ; overlong form of a shorter sequence
                        err Invalid UTF-8 string
                else if (char >= 0xD800) & (char <= 0xDFFF)
                        ; UTF-16 surrogates are not valid scalar values
                        err Invalid UTF-8 string
                end if
                offs = offs + 3
        else if first_byte < 0xF8
                ; four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
                char = first_byte and 0x7
                fetch_utf8_cont addrspace, offs + 1, char
                fetch_utf8_cont addrspace, offs + 2, char
                fetch_utf8_cont addrspace, offs + 3, char
                if (char < 0x10000) | (char > 0x10FFFF)
                        ; overlong form or beyond the Unicode range
                        err Invalid UTF-8 string
                end if
                offs = offs + 4
        else
                ; 0xF8..0xFF never appear in UTF-8
                err Invalid UTF-8 string
        end if
}
; Shared conversion driver behind every encoding.
;   encoding - name of a one-argument macro that emits the target byte for
;              a code point this driver does not handle itself
;   arg      - data items (as accepted by db) holding UTF-8 text
; Handled here, identically for all encodings:
;   * code points 0x00-0x7F are emitted unchanged;
;   * box-drawing characters used on the boot screen:
;       0x2500 -> 0xC4, 0x2502 -> 0xB3, 0x250C -> 0xDA, 0x2510 -> 0xBF,
;       0x2514 -> 0xC0, 0x2518 -> 0xD9, 0x252C -> 0xC2, 0x2534 -> 0xC1,
;       0x2551 -> 0xBA
; Every other code point is passed to the encoding macro.
macro convert_utf8 encoding, [arg]
{
common
local ..src, pos, codepoint
        ; Assemble the source bytes into a private addressing space so they
        ; can be read back with load without being emitted to the output.
        virtual at 0
        ..src:: db arg
        ..src#.size = $
        end virtual
        pos = 0
        while pos < ..src#.size
                fetch_utf8_char ..src, pos, codepoint
                if codepoint < 0x80
                        db codepoint
                else if codepoint = 0x2500
                        db 0xC4
                else if codepoint = 0x2502
                        db 0xB3
                else if codepoint = 0x250C
                        db 0xDA
                else if codepoint = 0x2510
                        db 0xBF
                else if codepoint = 0x2514
                        db 0xC0
                else if codepoint = 0x2518
                        db 0xD9
                else if codepoint = 0x252C
                        db 0xC2
                else if codepoint = 0x2534
                        db 0xC1
                else if codepoint = 0x2551
                        db 0xBA
                else
                        encoding codepoint
                end if
        end while
}
; Declare a target encoding called <encoding>.
; Two front ends with that name are defined, a macro and a struc; both
; forward all of their arguments to convert_utf8 and name <encoding>char as
; the per-character handler.
; The last line of this macro starts the definition of <encoding>char but
; supplies no body: the brace-enclosed block written right after each
; declare_encoding invocation becomes that body. Inside it the code point to
; convert is available as char.
macro declare_encoding encoding
{
macro encoding [arg]
\{ common convert_utf8 encoding#char, arg \}
struc encoding [arg]
\{ common convert_utf8 encoding#char, arg \}
macro encoding#char char
}
; Russian text is stored as CP866.
;   0x410-0x43F (U+0410..U+043F)  -> 0x80-0xAF
;   0x440-0x44F (U+0440..U+044F)  -> 0xE0-0xEF
;   0x401 (U+0401)                -> 0xF0
;   0x451 (U+0451)                -> 0xF1
; Any other code point that reaches this handler aborts assembly.
declare_encoding cp866
{
        if (char >= 0x410) & (char <= 0x43F)
                db char - 0x410 + 0x80
        else if (char >= 0x440) & (char <= 0x44F)
                db char - 0x440 + 0xE0
        else if char = 0x401
                db 0xF0
        else if char = 0x451
                db 0xF1
        else
                err Failed to convert to CP866
        end if
}
; Latin-1: code points 0x00-0xFF map to the byte of the same value.
; Anything at or above 0x100 cannot be represented and aborts assembly.
declare_encoding latin1
{
        if char >= 0x100
                err Failed to convert to Latin-1
        else
                db char
        end if
}
; CP850 encoding.
; Only the code points listed below are supported; any other code point
; that reaches this handler aborts assembly.
;   0xA1 (inverted exclamation mark)  -> 0xAD
;   0xBF (inverted question mark)     -> 0xA8
;   0xC9 (capital E with acute)       -> 0x90
;   0xD1 (capital N with tilde)       -> 0xA5
;   0xDC (capital U with diaeresis)   -> 0x9A
;   0xE1 (small a with acute)         -> 0xA0
;   0xE9 (small e with acute)         -> 0x82
;   0xED (small i with acute)         -> 0xA1
;   0xF1 (small n with tilde)         -> 0xA4
;   0xF3 (small o with acute)         -> 0xA2
;   0xFA (small u with acute)         -> 0xA3
;   0xFC (small u with diaeresis)     -> 0x81
; NOTE(review): capital A, I, O and U with acute are intentionally not
; mapped yet; confirm how the target font renders their CP850 positions
; (0xB5, 0xD6, 0xE0, 0xE9) before adding them.
declare_encoding cp850
{
        if char = 0xA1
                db 0xAD
        else if char = 0xBF
                db 0xA8
        else if char = 0xC9
                db 0x90
        else if char = 0xD1
                db 0xA5
        else if char = 0xDC
                db 0x9A
        else if char = 0xE1
                db 0xA0
        else if char = 0xE9
                db 0x82
        else if char = 0xED
                db 0xA1
        else if char = 0xF1
                db 0xA4
        else if char = 0xF3
                db 0xA2
        else if char = 0xFA
                db 0xA3
        else if char = 0xFC
                db 0x81
        else
                err Failed to convert to CP850
        end if
}