From 10c141ffd390ec6b5137e32f5969cb0a3bcec5af Mon Sep 17 00:00:00 2001
From: Ivan Baravy
Date: Fri, 6 Nov 2020 18:18:04 +0000
Subject: [PATCH] Add encoding.inc to /programs (copy from /kernel/trunk).

git-svn-id: svn://kolibrios.org@8152 a494cfbc-eb01-0410-851d-a64ba20cac60
---
 programs/encoding.inc | 188 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 188 insertions(+)
 create mode 100644 programs/encoding.inc

diff --git a/programs/encoding.inc b/programs/encoding.inc
new file mode 100644
index 0000000000..7a1aa9adec
--- /dev/null
+++ b/programs/encoding.inc
@@ -0,0 +1,188 @@
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;;                                                              ;;
+;; Copyright (C) KolibriOS team 2013-2015. All rights reserved. ;;
+;; Distributed under terms of the GNU General Public License    ;;
+;;                                                              ;;
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+
+; Helper for fetch_utf8_char: append the continuation byte at addrspace:pos
+; to char, failing the assembly unless the byte has the form 10xxxxxx
+macro fetch_utf8_tail addrspace, pos, char
+{ local b
+        load b byte from addrspace:pos
+        if (b < 0x80) | (b > 0xBF)
+                err Invalid UTF-8 string
+        end if
+        char = (char shl 6) + (b and 0x3F)
+}
+
+; fetch the UTF-8 character in addrspace:offs to char, advance offs past it
+; assembly fails on malformed input: stray or invalid continuation byte,
+; overlong form, UTF-16 surrogate, code point above 0x10FFFF
+macro fetch_utf8_char addrspace, offs, char
+{ local first_byte
+        ; fetch first byte
+        load first_byte byte from addrspace:offs
+        if first_byte < 0x80
+                char = first_byte
+                offs = offs + 1
+        else if first_byte < 0xC2
+                ; 0x80-0xBF is a stray continuation byte, 0xC0-0xC1 start overlong forms
+                err Invalid UTF-8 string
+        else if first_byte < 0xE0
+                char = first_byte and 0x1F
+                fetch_utf8_tail addrspace, offs + 1, char
+                offs = offs + 2
+        else if first_byte < 0xF0
+                char = first_byte and 0xF
+                fetch_utf8_tail addrspace, offs + 1, char
+                fetch_utf8_tail addrspace, offs + 2, char
+                if char < 0x800
+                        err Invalid UTF-8 string
+                else if (char >= 0xD800) & (char <= 0xDFFF)
+                        err Invalid UTF-8 string
+                end if
+                offs = offs + 3
+        else if first_byte < 0xF8
+                char = first_byte and 0x7
+                fetch_utf8_tail addrspace, offs + 1, char
+                fetch_utf8_tail addrspace, offs + 2, char
+                fetch_utf8_tail addrspace, offs + 3, char
+                if (char < 0x10000) | (char > 0x10FFFF)
+                        err Invalid UTF-8 string
+                end if
+                offs = offs + 4
+        else
+                err Invalid UTF-8 string
+        end if
+}
+
+; Worker macro for all encodings.
+; Common part for all encodings: map characters 0-0x7F trivially,
+; translate pseudographics.
+; Pseudographics for the boot screen:
+; 0x2500 -> 0xC4, 0x2502 -> 0xB3, 0x250C -> 0xDA, 0x2510 -> 0xBF,
+; 0x2514 -> 0xC0, 0x2518 -> 0xD9, 0x252C -> 0xC2, 0x2534 -> 0xC1, 0x2551 -> 0xBA
+macro convert_utf8 encoding, [arg]
+{ common
+        local ..addrspace, offs, char
+        offs = 0
+        virtual at 0
+                ..addrspace:: db arg
+                ..addrspace#.size = $
+        end virtual
+        while offs < ..addrspace#.size
+                fetch_utf8_char ..addrspace, offs, char
+                if char = 0x2500
+                        db 0xC4
+                else if char = 0x2502
+                        db 0xB3
+                else if char = 0x250C
+                        db 0xDA
+                else if char = 0x2510
+                        db 0xBF
+                else if char = 0x2514
+                        db 0xC0
+                else if char = 0x2518
+                        db 0xD9
+                else if char = 0x252C
+                        db 0xC2
+                else if char = 0x2534
+                        db 0xC1
+                else if char = 0x2551
+                        db 0xBA
+                else if char < 0x80
+                        db char
+                else
+                        encoding char
+                end if
+        end while
+}
+
+; declare_encoding name: define macro and struc name that emit their
+; UTF-8 string arguments converted through name#char, then open the
+; definition of macro name#char; the braced block that follows the
+; invocation becomes its body and handles the code point passed as char
+macro declare_encoding encoding
+{
+        macro encoding [arg]
+        \{ common convert_utf8 encoding#char, arg \}
+        struc encoding [arg]
+        \{ common convert_utf8 encoding#char, arg \}
+        macro encoding#char char
+}
+
+; Russian: use CP866.
+; 0x410-0x43F -> 0x80-0xAF
+; 0x440-0x44F -> 0xE0-0xEF
+; 0x401 -> 0xF0, 0x451 -> 0xF1
+declare_encoding cp866
+{
+        if char = 0x401
+                db 0xF0
+        else if char = 0x451
+                db 0xF1
+        else if (char < 0x410) | (char > 0x44F)
+                err Failed to convert to CP866
+        else if char < 0x440
+                db char - 0x410 + 0x80
+        else
+                db char - 0x440 + 0xE0
+        end if
+}
+
+; Latin-1 encoding
+; 0x00-0xFF - trivial map
+declare_encoding latin1
+{
+        if char < 0x100
+                db char
+        else
+                err Failed to convert to Latin-1
+        end if
+}
+
+; CP850 encoding
+; Spanish letters and punctuation outside ASCII:
+; 0xA1 -> 0xAD, 0xBF -> 0xA8, 0xC1 -> 0xB5, 0xC9 -> 0x90, 0xCD -> 0xD6,
+; 0xD1 -> 0xA5, 0xD3 -> 0xE0, 0xDA -> 0xE9, 0xDC -> 0x9A, 0xE1 -> 0xA0,
+; 0xE9 -> 0x82, 0xED -> 0xA1, 0xF1 -> 0xA4, 0xF3 -> 0xA2, 0xFA -> 0xA3,
+; 0xFC -> 0x81
+declare_encoding cp850
+{
+        if char = 0xA1
+                db 0xAD
+        else if char = 0xBF
+                db 0xA8
+        else if char = 0xC1
+                db 0xB5
+        else if char = 0xC9
+                db 0x90
+        else if char = 0xCD
+                db 0xD6
+        else if char = 0xD1
+                db 0xA5
+        else if char = 0xD3
+                db 0xE0
+        else if char = 0xDA
+                db 0xE9
+        else if char = 0xDC
+                db 0x9A
+        else if char = 0xE1
+                db 0xA0
+        else if char = 0xE9
+                db 0x82
+        else if char = 0xED
+                db 0xA1
+        else if char = 0xF1
+                db 0xA4
+        else if char = 0xF3
+                db 0xA2
+        else if char = 0xFA
+                db 0xA3
+        else if char = 0xFC
+                db 0x81
+        else
+                err Failed to convert to CP850
+        end if
+}