Refactor the boot-screen string encoding macros in kernel/trunk/encoding.inc.

Summary of what this patch changes:
  * fetch_utf8_char no longer opens its own "virtual" block around the string
    for every character.  It now decodes one UTF-8 sequence (1 to 4 bytes) by
    loading bytes from a caller-supplied addressing space (addrspace:offs) and
    advances offs past the sequence.  The end-of-string test (char = -1) and
    the pseudographics lookup (graph) are removed from this macro.
  * convert_utf8 is a new worker macro shared by all encodings.  It emits the
    string once into a virtual addressing space, records its size, and loops
    while offs is below that size.  For each character it emits the
    pseudographics byte, or the character itself when below 0x80, and
    otherwise invokes the per-encoding macro passed in as "encoding".
  * declare_encoding NAME defines "macro NAME" and "struc NAME" as wrappers
    around convert_utf8, then opens "macro NAMEchar char"; the braces that
    follow the declare_encoding line in the source become that macro's body.
  * cp866, latin1 and cp850 are reduced to their encoding-specific mapping.
  * ".err" is replaced by "err" in the error branches.

Note on whitespace: the line structure of this patch was reconstructed from a
copy whose newlines and indentation had been collapsed.  Line counts match the
hunk header (171 old lines, 139 new lines), but the position of blank context
lines and all leading indentation are reconstructed, not original.  Apply with
whitespace-tolerant options (for example "git apply --ignore-whitespace" or
"patch -l").

diff --git a/kernel/trunk/encoding.inc b/kernel/trunk/encoding.inc
index 29522bf66d..deea5f405c 100644
--- a/kernel/trunk/encoding.inc
+++ b/kernel/trunk/encoding.inc
@@ -7,171 +7,139 @@
 
 $Revision$
 
-
-; fetch the UTF-8 character in string+offs to char
-; common part for all encodings: translate pseudographics
-; Pseudographics for the boot screen:
-; 0x2500 -> 0xC4, 0x2502 -> 0xB3, 0x250C -> 0xDA, 0x2510 -> 0xBF,
-; 0x2514 -> 0xC0, 0x2518 -> 0xD9, 0x252C -> 0xC2, 0x2534 -> 0xC1, 0x2551 -> 0xBA
-macro fetch_utf8_char string, offs, char, graph
+; fetch the UTF-8 character in addrspace:offs to char
+macro fetch_utf8_char addrspace, offs, char
 {       local first_byte, b
-        virtual at 0
-                db string
-                if offs >= $
-                        char = -1
-                else
-                        ; fetch first byte
-                        load first_byte byte from offs
-                        if first_byte < 0x80
-                                char = first_byte
-                                offs = offs + 1
-                        else if first_byte < 0xC0
-                                .err Invalid UTF-8 string
-                        else if first_byte < 0xE0
-                                char = first_byte and 0x1F
-                                load b byte from offs + 1
-                                char = (char shl 6) + (b and 0x3F)
-                                offs = offs + 2
-                        else if first_byte < 0xF0
-                                char = first_byte and 0xF
-                                load b byte from offs + 1
-                                char = (char shl 6) + (b and 0x3F)
-                                load b byte from offs + 2
-                                char = (char shl 6) + (b and 0x3F)
-                                offs = offs + 3
-                        else if first_byte < 0xF8
-                                char = first_byte and 0x7
-                                load b byte from offs + 1
-                                char = (char shl 6) + (b and 0x3F)
-                                load b byte from offs + 2
-                                char = (char shl 6) + (b and 0x3F)
-                                load b byte from offs + 3
-                                char = (char shl 6) + (b and 0x3F)
-                                offs = offs + 4
-                        else
-                                .err Invalid UTF-8 string
-                        end if
-                end if
-        end virtual
-        if char = 0x2500
-                graph = 0xC4
-        else if char = 0x2502
-                graph = 0xB3
-        else if char = 0x250C
-                graph = 0xDA
-        else if char = 0x2510
-                graph = 0xBF
-        else if char = 0x2514
-                graph = 0xC0
-        else if char = 0x2518
-                graph = 0xD9
-        else if char = 0x252C
-                graph = 0xC2
-        else if char = 0x2534
-                graph = 0xC1
-        else if char = 0x2551
-                graph = 0xBA
+        ; fetch first byte
+        load first_byte byte from addrspace:offs
+        if first_byte < 0x80
+                char = first_byte
+                offs = offs + 1
+        else if first_byte < 0xC0
+                err Invalid UTF-8 string
+        else if first_byte < 0xE0
+                char = first_byte and 0x1F
+                load b byte from addrspace:offs + 1
+                char = (char shl 6) + (b and 0x3F)
+                offs = offs + 2
+        else if first_byte < 0xF0
+                char = first_byte and 0xF
+                load b byte from addrspace:offs + 1
+                char = (char shl 6) + (b and 0x3F)
+                load b byte from addrspace:offs + 2
+                char = (char shl 6) + (b and 0x3F)
+                offs = offs + 3
+        else if first_byte < 0xF8
+                char = first_byte and 0x7
+                load b byte from addrspace:offs + 1
+                char = (char shl 6) + (b and 0x3F)
+                load b byte from addrspace:offs + 2
+                char = (char shl 6) + (b and 0x3F)
+                load b byte from addrspace:offs + 3
+                char = (char shl 6) + (b and 0x3F)
+                offs = offs + 4
         else
-                graph = 0
+                err Invalid UTF-8 string
         end if
 }
 
-; Russian: use CP866.
-; 0x00-0x7F - trivial map
-; 0x410-0x43F -> 0x80-0xAF
-; 0x440-0x44F -> 0xE0-0xEF
-; 0x401 -> 0xF0, 0x451 -> 0xF1
-macro cp866 [arg]
-{       local offs, char, graph
+; Worker macro for all encodings.
+; Common part for all encodings: map characters 0-0x7F trivially,
+; translate pseudographics.
+; Pseudographics for the boot screen:
+; 0x2500 -> 0xC4, 0x2502 -> 0xB3, 0x250C -> 0xDA, 0x2510 -> 0xBF,
+; 0x2514 -> 0xC0, 0x2518 -> 0xD9, 0x252C -> 0xC2, 0x2534 -> 0xC1, 0x2551 -> 0xBA
+macro convert_utf8 encoding, [arg]
+{       common
+        local ..addrspace, offs, char
         offs = 0
-        while 1
-                fetch_utf8_char arg, offs, char, graph
-                if char = -1
-                        break
-                end if
-                if graph
-                        db graph
+        virtual at 0
+                ..addrspace:: db arg
+                ..addrspace#.size = $
+        end virtual
+        while offs < ..addrspace#.size
+                fetch_utf8_char ..addrspace, offs, char
+                if char = 0x2500
+                        db 0xC4
+                else if char = 0x2502
+                        db 0xB3
+                else if char = 0x250C
+                        db 0xDA
+                else if char = 0x2510
+                        db 0xBF
+                else if char = 0x2514
+                        db 0xC0
+                else if char = 0x2518
+                        db 0xD9
+                else if char = 0x252C
+                        db 0xC2
+                else if char = 0x2534
+                        db 0xC1
+                else if char = 0x2551
+                        db 0xBA
                 else if char < 0x80
                         db char
-                else if char = 0x401
-                        db 0xF0
-                else if char = 0x451
-                        db 0xF1
-                else if (char < 0x410) | (char > 0x44F)
-                        .err Failed to convert to CP866
-                else if char < 0x440
-                        db char - 0x410 + 0x80
                 else
-                        db char - 0x440 + 0xE0
+                        encoding char
                 end if
         end while
 }
 
-struc cp866 [arg]
+macro declare_encoding encoding
 {
-common
-        cp866 arg
+        macro encoding [arg]
+        \{ common convert_utf8 encoding#char, arg \}
+        struc encoding [arg]
+        \{ common convert_utf8 encoding#char, arg \}
+        macro encoding#char char
+}
+
+; Russian: use CP866.
+; 0x410-0x43F -> 0x80-0xAF
+; 0x440-0x44F -> 0xE0-0xEF
+; 0x401 -> 0xF0, 0x451 -> 0xF1
+declare_encoding cp866
+{
+        if char = 0x401
+                db 0xF0
+        else if char = 0x451
+                db 0xF1
+        else if (char < 0x410) | (char > 0x44F)
+                err Failed to convert to CP866
+        else if char < 0x440
+                db char - 0x410 + 0x80
+        else
+                db char - 0x440 + 0xE0
+        end if
 }
 
 ; Latin-1 encoding
 ; 0x00-0xFF - trivial map
-macro latin1 [arg]
-{       local offs, char, graph
-        offs = 0
-        while 1
-                fetch_utf8_char arg, offs, char, graph
-                if char = -1
-                        break
-                end if
-                if graph
-                        db graph
-                else if char < 0x100
-                        db char
-                else
-                        .err Failed to convert to Latin-1
-                end if
-        end while
-}
-
-struc latin1 [arg]
+declare_encoding latin1
 {
-common
-        latin1 arg
+        if char < 0x100
+                db char
+        else
+                err Failed to convert to Latin-1
+        end if
 }
 
 ; CP850 encoding
-macro cp850 [arg]
-{       local offs, char, graph
-        offs = 0
-        while 1
-                fetch_utf8_char arg, offs, char, graph
-                if char = -1
-                        break
-                end if
-                if graph
-                        db graph
-                else if char < 0x80
-                        db char
-                else if char = 0xBF
-                        db 0xA8
-                else if char = 0xE1
-                        db 0xA0
-                else if char = 0xE9
-                        db 0x82
-                else if char = 0xED
-                        db 0xA1
-                else if char = 0xF3
-                        db 0xA2
-                else if char = 0xFA
-                        db 0xA3
-                else
-                        err Failed to convert to CP850
-                end if
-        end while
-}
-
-struc cp850 [arg]
+declare_encoding cp850
 {
-common
-        cp850 arg
+        if char = 0xBF
+                db 0xA8
+        else if char = 0xE1
+                db 0xA0
+        else if char = 0xE9
+                db 0x82
+        else if char = 0xED
+                db 0xA1
+        else if char = 0xF3
+                db 0xA2
+        else if char = 0xFA
+                db 0xA3
+        else
+                err Failed to convert to CP850
+        end if
 }