(*
kolibrios/programs/other/fb2reader/SRC/Encoding.ob07

267 lines
6.7 KiB
Plaintext
Raw Normal View History
*)

(*
Copyright 2016, 2023 Anton Krotov
This file is part of fb2read.
fb2read is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
fb2read is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with fb2read. If not, see <http://www.gnu.org/licenses/>.
*)
MODULE Encoding;
IMPORT SYSTEM;
CONST
(* Number of entries in table1251; the table is indexed by code values 0 .. TABLE_SIZE - 1. *)
TABLE_SIZE = 65536;
TYPE
(* Buffer for one UTF-8 encoded character as produced by utf8:
   at most three bytes followed by a 0X terminator. *)
tUtf8* = ARRAY 4 OF CHAR;
(* One entry per byte value 0..255 of a single-byte code page:
   code - code point assigned to that byte by the init* procedures,
   utf8 - UTF-8 encoding of code (filled in by initCP),
   len  - LENGTH of utf8 (filled in by initCP). *)
tCodePage* = ARRAY 256 OF RECORD code*, len*: INTEGER; utf8*: tUtf8 END;
VAR
(* Exported code-page tables, filled in by init when the module body runs. *)
cp1250*, cp1251*, cp1252*, cp866*: tCodePage;
(* Reverse lookup for cp1251: table1251[code] is the byte returned by ucs2to1251(code). *)
table1251: ARRAY TABLE_SIZE OF BYTE;
PROCEDURE getUtf8Char* (VAR ptr, size: INTEGER): INTEGER;
(*
    Decodes one UTF-8 sequence from memory and returns its code point.

    ptr  - address of the next byte to read; incremented for every byte consumed.
    size - decremented for every byte consumed; decoding also stops as soon as
           it becomes exactly 0, in which case the value decoded so far is
           returned.

    A continuation byte (128..191) that is not preceded by a lead byte is
    consumed on its own and its low six bits are returned.
*)
VAR
    c: BYTE;
    n, k, code: INTEGER;
    end: BOOLEAN;
BEGIN
    code := 0;
    (* Count of continuation bytes still expected. It has to start at 0:
       without this a stray continuation byte would decrement an undefined
       value and the loop exit would depend on stack garbage, possibly
       swallowing the bytes that follow. *)
    n := 0;
    end := FALSE;
    REPEAT
        SYSTEM.GET(ptr, c);
        INC(ptr);
        DEC(size);
        CASE c OF
        | 0..127:
            (* single-byte character: the byte is the code point *)
            code := c;
            end := TRUE
        | 128..191:
            (* continuation byte: append its six payload bits *)
            code := code * 64 + c MOD 64;
            DEC(n);
            end := n <= 0
        | 192..255:
            (* lead byte: count its leading 1 bits to learn the sequence length.
               NOTE(review): assumes a 32-bit INTEGER, and that ROR by -1 moves
               the top bit into bit 0 (rotate left by one) - confirm against
               the compiler's definition of ROR. *)
            k := LSL(c, 24);
            n := -2;
            REPEAT
                k := ROR(k, -1);
                INC(n)
            UNTIL k MOD 2 = 0;
            (* Under the assumption above n is now the number of continuation
               bytes to follow; shifting left and back right by n + 25 clears
               the length marker bits and keeps only the payload bits. *)
            k := LSL(c, n + 25);
            code := LSR(k, n + 25)
        END
    UNTIL (size = 0) OR end
    RETURN code
END getUtf8Char;
PROCEDURE convert1251* (src, dst: INTEGER; len: INTEGER);
(*
    Converts UTF-8 text to single-byte text using table1251.

    src - address of the UTF-8 input
    dst - address of the output; one byte is stored per decoded character
    len - number of characters to decode (not the number of input bytes)

    Decoded values outside 0 .. TABLE_SIZE - 1 are written as "?".
*)
VAR
    codePoint, dummy, outByte: INTEGER;
BEGIN
    WHILE len > 0 DO
        (* Starting from 0 keeps getUtf8Char's "size = 0" stop test from
           firing: the counter is already negative after the first byte. *)
        dummy := 0;
        codePoint := getUtf8Char(src, dummy);
        IF (codePoint < 0) OR (codePoint >= TABLE_SIZE) THEN
            outByte := ORD("?")
        ELSE
            outByte := table1251[codePoint]
        END;
        SYSTEM.PUT8(dst, outByte);
        INC(dst);
        DEC(len)
    END
END convert1251;
PROCEDURE utf8* (code: INTEGER; VAR utf8char: tUtf8);
(*
    Encodes a code point as a 0X-terminated UTF-8 string.

    code     - code point to encode
    utf8char - receives one, two or three bytes followed by 0X

    Values of 10000H and above are not encoded: only utf8char[0] is set,
    to 0X, which yields an empty string.
*)
BEGIN
    IF code < 80H THEN
        (* one byte: 0xxxxxxx *)
        utf8char[0] := CHR(code);
        utf8char[1] := 0X
    ELSIF code < 800H THEN
        (* two bytes: 110xxxxx 10xxxxxx *)
        utf8char[0] := CHR(0C0H + code DIV 64);
        utf8char[1] := CHR(80H + code MOD 64);
        utf8char[2] := 0X
    ELSIF code < 10000H THEN
        (* three bytes: 1110xxxx 10xxxxxx 10xxxxxx *)
        utf8char[0] := CHR(0E0H + code DIV 4096);
        utf8char[1] := CHR(80H + code DIV 64 MOD 64);
        utf8char[2] := CHR(80H + code MOD 64);
        utf8char[3] := 0X
    ELSE
        (* longer sequences are not supported *)
        utf8char[0] := 0X
    END
END utf8;
PROCEDURE ucs2to1251 (code: INTEGER): BYTE;
(*
    Finds the cp1251 byte whose table entry holds the given code point.

    code - code point to look up

    Returns the index of the first matching entry of cp1251, or ORD("?")
    when no entry matches.
*)
VAR
    i: INTEGER;
BEGIN
    (* Search upwards so that the lowest index wins when a code point occurs
       more than once. init1251 stores 20H as a placeholder at index 98H; a
       downward search would find that placeholder first and map an ordinary
       space to byte 98H instead of 20H. *)
    i := 0;
    WHILE (i <= 255) & (cp1251[i].code # code) DO
        INC(i)
    END;
    IF i > 255 THEN
        i := ORD("?")
    END
    RETURN i
END ucs2to1251;
PROCEDURE initCP (VAR cp: tCodePage);
(*
    Completes a code-page table.

    Entries 0 .. 7FH get their own index as code point. Then, for every one
    of the 256 entries, the UTF-8 form of its code point and the length of
    that form are stored. The codes of entries 80H .. 0FFH are expected to
    have been set by the caller beforehand.
*)
VAR
    k: INTEGER;
BEGIN
    FOR k := 0 TO 0FFH DO
        IF k < 80H THEN
            cp[k].code := k
        END;
        utf8(cp[k].code, cp[k].utf8);
        cp[k].len := LENGTH(cp[k].utf8)
    END
END initCP;
PROCEDURE init8 (VAR cp: tCodePage; VAR n: INTEGER; a, b, c, d, e, f, g, h: INTEGER);
(*
    Stores eight consecutive code points into a code-page table.

    cp   - table to fill
    n    - index of the first entry to write; advanced by 8 on return
    a..h - code points for entries n, n + 1, ..., n + 7
*)
BEGIN
    cp[n].code := a;
    cp[n + 1].code := b;
    cp[n + 2].code := c;
    cp[n + 3].code := d;
    cp[n + 4].code := e;
    cp[n + 5].code := f;
    cp[n + 6].code := g;
    cp[n + 7].code := h;
    INC(n, 8)
END init8;
PROCEDURE init1250 (VAR cp: tCodePage);
(*
    Fills cp with the Windows-1250 table.

    Entries 80H .. 0FFH are listed explicitly, eight per row; 20H stands in
    for the byte values this table leaves without a character of their own.
    initCP then supplies entries 0 .. 7FH and the UTF-8 data.
*)
VAR
    slot: INTEGER;
BEGIN
    slot := 80H;
    init8(cp, slot, 20ACH, 20H, 201AH, 20H, 201EH, 2026H, 2020H, 2021H);        (* 80H *)
    init8(cp, slot, 20H, 2030H, 0160H, 2039H, 015AH, 0164H, 017DH, 0179H);      (* 88H *)
    init8(cp, slot, 20H, 2018H, 2019H, 201CH, 201DH, 2022H, 2013H, 2014H);      (* 90H *)
    init8(cp, slot, 20H, 2122H, 0161H, 203AH, 015BH, 0165H, 017EH, 017AH);      (* 98H *)
    init8(cp, slot, 00A0H, 02C7H, 02D8H, 0141H, 00A4H, 0104H, 00A6H, 00A7H);    (* 0A0H *)
    init8(cp, slot, 00A8H, 00A9H, 015EH, 00ABH, 00ACH, 00ADH, 00AEH, 017BH);    (* 0A8H *)
    init8(cp, slot, 00B0H, 00B1H, 02DBH, 0142H, 00B4H, 00B5H, 00B6H, 00B7H);    (* 0B0H *)
    init8(cp, slot, 00B8H, 0105H, 015FH, 00BBH, 013DH, 02DDH, 013EH, 017CH);    (* 0B8H *)
    init8(cp, slot, 0154H, 00C1H, 00C2H, 0102H, 00C4H, 0139H, 0106H, 00C7H);    (* 0C0H *)
    init8(cp, slot, 010CH, 00C9H, 0118H, 00CBH, 011AH, 00CDH, 00CEH, 010EH);    (* 0C8H *)
    init8(cp, slot, 0110H, 0143H, 0147H, 00D3H, 00D4H, 0150H, 00D6H, 00D7H);    (* 0D0H *)
    init8(cp, slot, 0158H, 016EH, 00DAH, 0170H, 00DCH, 00DDH, 0162H, 00DFH);    (* 0D8H *)
    init8(cp, slot, 0155H, 00E1H, 00E2H, 0103H, 00E4H, 013AH, 0107H, 00E7H);    (* 0E0H *)
    init8(cp, slot, 010DH, 00E9H, 0119H, 00EBH, 011BH, 00EDH, 00EEH, 010FH);    (* 0E8H *)
    init8(cp, slot, 0111H, 0144H, 0148H, 00F3H, 00F4H, 0151H, 00F6H, 00F7H);    (* 0F0H *)
    init8(cp, slot, 0159H, 016FH, 00FAH, 0171H, 00FCH, 00FDH, 0163H, 02D9H);    (* 0F8H *)
    initCP(cp)
END init1250;
PROCEDURE init1251 (VAR cp: tCodePage);
(*
    Fills cp with the Windows-1251 table.

    Entries 80H .. 0BFH are listed explicitly, eight per row (20H stands in
    for the one byte value without a character of its own). Entries
    0C0H .. 0FFH hold the contiguous range 0410H .. 044FH. initCP then
    supplies entries 0 .. 7FH and the UTF-8 data.
*)
VAR
    slot, ch: INTEGER;
BEGIN
    slot := 80H;
    init8(cp, slot, 0402H, 0403H, 201AH, 0453H, 201EH, 2026H, 2020H, 2021H);    (* 80H *)
    init8(cp, slot, 20ACH, 2030H, 0409H, 2039H, 040AH, 040CH, 040BH, 040FH);    (* 88H *)
    init8(cp, slot, 0452H, 2018H, 2019H, 201CH, 201DH, 2022H, 2013H, 2014H);    (* 90H *)
    init8(cp, slot, 20H, 2122H, 0459H, 203AH, 045AH, 045CH, 045BH, 045FH);      (* 98H *)
    init8(cp, slot, 00A0H, 040EH, 045EH, 0408H, 00A4H, 0490H, 00A6H, 00A7H);    (* 0A0H *)
    init8(cp, slot, 0401H, 00A9H, 0404H, 00ABH, 00ACH, 00ADH, 00AEH, 0407H);    (* 0A8H *)
    init8(cp, slot, 00B0H, 00B1H, 0406H, 0456H, 0491H, 00B5H, 00B6H, 00B7H);    (* 0B0H *)
    init8(cp, slot, 0451H, 2116H, 0454H, 00BBH, 0458H, 0405H, 0455H, 0457H);    (* 0B8H *)
    (* byte value + 350H gives the code point for the remaining 64 entries *)
    FOR ch := 0C0H TO 0FFH DO
        cp[ch].code := ch + 350H
    END;
    initCP(cp)
END init1251;
PROCEDURE init1252 (VAR cp: tCodePage);
(*
    Fills cp with the Windows-1252 table.

    Entries 80H .. 9FH are listed explicitly, eight per row (20H stands in
    for byte values without a character of their own). Entries 0A0H .. 0FFH
    map to the code point equal to the byte value. initCP then supplies
    entries 0 .. 7FH and the UTF-8 data.
*)
VAR
    slot, ch: INTEGER;
BEGIN
    slot := 80H;
    init8(cp, slot, 20ACH, 20H, 201AH, 0192H, 201EH, 2026H, 2020H, 2021H);      (* 80H *)
    init8(cp, slot, 02C6H, 2030H, 0160H, 2039H, 0152H, 20H, 017DH, 20H);        (* 88H *)
    init8(cp, slot, 20H, 2018H, 2019H, 201CH, 201DH, 2022H, 2013H, 2014H);      (* 90H *)
    init8(cp, slot, 02DCH, 2122H, 0161H, 203AH, 0153H, 20H, 017EH, 0178H);      (* 98H *)
    ch := 0A0H;
    WHILE ch <= 0FFH DO
        cp[ch].code := ch;
        INC(ch)
    END;
    initCP(cp)
END init1252;
PROCEDURE init866 (VAR cp: tCodePage);
(*
    Fills cp with the code page 866 table.

    Entries 80H .. 0AFH hold 0410H .. 043FH and entries 0E0H .. 0EFH hold
    0440H .. 044FH. Entries 0B0H .. 0DFH and 0F0H .. 0FFH are listed
    explicitly, eight per row. initCP then supplies entries 0 .. 7FH and the
    UTF-8 data.
*)
VAR
    slot, k: INTEGER;
BEGIN
    FOR k := 0 TO 2FH DO
        cp[80H + k].code := 0410H + k
    END;
    FOR k := 0 TO 0FH DO
        cp[0E0H + k].code := 0440H + k
    END;
    slot := 0B0H;
    init8(cp, slot, 2591H, 2592H, 2593H, 2502H, 2524H, 2561H, 2562H, 2556H);    (* 0B0H *)
    init8(cp, slot, 2555H, 2563H, 2551H, 2557H, 255DH, 255CH, 255BH, 2510H);    (* 0B8H *)
    init8(cp, slot, 2514H, 2534H, 252CH, 251CH, 2500H, 253CH, 255EH, 255FH);    (* 0C0H *)
    init8(cp, slot, 255AH, 2554H, 2569H, 2566H, 2560H, 2550H, 256CH, 2567H);    (* 0C8H *)
    init8(cp, slot, 2568H, 2564H, 2565H, 2559H, 2558H, 2552H, 2553H, 256BH);    (* 0D0H *)
    init8(cp, slot, 256AH, 2518H, 250CH, 2588H, 2584H, 258CH, 2590H, 2580H);    (* 0D8H *)
    slot := 0F0H;
    init8(cp, slot, 0401H, 0451H, 0404H, 0454H, 0407H, 0457H, 040EH, 045EH);    (* 0F0H *)
    init8(cp, slot, 00B0H, 2219H, 00B7H, 221AH, 2116H, 00A4H, 25A0H, 00A0H);    (* 0F8H *)
    initCP(cp)
END init866;
PROCEDURE init;
(*
    Builds the four exported code-page tables and then the reverse lookup
    table1251. cp1251 has to be filled first because ucs2to1251 searches it.
*)
VAR
    code: INTEGER;
BEGIN
    init1250(cp1250);
    init1251(cp1251);
    init1252(cp1252);
    init866(cp866);
    code := 0;
    WHILE code < TABLE_SIZE DO
        table1251[code] := ucs2to1251(code);
        INC(code)
    END
END init;
(* Module body: fill all code-page tables and table1251 by calling init. *)
BEGIN
init
END Encoding.