kolibrios-gitea/programs/develop/cedit/SRC/RW.ob07
Anton Krotov 0d7861018a CEDIT: EOL conversion (CRLF/LF/CR)
git-svn-id: svn://kolibrios.org@9180 a494cfbc-eb01-0410-851d-a64ba20cac60
2021-09-06 21:06:31 +00:00

560 lines
12 KiB
Plaintext

(*
Copyright 2021 Anton Krotov
This file is part of CEdit.
CEdit is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
CEdit is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with CEdit. If not, see <http://www.gnu.org/licenses/>.
*)
MODULE RW;
IMPORT
File, SYSTEM, KOSAPI, E := Encodings,
CB := Clipboard, Lines;
CONST
CR = 0DX; LF = 0AX; TAB = 9X; SPACE = 20X;
BOM = 0FEFFX;
BUF_SIZE = 65536;
NAME_LEN = 1024;
EOL_CRLF* = 0; EOL_LF* = 1; EOL_CR* = 2;
TYPE
tFileName* = ARRAY NAME_LEN OF CHAR;
tEOL = ARRAY 3 OF WCHAR;
tInput* = POINTER TO RECORD
buffer: INTEGER;
pos, cnt: INTEGER;
CR: BOOLEAN;
clipbrd: BOOLEAN;
getChar: PROCEDURE (file: tInput): INTEGER
END;
tOutput* = POINTER TO RECORD
handle: File.FS;
buffer: ARRAY BUF_SIZE OF BYTE;
pos: INTEGER;
eol: tEOL;
putChar: PROCEDURE (file: tOutput; code: INTEGER): BOOLEAN
END;
VAR
eol*: ARRAY 3 OF tEOL;
eolNames*: ARRAY 3, 16 OF WCHAR;
PROCEDURE getByte (file: tInput): BYTE;
VAR
res: BYTE;
BEGIN
IF file.cnt > 0 THEN
SYSTEM.GET8(file.buffer + file.pos, res);
INC(file.pos);
DEC(file.cnt)
ELSE
res := 0
END
RETURN res
END getByte;
PROCEDURE peakByte (file: tInput): BYTE;
VAR
res: BYTE;
BEGIN
IF file.cnt > 0 THEN
SYSTEM.GET8(file.buffer + file.pos, res)
ELSE
res := 0
END
RETURN res
END peakByte;
PROCEDURE getCharUTF8 (file: tInput): INTEGER;
VAR
code, n: INTEGER;
b: BYTE;
BEGIN
b := getByte(file);
IF b <= 07FH THEN
n := 0
ELSIF (0C0H <= b) & (b <= 0DFH) THEN
DEC(b, 0C0H);
n := 1
ELSIF (0E0H <= b) & (b <= 0EFH) THEN
DEC(b, 0E0H);
n := 2
ELSIF (0F0H <= b) & (b <= 0F7H) THEN
DEC(b, 0F0H);
n := 3
ELSIF (0F8H <= b) & (b <= 0FBH) THEN
DEC(b, 0F8H);
n := 4
ELSIF (0FCH <= b) & (b <= 0FDH) THEN
DEC(b, 0FCH);
n := 5
ELSIF b = 0FEH THEN
b := 0;
n := 6
ELSIF b = 0FFH THEN
n := -1
ELSIF (080H <= b) & (b <= 0BFH) THEN
n := -1
END;
code := b;
IF n > 2 THEN
n := -1
END;
WHILE n > 0 DO
DEC(n);
b := peakByte(file);
IF (080H <= b) & (b <= 0BFH) THEN
code := code*64 + getByte(file) - 080H
ELSE
n := -1
END
END;
IF n = -1 THEN
code := E.UNDEF
END
RETURN code
END getCharUTF8;
PROCEDURE getCharW1251 (file: tInput): INTEGER;
RETURN E.cpW1251[getByte(file)]
END getCharW1251;
PROCEDURE getCharCP866 (file: tInput): INTEGER;
RETURN E.cp866[getByte(file)]
END getCharCP866;
PROCEDURE getCharUTF16LE (file: tInput): INTEGER;
RETURN getByte(file) + getByte(file) * 256
END getCharUTF16LE;
PROCEDURE getString* (file: tInput; line: Lines.tLine; tabs: BOOLEAN; VAR eol: BOOLEAN): INTEGER;
VAR
c: WCHAR;
i, L, k, n: INTEGER;
s: ARRAY 1000 OF WCHAR;
BEGIN
L := LEN(s);
eol := FALSE;
n := 0;
i := ORD(file.cnt > 0) - 1;
WHILE (file.cnt > 0) & ~eol DO
c := WCHR(file.getChar(file) MOD 65536);
IF c = Lines.TAB1 THEN
c := SPACE
END;
IF c = CR THEN
eol := TRUE;
file.CR := TRUE
ELSIF (c = LF) OR (c = 0X) THEN
IF ~file.CR THEN
eol := TRUE
END;
file.CR := FALSE
ELSIF c = TAB THEN
k := Lines.tab - i MOD Lines.tab;
IF tabs THEN
s[i] := TAB
ELSE
s[i] := SPACE
END;
INC(i);
DEC(k);
WHILE k > 0 DO
IF tabs THEN
s[i] := Lines.TAB1
ELSE
s[i] := SPACE
END;
INC(i);
IF i = L THEN
Lines.concat(line, s);
INC(n, i);
i := 0
END;
DEC(k)
END;
file.CR := FALSE
ELSIF c = BOM THEN
file.CR := FALSE
ELSE
s[i] := c;
INC(i);
IF i = L THEN
Lines.concat(line, s);
INC(n, i);
i := 0
END;
file.CR := FALSE
END
END;
IF i >= 0 THEN
s[i] := 0X;
Lines.concat(line, s);
END;
INC(n, i)
RETURN n
END getString;
PROCEDURE detectEncoding (text: tInput): INTEGER;
VAR
pos, cnt, res: INTEGER;
continue, bom: BOOLEAN;
b: BYTE;
cp866, w1251: INTEGER;
BEGIN
pos := text.pos;
cnt := text.cnt;
continue := TRUE;
WHILE (text.cnt > 0) & continue DO
IF getByte(text) > 127 THEN
continue := FALSE
END
END;
text.cnt := cnt;
text.pos := pos;
IF continue THEN
res := E.CP866
ELSE
bom := getCharUTF8(text) = ORD(BOM);
continue := TRUE;
text.cnt := cnt;
text.pos := pos;
WHILE (text.cnt > 0) & continue DO
IF getCharUTF8(text) = E.UNDEF THEN
continue := FALSE
END
END;
IF continue THEN
IF bom THEN
res := E.UTF8BOM
ELSE
res := E.UTF8
END
ELSE
text.cnt := cnt;
text.pos := pos;
cp866 := 0;
w1251 := 0;
WHILE text.cnt > 0 DO
b := getByte(text);
IF b > 127 THEN
IF b >= 192 THEN
INC(w1251)
ELSE
INC(cp866)
END
END
END;
IF w1251 > cp866 THEN
res := E.W1251
ELSE
res := E.CP866
END
END;
text.cnt := cnt;
text.pos := pos
END
RETURN res
END detectEncoding;
PROCEDURE detectEOL (text: tInput): INTEGER;
VAR
pos, cnt, c, res: INTEGER;
BEGIN
res := -1;
pos := text.pos;
cnt := text.cnt;
WHILE (text.cnt > 0) & (res = -1) DO
c := text.getChar(text);
IF c = 10 THEN
res := EOL_LF
ELSIF c = 13 THEN
IF text.getChar(text) = 10 THEN
res := EOL_CRLF
ELSE
res := EOL_CR
END
END
END;
text.cnt := cnt;
text.pos := pos;
IF res = -1 THEN
res := EOL_CRLF
END
RETURN res
END detectEOL;
PROCEDURE load* (name: tFileName; VAR enc, eol: INTEGER): tInput;
VAR
res: tInput;
fsize: INTEGER;
BEGIN
NEW(res);
res.pos := 0;
res.CR := FALSE;
res.getChar := NIL;
res.clipbrd := FALSE;
fsize := File.FileSize(name);
IF fsize = 0 THEN
res.buffer := KOSAPI.malloc(4096);
ASSERT(res.buffer # 0);
res.cnt := 0
ELSE
res.buffer := File.Load(name, res.cnt)
END;
IF res.buffer = 0 THEN
DISPOSE(res)
ELSE
enc := detectEncoding(res);
IF (enc = E.UTF8BOM) OR (enc = E.UTF8) THEN
res.getChar := getCharUTF8
ELSIF enc = E.CP866 THEN
res.getChar := getCharCP866
ELSIF enc = E.W1251 THEN
res.getChar := getCharW1251
END;
eol := detectEOL(res)
END
RETURN res
END load;
PROCEDURE clipboard* (): tInput;
VAR
res: tInput;
BEGIN
NEW(res);
res.pos := 0;
res.CR := FALSE;
res.clipbrd := TRUE;
res.getChar := NIL;
res.getChar := getCharCP866;
res.buffer := CB.get(res.cnt);
IF res.buffer = 0 THEN
DISPOSE(res)
END
RETURN res
END clipboard;
PROCEDURE putByte (file: tOutput; b: BYTE);
VAR
c: INTEGER;
BEGIN
IF file.pos = BUF_SIZE THEN
c := File.Write(file.handle, SYSTEM.ADR(file.buffer[0]), BUF_SIZE);
file.pos := 0
END;
file.buffer[file.pos] := b;
INC(file.pos)
END putByte;
PROCEDURE putString* (file: tOutput; line: Lines.tLine; n: INTEGER): INTEGER;
VAR
i: INTEGER;
c: WCHAR;
err: BOOLEAN;
BEGIN
i := 0;
err := FALSE;
WHILE (i < n) & ~err DO
c := Lines.getChar(line, i);
IF c # Lines.TAB1 THEN
IF ~file.putChar(file, ORD(c)) THEN
err := TRUE;
DEC(i)
END
END;
INC(i)
END
RETURN i
END putString;
PROCEDURE newLine* (file: tOutput): BOOLEAN;
VAR
i: INTEGER;
BEGIN
i := 0;
WHILE (file.eol[i] # 0X) & file.putChar(file, ORD(file.eol[i])) DO
INC(i)
END
RETURN i = LENGTH(file.eol)
END newLine;
PROCEDURE putCharUTF8 (file: tOutput; code: INTEGER): BOOLEAN;
VAR
res: BOOLEAN;
BEGIN
res := TRUE;
IF code <= 7FH THEN
putByte(file, code)
ELSIF (80H <= code) & (code <= 7FFH) THEN
putByte(file, code DIV 64 + 0C0H);
putByte(file, code MOD 64 + 080H)
ELSIF (800H <= code) & (code <= 0FFFFH) THEN
putByte(file, code DIV 4096 + 0E0H);
putByte(file, (code DIV 64) MOD 64 + 080H);
putByte(file, code MOD 64 + 080H)
ELSE
res := FALSE
END
RETURN res
END putCharUTF8;
PROCEDURE putCharW1251 (file: tOutput; code: INTEGER): BOOLEAN;
VAR
n: INTEGER;
res: BOOLEAN;
BEGIN
res := TRUE;
n := E.UNI[code, E.W1251];
IF n # E.UNDEF THEN
putByte(file, n)
ELSE
res := FALSE
END
RETURN res
END putCharW1251;
PROCEDURE putCharCP866 (file: tOutput; code: INTEGER): BOOLEAN;
VAR
n: INTEGER;
res: BOOLEAN;
BEGIN
res := TRUE;
n := E.UNI[code, E.CP866];
IF n # E.UNDEF THEN
putByte(file, n)
ELSE
res := FALSE
END
RETURN res
END putCharCP866;
PROCEDURE putCharUTF16LE (file: tOutput; code: INTEGER): BOOLEAN;
VAR
res: BOOLEAN;
BEGIN
IF (0 <= code) & (code <= 65535) THEN
res := TRUE;
putByte(file, code MOD 256);
putByte(file, code DIV 256)
ELSE
res := FALSE
END
RETURN res
END putCharUTF16LE;
PROCEDURE close* (VAR file: tOutput): BOOLEAN;
VAR
res: BOOLEAN;
BEGIN
res := TRUE;
IF file # NIL THEN
IF file.handle # NIL THEN
IF file.pos > 0 THEN
res := File.Write(file.handle, SYSTEM.ADR(file.buffer[0]), file.pos) = file.pos
END;
File.Close(file.handle)
END;
DISPOSE(file)
END
RETURN res
END close;
PROCEDURE create* (name: tFileName; enc, nl: INTEGER): tOutput;
VAR
res: tOutput;
BEGIN
NEW(res);
res.pos := 0;
res.eol := eol[nl];
res.putChar := NIL;
IF (enc = E.UTF8) OR (enc = E.UTF8BOM) THEN
res.putChar := putCharUTF8;
IF enc = E.UTF8BOM THEN
ASSERT(res.putChar(res, ORD(BOM)))
END
ELSIF enc = E.UTF16LE THEN
res.putChar := putCharUTF16LE;
ELSIF enc = E.W1251 THEN
res.putChar := putCharW1251
ELSIF enc = E.CP866 THEN
res.putChar := putCharCP866
END;
ASSERT(res.putChar # NIL);
res.handle := File.Create(name);
IF res.handle = NIL THEN
DISPOSE(res)
END
RETURN res
END create;
PROCEDURE destroy* (VAR file: tInput);
BEGIN
IF file # NIL THEN
IF file.buffer # 0 THEN
file.buffer := KOSAPI.free(file.buffer - 12*ORD(file.clipbrd))
END;
DISPOSE(file)
END
END destroy;
BEGIN
eol[EOL_CRLF] := CR + LF;
eol[EOL_LF] := LF;
eol[EOL_CR] := CR;
eolNames[EOL_CRLF] := "CRLF";
eolNames[EOL_LF] := "LF";
eolNames[EOL_CR] := "CR"
END RW.