forked from KolibriOS/kolibrios
82d72daa76
git-svn-id: svn://kolibrios.org@7597 a494cfbc-eb01-0410-851d-a64ba20cac60
723 lines
15 KiB
Plaintext
723 lines
15 KiB
Plaintext
(*
|
|
BSD 2-Clause License
|
|
|
|
Copyright (c) 2018, Anton Krotov
|
|
All rights reserved.
|
|
*)
|
|
|
|
MODULE SCAN;
|
|
|
|
IMPORT TEXTDRV, AVL := AVLTREES, ARITH, S := STRINGS, C := COLLECTIONS;
|
|
|
|
|
|
CONST
|
|
|
|
LEXLEN = 1024;
|
|
|
|
lxUNDEF* = 0; lxIDENT* = 1; lxINTEGER* = 2; lxHEX* = 3;
|
|
lxCHAR* = 4; lxFLOAT* = 5; lxSTRING* = 6; lxCOMMENT* = 7;
|
|
lxEOF* = 8;
|
|
|
|
lxKW = 101;
|
|
|
|
lxARRAY* = 101; lxBEGIN* = 102; lxBY* = 103; lxCASE* = 104;
|
|
lxCONST* = 105; lxDIV* = 106; lxDO* = 107; lxELSE* = 108;
|
|
lxELSIF* = 109; lxEND* = 110; lxFALSE* = 111; lxFOR* = 112;
|
|
lxIF* = 113; lxIMPORT* = 114; lxIN* = 115; lxIS* = 116;
|
|
lxMOD* = 117; lxMODULE* = 118; lxNIL* = 119; lxOF* = 120;
|
|
lxOR* = 121; lxPOINTER* = 122; lxPROCEDURE* = 123; lxRECORD* = 124;
|
|
lxREPEAT* = 125; lxRETURN* = 126; lxTHEN* = 127; lxTO* = 128;
|
|
lxTRUE* = 129; lxTYPE* = 130; lxUNTIL* = 131; lxVAR* = 132;
|
|
lxWHILE* = 133;
|
|
|
|
lxPLUS* = 201; lxMINUS* = 202; lxMUL* = 203; lxSLASH* = 204;
|
|
lxNOT* = 205; lxAND* = 206; lxPOINT* = 207; lxCOMMA* = 208;
|
|
lxSEMI* = 209; lxBAR* = 210; lxLROUND* = 211; lxLSQUARE* = 212;
|
|
lxLCURLY* = 213; lxCARET* = 214; lxEQ* = 215; lxNE* = 216;
|
|
lxLT* = 217; lxGT* = 218; lxCOLON* = 219; lxRROUND* = 220;
|
|
lxRSQUARE* = 221; lxRCURLY* = 222; lxLE* = 223; lxGE* = 224;
|
|
lxASSIGN* = 225; lxRANGE* = 226;
|
|
|
|
lxERROR01 = -1; lxERROR02 = -2; lxERROR03 = -3; lxERROR04 = -4;
|
|
lxERROR05 = -5; lxERROR06 = -6; lxERROR07 = -7; lxERROR08 = -8;
|
|
lxERROR09 = -9; lxERROR10 = -10; lxERROR11 = -11; lxERROR12 = -12;
|
|
|
|
|
|
TYPE
|
|
|
|
LEXSTR* = ARRAY LEXLEN OF CHAR;
|
|
|
|
IDENT* = POINTER TO RECORD (AVL.DATA)
|
|
|
|
s*: LEXSTR;
|
|
offset*, offsetW*: INTEGER
|
|
|
|
END;
|
|
|
|
POSITION* = RECORD
|
|
|
|
line*, col*: INTEGER
|
|
|
|
END;
|
|
|
|
LEX* = RECORD
|
|
|
|
s*: LEXSTR;
|
|
length*: INTEGER;
|
|
sym*: INTEGER;
|
|
pos*: POSITION;
|
|
ident*: IDENT;
|
|
string*: IDENT;
|
|
value*: ARITH.VALUE;
|
|
error*: INTEGER;
|
|
|
|
over: BOOLEAN
|
|
|
|
END;
|
|
|
|
SCANNER* = POINTER TO RECORD (C.ITEM)
|
|
|
|
text: TEXTDRV.TEXT;
|
|
range: BOOLEAN
|
|
|
|
END;
|
|
|
|
KEYWORD = ARRAY 10 OF CHAR;
|
|
|
|
|
|
VAR
|
|
|
|
vocabulary: RECORD
|
|
|
|
KW: ARRAY 33 OF KEYWORD;
|
|
|
|
delimiters: ARRAY 256 OF BOOLEAN;
|
|
|
|
idents: AVL.NODE;
|
|
ident: IDENT
|
|
|
|
END;
|
|
|
|
scanners: C.COLLECTION;
|
|
|
|
|
|
PROCEDURE nodecmp (a, b: AVL.DATA): INTEGER;
|
|
RETURN ORD(a(IDENT).s > b(IDENT).s) - ORD(a(IDENT).s < b(IDENT).s)
|
|
END nodecmp;
|
|
|
|
|
|
PROCEDURE key (VAR lex: LEX);
|
|
VAR
|
|
L, R, M: INTEGER;
|
|
|
|
BEGIN
|
|
L := 0;
|
|
R := LEN(vocabulary.KW) - 1;
|
|
M := (L + R) DIV 2;
|
|
|
|
WHILE L # M DO
|
|
IF lex.s > vocabulary.KW[M] THEN
|
|
L := M;
|
|
M := (L + R) DIV 2
|
|
ELSIF lex.s < vocabulary.KW[M] THEN
|
|
R := M;
|
|
M := (L + R) DIV 2
|
|
ELSE
|
|
lex.sym := lxKW + M;
|
|
L := M;
|
|
R := M
|
|
END
|
|
END;
|
|
|
|
IF L # R THEN
|
|
IF lex.s = vocabulary.KW[L] THEN
|
|
lex.sym := lxKW + L
|
|
END;
|
|
|
|
IF lex.s = vocabulary.KW[R] THEN
|
|
lex.sym := lxKW + R
|
|
END
|
|
END
|
|
|
|
END key;
|
|
|
|
|
|
PROCEDURE enterid* (s: LEXSTR): IDENT;
|
|
VAR
|
|
newnode: BOOLEAN;
|
|
node: AVL.NODE;
|
|
|
|
BEGIN
|
|
vocabulary.ident.s := s;
|
|
vocabulary.idents := AVL.insert(vocabulary.idents, vocabulary.ident, nodecmp, newnode, node);
|
|
|
|
IF newnode THEN
|
|
NEW(vocabulary.ident);
|
|
vocabulary.ident.offset := -1;
|
|
vocabulary.ident.offsetW := -1
|
|
END
|
|
|
|
RETURN node.data(IDENT)
|
|
END enterid;
|
|
|
|
|
|
PROCEDURE putchar (VAR lex: LEX; c: CHAR);
|
|
BEGIN
|
|
IF lex.length < LEXLEN - 1 THEN
|
|
lex.s[lex.length] := c;
|
|
INC(lex.length);
|
|
lex.s[lex.length] := 0X
|
|
ELSE
|
|
lex.over := TRUE
|
|
END
|
|
END putchar;
|
|
|
|
|
|
PROCEDURE ident (text: TEXTDRV.TEXT; VAR lex: LEX);
|
|
VAR
|
|
c: CHAR;
|
|
|
|
BEGIN
|
|
c := text.peak(text);
|
|
ASSERT(S.letter(c));
|
|
|
|
WHILE S.letter(c) OR S.digit(c) DO
|
|
putchar(lex, c);
|
|
text.nextc(text);
|
|
c := text.peak(text)
|
|
END;
|
|
|
|
IF lex.over THEN
|
|
lex.sym := lxERROR06
|
|
ELSE
|
|
lex.sym := lxIDENT;
|
|
key(lex)
|
|
END;
|
|
|
|
IF lex.sym = lxIDENT THEN
|
|
lex.ident := enterid(lex.s)
|
|
END
|
|
|
|
END ident;
|
|
|
|
|
|
PROCEDURE number (text: TEXTDRV.TEXT; VAR lex: LEX; VAR range: BOOLEAN);
|
|
VAR
|
|
c: CHAR;
|
|
hex: BOOLEAN;
|
|
error: INTEGER;
|
|
|
|
BEGIN
|
|
c := text.peak(text);
|
|
ASSERT(S.digit(c));
|
|
|
|
error := 0;
|
|
|
|
range := FALSE;
|
|
|
|
lex.sym := lxINTEGER;
|
|
hex := FALSE;
|
|
|
|
WHILE S.digit(c) DO
|
|
putchar(lex, c);
|
|
text.nextc(text);
|
|
c := text.peak(text)
|
|
END;
|
|
|
|
WHILE S.hexdigit(c) DO
|
|
putchar(lex, c);
|
|
text.nextc(text);
|
|
c := text.peak(text);
|
|
hex := TRUE
|
|
END;
|
|
|
|
IF c = "H" THEN
|
|
putchar(lex, c);
|
|
text.nextc(text);
|
|
lex.sym := lxHEX
|
|
|
|
ELSIF c = "X" THEN
|
|
putchar(lex, c);
|
|
text.nextc(text);
|
|
lex.sym := lxCHAR
|
|
|
|
ELSIF c = "." THEN
|
|
|
|
IF hex THEN
|
|
lex.sym := lxERROR01
|
|
ELSE
|
|
|
|
text.nextc(text);
|
|
c := text.peak(text);
|
|
|
|
IF c # "." THEN
|
|
putchar(lex, ".");
|
|
lex.sym := lxFLOAT
|
|
ELSE
|
|
lex.sym := lxINTEGER;
|
|
range := TRUE
|
|
END;
|
|
|
|
WHILE S.digit(c) DO
|
|
putchar(lex, c);
|
|
text.nextc(text);
|
|
c := text.peak(text)
|
|
END;
|
|
|
|
IF c = "E" THEN
|
|
|
|
putchar(lex, c);
|
|
text.nextc(text);
|
|
c := text.peak(text);
|
|
IF (c = "+") OR (c = "-") THEN
|
|
putchar(lex, c);
|
|
text.nextc(text);
|
|
c := text.peak(text)
|
|
END;
|
|
|
|
IF S.digit(c) THEN
|
|
WHILE S.digit(c) DO
|
|
putchar(lex, c);
|
|
text.nextc(text);
|
|
c := text.peak(text)
|
|
END
|
|
ELSE
|
|
lex.sym := lxERROR02
|
|
END
|
|
|
|
END
|
|
|
|
END
|
|
|
|
ELSE
|
|
|
|
IF hex THEN
|
|
lex.sym := lxERROR01
|
|
END
|
|
|
|
END;
|
|
|
|
IF lex.over & (lex.sym >= 0) THEN
|
|
lex.sym := lxERROR07
|
|
END;
|
|
|
|
IF lex.sym = lxINTEGER THEN
|
|
ARITH.iconv(lex.s, lex.value, error)
|
|
ELSIF (lex.sym = lxHEX) OR (lex.sym = lxCHAR) THEN
|
|
ARITH.hconv(lex.s, lex.value, error)
|
|
ELSIF lex.sym = lxFLOAT THEN
|
|
ARITH.fconv(lex.s, lex.value, error)
|
|
END;
|
|
|
|
CASE error OF
|
|
|0:
|
|
|1: lex.sym := lxERROR08
|
|
|2: lex.sym := lxERROR09
|
|
|3: lex.sym := lxERROR10
|
|
|4: lex.sym := lxERROR11
|
|
|5: lex.sym := lxERROR12
|
|
END
|
|
|
|
END number;
|
|
|
|
|
|
PROCEDURE string (text: TEXTDRV.TEXT; VAR lex: LEX);
|
|
VAR
|
|
c, c1: CHAR;
|
|
n: INTEGER;
|
|
quot: CHAR;
|
|
|
|
BEGIN
|
|
quot := text.peak(text);
|
|
|
|
ASSERT((quot = '"') OR (quot = "'"));
|
|
|
|
text.nextc(text);
|
|
c := text.peak(text);
|
|
c1 := c;
|
|
n := 0;
|
|
|
|
WHILE (c # quot) & (c # 0X) & ~text.eol & ~text.eof DO
|
|
putchar(lex, c);
|
|
text.nextc(text);
|
|
c := text.peak(text);
|
|
INC(n)
|
|
END;
|
|
|
|
IF c = quot THEN
|
|
text.nextc(text);
|
|
IF lex.over THEN
|
|
lex.sym := lxERROR05
|
|
ELSE
|
|
IF n # 1 THEN
|
|
lex.sym := lxSTRING
|
|
ELSE
|
|
lex.sym := lxCHAR;
|
|
ARITH.setChar(lex.value, ORD(c1))
|
|
END
|
|
END
|
|
ELSE
|
|
lex.sym := lxERROR03
|
|
END;
|
|
|
|
IF lex.sym = lxSTRING THEN
|
|
lex.string := enterid(lex.s);
|
|
lex.value.typ := ARITH.tSTRING;
|
|
lex.value.string := lex.string
|
|
END
|
|
|
|
END string;
|
|
|
|
|
|
PROCEDURE comment (text: TEXTDRV.TEXT);
|
|
VAR
|
|
c: CHAR;
|
|
cond, depth: INTEGER;
|
|
|
|
BEGIN
|
|
cond := 0;
|
|
depth := 1;
|
|
|
|
REPEAT
|
|
|
|
c := text.peak(text);
|
|
text.nextc(text);
|
|
|
|
IF c = "*" THEN
|
|
IF cond = 1 THEN
|
|
cond := 0;
|
|
INC(depth)
|
|
ELSE
|
|
cond := 2
|
|
END
|
|
ELSIF c = ")" THEN
|
|
IF cond = 2 THEN
|
|
DEC(depth)
|
|
END;
|
|
cond := 0
|
|
ELSIF c = "(" THEN
|
|
cond := 1
|
|
ELSE
|
|
cond := 0
|
|
END
|
|
|
|
UNTIL (depth = 0) OR text.eof
|
|
|
|
END comment;
|
|
|
|
|
|
PROCEDURE delimiter (text: TEXTDRV.TEXT; VAR lex: LEX; VAR range: BOOLEAN);
|
|
VAR
|
|
c: CHAR;
|
|
|
|
BEGIN
|
|
c := text.peak(text);
|
|
|
|
IF range THEN
|
|
ASSERT(c = ".")
|
|
END;
|
|
|
|
putchar(lex, c);
|
|
text.nextc(text);
|
|
|
|
CASE c OF
|
|
|"+":
|
|
lex.sym := lxPLUS
|
|
|
|
|"-":
|
|
lex.sym := lxMINUS
|
|
|
|
|"*":
|
|
lex.sym := lxMUL
|
|
|
|
|"/":
|
|
lex.sym := lxSLASH;
|
|
|
|
IF text.peak(text) = "/" THEN
|
|
lex.sym := lxCOMMENT;
|
|
REPEAT
|
|
text.nextc(text)
|
|
UNTIL text.eol OR text.eof
|
|
END
|
|
|
|
|"~":
|
|
lex.sym := lxNOT
|
|
|
|
|"&":
|
|
lex.sym := lxAND
|
|
|
|
|".":
|
|
IF range THEN
|
|
|
|
putchar(lex, ".");
|
|
lex.sym := lxRANGE;
|
|
range := FALSE;
|
|
DEC(lex.pos.col)
|
|
|
|
ELSE
|
|
|
|
lex.sym := lxPOINT;
|
|
c := text.peak(text);
|
|
|
|
IF c = "." THEN
|
|
lex.sym := lxRANGE;
|
|
putchar(lex, c);
|
|
text.nextc(text)
|
|
END
|
|
|
|
END
|
|
|
|
|",":
|
|
lex.sym := lxCOMMA
|
|
|
|
|";":
|
|
lex.sym := lxSEMI
|
|
|
|
|"|":
|
|
lex.sym := lxBAR
|
|
|
|
|"(":
|
|
lex.sym := lxLROUND;
|
|
c := text.peak(text);
|
|
|
|
IF c = "*" THEN
|
|
lex.sym := lxCOMMENT;
|
|
putchar(lex, c);
|
|
text.nextc(text);
|
|
comment(text)
|
|
END
|
|
|
|
|"[":
|
|
lex.sym := lxLSQUARE
|
|
|
|
|"{":
|
|
lex.sym := lxLCURLY
|
|
|
|
|"^":
|
|
lex.sym := lxCARET
|
|
|
|
|"=":
|
|
lex.sym := lxEQ
|
|
|
|
|"#":
|
|
lex.sym := lxNE
|
|
|
|
|"<":
|
|
lex.sym := lxLT;
|
|
c := text.peak(text);
|
|
|
|
IF c = "=" THEN
|
|
lex.sym := lxLE;
|
|
putchar(lex, c);
|
|
text.nextc(text)
|
|
END
|
|
|
|
|">":
|
|
lex.sym := lxGT;
|
|
c := text.peak(text);
|
|
|
|
IF c = "=" THEN
|
|
lex.sym := lxGE;
|
|
putchar(lex, c);
|
|
text.nextc(text)
|
|
END
|
|
|
|
|":":
|
|
lex.sym := lxCOLON;
|
|
c := text.peak(text);
|
|
|
|
IF c = "=" THEN
|
|
lex.sym := lxASSIGN;
|
|
putchar(lex, c);
|
|
text.nextc(text)
|
|
END
|
|
|
|
|")":
|
|
lex.sym := lxRROUND
|
|
|
|
|"]":
|
|
lex.sym := lxRSQUARE
|
|
|
|
|"}":
|
|
lex.sym := lxRCURLY
|
|
|
|
END
|
|
|
|
END delimiter;
|
|
|
|
|
|
PROCEDURE Next* (scanner: SCANNER; VAR lex: LEX);
|
|
VAR
|
|
c: CHAR;
|
|
text: TEXTDRV.TEXT;
|
|
|
|
BEGIN
|
|
text := scanner.text;
|
|
|
|
REPEAT
|
|
|
|
c := text.peak(text);
|
|
|
|
WHILE S.space(c) DO
|
|
text.nextc(text);
|
|
c := text.peak(text)
|
|
END;
|
|
|
|
lex.s[0] := 0X;
|
|
lex.length := 0;
|
|
lex.sym := lxUNDEF;
|
|
lex.pos.line := text.line;
|
|
lex.pos.col := text.col;
|
|
lex.ident := NIL;
|
|
lex.over := FALSE;
|
|
|
|
IF S.letter(c) THEN
|
|
ident(text, lex)
|
|
ELSIF S.digit(c) THEN
|
|
number(text, lex, scanner.range)
|
|
ELSIF (c = '"') OR (c = "'") THEN
|
|
string(text, lex)
|
|
ELSIF vocabulary.delimiters[ORD(c)] THEN
|
|
delimiter(text, lex, scanner.range)
|
|
ELSIF c = 0X THEN
|
|
lex.sym := lxEOF;
|
|
IF text.eof THEN
|
|
INC(lex.pos.col)
|
|
END
|
|
ELSE
|
|
putchar(lex, c);
|
|
text.nextc(text);
|
|
lex.sym := lxERROR04
|
|
END;
|
|
|
|
IF lex.sym < 0 THEN
|
|
lex.error := -lex.sym
|
|
ELSE
|
|
lex.error := 0
|
|
END
|
|
|
|
UNTIL lex.sym # lxCOMMENT
|
|
|
|
END Next;
|
|
|
|
|
|
PROCEDURE NewScanner (): SCANNER;
|
|
VAR
|
|
scan: SCANNER;
|
|
citem: C.ITEM;
|
|
|
|
BEGIN
|
|
citem := C.pop(scanners);
|
|
IF citem = NIL THEN
|
|
NEW(scan)
|
|
ELSE
|
|
scan := citem(SCANNER)
|
|
END
|
|
|
|
RETURN scan
|
|
END NewScanner;
|
|
|
|
|
|
PROCEDURE open* (name: ARRAY OF CHAR): SCANNER;
|
|
VAR
|
|
scanner: SCANNER;
|
|
text: TEXTDRV.TEXT;
|
|
|
|
BEGIN
|
|
text := TEXTDRV.create();
|
|
IF text.open(text, name) THEN
|
|
scanner := NewScanner();
|
|
scanner.text := text;
|
|
scanner.range := FALSE
|
|
ELSE
|
|
scanner := NIL;
|
|
TEXTDRV.destroy(text)
|
|
END
|
|
|
|
RETURN scanner
|
|
END open;
|
|
|
|
|
|
PROCEDURE close* (VAR scanner: SCANNER);
|
|
BEGIN
|
|
IF scanner # NIL THEN
|
|
IF scanner.text # NIL THEN
|
|
TEXTDRV.destroy(scanner.text)
|
|
END;
|
|
|
|
C.push(scanners, scanner);
|
|
scanner := NIL
|
|
END
|
|
END close;
|
|
|
|
|
|
PROCEDURE init;
|
|
VAR
|
|
i: INTEGER;
|
|
delim: ARRAY 23 OF CHAR;
|
|
|
|
PROCEDURE enterkw (VAR i: INTEGER; kw: KEYWORD);
|
|
BEGIN
|
|
vocabulary.KW[i] := kw;
|
|
INC(i)
|
|
END enterkw;
|
|
|
|
BEGIN
|
|
scanners := C.create();
|
|
|
|
FOR i := 0 TO 255 DO
|
|
vocabulary.delimiters[i] := FALSE
|
|
END;
|
|
|
|
delim := "+-*/~&.,;|([{^=#<>:)]}";
|
|
|
|
FOR i := 0 TO LEN(delim) - 2 DO
|
|
vocabulary.delimiters[ORD(delim[i])] := TRUE
|
|
END;
|
|
|
|
i := 0;
|
|
enterkw(i, "ARRAY");
|
|
enterkw(i, "BEGIN");
|
|
enterkw(i, "BY");
|
|
enterkw(i, "CASE");
|
|
enterkw(i, "CONST");
|
|
enterkw(i, "DIV");
|
|
enterkw(i, "DO");
|
|
enterkw(i, "ELSE");
|
|
enterkw(i, "ELSIF");
|
|
enterkw(i, "END");
|
|
enterkw(i, "FALSE");
|
|
enterkw(i, "FOR");
|
|
enterkw(i, "IF");
|
|
enterkw(i, "IMPORT");
|
|
enterkw(i, "IN");
|
|
enterkw(i, "IS");
|
|
enterkw(i, "MOD");
|
|
enterkw(i, "MODULE");
|
|
enterkw(i, "NIL");
|
|
enterkw(i, "OF");
|
|
enterkw(i, "OR");
|
|
enterkw(i, "POINTER");
|
|
enterkw(i, "PROCEDURE");
|
|
enterkw(i, "RECORD");
|
|
enterkw(i, "REPEAT");
|
|
enterkw(i, "RETURN");
|
|
enterkw(i, "THEN");
|
|
enterkw(i, "TO");
|
|
enterkw(i, "TRUE");
|
|
enterkw(i, "TYPE");
|
|
enterkw(i, "UNTIL");
|
|
enterkw(i, "VAR");
|
|
enterkw(i, "WHILE");
|
|
|
|
NEW(vocabulary.ident);
|
|
vocabulary.ident.s := "";
|
|
vocabulary.ident.offset := -1;
|
|
vocabulary.ident.offsetW := -1;
|
|
vocabulary.idents := NIL
|
|
END init;
|
|
|
|
|
|
BEGIN
|
|
init
|
|
END SCAN. |