761 lines
16 KiB
Plaintext
Raw Normal View History

(*
BSD 2-Clause License
Copyright (c) 2018-2020, Anton Krotov
All rights reserved.
*)
MODULE SCAN;
IMPORT TXT := TEXTDRV, AVL := AVLTREES, ARITH, S := STRINGS, ERRORS, LISTS;
CONST
LEXLEN = 1024;
lxUNDEF* = 0; lxIDENT* = 1; lxINTEGER* = 2; lxHEX* = 3;
lxCHAR* = 4; lxFLOAT* = 5; lxSTRING* = 6; lxCOMMENT* = 7;
lxEOF* = 8;
lxPLUS* = 21; lxMINUS* = 22; lxMUL* = 23; lxSLASH* = 24;
lxNOT* = 25; lxAND* = 26; lxPOINT* = 27; lxCOMMA* = 28;
lxSEMI* = 29; lxBAR* = 30; lxLROUND* = 31; lxLSQUARE* = 32;
lxLCURLY* = 33; lxCARET* = 34; lxEQ* = 35; lxNE* = 36;
lxLT* = 37; lxGT* = 38; lxCOLON* = 39; lxRROUND* = 40;
lxRSQUARE* = 41; lxRCURLY* = 42; lxLE* = 43; lxGE* = 44;
lxASSIGN* = 45; lxRANGE* = 46;
lxKW = 51;
lxARRAY* = 51; lxBEGIN* = 52; lxBY* = 53; lxCASE* = 54;
lxCONST* = 55; lxDIV* = 56; lxDO* = 57; lxELSE* = 58;
lxELSIF* = 59; lxEND* = 60; lxFALSE* = 61; lxFOR* = 62;
lxIF* = 63; lxIMPORT* = 64; lxIN* = 65; lxIS* = 66;
lxMOD* = 67; lxMODULE* = 68; lxNIL* = 69; lxOF* = 70;
lxOR* = 71; lxPOINTER* = 72; lxPROCEDURE* = 73; lxRECORD* = 74;
lxREPEAT* = 75; lxRETURN* = 76; lxTHEN* = 77; lxTO* = 78;
lxTRUE* = 79; lxTYPE* = 80; lxUNTIL* = 81; lxVAR* = 82;
lxWHILE* = 83;
lxERROR01* = -1; lxERROR02* = -2; lxERROR03* = -3; lxERROR04* = -4;
lxERROR05* = -5; lxERROR06* = -6; lxERROR07* = -7; lxERROR08* = -8;
lxERROR09* = -9; lxERROR10* = -10; lxERROR11* = -11; lxERROR12* = -12;
lxERROR13* = -13;
TYPE
LEXSTR* = ARRAY LEXLEN OF CHAR;
DEF = POINTER TO RECORD (LISTS.ITEM)
ident: LEXSTR
END;
IDENT* = POINTER TO RECORD (AVL.DATA)
s*: LEXSTR;
offset*, offsetW*: INTEGER;
key: INTEGER
END;
POSITION* = RECORD
line*, col*: INTEGER
END;
LEX* = RECORD
s*: LEXSTR;
length*: INTEGER;
sym*: INTEGER;
pos*: POSITION;
ident*: IDENT;
string*: IDENT;
value*: ARITH.VALUE;
error*: INTEGER;
over: BOOLEAN
END;
SCANNER* = TXT.TEXT;
VAR
idents: AVL.NODE;
delimiters: ARRAY 256 OF BOOLEAN;
NewIdent: IDENT;
upto, LowerCase, _if: BOOLEAN;
def: LISTS.LIST;
PROCEDURE nodecmp (a, b: AVL.DATA): INTEGER;
RETURN ORD(a(IDENT).s > b(IDENT).s) - ORD(a(IDENT).s < b(IDENT).s)
END nodecmp;
PROCEDURE enterid* (s: LEXSTR): IDENT;
VAR
newnode: BOOLEAN;
node: AVL.NODE;
BEGIN
NewIdent.s := s;
idents := AVL.insert(idents, NewIdent, nodecmp, newnode, node);
IF newnode THEN
NEW(NewIdent);
NewIdent.offset := -1;
NewIdent.offsetW := -1;
NewIdent.key := 0
END
RETURN node.data(IDENT)
END enterid;
PROCEDURE putchar (VAR lex: LEX; c: CHAR);
BEGIN
IF lex.length < LEXLEN - 1 THEN
lex.s[lex.length] := c;
INC(lex.length);
lex.s[lex.length] := 0X
ELSE
lex.over := TRUE
END
END putchar;
PROCEDURE nextc (text: TXT.TEXT): CHAR;
BEGIN
TXT.next(text)
RETURN text.peak
END nextc;
PROCEDURE ident (text: TXT.TEXT; VAR lex: LEX);
VAR
c: CHAR;
BEGIN
c := text.peak;
ASSERT(S.letter(c));
WHILE S.letter(c) OR S.digit(c) DO
putchar(lex, c);
c := nextc(text)
END;
IF lex.over THEN
lex.sym := lxERROR06
ELSE
lex.ident := enterid(lex.s);
IF lex.ident.key # 0 THEN
lex.sym := lex.ident.key
ELSE
lex.sym := lxIDENT
END
END
END ident;
PROCEDURE number (text: TXT.TEXT; VAR lex: LEX);
VAR
c: CHAR;
hex: BOOLEAN;
error, sym: INTEGER;
BEGIN
c := text.peak;
ASSERT(S.digit(c));
error := 0;
sym := lxINTEGER;
hex := FALSE;
WHILE S.digit(c) DO
putchar(lex, c);
c := nextc(text)
END;
WHILE S.hexdigit(c) DO
putchar(lex, c);
c := nextc(text);
hex := TRUE
END;
IF c = "H" THEN
putchar(lex, c);
TXT.next(text);
sym := lxHEX
ELSIF c = "X" THEN
putchar(lex, c);
TXT.next(text);
sym := lxCHAR
ELSIF c = "." THEN
IF hex THEN
sym := lxERROR01
ELSE
c := nextc(text);
IF c # "." THEN
putchar(lex, ".");
sym := lxFLOAT
ELSE
sym := lxINTEGER;
text.peak := 7FX;
upto := TRUE
END;
WHILE S.digit(c) DO
putchar(lex, c);
c := nextc(text)
END;
IF c = "E" THEN
putchar(lex, c);
c := nextc(text);
IF (c = "+") OR (c = "-") THEN
putchar(lex, c);
c := nextc(text)
END;
IF S.digit(c) THEN
WHILE S.digit(c) DO
putchar(lex, c);
c := nextc(text)
END
ELSE
sym := lxERROR02
END
END
END
ELSIF hex THEN
sym := lxERROR01
END;
IF lex.over & (sym >= 0) THEN
sym := lxERROR07
END;
IF sym = lxINTEGER THEN
ARITH.iconv(lex.s, lex.value, error)
ELSIF (sym = lxHEX) OR (sym = lxCHAR) THEN
ARITH.hconv(lex.s, lex.value, error)
ELSIF sym = lxFLOAT THEN
ARITH.fconv(lex.s, lex.value, error)
END;
CASE error OF
|0:
|1: sym := lxERROR08
|2: sym := lxERROR09
|3: sym := lxERROR10
|4: sym := lxERROR11
|5: sym := lxERROR12
END;
lex.sym := sym
END number;
PROCEDURE string (text: TXT.TEXT; VAR lex: LEX; quot: CHAR);
VAR
c: CHAR;
n: INTEGER;
BEGIN
c := nextc(text);
n := 0;
WHILE (c # quot) & (c # 0X) & ~text.eol & ~text.eof DO
putchar(lex, c);
c := nextc(text);
INC(n)
END;
IF c = quot THEN
TXT.next(text);
IF lex.over THEN
lex.sym := lxERROR05
ELSE
IF n # 1 THEN
lex.sym := lxSTRING
ELSE
lex.sym := lxCHAR;
ARITH.setChar(lex.value, ORD(lex.s[0]))
END
END
ELSE
lex.sym := lxERROR03
END;
IF lex.sym = lxSTRING THEN
lex.string := enterid(lex.s);
lex.value.typ := ARITH.tSTRING;
lex.value.string := lex.string
END
END string;
PROCEDURE comment (text: TXT.TEXT);
VAR
c: CHAR;
cond, depth: INTEGER;
BEGIN
cond := 0;
depth := 1;
REPEAT
c := text.peak;
TXT.next(text);
IF c = "*" THEN
IF cond = 1 THEN
cond := 0;
INC(depth)
ELSE
cond := 2
END
ELSIF c = ")" THEN
IF cond = 2 THEN
DEC(depth)
END;
cond := 0
ELSIF c = "(" THEN
cond := 1
ELSE
cond := 0
END
UNTIL (depth = 0) OR text.eof
END comment;
PROCEDURE delimiter (text: TXT.TEXT; VAR lex: LEX; c: CHAR);
VAR
sym: INTEGER;
BEGIN
putchar(lex, c);
c := nextc(text);
CASE lex.s[0] OF
|"+":
sym := lxPLUS
|"-":
sym := lxMINUS
|"*":
sym := lxMUL
|"/":
sym := lxSLASH;
IF c = "/" THEN
sym := lxCOMMENT;
REPEAT
TXT.next(text)
UNTIL text.eol OR text.eof
END
|"~":
sym := lxNOT
|"&":
sym := lxAND
|".":
sym := lxPOINT;
IF c = "." THEN
sym := lxRANGE;
putchar(lex, c);
TXT.next(text)
END
|",":
sym := lxCOMMA
|";":
sym := lxSEMI
|"|":
sym := lxBAR
|"(":
sym := lxLROUND;
IF c = "*" THEN
sym := lxCOMMENT;
TXT.next(text);
comment(text)
END
|"[":
sym := lxLSQUARE
|"{":
sym := lxLCURLY
|"^":
sym := lxCARET
|"=":
sym := lxEQ
|"#":
sym := lxNE
|"<":
sym := lxLT;
IF c = "=" THEN
sym := lxLE;
putchar(lex, c);
TXT.next(text)
END
|">":
sym := lxGT;
IF c = "=" THEN
sym := lxGE;
putchar(lex, c);
TXT.next(text)
END
|":":
sym := lxCOLON;
IF c = "=" THEN
sym := lxASSIGN;
putchar(lex, c);
TXT.next(text)
END
|")":
sym := lxRROUND
|"]":
sym := lxRSQUARE
|"}":
sym := lxRCURLY
END;
lex.sym := sym
END delimiter;
PROCEDURE Next* (text: SCANNER; VAR lex: LEX);
VAR
c: CHAR;
PROCEDURE check (cond: BOOLEAN; text: SCANNER; lex: LEX; errno: INTEGER);
BEGIN
IF ~cond THEN
ERRORS.ErrorMsg(text.fname, lex.pos.line, lex.pos.col, errno)
END
END check;
PROCEDURE IsDef (str: ARRAY OF CHAR): BOOLEAN;
VAR
cur: DEF;
BEGIN
cur := def.first(DEF);
WHILE (cur # NIL) & (cur.ident # str) DO
cur := cur.next(DEF)
END
RETURN cur # NIL
END IsDef;
PROCEDURE Skip (text: SCANNER);
VAR
i: INTEGER;
BEGIN
i := 0;
WHILE (i <= text.ifc) & ~text._skip[i] DO
INC(i)
END;
text.skip := i <= text.ifc
END Skip;
PROCEDURE prep_if (text: SCANNER; VAR lex: LEX);
VAR
skip: BOOLEAN;
BEGIN
INC(text.ifc);
text._elsif[text.ifc] := lex.sym = lxELSIF;
IF lex.sym = lxIF THEN
INC(text.elsec);
text._else[text.elsec] := FALSE
END;
_if := TRUE;
skip := TRUE;
text.skip := FALSE;
Next(text, lex);
check(lex.sym = lxLROUND, text, lex, 64);
Next(text, lex);
check(lex.sym = lxIDENT, text, lex, 22);
REPEAT
IF IsDef(lex.s) THEN
skip := FALSE
END;
Next(text, lex);
IF lex.sym = lxBAR THEN
Next(text, lex);
check(lex.sym = lxIDENT, text, lex, 22)
ELSE
check(lex.sym = lxRROUND, text, lex, 33)
END
UNTIL lex.sym = lxRROUND;
_if := FALSE;
text._skip[text.ifc] := skip;
Skip(text);
Next(text, lex)
END prep_if;
PROCEDURE prep_end (text: SCANNER; VAR lex: LEX);
BEGIN
check(text.ifc > 0, text, lex, 118);
IF lex.sym = lxEND THEN
WHILE text._elsif[text.ifc] DO
DEC(text.ifc)
END;
DEC(text.ifc);
DEC(text.elsec)
ELSIF (lex.sym = lxELSE) OR (lex.sym = lxELSIF) THEN
check(~text._else[text.elsec], text, lex, 118);
text._skip[text.ifc] := ~text._skip[text.ifc];
text._else[text.elsec] := lex.sym = lxELSE
END;
Skip(text);
IF lex.sym = lxELSIF THEN
prep_if(text, lex)
ELSE
Next(text, lex)
END
END prep_end;
BEGIN
REPEAT
c := text.peak;
WHILE S.space(c) DO
c := nextc(text)
END;
lex.s[0] := 0X;
lex.length := 0;
lex.pos.line := text.line;
lex.pos.col := text.col;
lex.ident := NIL;
lex.over := FALSE;
IF S.letter(c) THEN
ident(text, lex)
ELSIF S.digit(c) THEN
number(text, lex)
ELSIF (c = '"') OR (c = "'") THEN
string(text, lex, c)
ELSIF delimiters[ORD(c)] THEN
delimiter(text, lex, c)
ELSIF c = "$" THEN
IF S.letter(nextc(text)) THEN
ident(text, lex);
IF lex.sym = lxIF THEN
IF ~_if THEN
prep_if(text, lex)
END
ELSIF (lex.sym = lxEND) OR (lex.sym = lxELSE) OR (lex.sym = lxELSIF) THEN
IF ~_if THEN
prep_end(text, lex)
END
ELSE
check(FALSE, text, lex, 119)
END
ELSE
check(FALSE, text, lex, 119)
END
ELSIF c = 0X THEN
lex.sym := lxEOF;
text.skip := FALSE;
IF text.eof THEN
INC(lex.pos.col)
END
ELSIF (c = 7FX) & upto THEN
upto := FALSE;
lex.sym := lxRANGE;
putchar(lex, ".");
putchar(lex, ".");
DEC(lex.pos.col);
TXT.next(text)
ELSE
putchar(lex, c);
TXT.next(text);
lex.sym := lxERROR04
END;
IF lex.sym < 0 THEN
lex.error := -lex.sym
ELSE
lex.error := 0
END
UNTIL (lex.sym # lxCOMMENT) & ~text.skip
END Next;
PROCEDURE open* (name: ARRAY OF CHAR): SCANNER;
RETURN TXT.open(name)
END open;
PROCEDURE close* (VAR scanner: SCANNER);
BEGIN
TXT.close(scanner)
END close;
PROCEDURE init* (lower: BOOLEAN);
VAR
i: INTEGER;
delim: ARRAY 23 OF CHAR;
PROCEDURE enterkw (key: INTEGER; kw: LEXSTR);
VAR
id: IDENT;
upper: LEXSTR;
BEGIN
IF LowerCase THEN
id := enterid(kw);
id.key := key
END;
upper := kw;
S.UpCase(upper);
id := enterid(upper);
id.key := key
END enterkw;
BEGIN
upto := FALSE;
LowerCase := lower;
FOR i := 0 TO 255 DO
delimiters[i] := FALSE
END;
delim := "+-*/~&.,;|([{^=#<>:)]}";
FOR i := 0 TO LEN(delim) - 2 DO
delimiters[ORD(delim[i])] := TRUE
END;
NEW(NewIdent);
NewIdent.s := "";
NewIdent.offset := -1;
NewIdent.offsetW := -1;
NewIdent.key := 0;
idents := NIL;
enterkw(lxARRAY, "array");
enterkw(lxBEGIN, "begin");
enterkw(lxBY, "by");
enterkw(lxCASE, "case");
enterkw(lxCONST, "const");
enterkw(lxDIV, "div");
enterkw(lxDO, "do");
enterkw(lxELSE, "else");
enterkw(lxELSIF, "elsif");
enterkw(lxEND, "end");
enterkw(lxFALSE, "false");
enterkw(lxFOR, "for");
enterkw(lxIF, "if");
enterkw(lxIMPORT, "import");
enterkw(lxIN, "in");
enterkw(lxIS, "is");
enterkw(lxMOD, "mod");
enterkw(lxMODULE, "module");
enterkw(lxNIL, "nil");
enterkw(lxOF, "of");
enterkw(lxOR, "or");
enterkw(lxPOINTER, "pointer");
enterkw(lxPROCEDURE, "procedure");
enterkw(lxRECORD, "record");
enterkw(lxREPEAT, "repeat");
enterkw(lxRETURN, "return");
enterkw(lxTHEN, "then");
enterkw(lxTO, "to");
enterkw(lxTRUE, "true");
enterkw(lxTYPE, "type");
enterkw(lxUNTIL, "until");
enterkw(lxVAR, "var");
enterkw(lxWHILE, "while")
END init;
PROCEDURE NewDef* (str: ARRAY OF CHAR);
VAR
item: DEF;
BEGIN
NEW(item);
COPY(str, item.ident);
LISTS.push(def, item)
END NewDef;
BEGIN
def := LISTS.create(NIL)
END SCAN.