(* BSD 2-Clause License Copyright (c) 2018-2020, Anton Krotov All rights reserved. *) MODULE SCAN; IMPORT TXT := TEXTDRV, AVL := AVLTREES, ARITH, S := STRINGS, ERRORS, LISTS; CONST LEXLEN = 1024; lxUNDEF* = 0; lxIDENT* = 1; lxINTEGER* = 2; lxHEX* = 3; lxCHAR* = 4; lxFLOAT* = 5; lxSTRING* = 6; lxCOMMENT* = 7; lxEOF* = 8; lxPLUS* = 21; lxMINUS* = 22; lxMUL* = 23; lxSLASH* = 24; lxNOT* = 25; lxAND* = 26; lxPOINT* = 27; lxCOMMA* = 28; lxSEMI* = 29; lxBAR* = 30; lxLROUND* = 31; lxLSQUARE* = 32; lxLCURLY* = 33; lxCARET* = 34; lxEQ* = 35; lxNE* = 36; lxLT* = 37; lxGT* = 38; lxCOLON* = 39; lxRROUND* = 40; lxRSQUARE* = 41; lxRCURLY* = 42; lxLE* = 43; lxGE* = 44; lxASSIGN* = 45; lxRANGE* = 46; lxKW = 51; lxARRAY* = 51; lxBEGIN* = 52; lxBY* = 53; lxCASE* = 54; lxCONST* = 55; lxDIV* = 56; lxDO* = 57; lxELSE* = 58; lxELSIF* = 59; lxEND* = 60; lxFALSE* = 61; lxFOR* = 62; lxIF* = 63; lxIMPORT* = 64; lxIN* = 65; lxIS* = 66; lxMOD* = 67; lxMODULE* = 68; lxNIL* = 69; lxOF* = 70; lxOR* = 71; lxPOINTER* = 72; lxPROCEDURE* = 73; lxRECORD* = 74; lxREPEAT* = 75; lxRETURN* = 76; lxTHEN* = 77; lxTO* = 78; lxTRUE* = 79; lxTYPE* = 80; lxUNTIL* = 81; lxVAR* = 82; lxWHILE* = 83; lxERROR01* = -1; lxERROR02* = -2; lxERROR03* = -3; lxERROR04* = -4; lxERROR05* = -5; lxERROR06* = -6; lxERROR07* = -7; lxERROR08* = -8; lxERROR09* = -9; lxERROR10* = -10; lxERROR11* = -11; lxERROR12* = -12; lxERROR13* = -13; TYPE LEXSTR* = ARRAY LEXLEN OF CHAR; DEF = POINTER TO RECORD (LISTS.ITEM) ident: LEXSTR END; IDENT* = POINTER TO RECORD (AVL.DATA) s*: LEXSTR; offset*, offsetW*: INTEGER; key: INTEGER END; POSITION* = RECORD line*, col*: INTEGER END; LEX* = RECORD s*: LEXSTR; length*: INTEGER; sym*: INTEGER; pos*: POSITION; ident*: IDENT; string*: IDENT; value*: ARITH.VALUE; error*: INTEGER; over: BOOLEAN END; SCANNER* = TXT.TEXT; VAR idents: AVL.NODE; delimiters: ARRAY 256 OF BOOLEAN; NewIdent: IDENT; upto, LowerCase, _if: BOOLEAN; def: LISTS.LIST; PROCEDURE nodecmp (a, b: AVL.DATA): INTEGER; RETURN ORD(a(IDENT).s > b(IDENT).s) - ORD(a(IDENT).s < b(IDENT).s) END nodecmp; PROCEDURE enterid* (s: LEXSTR): IDENT; VAR newnode: BOOLEAN; node: AVL.NODE; BEGIN NewIdent.s := s; idents := AVL.insert(idents, NewIdent, nodecmp, newnode, node); IF newnode THEN NEW(NewIdent); NewIdent.offset := -1; NewIdent.offsetW := -1; NewIdent.key := 0 END RETURN node.data(IDENT) END enterid; PROCEDURE putchar (VAR lex: LEX; c: CHAR); BEGIN IF lex.length < LEXLEN - 1 THEN lex.s[lex.length] := c; INC(lex.length); lex.s[lex.length] := 0X ELSE lex.over := TRUE END END putchar; PROCEDURE nextc (text: TXT.TEXT): CHAR; BEGIN TXT.next(text) RETURN text.peak END nextc; PROCEDURE ident (text: TXT.TEXT; VAR lex: LEX); VAR c: CHAR; BEGIN c := text.peak; ASSERT(S.letter(c)); WHILE S.letter(c) OR S.digit(c) DO putchar(lex, c); c := nextc(text) END; IF lex.over THEN lex.sym := lxERROR06 ELSE lex.ident := enterid(lex.s); IF lex.ident.key # 0 THEN lex.sym := lex.ident.key ELSE lex.sym := lxIDENT END END END ident; PROCEDURE number (text: TXT.TEXT; VAR lex: LEX); VAR c: CHAR; hex: BOOLEAN; error, sym: INTEGER; BEGIN c := text.peak; ASSERT(S.digit(c)); error := 0; sym := lxINTEGER; hex := FALSE; WHILE S.digit(c) DO putchar(lex, c); c := nextc(text) END; WHILE S.hexdigit(c) DO putchar(lex, c); c := nextc(text); hex := TRUE END; IF c = "H" THEN putchar(lex, c); TXT.next(text); sym := lxHEX ELSIF c = "X" THEN putchar(lex, c); TXT.next(text); sym := lxCHAR ELSIF c = "." THEN IF hex THEN sym := lxERROR01 ELSE c := nextc(text); IF c # "." THEN putchar(lex, "."); sym := lxFLOAT ELSE sym := lxINTEGER; text.peak := 7FX; upto := TRUE END; WHILE S.digit(c) DO putchar(lex, c); c := nextc(text) END; IF c = "E" THEN putchar(lex, c); c := nextc(text); IF (c = "+") OR (c = "-") THEN putchar(lex, c); c := nextc(text) END; IF S.digit(c) THEN WHILE S.digit(c) DO putchar(lex, c); c := nextc(text) END ELSE sym := lxERROR02 END END END ELSIF hex THEN sym := lxERROR01 END; IF lex.over & (sym >= 0) THEN sym := lxERROR07 END; IF sym = lxINTEGER THEN ARITH.iconv(lex.s, lex.value, error) ELSIF (sym = lxHEX) OR (sym = lxCHAR) THEN ARITH.hconv(lex.s, lex.value, error) ELSIF sym = lxFLOAT THEN ARITH.fconv(lex.s, lex.value, error) END; CASE error OF |0: |1: sym := lxERROR08 |2: sym := lxERROR09 |3: sym := lxERROR10 |4: sym := lxERROR11 |5: sym := lxERROR12 END; lex.sym := sym END number; PROCEDURE string (text: TXT.TEXT; VAR lex: LEX; quot: CHAR); VAR c: CHAR; n: INTEGER; BEGIN c := nextc(text); n := 0; WHILE (c # quot) & (c # 0X) & ~text.eol & ~text.eof DO putchar(lex, c); c := nextc(text); INC(n) END; IF c = quot THEN TXT.next(text); IF lex.over THEN lex.sym := lxERROR05 ELSE IF n # 1 THEN lex.sym := lxSTRING ELSE lex.sym := lxCHAR; ARITH.setChar(lex.value, ORD(lex.s[0])) END END ELSE lex.sym := lxERROR03 END; IF lex.sym = lxSTRING THEN lex.string := enterid(lex.s); lex.value.typ := ARITH.tSTRING; lex.value.string := lex.string END END string; PROCEDURE comment (text: TXT.TEXT); VAR c: CHAR; cond, depth: INTEGER; BEGIN cond := 0; depth := 1; REPEAT c := text.peak; TXT.next(text); IF c = "*" THEN IF cond = 1 THEN cond := 0; INC(depth) ELSE cond := 2 END ELSIF c = ")" THEN IF cond = 2 THEN DEC(depth) END; cond := 0 ELSIF c = "(" THEN cond := 1 ELSE cond := 0 END UNTIL (depth = 0) OR text.eof END comment; PROCEDURE delimiter (text: TXT.TEXT; VAR lex: LEX; c: CHAR); VAR sym: INTEGER; BEGIN putchar(lex, c); c := nextc(text); CASE lex.s[0] OF |"+": sym := lxPLUS |"-": sym := lxMINUS |"*": sym := lxMUL |"/": sym := lxSLASH; IF c = "/" THEN sym := lxCOMMENT; REPEAT TXT.next(text) UNTIL text.eol OR text.eof END |"~": sym := lxNOT |"&": sym := lxAND |".": sym := lxPOINT; IF c = "." THEN sym := lxRANGE; putchar(lex, c); TXT.next(text) END |",": sym := lxCOMMA |";": sym := lxSEMI |"|": sym := lxBAR |"(": sym := lxLROUND; IF c = "*" THEN sym := lxCOMMENT; TXT.next(text); comment(text) END |"[": sym := lxLSQUARE |"{": sym := lxLCURLY |"^": sym := lxCARET |"=": sym := lxEQ |"#": sym := lxNE |"<": sym := lxLT; IF c = "=" THEN sym := lxLE; putchar(lex, c); TXT.next(text) END |">": sym := lxGT; IF c = "=" THEN sym := lxGE; putchar(lex, c); TXT.next(text) END |":": sym := lxCOLON; IF c = "=" THEN sym := lxASSIGN; putchar(lex, c); TXT.next(text) END |")": sym := lxRROUND |"]": sym := lxRSQUARE |"}": sym := lxRCURLY END; lex.sym := sym END delimiter; PROCEDURE Next* (text: SCANNER; VAR lex: LEX); VAR c: CHAR; PROCEDURE check (cond: BOOLEAN; text: SCANNER; lex: LEX; errno: INTEGER); BEGIN IF ~cond THEN ERRORS.ErrorMsg(text.fname, lex.pos.line, lex.pos.col, errno) END END check; PROCEDURE IsDef (str: ARRAY OF CHAR): BOOLEAN; VAR cur: DEF; BEGIN cur := def.first(DEF); WHILE (cur # NIL) & (cur.ident # str) DO cur := cur.next(DEF) END RETURN cur # NIL END IsDef; PROCEDURE Skip (text: SCANNER); VAR i: INTEGER; BEGIN i := 0; WHILE (i <= text.ifc) & ~text._skip[i] DO INC(i) END; text.skip := i <= text.ifc END Skip; PROCEDURE prep_if (text: SCANNER; VAR lex: LEX); VAR skip: BOOLEAN; BEGIN INC(text.ifc); text._elsif[text.ifc] := lex.sym = lxELSIF; IF lex.sym = lxIF THEN INC(text.elsec); text._else[text.elsec] := FALSE END; _if := TRUE; skip := TRUE; text.skip := FALSE; Next(text, lex); check(lex.sym = lxLROUND, text, lex, 64); Next(text, lex); check(lex.sym = lxIDENT, text, lex, 22); REPEAT IF IsDef(lex.s) THEN skip := FALSE END; Next(text, lex); IF lex.sym = lxBAR THEN Next(text, lex); check(lex.sym = lxIDENT, text, lex, 22) ELSE check(lex.sym = lxRROUND, text, lex, 33) END UNTIL lex.sym = lxRROUND; _if := FALSE; text._skip[text.ifc] := skip; Skip(text); Next(text, lex) END prep_if; PROCEDURE prep_end (text: SCANNER; VAR lex: LEX); BEGIN check(text.ifc > 0, text, lex, 118); IF lex.sym = lxEND THEN WHILE text._elsif[text.ifc] DO DEC(text.ifc) END; DEC(text.ifc); DEC(text.elsec) ELSIF (lex.sym = lxELSE) OR (lex.sym = lxELSIF) THEN check(~text._else[text.elsec], text, lex, 118); text._skip[text.ifc] := ~text._skip[text.ifc]; text._else[text.elsec] := lex.sym = lxELSE END; Skip(text); IF lex.sym = lxELSIF THEN prep_if(text, lex) ELSE Next(text, lex) END END prep_end; BEGIN REPEAT c := text.peak; WHILE S.space(c) DO c := nextc(text) END; lex.s[0] := 0X; lex.length := 0; lex.pos.line := text.line; lex.pos.col := text.col; lex.ident := NIL; lex.over := FALSE; IF S.letter(c) THEN ident(text, lex) ELSIF S.digit(c) THEN number(text, lex) ELSIF (c = '"') OR (c = "'") THEN string(text, lex, c) ELSIF delimiters[ORD(c)] THEN delimiter(text, lex, c) ELSIF c = "$" THEN IF S.letter(nextc(text)) THEN ident(text, lex); IF lex.sym = lxIF THEN IF ~_if THEN prep_if(text, lex) END ELSIF (lex.sym = lxEND) OR (lex.sym = lxELSE) OR (lex.sym = lxELSIF) THEN IF ~_if THEN prep_end(text, lex) END ELSE check(FALSE, text, lex, 119) END ELSE check(FALSE, text, lex, 119) END ELSIF c = 0X THEN lex.sym := lxEOF; text.skip := FALSE; IF text.eof THEN INC(lex.pos.col) END ELSIF (c = 7FX) & upto THEN upto := FALSE; lex.sym := lxRANGE; putchar(lex, "."); putchar(lex, "."); DEC(lex.pos.col); TXT.next(text) ELSE putchar(lex, c); TXT.next(text); lex.sym := lxERROR04 END; IF lex.sym < 0 THEN lex.error := -lex.sym ELSE lex.error := 0 END UNTIL (lex.sym # lxCOMMENT) & ~text.skip END Next; PROCEDURE open* (name: ARRAY OF CHAR): SCANNER; RETURN TXT.open(name) END open; PROCEDURE close* (VAR scanner: SCANNER); BEGIN TXT.close(scanner) END close; PROCEDURE init* (lower: BOOLEAN); VAR i: INTEGER; delim: ARRAY 23 OF CHAR; PROCEDURE enterkw (key: INTEGER; kw: LEXSTR); VAR id: IDENT; upper: LEXSTR; BEGIN IF LowerCase THEN id := enterid(kw); id.key := key END; upper := kw; S.UpCase(upper); id := enterid(upper); id.key := key END enterkw; BEGIN upto := FALSE; LowerCase := lower; FOR i := 0 TO 255 DO delimiters[i] := FALSE END; delim := "+-*/~&.,;|([{^=#<>:)]}"; FOR i := 0 TO LEN(delim) - 2 DO delimiters[ORD(delim[i])] := TRUE END; NEW(NewIdent); NewIdent.s := ""; NewIdent.offset := -1; NewIdent.offsetW := -1; NewIdent.key := 0; idents := NIL; enterkw(lxARRAY, "array"); enterkw(lxBEGIN, "begin"); enterkw(lxBY, "by"); enterkw(lxCASE, "case"); enterkw(lxCONST, "const"); enterkw(lxDIV, "div"); enterkw(lxDO, "do"); enterkw(lxELSE, "else"); enterkw(lxELSIF, "elsif"); enterkw(lxEND, "end"); enterkw(lxFALSE, "false"); enterkw(lxFOR, "for"); enterkw(lxIF, "if"); enterkw(lxIMPORT, "import"); enterkw(lxIN, "in"); enterkw(lxIS, "is"); enterkw(lxMOD, "mod"); enterkw(lxMODULE, "module"); enterkw(lxNIL, "nil"); enterkw(lxOF, "of"); enterkw(lxOR, "or"); enterkw(lxPOINTER, "pointer"); enterkw(lxPROCEDURE, "procedure"); enterkw(lxRECORD, "record"); enterkw(lxREPEAT, "repeat"); enterkw(lxRETURN, "return"); enterkw(lxTHEN, "then"); enterkw(lxTO, "to"); enterkw(lxTRUE, "true"); enterkw(lxTYPE, "type"); enterkw(lxUNTIL, "until"); enterkw(lxVAR, "var"); enterkw(lxWHILE, "while") END init; PROCEDURE NewDef* (str: ARRAY OF CHAR); VAR item: DEF; BEGIN NEW(item); COPY(str, item.ident); LISTS.push(def, item) END NewDef; BEGIN def := LISTS.create(NIL) END SCAN.