(* BSD 2-Clause License Copyright (c) 2018, 2019, Anton Krotov All rights reserved. *) MODULE SCAN; IMPORT TXT := TEXTDRV, AVL := AVLTREES, ARITH, S := STRINGS; CONST LEXLEN = 1024; lxUNDEF* = 0; lxIDENT* = 1; lxINTEGER* = 2; lxHEX* = 3; lxCHAR* = 4; lxFLOAT* = 5; lxSTRING* = 6; lxCOMMENT* = 7; lxEOF* = 8; lxPLUS* = 21; lxMINUS* = 22; lxMUL* = 23; lxSLASH* = 24; lxNOT* = 25; lxAND* = 26; lxPOINT* = 27; lxCOMMA* = 28; lxSEMI* = 29; lxBAR* = 30; lxLROUND* = 31; lxLSQUARE* = 32; lxLCURLY* = 33; lxCARET* = 34; lxEQ* = 35; lxNE* = 36; lxLT* = 37; lxGT* = 38; lxCOLON* = 39; lxRROUND* = 40; lxRSQUARE* = 41; lxRCURLY* = 42; lxLE* = 43; lxGE* = 44; lxASSIGN* = 45; lxRANGE* = 46; lxKW = 51; lxARRAY* = 51; lxBEGIN* = 52; lxBY* = 53; lxCASE* = 54; lxCONST* = 55; lxDIV* = 56; lxDO* = 57; lxELSE* = 58; lxELSIF* = 59; lxEND* = 60; lxFALSE* = 61; lxFOR* = 62; lxIF* = 63; lxIMPORT* = 64; lxIN* = 65; lxIS* = 66; lxMOD* = 67; lxMODULE* = 68; lxNIL* = 69; lxOF* = 70; lxOR* = 71; lxPOINTER* = 72; lxPROCEDURE* = 73; lxRECORD* = 74; lxREPEAT* = 75; lxRETURN* = 76; lxTHEN* = 77; lxTO* = 78; lxTRUE* = 79; lxTYPE* = 80; lxUNTIL* = 81; lxVAR* = 82; lxWHILE* = 83; lxERROR01* = -1; lxERROR02* = -2; lxERROR03* = -3; lxERROR04* = -4; lxERROR05* = -5; lxERROR06* = -6; lxERROR07* = -7; lxERROR08* = -8; lxERROR09* = -9; lxERROR10* = -10; lxERROR11* = -11; lxERROR12* = -12; lxERROR13* = -13; TYPE LEXSTR* = ARRAY LEXLEN OF CHAR; IDENT* = POINTER TO RECORD (AVL.DATA) s*: LEXSTR; offset*, offsetW*: INTEGER END; POSITION* = RECORD line*, col*: INTEGER END; LEX* = RECORD s*: LEXSTR; length*: INTEGER; sym*: INTEGER; pos*: POSITION; ident*: IDENT; string*: IDENT; value*: ARITH.VALUE; error*: INTEGER; over: BOOLEAN END; SCANNER* = TXT.TEXT; KEYWORD = ARRAY 10 OF CHAR; VAR vocabulary: RECORD KW: ARRAY 33 OF KEYWORD; delimiters: ARRAY 256 OF BOOLEAN; idents: AVL.NODE; ident: IDENT END; upto: BOOLEAN; PROCEDURE nodecmp (a, b: AVL.DATA): INTEGER; RETURN ORD(a(IDENT).s > b(IDENT).s) - ORD(a(IDENT).s < b(IDENT).s) END nodecmp; PROCEDURE key (VAR lex: LEX); VAR L, R, M: INTEGER; found: BOOLEAN; BEGIN L := 0; R := LEN(vocabulary.KW) - 1; found := FALSE; REPEAT M := (L + R) DIV 2; IF lex.s # vocabulary.KW[M] THEN IF lex.s > vocabulary.KW[M] THEN L := M + 1 ELSE R := M - 1 END ELSE found := TRUE; lex.sym := lxKW + M END UNTIL found OR (L > R) END key; PROCEDURE enterid* (s: LEXSTR): IDENT; VAR newnode: BOOLEAN; node: AVL.NODE; BEGIN vocabulary.ident.s := s; vocabulary.idents := AVL.insert(vocabulary.idents, vocabulary.ident, nodecmp, newnode, node); IF newnode THEN NEW(vocabulary.ident); vocabulary.ident.offset := -1; vocabulary.ident.offsetW := -1 END RETURN node.data(IDENT) END enterid; PROCEDURE putchar (VAR lex: LEX; c: CHAR); BEGIN IF lex.length < LEXLEN - 1 THEN lex.s[lex.length] := c; INC(lex.length); lex.s[lex.length] := 0X ELSE lex.over := TRUE END END putchar; PROCEDURE nextc (text: TXT.TEXT): CHAR; BEGIN TXT.next(text) RETURN text.peak END nextc; PROCEDURE ident (text: TXT.TEXT; VAR lex: LEX); VAR c: CHAR; BEGIN c := text.peak; ASSERT(S.letter(c)); WHILE S.letter(c) OR S.digit(c) DO putchar(lex, c); c := nextc(text) END; IF lex.over THEN lex.sym := lxERROR06 ELSE lex.sym := lxIDENT; key(lex) END; IF lex.sym = lxIDENT THEN lex.ident := enterid(lex.s) END END ident; PROCEDURE number (text: TXT.TEXT; VAR lex: LEX); VAR c: CHAR; hex: BOOLEAN; error: INTEGER; BEGIN c := text.peak; ASSERT(S.digit(c)); error := 0; lex.sym := lxINTEGER; hex := FALSE; WHILE S.digit(c) DO putchar(lex, c); c := nextc(text) END; WHILE S.hexdigit(c) DO putchar(lex, c); c := nextc(text); hex := TRUE END; IF c = "H" THEN putchar(lex, c); TXT.next(text); lex.sym := lxHEX ELSIF c = "X" THEN putchar(lex, c); TXT.next(text); lex.sym := lxCHAR ELSIF c = "." THEN IF hex THEN lex.sym := lxERROR01 ELSE c := nextc(text); IF c # "." THEN putchar(lex, "."); lex.sym := lxFLOAT ELSE lex.sym := lxINTEGER; text.peak := 7FX; upto := TRUE END; WHILE S.digit(c) DO putchar(lex, c); c := nextc(text) END; IF c = "E" THEN putchar(lex, c); c := nextc(text); IF (c = "+") OR (c = "-") THEN putchar(lex, c); c := nextc(text) END; IF S.digit(c) THEN WHILE S.digit(c) DO putchar(lex, c); c := nextc(text) END ELSE lex.sym := lxERROR02 END END END ELSIF hex THEN lex.sym := lxERROR01 END; IF lex.over & (lex.sym >= 0) THEN lex.sym := lxERROR07 END; IF lex.sym = lxINTEGER THEN ARITH.iconv(lex.s, lex.value, error) ELSIF (lex.sym = lxHEX) OR (lex.sym = lxCHAR) THEN ARITH.hconv(lex.s, lex.value, error) ELSIF lex.sym = lxFLOAT THEN ARITH.fconv(lex.s, lex.value, error) END; CASE error OF |0: |1: lex.sym := lxERROR08 |2: lex.sym := lxERROR09 |3: lex.sym := lxERROR10 |4: lex.sym := lxERROR11 |5: lex.sym := lxERROR12 END END number; PROCEDURE string (text: TXT.TEXT; VAR lex: LEX; quot: CHAR); VAR c: CHAR; n: INTEGER; BEGIN c := nextc(text); n := 0; WHILE (c # quot) & (c # 0X) & ~text.eol & ~text.eof DO putchar(lex, c); c := nextc(text); INC(n) END; IF c = quot THEN TXT.next(text); IF lex.over THEN lex.sym := lxERROR05 ELSE IF n # 1 THEN lex.sym := lxSTRING ELSE lex.sym := lxCHAR; ARITH.setChar(lex.value, ORD(lex.s[0])) END END ELSE lex.sym := lxERROR03 END; IF lex.sym = lxSTRING THEN lex.string := enterid(lex.s); lex.value.typ := ARITH.tSTRING; lex.value.string := lex.string END END string; PROCEDURE comment (text: TXT.TEXT); VAR c: CHAR; cond, depth: INTEGER; BEGIN cond := 0; depth := 1; REPEAT c := text.peak; TXT.next(text); IF c = "*" THEN IF cond = 1 THEN cond := 0; INC(depth) ELSE cond := 2 END ELSIF c = ")" THEN IF cond = 2 THEN DEC(depth) END; cond := 0 ELSIF c = "(" THEN cond := 1 ELSE cond := 0 END UNTIL (depth = 0) OR text.eof END comment; PROCEDURE delimiter (text: TXT.TEXT; VAR lex: LEX; c: CHAR); BEGIN putchar(lex, c); c := nextc(text); CASE lex.s[0] OF |"+": lex.sym := lxPLUS |"-": lex.sym := lxMINUS |"*": lex.sym := lxMUL |"/": lex.sym := lxSLASH; IF c = "/" THEN lex.sym := lxCOMMENT; REPEAT TXT.next(text) UNTIL text.eol OR text.eof END |"~": lex.sym := lxNOT |"&": lex.sym := lxAND |".": lex.sym := lxPOINT; IF c = "." THEN lex.sym := lxRANGE; putchar(lex, c); TXT.next(text) END |",": lex.sym := lxCOMMA |";": lex.sym := lxSEMI |"|": lex.sym := lxBAR |"(": lex.sym := lxLROUND; IF c = "*" THEN lex.sym := lxCOMMENT; TXT.next(text); comment(text) END |"[": lex.sym := lxLSQUARE |"{": lex.sym := lxLCURLY |"^": lex.sym := lxCARET |"=": lex.sym := lxEQ |"#": lex.sym := lxNE |"<": lex.sym := lxLT; IF c = "=" THEN lex.sym := lxLE; putchar(lex, c); TXT.next(text) END |">": lex.sym := lxGT; IF c = "=" THEN lex.sym := lxGE; putchar(lex, c); TXT.next(text) END |":": lex.sym := lxCOLON; IF c = "=" THEN lex.sym := lxASSIGN; putchar(lex, c); TXT.next(text) END |")": lex.sym := lxRROUND |"]": lex.sym := lxRSQUARE |"}": lex.sym := lxRCURLY END END delimiter; PROCEDURE Next* (text: SCANNER; VAR lex: LEX); VAR c: CHAR; BEGIN REPEAT c := text.peak; WHILE S.space(c) DO c := nextc(text) END; lex.s[0] := 0X; lex.length := 0; lex.pos.line := text.line; lex.pos.col := text.col; lex.ident := NIL; lex.over := FALSE; IF S.letter(c) THEN ident(text, lex) ELSIF S.digit(c) THEN number(text, lex) ELSIF (c = '"') OR (c = "'") THEN string(text, lex, c) ELSIF vocabulary.delimiters[ORD(c)] THEN delimiter(text, lex, c) ELSIF c = 0X THEN lex.sym := lxEOF; IF text.eof THEN INC(lex.pos.col) END ELSIF (c = 7FX) & upto THEN upto := FALSE; lex.sym := lxRANGE; putchar(lex, "."); putchar(lex, "."); DEC(lex.pos.col); TXT.next(text) ELSE putchar(lex, c); TXT.next(text); lex.sym := lxERROR04 END; IF lex.sym < 0 THEN lex.error := -lex.sym ELSE lex.error := 0 END UNTIL lex.sym # lxCOMMENT END Next; PROCEDURE open* (name: ARRAY OF CHAR): SCANNER; RETURN TXT.open(name) END open; PROCEDURE close* (VAR scanner: SCANNER); BEGIN TXT.close(scanner) END close; PROCEDURE init; VAR i: INTEGER; delim: ARRAY 23 OF CHAR; PROCEDURE enterkw (VAR i: INTEGER; kw: KEYWORD); BEGIN vocabulary.KW[i] := kw; INC(i) END enterkw; BEGIN upto := FALSE; FOR i := 0 TO 255 DO vocabulary.delimiters[i] := FALSE END; delim := "+-*/~&.,;|([{^=#<>:)]}"; FOR i := 0 TO LEN(delim) - 2 DO vocabulary.delimiters[ORD(delim[i])] := TRUE END; i := 0; enterkw(i, "ARRAY"); enterkw(i, "BEGIN"); enterkw(i, "BY"); enterkw(i, "CASE"); enterkw(i, "CONST"); enterkw(i, "DIV"); enterkw(i, "DO"); enterkw(i, "ELSE"); enterkw(i, "ELSIF"); enterkw(i, "END"); enterkw(i, "FALSE"); enterkw(i, "FOR"); enterkw(i, "IF"); enterkw(i, "IMPORT"); enterkw(i, "IN"); enterkw(i, "IS"); enterkw(i, "MOD"); enterkw(i, "MODULE"); enterkw(i, "NIL"); enterkw(i, "OF"); enterkw(i, "OR"); enterkw(i, "POINTER"); enterkw(i, "PROCEDURE"); enterkw(i, "RECORD"); enterkw(i, "REPEAT"); enterkw(i, "RETURN"); enterkw(i, "THEN"); enterkw(i, "TO"); enterkw(i, "TRUE"); enterkw(i, "TYPE"); enterkw(i, "UNTIL"); enterkw(i, "VAR"); enterkw(i, "WHILE"); NEW(vocabulary.ident); vocabulary.ident.s := ""; vocabulary.ident.offset := -1; vocabulary.ident.offsetW := -1; vocabulary.idents := NIL END init; BEGIN init END SCAN.