(* BSD 2-Clause License Copyright (c) 2018-2019, Anton Krotov All rights reserved. *) MODULE SCAN; IMPORT TXT := TEXTDRV, AVL := AVLTREES, ARITH, S := STRINGS; CONST LEXLEN = 1024; lxUNDEF* = 0; lxIDENT* = 1; lxINTEGER* = 2; lxHEX* = 3; lxCHAR* = 4; lxFLOAT* = 5; lxSTRING* = 6; lxCOMMENT* = 7; lxEOF* = 8; lxPLUS* = 21; lxMINUS* = 22; lxMUL* = 23; lxSLASH* = 24; lxNOT* = 25; lxAND* = 26; lxPOINT* = 27; lxCOMMA* = 28; lxSEMI* = 29; lxBAR* = 30; lxLROUND* = 31; lxLSQUARE* = 32; lxLCURLY* = 33; lxCARET* = 34; lxEQ* = 35; lxNE* = 36; lxLT* = 37; lxGT* = 38; lxCOLON* = 39; lxRROUND* = 40; lxRSQUARE* = 41; lxRCURLY* = 42; lxLE* = 43; lxGE* = 44; lxASSIGN* = 45; lxRANGE* = 46; lxKW = 51; lxARRAY* = 51; lxBEGIN* = 52; lxBY* = 53; lxCASE* = 54; lxCONST* = 55; lxDIV* = 56; lxDO* = 57; lxELSE* = 58; lxELSIF* = 59; lxEND* = 60; lxFALSE* = 61; lxFOR* = 62; lxIF* = 63; lxIMPORT* = 64; lxIN* = 65; lxIS* = 66; lxMOD* = 67; lxMODULE* = 68; lxNIL* = 69; lxOF* = 70; lxOR* = 71; lxPOINTER* = 72; lxPROCEDURE* = 73; lxRECORD* = 74; lxREPEAT* = 75; lxRETURN* = 76; lxTHEN* = 77; lxTO* = 78; lxTRUE* = 79; lxTYPE* = 80; lxUNTIL* = 81; lxVAR* = 82; lxWHILE* = 83; lxERROR01* = -1; lxERROR02* = -2; lxERROR03* = -3; lxERROR04* = -4; lxERROR05* = -5; lxERROR06* = -6; lxERROR07* = -7; lxERROR08* = -8; lxERROR09* = -9; lxERROR10* = -10; lxERROR11* = -11; lxERROR12* = -12; lxERROR13* = -13; TYPE LEXSTR* = ARRAY LEXLEN OF CHAR; IDENT* = POINTER TO RECORD (AVL.DATA) s*: LEXSTR; offset*, offsetW*: INTEGER; key: INTEGER END; POSITION* = RECORD line*, col*: INTEGER END; LEX* = RECORD s*: LEXSTR; length*: INTEGER; sym*: INTEGER; pos*: POSITION; ident*: IDENT; string*: IDENT; value*: ARITH.VALUE; error*: INTEGER; over: BOOLEAN END; SCANNER* = TXT.TEXT; VAR idents: AVL.NODE; delimiters: ARRAY 256 OF BOOLEAN; NewIdent: IDENT; upto: BOOLEAN; PROCEDURE nodecmp (a, b: AVL.DATA): INTEGER; RETURN ORD(a(IDENT).s > b(IDENT).s) - ORD(a(IDENT).s < b(IDENT).s) END nodecmp; PROCEDURE enterid* (s: LEXSTR): IDENT; VAR newnode: BOOLEAN; node: AVL.NODE; BEGIN NewIdent.s := s; idents := AVL.insert(idents, NewIdent, nodecmp, newnode, node); IF newnode THEN NEW(NewIdent); NewIdent.offset := -1; NewIdent.offsetW := -1; NewIdent.key := 0 END RETURN node.data(IDENT) END enterid; PROCEDURE putchar (VAR lex: LEX; c: CHAR); BEGIN IF lex.length < LEXLEN - 1 THEN lex.s[lex.length] := c; INC(lex.length); lex.s[lex.length] := 0X ELSE lex.over := TRUE END END putchar; PROCEDURE nextc (text: TXT.TEXT): CHAR; BEGIN TXT.next(text) RETURN text.peak END nextc; PROCEDURE ident (text: TXT.TEXT; VAR lex: LEX); VAR c: CHAR; BEGIN c := text.peak; ASSERT(S.letter(c)); WHILE S.letter(c) OR S.digit(c) DO putchar(lex, c); c := nextc(text) END; IF lex.over THEN lex.sym := lxERROR06 ELSE lex.ident := enterid(lex.s); IF lex.ident.key # 0 THEN lex.sym := lex.ident.key ELSE lex.sym := lxIDENT END END END ident; PROCEDURE number (text: TXT.TEXT; VAR lex: LEX); VAR c: CHAR; hex: BOOLEAN; error: INTEGER; BEGIN c := text.peak; ASSERT(S.digit(c)); error := 0; lex.sym := lxINTEGER; hex := FALSE; WHILE S.digit(c) DO putchar(lex, c); c := nextc(text) END; WHILE S.hexdigit(c) DO putchar(lex, c); c := nextc(text); hex := TRUE END; IF c = "H" THEN putchar(lex, c); TXT.next(text); lex.sym := lxHEX ELSIF c = "X" THEN putchar(lex, c); TXT.next(text); lex.sym := lxCHAR ELSIF c = "." THEN IF hex THEN lex.sym := lxERROR01 ELSE c := nextc(text); IF c # "." THEN putchar(lex, "."); lex.sym := lxFLOAT ELSE lex.sym := lxINTEGER; text.peak := 7FX; upto := TRUE END; WHILE S.digit(c) DO putchar(lex, c); c := nextc(text) END; IF c = "E" THEN putchar(lex, c); c := nextc(text); IF (c = "+") OR (c = "-") THEN putchar(lex, c); c := nextc(text) END; IF S.digit(c) THEN WHILE S.digit(c) DO putchar(lex, c); c := nextc(text) END ELSE lex.sym := lxERROR02 END END END ELSIF hex THEN lex.sym := lxERROR01 END; IF lex.over & (lex.sym >= 0) THEN lex.sym := lxERROR07 END; IF lex.sym = lxINTEGER THEN ARITH.iconv(lex.s, lex.value, error) ELSIF (lex.sym = lxHEX) OR (lex.sym = lxCHAR) THEN ARITH.hconv(lex.s, lex.value, error) ELSIF lex.sym = lxFLOAT THEN ARITH.fconv(lex.s, lex.value, error) END; CASE error OF |0: |1: lex.sym := lxERROR08 |2: lex.sym := lxERROR09 |3: lex.sym := lxERROR10 |4: lex.sym := lxERROR11 |5: lex.sym := lxERROR12 END END number; PROCEDURE string (text: TXT.TEXT; VAR lex: LEX; quot: CHAR); VAR c: CHAR; n: INTEGER; BEGIN c := nextc(text); n := 0; WHILE (c # quot) & (c # 0X) & ~text.eol & ~text.eof DO putchar(lex, c); c := nextc(text); INC(n) END; IF c = quot THEN TXT.next(text); IF lex.over THEN lex.sym := lxERROR05 ELSE IF n # 1 THEN lex.sym := lxSTRING ELSE lex.sym := lxCHAR; ARITH.setChar(lex.value, ORD(lex.s[0])) END END ELSE lex.sym := lxERROR03 END; IF lex.sym = lxSTRING THEN lex.string := enterid(lex.s); lex.value.typ := ARITH.tSTRING; lex.value.string := lex.string END END string; PROCEDURE comment (text: TXT.TEXT); VAR c: CHAR; cond, depth: INTEGER; BEGIN cond := 0; depth := 1; REPEAT c := text.peak; TXT.next(text); IF c = "*" THEN IF cond = 1 THEN cond := 0; INC(depth) ELSE cond := 2 END ELSIF c = ")" THEN IF cond = 2 THEN DEC(depth) END; cond := 0 ELSIF c = "(" THEN cond := 1 ELSE cond := 0 END UNTIL (depth = 0) OR text.eof END comment; PROCEDURE delimiter (text: TXT.TEXT; VAR lex: LEX; c: CHAR); BEGIN putchar(lex, c); c := nextc(text); CASE lex.s[0] OF |"+": lex.sym := lxPLUS |"-": lex.sym := lxMINUS |"*": lex.sym := lxMUL |"/": lex.sym := lxSLASH; IF c = "/" THEN lex.sym := lxCOMMENT; REPEAT TXT.next(text) UNTIL text.eol OR text.eof END |"~": lex.sym := lxNOT |"&": lex.sym := lxAND |".": lex.sym := lxPOINT; IF c = "." THEN lex.sym := lxRANGE; putchar(lex, c); TXT.next(text) END |",": lex.sym := lxCOMMA |";": lex.sym := lxSEMI |"|": lex.sym := lxBAR |"(": lex.sym := lxLROUND; IF c = "*" THEN lex.sym := lxCOMMENT; TXT.next(text); comment(text) END |"[": lex.sym := lxLSQUARE |"{": lex.sym := lxLCURLY |"^": lex.sym := lxCARET |"=": lex.sym := lxEQ |"#": lex.sym := lxNE |"<": lex.sym := lxLT; IF c = "=" THEN lex.sym := lxLE; putchar(lex, c); TXT.next(text) END |">": lex.sym := lxGT; IF c = "=" THEN lex.sym := lxGE; putchar(lex, c); TXT.next(text) END |":": lex.sym := lxCOLON; IF c = "=" THEN lex.sym := lxASSIGN; putchar(lex, c); TXT.next(text) END |")": lex.sym := lxRROUND |"]": lex.sym := lxRSQUARE |"}": lex.sym := lxRCURLY END END delimiter; PROCEDURE Next* (text: SCANNER; VAR lex: LEX); VAR c: CHAR; BEGIN REPEAT c := text.peak; WHILE S.space(c) DO c := nextc(text) END; lex.s[0] := 0X; lex.length := 0; lex.pos.line := text.line; lex.pos.col := text.col; lex.ident := NIL; lex.over := FALSE; IF S.letter(c) THEN ident(text, lex) ELSIF S.digit(c) THEN number(text, lex) ELSIF (c = '"') OR (c = "'") THEN string(text, lex, c) ELSIF delimiters[ORD(c)] THEN delimiter(text, lex, c) ELSIF c = 0X THEN lex.sym := lxEOF; IF text.eof THEN INC(lex.pos.col) END ELSIF (c = 7FX) & upto THEN upto := FALSE; lex.sym := lxRANGE; putchar(lex, "."); putchar(lex, "."); DEC(lex.pos.col); TXT.next(text) ELSE putchar(lex, c); TXT.next(text); lex.sym := lxERROR04 END; IF lex.sym < 0 THEN lex.error := -lex.sym ELSE lex.error := 0 END UNTIL lex.sym # lxCOMMENT END Next; PROCEDURE open* (name: ARRAY OF CHAR): SCANNER; RETURN TXT.open(name) END open; PROCEDURE close* (VAR scanner: SCANNER); BEGIN TXT.close(scanner) END close; PROCEDURE init; VAR i: INTEGER; delim: ARRAY 23 OF CHAR; PROCEDURE enterkw (key: INTEGER; kw: LEXSTR); VAR id: IDENT; BEGIN id := enterid(kw); id.key := key END enterkw; BEGIN upto := FALSE; FOR i := 0 TO 255 DO delimiters[i] := FALSE END; delim := "+-*/~&.,;|([{^=#<>:)]}"; FOR i := 0 TO LEN(delim) - 2 DO delimiters[ORD(delim[i])] := TRUE END; NEW(NewIdent); NewIdent.s := ""; NewIdent.offset := -1; NewIdent.offsetW := -1; NewIdent.key := 0; idents := NIL; enterkw(lxARRAY, "ARRAY"); enterkw(lxBEGIN, "BEGIN"); enterkw(lxBY, "BY"); enterkw(lxCASE, "CASE"); enterkw(lxCONST, "CONST"); enterkw(lxDIV, "DIV"); enterkw(lxDO, "DO"); enterkw(lxELSE, "ELSE"); enterkw(lxELSIF, "ELSIF"); enterkw(lxEND, "END"); enterkw(lxFALSE, "FALSE"); enterkw(lxFOR, "FOR"); enterkw(lxIF, "IF"); enterkw(lxIMPORT, "IMPORT"); enterkw(lxIN, "IN"); enterkw(lxIS, "IS"); enterkw(lxMOD, "MOD"); enterkw(lxMODULE, "MODULE"); enterkw(lxNIL, "NIL"); enterkw(lxOF, "OF"); enterkw(lxOR, "OR"); enterkw(lxPOINTER, "POINTER"); enterkw(lxPROCEDURE, "PROCEDURE"); enterkw(lxRECORD, "RECORD"); enterkw(lxREPEAT, "REPEAT"); enterkw(lxRETURN, "RETURN"); enterkw(lxTHEN, "THEN"); enterkw(lxTO, "TO"); enterkw(lxTRUE, "TRUE"); enterkw(lxTYPE, "TYPE"); enterkw(lxUNTIL, "UNTIL"); enterkw(lxVAR, "VAR"); enterkw(lxWHILE, "WHILE") END init; BEGIN init END SCAN.