(* BSD 2-Clause License Copyright (c) 2018, Anton Krotov All rights reserved. *) MODULE SCAN; IMPORT TEXTDRV, AVL := AVLTREES, ARITH, S := STRINGS, C := COLLECTIONS; CONST LEXLEN = 1024; lxUNDEF* = 0; lxIDENT* = 1; lxINTEGER* = 2; lxHEX* = 3; lxCHAR* = 4; lxFLOAT* = 5; lxSTRING* = 6; lxCOMMENT* = 7; lxEOF* = 8; lxKW = 101; lxARRAY* = 101; lxBEGIN* = 102; lxBY* = 103; lxCASE* = 104; lxCONST* = 105; lxDIV* = 106; lxDO* = 107; lxELSE* = 108; lxELSIF* = 109; lxEND* = 110; lxFALSE* = 111; lxFOR* = 112; lxIF* = 113; lxIMPORT* = 114; lxIN* = 115; lxIS* = 116; lxMOD* = 117; lxMODULE* = 118; lxNIL* = 119; lxOF* = 120; lxOR* = 121; lxPOINTER* = 122; lxPROCEDURE* = 123; lxRECORD* = 124; lxREPEAT* = 125; lxRETURN* = 126; lxTHEN* = 127; lxTO* = 128; lxTRUE* = 129; lxTYPE* = 130; lxUNTIL* = 131; lxVAR* = 132; lxWHILE* = 133; lxPLUS* = 201; lxMINUS* = 202; lxMUL* = 203; lxSLASH* = 204; lxNOT* = 205; lxAND* = 206; lxPOINT* = 207; lxCOMMA* = 208; lxSEMI* = 209; lxBAR* = 210; lxLROUND* = 211; lxLSQUARE* = 212; lxLCURLY* = 213; lxCARET* = 214; lxEQ* = 215; lxNE* = 216; lxLT* = 217; lxGT* = 218; lxCOLON* = 219; lxRROUND* = 220; lxRSQUARE* = 221; lxRCURLY* = 222; lxLE* = 223; lxGE* = 224; lxASSIGN* = 225; lxRANGE* = 226; lxERROR01 = -1; lxERROR02 = -2; lxERROR03 = -3; lxERROR04 = -4; lxERROR05 = -5; lxERROR06 = -6; lxERROR07 = -7; lxERROR08 = -8; lxERROR09 = -9; lxERROR10 = -10; lxERROR11 = -11; lxERROR12 = -12; TYPE LEXSTR* = ARRAY LEXLEN OF CHAR; IDENT* = POINTER TO RECORD (AVL.DATA) s*: LEXSTR; offset*, offsetW*: INTEGER END; POSITION* = RECORD line*, col*: INTEGER END; LEX* = RECORD s*: LEXSTR; length*: INTEGER; sym*: INTEGER; pos*: POSITION; ident*: IDENT; string*: IDENT; value*: ARITH.VALUE; error*: INTEGER; over: BOOLEAN END; SCANNER* = POINTER TO RECORD (C.ITEM) text: TEXTDRV.TEXT; range: BOOLEAN END; KEYWORD = ARRAY 10 OF CHAR; VAR vocabulary: RECORD KW: ARRAY 33 OF KEYWORD; delimiters: ARRAY 256 OF BOOLEAN; idents: AVL.NODE; ident: IDENT END; scanners: C.COLLECTION; PROCEDURE nodecmp (a, b: AVL.DATA): INTEGER; RETURN ORD(a(IDENT).s > b(IDENT).s) - ORD(a(IDENT).s < b(IDENT).s) END nodecmp; PROCEDURE key (VAR lex: LEX); VAR L, R, M: INTEGER; BEGIN L := 0; R := LEN(vocabulary.KW) - 1; M := (L + R) DIV 2; WHILE L # M DO IF lex.s > vocabulary.KW[M] THEN L := M; M := (L + R) DIV 2 ELSIF lex.s < vocabulary.KW[M] THEN R := M; M := (L + R) DIV 2 ELSE lex.sym := lxKW + M; L := M; R := M END END; IF L # R THEN IF lex.s = vocabulary.KW[L] THEN lex.sym := lxKW + L END; IF lex.s = vocabulary.KW[R] THEN lex.sym := lxKW + R END END END key; PROCEDURE enterid* (s: LEXSTR): IDENT; VAR newnode: BOOLEAN; node: AVL.NODE; BEGIN vocabulary.ident.s := s; vocabulary.idents := AVL.insert(vocabulary.idents, vocabulary.ident, nodecmp, newnode, node); IF newnode THEN NEW(vocabulary.ident); vocabulary.ident.offset := -1; vocabulary.ident.offsetW := -1 END RETURN node.data(IDENT) END enterid; PROCEDURE putchar (VAR lex: LEX; c: CHAR); BEGIN IF lex.length < LEXLEN - 1 THEN lex.s[lex.length] := c; INC(lex.length); lex.s[lex.length] := 0X ELSE lex.over := TRUE END END putchar; PROCEDURE ident (text: TEXTDRV.TEXT; VAR lex: LEX); VAR c: CHAR; BEGIN c := text.peak(text); ASSERT(S.letter(c)); WHILE S.letter(c) OR S.digit(c) DO putchar(lex, c); text.nextc(text); c := text.peak(text) END; IF lex.over THEN lex.sym := lxERROR06 ELSE lex.sym := lxIDENT; key(lex) END; IF lex.sym = lxIDENT THEN lex.ident := enterid(lex.s) END END ident; PROCEDURE number (text: TEXTDRV.TEXT; VAR lex: LEX; VAR range: BOOLEAN); VAR c: CHAR; hex: BOOLEAN; error: INTEGER; BEGIN c := text.peak(text); ASSERT(S.digit(c)); error := 0; range := FALSE; lex.sym := lxINTEGER; hex := FALSE; WHILE S.digit(c) DO putchar(lex, c); text.nextc(text); c := text.peak(text) END; WHILE S.hexdigit(c) DO putchar(lex, c); text.nextc(text); c := text.peak(text); hex := TRUE END; IF c = "H" THEN putchar(lex, c); text.nextc(text); lex.sym := lxHEX ELSIF c = "X" THEN putchar(lex, c); text.nextc(text); lex.sym := lxCHAR ELSIF c = "." THEN IF hex THEN lex.sym := lxERROR01 ELSE text.nextc(text); c := text.peak(text); IF c # "." THEN putchar(lex, "."); lex.sym := lxFLOAT ELSE lex.sym := lxINTEGER; range := TRUE END; WHILE S.digit(c) DO putchar(lex, c); text.nextc(text); c := text.peak(text) END; IF c = "E" THEN putchar(lex, c); text.nextc(text); c := text.peak(text); IF (c = "+") OR (c = "-") THEN putchar(lex, c); text.nextc(text); c := text.peak(text) END; IF S.digit(c) THEN WHILE S.digit(c) DO putchar(lex, c); text.nextc(text); c := text.peak(text) END ELSE lex.sym := lxERROR02 END END END ELSE IF hex THEN lex.sym := lxERROR01 END END; IF lex.over & (lex.sym >= 0) THEN lex.sym := lxERROR07 END; IF lex.sym = lxINTEGER THEN ARITH.iconv(lex.s, lex.value, error) ELSIF (lex.sym = lxHEX) OR (lex.sym = lxCHAR) THEN ARITH.hconv(lex.s, lex.value, error) ELSIF lex.sym = lxFLOAT THEN ARITH.fconv(lex.s, lex.value, error) END; CASE error OF |0: |1: lex.sym := lxERROR08 |2: lex.sym := lxERROR09 |3: lex.sym := lxERROR10 |4: lex.sym := lxERROR11 |5: lex.sym := lxERROR12 END END number; PROCEDURE string (text: TEXTDRV.TEXT; VAR lex: LEX); VAR c, c1: CHAR; n: INTEGER; quot: CHAR; BEGIN quot := text.peak(text); ASSERT((quot = '"') OR (quot = "'")); text.nextc(text); c := text.peak(text); c1 := c; n := 0; WHILE (c # quot) & (c # 0X) & ~text.eol & ~text.eof DO putchar(lex, c); text.nextc(text); c := text.peak(text); INC(n) END; IF c = quot THEN text.nextc(text); IF lex.over THEN lex.sym := lxERROR05 ELSE IF n # 1 THEN lex.sym := lxSTRING ELSE lex.sym := lxCHAR; ARITH.setChar(lex.value, ORD(c1)) END END ELSE lex.sym := lxERROR03 END; IF lex.sym = lxSTRING THEN lex.string := enterid(lex.s); lex.value.typ := ARITH.tSTRING; lex.value.string := lex.string END END string; PROCEDURE comment (text: TEXTDRV.TEXT); VAR c: CHAR; cond, depth: INTEGER; BEGIN cond := 0; depth := 1; REPEAT c := text.peak(text); text.nextc(text); IF c = "*" THEN IF cond = 1 THEN cond := 0; INC(depth) ELSE cond := 2 END ELSIF c = ")" THEN IF cond = 2 THEN DEC(depth) END; cond := 0 ELSIF c = "(" THEN cond := 1 ELSE cond := 0 END UNTIL (depth = 0) OR text.eof END comment; PROCEDURE delimiter (text: TEXTDRV.TEXT; VAR lex: LEX; VAR range: BOOLEAN); VAR c: CHAR; BEGIN c := text.peak(text); IF range THEN ASSERT(c = ".") END; putchar(lex, c); text.nextc(text); CASE c OF |"+": lex.sym := lxPLUS |"-": lex.sym := lxMINUS |"*": lex.sym := lxMUL |"/": lex.sym := lxSLASH; IF text.peak(text) = "/" THEN lex.sym := lxCOMMENT; REPEAT text.nextc(text) UNTIL text.eol OR text.eof END |"~": lex.sym := lxNOT |"&": lex.sym := lxAND |".": IF range THEN putchar(lex, "."); lex.sym := lxRANGE; range := FALSE; DEC(lex.pos.col) ELSE lex.sym := lxPOINT; c := text.peak(text); IF c = "." THEN lex.sym := lxRANGE; putchar(lex, c); text.nextc(text) END END |",": lex.sym := lxCOMMA |";": lex.sym := lxSEMI |"|": lex.sym := lxBAR |"(": lex.sym := lxLROUND; c := text.peak(text); IF c = "*" THEN lex.sym := lxCOMMENT; putchar(lex, c); text.nextc(text); comment(text) END |"[": lex.sym := lxLSQUARE |"{": lex.sym := lxLCURLY |"^": lex.sym := lxCARET |"=": lex.sym := lxEQ |"#": lex.sym := lxNE |"<": lex.sym := lxLT; c := text.peak(text); IF c = "=" THEN lex.sym := lxLE; putchar(lex, c); text.nextc(text) END |">": lex.sym := lxGT; c := text.peak(text); IF c = "=" THEN lex.sym := lxGE; putchar(lex, c); text.nextc(text) END |":": lex.sym := lxCOLON; c := text.peak(text); IF c = "=" THEN lex.sym := lxASSIGN; putchar(lex, c); text.nextc(text) END |")": lex.sym := lxRROUND |"]": lex.sym := lxRSQUARE |"}": lex.sym := lxRCURLY END END delimiter; PROCEDURE Next* (scanner: SCANNER; VAR lex: LEX); VAR c: CHAR; text: TEXTDRV.TEXT; BEGIN text := scanner.text; REPEAT c := text.peak(text); WHILE S.space(c) DO text.nextc(text); c := text.peak(text) END; lex.s[0] := 0X; lex.length := 0; lex.sym := lxUNDEF; lex.pos.line := text.line; lex.pos.col := text.col; lex.ident := NIL; lex.over := FALSE; IF S.letter(c) THEN ident(text, lex) ELSIF S.digit(c) THEN number(text, lex, scanner.range) ELSIF (c = '"') OR (c = "'") THEN string(text, lex) ELSIF vocabulary.delimiters[ORD(c)] THEN delimiter(text, lex, scanner.range) ELSIF c = 0X THEN lex.sym := lxEOF; IF text.eof THEN INC(lex.pos.col) END ELSE putchar(lex, c); text.nextc(text); lex.sym := lxERROR04 END; IF lex.sym < 0 THEN lex.error := -lex.sym ELSE lex.error := 0 END UNTIL lex.sym # lxCOMMENT END Next; PROCEDURE NewScanner (): SCANNER; VAR scan: SCANNER; citem: C.ITEM; BEGIN citem := C.pop(scanners); IF citem = NIL THEN NEW(scan) ELSE scan := citem(SCANNER) END RETURN scan END NewScanner; PROCEDURE open* (name: ARRAY OF CHAR): SCANNER; VAR scanner: SCANNER; text: TEXTDRV.TEXT; BEGIN text := TEXTDRV.create(); IF text.open(text, name) THEN scanner := NewScanner(); scanner.text := text; scanner.range := FALSE ELSE scanner := NIL; TEXTDRV.destroy(text) END RETURN scanner END open; PROCEDURE close* (VAR scanner: SCANNER); BEGIN IF scanner # NIL THEN IF scanner.text # NIL THEN TEXTDRV.destroy(scanner.text) END; C.push(scanners, scanner); scanner := NIL END END close; PROCEDURE init; VAR i: INTEGER; delim: ARRAY 23 OF CHAR; PROCEDURE enterkw (VAR i: INTEGER; kw: KEYWORD); BEGIN vocabulary.KW[i] := kw; INC(i) END enterkw; BEGIN scanners := C.create(); FOR i := 0 TO 255 DO vocabulary.delimiters[i] := FALSE END; delim := "+-*/~&.,;|([{^=#<>:)]}"; FOR i := 0 TO LEN(delim) - 2 DO vocabulary.delimiters[ORD(delim[i])] := TRUE END; i := 0; enterkw(i, "ARRAY"); enterkw(i, "BEGIN"); enterkw(i, "BY"); enterkw(i, "CASE"); enterkw(i, "CONST"); enterkw(i, "DIV"); enterkw(i, "DO"); enterkw(i, "ELSE"); enterkw(i, "ELSIF"); enterkw(i, "END"); enterkw(i, "FALSE"); enterkw(i, "FOR"); enterkw(i, "IF"); enterkw(i, "IMPORT"); enterkw(i, "IN"); enterkw(i, "IS"); enterkw(i, "MOD"); enterkw(i, "MODULE"); enterkw(i, "NIL"); enterkw(i, "OF"); enterkw(i, "OR"); enterkw(i, "POINTER"); enterkw(i, "PROCEDURE"); enterkw(i, "RECORD"); enterkw(i, "REPEAT"); enterkw(i, "RETURN"); enterkw(i, "THEN"); enterkw(i, "TO"); enterkw(i, "TRUE"); enterkw(i, "TYPE"); enterkw(i, "UNTIL"); enterkw(i, "VAR"); enterkw(i, "WHILE"); NEW(vocabulary.ident); vocabulary.ident.s := ""; vocabulary.ident.offset := -1; vocabulary.ident.offsetW := -1; vocabulary.idents := NIL END init; BEGIN init END SCAN.