(* BSD 2-Clause License Copyright (c) 2018-2021, Anton Krotov All rights reserved. *) MODULE SCAN; IMPORT TXT := TEXTDRV, ARITH, S := STRINGS, ERRORS, LISTS; CONST NUMLEN = 256; IDLEN = 256; TEXTLEN = 512; lxUNDEF* = 0; lxIDENT* = 1; lxINTEGER* = 2; lxHEX* = 3; lxCHAR* = 4; lxFLOAT* = 5; lxSTRING* = 6; lxCOMMENT* = 7; lxEOF* = 8; lxPLUS* = 21; lxMINUS* = 22; lxMUL* = 23; lxSLASH* = 24; lxNOT* = 25; lxAND* = 26; lxPOINT* = 27; lxCOMMA* = 28; lxSEMI* = 29; lxBAR* = 30; lxLROUND* = 31; lxLSQUARE* = 32; lxLCURLY* = 33; lxCARET* = 34; lxEQ* = 35; lxNE* = 36; lxLT* = 37; lxGT* = 38; lxCOLON* = 39; lxRROUND* = 40; lxRSQUARE* = 41; lxRCURLY* = 42; lxLE* = 43; lxGE* = 44; lxASSIGN* = 45; lxRANGE* = 46; lxKW = 51; lxARRAY* = 51; lxBEGIN* = 52; lxBY* = 53; lxCASE* = 54; lxCONST* = 55; lxDIV* = 56; lxDO* = 57; lxELSE* = 58; lxELSIF* = 59; lxEND* = 60; lxFALSE* = 61; lxFOR* = 62; lxIF* = 63; lxIMPORT* = 64; lxIN* = 65; lxIS* = 66; lxMOD* = 67; lxMODULE* = 68; lxNIL* = 69; lxOF* = 70; lxOR* = 71; lxPOINTER* = 72; lxPROCEDURE* = 73; lxRECORD* = 74; lxREPEAT* = 75; lxRETURN* = 76; lxTHEN* = 77; lxTO* = 78; lxTRUE* = 79; lxTYPE* = 80; lxUNTIL* = 81; lxVAR* = 82; lxWHILE* = 83; lxERROR01* = -1; lxERROR02* = -2; lxERROR03* = -3; lxERROR04* = -4; lxERROR05* = -5; (*lxERROR06* = -6;*) lxERROR07* = -7; lxERROR08* = -8; lxERROR09* = -9; lxERROR10* = -10; lxERROR11* = -11; lxERROR12* = -12; lxERROR13* = -13; TYPE TEXTSTR* = ARRAY TEXTLEN OF CHAR; IDSTR* = ARRAY IDLEN OF CHAR; DEF = POINTER TO RECORD (LISTS.ITEM) ident: IDSTR END; STRING* = POINTER TO RECORD (LISTS.ITEM) s*: TEXTSTR; offset*, offsetW*, hash: INTEGER END; IDENT* = RECORD s*: IDSTR; hash*: INTEGER END; POSITION* = RECORD line*, col*: INTEGER END; LEX* = RECORD sym*: INTEGER; pos*: POSITION; ident*: IDENT; string*: STRING; value*: ARITH.VALUE; error*: INTEGER END; SCANNER* = TXT.TEXT; KEYWORD = ARRAY 10 OF CHAR; VAR delimiters: ARRAY 256 OF BOOLEAN; upto, LowerCase, _if: BOOLEAN; strings, def: LISTS.LIST; KW: ARRAY 33 OF RECORD upper, lower: KEYWORD; uhash, lhash: INTEGER END; PROCEDURE enterKW (s: KEYWORD; idx: INTEGER); BEGIN KW[idx].lower := s; KW[idx].upper := s; S.UpCase(KW[idx].upper); KW[idx].uhash := S.HashStr(KW[idx].upper); KW[idx].lhash := S.HashStr(KW[idx].lower); END enterKW; PROCEDURE checkKW (ident: IDENT): INTEGER; VAR i, res: INTEGER; BEGIN res := lxIDENT; i := 0; WHILE i < LEN(KW) DO IF (KW[i].uhash = ident.hash) & (KW[i].upper = ident.s) OR LowerCase & (KW[i].lhash = ident.hash) & (KW[i].lower = ident.s) THEN res := i + lxKW; i := LEN(KW) END; INC(i) END RETURN res END checkKW; PROCEDURE enterStr* (s: TEXTSTR): STRING; VAR str, res: STRING; hash: INTEGER; BEGIN hash := S.HashStr(s); str := strings.first(STRING); res := NIL; WHILE str # NIL DO IF (str.hash = hash) & (str.s = s) THEN res := str; str := NIL ELSE str := str.next(STRING) END END; IF res = NIL THEN NEW(res); res.s := s; res.offset := -1; res.offsetW := -1; res.hash := hash; LISTS.push(strings, res) END RETURN res END enterStr; PROCEDURE nextc (text: TXT.TEXT): CHAR; BEGIN TXT.next(text) RETURN text.peak END nextc; PROCEDURE setIdent* (VAR ident: IDENT; s: IDSTR); BEGIN ident.s := s; ident.hash := S.HashStr(s) END setIdent; PROCEDURE ident (text: TXT.TEXT; VAR lex: LEX); VAR c: CHAR; i: INTEGER; BEGIN c := text.peak; ASSERT(S.letter(c)); i := 0; WHILE (i < IDLEN - 1) & (S.letter(c) OR S.digit(c)) DO lex.ident.s[i] := c; INC(i); c := nextc(text) END; lex.ident.s[i] := 0X; lex.ident.hash := S.HashStr(lex.ident.s); lex.sym := checkKW(lex.ident); IF S.letter(c) OR S.digit(c) THEN ERRORS.WarningMsg(lex.pos.line, lex.pos.col, 2); WHILE S.letter(c) OR S.digit(c) DO c := nextc(text) END END END ident; PROCEDURE number (text: TXT.TEXT; VAR lex: LEX); TYPE NUMSTR = ARRAY NUMLEN OF CHAR; VAR c: CHAR; hex: BOOLEAN; error, sym, i: INTEGER; num: NUMSTR; PROCEDURE push (VAR num: NUMSTR; VAR i: INTEGER; c: CHAR); BEGIN IF i < NUMLEN - 1 THEN num[i] := c; INC(i) END END push; BEGIN c := text.peak; ASSERT(S.digit(c)); i := 0; error := 0; sym := lxINTEGER; hex := FALSE; WHILE S.digit(c) DO push(num, i, c); c := nextc(text) END; WHILE S.hexdigit(c) OR LowerCase & ("a" <= c) & (c <= "f") DO S.cap(c); push(num, i, c); c := nextc(text); hex := TRUE END; IF (c = "H") OR LowerCase & (c = "h") THEN push(num, i, c); TXT.next(text); sym := lxHEX ELSIF (c = "X") OR LowerCase & (c = "x") THEN push(num, i, c); TXT.next(text); sym := lxCHAR ELSIF c = "." THEN IF hex THEN sym := lxERROR01 ELSE c := nextc(text); IF c # "." THEN push(num, i, "."); sym := lxFLOAT ELSE sym := lxINTEGER; text.peak := 7FX; upto := TRUE END; WHILE S.digit(c) DO push(num, i, c); c := nextc(text) END; IF (c = "E") OR LowerCase & (c = "e") THEN push(num, i, c); c := nextc(text); IF (c = "+") OR (c = "-") THEN push(num, i, c); c := nextc(text) END; IF S.digit(c) THEN WHILE S.digit(c) DO push(num, i, c); c := nextc(text) END ELSE sym := lxERROR02 END END END ELSIF hex THEN sym := lxERROR01 END; IF (i = NUMLEN - 1) & (sym >= 0) THEN sym := lxERROR07 END; num[i] := 0X; IF sym = lxINTEGER THEN ARITH.iconv(num, lex.value, error) ELSIF (sym = lxHEX) OR (sym = lxCHAR) THEN ARITH.hconv(num, lex.value, error) ELSIF sym = lxFLOAT THEN ARITH.fconv(num, lex.value, error) END; CASE error OF |0: |1: sym := lxERROR08 |2: sym := lxERROR09 |3: sym := lxERROR10 |4: sym := lxERROR11 |5: sym := lxERROR12 END; lex.sym := sym END number; PROCEDURE string (text: TXT.TEXT; VAR lex: LEX; quot: CHAR); VAR c: CHAR; i: INTEGER; str: TEXTSTR; BEGIN c := nextc(text); i := 0; WHILE (i < LEN(str) - 1) & (c # quot) & (c # 0X) & ~text.eol & ~text.eof DO str[i] := c; c := nextc(text); INC(i) END; str[i] := 0X; IF (i = LEN(str) - 1) & (c # quot) & (c # 0X) & ~text.eol & ~text.eof THEN lex.sym := lxERROR05 END; IF c = quot THEN TXT.next(text); IF i # 1 THEN lex.sym := lxSTRING ELSE lex.sym := lxCHAR; ARITH.setChar(lex.value, ORD(str[0])) END ELSIF lex.sym # lxERROR05 THEN lex.sym := lxERROR03 END; IF lex.sym = lxSTRING THEN lex.string := enterStr(str); lex.value.typ := ARITH.tSTRING; lex.value.string := lex.string END END string; PROCEDURE comment (text: TXT.TEXT); VAR c: CHAR; cond, depth: INTEGER; BEGIN cond := 0; depth := 1; REPEAT c := text.peak; TXT.next(text); IF c = "*" THEN IF cond = 1 THEN cond := 0; INC(depth) ELSE cond := 2 END ELSIF c = ")" THEN IF cond = 2 THEN DEC(depth) END; cond := 0 ELSIF c = "(" THEN cond := 1 ELSE cond := 0 END UNTIL (depth = 0) OR text.eof END comment; PROCEDURE delimiter (text: TXT.TEXT; c: CHAR): INTEGER; VAR sym: INTEGER; c0: CHAR; BEGIN c0 := c; c := nextc(text); CASE c0 OF |"+": sym := lxPLUS |"-": sym := lxMINUS |"*": sym := lxMUL |"/": sym := lxSLASH; IF c = "/" THEN sym := lxCOMMENT; REPEAT TXT.next(text) UNTIL text.eol OR text.eof END |"~": sym := lxNOT |"&": sym := lxAND |".": sym := lxPOINT; IF c = "." THEN sym := lxRANGE; TXT.next(text) END |",": sym := lxCOMMA |";": sym := lxSEMI |"|": sym := lxBAR |"(": sym := lxLROUND; IF c = "*" THEN sym := lxCOMMENT; TXT.next(text); comment(text) END |"[": sym := lxLSQUARE |"{": sym := lxLCURLY |"^": sym := lxCARET |"=": sym := lxEQ |"#": sym := lxNE |"<": sym := lxLT; IF c = "=" THEN sym := lxLE; TXT.next(text) END |">": sym := lxGT; IF c = "=" THEN sym := lxGE; TXT.next(text) END |":": sym := lxCOLON; IF c = "=" THEN sym := lxASSIGN; TXT.next(text) END |")": sym := lxRROUND |"]": sym := lxRSQUARE |"}": sym := lxRCURLY END RETURN sym END delimiter; PROCEDURE Next* (text: SCANNER; VAR lex: LEX); VAR c: CHAR; PROCEDURE check (cond: BOOLEAN; text: SCANNER; lex: LEX; errno: INTEGER); BEGIN IF ~cond THEN ERRORS.ErrorMsg(text.fname, lex.pos.line, lex.pos.col, errno) END END check; PROCEDURE IsDef (str: ARRAY OF CHAR): BOOLEAN; VAR cur: DEF; BEGIN cur := def.first(DEF); WHILE (cur # NIL) & (cur.ident # str) DO cur := cur.next(DEF) END RETURN cur # NIL END IsDef; PROCEDURE Skip (text: SCANNER); VAR i: INTEGER; BEGIN i := 0; WHILE (i <= text.ifc) & ~text._skip[i] DO INC(i) END; text.skip := i <= text.ifc END Skip; PROCEDURE prep_if (text: SCANNER; VAR lex: LEX); VAR skip: BOOLEAN; BEGIN INC(text.ifc); text._elsif[text.ifc] := lex.sym = lxELSIF; IF lex.sym = lxIF THEN INC(text.elsec); text._else[text.elsec] := FALSE END; _if := TRUE; skip := TRUE; text.skip := FALSE; Next(text, lex); check(lex.sym = lxLROUND, text, lex, 64); Next(text, lex); check(lex.sym = lxIDENT, text, lex, 22); REPEAT IF IsDef(lex.ident.s) THEN skip := FALSE END; Next(text, lex); IF lex.sym = lxBAR THEN Next(text, lex); check(lex.sym = lxIDENT, text, lex, 22) ELSE check(lex.sym = lxRROUND, text, lex, 33) END UNTIL lex.sym = lxRROUND; _if := FALSE; text._skip[text.ifc] := skip; Skip(text); Next(text, lex) END prep_if; PROCEDURE prep_end (text: SCANNER; VAR lex: LEX); BEGIN check(text.ifc > 0, text, lex, 118); IF lex.sym = lxEND THEN WHILE text._elsif[text.ifc] DO DEC(text.ifc) END; DEC(text.ifc); DEC(text.elsec) ELSIF (lex.sym = lxELSE) OR (lex.sym = lxELSIF) THEN check(~text._else[text.elsec], text, lex, 118); text._skip[text.ifc] := ~text._skip[text.ifc]; text._else[text.elsec] := lex.sym = lxELSE END; Skip(text); IF lex.sym = lxELSIF THEN prep_if(text, lex) ELSE Next(text, lex) END END prep_end; BEGIN REPEAT c := text.peak; WHILE S.space(c) DO c := nextc(text) END; lex.pos.line := text.line; lex.pos.col := text.col; IF S.letter(c) THEN ident(text, lex) ELSIF S.digit(c) THEN number(text, lex) ELSIF (c = '"') OR (c = "'") THEN string(text, lex, c) ELSIF delimiters[ORD(c)] THEN lex.sym := delimiter(text, c) ELSIF c = "$" THEN IF S.letter(nextc(text)) THEN ident(text, lex); IF lex.sym = lxIF THEN IF ~_if THEN prep_if(text, lex) END ELSIF (lex.sym = lxEND) OR (lex.sym = lxELSE) OR (lex.sym = lxELSIF) THEN IF ~_if THEN prep_end(text, lex) END ELSE check(FALSE, text, lex, 119) END ELSE check(FALSE, text, lex, 119) END ELSIF c = 0X THEN lex.sym := lxEOF; text.skip := FALSE; IF text.eof THEN INC(lex.pos.col) END ELSIF (c = 7FX) & upto THEN upto := FALSE; lex.sym := lxRANGE; DEC(lex.pos.col); TXT.next(text) ELSE TXT.next(text); lex.sym := lxERROR04 END; IF lex.sym < 0 THEN lex.error := -lex.sym ELSE lex.error := 0 END UNTIL (lex.sym # lxCOMMENT) & ~text.skip END Next; PROCEDURE open* (name: ARRAY OF CHAR): SCANNER; RETURN TXT.open(name) END open; PROCEDURE close* (VAR scanner: SCANNER); BEGIN TXT.close(scanner) END close; PROCEDURE init* (lower: BOOLEAN); VAR i: INTEGER; delim: ARRAY 23 OF CHAR; BEGIN upto := FALSE; LowerCase := lower; FOR i := 0 TO 255 DO delimiters[i] := FALSE END; delim := "+-*/~&.,;|([{^=#<>:)]}"; FOR i := 0 TO LEN(delim) - 2 DO delimiters[ORD(delim[i])] := TRUE END; enterKW("array", 0); enterKW("begin", 1); enterKW("by", 2); enterKW("case", 3); enterKW("const", 4); enterKW("div", 5); enterKW("do", 6); enterKW("else", 7); enterKW("elsif", 8); enterKW("end", 9); enterKW("false", 10); enterKW("for", 11); enterKW("if", 12); enterKW("import", 13); enterKW("in", 14); enterKW("is", 15); enterKW("mod", 16); enterKW("module", 17); enterKW("nil", 18); enterKW("of", 19); enterKW("or", 20); enterKW("pointer", 21); enterKW("procedure", 22); enterKW("record", 23); enterKW("repeat", 24); enterKW("return", 25); enterKW("then", 26); enterKW("to", 27); enterKW("true", 28); enterKW("type", 29); enterKW("until", 30); enterKW("var", 31); enterKW("while", 32) END init; PROCEDURE NewDef* (str: ARRAY OF CHAR); VAR item: DEF; BEGIN NEW(item); COPY(str, item.ident); LISTS.push(def, item) END NewDef; BEGIN def := LISTS.create(NIL); strings := LISTS.create(NIL) END SCAN.