MODULE FoxScanner;
IMPORT Streams, Strings, Diagnostics, Basic := FoxBasic, D := Debugging, Commands, StringPool;
CONST
Trace = FALSE;
MaxIdentifierLength* = 128;
MaxHexDigits* = 8;
MaxHugeHexDigits* = 16;
MaxRealExponent* = 38;
MaxLongrealExponent* = 308;
EOT* = 0X; LF* = 0AX; CR* = 0DX; TAB* = 09X;
TYPE
StringType* = Strings.String;
IdentifierType *= StringPool.Index;
IdentifierString*= ARRAY MaxIdentifierLength+1 OF CHAR;
CONST
None*= 0;
Equal*= 1; DotEqual*= 2; Unequal*= 3; DotUnequal*= 4; Less*= 5; DotLess*= 6;
LessEqual*= 7; DotLessEqual*= 8; Greater*= 9; DotGreater*= 10; GreaterEqual*= 11; DotGreaterEqual*= 12;
In*= 13; Is*= 14;
Times*= 15; TimesTimes*= 16; DotTimes*= 17; PlusTimes*= 18; Slash*= 19; Backslash*= 20;
DotSlash*= 21; Div*= 22; Mod*= 23; And*= 24;
Or*= 25; Plus*= 26; Minus*= 27;
Not*= 28;
LeftParenthesis*= 29; LeftBracket*= 30; LeftBrace*= 31; Number*= 32; Character*= 33; String*= 34;
Nil*= 35; Imag*= 36; True*= 37; False*= 38; Self*= 39; Result*= 40; Identifier*= 41;
If*= 42; Case*= 43; While*= 44; Repeat*= 45; For*= 46; Loop*= 47;
With*= 48; Exit*= 49; Await*= 50; Return*= 51; Begin*= 52;
Semicolon*= 53; Transpose*= 54; RightBrace*= 55; RightBracket*= 56; RightParenthesis*= 57; Questionmark*= 58; ExclamationMark*= 59;
Upto*= 60; Arrow*= 61; Period*= 62; Comma*= 63; Colon*= 64; Of*= 65;
Then*= 66; Do*= 67; To*= 68; By*= 69; Becomes*= 70; Bar*= 71;
End*= 72; Else*= 73; Elsif*= 74; Until*= 75; Finally*= 76;
Code*= 77; Const*= 78; Type*= 79; Var*= 80; Out*= 81; Procedure*= 82;
Operator*= 83; Import*= 84; Definition*= 85; Module*= 86; Cell*= 87; CellNet*= 88;
Array*= 89; Object*= 90; Record*= 91; Pointer*= 92; Enum*= 93; Port*= 94;
Ln*= 95; PC*= 96; PCOffset*= 97;
Shortint*= 98; Integer*= 99; Longint*= 100; Hugeint*= 101; Real*= 102; Longreal*= 103;
Comment*= 104; EndOfText*= 105;
SingleQuote = 27X; DoubleQuote* = 22X;
Ellipsis = 7FX;
Uppercase*=0;
Lowercase*=1;
Unknown*=2;
TYPE
Keyword* = ARRAY 32 OF CHAR;
KeywordTable* = OBJECT(Basic.HashTableInt);
VAR table: POINTER TO ARRAY OF LONGINT;
PROCEDURE &InitTable*(size: LONGINT);
VAR i: LONGINT;
BEGIN
Init(size); NEW(table,size); FOR i := 0 TO size-1 DO table[i] := -1; END;
END InitTable;
PROCEDURE IndexByIdentifier*(identifier: IdentifierType): LONGINT;
VAR stringPoolIndex: LONGINT;
BEGIN
IF Has(identifier) THEN
RETURN GetInt(identifier)
ELSE
RETURN -1
END;
END IndexByIdentifier;
PROCEDURE IndexByString*(CONST name: ARRAY OF CHAR): LONGINT;
VAR stringPoolIndex: LONGINT;
BEGIN
StringPool.GetIndex(name,stringPoolIndex);
IF Has(stringPoolIndex) THEN
RETURN GetInt(stringPoolIndex)
ELSE
RETURN -1
END;
END IndexByString;
PROCEDURE IdentifierByIndex*(index: LONGINT; VAR identifier: IdentifierType);
BEGIN
identifier := table[index]
END IdentifierByIndex;
PROCEDURE StringByIndex*(index: LONGINT; VAR name: ARRAY OF CHAR);
VAR stringPoolIndex: LONGINT;
BEGIN
stringPoolIndex := table[index];
IF stringPoolIndex < 0 THEN
name := ""
ELSE
StringPool.GetString(stringPoolIndex,name);
END;
END StringByIndex;
PROCEDURE PutString*(CONST name: ARRAY OF CHAR; index: LONGINT);
VAR stringPoolIndex: LONGINT;
BEGIN
StringPool.GetIndex(name,stringPoolIndex);
table[index] := stringPoolIndex;
PutInt(stringPoolIndex,index);
END PutString;
END KeywordTable;
TYPE
Token*=LONGINT;
Symbol*= RECORD
start*,end*,line-: LONGINT;
token*: Token;
identifier*: IdentifierType;
identifierString*: IdentifierString;
string*: StringType;
stringLength*: LONGINT;
numberType*: LONGINT;
integer*: LONGINT;
hugeint*: HUGEINT;
character*: CHAR;
real*: LONGREAL;
END;
StringMaker* = OBJECT
VAR length : LONGINT;
data : StringType;
PROCEDURE &Init*(initialSize : LONGINT);
BEGIN
IF initialSize < 256 THEN initialSize := 256 END;
NEW(data, initialSize); length := 0;
END Init;
PROCEDURE Add*(CONST buf: ARRAY OF CHAR; ofs, len: LONGINT; propagate: BOOLEAN; VAR res: LONGINT);
VAR i : LONGINT; n: StringType;
BEGIN
IF length + len + 1 >= LEN(data) THEN
NEW(n, LEN(data) + len + 1); FOR i := 0 TO length - 1 DO n[i] := data[i] END;
data := n
END;
WHILE len > 0 DO
data[length] := buf[ofs];
INC(ofs); INC(length); DEC(len)
END;
data[length] := 0X;
END Add;
PROCEDURE Shorten*(n : LONGINT);
BEGIN
DEC(length, n);
IF length < 0 THEN length := 0 END;
IF length > 0 THEN data[length - 1] := 0X ELSE data[length] := 0X END
END Shorten;
PROCEDURE Clear*;
BEGIN
data[0] := 0X;
length := 0
END Clear;
PROCEDURE GetWriter*() : Streams.Writer;
VAR w : Streams.Writer;
BEGIN
NEW(w, SELF.Add, 256);
RETURN w
END GetWriter;
PROCEDURE GetString*(VAR len: LONGINT) : StringType;
BEGIN
len := length;
RETURN data
END GetString;
PROCEDURE GetStringCopy*(VAR len: LONGINT): StringType;
VAR new: StringType;
BEGIN
len := length;
NEW(new,len+1);
COPY(data^,new^);
RETURN new
END GetStringCopy;
END StringMaker;
Scanner* = OBJECT
VAR
source-: StringType;
reader: Streams.Reader;
diagnostics: Diagnostics.Diagnostics;
ch: CHAR;
position: LONGINT;
line-: LONGINT;
error-: BOOLEAN;
firstIdentifier: BOOLEAN;
case-: LONGINT;
stringWriter: Streams.Writer;
stringMaker: StringMaker;
PROCEDURE & InitializeScanner*( CONST source: ARRAY OF CHAR; reader: Streams.Reader; position: LONGINT; diagnostics: Diagnostics.Diagnostics );
BEGIN
NEW(stringMaker,1024);
stringWriter := stringMaker.GetWriter();
error := FALSE;
NEW(SELF.source, Strings.Length(source)+1);
COPY (source, SELF.source^);
SELF.reader := reader;
SELF.diagnostics := diagnostics;
ch := " ";
case := Unknown;
firstIdentifier := TRUE;
IF reader = NIL THEN ch := EOT ELSE GetNextCharacter END;
IF Trace THEN D.Str( "New scanner " ); D.Ln; END;
SELF.position := position;
line := 0;
END InitializeScanner;
PROCEDURE ResetCase*;
BEGIN
firstIdentifier := TRUE; case := Unknown;
END ResetCase;
PROCEDURE ErrorS(CONST msg: ARRAY OF CHAR);
BEGIN
IF diagnostics # NIL THEN
diagnostics.Error(source^, position, Diagnostics.Invalid, msg)
END;
error := TRUE;
END ErrorS;
PROCEDURE Error( code: INTEGER );
VAR errorMessage: ARRAY 256 OF CHAR;
BEGIN
IF diagnostics # NIL THEN
Basic.GetErrorMessage(code,"",errorMessage);
diagnostics.Error(source^, position, code, errorMessage)
END;
error := TRUE;
END Error;
PROCEDURE GetNextCharacter;
BEGIN
reader.Char(ch); INC(position);
IF ch = LF THEN INC(line) END;
END GetNextCharacter;
PROCEDURE GetString(VAR symbol: Symbol; multiLine, multiString, useControl: BOOLEAN);
VAR och: CHAR; error: BOOLEAN;
CONST control = '\';
PROCEDURE Append(ch :CHAR);
BEGIN
IF ch = 0X THEN
ErrorS("Unexpected end of text in string"); error := TRUE
ELSE
stringWriter.Char(ch)
END;
END Append;
BEGIN
stringMaker.Clear;
och := ch; error := FALSE;
REPEAT
LOOP
IF error THEN EXIT END;
GetNextCharacter;
IF (ch = och) OR (ch = EOT) THEN EXIT END;
IF useControl & (ch = control) THEN
GetNextCharacter;
IF (ch = control) OR (ch = och) THEN
Append(ch)
ELSIF ch = 'n' THEN
Append(CR); Append(LF);
ELSIF ch = 't' THEN
Append(TAB)
ELSE
ErrorS("Unknown control sequence")
END;
ELSE
IF ~multiLine & (ch < " ") THEN Error( Basic.StringIllegalCharacter ); EXIT END;
Append(ch)
END;
END;
IF ch = EOT THEN
ErrorS("Unexpected end of text in string")
ELSE
GetNextCharacter;
IF multiString THEN SkipBlanks END;
END;
UNTIL ~multiString OR (ch # och);
stringWriter.Char(0X);
stringWriter.Update;
symbol.string := stringMaker.GetStringCopy(symbol.stringLength);
END GetString;
PROCEDURE GetIdentifier( VAR symbol: Symbol );
VAR i: LONGINT;
BEGIN
i := 0;
REPEAT symbol.identifierString[i] := ch; INC( i ); GetNextCharacter UNTIL reservedCharacter[ORD( ch )] OR (i = MaxIdentifierLength);
IF i = MaxIdentifierLength THEN Error( Basic.IdentifierTooLong ); DEC( i ) END;
symbol.identifierString[i] := 0X;
StringPool.GetIndex(symbol.identifierString, symbol.identifier);
END GetIdentifier;
PROCEDURE GetNumber(VAR symbol: Symbol): Token;
VAR i, nextInt, m, n, d, e, si: LONGINT;
dig: ARRAY 24 OF CHAR;
f: LONGREAL; expCh: CHAR; neg, long: BOOLEAN;
result: LONGINT;
hugeint, tenh: HUGEINT;
PROCEDURE Ten( e: LONGINT ): LONGREAL;
VAR x, p: LONGREAL;
BEGIN
x := 1; p := 10;
WHILE e > 0 DO
IF ODD( e ) THEN x := x * p END;
e := e DIV 2;
IF e > 0 THEN p := p * p END
END;
RETURN x
END Ten;
PROCEDURE Decimal( ch: CHAR ): LONGINT;
BEGIN
IF ch <= "9" THEN RETURN ORD( ch ) - ORD( "0" ) ELSE Error( Basic.NumberIllegalCharacter ); RETURN 0 END
END Decimal;
PROCEDURE Hexadecimal( ch: CHAR ): LONGINT;
BEGIN
IF ch <= "9" THEN RETURN ORD( ch ) - ORD( "0" )
ELSIF ch <= "F" THEN RETURN ORD( ch ) - ORD( "A" ) + 10
ELSE Error( Basic.NumberIllegalCharacter ); RETURN 0
END
END Hexadecimal;
BEGIN
result := Number;
i := 0; m := 0; n := 0; d := 0; si := 0; long := FALSE;
LOOP
IF ("0" <= ch) & (ch <= "9") OR (d = 0) & ("A" <= ch) & (ch <= "F") THEN
IF (m > 0) OR (ch # "0") THEN
IF n < LEN( dig ) THEN dig[n] := ch; INC( n ) END;
INC( m )
END;
symbol.identifierString[si] := ch; INC( si ); GetNextCharacter; INC( i )
ELSIF ch = "." THEN
symbol.identifierString[si] := ch; INC( si ); GetNextCharacter;
IF ch = "." THEN ch := Ellipsis; EXIT
ELSIF d = 0 THEN d := i
ELSE Error( Basic.NumberIllegalCharacter )
END
ELSE EXIT
END
END;
IF d = 0 THEN
IF n = m THEN
symbol.integer := 0; i := 0; symbol.hugeint := 0;
IF ch = "X" THEN
symbol.identifierString[si] := ch; INC( si ); GetNextCharacter; result := Character;
IF (n <= 2) THEN
WHILE i < n DO symbol.integer := symbol.integer * 10H + Hexadecimal( dig[i] ); INC( i ) END;
symbol.character := CHR(symbol.integer);
ELSE Error( Basic.NumberTooLarge )
END
ELSIF ch = "H" THEN
symbol.identifierString[si] := ch; INC( si ); GetNextCharacter;
IF (n < MaxHexDigits) OR (n=MaxHexDigits) & (dig[0] <= "7") THEN
symbol.numberType := Integer;
WHILE i < n DO symbol.integer := symbol.integer * 10H + Hexadecimal( dig[i] ); INC( i ) END;
symbol.hugeint := symbol.integer;
ELSIF n <= MaxHugeHexDigits THEN
symbol.numberType := Hugeint;
IF (n = MaxHugeHexDigits) & (dig[0] > "7") THEN symbol.hugeint := -1 END;
WHILE i < n DO symbol.hugeint := Hexadecimal( dig[i] ) + symbol.hugeint * 10H; INC( i ) END;
symbol.integer :=SHORT(symbol.hugeint);
ELSE
symbol.numberType := Hugeint;
Error( Basic.NumberTooLarge )
END
ELSE
symbol.numberType := Integer;
WHILE (i < n) & ~long DO
d := Decimal( dig[i] ); INC( i );
nextInt := symbol.integer*10+d;
IF nextInt >=0 THEN symbol.integer := nextInt ELSE long := TRUE END;
END;
IF long THEN
i := 0;
hugeint := 0;
tenh := 10;
symbol.numberType := Hugeint;
WHILE i < n DO
d := Decimal( dig[i] ); INC( i );
hugeint := hugeint * tenh + d;
IF hugeint < 0 THEN Error( Basic.NumberTooLarge ) END
END;
symbol.hugeint := hugeint;
symbol.integer := SHORT(symbol.hugeint);
ELSE
symbol.hugeint := symbol.integer;
END
END
ELSE
symbol.numberType := Hugeint;
Error( Basic.NumberTooLarge )
END
ELSE
f := 0; e := 0; expCh := "E";
WHILE n > 0 DO DEC( n ); f := (Decimal( dig[n] ) + f) / 10 END;
IF (ch = "E") OR (ch = "D") THEN
expCh := ch; symbol.identifierString[si] := ch; INC( si ); GetNextCharacter; neg := FALSE;
IF ch = "-" THEN neg := TRUE; symbol.identifierString[si] := ch; INC( si ); GetNextCharacter
ELSIF ch = "+" THEN symbol.identifierString[si] := ch; INC( si ); GetNextCharacter
END;
IF ("0" <= ch) & (ch <= "9") THEN
REPEAT
n := Decimal( ch ); symbol.identifierString[si] := ch; INC( si ); GetNextCharacter;
IF e <= (MAX( INTEGER ) - n) DIV 10 THEN e := e * 10 + n ELSE Error( Basic.NumberTooLarge ) END
UNTIL (ch < "0") OR ("9" < ch);
IF neg THEN e := -e END
ELSE Error( Basic.NumberIllegalCharacter )
END
END;
DEC( e, i - d - m );
IF expCh = "E" THEN
symbol.numberType := Real;
IF (1 - MaxRealExponent < e) & (e <= MaxRealExponent) THEN
IF e < 0 THEN symbol.real := f / Ten( -e ) ELSE symbol.real := f * Ten( e ) END
ELSE Error( Basic.NumberTooLarge )
END
ELSE
symbol.numberType := Longreal;
IF (1 - MaxLongrealExponent < e) & (e <= MaxLongrealExponent) THEN
IF e < 0 THEN symbol.real := f / Ten( -e ) ELSE symbol.real := f * Ten( e ) END
ELSE Error( Basic.NumberTooLarge )
END
END
END;
symbol.identifierString[si] := 0X;
RETURN result;
END GetNumber;
PROCEDURE ReadComment(VAR symbol: Symbol);
VAR level: LONGINT;
BEGIN
stringMaker.Clear;
level := 1;
WHILE (level > 0) & (ch # EOT) DO
IF ch = "(" THEN
stringWriter.Char(ch);
GetNextCharacter;
IF ch = "*" THEN INC(level); stringWriter.Char(ch); GetNextCharacter; END;
ELSIF ch = "*" THEN
stringWriter.Char(ch);
GetNextCharacter;
IF ch =")" THEN DEC(level); stringWriter.Char(ch); GetNextCharacter; END;
ELSE
stringWriter.Char(ch);
GetNextCharacter;
END;
END;
IF level > 0 THEN
Error(Basic.CommentNotClosed)
END;
stringWriter.Char(0X);
stringWriter.Update;
stringMaker.Shorten(2);
symbol.token := Comment;
symbol.string := stringMaker.GetString(symbol.stringLength);
END ReadComment;
PROCEDURE SkipToNextEnd*(VAR startPos,endPos: LONGINT; VAR symbol: Symbol): BOOLEAN;
VAR s: LONGINT;
BEGIN
ASSERT(case # Unknown);
stringMaker.Clear;
startPos := symbol.end;
s := symbol.token;
WHILE (s # EndOfText) & (s # End) DO
symbol.start := position;
endPos := position;
CASE ch OF
'A' .. 'Z','a'..'z': s := Identifier;
GetIdentifier(symbol);
IF (case=Uppercase) & (symbol.identifierString = "END") OR (case=Lowercase) & (symbol.identifierString = "end") THEN
s := End
ELSE
stringWriter.String(symbol.identifierString);
END;
ELSE
stringWriter.Char(ch);
GetNextCharacter;
END;
symbol.end := position;
END;
stringWriter.Update;
symbol.string := stringMaker.GetStringCopy(symbol.stringLength);
symbol.token := s;
IF Trace THEN
D.String("skip to end: "); D.Int(startPos,1); D.String(","); D.Int(endPos,1); D.Ln;
OutSymbol(D.Log,symbol); D.Ln;
END;
RETURN s=End
END SkipToNextEnd;
PROCEDURE SkipBlanks;
BEGIN
WHILE ch <= " " DO
IF ch = EOT THEN
IF Trace THEN D.String("EOT"); D.Ln; END;
RETURN
ELSE GetNextCharacter
END
END;
END SkipBlanks;
PROCEDURE GetNextSymbol*(VAR symbol: Symbol ): BOOLEAN;
VAR s,token: LONGINT;
BEGIN
SkipBlanks;
symbol.start := position; symbol.line := line;
stringMaker.Clear;
CASE ch OF
EOT: s := EndOfText
| DoubleQuote:
s := String; GetString(symbol,TRUE, TRUE, FALSE);
| SingleQuote:
s := String; GetString(symbol,FALSE, FALSE,FALSE);
| '#': s := Unequal; GetNextCharacter
| '&': s := And; GetNextCharacter
| '(': GetNextCharacter;
IF ch = '*' THEN GetNextCharacter; ReadComment(symbol); s := Comment; ELSE s := LeftParenthesis END
| ')': s := RightParenthesis; GetNextCharacter
| '*': GetNextCharacter; IF ch = '*' THEN GetNextCharacter; s := TimesTimes ELSE s := Times END
| '+': GetNextCharacter; IF ch = '*' THEN GetNextCharacter; s := PlusTimes ELSE s := Plus END
| ',': s := Comma; GetNextCharacter
| '-': s := Minus; GetNextCharacter
| '.': GetNextCharacter;
IF ch = '.' THEN GetNextCharacter; s := Upto;
ELSIF ch = '*' THEN GetNextCharacter; s := DotTimes;
ELSIF ch = '/' THEN GetNextCharacter; s := DotSlash;
ELSIF ch='=' THEN GetNextCharacter; s := DotEqual;
ELSIF ch='#' THEN GetNextCharacter; s := DotUnequal;
ELSIF ch='>' THEN GetNextCharacter;
IF ch='=' THEN s := DotGreaterEqual; GetNextCharacter
ELSE s := DotGreater;
END
ELSIF ch='<' THEN GetNextCharacter;
IF ch='=' THEN s := DotLessEqual; GetNextCharacter
ELSE s := DotLess;
END
ELSE s := Period END
| '/': s := Slash; GetNextCharacter
| '0'..'9': s := GetNumber(symbol);
| ':': GetNextCharacter;
IF ch = '=' THEN GetNextCharacter; s := Becomes ELSE s := Colon END
| ';': s := Semicolon; GetNextCharacter
| '<': GetNextCharacter;
IF ch = '=' THEN GetNextCharacter; s := LessEqual ELSE s := Less; END
| '=': s := Equal; GetNextCharacter
| '>': GetNextCharacter;
IF ch = '=' THEN GetNextCharacter; s := GreaterEqual ELSE s := Greater; END
| '[': s := LeftBracket; GetNextCharacter
| ']': s := RightBracket; GetNextCharacter
| '^': s := Arrow; GetNextCharacter
| '{': s := LeftBrace; GetNextCharacter
| '|': s := Bar; GetNextCharacter
| '}': s := RightBrace; GetNextCharacter
| '~': s := Not; GetNextCharacter
| '\': s := Backslash; GetNextCharacter;
IF ch = DoubleQuote THEN s := String; GetString(symbol, TRUE, TRUE, TRUE) END;
| '`': s := Transpose; GetNextCharacter
| '?': s := Questionmark; GetNextCharacter
| Ellipsis:
s := Upto; GetNextCharacter
| 'A'..'Z': s := Identifier; GetIdentifier( symbol );
IF (case=Uppercase) OR (case=Unknown) THEN
token := keywordsUpper.IndexByIdentifier(symbol.identifier);
IF (token >= 0) THEN s := token END;
IF (s = Module) OR (s=CellNet) THEN case := Uppercase END;
END;
| 'a'..'z': s := Identifier; GetIdentifier( symbol);
IF (case = Lowercase) OR (case=Unknown) THEN
token := keywordsLower.IndexByIdentifier(symbol.identifier);
IF (token >= 0) THEN s := token END;
IF (s = Module) OR (s=CellNet) THEN case := Lowercase END;
END;
IF firstIdentifier & (s # Module) & (s # CellNet) THEN case := Uppercase; s := Identifier END;
ELSE s := Identifier; GetIdentifier( symbol );
END;
firstIdentifier := FALSE;
symbol.token := s;
symbol.end := position;
IF Trace THEN OutSymbol(D.Log,symbol); D.Ln; END;
RETURN ~error
END GetNextSymbol;
PROCEDURE ResetError*();
BEGIN error := FALSE
END ResetError;
PROCEDURE ResetErrorDiagnostics*(VAR diagnostics: Diagnostics.Diagnostics);
VAR b: BOOLEAN; d: Diagnostics.Diagnostics;
BEGIN
error := FALSE;
d := SELF.diagnostics; SELF.diagnostics := diagnostics; diagnostics := d;
END ResetErrorDiagnostics;
END Scanner;
Context*=RECORD
position, readerPosition : LONGINT;
ch: CHAR;
END;
AssemblerScanner* = OBJECT (Scanner)
VAR
startContext-: Context;
PROCEDURE &InitAssemblerScanner*( CONST source: ARRAY OF CHAR; reader: Streams.Reader; position: LONGINT; diagnostics: Diagnostics.Diagnostics );
BEGIN
InitializeScanner(source,reader,position,diagnostics);
GetContext(startContext);
END InitAssemblerScanner;
PROCEDURE GetContext*(VAR context: Context);
BEGIN
context.ch := ch;
context.position := position;
context.readerPosition := reader.Pos();
END GetContext;
PROCEDURE SetContext*(CONST context: Context);
BEGIN
reader.SetPos(context.readerPosition);
ch := context.ch;
position := context.position;
END SetContext;
PROCEDURE SkipToEndOfLine*;
BEGIN
WHILE (ch # EOT) & (ch # CR) & (ch # LF) DO
GetNextCharacter
END;
END SkipToEndOfLine;
PROCEDURE GetIdentifier( VAR symbol: Symbol );
VAR
i: LONGINT;
PROCEDURE CharacterIsAllowed(character: CHAR): BOOLEAN;
BEGIN
CASE character OF
| 'a' .. 'z', 'A' .. 'Z', '0' .. '9', '@', '.', '_': RETURN TRUE
ELSE RETURN FALSE
END;
END CharacterIsAllowed;
BEGIN
i := 0;
REPEAT
symbol.identifierString[i] := ch; INC( i ); GetNextCharacter
UNTIL ~CharacterIsAllowed(ch) OR (i = MaxIdentifierLength);
IF i = MaxIdentifierLength THEN Error( Basic.IdentifierTooLong ); DEC( i ) END;
symbol.identifierString[i] := 0X;
END GetIdentifier;
PROCEDURE GetNextSymbol*(VAR symbol: Symbol ): BOOLEAN;
VAR s: LONGINT;
PROCEDURE SkipBlanks;
BEGIN
WHILE (ch <= ' ') & (ch # CR) & (ch # LF) & (ch # EOT) DO
GetNextCharacter
END;
END SkipBlanks;
BEGIN
REPEAT
SkipBlanks;
symbol.start := position; symbol.line := line;
CASE ch OF
| EOT: s := EndOfText;
| DoubleQuote:
s := String; GetString(symbol, TRUE, FALSE, TRUE);
| SingleQuote:
s := Character; GetString(symbol, FALSE, FALSE, FALSE); symbol.character := symbol.string[0];
IF symbol.stringLength #2 THEN
Error(Basic.IllegalCharacterValue)
END;
| '\': s := Backslash; GetNextCharacter;
IF ch = DoubleQuote THEN s := String; GetString(symbol, FALSE, FALSE, TRUE) END;
| '#': s := Unequal; GetNextCharacter;
| '(': GetNextCharacter;
IF ch = '*' THEN GetNextCharacter; ReadComment(symbol); s := Comment; ELSE s := LeftParenthesis END
| ')': s := RightParenthesis; GetNextCharacter
| CR: GetNextCharacter; s := Ln;IF ch = LF THEN GetNextCharacter END;
| LF: GetNextCharacter; s := Ln;
| '*': s := Times; GetNextCharacter;
| '+': s := Plus ; GetNextCharacter;
| ',': s := Comma; GetNextCharacter
| '-': s := Minus; GetNextCharacter
| '~': s := Not; GetNextCharacter
| '.': s:= Period; GetNextCharacter
| '/': s := Div; GetNextCharacter
| '%': s := Mod; GetNextCharacter
| '0'..'9': s := GetNumber(symbol);
| ':': s := Colon; GetNextCharacter;
| ';': s := Comment; SkipToEndOfLine;
| '=': s := Equal; GetNextCharacter
| '[': s := LeftBracket; GetNextCharacter
| ']': s := RightBracket; GetNextCharacter
| '{': s := LeftBrace; GetNextCharacter
| '}': s := RightBrace; GetNextCharacter
| '!': s := ExclamationMark; GetNextCharacter
| 'A'..'Z': s := Identifier; GetIdentifier( symbol );
| 'a'..'z': s := Identifier; GetIdentifier( symbol);
| '@': s := Identifier; GetIdentifier( symbol);
| '$': GetNextCharacter;
IF ch = '$' THEN s := PCOffset; GetNextCharacter ELSE s := PC; END
ELSE s := None; GetNextCharacter;
END;
symbol.end := position;
UNTIL s # Comment;
symbol.token := s;
IF Trace THEN D.Ln; D.Str( "Scan at " ); D.Int( symbol.start,1 ); D.Str( ": " ); OutSymbol(D.Log,symbol); D.Update; END;
RETURN ~error
END GetNextSymbol;
END AssemblerScanner;
VAR
reservedCharacter: ARRAY 256 OF BOOLEAN;
tokens-: ARRAY EndOfText+1 OF Keyword;
keywordsLower, keywordsUpper: KeywordTable;
PROCEDURE NewScanner*( CONST source: ARRAY OF CHAR; reader: Streams.Reader; position: LONGINT; diagnostics: Diagnostics.Diagnostics ): Scanner;
VAR s: Scanner;
BEGIN
NEW( s, source, reader, position, diagnostics ); RETURN s;
END NewScanner;
PROCEDURE NewAssemblerScanner*( CONST source: ARRAY OF CHAR; reader: Streams.Reader; position: LONGINT; diagnostics: Diagnostics.Diagnostics ): AssemblerScanner;
VAR s: AssemblerScanner;
BEGIN
NEW( s, source, reader, position, diagnostics ); RETURN s;
END NewAssemblerScanner;
PROCEDURE SymbolToString*(CONST symbol: Symbol; case: LONGINT; VAR str: ARRAY OF CHAR);
VAR id: StringPool.Index;
BEGIN
CASE symbol.token OF
Identifier, Number: COPY(symbol.identifierString, str)
| String, Comment: ASSERT(LEN(str) >= LEN(symbol.string^)); COPY(symbol.string^, str);
ELSE
GetKeyword(case, symbol.token, id);
IF id < 0 THEN str := "" ELSE StringPool.GetString(id, str) END;
END;
END SymbolToString;
PROCEDURE OutSymbol*(w: Streams.Writer; CONST symbol: Symbol);
VAR str: ARRAY 256 OF CHAR;
BEGIN
w.Int(symbol.start,1); w.String("-");w.Int(symbol.end,1); w.String(":");
w.String(tokens[symbol.token]);
IF symbol.token= Number THEN
CASE symbol.numberType OF
Integer: w.String("(integer)")
|Hugeint: w.String("(hugeint)")
|Real: w.String("(real)")
|Longreal: w.String("(longreal)")
END;
END;
IF symbol.token = String THEN
w.String(":"); w.Char('"'); w.String(symbol.string^); w.Char('"');
ELSIF symbol.token = Comment THEN
w.String("(*"); w.String(symbol.string^); w.String("*)");
ELSE
SymbolToString(symbol, Uppercase, str); w.String(": "); w.String(str);
END
END OutSymbol;
PROCEDURE InitReservedCharacters;
VAR i: LONGINT;
BEGIN
FOR i := 0 TO LEN( reservedCharacter ) - 1 DO
CASE CHR(i) OF
| 'a' .. 'z', 'A' .. 'Z': reservedCharacter[i] := FALSE;
| '0'..'9': reservedCharacter[i] := FALSE;
| '_': reservedCharacter[i] := FALSE
ELSE
reservedCharacter[i] := TRUE
END;
END;
END InitReservedCharacters;
PROCEDURE GetKeyword*(case:LONGINT; token: LONGINT; VAR identifier: IdentifierType);
BEGIN
IF case = Uppercase THEN
keywordsUpper.IdentifierByIndex(token,identifier);
ELSE ASSERT(case=Lowercase);
keywordsLower.IdentifierByIndex(token,identifier);
END;
END GetKeyword;
PROCEDURE InitTokens;
VAR i: LONGINT;
BEGIN
tokens[None] := "None";
tokens[Equal] := "Equal";
tokens[DotEqual] := "DotEqual";
tokens[Unequal] := "Unequal";
tokens[DotUnequal] := "DotUnequal";
tokens[Less] := "Less";
tokens[DotLess] := "DotLess";
tokens[LessEqual] := "LessEqual";
tokens[DotLessEqual] := "DotLessEqual";
tokens[Greater] := "Greater";
tokens[DotGreater] := "DotGreater";
tokens[GreaterEqual] := "GreaterEqual";
tokens[DotGreaterEqual] := "DotGreaterEqual";
tokens[In] := "In";
tokens[Is] := "Is";
tokens[Times] := "Times";
tokens[TimesTimes] := "TimesTimes";
tokens[DotTimes] := "DotTimes";
tokens[PlusTimes] := "PlusTimes";
tokens[Slash] := "Slash";
tokens[Backslash] := "Backslash";
tokens[DotSlash] := "DotSlash";
tokens[Div] := "Div";
tokens[Mod] := "Mod";
tokens[And] := "And";
tokens[Or] := "Or";
tokens[Plus] := "Plus";
tokens[Minus] := "Minus";
tokens[Not] := "Not";
tokens[LeftParenthesis] := "LeftParenthesis";
tokens[LeftBracket] := "LeftBracket";
tokens[LeftBrace] := "LeftBrace";
tokens[Number] := "Number";
tokens[Character] := "Character";
tokens[String] := "String";
tokens[Nil] := "Nil";
tokens[Imag] := "Imag";
tokens[True] := "True";
tokens[False] := "False";
tokens[Self] := "Self";
tokens[Result] := "Result";
tokens[Identifier] := "Identifier";
tokens[If] := "If";
tokens[Case] := "Case";
tokens[While] := "While";
tokens[Repeat] := "Repeat";
tokens[For] := "For";
tokens[Loop] := "Loop";
tokens[With] := "With";
tokens[Exit] := "Exit";
tokens[Await] := "Await";
tokens[Return] := "Return";
tokens[Begin] := "Begin";
tokens[Semicolon] := "Semicolon";
tokens[Transpose] := "Transpose";
tokens[RightBrace] := "RightBrace";
tokens[RightBracket] := "RightBracket";
tokens[RightParenthesis] := "RightParenthesis";
tokens[Questionmark] := "Questionmark";
tokens[ExclamationMark] := "ExclamationMark";
tokens[Upto] := "Upto";
tokens[Arrow] := "Arrow";
tokens[Period] := "Period";
tokens[Comma] := "Comma";
tokens[Colon] := "Colon";
tokens[Of] := "Of";
tokens[Then] := "Then";
tokens[Do] := "Do";
tokens[To] := "To";
tokens[By] := "By";
tokens[Becomes] := "Becomes";
tokens[Bar] := "Bar";
tokens[End] := "End";
tokens[Else] := "Else";
tokens[Elsif] := "Elsif";
tokens[Until] := "Until";
tokens[Finally] := "Finally";
tokens[Code] := "Code";
tokens[Const] := "Const";
tokens[Type] := "Type";
tokens[Var] := "Var";
tokens[Out] := "Out";
tokens[Procedure] := "Procedure";
tokens[Operator] := "Operator";
tokens[Import] := "Import";
tokens[Definition] := "Definition";
tokens[Module] := "Module";
tokens[Cell] := "Cell";
tokens[CellNet] := "CellNet";
tokens[Array] := "Array";
tokens[Object] := "Object";
tokens[Record] := "Record";
tokens[Pointer] := "Pointer";
tokens[Enum] := "Enum";
tokens[Port] := "Port";
tokens[Ln] := "Ln";
tokens[PC] := "PC";
tokens[PCOffset] := "PCOffset";
tokens[Shortint] := "Shortint";
tokens[Integer] := "Integer";
tokens[Longint] := "Longint";
tokens[Hugeint] := "Hugeint";
tokens[Real] := "Real";
tokens[Longreal] := "Longreal";
tokens[Comment] := "Comment";
tokens[EndOfText] := "EndOfText";
FOR i := 0 TO EndOfText DO ASSERT(tokens[i] # "") END;
END InitTokens;
PROCEDURE InitKeywords;
PROCEDURE Upper(CONST source: ARRAY OF CHAR; VAR dest: ARRAY OF CHAR);
VAR c: CHAR; i: LONGINT;
BEGIN
i := 0;
REPEAT
c := source[i];
IF (c >= 'a') & (c<= 'z') THEN c := CHR(ORD(c)-ORD('a')+ORD('A')) END;
dest[i] := c; INC(i);
UNTIL c = 0X;
END Upper;
PROCEDURE Enter1(CONST name: ARRAY OF CHAR; token: LONGINT; case: SET);
BEGIN
IF Lowercase IN case THEN keywordsLower.PutString(name,token) END;
IF Uppercase IN case THEN keywordsUpper.PutString(name,token) END;
Basic.SetErrorExpected(token,name);
END Enter1;
PROCEDURE Enter(CONST name: ARRAY OF CHAR; token: LONGINT);
VAR upper: Keyword;
BEGIN
Enter1(name,token,{Lowercase});
Upper(name,upper);
Enter1(upper,token,{Uppercase});
END Enter;
PROCEDURE EnterSymbol(CONST name: ARRAY OF CHAR; token: LONGINT);
BEGIN
Enter1(name,token,{Lowercase,Uppercase});
END EnterSymbol;
BEGIN
NEW(keywordsUpper,EndOfText+1);
NEW(keywordsLower,EndOfText+1);
Enter( "cell", Cell );
Enter( "cellnet", CellNet);
Enter( "await" , Await);
Enter( "begin" , Begin);
Enter( "by" , By);
Enter( "const" , Const);
Enter( "case" , Case);
Enter( "code" , Code);
Enter( "definition", Definition);
Enter( "do" , Do);
Enter( "div" , Div);
Enter( "end" , End);
Enter( "enum", Enum);
Enter( "else" , Else);
Enter( "elsif" , Elsif);
Enter( "exit" , Exit);
Enter( "false" , False);
Enter( "for" , For);
Enter( "finally" , Finally);
Enter( "if" , If);
Enter( "imag" , Imag);
Enter( "in" , In);
Enter( "is" , Is);
Enter( "import" , Import);
Enter( "loop" , Loop);
Enter( "module", Module);
Enter( "mod" , Mod);
Enter( "nil" , Nil );
Enter( "of" , Of);
Enter( "or" , Or);
Enter( "out", Out);
Enter( "operator" , Operator);
Enter( "procedure" , Procedure);
Enter( "port", Port);
Enter( "repeat" , Repeat);
Enter( "return" , Return);
Enter( "self", Self);
Enter( "result", Result);
Enter( "then" , Then);
Enter( "true" , True);
Enter( "to" , To);
Enter( "type" , Type);
Enter( "until" , Until );
Enter( "var" , Var );
Enter( "while" , While);
Enter( "with" , With);
Enter( "array" , Array );
Enter( "object" , Object);
Enter( "pointer" , Pointer);
Enter( "record" , Record);
EnterSymbol( "#", Unequal);
EnterSymbol( "&", And);
EnterSymbol( "(", LeftParenthesis);
EnterSymbol( ")", RightParenthesis);
EnterSymbol( "*", Times);
EnterSymbol( "**",TimesTimes);
EnterSymbol( "+", Plus);
EnterSymbol( "+*", PlusTimes);
EnterSymbol( ",", Comma);
EnterSymbol( "-", Minus);
EnterSymbol(".",Period );
EnterSymbol("..",Upto );
EnterSymbol(".*",DotTimes );
EnterSymbol("./",DotSlash );
EnterSymbol(".=",DotEqual );
EnterSymbol(".#",DotUnequal );
EnterSymbol(".>",DotGreater );
EnterSymbol(".>=",DotGreaterEqual );
EnterSymbol(".<", DotLess);
EnterSymbol(".<=",DotLessEqual );
EnterSymbol( "/", Slash);
EnterSymbol( ":", Colon);
EnterSymbol( ":=",Becomes);
EnterSymbol( ";", Semicolon);
EnterSymbol( "<", Less);
EnterSymbol( "<=", LessEqual);
EnterSymbol( "=", Equal);
EnterSymbol( ">", Greater);
EnterSymbol( ">=", GreaterEqual);
EnterSymbol( "[", LeftBracket);
EnterSymbol( "]", RightBracket);
EnterSymbol( "^", Arrow);
EnterSymbol( "{", LeftBrace);
EnterSymbol( "|",Bar);
EnterSymbol( "}", RightBrace);
EnterSymbol( "~", Not);
EnterSymbol( "\", Backslash);
EnterSymbol( "`", Transpose);
EnterSymbol( "?",Questionmark);
Basic.SetErrorMessage(Number,"missing number");
Basic.SetErrorMessage(String,"missing string");
Basic.SetErrorMessage(Character,"missing character");
Basic.SetErrorMessage(Identifier,"missing identifier");
END InitKeywords;
PROCEDURE ReportKeywords*(context: Commands.Context);
VAR i: LONGINT; name: Keyword;
BEGIN
FOR i := 0 TO EndOfText DO
context.out.Int(i,1); context.out.String(": ");
context.out.Char('"');
keywordsLower.StringByIndex(i,name);
context.out.String(name);
context.out.Char('"');
context.out.String(", ");
context.out.Char('"');
keywordsUpper.StringByIndex(i,name);
context.out.String(name);
context.out.Char('"');
context.out.Ln;
END;
END ReportKeywords;
BEGIN
InitReservedCharacters; InitTokens; InitKeywords
END FoxScanner.
FoxScanner.ReportKeywords
FoxScanner.TestScanner Test.Mod ~