MODULE UnicodeProperties;
IMPORT
Texts, Codecs, Files, Streams, KernelLog, Strings;
CONST
(* Character codes compared against the Texts.Char32 values returned by the text readers below. *)
NUL* = 00H; (* code 0; treated as an end-of-data marker by UnicodeTxtReader.GetProperty *)
EOT* = 04H; (* ASCII end of transmission; treated as an end-of-data marker by the parsers *)
LF* = 0AH; (* ASCII line feed; ends a line *)
CR* = 0DH; (* ASCII carriage return; ends a line *)
SP* = 20H; (* ASCII space; ends the target field in BidiMirroringTxtReader.GetTargetChar *)
(* When TRUE, the cache procedures write every hit and every insertion to KernelLog. *)
CacheDebugging = FALSE;
VAR
(* Read-only for clients. Reset to FALSE at the start of TxtReader.LoadTxtFile and set to TRUE
	when that procedure cannot find or open the data file. *)
error- : BOOLEAN;
TYPE
(* One node of a singly linked bucket chain used by CharacterPropertyCache.
	A node carries either a string property (sValue, with cValue = -1) or a
	character property (cValue, with sValue empty) for the code point 'key'. *)
CacheElement = OBJECT
	VAR
		next : CacheElement; (* following node in the same bucket, NIL at the end of the chain *)
		key : Texts.Char32; (* code point this node describes *)
		sValue : ARRAY 256 OF CHAR; (* string property, empty for character-valued nodes *)
		cValue : Texts.Char32; (* character property, -1 for string-valued nodes *)

	(* Initialises the node.
		key    - code point the node is stored under
		sValue - string property; when non-empty it wins and cValue is ignored
		cValue - character property, used only if sValue is empty *)
	PROCEDURE &Init*(key : Texts.Char32; CONST sValue : ARRAY OF CHAR; cValue : Texts.Char32);
	BEGIN
		SELF.key := key;
		IF sValue[0] # 0X THEN
			(* The built-in COPY stops at the terminator and truncates to the size of the
				destination, so a source array longer than the 256 character field cannot
				overrun it (the previous fixed-length copy moved LEN(sValue) characters). *)
			COPY(sValue, SELF.sValue);
			SELF.cValue := -1;
		ELSE
			SELF.sValue[0] := 0X;
			SELF.cValue := cValue;
		END;
	END Init;
END CacheElement;
(* Chained hash table that maps a code point to a string property or to a
	character property. The bucket is selected with 'char MOD cacheSize'; new
	nodes are pushed on the front of their bucket chain.
	An empty string (string lookups) and -1 (character lookups) mean "not cached". *)
CharacterPropertyCache = OBJECT
	VAR
		internalCache : POINTER TO ARRAY OF CacheElement; (* bucket heads *)
		cacheSize : LONGINT; (* number of buckets *)

	(* Creates a cache with 'size' buckets. 'size' must be positive because it is
		used as the divisor of the bucket computation. *)
	PROCEDURE &Init*(size : LONGINT);
	BEGIN
		ASSERT(size > 0);
		cacheSize := size;
		NEW(internalCache, cacheSize);
	END Init;

	(* Returns the first node stored for 'char', or NIL if there is none. *)
	PROCEDURE Find(char : Texts.Char32) : CacheElement;
	VAR
		element : CacheElement;
	BEGIN
		element := internalCache[char MOD cacheSize];
		WHILE (element # NIL) & (element.key # char) DO
			element := element.next;
		END;
		RETURN element;
	END Find;

	(* Pushes 'element' on the front of the chain selected by 'char'. *)
	PROCEDURE Push(char : Texts.Char32; element : CacheElement);
	VAR
		bucket : LONGINT;
	BEGIN
		bucket := char MOD cacheSize;
		element.next := internalCache[bucket];
		internalCache[bucket] := element;
	END Push;

	(* String lookup. 'res' receives the cached string of 'char', truncated to the
		size of 'res' if necessary, or the empty string if nothing is cached. *)
	PROCEDURE SLookup(char : Texts.Char32; VAR res : ARRAY OF CHAR);
	VAR
		element : CacheElement;
	BEGIN
		element := Find(char);
		IF element = NIL THEN
			res[0] := 0X;
		ELSE
			IF CacheDebugging THEN
				KernelLog.String("found: "); KernelLog.Hex(element.key,4);
				KernelLog.String(" ("); KernelLog.String(element.sValue);
				KernelLog.String(")"); KernelLog.Ln;
			END;
			(* COPY stops at the terminator and respects the size of 'res' *)
			COPY(element.sValue, res);
		END;
	END SLookup;

	(* Character lookup. Returns the cached character of 'char' or -1 if nothing is cached. *)
	PROCEDURE CLookup(char : Texts.Char32) : Texts.Char32;
	VAR
		element : CacheElement;
	BEGIN
		element := Find(char);
		IF element = NIL THEN
			RETURN -1;
		END;
		IF CacheDebugging THEN
			KernelLog.String("found: "); KernelLog.Hex(element.key,4);
			KernelLog.String(" ("); KernelLog.Hex(element.cValue,4);
			KernelLog.String(")"); KernelLog.Ln;
		END;
		RETURN element.cValue;
	END CLookup;

	(* Caches the string 'value' for 'char'.
		An empty value is not stored: SLookup reports it exactly like a miss, so the
		caller would look the property up again and insert one more useless node on
		every query for the same character. *)
	PROCEDURE SInsert(char : Texts.Char32; CONST value : ARRAY OF CHAR);
	VAR
		newElement : CacheElement;
	BEGIN
		IF value[0] = 0X THEN
			RETURN;
		END;
		NEW(newElement, char, value, -1);
		Push(char, newElement);
		IF CacheDebugging THEN
			KernelLog.String("inserted: "); KernelLog.Hex(char,4);
			KernelLog.String(" (");
			KernelLog.String(value);
			KernelLog.String(")"); KernelLog.Ln;
		END;
	END SInsert;

	(* Caches the character 'value' for 'char'.
		The value -1 is not stored because CLookup reports it exactly like a miss. *)
	PROCEDURE CInsert(char : Texts.Char32; value : Texts.Char32);
	VAR
		newElement : CacheElement;
		noString : ARRAY 1 OF CHAR;
	BEGIN
		IF value = -1 THEN
			RETURN;
		END;
		(* an empty string makes CacheElement.Init keep the character value *)
		noString[0] := 0X;
		NEW(newElement, char, noString, value);
		Push(char, newElement);
		IF CacheDebugging THEN
			KernelLog.String("inserted: "); KernelLog.Hex(char,4);
			KernelLog.String(" ("); KernelLog.Hex(value,4);
			KernelLog.String(")"); KernelLog.Ln;
		END;
	END CInsert;

	(* Writes every bucket and its chain to KernelLog. Only string values are
		printed; character-valued nodes appear with empty parentheses. *)
	PROCEDURE Print;
	VAR
		i : LONGINT;
		element : CacheElement;
	BEGIN
		FOR i := 0 TO cacheSize - 1 DO
			KernelLog.Int(i,3); KernelLog.String(": ");
			element := internalCache[i];
			WHILE element # NIL DO
				KernelLog.Int(element.key,4); KernelLog.String(" (");
				IF element.cValue = -1 THEN KernelLog.String(element.sValue) END;
				KernelLog.String(") -> ");
				element := element.next;
			END;
			KernelLog.Ln;
		END;
	END Print;
END CharacterPropertyCache;
(* Base class of the data file readers. A subclass sets 'filename', calls
	LoadTxtFile and overrides FindStartPos to position 'startPos' on the first
	data line of its file. *)
TxtReader = OBJECT
	VAR
		filename : ARRAY 256 OF CHAR; (* name of the data file, set by the subclass *)
		text : Texts.Text; (* decoded file content, NIL if loading failed *)
		textReader : Texts.TextReader; (* reader on 'text', NIL if loading failed *)
		startPos : LONGINT; (* reader position where the parsers start scanning *)
		decoder : Codecs.TextDecoder;
		msg : ARRAY 512 OF CHAR; (* buffer for log messages *)
		fullname : ARRAY 256 OF CHAR; (* file name as reported by the file object *)
		file : Files.File;
		in: Streams.Reader;
		decoderRes : LONGINT; (* result code of decoder.Open, 0 on success *)

	(* Loads 'filename' as ISO8859-1 text into 'text' and creates 'textReader'.
		The module variable 'error' is FALSE after a successful load and TRUE after
		any failure; each failure is also reported on KernelLog.
		FindStartPos is called only after a successful load, so overriding
		implementations can rely on 'text' and 'textReader' being available. *)
	PROCEDURE LoadTxtFile;
	BEGIN
		error := FALSE;
		COPY(filename, fullname);
		file := Files.Old(filename);
		IF (file # NIL) THEN
			file.GetName(fullname);
		ELSE
			(* The file does not exist. A new file object is created only to ask it
				for its name (used in the message below); it is dropped right away so
				that the "not found" branch is taken. *)
			file := Files.New(filename);
			IF (file # NIL) THEN
				file.GetName(fullname);
				file := NIL;
			END;
		END;
		IF (file # NIL) THEN
			decoder := Codecs.GetTextDecoder("ISO8859-1");
			IF (decoder # NIL) THEN
				in := Codecs.OpenInputStream(fullname);
				IF in # NIL THEN
					decoder.Open(in, decoderRes);
					IF decoderRes = 0 THEN
						text := decoder.GetText();
						NEW(textReader,text);
					ELSE
						(* previously this failure left 'error' FALSE although no text was loaded *)
						msg := "Can't decode file "; Strings.Append(msg, fullname);
						KernelLog.String(msg);
						error := TRUE;
					END;
				ELSE
					msg := "Can't open input stream on file "; Strings.Append(msg, fullname);
					KernelLog.String(msg);
					error := TRUE;
				END;
			ELSE
				msg := "No decoder for file "; Strings.Append(msg, fullname);
				Strings.Append(msg, " (Format: "); Strings.Append(msg, "ISO8859-1"); Strings.Append(msg, ")");
				KernelLog.String(msg);
				error := TRUE;
			END;
		ELSE
			msg := "file '"; Strings.Append(msg, fullname); Strings.Append(msg,"' not found.");
			KernelLog.String(msg);
			error := TRUE;
		END;
		(* Without a loaded text there is nothing to position on, and an override
			that reads from 'text' would dereference NIL. *)
		IF ~error & (text # NIL) & (textReader # NIL) THEN
			FindStartPos;
		END;
	END LoadTxtFile;

	(* Abstract: has to be overridden. Sets 'startPos' to the first data line. *)
	PROCEDURE FindStartPos;
	BEGIN
		HALT (999);
	END FindStartPos;

	(* Advances 'textReader' past the next line break character (LF or CR).
		Does nothing if no reader exists. The loop also ends on EOT or NUL, the
		values the parsers of this module treat as end of data, so that a text
		without a final line break cannot keep the loop running forever.
		NOTE(review): assumes ReadCh delivers NUL or EOT at the end of the text, as
		UnicodeTxtReader.GetProperty already does - confirm against Texts. *)
	PROCEDURE NextLine;
	VAR
		thisChar : Texts.Char32;
	BEGIN
		IF textReader = NIL THEN RETURN END;
		REPEAT
			textReader.ReadCh(thisChar);
		UNTIL (thisChar = LF) OR (thisChar = CR) OR (thisChar = EOT) OR (thisChar = NUL);
	END NextLine;
END TxtReader;
TYPE
(** Reader for "UnicodeData.txt". Each line is scanned as a list of fields
	separated by ';' whose field 0 is the hexadecimal code point. Properties that
	were looked up once are kept in per-property caches. *)
UnicodeTxtReader*=OBJECT(TxtReader)
	VAR
		charTypeCache, mirrorPropCache : CharacterPropertyCache; (* field 4 and field 9 results *)

	(** Loads the data file and creates the caches. Clients should check the module
		variable 'error' afterwards. *)
	PROCEDURE &Init*;
	BEGIN
		filename := "UnicodeData.txt";
		LoadTxtFile;
		NEW(charTypeCache,256);
		NEW(mirrorPropCache,256);
	END Init;

	(* The data starts at the very beginning of the file. *)
	PROCEDURE FindStartPos;
	BEGIN
		startPos := 0;
	END FindStartPos;

	(** Delivers field 4 (the bidirectional character type) of 'char' in res^.
		If the file has no value for 'char', "L" is delivered and a note is logged.
		'res' is allocated with 16 characters if the caller passes NIL. *)
	PROCEDURE GetBidiCharacterType*(char : Texts.Char32; VAR res : Strings.String);
	VAR
		cached : ARRAY 16 OF CHAR;
	BEGIN
		IF res = NIL THEN
			NEW(res, 16);
		END;
		charTypeCache.SLookup(char,cached);
		IF cached = "" THEN
			GetProperty(char,4,res^);
			IF res^ = "" THEN
				res^ := "L";
				KernelLog.String("no character type has been found. Using 'L'"); KernelLog.Ln;
			END;
			charTypeCache.SInsert(char,res^);
		ELSE
			(* COPY respects the size of res^; the previous fixed-length copy always
				moved 16 characters *)
			COPY(cached, res^);
		END;
	END GetBidiCharacterType;

	(** Returns TRUE if field 9 of 'char' is "Y". *)
	PROCEDURE IsMirroredChar*(char : Texts.Char32) : BOOLEAN;
	VAR
		res : ARRAY 16 OF CHAR;
	BEGIN
		mirrorPropCache.SLookup(char,res);
		IF res = "" THEN
			GetProperty(char,9,res);
			(* An empty result cannot be told apart from a cache miss; inserting it
				would only add another node on every call for the same character. *)
			IF res # "" THEN
				mirrorPropCache.SInsert(char,res);
			END;
		END;
		RETURN res = "Y";
	END IsMirroredChar;

	(** Returns TRUE if field 4 of 'char' is "WS". *)
	PROCEDURE IsWhiteSpaceChar*(char : Texts.Char32) : BOOLEAN;
	VAR
		res : ARRAY 16 OF CHAR;
	BEGIN
		charTypeCache.SLookup(char,res);
		IF res = "" THEN
			GetProperty(char,4,res);
			(* see IsMirroredChar: empty results are not worth caching *)
			IF res # "" THEN
				charTypeCache.SInsert(char,res);
			END;
		END;
		RETURN res = "WS";
	END IsWhiteSpaceChar;

	(** Scans the file from 'startPos' for the line whose field 0 equals 'char' and
		delivers field number 'pos' of that line in 'res'.
		'res' is the empty string if the file was not loaded, if no such line exists
		(the scan stops at the first line with a larger code point, so the lines are
		expected in ascending order) or if the line has no field 'pos'.
		'res' is also used as scratch buffer for field 0; values that do not fit are
		truncated to LEN(res) - 1 characters. *)
	PROCEDURE GetProperty*(char : Texts.Char32; pos : LONGINT; VAR res : ARRAY OF CHAR);
	VAR
		thisChar, code : Texts.Char32;
		field, count, status : LONGINT;
		endOfText, finished, found : BOOLEAN;
	BEGIN
		res[0] := 0X;
		IF (text = NIL) OR (textReader = NIL) THEN
			RETURN; (* the data file could not be loaded *)
		END;
		found := FALSE; finished := FALSE;
		text.AcquireRead;
		textReader.SetPosition(startPos);
		LOOP (* one pass per line *)
			field := 0;
			LOOP (* one pass per field *)
				(* read one field including its delimiter; only field 0 and field 'pos'
					are stored *)
				count := 0;
				REPEAT
					textReader.ReadCh(thisChar);
					endOfText := (thisChar = EOT) OR (thisChar = NUL);
					IF ((field = pos) OR (field = 0)) & (count < LEN(res)) THEN
						res[count] := CHR(thisChar);
					END;
					INC(count);
				UNTIL endOfText OR (thisChar = ORD(';')) OR (thisChar = CR) OR (thisChar = LF);
				IF endOfText & (count = 1) THEN
					(* end of data right at the start of a field *)
					finished := TRUE; EXIT;
				END;
				IF (field = pos) OR (field = 0) THEN
					(* replace the stored delimiter by the terminator *)
					IF count <= LEN(res) THEN res[count - 1] := 0X ELSE res[LEN(res) - 1] := 0X END;
				END;
				IF field = pos THEN
					found := TRUE; finished := TRUE; EXIT;
				ELSIF field = 0 THEN
					Strings.HexStrToInt(res,code,status);
					IF code < char THEN
						EXIT; (* not yet there, try the next line *)
					ELSIF code > char THEN
						finished := TRUE; EXIT; (* passed the place where 'char' would be *)
					END;
				ELSIF field > pos THEN
					finished := TRUE; EXIT;
				END;
				IF endOfText THEN
					(* the text ended before field 'pos' was reached *)
					finished := TRUE; EXIT;
				END;
				IF (thisChar = CR) OR (thisChar = LF) THEN
					EXIT; (* the line has no field 'pos' *)
				END;
				INC(field);
			END;
			IF finished OR endOfText THEN
				EXIT;
			END;
			(* If the field just read ended with a line break, the reader already is at
				the start of the next line; skipping once more would lose that line
				(this happened for blank lines and for CR LF line ends). *)
			IF (thisChar # CR) & (thisChar # LF) THEN
				NextLine;
			END;
		END;
		text.ReleaseRead;
		IF ~found THEN
			res[0] := 0X;
		END;
	END GetProperty;

	(** Writes the content of the character type cache to KernelLog. *)
	PROCEDURE PrintCharTypeCache*;
	BEGIN
		charTypeCache.Print;
	END PrintCharTypeCache;
END UnicodeTxtReader;
TYPE
(** Reader for "BidiMirroring.txt". A data line is parsed as
	"<source hex>;<one character><target hex>" followed by a space or '#';
	lines starting with '#' at the top of the file are skipped. *)
BidiMirroringTxtReader*=OBJECT(TxtReader)
	VAR
		mirrorCache : CharacterPropertyCache; (* source code point -> mirrored code point *)

	(** Loads the data file and creates the cache. Clients should check the module
		variable 'error' afterwards. *)
	PROCEDURE &Init*;
	BEGIN
		filename := "BidiMirroring.txt";
		LoadTxtFile;
		NEW(mirrorCache,256);
	END Init;

	(* Skips the leading lines that start with '#' and records the reader position
		reached after that in 'startPos'. Does nothing but reset 'startPos' if no
		text has been loaded.
		NOTE(review): the recorded position lies behind the first character of the
		first line that does not start with '#'. This is kept as it was; it skips a
		blank separator line, but would drop the first digit of a data line that
		directly follows the comments - confirm against the shipped data file. *)
	PROCEDURE FindStartPos;
	VAR
		thisChar : Texts.Char32;
	BEGIN
		IF (text = NIL) OR (textReader = NIL) THEN
			startPos := 0;
			RETURN;
		END;
		thisChar := 0;
		text.AcquireRead;
		textReader.ReadCh(thisChar);
		WHILE (thisChar = ORD('#')) DO
			NextLine;
			textReader.ReadCh(thisChar);
		END;
		startPos := textReader.GetPosition();
		text.ReleaseRead;
	END FindStartPos;

	(* Reads the source field at the current reader position and returns its value.
		Returns -1 if the field does not end with ';', that is if EOT, NUL or '#' is
		met first or if no ';' shows up within the size of the buffer. *)
	PROCEDURE GetSourceChar() : Texts.Char32;
	VAR
		sourceString : ARRAY 7 OF CHAR;
		sourceInt, tempChar : Texts.Char32;
		i, res : LONGINT;
	BEGIN
		sourceInt := -1;
		i := -1;
		REPEAT
			INC(i);
			textReader.ReadCh(tempChar);
			sourceString[i] := CHR(tempChar);
			(* the last condition keeps the index inside the buffer *)
		UNTIL (tempChar = EOT) OR (tempChar = NUL) OR (tempChar = ORD('#')) OR (tempChar = ORD(';'))
			OR (i = LEN(sourceString) - 1);
		IF (tempChar = ORD(';')) THEN
			sourceString[i] := 0X;
			Strings.HexStrToInt(sourceString,sourceInt,res);
		END;
		RETURN sourceInt;
	END GetSourceChar;

	(* Reads the target field that follows the ';' of a data line and returns its
		value. The field ends at a space, '#', a line break, EOT or NUL, or when the
		buffer is full. *)
	PROCEDURE GetTargetChar() : Texts.Char32;
	VAR
		targetString : ARRAY 7 OF CHAR;
		targetInt, tempChar : Texts.Char32;
		i, res : LONGINT;
	BEGIN
		targetInt := -1;
		i := -1;
		(* the character directly behind the ';' is skipped (presumably the blank
			that separates the two fields) *)
		textReader.ReadCh(tempChar);
		REPEAT
			INC(i);
			textReader.ReadCh(tempChar);
			targetString[i] := CHR(tempChar);
			(* a line break or the end of the text ends the field as well; the last
				condition keeps the index inside the buffer *)
		UNTIL (tempChar = EOT) OR (tempChar = NUL) OR (tempChar = ORD('#')) OR (tempChar = SP)
			OR (tempChar = CR) OR (tempChar = LF) OR (i = LEN(targetString) - 1);
		targetString[i] := 0X;
		Strings.HexStrToInt(targetString,targetInt,res);
		RETURN targetInt;
	END GetTargetChar;

	(** Returns the mirrored counterpart of 'char', or 0 if the file lists none or
		could not be loaded. Results that were found are cached. *)
	PROCEDURE GetMirroredChar*(char : Texts.Char32) : Texts.Char32;
	VAR
		sChar, mirrored : Texts.Char32;
	BEGIN
		sChar := mirrorCache.CLookup(char);
		IF sChar # -1 THEN
			RETURN sChar;
		END;
		IF (text = NIL) OR (textReader = NIL) THEN
			RETURN 0; (* the data file could not be loaded *)
		END;
		text.AcquireRead;
		textReader.SetPosition(startPos);
		REPEAT
			sChar := GetSourceChar();
			(* once the scan has ended with -1 there is no further line to move to *)
			IF (sChar # char) & (sChar # -1) THEN
				NextLine;
			END;
		UNTIL (sChar = char) OR (sChar = -1);
		IF (sChar = -1) THEN
			mirrored := 0;
		ELSE
			mirrored := GetTargetChar();
			mirrorCache.CInsert(char,mirrored);
		END;
		text.ReleaseRead;
		RETURN mirrored;
	END GetMirroredChar;
END BidiMirroringTxtReader;
END UnicodeProperties.
SystemTools.Free UnicodeProperties ~
UnicodeProperties.TestIsMirroredChar 00000028H ~
PC0.Compile UnicodeProperties.Mod ~