MODULE UnicodeBidirectionality;
IMPORT
Codecs, Files, Streams, KernelLog, Texts, Commands, UnicodeProperties, Strings;
CONST
DOSNeutral = 0;
DOSRightToLeft = 1;
DOSLeftToRight = 2;
NeutralType = 0;
EuropeanNumber = 1;
ArabicNumber = 2;
LeftStrongType = 3;
CharacterDebugging = FALSE;
CacheDebugging = FALSE;
WeakTypes1* = 0;
WeakTypes2* = 1;
EuropeanNumberAdj* = 2;
ArabicNumberAdj* = 3;
NeutralTypes* = 4;
SameDirection* = 5;
initialCachePoolSize = 10000;
initialCacheElementSize = 1000;
TYPE
IntegerArray = POINTER TO ARRAY OF LONGINT;
CharArray = POINTER TO ARRAY OF Texts.Char32;
TextReaderArray = POINTER TO ARRAY OF Texts.TextReader;
BoolArray = POINTER TO ARRAY OF BOOLEAN;
PosArray = POINTER TO RECORD
array : IntegerArray;
size : LONGINT;
next : PosArray;
END;
PosArrays = POINTER TO ARRAY OF PosArray;
IntegerStack = OBJECT
VAR
top : INTEGER;
internalStack : IntegerArray;
internalStackSize : LONGINT;
PROCEDURE &Init*(size : LONGINT);
BEGIN
NEW(internalStack,size);
top := 0;
internalStackSize := size;
END Init;
PROCEDURE Push(i : LONGINT);
VAR
tempStack : IntegerArray;
j : LONGINT;
BEGIN
IF (top >= internalStackSize) THEN
internalStackSize := ENTIER(top*1.5);
NEW(tempStack,internalStackSize);
FOR j := 0 TO top - 1 DO
tempStack[j] := internalStack[j];
END;
internalStack := tempStack;
END;
internalStack[top] := i;
INC(top);
END Push;
PROCEDURE Pop() : LONGINT;
BEGIN
IF (top = 0) THEN
RETURN -1;
ELSE
DEC(top);
RETURN internalStack[top];
END;
END Pop;
PROCEDURE Top() : LONGINT;
BEGIN
IF (top = 0) THEN
RETURN -1;
ELSE
RETURN internalStack[top-1];
END;
END Top;
PROCEDURE Purge;
BEGIN
top := 0;
END Purge;
PROCEDURE Size() : INTEGER;
BEGIN
RETURN top;
END Size;
END IntegerStack;
PosArrayPool = RECORD
first, last : PosArray;
END;
StringElement = RECORD
element : Strings.String;
used : BOOLEAN;
END;
IntegerStackElement = RECORD
element : IntegerStack;
used : BOOLEAN;
END;
UStringElement = RECORD
element : Texts.PUCS32String;
used : BOOLEAN;
END;
ArrayMemoryManager = OBJECT
VAR
posArrayPool : PosArrayPool;
stringPool : ARRAY initialCachePoolSize OF StringElement;
integerStackPool : ARRAY 10 OF IntegerStackElement;
uStringPool : ARRAY 10 OF UStringElement;
PROCEDURE &Init*;
VAR
i : LONGINT;
newPosArray: PosArray;
BEGIN
NEW(posArrayPool.last);
NEW(posArrayPool.last.array,initialCacheElementSize);
posArrayPool.last.size := 0;
posArrayPool.first := posArrayPool.last;
FOR i := 1 TO initialCachePoolSize - 1 DO
NEW(newPosArray);
NEW(newPosArray.array,initialCacheElementSize);
newPosArray.size := 0;
posArrayPool.last.next := newPosArray;
posArrayPool.last := newPosArray;
END;
FOR i:= 0 TO initialCachePoolSize - 1 DO
NEW(stringPool[i].element,16);
END;
FOR i := 0 TO 9 DO
NEW(integerStackPool[i].element,100);
NEW(uStringPool[i].element,2);
END;
END Init;
PROCEDURE NewTextReaderArray(VAR trArray : TextReaderArray; size : LONGINT);
BEGIN
IF trArray = NIL THEN
NEW(trArray,Strings.Max(initialCacheElementSize,2*size));
ELSIF LEN(trArray) < size THEN
NEW(trArray,2*size);
END;
END NewTextReaderArray;
PROCEDURE NewIntegerStack(VAR stack : IntegerStack; size : LONGINT);
VAR
i : LONGINT;
BEGIN
FOR i := 0 TO 9 DO
IF ~integerStackPool[i].used THEN
IF integerStackPool[i].element.internalStackSize < size THEN
NEW(integerStackPool[i].element,size);
END;
stack := integerStackPool[i].element;
integerStackPool[i].used := TRUE;
RETURN;
END;
END;
NEW(stack,size);
END NewIntegerStack;
PROCEDURE FreeIntegerStack(stack : IntegerStack);
VAR
i : LONGINT;
BEGIN
FOR i := 0 TO 9 DO
IF stack = integerStackPool[i].element THEN
integerStackPool[i].used := FALSE;
RETURN;
END;
END;
END FreeIntegerStack;
PROCEDURE NewBoolArray(VAR bArray : BoolArray; size : LONGINT);
BEGIN
IF bArray = NIL THEN
NEW(bArray,Strings.Max(initialCacheElementSize,2*size));
ELSIF LEN(bArray) < size THEN
NEW(bArray,2*size);
END;
END NewBoolArray;
PROCEDURE NewPosArrays(VAR pArrays : PosArrays; size : LONGINT);
BEGIN
IF pArrays = NIL THEN
NEW(pArrays,Strings.Max(initialCacheElementSize,2*size));
ELSIF LEN(pArrays) < size THEN
NEW(pArrays,2*size);
END;
END NewPosArrays;
PROCEDURE NewPosArray(VAR pArray : PosArray; size : LONGINT);
VAR
thisPosArray, lastPosArray : PosArray;
BEGIN
IF (pArray # NIL) & (pArray # NIL) & (LEN(pArray.array) >= size) THEN
pArray.size := size;
RETURN;
END;
IF posArrayPool.first = NIL THEN
NEW(pArray);
NEW(pArray.array,Strings.Max(initialCacheElementSize,2*size));
pArray.size := size;
RETURN;
END;
thisPosArray := posArrayPool.first;
IF LEN(thisPosArray.array) >= size THEN
pArray := thisPosArray;
pArray.size := size;
posArrayPool.first := pArray.next;
pArray.next := NIL;
RETURN;
END;
lastPosArray := thisPosArray;
thisPosArray := thisPosArray.next;
WHILE thisPosArray # NIL DO
IF LEN(thisPosArray.array) >= size THEN
pArray := thisPosArray;
pArray.size := size;
lastPosArray.next := thisPosArray.next;
pArray.next := NIL;
RETURN;
END;
lastPosArray := thisPosArray;
thisPosArray := thisPosArray.next;
END;
pArray := posArrayPool.first;
NEW(pArray.array,Strings.Max(initialCacheElementSize,2*size));
pArray.size := size;
posArrayPool.first := pArray.next;
pArray.next := NIL;
END NewPosArray;
PROCEDURE NewIntegerArray(VAR iArray : IntegerArray; size : LONGINT);
BEGIN
IF iArray = NIL THEN
NEW(iArray,Strings.Max(initialCacheElementSize,2*size));
ELSIF LEN(iArray) < size THEN
NEW(iArray,2*size);
END;
END NewIntegerArray;
PROCEDURE NewStringArray(VAR sArray : Strings.StringArray; size : LONGINT);
BEGIN
IF sArray = NIL THEN
NEW(sArray,Strings.Max(initialCacheElementSize,2*size));
ELSIF LEN(sArray) < size THEN
NEW(sArray,2*size);
END;
END NewStringArray;
PROCEDURE NewCharArray(VAR cArray : CharArray; size : LONGINT);
BEGIN
IF cArray = NIL THEN
NEW(cArray,Strings.Max(initialCacheElementSize,2*size));
ELSIF LEN(cArray) < size THEN
NEW(cArray,2*size);
END;
END NewCharArray;
PROCEDURE NewCharacterType(VAR charType : Strings.String);
BEGIN
IF charType = NIL THEN
NEW(charType,16);
END;
END NewCharacterType;
PROCEDURE NewString(VAR string : Strings.String);
VAR
i : LONGINT;
BEGIN
FOR i := 0 TO initialCachePoolSize - 1 DO
IF ~stringPool[i].used THEN
string := stringPool[i].element;
stringPool[i].used := TRUE;
RETURN;
END;
END;
NEW(string,256);
END NewString;
PROCEDURE FreeString(string : Strings.String);
VAR
i : LONGINT;
BEGIN
FOR i := 0 TO initialCachePoolSize - 1 DO
IF string = stringPool[i].element THEN
stringPool[i].used := FALSE;
RETURN;
END;
END;
END FreeString;
PROCEDURE NewUString(VAR string : Texts.PUCS32String);
VAR
i : LONGINT;
BEGIN
FOR i := 0 TO 9 DO
IF ~uStringPool[i].used THEN
string := uStringPool[i].element;
uStringPool[i].used := TRUE;
RETURN;
END;
END;
NEW(string,2);
END NewUString;
PROCEDURE FreeUString(string : Texts.PUCS32String);
VAR
i : LONGINT;
BEGIN
FOR i := 0 TO 9 DO
IF string = uStringPool[i].element THEN
uStringPool[i].used := FALSE;
RETURN;
END;
END;
END FreeUString;
END ArrayMemoryManager;
LineElement = OBJECT
VAR
next : LineElement;
id : LONGINT;
pos : LONGINT;
lineLength : LONGINT;
paragraphEmbeddingLevel : LONGINT;
textReaders : TextReaderArray;
dirty : BoolArray;
posArrays, reversePosArrays : PosArrays;
characterEmbeddingLevels : IntegerArray;
originalCharacterTypes : Strings.StringArray;
characterTypes : Strings.StringArray;
characters : CharArray;
PROCEDURE &Init*;
BEGIN
next := NIL;
id := -1;
pos := -1;
lineLength := -1;
paragraphEmbeddingLevel := -1;
END Init;
PROCEDURE Clear;
BEGIN
lineLength := 0;
paragraphEmbeddingLevel := -1;
textReaders := NIL;
dirty := NIL;
posArrays := NIL;
reversePosArrays := NIL;
characterEmbeddingLevels := NIL;
originalCharacterTypes := NIL;
characterTypes := NIL;
characters := NIL;
END Clear;
END LineElement;
LineCache = OBJECT
VAR
first : LineElement;
PROCEDURE &Init*;
BEGIN
first := NIL;
END Init;
PROCEDURE InsertLine(pos : LONGINT);
VAR
thisElement, lastElement, newElement : LineElement;
BEGIN
thisElement := first;
WHILE (thisElement # NIL) & (thisElement.pos < pos) DO
lastElement := thisElement;
thisElement := thisElement.next;
END;
IF thisElement = NIL THEN
NEW(newElement);
newElement.pos := pos;
IF first = NIL THEN
first := newElement;
ELSE
lastElement.next := newElement;
END;
ELSIF thisElement.pos = pos THEN
ELSE
NEW(newElement);
newElement.pos := pos;
newElement.next := thisElement;
lastElement.next := newElement;
END;
IF CacheDebugging THEN
KernelLog.Ln; KernelLog.String("line inserted (");
KernelLog.Int(pos,4); KernelLog.String(")"); KernelLog.Ln;
END;
END InsertLine;
PROCEDURE GetNextPos(pos : LONGINT) : LONGINT;
VAR
thisElement : LineElement;
BEGIN
thisElement := first;
FindElement(pos,thisElement);
IF (thisElement = NIL) OR (thisElement.pos > pos) THEN
RETURN -1;
ELSE
IF thisElement.next # NIL THEN
RETURN thisElement.next.pos;
ELSE
RETURN thisElement.pos;
END;
END;
END GetNextPos;
PROCEDURE RemoveLine(pos : LONGINT);
VAR
thisElement, lastElement : LineElement;
BEGIN
thisElement := first;
WHILE (thisElement # NIL) & (thisElement.pos < pos) DO
lastElement := thisElement;
thisElement := thisElement.next;
END;
IF (thisElement # NIL) & (thisElement.pos = pos) THEN
IF thisElement = first THEN
first := thisElement.next;
ELSE
lastElement.next := thisElement.next;
END;
END;
IF CacheDebugging THEN
KernelLog.Ln; KernelLog.String("line removed(");
KernelLog.Int(pos,4); KernelLog.String(")"); KernelLog.Ln;
END;
END RemoveLine;
PROCEDURE ChangePos(startPos, changeValue : LONGINT);
VAR
thisElement : LineElement;
BEGIN
thisElement := first;
FindElement(startPos,thisElement);
thisElement := thisElement.next;
WHILE thisElement # NIL DO
INC(thisElement.pos,changeValue);
thisElement := thisElement.next;
END;
END ChangePos;
PROCEDURE FindElement(pos : LONGINT; VAR thisElement : LineElement);
VAR
nextElement : LineElement;
BEGIN
nextElement := first;
WHILE (nextElement # NIL) & (nextElement.pos <= pos) DO
thisElement := nextElement;
nextElement := nextElement.next;
END;
END FindElement;
PROCEDURE PrintCache;
VAR
thisElement : LineElement;
i : INTEGER;
BEGIN
thisElement := first;
i := 0;
KernelLog.Ln;
KernelLog.String("pos pel last len"); KernelLog.Ln;
KernelLog.String("===================="); KernelLog.Ln;
WHILE thisElement # NIL DO
KernelLog.Int(thisElement.pos,4); KernelLog.String(": ");
KernelLog.Int(thisElement.paragraphEmbeddingLevel,2); KernelLog.String(" ");
IF thisElement.characters # NIL THEN
KernelLog.Hex(thisElement.characters[LEN(thisElement.characters)-1],4);
ELSE
KernelLog.String("-:-:-");
END;
KernelLog.Int(thisElement.lineLength,4);
KernelLog.Ln;
thisElement := thisElement.next;
INC(i);
END;
KernelLog.String("cache size: "); KernelLog.Int(i,3); KernelLog.String(" lines"); KernelLog.Ln;
KernelLog.Ln;
END PrintCache;
END LineCache;
SegmentCache = OBJECT
VAR
first, last : LineElement;
nextId : LONGINT;
PROCEDURE &Init*;
BEGIN
first := NIL;
last := NIL;
nextId := 0;
END Init;
PROCEDURE InsertSegment(start, end : LONGINT) : LONGINT;
VAR
thisElement, newElement : LineElement;
BEGIN
NEW(newElement);
newElement.pos := start;
newElement.lineLength := end - start + 1;
newElement.id := nextId;
INC(nextId);
IF first = NIL THEN
first := newElement;
last := newElement;
RETURN nextId;
END;
thisElement := first;
WHILE thisElement # last DO
thisElement := thisElement.next;
END;
thisElement.next := newElement;
last := newElement;
RETURN nextId;
END InsertSegment;
PROCEDURE RemoveSegment(id : LONGINT);
VAR
thisElement : LineElement;
BEGIN
IF first = NIL THEN
RETURN;
ELSIF first.id = id THEN
IF first = last THEN
first := NIL;
last := NIL;
RETURN;
ELSE
first := first.next;
RETURN;
END;
END;
thisElement := first;
WHILE thisElement.next # last DO
IF thisElement.next.id = id THEN
thisElement.next := thisElement.next.next;
RETURN;
END;
thisElement := thisElement.next;
END;
IF last.id = id THEN
last := thisElement;
END;
END RemoveSegment;
PROCEDURE ChangePos(id, changeValue : LONGINT);
VAR
thisElement : LineElement;
BEGIN
thisElement := first;
WHILE thisElement # last DO
IF thisElement.id = id THEN
INC(thisElement.pos,changeValue);
RETURN;
END;
END;
IF last.id = id THEN
INC(thisElement.pos,changeValue);
END;
END ChangePos;
PROCEDURE FindElement(id : LONGINT; VAR thisElement : LineElement);
BEGIN
thisElement := first;
WHILE thisElement # last DO
IF thisElement.id = id THEN
RETURN;
END;
thisElement := thisElement.next;
END;
IF last.id = id THEN
RETURN;
END;
thisElement := NIL;
END FindElement;
PROCEDURE PrintCache;
VAR
thisElement : LineElement;
i : INTEGER;
BEGIN
thisElement := first;
i := 0;
KernelLog.Ln;
KernelLog.String("id pel last pos len"); KernelLog.Ln;
KernelLog.String("===================="); KernelLog.Ln;
WHILE thisElement # NIL DO
KernelLog.Int(thisElement.id,4); KernelLog.String(": ");
KernelLog.Int(thisElement.pos,4); KernelLog.String(" ");
KernelLog.Int(thisElement.paragraphEmbeddingLevel,2); KernelLog.String(" ");
IF thisElement.characters # NIL THEN
KernelLog.Hex(thisElement.characters[LEN(thisElement.characters)-1],4);
ELSE
KernelLog.String("-:-:-");
END;
KernelLog.Int(thisElement.lineLength,4);
KernelLog.Ln;
thisElement := thisElement.next;
INC(i);
END;
KernelLog.String("cache size: "); KernelLog.Int(i,3); KernelLog.String(" lines"); KernelLog.Ln;
KernelLog.Ln;
END PrintCache;
END SegmentCache;
BidiFormatter*=OBJECT
VAR
amm : ArrayMemoryManager;
textReader : Texts.TextReader;
unicodePropertyReader: UnicodeProperties.UnicodeTxtReader;
mirrorPropertyReader: UnicodeProperties.BidiMirroringTxtReader;
reformatted : BOOLEAN;
lineCache: LineCache;
segmentCache : SegmentCache;
trueTextLength : LONGINT;
HL1* : HigherLevelProtocol1;
HL2* : HigherLevelProtocol2;
HL3* : HigherLevelProtocol3;
HL4* : BOOLEAN;
HL5* : HigherLevelProtocol5;
HL6* : HigherLevelProtocol6;
PROCEDURE &Init*(text : Texts.Text);
BEGIN
NEW(amm);
NEW(textReader,text);
trueTextLength := text.GetLength();
textReader.SetDirection(1);
NEW(unicodePropertyReader);
NEW(mirrorPropertyReader);
reformatted := FALSE;
HL4 := FALSE;
END Init;
PROCEDURE ReformatText*;
VAR
thisPos, nextPos : LONGINT;
char : Texts.Char32;
BEGIN
IF ~textReader.text.isUTF THEN
RETURN;
END;
trueTextLength := textReader.text.GetLength();
IF CharacterDebugging THEN KernelLog.String("reformatting text..."); KernelLog.Ln; END;
nextPos := 0;
NEW(lineCache);
IF HL4 THEN
NEW(segmentCache);
END;
REPEAT
thisPos := nextPos;
lineCache.InsertLine(thisPos);
FindEndOfParagraph(thisPos,nextPos);
ReformatParagraph(thisPos);
UNTIL (nextPos >= textReader.text.GetLength()) OR (thisPos = nextPos);
textReader.SetDirection(1);
textReader.SetPosition(textReader.text.GetLength()-1);
textReader.ReadCh(char);
IF char = UnicodeProperties.LF THEN
lineCache.InsertLine(textReader.text.GetLength());
END;
IF CacheDebugging THEN
KernelLog.Ln;
unicodePropertyReader.PrintCharTypeCache;
END;
IF CharacterDebugging THEN
KernelLog.Ln; KernelLog.String("...reformatting done! Text length: ");
KernelLog.Int(textReader.text.GetLength(),4); KernelLog.Ln;
END;
IF CacheDebugging THEN
lineCache.PrintCache;
IF segmentCache # NIL THEN
segmentCache.PrintCache;
END;
END;
reformatted := TRUE;
END ReformatText;
PROCEDURE ReformatTextFrom*(pos, changed : LONGINT);
VAR
char : Texts.Char32;
BEGIN
IF ~textReader.text.isUTF THEN
RETURN;
END;
IF trueTextLength = textReader.text.GetLength() THEN
changed := 0;
ELSE
lineCache.ChangePos(pos, changed);
trueTextLength := textReader.text.GetLength();
END;
IF CharacterDebugging THEN
KernelLog.String("reformatting text at position "); KernelLog.Int(pos,4);
KernelLog.String("..."); KernelLog.Ln;
END;
textReader.SetDirection(1);
textReader.SetPosition(pos);
textReader.ReadCh(char);
IF (changed > 0) & (char = UnicodeProperties.LF) THEN
lineCache.InsertLine(pos+1);
ReformatParagraph(pos);
ReformatParagraph(pos+1);
ELSIF changed < 0 THEN
SweepCache;
ReformatParagraph(pos);
ELSE
ReformatParagraph(pos);
END;
IF CharacterDebugging THEN
KernelLog.Ln; KernelLog.String("...reformatting done! Text length: ");
KernelLog.Int(textReader.text.GetLength(),4); KernelLog.Ln;
END;
IF CacheDebugging THEN
lineCache.PrintCache;
IF segmentCache # NIL THEN
segmentCache.PrintCache;
END;
END;
END ReformatTextFrom;
PROCEDURE ReformatParagraph(pos : LONGINT);
VAR
cacheElement : LineElement;
start, end : LONGINT;
BEGIN
lineCache.FindElement(pos,cacheElement);
FindStartOfParagraph(pos,start);
FindEndOfParagraph(pos,end);
IF CharacterDebugging THEN
KernelLog.Ln; KernelLog.Ln;
KernelLog.String("paragraph ("); KernelLog.Int(start,3); KernelLog.String(",");
KernelLog.Int(end-1,3); KernelLog.String(") found.");
END;
ReformatClause(start,end,cacheElement);
END ReformatParagraph;
PROCEDURE ReformatSegment*(start, end : LONGINT) : LONGINT;
VAR
thisId : LONGINT;
cacheElement : LineElement;
BEGIN
IF ~textReader.text.isUTF OR ~HL4 OR (segmentCache = NIL) THEN
RETURN -1;
END;
thisId := segmentCache.InsertSegment(start,end);
segmentCache.FindElement(thisId,cacheElement);
ReformatClause(start,end+1,cacheElement);
RETURN thisId;
END ReformatSegment;
PROCEDURE RemoveSegment*(id : LONGINT);
BEGIN
IF ~textReader.text.isUTF OR ~HL4 OR (segmentCache = NIL) THEN
RETURN;
END;
segmentCache.RemoveSegment(id);
END RemoveSegment;
PROCEDURE ChangeSegmentPos*(id, changeValue : LONGINT);
BEGIN
IF ~textReader.text.isUTF OR ~HL4 OR (segmentCache = NIL) THEN
RETURN;
END;
segmentCache.ChangePos(id,changeValue);
END ChangeSegmentPos;
PROCEDURE ReformatClause(start, end : LONGINT; VAR cacheElement : LineElement);
VAR
charType : Strings.String;
i : LONGINT;
BEGIN
amm.NewString(charType);
IF HL1 = NIL THEN
FindFirstStrongCharacter(start,end,charType);
IF charType = NIL THEN
IF CharacterDebugging THEN KernelLog.String(" (Empty paragraph)"); END;
cacheElement.Clear;
amm.FreeString(charType);
RETURN;
ELSIF charType^ = "L" THEN
cacheElement.paragraphEmbeddingLevel := 0;
ELSE
cacheElement.paragraphEmbeddingLevel := 1;
END;
ELSE
cacheElement.paragraphEmbeddingLevel := HL1(textReader,start,end);
END;
amm.FreeString(charType);
IF CharacterDebugging THEN
KernelLog.String(" Embedding Level: "); KernelLog.Int(cacheElement.paragraphEmbeddingLevel,2); KernelLog.Ln;
PrintOriginalTypedParagraph(start,end);
PrintCodedParagraph(start,end);
KernelLog.Ln; KernelLog.String("Applying explicit levels...");
END;
ApplyExplicitLevels(start,end,cacheElement);
IF CharacterDebugging THEN
PrintCurrentTypedParagraph(start,end);
PrintLeveledParagraph(start,end);
KernelLog.Ln; KernelLog.String("Resolving weak types...");
END;
ResolveWeakTypes(cacheElement);
IF CharacterDebugging THEN
PrintCurrentTypedParagraph(start,end);
KernelLog.Ln; KernelLog.String("Resolving neutral types...");
END;
ResolveNeutralTypes(cacheElement);
IF CharacterDebugging THEN
PrintCurrentTypedParagraph(start,end);
KernelLog.Ln; KernelLog.String("Resolving implicit levels...");
END;
ResolveImplicitLevels(cacheElement);
IF CharacterDebugging THEN
PrintLeveledParagraph(start,end);
END;
FOR i := 0 TO LEN(cacheElement.dirty) - 1 DO
cacheElement.dirty[i] := TRUE;
END;
END ReformatClause;
PROCEDURE ApplyExplicitLevels(start, end : LONGINT; VAR cacheElement : LineElement);
VAR
embeddingLevels : IntegerStack;
dummyEmbeddingLevel : LONGINT;
overrideStati : IntegerStack;
dummyOverrideStatus : LONGINT;
char : Texts.Char32;
charType : Strings.String;
nextLevel : LONGINT;
i : INTEGER;
lineLength : LONGINT;
surplusLevels : LONGINT;
hlCharacterType : Strings.String;
hlEmbeddingLevel : LONGINT;
BEGIN
char := 0H;
nextLevel := 0;
i := 0;
surplusLevels := 0;
lineLength := end-start;
amm.NewIntegerArray(cacheElement.characterEmbeddingLevels,lineLength);
amm.NewStringArray(cacheElement.characterTypes,lineLength);
amm.NewStringArray(cacheElement.originalCharacterTypes,lineLength);
amm.NewCharArray(cacheElement.characters,lineLength);
amm.NewIntegerStack(embeddingLevels, 62);
amm.NewIntegerStack(overrideStati,2*lineLength);
embeddingLevels.Push(cacheElement.paragraphEmbeddingLevel);
overrideStati.Push(DOSNeutral);
textReader.SetDirection(1);
textReader.SetPosition(start);
amm.NewString(charType);
amm.NewString(hlCharacterType);
WHILE (textReader.GetPosition() < end) DO
textReader.ReadCh(char);
unicodePropertyReader.GetBidiCharacterType(char,charType);
IF (HL3 = NIL) OR
~HL3(cacheElement.characters,cacheElement.characterEmbeddingLevels,cacheElement.originalCharacterTypes,cacheElement.characterTypes,
char,charType,hlCharacterType,hlEmbeddingLevel) THEN
IF charType^ = "RLE" THEN
GetNextOddEmbeddingLevel(embeddingLevels.Top(), nextLevel);
IF (embeddingLevels.Top() # nextLevel) THEN
embeddingLevels.Push(nextLevel);
overrideStati.Push(DOSNeutral);
ELSE
INC(surplusLevels);
END;
cacheElement.characterTypes[i] := Strings.NewString("BN");
ELSIF charType^ = "LRE" THEN
GetNextEvenEmbeddingLevel(embeddingLevels.Top(), nextLevel);
IF (embeddingLevels.Top() # nextLevel) THEN
embeddingLevels.Push(nextLevel);
overrideStati.Push(DOSNeutral);
ELSE
INC(surplusLevels);
END;
cacheElement.characterTypes[i] := Strings.NewString("BN");
ELSIF charType^ = "RLO" THEN
GetNextOddEmbeddingLevel(embeddingLevels.Top(), nextLevel);
IF (embeddingLevels.Top() # nextLevel) THEN
embeddingLevels.Push(nextLevel);
overrideStati.Push(DOSRightToLeft);
ELSE
INC(surplusLevels);
END;
cacheElement.characterTypes[i] := Strings.NewString("BN");
ELSIF charType^ = "LRO" THEN
GetNextEvenEmbeddingLevel(embeddingLevels.Top(), nextLevel);
IF (embeddingLevels.Top() # nextLevel) THEN
embeddingLevels.Push(nextLevel);
overrideStati.Push(DOSLeftToRight);
ELSE
INC(surplusLevels);
END;
cacheElement.characterTypes[i] := Strings.NewString("BN");
ELSIF charType^ = "PDF" THEN
IF surplusLevels > 0 THEN
DEC(surplusLevels);
ELSE
dummyEmbeddingLevel := embeddingLevels.Pop();
IF (embeddingLevels.Size() < 1) THEN
embeddingLevels.Push(dummyEmbeddingLevel);
END;
dummyOverrideStatus := overrideStati.Pop();
IF (overrideStati.Size() < 1) THEN
overrideStati.Push(dummyOverrideStatus);
END;
END;
cacheElement.characterTypes[i] := Strings.NewString("BN");
ELSE
IF (overrideStati.Top() = DOSNeutral) THEN
amm.NewCharacterType(cacheElement.characterTypes[i]);
Strings.Copy(charType^,0,LEN(charType),cacheElement.characterTypes[i]^);
ELSIF (overrideStati.Top() = DOSLeftToRight) THEN
cacheElement.characterTypes[i] := Strings.NewString("L");
ELSIF (overrideStati.Top() = DOSRightToLeft) THEN
cacheElement.characterTypes[i] := Strings.NewString("R");
END;
END;
cacheElement.characterEmbeddingLevels[i] := embeddingLevels.Top();
ELSE
amm.NewCharacterType(cacheElement.characterTypes[i]);
Strings.Copy(hlCharacterType^,0,LEN(hlCharacterType),cacheElement.characterTypes[i]^);
cacheElement.characterEmbeddingLevels[i] := hlEmbeddingLevel;
END;
amm.NewCharacterType(cacheElement.originalCharacterTypes[i]);
Strings.Copy(charType^,0,LEN(charType),cacheElement.originalCharacterTypes[i]^);
cacheElement.characters[i] := char;
INC(i);
END;
amm.FreeString(hlCharacterType);
amm.FreeString(charType);
amm.FreeIntegerStack(embeddingLevels);
amm.FreeIntegerStack(overrideStati);
cacheElement.lineLength := lineLength;
amm.NewBoolArray(cacheElement.dirty,lineLength);
amm.NewTextReaderArray(cacheElement.textReaders,lineLength);
amm.NewPosArrays(cacheElement.posArrays,lineLength);
amm.NewPosArrays(cacheElement.reversePosArrays,lineLength);
END ApplyExplicitLevels;
PROCEDURE ResolveWeakTypes(VAR cacheElement : LineElement);
VAR
i,nextI : LONGINT;
pos, state, ENstate : LONGINT;
passedBNs : IntegerStack;
charType : Strings.String;
lastCharType : Strings.String;
lastStrongType : LONGINT;
newLevel : BOOLEAN;
dummyBool : BOOLEAN;
dummyInt : LONGINT;
BEGIN
lastStrongType := NeutralType;
newLevel := FALSE;
amm.NewString(lastCharType);
GetBorderOfRunCharacterType(cacheElement.characterEmbeddingLevels[0],cacheElement.paragraphEmbeddingLevel,lastCharType);
FOR i := 0 TO cacheElement.lineLength - 1 DO
IF HL5 # NIL THEN
dummyBool := HL5(cacheElement,i,lastCharType,lastStrongType,dummyInt,WeakTypes1);
END;
IF cacheElement.characterTypes[i]^ = "NSM" THEN
Strings.Copy(lastCharType^,0,Strings.Min(LEN(cacheElement.characterTypes[i]),LEN(lastCharType)),cacheElement.characterTypes[i]^);
END;
IF (i < cacheElement.lineLength - 1) & (cacheElement.characterEmbeddingLevels[i] # cacheElement.characterEmbeddingLevels[i+1]) THEN
GetBorderOfRunCharacterType(cacheElement.characterEmbeddingLevels[i],cacheElement.characterEmbeddingLevels[i+1],lastCharType);
lastStrongType := NeutralType;
newLevel := TRUE;
END;
IF cacheElement.characterTypes[i]^ = "AL" THEN
IF ~newLevel THEN
lastCharType^ := "R";
lastStrongType := ArabicNumber;
END;
cacheElement.characterTypes[i]^ := "R";
ELSIF IsStrongCharacterType(cacheElement.characterTypes[i]) & ~newLevel THEN
Strings.Copy(cacheElement.characterTypes[i]^,0,Strings.Min(LEN(cacheElement.characterTypes[i]),LEN(lastCharType)),lastCharType^);
lastStrongType := NeutralType;
ELSIF cacheElement.characterTypes[i]^ = "EN" THEN
IF HL2 = NIL THEN
IF lastStrongType = ArabicNumber THEN
cacheElement.characterTypes[i]^ := "AN";
lastCharType^ := "AN";
END;
ELSE
charType := HL2(cacheElement,i,lastStrongType);
Strings.Copy(charType^,0,Strings.Min(LEN(cacheElement.characterTypes[i]),LEN(charType)),cacheElement.characterTypes[i]^);
Strings.Copy(charType^,0,Strings.Min(LEN(lastCharType),LEN(charType)),lastCharType^);
END;
ELSIF cacheElement.characterTypes[i]^ # "BN" THEN
Strings.Copy(cacheElement.characterTypes[i]^,0,Strings.Min(LEN(cacheElement.characterTypes[i]),LEN(lastCharType)),lastCharType^);
END;
newLevel := FALSE;
END;
state := NeutralType;
GetBorderOfRunCharacterType(cacheElement.characterEmbeddingLevels[0],cacheElement.paragraphEmbeddingLevel,lastCharType);
IF lastCharType^ = "L" THEN
ENstate := LeftStrongType;
ELSE
ENstate := NeutralType;
END;
amm.NewIntegerStack(passedBNs,cacheElement.lineLength);
nextI := 0;
FOR i := 0 TO cacheElement.lineLength - 1 DO
IF HL5 # NIL THEN
dummyBool := HL5(cacheElement,i,lastCharType,state,ENstate,WeakTypes2);
END;
IF cacheElement.characterTypes[i]^ = "EN" THEN
IF ENstate = LeftStrongType THEN
cacheElement.characterTypes[i]^ := "L";
END;
state := EuropeanNumber;
passedBNs.Purge;
ELSIF cacheElement.characterTypes[i]^ = "AN" THEN
state := ArabicNumber;
passedBNs.Purge;
ELSIF cacheElement.characterTypes[i]^ = "BN" THEN
passedBNs.Push(i);
ELSIF cacheElement.characterTypes[i]^ = "ES" THEN
IF state = EuropeanNumber THEN
IF IsEuropeanNumberAdjacent(i+1,FALSE,cacheElement,nextI,state,ENstate) THEN
IF ENstate = LeftStrongType THEN
cacheElement.characterTypes[i]^ := "L";
ELSE
cacheElement.characterTypes[i]^ := "EN";
END;
ELSE
cacheElement.characterTypes[i]^ := "ON";
END;
i := nextI;
ELSE
cacheElement.characterTypes[i]^ := "ON";
END;
LOOP
pos := passedBNs.Pop();
IF pos < 0 THEN
EXIT;
ELSE
cacheElement.characterTypes[pos]^ := "ON";
END;
END;
ELSIF cacheElement.characterTypes[i]^ = "CS" THEN
IF state = EuropeanNumber THEN
IF IsEuropeanNumberAdjacent(i+1,FALSE,cacheElement,nextI,state,ENstate) THEN
IF ENstate = LeftStrongType THEN
cacheElement.characterTypes[i]^ := "L";
ELSE
cacheElement.characterTypes[i]^ := "EN";
END;
ELSE
cacheElement.characterTypes[i]^ := "ON";
END;
i := nextI;
ELSIF state = ArabicNumber THEN
IF IsArabicNumberAdjacent(i+1,cacheElement,nextI,state,ENstate) THEN
cacheElement.characterTypes[i]^ := "AN";
ELSE
cacheElement.characterTypes[i]^ := "ON";
END;
i := nextI;
ELSE
cacheElement.characterTypes[i]^ := "ON";
END;
LOOP
pos := passedBNs.Pop();
IF pos < 0 THEN
EXIT;
ELSE
cacheElement.characterTypes[pos]^ := "ON";
END;
END;
ELSIF cacheElement.characterTypes[i]^ = "ET" THEN
IF state = EuropeanNumber THEN
IF ENstate = LeftStrongType THEN
cacheElement.characterTypes[i]^ := "L";
LOOP
pos := passedBNs.Pop();
IF pos < 0 THEN
EXIT;
ELSE
cacheElement.characterTypes[pos]^ := "L";
END;
END;
ELSE
cacheElement.characterTypes[i]^ := "EN";
LOOP
pos := passedBNs.Pop();
IF pos < 0 THEN
EXIT;
ELSE
cacheElement.characterTypes[pos]^ := "EN";
END;
END;
END;
ELSE
IF IsEuropeanNumberAdjacent(i+1,TRUE,cacheElement,nextI,state,ENstate) THEN
IF ENstate = LeftStrongType THEN
cacheElement.characterTypes[i]^ := "L";
LOOP
pos := passedBNs.Pop();
IF pos < 0 THEN
EXIT;
ELSE
cacheElement.characterTypes[pos]^ := "L";
END;
END;
ELSE
cacheElement.characterTypes[i]^ := "EN";
LOOP
pos := passedBNs.Pop();
IF pos < 0 THEN
EXIT;
ELSE
cacheElement.characterTypes[pos]^ := "EN";
END;
END;
END;
ELSE
cacheElement.characterTypes[i]^ := "ON";
LOOP
pos := passedBNs.Pop();
IF pos < 0 THEN
EXIT;
ELSE
cacheElement.characterTypes[pos]^ := "ON";
END;
END;
END;
i := nextI;
END;
ELSE
IF cacheElement.characterTypes[i]^ = "L" THEN
ENstate := LeftStrongType;
ELSIF cacheElement.characterTypes[i]^ = "R" THEN
ENstate := NeutralType;
END;
state := NeutralType;
passedBNs.Purge;
END;
IF (i < cacheElement.lineLength - 1) & (cacheElement.characterEmbeddingLevels[i] # cacheElement.characterEmbeddingLevels[i+1]) THEN
amm.NewString(charType);
GetBorderOfRunCharacterType(cacheElement.characterEmbeddingLevels[i],cacheElement.characterEmbeddingLevels[i+1],charType);
IF charType^ = "L" THEN
ENstate := LeftStrongType;
ELSE
ENstate := NeutralType;
END;
amm.FreeString(charType);
END;
END;
amm.FreeString(lastCharType);
amm.FreeIntegerStack(passedBNs);
END ResolveWeakTypes;
PROCEDURE ResolveNeutralTypes(VAR cacheElement : LineElement);
VAR
i : LONGINT;
directionalType, hlCharacterType : Strings.String;
dummyInt : LONGINT;
BEGIN
amm.NewString(directionalType);
amm.NewString(hlCharacterType);
FOR i := 0 TO cacheElement.lineLength - 1 DO
IF (IsNeutralCharacterType(cacheElement.characterTypes[i])) THEN
IF (HL5 # NIL) & HL5(cacheElement,i,hlCharacterType,dummyInt,dummyInt,NeutralTypes) THEN
Strings.Copy(hlCharacterType^,0,LEN(hlCharacterType),directionalType^);
ELSIF (i = 0) THEN
GetFinalDirectionalType(cacheElement.characterTypes[i],cacheElement.characterEmbeddingLevels[i],cacheElement.paragraphEmbeddingLevel,directionalType);
ELSE
GetFinalDirectionalType(cacheElement.characterTypes[i],cacheElement.characterEmbeddingLevels[i],cacheElement.characterEmbeddingLevels[i-1],directionalType);
END;
IF (HasSameDirectionalType(directionalType,i+1,cacheElement)) THEN
WHILE (i < cacheElement.lineLength) & (IsNeutralCharacterType(cacheElement.characterTypes[i])) DO
Strings.Copy(directionalType^,0,LEN(directionalType),cacheElement.characterTypes[i]^);
INC(i);
END;
ELSE
WHILE (i < cacheElement.lineLength) & (IsNeutralCharacterType(cacheElement.characterTypes[i])) DO
GetBorderOfRunCharacterType(cacheElement.characterEmbeddingLevels[i],cacheElement.characterEmbeddingLevels[i]-1,cacheElement.characterTypes[i]);
INC(i);
END;
END;
END;
END;
amm.FreeString(directionalType);
amm.FreeString(hlCharacterType);
END ResolveNeutralTypes;
PROCEDURE ResolveImplicitLevels(VAR cacheElement : LineElement);
VAR
i : LONGINT;
BEGIN
FOR i := 0 TO cacheElement.lineLength - 1 DO
IF (ODD(cacheElement.characterEmbeddingLevels[i])) THEN
IF (cacheElement.characterTypes[i]^ = "L") OR (cacheElement.characterTypes[i]^ = "EN") OR (cacheElement.characterTypes[i]^ = "AN") THEN
INC(cacheElement.characterEmbeddingLevels[i]);
END;
ELSE
IF cacheElement.characterTypes[i]^ = "R" THEN
INC(cacheElement.characterEmbeddingLevels[i]);
ELSIF (cacheElement.characterTypes[i]^ = "AN") OR (cacheElement.characterTypes[i]^ = "EN") THEN
INC(cacheElement.characterEmbeddingLevels[i],2);
END;
END;
END;
END ResolveImplicitLevels;
PROCEDURE ReorderLine*(start, length : LONGINT) : Texts.TextReader;
VAR
cacheElement : LineElement;
BEGIN
IF ~reformatted OR ~textReader.text.isUTF THEN
RETURN NIL;
END;
lineCache.FindElement(start,cacheElement);
RETURN ReorderClause(cacheElement,start,length);
END ReorderLine;
PROCEDURE ReorderSegment*(id : LONGINT) : Texts.TextReader;
VAR
cacheElement : LineElement;
BEGIN
IF ~reformatted OR ~textReader.text.isUTF OR ~HL4 OR (segmentCache = NIL) THEN
RETURN NIL;
END;
segmentCache.FindElement(id,cacheElement);
RETURN ReorderClause(cacheElement,cacheElement.pos,cacheElement.lineLength);
END ReorderSegment;
PROCEDURE ReorderClause(VAR cacheElement : LineElement; start, length : LONGINT) : Texts.TextReader;
VAR
bidiTextReader : Texts.TextReader;
reorderedText : Texts.Text;
posArray, reversePosArray : PosArray;
i, newPos : LONGINT;
relStart: LONGINT;
maxLevel, minLevel : LONGINT;
oneCharString : Texts.PUCS32String;
lastChar : Strings.String;
BEGIN
relStart := start - cacheElement.pos;
IF cacheElement.paragraphEmbeddingLevel = -1 THEN
RETURN NIL;
END;
IF relStart >= cacheElement.lineLength THEN
RETURN NIL;
END;
IF ~cacheElement.dirty[relStart] THEN
RETURN cacheElement.textReaders[relStart];
END;
length := Strings.Min(length,cacheElement.lineLength-relStart);
IF CharacterDebugging THEN
KernelLog.Ln;
KernelLog.String("Reordering line ("); KernelLog.Int(start,4); KernelLog.String(",");
KernelLog.Int(start+length-1,4); KernelLog.String(")..."); KernelLog.Ln;
END;
IF CharacterDebugging THEN PrintLineEmbeddingLevels(cacheElement.characterEmbeddingLevels); END;
i := length - 1;
LOOP
IF (i < relStart) OR ~IsNeutralCharacterType(cacheElement.originalCharacterTypes[i]) THEN
EXIT;
ELSE
cacheElement.characterEmbeddingLevels[i] := cacheElement.paragraphEmbeddingLevel;
DEC(i);
END;
END;
maxLevel := GetHighestLevel(relStart,length,cacheElement.characterEmbeddingLevels^);
minLevel := GetLowestOddLevel(relStart,length,cacheElement.characterEmbeddingLevels^);
lastChar := cacheElement.originalCharacterTypes[relStart+length-1];
IF lastChar^ = "B" THEN
amm.NewPosArray(cacheElement.posArrays[relStart],length);
posArray := cacheElement.posArrays[relStart];
FillPositionArray(relStart,posArray.array^);
FOR i := maxLevel TO minLevel BY - 1 DO
ReorderSubstrings(relStart,i,maxLevel,cacheElement.characterEmbeddingLevels^,posArray,lastChar);
END;
amm.NewPosArray(cacheElement.reversePosArrays[relStart],length);
reversePosArray := cacheElement.reversePosArrays[relStart];
ELSE
amm.NewPosArray(cacheElement.posArrays[relStart],length+1);
posArray := cacheElement.posArrays[relStart];
FillPositionArray(relStart,posArray.array^);
FOR i := maxLevel TO minLevel BY - 1 DO
ReorderSubstrings(relStart,i,maxLevel,cacheElement.characterEmbeddingLevels^,posArray,Strings.NewString("B"));
END;
amm.NewPosArray(cacheElement.reversePosArrays[relStart],length+1);
reversePosArray := cacheElement.reversePosArrays[relStart];
END;
FOR i := 0 TO posArray.size - 1 DO
reversePosArray.array[posArray.array[i] - relStart] := i;
END;
IF CharacterDebugging THEN PrintLineEmbeddingLevels(cacheElement.characterEmbeddingLevels); END;
NEW(reorderedText);
amm.NewUString(oneCharString);
oneCharString[1] := 0H;
reorderedText.AcquireWrite;
FOR i := 0 TO length - 1 DO
newPos := posArray.array[i];
oneCharString[0] := cacheElement.characters[newPos];
MirrorCharacter(oneCharString[0],cacheElement.characterEmbeddingLevels[newPos]);
reorderedText.InsertUCS32(i,oneCharString^);
END;
reorderedText.ReleaseWrite;
NEW(bidiTextReader,reorderedText);
IF CharacterDebugging THEN
KernelLog.Ln; KernelLog.String("Printing line reordered positions::"); KernelLog.Ln;
FOR i := 0 TO posArray.size - 1 DO
KernelLog.Int(posArray.array[i],3); KernelLog.String(" ");
END;
KernelLog.Ln;
reorderedText.AcquireRead;
PrintCodedLine(bidiTextReader,0,length);
reorderedText.ReleaseRead;
KernelLog.Ln; KernelLog.String("...reordering done! Text length: "); KernelLog.Int(length,4); KernelLog.Ln;
END;
cacheElement.dirty[relStart] := FALSE;
cacheElement.textReaders[relStart] := bidiTextReader;
amm.FreeUString(oneCharString);
RETURN bidiTextReader;
END ReorderClause;
PROCEDURE GetDisplayPosition*(pos, lineStart : LONGINT) : LONGINT;
VAR
relPos, relLineStart : LONGINT;
cacheElement : LineElement;
posArray : PosArray;
BEGIN
IF ~reformatted OR ~textReader.text.isUTF THEN
RETURN pos;
END;
IF pos < 0 THEN
pos := 0
END;
lineCache.FindElement(pos,cacheElement);
IF (cacheElement = NIL) OR (cacheElement.reversePosArrays = NIL) OR (cacheElement.pos + cacheElement.lineLength <= pos) THEN
RETURN pos;
END;
relLineStart := lineStart - cacheElement.pos;
relPos := pos - lineStart;
posArray := cacheElement.reversePosArrays[relLineStart];
IF posArray = NIL THEN
RETURN pos;
ELSE
RETURN posArray.array[relPos] + lineStart;
END;
END GetDisplayPosition;
PROCEDURE GetInternalPosition*(pos, lineStart : LONGINT) : LONGINT;
VAR
relPos, relLineStart : LONGINT;
cacheElement : LineElement;
posArray : PosArray;
BEGIN
IF ~reformatted OR ~textReader.text.isUTF THEN
RETURN pos;
END;
lineCache.FindElement(pos,cacheElement);
IF (cacheElement = NIL) OR (cacheElement.posArrays = NIL) THEN
RETURN pos;
END;
relLineStart := lineStart - cacheElement.pos;
relPos := pos - lineStart;
posArray := cacheElement.posArrays[relLineStart];
IF posArray = NIL THEN
RETURN pos
ELSE
IF relPos < posArray.size THEN
RETURN posArray.array[relPos] + cacheElement.pos;
ELSE
RETURN posArray.array[posArray.size-1] + cacheElement.pos;
END;
END;
END GetInternalPosition;
PROCEDURE GetImplicitLevel*(pos : LONGINT) : LONGINT;
VAR
relPos : LONGINT;
cacheElement : LineElement;
BEGIN
IF ~reformatted OR ~textReader.text.isUTF THEN
RETURN 0;
END;
lineCache.FindElement(pos,cacheElement);
IF (cacheElement = NIL) OR (cacheElement.characterEmbeddingLevels = NIL) THEN
RETURN Strings.Max(0,cacheElement.paragraphEmbeddingLevel);
END;
IF cacheElement.pos + cacheElement.lineLength <= pos THEN
RETURN cacheElement.characterEmbeddingLevels[cacheElement.lineLength-1];
END;
relPos := pos - cacheElement.pos;
RETURN cacheElement.characterEmbeddingLevels[relPos];
END GetImplicitLevel;
PROCEDURE GetParagraphEmbeddingLevel*(pos : LONGINT) : LONGINT;
VAR
cacheElement : LineElement;
BEGIN
IF ~reformatted OR ~textReader.text.isUTF THEN
RETURN 0;
END;
lineCache.FindElement(pos,cacheElement);
IF (cacheElement = NIL) OR (cacheElement.paragraphEmbeddingLevel = -1) THEN
RETURN 0;
END;
RETURN cacheElement.paragraphEmbeddingLevel;
END GetParagraphEmbeddingLevel;
PROCEDURE ReadyTextReader*(start : LONGINT; VAR isFirst : BOOLEAN) : Texts.TextReader;
VAR
cacheElement : LineElement;
relStart : LONGINT;
BEGIN
IF (~reformatted) OR ~textReader.text.isUTF THEN
RETURN NIL;
END;
lineCache.FindElement(start,cacheElement);
relStart := start - cacheElement.pos;
IF cacheElement.paragraphEmbeddingLevel = -1 THEN
RETURN NIL;
END;
IF relStart >= cacheElement.lineLength THEN
RETURN NIL;
END;
IF ~cacheElement.dirty[relStart] THEN
isFirst := (relStart = 0);
RETURN cacheElement.textReaders[relStart];
END;
RETURN NIL;
END ReadyTextReader;
PROCEDURE SetReadyTextReader*(start : LONGINT; textReader : Texts.TextReader);
VAR
cacheElement : LineElement;
relStart : LONGINT;
BEGIN
IF (~reformatted) OR ~textReader.text.isUTF THEN
RETURN;
END;
lineCache.FindElement(start,cacheElement);
relStart := start - cacheElement.pos;
IF relStart >= cacheElement.lineLength THEN
RETURN;
END;
cacheElement.textReaders[relStart] := textReader;
END SetReadyTextReader;
PROCEDURE IsLastCharacterInLine*(pos : LONGINT) : BOOLEAN;
VAR
cacheElement : LineElement;
BEGIN
lineCache.FindElement(pos,cacheElement);
RETURN pos = cacheElement.pos + cacheElement.lineLength - 1;
END IsLastCharacterInLine;
PROCEDURE FindEndOfParagraph(pos : LONGINT; VAR end : LONGINT);
VAR
char : Texts.Char32;
charType : Strings.String;
textLength : LONGINT;
BEGIN
char := 0H;
textLength := textReader.text.GetLength();
amm.NewString(charType);
textReader.SetDirection(1);
textReader.SetPosition(pos);
REPEAT
textReader.ReadCh(char);
unicodePropertyReader.GetBidiCharacterType(char,charType);
UNTIL (charType^ = "B") OR (char = UnicodeProperties.EOT) OR (textReader.GetPosition() >= textLength);
end := textReader.GetPosition();
amm.FreeString(charType);
END FindEndOfParagraph;
PROCEDURE FindStartOfParagraph(pos : LONGINT; VAR start : LONGINT);
VAR
char : Texts.Char32;
charType : Strings.String;
BEGIN
textReader.SetPosition(pos-1);
textReader.SetDirection(-1);
amm.NewString(charType);
REPEAT
textReader.ReadCh(char);
unicodePropertyReader.GetBidiCharacterType(char,charType);
UNTIL (charType^ = "B") OR (textReader.GetPosition() <= 0);
IF charType^ # "B" THEN
start := 0;
ELSE
start := textReader.GetPosition() + 2;
END;
amm.FreeString(charType);
END FindStartOfParagraph;
PROCEDURE MirrorCharacter(VAR originalChar : Texts.Char32; embeddingLevel : LONGINT);
BEGIN
IF ODD(embeddingLevel) & unicodePropertyReader.IsMirroredChar(originalChar) THEN
originalChar := mirrorPropertyReader.GetMirroredChar(originalChar);
ELSIF ODD(embeddingLevel) & (HL6 # NIL) THEN
originalChar := HL6(originalChar);
END;
END MirrorCharacter;
PROCEDURE ReorderSubstrings(globalStart, fromLevel, toLevel : LONGINT; CONST levels : ARRAY OF LONGINT; VAR posArray : PosArray; lastCharType : Strings.String);
VAR
i, start, end : LONGINT;
BEGIN
IF posArray.size = 1 THEN
RETURN;
END;
IF lastCharType^ = "B" THEN
end := posArray.size - 2;
ELSE
end := posArray.size - 1;
END;
FOR i := globalStart TO globalStart + end DO
IF (levels[i] >= fromLevel) THEN
start := i;
REPEAT
INC(i);
UNTIL (i > globalStart + end) OR (levels[i] < fromLevel);
IF posArray.size > (i - globalStart) THEN
SwapSubstring(start-globalStart,i-start-1,posArray.array^);
END;
END;
END;
END ReorderSubstrings;
PROCEDURE SwapSubstring(start, end: LONGINT; VAR posArray : ARRAY OF LONGINT);
VAR
i : LONGINT;
BEGIN
FOR i := 0 TO (end+1) DIV 2 - 1 DO
SwapPositions(start+i,start+end-i,posArray);
END;
END SwapSubstring;
PROCEDURE SwapPositions(pos1, pos2 : LONGINT; VAR posArray : ARRAY OF LONGINT);
VAR
temp : LONGINT;
BEGIN
temp := posArray[pos1];
posArray[pos1] := posArray[pos2];
posArray[pos2] := temp;
END SwapPositions;
PROCEDURE FillPositionArray(start : LONGINT; VAR posArray : ARRAY OF LONGINT);
VAR
i : LONGINT;
BEGIN
FOR i := 0 TO LEN(posArray) - 1 DO
posArray[i] := start + i;
END;
END FillPositionArray;
PROCEDURE GetHighestLevel(start, length : LONGINT; CONST levels : ARRAY OF LONGINT) : LONGINT;
VAR
i, max : LONGINT;
BEGIN
max := 0;
FOR i := start TO start+length - 1 DO
IF i > LEN(levels) - 1 THEN
RETURN max;
END;
IF (levels[i] > max) THEN
max := levels[i];
END;
END;
RETURN max;
END GetHighestLevel;
PROCEDURE GetLowestOddLevel(start, length : LONGINT; CONST levels : ARRAY OF LONGINT) : LONGINT;
VAR
i, min : LONGINT;
BEGIN
min := 61;
FOR i := start TO start + length - 1 DO
IF i > LEN(levels) - 1 THEN
RETURN min;
END;
IF (levels[i] < min) & ODD(levels[i]) THEN
min := levels[i];
END;
END;
RETURN min;
END GetLowestOddLevel;
PROCEDURE GetNextEvenEmbeddingLevel(thisLevel : LONGINT; VAR nextLevel : LONGINT);
BEGIN
IF thisLevel > 59 THEN
nextLevel := thisLevel;
RETURN;
END;
IF (ODD(thisLevel)) THEN
nextLevel := thisLevel + 1;
ELSE
nextLevel := thisLevel + 2;
END;
END GetNextEvenEmbeddingLevel;
PROCEDURE GetNextOddEmbeddingLevel(thisLevel : LONGINT; VAR nextLevel : LONGINT);
BEGIN
IF thisLevel > 60 THEN
nextLevel := thisLevel;
RETURN;
END;
IF (ODD(thisLevel)) THEN
nextLevel := thisLevel + 2;
ELSE
nextLevel := thisLevel + 1;
END;
END GetNextOddEmbeddingLevel;
PROCEDURE GetBorderOfRunCharacterType(thisLevel, otherLevel : LONGINT; VAR result : Strings.String);
BEGIN
IF (ODD(Strings.Max(thisLevel,otherLevel))) THEN
result^ := "R";
ELSE
result^ := "L";
END;
END GetBorderOfRunCharacterType;
PROCEDURE IsEuropeanNumberAdjacent(pos : LONGINT; terminators : BOOLEAN; VAR cacheElement : LineElement; VAR max : LONGINT; VAR state : LONGINT; VAR ENstate : LONGINT) : BOOLEAN;
VAR
ENstateBefore : LONGINT;
hlThisCharacterType : Strings.String;
BEGIN
amm.NewString(hlThisCharacterType);
ENstateBefore := ENstate;
IF (HL5 # NIL) & HL5(cacheElement,pos,hlThisCharacterType,state,ENstate,EuropeanNumberAdj) THEN
IF hlThisCharacterType^ = "EN" THEN
max := pos;
amm.FreeString(hlThisCharacterType);
RETURN TRUE;
END;
END;
amm.FreeString(hlThisCharacterType);
IF (pos < 0) OR (pos >= cacheElement.lineLength) THEN
RETURN FALSE;
ELSIF cacheElement.characterTypes[pos]^ = "EN" THEN
IF ENstate = LeftStrongType THEN
cacheElement.characterTypes[pos]^ := "L";
END;
max := pos;
state := EuropeanNumber;
RETURN TRUE;
ELSIF terminators & ((cacheElement.characterTypes[pos]^ = "ET") OR (cacheElement.characterTypes[pos]^ = "BN")) THEN
IF IsEuropeanNumberAdjacent(pos + 1,terminators,cacheElement,max,state,ENstate) THEN
IF ENstateBefore = LeftStrongType THEN
cacheElement.characterTypes[pos]^ := "L";
ELSE
cacheElement.characterTypes[pos]^ := "EN";
END;
RETURN TRUE;
ELSE
cacheElement.characterTypes[pos]^ := "ON";
RETURN FALSE;
END;
ELSIF ~terminators & (cacheElement.characterTypes[pos]^ = "BN") THEN
cacheElement.characterTypes[pos]^ := "ON";
RETURN IsEuropeanNumberAdjacent(pos + 1,terminators,cacheElement,max,state,ENstate);
ELSE
max := pos;
IF cacheElement.characterTypes[pos]^ = "AN" THEN
state := ArabicNumber;
ELSE
state := NeutralType;
IF cacheElement.characterTypes[pos]^ = "L" THEN
ENstate := LeftStrongType;
END;
END;
RETURN FALSE;
END;
END IsEuropeanNumberAdjacent;
PROCEDURE IsArabicNumberAdjacent(pos : LONGINT; VAR cacheElement : LineElement; VAR max : LONGINT; VAR state : LONGINT; VAR ENstate : LONGINT) : BOOLEAN;
VAR
hlThisCharacterType : Strings.String;
BEGIN
amm.NewString(hlThisCharacterType);
IF (HL5 # NIL) & HL5(cacheElement,pos,hlThisCharacterType,state,ENstate,ArabicNumberAdj) THEN
IF hlThisCharacterType^ = "AN" THEN
max := pos;
amm.FreeString(hlThisCharacterType);
RETURN TRUE;
END;
END;
amm.FreeString(hlThisCharacterType);
IF (pos < 0) OR (pos >= cacheElement.lineLength) THEN
RETURN FALSE;
ELSIF cacheElement.characterTypes[pos]^ = "AN" THEN
state := ArabicNumber;
max := pos;
RETURN TRUE;
ELSIF cacheElement.characterTypes[pos]^ = "BN" THEN
cacheElement.characterTypes[pos]^ := "ON";
RETURN IsArabicNumberAdjacent(pos + 1,cacheElement,max,state,ENstate);
ELSE
IF cacheElement.characterTypes[pos]^ = "EN" THEN
IF ENstate = LeftStrongType THEN
cacheElement.characterTypes[pos]^ := "L";
END;
state := EuropeanNumber;
ELSE
IF cacheElement.characterTypes[pos]^ = "L" THEN
ENstate := LeftStrongType;
ELSIF cacheElement.characterTypes[pos]^ = "R" THEN
ENstate := NeutralType;
END;
state := NeutralType;
END;
max := pos;
RETURN FALSE;
END;
END IsArabicNumberAdjacent;
PROCEDURE FindFirstStrongCharacter(start, end : LONGINT; VAR charType : Strings.String);
VAR
char : Texts.Char32;
strongCharFound : BOOLEAN;
BEGIN
char := 0H;
strongCharFound := FALSE;
textReader.SetPosition(start);
textReader.SetDirection(1);
REPEAT
textReader.ReadCh(char);
strongCharFound := IsStrongCharacter(char,charType);
UNTIL (strongCharFound) OR (textReader.GetPosition() >= end);
IF ~strongCharFound THEN
charType := NIL;
END;
END FindFirstStrongCharacter;
PROCEDURE IsStrongCharacter(CONST char : Texts.Char32; VAR charType : Strings.String) : BOOLEAN;
BEGIN
unicodePropertyReader.GetBidiCharacterType(char,charType);
RETURN IsStrongCharacterType(charType);
END IsStrongCharacter;
PROCEDURE IsStrongCharacterType(CONST charType : Strings.String) : BOOLEAN;
BEGIN
RETURN (charType^ = "L") OR (charType^ = "AL") OR (charType^ = "R");
END IsStrongCharacterType;
PROCEDURE IsNeutralCharacterType(CONST charType : Strings.String) : BOOLEAN;
BEGIN
RETURN (charType^ = "B") OR (charType^ = "S") OR (charType^ = "WS") OR (charType^ = "ON") OR (charType^ = "BN");
END IsNeutralCharacterType;
PROCEDURE GetFinalDirectionalType(charType : Strings.String; thisLevel, otherLevel : LONGINT; VAR result : Strings.String);
BEGIN
IF (thisLevel # otherLevel) THEN
GetBorderOfRunCharacterType(thisLevel,otherLevel,result);
RETURN;
ELSIF charType^ = "L" THEN
result^ := "L";
ELSE
result^ := "R";
END;
END GetFinalDirectionalType;
PROCEDURE HasSameDirectionalType(charType : Strings.String; thisPos : LONGINT; cacheElement : LineElement) : BOOLEAN;
VAR
hlThisCharacterType : Strings.String;
dummyInt : LONGINT;
dummyCharType : Strings.String;
result : BOOLEAN;
BEGIN
amm.NewString(hlThisCharacterType);
IF (HL5 # NIL) & HL5(cacheElement,thisPos,hlThisCharacterType,dummyInt,dummyInt,SameDirection) THEN
amm.FreeString(hlThisCharacterType);
RETURN charType^ = hlThisCharacterType^;
END;
amm.FreeString(hlThisCharacterType);
amm.NewString(dummyCharType);
IF (thisPos = cacheElement.lineLength) THEN
GetBorderOfRunCharacterType(cacheElement.characterEmbeddingLevels[thisPos-1],cacheElement.paragraphEmbeddingLevel,dummyCharType);
result := charType = dummyCharType;
amm.FreeString(dummyCharType);
RETURN result;
ELSIF (cacheElement.characterEmbeddingLevels[thisPos-1] # cacheElement.characterEmbeddingLevels[thisPos]) THEN
GetBorderOfRunCharacterType(cacheElement.characterEmbeddingLevels[thisPos-1],cacheElement.characterEmbeddingLevels[thisPos],dummyCharType);
result := charType = dummyCharType;
amm.FreeString(dummyCharType);
RETURN result;
ELSIF (IsNeutralCharacterType(cacheElement.characterTypes[thisPos])) THEN
amm.FreeString(dummyCharType);
RETURN HasSameDirectionalType(charType,thisPos+1,cacheElement);
ELSE
GetFinalDirectionalType(cacheElement.characterTypes[thisPos-1],cacheElement.characterEmbeddingLevels[thisPos-1],cacheElement.characterEmbeddingLevels[thisPos],dummyCharType);
result := charType = dummyCharType;
amm.FreeString(dummyCharType);
RETURN result;
END;
END HasSameDirectionalType;
PROCEDURE SweepCache;
VAR
thisPos, lastPos : LONGINT;
char : Texts.Char32;
charType : Strings.String;
BEGIN
lastPos := 0;
amm.NewString(charType);
LOOP
thisPos := lineCache.GetNextPos(lastPos);
IF (thisPos = lastPos) OR (thisPos < 0) THEN EXIT END;
textReader.SetPosition(thisPos-1);
textReader.ReadCh(char);
unicodePropertyReader.GetBidiCharacterType(char,charType);
IF charType^ # "B" THEN
lineCache.RemoveLine(thisPos);
ELSE
lastPos := thisPos;
END;
END;
amm.FreeString(charType);
END SweepCache;
PROCEDURE PrintOriginalTypedText*;
BEGIN
PrintOriginalTypedParagraph(0,textReader.text.GetLength()-1);
END PrintOriginalTypedText;
PROCEDURE PrintOriginalTypedParagraph(start,end : LONGINT);
VAR
char : Texts.Char32;
charType : Strings.String;
BEGIN
IF textReader.text = NIL THEN
RETURN;
END;
amm.NewString(charType);
KernelLog.Ln;
KernelLog.String("Printing original character types:"); KernelLog.Ln;
textReader.SetDirection(1);
textReader.SetPosition(start);
WHILE (textReader.GetPosition() < end) DO
textReader.ReadCh(char);
unicodePropertyReader.GetBidiCharacterType(char,charType);
KernelLog.String(charType^); KernelLog.String(" ");
END;
KernelLog.Ln;
amm.FreeString(charType);
END PrintOriginalTypedParagraph;
PROCEDURE PrintCurrentTypedText*;
BEGIN
PrintCurrentTypedParagraph(0,textReader.text.GetLength()-1);
END PrintCurrentTypedText;
PROCEDURE PrintCurrentTypedParagraph(start, end : LONGINT);
VAR
i : LONGINT;
type: Strings.String;
cacheElement : LineElement;
BEGIN
lineCache.FindElement(start,cacheElement);
KernelLog.Ln;
KernelLog.String("Printing current character types:"); KernelLog.Ln;
FOR i := 0 TO cacheElement.lineLength - 1 DO
type := cacheElement.characterTypes[i];
KernelLog.String(type^); KernelLog.String(" ");
END;
KernelLog.Ln;
END PrintCurrentTypedParagraph;
PROCEDURE PrintCodedText*;
BEGIN
PrintCodedParagraph(0,textReader.text.GetLength()-1);
END PrintCodedText;
PROCEDURE PrintCodedParagraph(start, end : LONGINT);
VAR
char : Texts.Char32;
BEGIN
IF textReader.text = NIL THEN
RETURN;
END;
KernelLog.Ln;
KernelLog.String("Printing character codes:"); KernelLog.Ln;
textReader.SetDirection(1);
textReader.SetPosition(start);
WHILE (textReader.GetPosition() < end) DO
textReader.ReadCh(char);
KernelLog.Hex(char,9); KernelLog.String(" ");
END;
KernelLog.Ln;
END PrintCodedParagraph;
PROCEDURE PrintCodedLine(textReader : Texts.TextReader; start, end : LONGINT);
VAR
char : Texts.Char32;
BEGIN
IF textReader.text = NIL THEN
RETURN;
END;
textReader.SetDirection(1);
textReader.SetPosition(start);
KernelLog.Ln;
KernelLog.String("Printing line character codes:"); KernelLog.Ln;
WHILE (textReader.GetPosition() < end) DO
textReader.ReadCh(char);
KernelLog.Hex(char,9); KernelLog.String(" ");
END;
KernelLog.Ln;
END PrintCodedLine;
PROCEDURE PrintLeveledText*;
BEGIN
PrintLeveledParagraph(0,textReader.text.GetLength()-1);
END PrintLeveledText;
PROCEDURE PrintLeveledParagraph(start, end : LONGINT);
VAR
i : LONGINT;
cacheElement : LineElement;
BEGIN
lineCache.FindElement(start,cacheElement);
KernelLog.Ln;
KernelLog.String("Printing resolved levels:"); KernelLog.Ln;
FOR i := 0 TO cacheElement.lineLength - 1 DO
KernelLog.Int(cacheElement.characterEmbeddingLevels[i],2); KernelLog.String(" ");
END;
KernelLog.Ln;
END PrintLeveledParagraph;
PROCEDURE PrintLineEmbeddingLevels(CONST levels : IntegerArray);
VAR
i : LONGINT;
BEGIN
KernelLog.Ln;
KernelLog.String("Printing line levels:"); KernelLog.Ln;
FOR i := 0 TO LEN(levels) - 1 DO
KernelLog.Int(levels[i],2); KernelLog.String(" ");
END;
KernelLog.Ln;
END PrintLineEmbeddingLevels;
END BidiFormatter;
HigherLevelProtocol1* = PROCEDURE(textReader : Texts.TextReader; start, end : LONGINT) : LONGINT;
HigherLevelProtocol2* = PROCEDURE(cacheLine : LineElement; pos, lastStrongType : LONGINT) : Strings.String;
HigherLevelProtocol3* = PROCEDURE(characters : CharArray; embeddingLevels : IntegerArray; originalTypes, types : Strings.StringArray;
char : Texts.Char32; charType : Strings.String; VAR hlCharacterType : Strings.String;
VAR hlEmbeddingLevel : LONGINT) : BOOLEAN;
HigherLevelProtocol5* = PROCEDURE(cacheElement : LineElement; pos : LONGINT; VAR thisCharacterType : Strings.String;
VAR state, ENstate : LONGINT; placeOfVenue : LONGINT) : BOOLEAN;
HigherLevelProtocol6* = PROCEDURE(originalChar : Texts.Char32) : Texts.Char32;
VAR
showUnicodeControlCharacters* : BOOLEAN;
PROCEDURE GetDisplayCharacter*(VAR char : Texts.Char32);
BEGIN
IF ~showUnicodeControlCharacters THEN
RETURN;
END;
IF char = 200EH THEN
char := 21BEH;
ELSIF char = 200FH THEN
char := 21BFH;
ELSIF char = 202AH THEN
char := 2308H;
ELSIF char = 202BH THEN
char := 2309H;
ELSIF char = 202DH THEN
char := 250DH;
ELSIF char = 202EH THEN
char := 2511H;
ELSIF char = 202CH THEN
char := 252FH;
END;
END GetDisplayCharacter;
PROCEDURE TestReformatting*(context : Commands.Context);
VAR
filename, fullname: ARRAY 256 OF CHAR;
msg : ARRAY 512 OF CHAR;
file : Files.File;
decoder : Codecs.TextDecoder;
in : Streams.Reader;
decoderRes : LONGINT;
text : Texts.Text;
error : BOOLEAN;
bidiFormatter : BidiFormatter;
textReader : Texts.TextReader;
BEGIN
error := FALSE;
context.arg.SkipWhitespace; context.arg.String(filename);
COPY(filename, fullname);
file := Files.Old(filename);
IF (file # NIL) THEN
file.GetName(fullname);
ELSE
file := Files.New(filename);
IF (file # NIL) THEN
file.GetName(fullname);
file := NIL;
END;
END;
IF (file # NIL) THEN
decoder := Codecs.GetTextDecoder("UTF-8");
IF (decoder # NIL) THEN
in := Codecs.OpenInputStream(fullname);
IF in # NIL THEN
decoder.Open(in, decoderRes);
IF decoderRes = 0 THEN
text := decoder.GetText();
END;
ELSE
msg := "Can't open input stream on file "; Strings.Append(msg, fullname);
KernelLog.String(msg);
error := TRUE;
END;
ELSE
msg := "No decoder for file "; Strings.Append(msg, fullname);
Strings.Append(msg, " (Format: "); Strings.Append(msg, "UTF_8"); Strings.Append(msg, ")");
KernelLog.String(msg);
error := TRUE;
END;
ELSE
msg := "file '"; Strings.Append(msg, fullname); Strings.Append(msg,"' not found.");
KernelLog.String(msg);
error := TRUE;
END;
IF ~error THEN
KernelLog.String("file successfully read."); KernelLog.Ln;
text.AcquireRead;
NEW(bidiFormatter,text);
bidiFormatter.ReformatText;
textReader := bidiFormatter.ReorderLine(452,48);
text.ReleaseRead;
END;
END TestReformatting;
BEGIN
showUnicodeControlCharacters := FALSE;
END UnicodeBidirectionality.
SystemTools.Free UnicodeProperties ~
SystemTools.Free UnicodeBidirectionality ~
UnicodeBidirectionality.TestReformatting "BidiTestData.txt"~