MODULE IDCT;
IMPORT
SYSTEM, Commands, Machine, KernelLog;
CONST
NrBlocks=4; (* not referenced in the visible part of this module *)
(* Fixed-point parameters of the MMX/SSE2 code paths. *)
BITSINVACC = 4;
SHIFTINVROW = 16 - BITSINVACC; (* = 12; shift count used after the row pass in DCT8INVROWSSE *)
SHIFTINVCOL = 1 + BITSINVACC; (* = 5; shift count used after the column pass (PSRAW) *)
(* Multipliers of the integer path (Row/Col): Wk = 2048*sqrt(2)*cos(k*pi/16), rounded. *)
W1 = 2841;
W2 = 2676;
W3 = 2408;
W5 = 1609;
W6 = 1108;
W7 = 565;
(* Values stored in status to identify the implementation currently installed in Transform. *)
INT* = 0;
SSE* = 1;
SSE2* = 2;
VAR
(* Rounding terms; computed from BITSINVACC in the module body. *)
RNDINVROW, RNDINVCOL, RNDINVCORR: INTEGER;
(* Constant vectors for the SIMD paths; allocated and filled by FillTablesSSE2 or FillTablesSSE. *)
M128onecorr, M128roundinvrow, M128roundinvcol, M128roundinvcorr, M128tg116, M128tg216, M128tg316, M128cos416: POINTER TO ARRAY OF INTEGER;
(* Row-pass coefficient tables for the SIMD paths, filled by the same procedures. *)
M128tabi04, M128tabi17, M128tabi26, M128tabi35: POINTER TO ARRAY OF INTEGER;
(* Addresses of the first used element of each array above; these are what the inline assembler dereferences. *)
tabi04adr, tabi17adr, tabi26adr, tabi35adr, onecorradr, roundinvrowadr, roundinvcoladr, roundinvcorradr, tg116adr, tg216adr, tg316adr, cos416adr: LONGINT;
(* Currently selected implementation; assigned in CheckFeatures and Change. src and dst are addresses of blocks of 64 16-bit values. *)
Transform*: PROCEDURE (src, dst: LONGINT);
(* 1024-entry clamp table built by FillTablesINT; entry i + 512 holds i limited to -256..255. *)
intTab: POINTER TO ARRAY OF INTEGER;
(* One of INT, SSE, SSE2. *)
status*: LONGINT;
(* Inline code for TransformSSE2: loads the first pair of input vectors for DCT8INVROWSSE2.
Expects EAX = source address (set by the caller via Machine.SetEAX). *)
PROCEDURE -MOVDQA1;
CODE{SYSTEM.i386, SYSTEM.SSE, SYSTEM.MMX, SYSTEM.SSE2}
MOVDQA XMM0, [EAX] ; XMM0 := 16 bytes at src+0
MOVDQA XMM4, [EAX+32] ; XMM4 := 16 bytes at src+32
END MOVDQA1;
(* Inline code for TransformSSE2: stores the results for offsets 0 and 32 to the destination (EDX)
and loads the next pair of input vectors (offsets 64 and 96) from the source (EAX). *)
PROCEDURE -MOVDQA2;
CODE{SYSTEM.i386, SYSTEM.SSE, SYSTEM.MMX, SYSTEM.SSE2}
MOVDQA [EDX], XMM0 ; dst+0 := result in XMM0
MOVDQA [EDX+32], XMM4 ; dst+32 := result in XMM4
MOVDQA XMM0, [EAX+64] ; next input: src+64
MOVDQA XMM4, [EAX+96] ; next input: src+96
END MOVDQA2;
(* Inline code for TransformSSE2: stores the results for offsets 64 and 96 to the destination (EDX)
and loads the input vectors at offsets 48 and 16 from the source (EAX). *)
PROCEDURE -MOVDQA3;
CODE{SYSTEM.i386, SYSTEM.SSE, SYSTEM.MMX, SYSTEM.SSE2}
MOVDQA [EDX+64], XMM0 ; dst+64 := result in XMM0
MOVDQA [EDX+96], XMM4 ; dst+96 := result in XMM4
MOVDQA XMM0, [EAX+48] ; next input: src+48
MOVDQA XMM4, [EAX+16] ; next input: src+16
END MOVDQA3;
(* Inline code for TransformSSE2: stores the results for offsets 48 and 16 to the destination (EDX)
and loads the last pair of input vectors (offsets 80 and 112) from the source (EAX). *)
PROCEDURE -MOVDQA4;
CODE{SYSTEM.i386, SYSTEM.SSE, SYSTEM.MMX, SYSTEM.SSE2}
MOVDQA [EDX+48], XMM0 ; dst+48 := result in XMM0
MOVDQA [EDX+16], XMM4 ; dst+16 := result in XMM4
MOVDQA XMM0, [EAX+80] ; next input: src+80
MOVDQA XMM4, [EAX+112] ; next input: src+112
END MOVDQA4;
(* Inline code for TransformSSE2: stores the results for offsets 80 and 112 to the destination (EDX).
Each store is followed by a reload of the same location into the same register, so XMM0 and XMM4
still hold these two results when DCT8INVCOLSSE2 starts. *)
PROCEDURE -MOVDQA5;
CODE{SYSTEM.i386, SYSTEM.SSE, SYSTEM.MMX, SYSTEM.SSE2}
MOVDQA [EDX+80], XMM0
MOVDQA XMM0, [EDX+80] ; reloads the value just stored, XMM0 is unchanged; the original author queried whether it is necessary
MOVDQA [EDX+112], XMM4
MOVDQA XMM4, [EDX+112] ; reloads the value just stored, XMM4 is unchanged; the original author queried whether it is necessary
END MOVDQA5;
(** Inverse DCT of the block of 64 16-bit values at src, written to dst, using the SSE2 inline code.
	Requires FillTablesSSE2 to have been called. All memory operands are accessed with MOVDQA or as
	SSE2 memory operands, which require 16-byte aligned addresses.
	The seven locals are not read by name: DCT8INVCOLSSE2 is expanded inline in this frame and fetches
	them through [EBP-4] .. [EBP-28], so their number and declaration order must not change
	(presumably the first declared local sits at [EBP-4] - TODO confirm against the compiler). *)
PROCEDURE TransformSSE2*(src, dst: LONGINT);
VAR tg116, tg216, tg316, cos416, onecorr, roundinvcol, roundinvcorr: LONGINT;
BEGIN
	(* publish the constant addresses in the stack frame for DCT8INVCOLSSE2 *)
	tg116 := tg116adr;
	tg216 := tg216adr;
	tg316 := tg316adr;
	cos416 := cos416adr;
	onecorr := onecorradr;
	roundinvcol := roundinvcoladr;
	roundinvcorr := roundinvcorradr;
	Machine.SetEAX(src);
	Machine.SetEDX(dst);
	(* DCT8INVROWSSE2 adds the rounding vector through [EBX]. The address used to be loaded into ECX,
	which is overwritten with a table address below, so EBX was never initialised. *)
	Machine.SetEBX(roundinvrowadr);
	MOVDQA1;	(* inputs at offsets 0 and 32 *)
	Machine.SetESI(tabi04adr );	(* table for the vector in XMM0 *)
	Machine.SetECX(tabi26adr );	(* table for the vector in XMM4 *)
	DCT8INVROWSSE2;
	MOVDQA2;	(* store 0/32, load 64/96; same tables *)
	DCT8INVROWSSE2;
	MOVDQA3;	(* store 64/96, load 48/16 *)
	Machine.SetESI(tabi35adr );
	Machine.SetECX(tabi17adr );
	DCT8INVROWSSE2;
	MOVDQA4;	(* store 48/16, load 80/112; same tables *)
	DCT8INVROWSSE2;
	MOVDQA5;	(* store 80/112, results stay in XMM0/XMM4 *)
	DCT8INVCOLSSE2	(* column pass, in place on dst *)
END TransformSSE2;
(* Inline code: row pass for two 8-element vectors at once.
In: XMM0, XMM4 = the two input vectors (8 x 16 bit each);
ESI = coefficient table for XMM0, ECX = coefficient table for XMM4 (offsets 0, 16, 32, 48 are read);
EBX = address of a 16-byte rounding vector that is added to the 32-bit sums.
Out: XMM0, XMM4 = results, shifted right by 12 and packed back to 16 bit with saturation.
XMM1-XMM3 and XMM5-XMM7 are used as scratch. The two computations are interleaved instruction by instruction. *)
PROCEDURE -DCT8INVROWSSE2;
CODE{SYSTEM.i386, SYSTEM.SSE, SYSTEM.MMX, SYSTEM.SSE2}
PSHUFLW XMM0, XMM0, 0D8H ; reorder the low four words of the first vector
PSHUFD XMM1, XMM0, 0 ; broadcast dword 0
PMADDWD XMM1, [ESI]
PSHUFD XMM3, XMM0, 055H ; broadcast dword 1
PSHUFHW XMM0, XMM0, 0D8H ; reorder the high four words of the first vector
PMADDWD XMM3, [ESI+32]
PSHUFD XMM2, XMM0, 0AAH ; broadcast dword 2
PSHUFD XMM0, XMM0, 0FFH ; broadcast dword 3
PMADDWD XMM2, [ESI+16]
PSHUFHW XMM4, XMM4, 0D8H ; same word reordering for the second vector
PADDD XMM1, [EBX] ; add rounding vector
PSHUFLW XMM4, XMM4, 0D8H
PMADDWD XMM0, [ESI+48]
PSHUFD XMM5, XMM4, 0
PSHUFD XMM6, XMM4, 0AAH
PMADDWD XMM5, [ECX]
PADDD XMM1, XMM2
MOVDQA XMM2, XMM1
PSHUFD XMM7, XMM4, 055H
PMADDWD XMM6, [ECX+16]
PADDD XMM0, XMM3
PSHUFD XMM4, XMM4, 0FFH
PSUBD XMM2, XMM0 ; difference terms of the first vector
PMADDWD XMM7, [ECX+32]
PADDD XMM0, XMM1 ; sum terms of the first vector
PSRAD XMM2, 12
PADDD XMM5, [EBX] ; add rounding vector
PMADDWD XMM4, [ECX+48]
PADDD XMM5, XMM6
MOVDQA XMM6, XMM5
PSRAD XMM0, 12
PSHUFD XMM2, XMM2, 01BH ; reverse the order of the four difference dwords
PACKSSDW XMM0, XMM2 ; first result: sums in the low half, reversed differences in the high half
PADDD XMM4, XMM7
PSUBD XMM6, XMM4 ; difference terms of the second vector
PADDD XMM4, XMM5 ; sum terms of the second vector
PSRAD XMM6, 12
PSRAD XMM4, 12
PSHUFD XMM6, XMM6, 01BH
PACKSSDW XMM4, XMM6 ; second result
END DCT8INVROWSSE2;
(* Inline code: column pass over the eight 16-byte vectors at EDX .. EDX+112, in place.
In: EDX = block address; XMM0 = vector that belongs at offset 80 and XMM4 = vector that belongs at
offset 112 (TransformSSE2 leaves them in these registers); the other six vectors are read from memory.
Constant vectors are reached through addresses stored in the enclosing procedure's frame at
[EBP-4] .. [EBP-28]; with the local declaration order of TransformSSE2 these are presumably
tg116, tg216, tg316, cos416, onecorr, roundinvcol, roundinvcorr - TODO confirm frame layout.
Every result is shifted right by SHIFTINVCOL before it is stored.
Clobbers EDI and XMM0-XMM7; offsets 48 and 112 of the block are also used as temporaries. *)
PROCEDURE -DCT8INVCOLSSE2;
CODE{SYSTEM.i386, SYSTEM.SSE, SYSTEM.MMX, SYSTEM.SSE2}
MOV EDI, [EBP-12] ; frame slot -12 (presumably tg316)
MOVDQA XMM1, [EDI]
MOVDQA XMM2, XMM0
MOVDQA XMM3, [EDX+48]
PMULHW XMM0, XMM1
PMULHW XMM1, XMM3
MOV EDI, [EBP-4] ; frame slot -4 (presumably tg116)
MOVDQA XMM5, [EDI]
MOVDQA XMM6, XMM4
PMULHW XMM4, XMM5
PADDSW XMM0, XMM2
PMULHW XMM5, [EDX+16]
PADDSW XMM1, XMM3
MOVDQA XMM7, [EDX+96]
PADDSW XMM0, XMM3
MOV EDI, [EBP-8] ; frame slot -8 (presumably tg216)
MOVDQA XMM3, [EDI]
PSUBSW XMM2, XMM1
PMULHW XMM7, XMM3
MOVDQA XMM1, XMM0
PMULHW XMM3, [EDX+32]
PSUBSW XMM5, XMM6
PADDSW XMM4, [EDX+16]
PADDSW XMM0, XMM4
MOV EDI, [EBP-20] ; frame slot -20 (presumably onecorr)
PADDSW XMM0, [EDI]
PSUBSW XMM4, XMM1
MOVDQA XMM6, XMM5
PSUBSW XMM5, XMM2
MOV EDI, [EBP-20]
PADDSW XMM5, [EDI]
PADDSW XMM6, XMM2
MOVDQA [EDX+112], XMM0 ; temporary, read back below
MOVDQA XMM1, XMM4
MOV EDI, [EBP-16] ; frame slot -16 (presumably cos416)
MOVDQA XMM0, [EDI]
PADDSW XMM4, XMM5
MOV EDI, [EBP-16]
MOVDQA XMM2, [EDI]
PMULHW XMM2, XMM4
MOVDQA [EDX+48], XMM6 ; temporary, read back below
PSUBSW XMM1, XMM5
PADDSW XMM7, [EDX+32]
PSUBSW XMM3, [EDX+96]
MOVDQA XMM6, [EDX]
PMULHW XMM0, XMM1
MOVDQA XMM5, [EDX+64]
PADDSW XMM5, XMM6
PSUBSW XMM6, [EDX+64]
PADDSW XMM4, XMM2
MOV EDI, [EBP-20]
POR XMM4, [EDI]
PADDSW XMM0, XMM1
MOV EDI, [EBP-20]
POR XMM0, [EDI]
MOVDQA XMM2, XMM5
PADDSW XMM5, XMM7
MOVDQA XMM1, XMM6
MOV EDI, [EBP-24] ; frame slot -24 (presumably roundinvcol)
PADDSW XMM5, [EDI]
PSUBSW XMM2, XMM7
MOVDQA XMM7, [EDX+112]
PADDSW XMM6, XMM3
MOV EDI, [EBP-24]
PADDSW XMM6, [EDI]
PADDSW XMM7, XMM5
PSRAW XMM7, SHIFTINVCOL
PSUBSW XMM1, XMM3
MOV EDI, [EBP-28] ; frame slot -28 (presumably roundinvcorr)
PADDSW XMM1, [EDI]
MOVDQA XMM3, XMM6
MOV EDI, [EBP-28]
PADDSW XMM2, [EDI]
PADDSW XMM6, XMM4
MOVDQA [EDX], XMM7 ; output at offset 0
PSRAW XMM6, SHIFTINVCOL
MOVDQA XMM7, XMM1
PADDSW XMM1, XMM0
MOVDQA [EDX+16], XMM6 ; output at offset 16
PSRAW XMM1, SHIFTINVCOL
MOVDQA XMM6, [EDX+48]
PSUBSW XMM7, XMM0
PSRAW XMM7, SHIFTINVCOL
MOVDQA [EDX+32], XMM1 ; output at offset 32
PSUBSW XMM5, [EDX+112]
PSRAW XMM5, SHIFTINVCOL
MOVDQA [EDX+112], XMM5 ; output at offset 112
PSUBSW XMM3, XMM4
PADDSW XMM6, XMM2
PSUBSW XMM2, [EDX+48]
PSRAW XMM6, SHIFTINVCOL
PSRAW XMM2, SHIFTINVCOL
MOVDQA [EDX+48], XMM6 ; output at offset 48
PSRAW XMM3, SHIFTINVCOL
MOVDQA [EDX+64], XMM2 ; output at offset 64
MOVDQA [EDX+80], XMM7 ; output at offset 80
MOVDQA [EDX+96], XMM3 ; output at offset 96
END DCT8INVCOLSSE2;
(* Inline code for TransformSSE: loads the two 8-byte halves of the first input row.
Expects ECX = source address (set by the caller via Machine.SetECX). *)
PROCEDURE -MOVQ1;
CODE{SYSTEM.i386, SYSTEM.SSE, SYSTEM.MMX}
MOVQ MMX0, [ECX] ; bytes 0..7 at src+0
MOVQ MMX1, [ECX+8] ; bytes 8..15 at src+0
END MOVQ1;
(* Inline code for TransformSSE: stores the row result held in MMX3/MMX7 at dst+0 (EDX)
and loads the input row at src+16 (ECX) into MMX0/MMX1. Loads and stores are interleaved. *)
PROCEDURE -MOVQ2;
CODE{SYSTEM.i386, SYSTEM.SSE, SYSTEM.MMX}
MOVQ MMX0, [ECX+16]
MOVQ [EDX], MMX3
MOVQ MMX1, [ECX+24]
MOVQ [EDX+8], MMX7
END MOVQ2;
(* Inline code for TransformSSE: stores the row result held in MMX3/MMX7 at dst+16 (EDX)
and loads the input row at src+32 (ECX) into MMX0/MMX1. *)
PROCEDURE -MOVQ3;
CODE{SYSTEM.i386, SYSTEM.SSE, SYSTEM.MMX}
MOVQ MMX0, [ECX+32]
MOVQ [EDX+16], MMX3
MOVQ MMX1, [ECX+40]
MOVQ [EDX+24], MMX7
END MOVQ3;
(* Inline code for TransformSSE: stores the row result held in MMX3/MMX7 at dst+32 (EDX)
and loads the input row at src+48 (ECX) into MMX0/MMX1. *)
PROCEDURE -MOVQ4;
CODE{SYSTEM.i386, SYSTEM.SSE, SYSTEM.MMX}
MOVQ MMX0, [ECX+48]
MOVQ [EDX+32], MMX3
MOVQ MMX1, [ECX+56]
MOVQ [EDX+40], MMX7
END MOVQ4;
(* Inline code for TransformSSE: stores the row result held in MMX3/MMX7 at dst+48 (EDX)
and loads the input row at src+64 (ECX) into MMX0/MMX1. *)
PROCEDURE -MOVQ5;
CODE{SYSTEM.i386, SYSTEM.SSE, SYSTEM.MMX}
MOVQ MMX0, [ECX+64]
MOVQ [EDX+48], MMX3
MOVQ MMX1, [ECX+72]
MOVQ [EDX+56], MMX7
END MOVQ5;
(* Inline code for TransformSSE: stores the row result held in MMX3/MMX7 at dst+64 (EDX)
and loads the input row at src+80 (ECX) into MMX0/MMX1. *)
PROCEDURE -MOVQ6;
CODE{SYSTEM.i386, SYSTEM.SSE, SYSTEM.MMX}
MOVQ MMX0, [ECX+80]
MOVQ [EDX+64], MMX3
MOVQ MMX1, [ECX+88]
MOVQ [EDX+72], MMX7
END MOVQ6;
(* Inline code for TransformSSE: stores the row result held in MMX3/MMX7 at dst+80 (EDX)
and loads the input row at src+96 (ECX) into MMX0/MMX1. *)
PROCEDURE -MOVQ7;
CODE{SYSTEM.i386, SYSTEM.SSE, SYSTEM.MMX}
MOVQ MMX0, [ECX+96]
MOVQ [EDX+80], MMX3
MOVQ MMX1, [ECX+104]
MOVQ [EDX+88], MMX7
END MOVQ7;
(* Inline code for TransformSSE: stores the row result held in MMX3/MMX7 at dst+96 (EDX)
and loads the last input row, at src+112 (ECX), into MMX0/MMX1. *)
PROCEDURE -MOVQ8;
CODE{SYSTEM.i386, SYSTEM.SSE, SYSTEM.MMX}
MOVQ MMX0, [ECX+112]
MOVQ [EDX+96], MMX3
MOVQ MMX1, [ECX+120]
MOVQ [EDX+104], MMX7
END MOVQ8;
(* Inline code for TransformSSE: stores the last row result (MMX3/MMX7) at dst+112 and preloads
MMX0 with the 8 bytes at dst+80, which the first DCT8INVCOLSSE expansion expects in MMX0. *)
PROCEDURE -MOVQ9;
CODE{SYSTEM.i386, SYSTEM.SSE, SYSTEM.MMX}
MOVQ [EDX+112], MMX3
MOVQ MMX0, [EDX+80] ; input of the column pass
MOVQ [EDX+120], MMX7
END MOVQ9;
(* Inline code for TransformSSE: prepares the second DCT8INVCOLSSE expansion. Preloads MMX0 with
the 8 bytes at dst+88 and then advances EDX by 8, so that the second pass works on the second
8-byte half of every 16-byte row. *)
PROCEDURE -MOVQ10;
CODE{SYSTEM.i386, SYSTEM.SSE, SYSTEM.MMX}
MOVQ MMX0, [EDX+88] ; equals [new EDX + 80] after the ADD below
ADD EDX, 8
END MOVQ10;
(* Inverse DCT of the block of 64 16-bit values at src, written to dst, using the MMX-register inline code.
Requires FillTablesSSE to have been called.
The seven locals are not read by name: DCT8INVCOLSSE is expanded inline in this frame and fetches
them through EBP-relative offsets, so their number and declaration order must not change
(presumably the first declared local sits at [EBP-4] - TODO confirm against the compiler).
NOTE(review): no EMMS is issued anywhere in the visible code after the MMX sequence - confirm
whether callers rely on x87 state afterwards. *)
PROCEDURE TransformSSE*(src, dst: LONGINT);
VAR tg116, tg216, tg316, cos416, onecorr, roundinvcol, roundinvcorr: LONGINT;
BEGIN
(* publish the constant addresses in the stack frame for DCT8INVCOLSSE *)
tg116 := tg116adr;
tg216 := tg216adr;
tg316 := tg316adr;
cos416 := cos416adr;
onecorr := onecorradr;
roundinvcol := roundinvcoladr;
roundinvcorr := roundinvcorradr;
Machine.SetECX(src);
Machine.SetEDX(dst);
Machine.SetEBX(roundinvrowadr ); (* rounding vector, read through [EBX] by DCT8INVROWSSE *)
(* Row pass: one DCT8INVROWSSE expansion per 16-byte row; ESI selects the coefficient table.
Each MOVQn stores the previous result to dst and loads the next row from src. *)
MOVQ1;
Machine.SetESI(tabi04adr ); (* row at offset 0 *)
DCT8INVROWSSE;
MOVQ2;
Machine.SetESI(tabi17adr ); (* row at offset 16 *)
DCT8INVROWSSE;
MOVQ3;
Machine.SetESI(tabi26adr ); (* row at offset 32 *)
DCT8INVROWSSE;
MOVQ4;
Machine.SetESI(tabi35adr ); (* row at offset 48 *)
DCT8INVROWSSE;
MOVQ5;
Machine.SetESI(tabi04adr ); (* row at offset 64 *)
DCT8INVROWSSE;
MOVQ6;
Machine.SetESI(tabi35adr ); (* row at offset 80 *)
DCT8INVROWSSE;
MOVQ7;
Machine.SetESI(tabi26adr ); (* row at offset 96 *)
DCT8INVROWSSE;
MOVQ8;
Machine.SetESI(tabi17adr ); (* row at offset 112 *)
DCT8INVROWSSE;
MOVQ9;
(* Column pass, in place on dst: first 8-byte half of every row, then (after MOVQ10 adds 8 to EDX) the second half. *)
DCT8INVCOLSSE;
MOVQ10;
DCT8INVCOLSSE;
END TransformSSE;
(* Inline code: row pass for one 16-byte row held in MMX0 (first 8 bytes) and MMX1 (second 8 bytes).
In: ESI = 64-byte coefficient table; EBX = address of an 8-byte rounding vector.
Out: MMX3 = first 8 bytes of the result, MMX7 = second 8 bytes; 32-bit sums are shifted right by
SHIFTINVROW and packed to 16 bit with saturation. All of MMX0-MMX7 are overwritten.
NOTE(review): the target set lists SYSTEM.SSE2 while the other MMX-register procedures in this
module list SYSTEM.SSE - confirm which is intended. *)
PROCEDURE -DCT8INVROWSSE;
CODE{SYSTEM.i386, SYSTEM.SSE2, SYSTEM.MMX}
MOVQ MMX2, MMX0
MOVQ MMX3, [ESI]
PSHUFW MMX0, MMX0, 88H ; words 0 and 2, duplicated
MOVQ MMX4, [ESI+8]
MOVQ MMX5, MMX1
PMADDWD MMX3, MMX0
MOVQ MMX6, [ESI+32]
PSHUFW MMX1, MMX1, 88H ; words 4 and 6, duplicated
PMADDWD MMX4, MMX1
MOVQ MMX7, [ESI+40]
PSHUFW MMX2, MMX2, 0DDH ; words 1 and 3, duplicated
PMADDWD MMX6, MMX2
PSHUFW MMX5, MMX5, 0DDH ; words 5 and 7, duplicated
PMADDWD MMX7, MMX5
PADDD MMX3, [EBX] ; [EBX] = roundinvrowadr
PMADDWD MMX0, [ESI+16]
PADDD MMX3, MMX4
PMADDWD MMX1, [ESI+24]
MOVQ MMX4, MMX3
PMADDWD MMX2, [ESI+48]
PADDD MMX6, MMX7
PMADDWD MMX5, [ESI+56]
PADDD MMX3, MMX6
PADDD MMX0, [EBX] ; [EBX] = roundinvrowadr
PSRAD MMX3, SHIFTINVROW
PADDD MMX0, MMX1
PSUBD MMX4, MMX6
MOVQ MMX7, MMX0
PADDD MMX2, MMX5
PADDD MMX0, MMX2
PSRAD MMX4, SHIFTINVROW
PSUBD MMX7, MMX2
PSRAD MMX0, SHIFTINVROW
PSRAD MMX7, SHIFTINVROW
PACKSSDW MMX3, MMX0 ; first half of the result
PACKSSDW MMX7, MMX4
PSHUFW MMX7, MMX7, 0B1H ; swap the words inside each dword to get the second half in order
END DCT8INVROWSSE;
(* Inline code: column pass over one 8-byte half of each of the eight rows at EDX, EDX+16 .. EDX+112, in place.
In: EDX = address of the half block; MMX0 = the 8 bytes at [EDX+80], preloaded by MOVQ9 or MOVQ10.
Constant vectors are reached through addresses stored in the enclosing procedure's frame at
[EBP-4] .. [EBP-28]; with the local declaration order of TransformSSE these are presumably
tg116, tg216, tg316, cos416, onecorr, roundinvcol, roundinvcorr - TODO confirm frame layout.
Every result is shifted right by SHIFTINVCOL before it is stored.
Clobbers EDI and MMX0-MMX7; offsets 48 and 112 are also used as temporaries.
Two operands were corrected to agree with DCT8INVCOLSSE2 and with the rest of this procedure:
the multiply that read [EDX-32] (outside the block) now reads [EDX+32], and the second constant
load that went through [EBP] and [EDI-16] now goes through frame slot [EBP-16] like the load just before it. *)
PROCEDURE -DCT8INVCOLSSE;
CODE{SYSTEM.i386, SYSTEM.SSE, SYSTEM.MMX}
MOV EDI, [EBP-12] ; frame slot -12 (presumably tg316)
MOVQ MMX1, [EDI]
MOVQ MMX2, MMX0
MOVQ MMX3, [EDX+48]
PMULHW MMX0, MMX1
MOVQ MMX4, [EDX+112]
PMULHW MMX1, MMX3
MOV EDI, [EBP-4] ; frame slot -4 (presumably tg116)
MOVQ MMX5, [EDI]
MOVQ MMX6, MMX4
PMULHW MMX4, MMX5
PADDSW MMX0, MMX2
PMULHW MMX5, [EDX+16]
PADDSW MMX1, MMX3
MOVQ MMX7, [EDX+96]
PADDSW MMX0, MMX3
MOV EDI, [EBP-8] ; frame slot -8 (presumably tg216)
MOVQ MMX3, [EDI]
PSUBSW MMX2, MMX1
PMULHW MMX7, MMX3
MOVQ MMX1, MMX0
PMULHW MMX3, [EDX+32] ; was [EDX-32]; the row at offset 32 is the operand, as in the PADDSW MMX7 below
PSUBSW MMX5, MMX6
PADDSW MMX4, [EDX+16]
PADDSW MMX0, MMX4
MOV EDI, [EBP-20] ; frame slot -20 (presumably onecorr)
PADDSW MMX0, [EDI]
PSUBSW MMX4, MMX1
PADDSW MMX7, [EDX+32]
MOVQ MMX6, MMX5
PSUBSW MMX3, [EDX+96]
PSUBSW MMX5, MMX2
MOV EDI, [EBP-20]
PADDSW MMX5, [EDI]
PADDSW MMX6, MMX2
MOVQ [EDX+112], MMX0 ; temporary, read back below
MOVQ MMX1, MMX4
MOV EDI, [EBP-16] ; frame slot -16 (presumably cos416)
MOVQ MMX2, [EDI]
PADDSW MMX4, MMX5
MOV EDI, [EBP-16] ; was MOV EDI, [EBP]
MOVQ MMX0, [EDI] ; was [EDI-16]; same constant as loaded into MMX2 above
PMULHW MMX2, MMX4
MOVQ [EDX+48], MMX6 ; temporary, read back below
PSUBSW MMX1, MMX5
MOVQ MMX6, [EDX]
PMULHW MMX0, MMX1
MOVQ MMX5, [EDX+64]
PADDSW MMX4, MMX2
MOV EDI, [EBP-20]
POR MMX4, [EDI]
PADDSW MMX5, MMX6
PSUBSW MMX6, [EDX+64]
PADDSW MMX0, MMX1
MOV EDI, [EBP-20]
POR MMX0, [EDI]
MOVQ MMX2, MMX5
PADDSW MMX5, MMX7
MOVQ MMX1, MMX6
MOV EDI, [EBP-24] ; frame slot -24 (presumably roundinvcol)
PADDSW MMX5, [EDI]
PSUBSW MMX2, MMX7
MOVQ MMX7, [EDX+112]
PADDSW MMX6, MMX3
MOV EDI, [EBP-24]
PADDSW MMX6, [EDI]
PADDSW MMX7, MMX5
PSRAW MMX7, SHIFTINVCOL
PSUBSW MMX1, MMX3
MOV EDI, [EBP-28] ; frame slot -28 (presumably roundinvcorr)
PADDSW MMX2, [EDI]
MOVQ MMX3, MMX6
MOV EDI, [EBP-28]
PADDSW MMX1, [EDI]
PADDSW MMX6, MMX4
MOVQ [EDX], MMX7 ; output at offset 0
PSRAW MMX6, SHIFTINVCOL
MOVQ MMX7, MMX1
PADDSW MMX1, MMX0
MOVQ [EDX+16], MMX6 ; output at offset 16
PSRAW MMX1, SHIFTINVCOL
MOVQ MMX6, [EDX+48]
PSUBSW MMX7, MMX0
PADDSW MMX6, MMX2
PSUBSW MMX2, [EDX+48]
PSRAW MMX7, SHIFTINVCOL
MOVQ [EDX+32], MMX1 ; output at offset 32
PSRAW MMX6, SHIFTINVCOL
PSUBSW MMX5, [EDX+112]
PSRAW MMX2, SHIFTINVCOL
MOVQ [EDX+48], MMX6 ; output at offset 48
PSUBSW MMX3, MMX4
MOVQ [EDX+64], MMX2 ; output at offset 64
PSRAW MMX3, SHIFTINVCOL
MOVQ [EDX+80], MMX7 ; output at offset 80
PSRAW MMX5, SHIFTINVCOL
MOVQ [EDX+96], MMX3 ; output at offset 96
MOVQ [EDX+112], MMX5 ; output at offset 112
END DCT8INVCOLSSE;
(** Integer-only inverse DCT of the 8x8 block of 16-bit values at src.
	The block at src is transformed in place (eight Row passes, then eight Col passes) and the
	64 results are then copied to dst, so src is modified as well. *)
PROCEDURE TransformINT*(src, dst: LONGINT);
VAR i: LONGINT;
BEGIN
	(* rows are 16 bytes apart *)
	FOR i := 0 TO 7 DO
		Row(src + 16 * i)
	END;
	(* columns start 2 bytes apart *)
	FOR i := 0 TO 7 DO
		Col(src + 2 * i)
	END;
	(* copy the transformed block, one 16-bit value at a time *)
	FOR i := 0 TO 63 DO
		SYSTEM.PUT16(dst + SYSTEM.SIZEOF(INTEGER) * i, SYSTEM.GET16(src + SYSTEM.SIZEOF(INTEGER) * i))
	END
END TransformINT;
(** One-dimensional 8-point inverse DCT over the eight consecutive 16-bit values starting at
	address src; the results overwrite the inputs. Intermediate values carry 11 fractional bits
	(inputs are multiplied by 2048) and the outputs are divided by 256.
	The DC-only shortcut now widens the input with LONG before multiplying and narrows with SHORT
	on every store, like all other loads and stores in Row and Col. *)
PROCEDURE Row( src: LONGINT);
VAR
	x0, x1, x2, x3, x4, x5, x6, x7, x8: LONGINT;
	adr, tempAdr, i: LONGINT;
BEGIN
	adr := src;
	x1 := LONG(SYSTEM.GET16( adr + 4*SYSTEM.SIZEOF(INTEGER))) * 2048;
	x2 := LONG(SYSTEM.GET16( adr + 6*SYSTEM.SIZEOF(INTEGER)));
	x3 := LONG(SYSTEM.GET16( adr + 2*SYSTEM.SIZEOF(INTEGER)));
	x4 := LONG(SYSTEM.GET16( adr + 1*SYSTEM.SIZEOF(INTEGER)));
	x5 := LONG(SYSTEM.GET16( adr + 7*SYSTEM.SIZEOF(INTEGER)));
	x6 := LONG(SYSTEM.GET16( adr + 5*SYSTEM.SIZEOF(INTEGER)));
	x7 := LONG(SYSTEM.GET16( adr + 3*SYSTEM.SIZEOF(INTEGER)));
	IF ( x1 = 0 ) & ( x2 = 0 ) & ( x3 = 0 ) & ( x4 = 0 ) & ( x5 = 0 ) & ( x6 = 0 ) & ( x7 = 0 ) THEN
		(* only element 0 is non-zero: all eight outputs equal element 0 times 8 *)
		x0 := LONG(SYSTEM.GET16( adr )) * 8;
		FOR i := 0 TO 7 DO
			SYSTEM.PUT16( adr + i*SYSTEM.SIZEOF( INTEGER ), SHORT(x0))
		END;
		RETURN
	END;
	x0 := ( LONG(SYSTEM.GET16( adr )) * 2048 ) + 128;	(* +128 rounds the final DIV 256 *)
	(* first stage *)
	x8 := W7 * ( x4 + x5 );
	x4 := x8 + ( W1 - W7 ) * x4;
	x5 := x8 - ( W1 + W7 ) * x5;
	x8 := W3 * ( x6 + x7 );
	x6 := x8 - ( W3 - W5 ) * x6;
	x7 := x8 - ( W3 + W5 ) * x7;
	(* second stage *)
	x8 := x0 + x1;
	x0 := x0 - x1;
	x1 := W6 * ( x3 + x2 );
	x2 := x1 - ( W2 + W6 ) * x2;
	x3 := x1 + ( W2 - W6 ) * x3;
	x1 := x4 + x6;
	x4 := x4 - x6;
	x6 := x5 + x7;
	x5 := x5 - x7;
	(* third stage *)
	x7 := x8 + x3;
	x8 := x8 - x3;
	x3 := x0 + x2;
	x0 := x0 - x2;
	x2 := ( 181 * ( x4 + x5 ) + 128 ) DIV 256;
	x4 := ( 181 * ( x4 - x5 ) + 128 ) DIV 256;
	(* fourth stage: write the eight outputs to consecutive elements *)
	SYSTEM.PUT16( adr, SHORT(( x7 + x1 ) DIV 256));
	tempAdr := adr + SYSTEM.SIZEOF( INTEGER );
	SYSTEM.PUT16( tempAdr, SHORT(( x3 + x2 ) DIV 256));
	tempAdr := tempAdr + SYSTEM.SIZEOF( INTEGER );
	SYSTEM.PUT16( tempAdr, SHORT(( x0 + x4 ) DIV 256));
	tempAdr := tempAdr + SYSTEM.SIZEOF( INTEGER );
	SYSTEM.PUT16( tempAdr, SHORT(( x8 + x6 ) DIV 256));
	tempAdr := tempAdr + SYSTEM.SIZEOF( INTEGER );
	SYSTEM.PUT16( tempAdr, SHORT(( x8 - x6 ) DIV 256));
	tempAdr := tempAdr + SYSTEM.SIZEOF( INTEGER );
	SYSTEM.PUT16( tempAdr, SHORT(( x0 - x4 ) DIV 256));
	tempAdr := tempAdr + SYSTEM.SIZEOF( INTEGER );
	SYSTEM.PUT16( tempAdr, SHORT(( x3 - x2 ) DIV 256));
	tempAdr := tempAdr + SYSTEM.SIZEOF( INTEGER );
	SYSTEM.PUT16( tempAdr, SHORT(( x7 - x1 ) DIV 256))
END Row;
(** One-dimensional 8-point inverse DCT down one column: the eight 16-bit values at src,
	src + stride, .. src + 7*stride, where stride is eight INTEGERs. Results overwrite the inputs
	and are taken from intTab, which limits them to -256..255 (see FillTablesINT). *)
PROCEDURE Col( src: LONGINT);
VAR
	x0, x1, x2, x3, x4, x5, x6, x7, x8: LONGINT;
	stride, clip, k: LONGINT;
BEGIN
	stride := 8*SYSTEM.SIZEOF( INTEGER );
	x1 := LONG(SYSTEM.GET16( src + 4*stride )) * 256;
	x2 := LONG(SYSTEM.GET16( src + 6*stride ));
	x3 := LONG(SYSTEM.GET16( src + 2*stride ));
	x4 := LONG(SYSTEM.GET16( src + stride ));
	x5 := LONG(SYSTEM.GET16( src + 7*stride ));
	x6 := LONG(SYSTEM.GET16( src + 5*stride ));
	x7 := LONG(SYSTEM.GET16( src + 3*stride ));
	IF ( x1 = 0 ) & ( x2 = 0 ) & ( x3 = 0 ) & ( x4 = 0 ) & ( x5 = 0 ) & ( x6 = 0 ) & ( x7 = 0 ) THEN
		(* only the first element is non-zero: one clamped value fills the whole column *)
		x0 := LONG(intTab[(( SYSTEM.GET16(src) + 32 ) DIV 64 ) + 512]);
		FOR k := 0 TO 7 DO
			SYSTEM.PUT16( src + k*stride , SHORT(x0))
		END;
		RETURN
	END;
	x0 := (LONG(SYSTEM.GET16( src ))* 256) + 8192;
	(* first stage; the + 4 rounds the DIV 8 *)
	x8 := W7 * ( x4 + x5 ) + 4;
	x4 := ( x8 + ( W1 - W7 ) * x4 ) DIV 8;
	x5 := ( x8 - ( W1 + W7) * x5 ) DIV 8;
	x8 := W3 * ( x6 + x7 ) + 4;
	x6 := ( x8 - ( W3 - W5 ) * x6 ) DIV 8;
	x7 := ( x8 - ( W3 + W5 ) * x7 ) DIV 8;
	(* second stage *)
	x8 := x0 + x1;
	x0 := x0 - x1;
	x1 := W6 * ( x3 + x2 ) + 4;
	x2 := ( x1 - ( W2 + W6 ) * x2 ) DIV 8;
	x3 := ( x1 + ( W2 - W6 ) * x3 ) DIV 8;
	x1 := x4 + x6;
	x4 := x4 - x6;
	x6 := x5 + x7;
	x5 := x5 - x7;
	(* third stage *)
	x7 := x8 + x3;
	x8 := x8 - x3;
	x3 := x0 + x2;
	x0 := x0 - x2;
	x2 := ( 181 * ( x4 + x5 ) + 128 ) DIV 256;
	x4 := ( 181 * ( x4 - x5 ) + 128 ) DIV 256;
	(* fourth stage: scale by DIV 16384 and look the result up relative to the table centre *)
	clip := SYSTEM.ADR( intTab[512] );
	SYSTEM.PUT16( src, SYSTEM.GET16( clip + (( x7 + x1 ) DIV 16384)*SYSTEM.SIZEOF(INTEGER) ));
	SYSTEM.PUT16( src + stride, SYSTEM.GET16( clip + (( x3 + x2 ) DIV 16384)*SYSTEM.SIZEOF(INTEGER) ));
	SYSTEM.PUT16( src + 2*stride, SYSTEM.GET16( clip + (( x0 + x4 ) DIV 16384)*SYSTEM.SIZEOF(INTEGER) ));
	SYSTEM.PUT16( src + 3*stride, SYSTEM.GET16( clip + (( x8 + x6 ) DIV 16384)*SYSTEM.SIZEOF(INTEGER) ));
	SYSTEM.PUT16( src + 4*stride, SYSTEM.GET16( clip + (( x8 - x6 ) DIV 16384)*SYSTEM.SIZEOF(INTEGER) ));
	SYSTEM.PUT16( src + 5*stride, SYSTEM.GET16( clip + (( x0 - x4 ) DIV 16384)*SYSTEM.SIZEOF(INTEGER) ));
	SYSTEM.PUT16( src + 6*stride, SYSTEM.GET16( clip + (( x3 - x2 ) DIV 16384)*SYSTEM.SIZEOF(INTEGER) ));
	SYSTEM.PUT16( src + 7*stride, SYSTEM.GET16( clip + (( x7 - x1 ) DIV 16384)*SYSTEM.SIZEOF(INTEGER) ))
END Col;
(** Allocates and fills the constant vectors and row tables used by TransformSSE2 and records
	their addresses in the ...adr variables.
	Every array has four unused leading INTEGERs and is addressed from element 4 - presumably so
	that element 4 falls on a 16-byte boundary, as the SSE2 memory operands require; TODO confirm
	against the allocator's alignment.
	tg216 is 27146 = round(tan(2*pi/16) * 65536); the earlier value 21746 was a transposition of
	that number (tg316 = -21746 is a different constant, tan(3*pi/16) * 65536 - 65536). *)
PROCEDURE FillTablesSSE2;
VAR i: LONGINT;
BEGIN
	NEW(M128onecorr, 12);
	FOR i := 4 TO 11 DO M128onecorr[i] := 1 END;
	onecorradr := SYSTEM.ADR(M128onecorr[4]);
	(* 16-bit pairs (RNDINVROW, 0) form the 32-bit rounding terms added by PADDD *)
	NEW(M128roundinvrow, 12);
	M128roundinvrow[4] := RNDINVROW; M128roundinvrow[5] := 0; M128roundinvrow[6] := RNDINVROW; M128roundinvrow[7] := 0;
	M128roundinvrow[8] := RNDINVROW; M128roundinvrow[9] := 0; M128roundinvrow[10] := RNDINVROW; M128roundinvrow[11] := 0;
	roundinvrowadr := SYSTEM.ADR(M128roundinvrow[4]);
	NEW(M128roundinvcol, 12);
	FOR i := 4 TO 11 DO M128roundinvcol[i] := RNDINVCOL END;
	roundinvcoladr := SYSTEM.ADR(M128roundinvcol[4]);
	NEW(M128roundinvcorr, 12);
	FOR i := 4 TO 11 DO M128roundinvcorr[i] := RNDINVCORR END;
	roundinvcorradr := SYSTEM.ADR(M128roundinvcorr[4]);
	NEW(M128tg116, 12);
	FOR i := 4 TO 11 DO M128tg116[i] := 13036 END;
	tg116adr := SYSTEM.ADR(M128tg116[4]);
	NEW(M128tg216, 12);
	FOR i := 4 TO 11 DO M128tg216[i] := 27146 END;
	tg216adr := SYSTEM.ADR(M128tg216[4]);
	NEW(M128tg316, 12);
	FOR i := 4 TO 11 DO M128tg316[i] := -21746 END;
	tg316adr := SYSTEM.ADR(M128tg316[4]);
	NEW(M128cos416, 12);
	FOR i := 4 TO 11 DO M128cos416[i] := -19195 END;
	cos416adr := SYSTEM.ADR(M128cos416[4]);
	(* row tables: 32 coefficients each, read by DCT8INVROWSSE2 at offsets 0, 16, 32, 48 *)
	NEW(M128tabi04, 36);
	M128tabi04[4] := 16384; M128tabi04[5] := 21407; M128tabi04[6] := 16384; M128tabi04[7] := 8867;
	M128tabi04[8] := 16384; M128tabi04[9] := -8867; M128tabi04[10] := 16384; M128tabi04[11] := -21407;
	M128tabi04[12] := 16384; M128tabi04[13] := 8867; M128tabi04[14] := -16384; M128tabi04[15] := -21407;
	M128tabi04[16] := -16384; M128tabi04[17] := 21407; M128tabi04[18] := 16384; M128tabi04[19] := -8867;
	M128tabi04[20] := 22725; M128tabi04[21] := 19266; M128tabi04[22] := 19266; M128tabi04[23] := -4520;
	M128tabi04[24] := 12873; M128tabi04[25] := -22725; M128tabi04[26] := 4520; M128tabi04[27] := -12873;
	M128tabi04[28] := 12873; M128tabi04[29] := 4520; M128tabi04[30] := -22725; M128tabi04[31] := -12873;
	M128tabi04[32] := 4520; M128tabi04[33] := 19266; M128tabi04[34] := 19266; M128tabi04[35] := -22725;
	tabi04adr := SYSTEM.ADR(M128tabi04[4]);
	NEW(M128tabi17, 36);
	M128tabi17[4] := 22725; M128tabi17[5] := 29692; M128tabi17[6] := 22725; M128tabi17[7] := 12299;
	M128tabi17[8] := 22725; M128tabi17[9] := -12299; M128tabi17[10] := 22725; M128tabi17[11] := -29692;
	M128tabi17[12] := 22725; M128tabi17[13] := 12299; M128tabi17[14] := -22725; M128tabi17[15] := -29692;
	M128tabi17[16] := -22725; M128tabi17[17] := 29692; M128tabi17[18] := 22725; M128tabi17[19] := -12299;
	M128tabi17[20] := 31521; M128tabi17[21] := 26722; M128tabi17[22] := 26722; M128tabi17[23] := -6270;
	M128tabi17[24] := 17855; M128tabi17[25] := -31521; M128tabi17[26] := 6270; M128tabi17[27] := -17855;
	M128tabi17[28] := 17855; M128tabi17[29] := 6270; M128tabi17[30] := -31521; M128tabi17[31] := -17855;
	M128tabi17[32] := 6270; M128tabi17[33] := 26722; M128tabi17[34] := 26722; M128tabi17[35] := -31521;
	tabi17adr := SYSTEM.ADR(M128tabi17[4]);
	NEW(M128tabi26, 36);
	M128tabi26[4] := 21407; M128tabi26[5] := 27969; M128tabi26[6] := 21407; M128tabi26[7] := 11585;
	M128tabi26[8] := 21407; M128tabi26[9] := -11585; M128tabi26[10] := 21407; M128tabi26[11] := -27969;
	M128tabi26[12] := 21407; M128tabi26[13] := 11585; M128tabi26[14] := -21407; M128tabi26[15] := -27969;
	M128tabi26[16] := -21407; M128tabi26[17] := 27969; M128tabi26[18] := 21407; M128tabi26[19] := -11585;
	M128tabi26[20] := 29692; M128tabi26[21] := 25172; M128tabi26[22] := 25172; M128tabi26[23] := -5906;
	M128tabi26[24] := 16819; M128tabi26[25] := -29692; M128tabi26[26] := 5906; M128tabi26[27] := -16819;
	M128tabi26[28] := 16819; M128tabi26[29] := 5906; M128tabi26[30] := -29692; M128tabi26[31] := -16819;
	M128tabi26[32] := 5906; M128tabi26[33] := 25172; M128tabi26[34] := 25172; M128tabi26[35] := -29692;
	tabi26adr := SYSTEM.ADR(M128tabi26[4]);
	NEW(M128tabi35, 36);
	M128tabi35[4] := 19266; M128tabi35[5] := 25172; M128tabi35[6] := 19266; M128tabi35[7] := 10426;
	M128tabi35[8] := 19266; M128tabi35[9] := -10426; M128tabi35[10] := 19266; M128tabi35[11] := -25172;
	M128tabi35[12] := 19266; M128tabi35[13] := 10426; M128tabi35[14] := -19266; M128tabi35[15] := -25172;
	M128tabi35[16] := -19266; M128tabi35[17] := 25172; M128tabi35[18] := 19266; M128tabi35[19] := -10426;
	M128tabi35[20] := 26722; M128tabi35[21] := 22654; M128tabi35[22] := 22654; M128tabi35[23] := -5315;
	M128tabi35[24] := 15137; M128tabi35[25] := -26722; M128tabi35[26] := 5315; M128tabi35[27] := -15137;
	M128tabi35[28] := 15137; M128tabi35[29] := 5315; M128tabi35[30] := -26722; M128tabi35[31] := -15137;
	M128tabi35[32] := 5315; M128tabi35[33] := 22654; M128tabi35[34] := 22654; M128tabi35[35] := -26722;
	tabi35adr := SYSTEM.ADR(M128tabi35[4]);
END FillTablesSSE2;
(** Allocates and fills the constant vectors and row tables used by TransformSSE (8-byte vectors,
	32-coefficient tables addressed from element 0) and records their addresses in the ...adr
	variables. The arrays are the same module variables that FillTablesSSE2 uses, so whichever
	procedure ran last determines the layout.
	tg216 is 27146 = round(tan(2*pi/16) * 65536); the earlier value 21746 was a transposition of
	that number (tg316 = -21746 is a different constant, tan(3*pi/16) * 65536 - 65536). *)
PROCEDURE FillTablesSSE;
VAR i: LONGINT;
BEGIN
	NEW(M128onecorr, 4);
	FOR i := 0 TO 3 DO M128onecorr[i] := 1 END;
	onecorradr := SYSTEM.ADR(M128onecorr[0]);
	(* 16-bit pairs (RNDINVROW, 0) form the 32-bit rounding terms added by PADDD *)
	NEW(M128roundinvrow, 4);
	M128roundinvrow[0] := RNDINVROW; M128roundinvrow[1] := 0; M128roundinvrow[2] := RNDINVROW; M128roundinvrow[3] := 0;
	roundinvrowadr := SYSTEM.ADR(M128roundinvrow[0]);
	NEW(M128roundinvcol, 4);
	FOR i := 0 TO 3 DO M128roundinvcol[i] := RNDINVCOL END;
	roundinvcoladr := SYSTEM.ADR(M128roundinvcol[0]);
	NEW(M128roundinvcorr, 4);
	FOR i := 0 TO 3 DO M128roundinvcorr[i] := RNDINVCORR END;
	roundinvcorradr := SYSTEM.ADR(M128roundinvcorr[0]);
	NEW(M128tg116, 4);
	FOR i := 0 TO 3 DO M128tg116[i] := 13036 END;
	tg116adr := SYSTEM.ADR(M128tg116[0]);
	NEW(M128tg216, 4);
	FOR i := 0 TO 3 DO M128tg216[i] := 27146 END;
	tg216adr := SYSTEM.ADR(M128tg216[0]);
	NEW(M128tg316, 4);
	FOR i := 0 TO 3 DO M128tg316[i] := -21746 END;
	tg316adr := SYSTEM.ADR(M128tg316[0]);
	NEW(M128cos416, 4);
	FOR i := 0 TO 3 DO M128cos416[i] := -19195 END;
	cos416adr := SYSTEM.ADR(M128cos416[0]);
	(* row tables: read by DCT8INVROWSSE in 8-byte groups at offsets 0 .. 56 *)
	NEW(M128tabi04, 32);
	M128tabi04[0] := 16384; M128tabi04[1] := 21407; M128tabi04[2] := 16384; M128tabi04[3] := 8867;
	M128tabi04[4] := 16384; M128tabi04[5] := 8867; M128tabi04[6] := -16384; M128tabi04[7] := -21407;
	M128tabi04[8] := 16384; M128tabi04[9] := -8867; M128tabi04[10] := 16384; M128tabi04[11] := -21407;
	M128tabi04[12] := -16384; M128tabi04[13] := 21407; M128tabi04[14] := 16384; M128tabi04[15] := -8867;
	M128tabi04[16] := 22725; M128tabi04[17] := 19266; M128tabi04[18] := 19266; M128tabi04[19] := -4520;
	M128tabi04[20] := 12873; M128tabi04[21] := 4520; M128tabi04[22] := -22725; M128tabi04[23] := -12873;
	M128tabi04[24] := 12873; M128tabi04[25] := -22725; M128tabi04[26] := 4520; M128tabi04[27] := -12873;
	M128tabi04[28] := 4520; M128tabi04[29] := 19266; M128tabi04[30] := 19266; M128tabi04[31] := -22725;
	tabi04adr := SYSTEM.ADR(M128tabi04[0]);
	NEW(M128tabi17, 32);
	M128tabi17[0] := 22725; M128tabi17[1] := 29692; M128tabi17[2] := 22725; M128tabi17[3] := 12299;
	M128tabi17[4] := 22725; M128tabi17[5] := 12299; M128tabi17[6] := -22725; M128tabi17[7] := -29692;
	M128tabi17[8] := 22725; M128tabi17[9] := -12299; M128tabi17[10] := 22725; M128tabi17[11] := -29692;
	M128tabi17[12] := -22725; M128tabi17[13] := 29692; M128tabi17[14] := 22725; M128tabi17[15] := -12299;
	M128tabi17[16] := 31521; M128tabi17[17] := 26722; M128tabi17[18] := 26722; M128tabi17[19] := -6270;
	M128tabi17[20] := 17855; M128tabi17[21] := 6270; M128tabi17[22] := -31521; M128tabi17[23] := -17855;
	M128tabi17[24] := 17855; M128tabi17[25] := -31521; M128tabi17[26] := 6270; M128tabi17[27] := -17855;
	M128tabi17[28] := 6270; M128tabi17[29] := 26722; M128tabi17[30] := 26722; M128tabi17[31] := -31521;
	tabi17adr := SYSTEM.ADR(M128tabi17[0]);
	NEW(M128tabi26, 32);
	M128tabi26[0] := 21407; M128tabi26[1] := 27969; M128tabi26[2] := 21407; M128tabi26[3] := 11585;
	M128tabi26[4] := 21407; M128tabi26[5] := 11585; M128tabi26[6] := -21407; M128tabi26[7] := -27969;
	M128tabi26[8] := 21407; M128tabi26[9] := -11585; M128tabi26[10] := 21407; M128tabi26[11] := -27969;
	M128tabi26[12] := -21407; M128tabi26[13] := 27969; M128tabi26[14] := 21407; M128tabi26[15] := -11585;
	M128tabi26[16] := 29692; M128tabi26[17] := 25172; M128tabi26[18] := 25172; M128tabi26[19] := -5906;
	M128tabi26[20] := 16819; M128tabi26[21] := 5906; M128tabi26[22] := -29692; M128tabi26[23] := -16819;
	M128tabi26[24] := 16819; M128tabi26[25] := -29692; M128tabi26[26] := 5906; M128tabi26[27] := -16819;
	M128tabi26[28] := 5906; M128tabi26[29] := 25172; M128tabi26[30] := 25172; M128tabi26[31] := -29692;
	tabi26adr := SYSTEM.ADR(M128tabi26[0]);
	NEW(M128tabi35, 32);
	M128tabi35[0] := 19266; M128tabi35[1] := 25172; M128tabi35[2] := 19266; M128tabi35[3] := 10426;
	M128tabi35[4] := 19266; M128tabi35[5] := 10426; M128tabi35[6] := -19266; M128tabi35[7] := -25172;
	M128tabi35[8] := 19266; M128tabi35[9] := -10426; M128tabi35[10] := 19266; M128tabi35[11] := -25172;
	M128tabi35[12] := -19266; M128tabi35[13] := 25172; M128tabi35[14] := 19266; M128tabi35[15] := -10426;
	M128tabi35[16] := 26722; M128tabi35[17] := 22654; M128tabi35[18] := 22654; M128tabi35[19] := -5315;
	M128tabi35[20] := 15137; M128tabi35[21] := 5315; M128tabi35[22] := -26722; M128tabi35[23] := -15137;
	M128tabi35[24] := 15137; M128tabi35[25] := -26722; M128tabi35[26] := 5315; M128tabi35[27] := -15137;
	M128tabi35[28] := 5315; M128tabi35[29] := 22654; M128tabi35[30] := 22654; M128tabi35[31] := -26722;
	tabi35adr := SYSTEM.ADR(M128tabi35[0]);
END FillTablesSSE;
(** Builds the 1024-entry clamp table used by Col: entry idx holds the value idx - 512
	limited to the range -256 .. 255. *)
PROCEDURE FillTablesINT;
VAR idx, value: INTEGER;
BEGIN
	NEW(intTab, 1024);
	FOR idx := 0 TO 1023 DO
		value := idx - 512;
		IF value < -256 THEN
			value := -256
		ELSIF value > 255 THEN
			value := 255
		END;
		intTab[idx] := value
	END
END FillTablesINT;
(** Installs the default implementation in Transform, sets status and logs the choice.
	Both SIMD branches are switched off by a constant FALSE in front of the feature test,
	so the integer implementation is always selected here. *)
PROCEDURE CheckFeatures;
CONST
	simdEnabled = FALSE;	(* short-circuits both feature tests below *)
BEGIN
	IF simdEnabled & Machine.SSE2Support THEN
		FillTablesSSE2;
		status := SSE2;
		Transform := TransformSSE2;
		KernelLog.String("IDCT: SSE2 method"); KernelLog.Ln
	ELSIF simdEnabled & Machine.SSESupport THEN
		FillTablesSSE;
		status := SSE;
		Transform := TransformSSE;
		KernelLog.String("IDCT: SSE method"); KernelLog.Ln
	ELSE
		FillTablesINT;
		status := INT;
		Transform := TransformINT;
		KernelLog.String("IDCT: INT method"); KernelLog.Ln
	END
END CheckFeatures;
(** Command: selects the implementation named by the first argument ("INT", "SSE" or "SSE2"),
	fills the matching tables, installs it in Transform, sets status and reports on context.out.
	Any other argument falls back to CheckFeatures. The names are matched exactly. *)
PROCEDURE Change*(context : Commands.Context);
VAR name: ARRAY 12 OF CHAR;
BEGIN
	context.arg.String(name);
	IF name = "INT" THEN
		FillTablesINT;
		status := INT;
		Transform := TransformINT;
		context.out.String("IDCT: INT method"); context.out.Ln
	ELSIF name = "SSE" THEN
		FillTablesSSE;
		status := SSE;
		Transform := TransformSSE;
		context.out.String("IDCT: SSE method"); context.out.Ln
	ELSIF name = "SSE2" THEN
		FillTablesSSE2;
		status := SSE2;
		Transform := TransformSSE2;
		context.out.String("IDCT: SSE2 method"); context.out.Ln
	ELSE
		CheckFeatures
	END
END Change;
BEGIN
(* Module initialisation: derive the rounding terms from BITSINVACC (= 4), then install the default implementation. *)
RNDINVROW := 1024 * (6 - BITSINVACC); (* = 2048 *)
RNDINVCOL := 16 * (BITSINVACC - 3); (* = 16 *)
RNDINVCORR := RNDINVCOL - 1; (* = 15 *)
CheckFeatures;
END IDCT.
IDCT.Change INT ~
IDCT.Change SSE ~
IDCT.Change SSE2 ~
IDCT.Obx ~
SystemTools.Free IDCT ~