MODULE Machine;	(** AUTHOR "pjm"; PURPOSE "Bootstrapping, configuration and machine interface"; *)

(* The code of this module body must be the first in the statically linked boot file. *)

IMPORT SYSTEM, Trace;

CONST
	Version = "A2 Revision 4186 (17.06.2011)";

	MaxCPU* = 8;	(** maximum number of processors (up to 16) *)

	DefaultObjectFileExtension* = ".Obx";

	(** bits in features variable *)
	MTTR* = 12; MMX* = 23; HTT* = 28;

	MaxDisks = 2;	(* maximum number of disks with BIOS parameters *)

	HeapAdr = 100000H;

	MaxMemTop = SHORT(80000000H);

	DefaultDMASize = 20;	(* default size of ISA DMA area in KB *)

CONST
	StrongChecks = FALSE;	(* perform strong checks *)
	Stats* = FALSE;	(* acquire statistics *)
	TimeCount = 0	(* 100000 *);	(* number of lock tries before checking timeout - 0 to disable *)

	(** standard lock levels (in order) *)	(* also refer to Traps.Show *)
	TraceOutput* = 0;		(* Trace output *)
	Memory* = 1;			(* Virtual memory management, stack and page allocation *)
	Heaps* = 2;				(* Storage allocation and Garbage collection *)
	Interrupts* = 3	;		(* Interrupt handling. *)
	Modules* = 4;			(* Module list *)
	Objects* = 5;			(* Ready queue *)
	Processors* = 6;	(* Interprocessor interrupts *)
	KernelLog* = 7;			(* Atomic output *)
	(** highest level is all object locks *)

	Preemption* = 31;	(** flag for BreakAll *)

	MaxLocks = 8;	(* { <= 32 } *)

	LowestLock = 0; HighestLock = MaxLocks-1;

CONST
	TraceVerbose = FALSE;	(* write out verbose trace info *)

	AddressSize = SYSTEM.SIZEOF(SYSTEM.ADDRESS);
	SetSize = MAX (SET) + 1;

	(** error codes *)
	Ok* = 0;

	(* standard multipliers *)
	K = 1024; M = 100000H; (* 1K, 1M *)

	(* paging sizes *)
	PS = 4096;			(* page size in bytes *)
	PSlog2 = 12;		(* ASH(1, PSlog2) = PS *)
	RS = 4*M;			(* region covered by a page table in bytes *)
	PTEs = RS DIV PS;	(* number of page table/directory entries *)

	ReservedPages = 8;	(* pages reserved on page heap (not for normal heap use) *)

	NilAdr* = -1;		(** nil value for addresses (not same as pointer NIL value) *)

	(* free page stack page node layout *)
	NodeSP = 0;
	NodeNext = AddressSize;
	NodePrev = AddressSize*2;
	MinSP = AddressSize*3; MaxSP = PS;

(*
0				sp
AddressSize		nextAdr
AddressSize*2	prevAdr
AddressSize*3	first entry
4092			last entry
*)

	(* virtual memory layout. no area will cross the 2G boundary, to avoid LONGINT sign problems. *)
	MapAreaAdr = SHORT(80000000H);	(* dynamic mappings: bottom part of 2G..4G *)
	MapAreaSize = 64*M;
	IntelAreaAdr = SHORT(0FEE00000H);	(* reserved by Intel for APIC: 4G-18M..4G-18M+4K *)
	IntelAreaSize = 00001000H;
	StackAreaAdr = MapAreaAdr+MapAreaSize;	(* stacks: middle part of 2G..4G *)
	StackAreaSize = IntelAreaAdr-StackAreaAdr;

	(* stack sizes *)
	KernelStackSize = 2*PS;		(* multiple of PS *)
	MaxUserStackSize = 128*K;	(* multiple of PS *)
	InitUserStackSize = PS;		(* must be PS (or change NewStack) *)
	UserStackGuardSize = PS;	(* multiple of PS left unallocated at bottom of stack virtual area *)
	MaxUserStacks = StackAreaSize DIV MaxUserStackSize;

	(* physical memory layout *)
	LowAdr = PS;				(* lowest physical address used *)
	LinkAdr = M;				(* address where kernel is linked, also address where heap begins *)
	StaticBlockSize = 32;		(* static heap block size *)
	BlockHeaderSize = 2 * AddressSize;
	RecordDescSize = 4 * AddressSize;  (* needs to be adapted in case Heaps.RecordDesc is changed *)

	(* gdt indices *)
	TSSOfs = 6;					(* offset in GDT of TSSs *)
	StackOfs = TSSOfs + MaxCPU;	(* offset in GDT of stacks *)
	GDTSize = StackOfs + MaxCPU;

	(* gdt selectors *)
	KernelCodeSel = 1*8;		(* selector 1 in gdt, RPL 0 *)
	KernelStackSel = 2*8;		(* selector 2 in gdt, RPL 0 *)
	UserCodeSel = 3*8 + 3;		(* selector 3 in gdt, RPL 3 *)
	DataSel = 4*8;				(* selector 4 in gdt, RPL 0 *)
	UserStackSel = 5*8 + 3;		(* selector 5 in gdt, RPL 3 *)
	KernelTR = TSSOfs*8;		(* selector in gdt, RPL 0 *)

	(* paging flags *)
	PageNotPresent = 0;		(* not present page *)
	KernelPage = 3;				(* supervisor, present, r/w *)
	UserPage = 7;				(* user, present, r/w *)

	HeapMin = 50;				(* "minimum" heap size as percentage of total memory size (used for heap expansion in scope of GC ) *)
	HeapMax = 95;				(* "maximum" heap size as percentage of total memory size (used for heap expansion in scope of GC) *)
	ExpandRate = 1;				(* always extend heap with at least this percentage of total memory size *)
	Threshold = 10;				(* periodic GC initiated when this percentage of total memory size bytes has "passed through" NewBlock *)
	InitialHeapIncrement = 4096;

	HeaderSize = 40H; (* cf. Linker0 *)
	EndBlockOfs = 38H;	(* cf. Linker0 *)
	MemoryBlockOfs = BlockHeaderSize + RecordDescSize + BlockHeaderSize; (* memory block (including header) starts at offset HeaderSize *)

CONST
	(** pre-defined interrupts 0-31, used with InstallHandler *)
	DE* = 0; DB* = 1; NMI* = 2; BP* = 3; OVF* = 4; BR* = 5; UD* = 6; NM* = 7;
	DF* = 8; TS* = 10; NP* = 11; SSF* = 12; GP* = 13; PF* = 14; MF*= 16; AC*= 17; MC* = 18;

	IRQ0* = 32;			(* {IRQ0 MOD 8 = 0} *)
	IRQ2 = IRQ0 + 2;
	IRQ7 = IRQ0 + 7;
	IRQ8 = IRQ0 + 8;
	IRQ15 = 47;
	MaxIRQ* = IRQ15;	(** hardware interrupt numbers *)

	MPKC* = 49;		(** SMP: kernel call *)
	SoftInt* = 58;		(** temporary software interrupt *)
	MPIPCLocal* = 59;	(** SMP: local interprocessor interrupt *)
	MPTMR* = 60;		(** SMP: timer interrupt *)
	MPIPC* = 61;		(** SMP: interprocessor interrupt *)
	MPERR* = 62;		(** SMP: error interrupt *)
	MPSPU* = 63;		(** SMP: spurious interrupt {MOD 16 = 15} *)

	IDTSize = 64;
	MaxNumHandlers = 16;

	TraceSpurious = FALSE;						(* no message on spurious hardware interrupts *)
	HandleSpurious = TRUE OR TraceSpurious;	(* do not trap on spurious interrupts *)

	IntA0 = 020H;	IntA1 = 021H;	(* Interrupt Controller 1 *)
	IntB0 = 0A0H;	IntB1 = 0A1H;	(* Interrupt Controller 2 *)

	(** EFLAGS bits *)
	IFBit* = 9; VMBit* = 17;

	KernelLevel* = 0; UserLevel* = 3;	(** CS MOD 4 *)

	Second* = 1000; (* frequency of ticks increments in Hz *)

CONST
	Self* = 0; FrontBarrier* = 1; BackBarrier* = 2;	(** Broadcast flags. *)

	TraceApic = FALSE;
	TraceProcessor = FALSE;	(* remove this hack! *)

	ClockRateDelay = 50;	(* ms - delay when timing bus clock rate *)

	TimerClock = 1193180;	(* timer clock is 1.19318 MHz *)

CONST
	(* low level tracing *)
	TraceV24 = 2; TraceScreen = 0;
	TraceWidth = 80; TraceHeight = 25;
	TraceLen = TraceWidth * SYSTEM.SIZEOF (INTEGER);
	TraceSize = TraceLen * TraceHeight;

TYPE
	Vendor* = ARRAY 13 OF CHAR;

	IDMap* = ARRAY 16 OF SHORTINT;

TYPE
	Stack* = RECORD	(** values are read-only *)
		low: SYSTEM.ADDRESS;		(* lowest virtual address that may be allocated for stack *)
		adr*: SYSTEM.ADDRESS;		(* lowest address on allocated stack *)	(* exported for Objects only *)
		high*: SYSTEM.ADDRESS;	(* next virtual address after stack *)	(* exported for Objects only *)
	END;

	(* task state segment *)
	TSSDesc = RECORD	(* 1, p. 485 and p. 612 for required fields *)
		Link: LONGINT;	(* lower 16 bits significant *)
		ESP0: LONGINT;
		ESS0: LONGINT;	(* lower 16 bits significant *)
		ESP1: LONGINT;
		ESS1: LONGINT;	(* lower 16 bits significant *)
		ESP2: LONGINT;
		ESS2: LONGINT;	(* lower 16 bits significant *)
		CR3: LONGINT;
		EIP: LONGINT;
		EFLAGS: SET;
		EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI: LONGINT;
		ES, CS, SS, DS, FS, GS: LONGINT;	(* lower 16 bits significant *)
		LDT: LONGINT;	(* lower 16 bits significant *)
		TaskAttributes: INTEGER;
		IOBitmapOffset: INTEGER
	(* Implicit: IOBitmap: ARRAY 8192 DIV 4 OF SET *)
	END;

	Startup* = PROCEDURE;	(** can not be a method *)

	(* global descriptor table *)
	SegDesc = RECORD
		low, high: LONGINT
	END;
	GDT = ARRAY GDTSize OF SegDesc;

	Range* = RECORD
		adr*: SYSTEM.ADDRESS; size*: SYSTEM.SIZE;
	END;

TYPE
	(** processor state, ordering of record fields is predefined! *)
	State* = RECORD					(* offsets used in FieldInterrupt, FieldIRQ and Objects.RestoreState *)
		EDI*, ESI*, ERR*, ESP0*, EBX*, EDX*, ECX*, EAX*: LONGINT;	(** ESP0 = ADR(s.INT) *)
		INT*, BP*, PC*, CS*: LONGINT;	(* BP and ERR are exchanged by glue code, for procedure link *)
		FLAGS*: SET;
		SP*, SS*: LONGINT;			(** only valid if (VMBit IN s.EFLAGS) OR (CS MOD 4 < s.CS MOD 4) *)
		ES*, DS*, FS*, GS*: LONGINT;	(** only valid if (VMBit IN s.FLAGS) *)
	END;

	(** exception state, ordering of record fields is predefined! *)
	ExceptionState* = RECORD
		halt*: SYSTEM.ADDRESS;	(** halt code *)
		pf*: SYSTEM.ADDRESS;		(** page fault address *)
		locks*: SET;					(** active locks *)
		SP*: SYSTEM.ADDRESS;	(** actual ESP value at time of interrupt *)
		SS*, ES*, DS*, FS*, GS*: LONGINT;	(** segment registers *)
		CR*: ARRAY 5 OF LONGINT;	(** control registers *)
		DR*: ARRAY 8 OF LONGINT;	(** debug registers *)
		FPU*: ARRAY 7 OF SET		(** floating-point state *)
	END;

	Handler* = PROCEDURE {DELEGATE} (VAR state: State);

	HandlerRec = RECORD
		valid: BOOLEAN;	(* offset 0 *)
		handler: Handler	(* offset 4 *)
	END;

	GateDescriptor = RECORD
		offsetBits0to15: INTEGER;
		selector: INTEGER;
		gateType: INTEGER;
		offsetBits16to31: INTEGER
	END;
	IDT = ARRAY IDTSize OF GateDescriptor;

	SSEState* = ARRAY (512+16) OF CHAR;

TYPE
	MemoryBlock* = POINTER TO MemoryBlockDesc;
	MemoryBlockDesc* = RECORD
		next- {UNTRACED}: MemoryBlock;
		startAdr-: SYSTEM.ADDRESS; 		(* unused field for I386 *)
		size-: SYSTEM.SIZE; 					(* unused field for I386 *)
		beginBlockAdr-, endBlockAdr-: SYSTEM.ADDRESS
	END;

TYPE
	EventHandler = PROCEDURE (id: LONGINT; CONST state: State);

	Message* = POINTER TO RECORD END;	(** Broadcast message. *)
	BroadcastHandler = PROCEDURE (id: LONGINT; CONST state: State; msg: Message);

	TimeArray = ARRAY MaxCPU OF HUGEINT;

	Address32* = LONGINT;

VAR
	lowTop*: SYSTEM.ADDRESS;	(** top of low memory *)
	memTop*: SYSTEM.ADDRESS;	(** top of memory *)
	dmaSize*: SYSTEM.SIZE;	(** size of ISA dma area, above lowTop (for use in Aos.Diskettes) *)

	configMP: SYSTEM.ADDRESS;	(** MP spec config table physical address (outside reported RAM) *)
	revMP: CHAR;					(** MP spec revision *)
	featureMP: ARRAY 5 OF CHAR;	(** MP spec feature bytes 1-5 *)

	version-: ARRAY 64 OF CHAR;	(** Aos version *)

	SSESupport-: BOOLEAN;
	SSE2Support-: BOOLEAN;
	SSE3Support-: BOOLEAN; (* PH 04/11*)
	SSSE3Support-: BOOLEAN;
	SSE41Support-: BOOLEAN;
	SSE42Support-: BOOLEAN;
	SSE5Support-: BOOLEAN;
	AVXSupport-: BOOLEAN;

	features-, features2-: SET;	(** processor features *)
	fcr*: SET;	(** default floating-point control register value (default rounding mode is towards -infinity, for ENTIER) *)
	mhz*: HUGEINT;	(** clock rate of GetTimer in MHz, or 0 if not known *)

	chs: ARRAY MaxDisks OF RECORD cyls, hds, spt: LONGINT END;
	initRegs: ARRAY 2 OF LONGINT;	(* kernel parameters *)
	config: ARRAY 2048 OF CHAR;	(* config strings *)
	bootFlag: SYSTEM.ADDRESS;

	idAdr: SYSTEM.ADDRESS;	(* address of processor ID register *)
	map: IDMap;
	bootID: LONGINT;	(* ID of boot processor (0) *)

	numberOfProcessors: LONGINT; (* number of processors installed during start up *)
	coresPerProcessor : LONGINT; (* number of cores per physical package *)
	threadsPerCore : LONGINT; (* number of threads per core *)

CONST
	CacheLineSize = 128;

TYPE
	(* Synchronization variables should reside in own cache line. This data structure should be aligned to CacheLineSize. *)
	Lock = RECORD
		locked : BOOLEAN;
		filler : ARRAY CacheLineSize - 1 OF CHAR;
	END;

VAR
	lock: ARRAY MaxLocks OF Lock;	(** all locks *)

(*
Every element in the proc array belongs to one processor. It is therefore sufficient to disable interrupts to protect the consistency of these elements. Race conditions with interrupts handled on the same processor are avoided by disabling interrupts for the entire time that a lock is held (using locksHeld & state). The data structures  are padded to CacheLineSize to separate the locks out on cache lines of their own, to avoid false sharing.
*)
	proc-, trapState-: ARRAY MaxCPU OF RECORD
		locksHeld-: SET;	(** locks held by a processor *)
		state-: SET;	(** processor flags (interrupt state) at entry to its first lock *)
		preemptCount-: LONGINT;	(** if 0, preemption is allowed *)
		padding : ARRAY CacheLineSize - 12 OF CHAR;
	END;

	(* the data structures above should be aligned to CacheLineSize *)
	padding : ARRAY 92 OF CHAR;

	trapLocksBusy-: SET;
	maxTime: LONGINT;

VAR
	gdt: GDT;					(* global descriptor table *)
	procm: ARRAY MaxCPU OF RECORD	(* indexed by ID () *)
		tss: TSSDesc;
		sp: SYSTEM.ADDRESS;	(* snapshot for GC *)
		stack: Stack
	END;
	kernelPD: SYSTEM.ADDRESS;	(* physical address of page directory *)

	freeLowPage: SYSTEM.ADDRESS;	(* free low page stack pointer (link at offset 0 in page). All addresses physical. NIL = -1 *)
	freeLowPages, freeHighPages, totalPages: LONGINT;	(* number of free pages and total number of pages *)

	mapTop: SYSTEM.ADDRESS;	(* virtual address of end of memory mapping area *)
	heapEndAdr: SYSTEM.ADDRESS;	(* virtual address of end of heap (page aligned) *)

	topPageNum: LONGINT;		(* page containing byte memTop-1 *)
	pageHeapAdr: SYSTEM.ADDRESS;	(* address (physical and virtual) of bottom of page heap area *)
	pageStackAdr: SYSTEM.ADDRESS;	(* virtual address of top page of free page stack *)

	freeStack: ARRAY (MaxUserStacks+SetSize-1) DIV SetSize OF SET;	(* free stack bitmap *)
	freeStackIndex: LONGINT;	(* current position in bitmap (rotates) *)

	Nbigskips-: LONGINT;	(* number of times a stack was extended leaving a hole *)
	Nfilled-: LONGINT;	(* number of times a "hole" in a stack was filled *)
	NnewStacks-, NnewStackLoops-, NnewStackInnerLoops-, NdisposeStacks-,
	NlostPages-, NreservePagesUsed-, NmaxUserStacks-: LONGINT;

VAR
	idt: IDT;	(* interrupt descriptor table *)
	glue: ARRAY IDTSize OF ARRAY 15 OF CHAR;	(* code *)
	intHandler: ARRAY IDTSize, MaxNumHandlers OF HandlerRec; (* array of handlers for interrupts, the table is only filled up to MaxNumHandlers - 1, the last element in each row acts as a sentinel *)
	stateTag: SYSTEM.ADDRESS;
	default: HandlerRec;
	ticks*: LONGINT;	(** timer ticks. Use Kernel.GetTicks() to read, don't write *)

VAR
	ipcBusy, ipcFlags, ipcFrontBarrier, ipcBackBarrier: SET;
	ipcHandler: BroadcastHandler;
	ipcMessage: Message;
	numProcessors-: LONGINT;	(* number of processors we attempted to boot (some may have failed) *)
	maxProcessors: LONGINT;	(* max number of processors we are allowed to boot (-1 for uni) *)
	allProcessors-: SET;	(* IDs of all successfully booted processors *)
	localAPIC: SYSTEM.ADDRESS;	(* address of local APIC, 0 if not present *)
	apicVer: ARRAY MaxCPU OF LONGINT;	(* APIC version *)
	started: ARRAY MaxCPU OF BOOLEAN;	(* CPU started successfully / CPU halted *)
	busHz0, busHz1: ARRAY MaxCPU OF LONGINT;	(* unrounded and rounded bus speed in Hz *)
	timer: EventHandler;
	timerRate: LONGINT;	(* Hz - rate at which CPU timers run - for timeslicing and profiling *)
	stopped: BOOLEAN;	(* StopAll was called *)
	idMap: IDMap;
	revIDmap: ARRAY MaxCPU OF SHORTINT;
	time: TimeArray;
	eventCount, eventMax: LONGINT;
	event: Handler;
	expandMin, heapMinKB, heapMaxKB : SYSTEM.SIZE;

	gcThreshold-: SYSTEM.SIZE;
	memBlockHead-{UNTRACED}, memBlockTail-{UNTRACED}: MemoryBlock; (* refer to the same memory block for I386, not traced by GC *)

	traceProcessorProc*: EventHandler;	(** temp tracing *)
	traceProcessor: BOOLEAN;

	Timeslice*: Handler;

	start*: PROCEDURE;

VAR
	traceMode: SET;	(* tracing mode: Screen or V24 *)
	traceBase: SYSTEM.ADDRESS;	(* screen buffer base address *)
	tracePos: SYSTEM.SIZE;	(* current screen cursor *)
	tracePort: LONGINT;	(* serial base port *)
	traceColor: SHORTINT;	(* current screen tracing color *)

(** -- Processor identification -- *)

(** Return current processor ID (0 to MaxNum-1). *)
PROCEDURE ID* (): LONGINT;
CODE {SYSTEM.i386}
	MOV EAX, idAdr
	LEA EBX, map
	MOV EAX, [EAX]
	SHR EAX, 24
	AND EAX, 15
	MOV AL, [EBX+EAX]
END ID;

(** -- Miscellaneous -- *)

(** This procedure should be called in all spin loops as a hint to the processor (e.g. Pentium 4). *)
PROCEDURE  -SpinHint*;
CODE {SYSTEM.i386}
	XOR ECX, ECX	; just in case some processor interprets REP this way
	REP NOP		; PAUSE instruction; NOP on pre-P4 processors, Spin Loop Hint on P4 and after
END SpinHint;

(* Compare two unsigned addresses *)
PROCEDURE -LessThan* (a, b: SYSTEM.ADDRESS): BOOLEAN;
CODE {SYSTEM.i386}
	POP EBX
	POP EAX
	CMP EAX, EBX
	SETB AL
END LessThan;

PROCEDURE -LessOrEqual* (a, b: SYSTEM.ADDRESS): BOOLEAN;
CODE {SYSTEM.i386}
	POP EBX
	POP EAX
	CMP EAX, EBX
	SETBE AL
END LessOrEqual;

PROCEDURE -GreaterThan* (a, b: SYSTEM.ADDRESS): BOOLEAN;
CODE {SYSTEM.i386}
	POP EBX
	POP EAX
	CMP EAX, EBX
	SETA AL
END GreaterThan;

PROCEDURE -GreaterOrEqual* (a, b: SYSTEM.ADDRESS): BOOLEAN;
CODE {SYSTEM.i386}
	POP EBX
	POP EAX
	CMP EAX, EBX
	SETAE AL
END GreaterOrEqual;

(** Fill "size" bytes at "destAdr" with "filler". "size" must be multiple of 4. *)
PROCEDURE Fill32* (destAdr: SYSTEM.ADDRESS; size: SYSTEM.SIZE; filler: LONGINT);
CODE {SYSTEM.i386}
	MOV EDI, [EBP+destAdr]
	MOV ECX, [EBP+size]
	MOV EAX, [EBP+filler]
	TEST ECX, 3
	JZ ok
	PUSH 8	; ASSERT failure
	INT 3
ok:
	SHR ECX, 2
	CLD
	REP STOSD
END Fill32;

(** Return timer value of the current processor, or 0 if not available. *)

(* e.g. ARM does not have a fine-grained timer *)
PROCEDURE -GetTimer* (): HUGEINT;
CODE {SYSTEM.Pentium}
	RDTSC	; set EDX:EAX
END GetTimer;

(** Disable interrupts and return old interrupt state. *)
PROCEDURE -DisableInterrupts* (): SET;
CODE {SYSTEM.i386}
	PUSHFD
	CLI
	POP EAX
END DisableInterrupts;

(** Restore interrupt state. Parameter s must be return value of earlier DisableInterrupts call on same processor. *)
PROCEDURE -RestoreInterrupts* (s: SET);
CODE {SYSTEM.i386}
	POPFD
END RestoreInterrupts;

(** Return TRUE iff interrupts are enabled on the current processor. *)
PROCEDURE -InterruptsEnabled* (): BOOLEAN;
CODE {SYSTEM.i386}
	PUSHFD
	POP EAX
	SHR EAX, 9
	AND AL, 1
END InterruptsEnabled;

(** -- HUGEINT operations -- *)

(** Return h*g. based on code from "AMD Athlon Processor x86 code optimization guide" *)
PROCEDURE  MulH* (h, g: HUGEINT): HUGEINT;
CODE {SYSTEM.i386}
	MOV EDX, [EBP+12]	; y_hi
	MOV ECX, [EBP+20]	; x_hi
	OR EDX, ECX		; are x_hi and y_hi both zeros?
	MOV EDX, [EBP+16]	; x_lo
	MOV EAX, [EBP+8]	; y_lo
	JNZ fullMul			; yes, requires full multiplication
	MUL EDX			; EDX:EAX := y_lo * x_lo
	JMP exit			; done, return to caller

fullMul:					; full multiplication is required

	MUL ECX			; EAX := LO(y_lo*x_hi)
	MOV EBX, EAX		; keep the result

	MOV EAX, [EBP+12] 	; y_hi
	MUL DWORD  [EBP+16]	; EAX := LO(y_hi*x_lo)
	ADD EBX, EAX 		; EBX := LO(y_lo*x_hi) + LO(y_hi*x_lo)

	MOV EAX, [EBP+8]	; y_lo
	MUL DWORD   [EBP+16]	; EDX := HI(y_lo*x_lo), EAX := LO(y_lo*x_lo)
	ADD EDX, EBX		; EDX := y_lo*x_hi + y_hi*x_lo + HI(y_lo*x_lo)
exit:
END MulH;

(** Return h DIV g. Rounding and division by zero behaviour is currently undefined. *)
PROCEDURE DivH* (x, y: HUGEINT): HUGEINT;
CODE {SYSTEM.i386}
	MOV ECX, [EBP+12]	; y-hi
	MOV EBX, [EBP+8]	; y-lo
	MOV EDX, [EBP+20]	; x-hi
	MOV EAX, [EBP+16]	; x-lo

	MOV ESI, ECX		; y-hi
	XOR ESI, EDX		; y-hi ^ x-hi
	SAR ESI, 31			; (quotient < 0) ? -1 : 0
	MOV EDI, EDX		; x-hi
	SAR EDI, 31			; (x < 0) ? -1 : 0
	XOR EAX, EDI		; if (x < 0)
	XOR EDX, EDI		; compute 1s complement of x
	SUB EAX, EDI		; if (x < 0)
	SBB EDX, EDI		; compute 2s complement of x
	MOV EDI, ECX		; y-hi
	SAR EDI, 31			; (y < 0) ? -1 : 0
	XOR EBX, EDI		; if (y < 0)
	XOR ECX, EDI		; compute 1s complement of y
	SUB EBX, EDI		; if (y < 0)
	SBB ECX, EDI		; compute 2s complement of y
	JNZ bigDivisor		; y > 2^32-1
	CMP EDX, EBX		; only one division needed ? (ECX = 0)
	JAE twoDivs			; need two divisions
	DIV EBX			; EAX = quotient-lo
	MOV EDX, ECX		; EDX = quotient-hi = 0
	; quotient in EDX:EAX
	XOR EAX, ESI		; if (quotient < 0)
	XOR EDX, ESI		; compute 1s complement of result
	SUB EAX, ESI		; if (quotient < 0)
	SBB EDX, ESI		; compute 2s complement of result
	JMP exit			; done, return to caller

twoDivs:
	MOV ECX, EAX		; save x-lo in ECX
	MOV EAX, EDX		; get x-hi
	XOR EDX, EDX		; zero extend it into EDX:EAX
	DIV EBX			; quotient-hi in EAX
	XCHG EAX, ECX		; ECX = quotient-hi, EAX = x-lo
	DIV EBX			; EAX = quotient-lo
	MOV EDX, ECX		; EDX = quotient-hi
	; quotient in EDX:EAX
	JMP makeSign		; make quotient signed

bigDivisor:
	SUB ESP, 12			; create three local variables
	MOV [ESP], EAX		; x-lo
	MOV [ESP+4], EBX	; y-lo
	MOV [ESP+8], EDX	; x-hi
	MOV EDI, ECX		; save y-hi
	SHR EDX, 1			; shift both
	RCR EAX, 1			; y and
	ROR EDI, 1			; and x
	RCR EBX, 1			; right by 1 bit
	BSR ECX, ECX		; ECX = number of remaining shifts
	SHRD EBX, EDI, CL	; scale down y and
	SHRD EAX, EDX, CL	; x such that y
	SHR EDX, CL		; less than 2^32 (i.e. fits in EBX)
	ROL EDI, 1			; restore original y-hi
	DIV EBX			; compute quotient
	MOV EBX, [ESP]		; x-lo
	MOV ECX, EAX		; save quotient
	IMUL EDI, EAX		; quotient * y hi-word (low only)
	MUL DWORD [ESP+4]	; quotient * y lo-word
	ADD EDX, EDI		; EDX:EAX = quotient * y
	SUB EBX, EAX		; x-lo - (quot.*y)-lo
	MOV EAX, ECX		; get quotient
	MOV ECX, [ESP+8]	; x-hi
	SBB ECX, EDX		; subtract y * quot. from x
	SBB EAX, 0			; adjust quotient if remainder negative
	XOR EDX, EDX		; clear hi-word of quotient
	ADD ESP, 12		; remove local variables

makeSign:
	XOR EAX, ESI		; if (quotient < 0)
	XOR EDX, ESI		; compute 1s complement of result
	SUB EAX, ESI		; if (quotient < 0)
	SBB EDX, ESI		; compute 2s complement of result
exit:
END DivH;

(** Return ASH(h, n). *)
PROCEDURE  -ASHH* (h: HUGEINT; n: LONGINT): HUGEINT;
CODE {SYSTEM.i386}
	POP ECX
	POP EAX
	POP EDX
	CMP ECX, 0
	JL right
	AND ECX, 63	; limit count, like ASH
	JZ exit
ll:
	SHL EAX, 1
	RCL EDX, 1
	DEC ECX
	JNZ ll
	JMP exit
right:
	NEG ECX
	AND ECX, 63	; limit count, like ASH
	JZ exit
lr:
	SAR EDX, 1
	RCR EAX, 1
	DEC ECX
	JNZ lr
exit:
END ASHH;

(** Return a HUGEINT composed of high and low. *)
PROCEDURE  -LInt2ToHInt* (high, low: LONGINT): HUGEINT;
CODE {SYSTEM.i386}
	POP EAX
	POP EDX
END LInt2ToHInt;

(** Return h as a LONGREAL, with possible loss of precision. *)
PROCEDURE -HIntToLReal* (h: HUGEINT): LONGREAL;
CODE {SYSTEM.i386, SYSTEM.FPU}
	FILD QWORD [ESP]
	FWAIT
	ADD ESP, 8
END HIntToLReal;

(** -- Processor initialization -- *)
PROCEDURE -SetFCR (s: SET);
CODE {SYSTEM.i386, SYSTEM.FPU}
	FLDCW [ESP]	; parameter s
	POP EAX
END SetFCR;

PROCEDURE -FCR (): SET;
CODE {SYSTEM.i386, SYSTEM.FPU}
	PUSH 0
	FNSTCW [ESP]
	FWAIT
	POP EAX
END FCR;

PROCEDURE -InitFPU;
CODE {SYSTEM.i386, SYSTEM.FPU}
	FNINIT
END InitFPU;

(** Setup FPU control word of current processor. *)

PROCEDURE SetupFPU*;
BEGIN
	InitFPU; SetFCR(fcr)
END SetupFPU;

(* Set up flags (3, p. 20)
	Bit
	1,3,5,15,19..31 - no change
	0,2,4,6..7,11 - CF,PF,AF,ZF,SF,OF off
	8 - TF off
	9 - IF off (no interrupts)
	10 - DF off
	12..13 - IOPL = 3
	14 - NT off (no Windows)
	16 - RF off (no Interference)
	17- VM off (no virtual 8086 mode)
	18 - AC off (no 486 alignment checks) *)

PROCEDURE -SetupFlags;
CODE {SYSTEM.i386}
	PUSHFD
	AND DWORD [ESP], 0FFF8802AH
	OR DWORD [ESP], 3000H
	POPFD
END SetupFlags;

(* Set up various 486-specific flags (3, p. 23)
	1. Enable exception 16 on math errors.
	2. Disable supervisor mode faults on write to read-only pages
		(386-compatible for stack checking).
	3. Enable the Alignment Check field in EFLAGS *)

PROCEDURE -Setup486Flags;
CODE {SYSTEM.i386, SYSTEM.Privileged}
	MOV EAX, CR0
	OR EAX, 00040020H
	AND EAX, 0FFFEFFFFH
	MOV CR0, EAX
END Setup486Flags;

(* Set up 586-specific things *)
PROCEDURE -Setup586Flags;
CODE {SYSTEM.Pentium, SYSTEM.Privileged}
	MOV EAX, CR4
	BTR EAX, 2		; clear TSD
	MOV CR4, EAX
END Setup586Flags;

(* setup SSE and SSE2 extension *)
PROCEDURE SetupSSE2Ext;
CONST
	FXSRFlag = 24; (*IN features from EBX*)
	SSEFlag = 25;
	SSE2Flag = 26;
	SSE3Flag = 0; (*IN features2 from ECX*) (*PH 04/11*)
	SSSE3Flag =9;
	SSE41Flag =19; 
	SSE42Flag =20;
	SSE5Flag = 11; 
	AVXFlag = 28;
BEGIN
	SSE2Support := FALSE;
	SSE3Support := FALSE;
	SSSE3Support := FALSE;
	SSE41Support := FALSE;
	SSE42Support := FALSE;
	SSE5Support := FALSE;
	AVXSupport := FALSE;
	(* checking for SSE support *)
	IF SSEFlag IN features THEN
		SSESupport := TRUE;
		(* checking for SSE2 support *)
		IF SSE2Flag IN features THEN SSE2Support := TRUE;
			(* checking for SSE3... support*)(*PH 04/11*)
			IF SSE3Flag IN features2 THEN SSE3Support := TRUE;
				IF SSSE3Flag IN features2 THEN	SSSE3Support := TRUE END;
				IF SSE41Flag IN features2 THEN	SSE41Support := TRUE;
					IF SSE42Flag IN features2 THEN	SSE42Support := TRUE END;
				END;
				IF SSE5Flag IN features2 THEN SSE5Support := TRUE END;
				IF AVXFlag IN features2 THEN AVXSupport := TRUE END;
			END;
		END;
		(* checking for support for the FXSAVE and FXRSTOR instruction *)
		IF FXSRFlag IN features THEN InitSSE END;
	END;
END SetupSSE2Ext;

PROCEDURE -InitSSE;
CODE {SYSTEM.Pentium, SYSTEM.Privileged}
	MOV	EAX, CR4
	OR	EAX, 00000200H		; set bit 9 (OSFXSR)
	AND	EAX, 0FFFFFBFFH	; delete bit 10 (OSXMMEXCPT)
	MOV	CR4, EAX
END InitSSE;

(* Disable exceptions caused by math in new task. (1, p. 479) *)
PROCEDURE -DisableMathTaskEx;
CODE {SYSTEM.i386, SYSTEM.Privileged}
	MOV EAX,CR0
	AND AL, 0F5H
	MOV CR0, EAX;
END DisableMathTaskEx;

(* Disable math emulation (1, p. 479) , bit 2 of CR0 *)
PROCEDURE -DisableEmulation;
CODE {SYSTEM.i386, SYSTEM.Privileged}
	MOV EAX, CR0
	AND AL, 0FBH
	MOV CR0, EAX
END DisableEmulation;

(** CPU identification *)
PROCEDURE CPUID*(function : LONGINT; VAR eax, ebx, ecx, edx : SET);
CODE {SYSTEM.i386, SYSTEM.Pentium}
	MOV EAX, [EBP+function]	; CPUID function parameter

	MOV ESI, [EBP+ecx]		; copy ecx into ECX (sometimes used as input parameter)
	MOV ECX, [ESI]

	CPUID					; execute CPUID

	MOV ESI, [EBP+eax]		; copy EAX into eax;
	MOV [ESI], EAX
	MOV ESI, [EBP+ebx]		; copy EBX into ebx
	MOV [ESI], EBX
	MOV ESI, [EBP+ecx]		; copy ECX into ecx
	MOV [ESI], ECX
	MOV ESI, [EBP+edx]		; copy EDX into edx
	MOV [ESI], EDX
END CPUID;

(* If the CPUID instruction is supported, the ID flag (bit 21) of the EFLAGS register is r/w *)
PROCEDURE CpuIdSupported*() : BOOLEAN;
CODE {SYSTEM.i386}
	PUSHFD					; save EFLAGS
	POP EAX				; store EFLAGS in EAX
	MOV EBX, EAX			; save EBX for later testing
	XOR EAX, 00200000H	; toggle bit 21
	PUSH EAX				; push to stack
	POPFD					; save changed EAX to EFLAGS
	PUSHFD					; push EFLAGS to TOS
	POP EAX				; store EFLAGS in EAX
	CMP EAX, EBX			; see if bit 21 has changed
	SETNE AL;				; return TRUE if bit 21 has changed, FALSE otherwise
END CpuIdSupported;

(** Initialise current processor. Must be called by every processor. *)
PROCEDURE InitProcessor*;
BEGIN
	SetupFlags;
	Setup486Flags;
	Setup586Flags;
	DisableMathTaskEx;
	DisableEmulation;
	SetupFPU;
	SetupSSE2Ext
END InitProcessor;

(** Initialize APIC ID address. *)
PROCEDURE InitAPICIDAdr* (adr: SYSTEM.ADDRESS; CONST m: IDMap);
VAR s: SET;
BEGIN
	s := DisableInterrupts ();
	idAdr := adr; map := m;
	RestoreInterrupts (s)
END InitAPICIDAdr;

PROCEDURE InitBoot;
VAR
	largestFunction, i: LONGINT;
	eax, ebx, ecx, edx : SET;
	logicalProcessorCount : LONGINT;
	u: ARRAY 8 OF CHAR; vendor : Vendor;

	PROCEDURE GetString(VAR string : ARRAY OF CHAR; offset : LONGINT; register : SET);
	BEGIN
		string[offset] :=CHR(SYSTEM.VAL(LONGINT, register * {0..7}));
		string[offset+1] := CHR(SYSTEM.VAL(LONGINT, SYSTEM.LSH(register * {8..15}, -8)));
		string[offset+2] := CHR(SYSTEM.VAL(LONGINT, SYSTEM.LSH(register * {16..23}, -16)));
		string[offset+3] := CHR(SYSTEM.VAL(LONGINT, SYSTEM.LSH(register * {24..31}, -24)));
	END GetString;

BEGIN
	vendor := "Unknown"; features := {}; features2 := {};
	coresPerProcessor := 1; threadsPerCore := 1;
	IF CpuIdSupported() THEN
		(* Assume that all processors are the same *)

		(* CPUID standard function 0 returns:  eax: largest CPUID standard function supported, ebx, edx, ecx: vendor string *)
		CPUID(0, eax, ebx, ecx, edx);
		largestFunction := SYSTEM.VAL(LONGINT, eax);
		ASSERT(LEN(vendor) >= 13);
		GetString(vendor, 0, ebx); GetString(vendor, 4, edx); GetString(vendor, 8, ecx); vendor[12] := 0X;

		IF (largestFunction >= 1) THEN

			(* CPUID standard function 1 returns: CPU features in ecx & edx *)
			CPUID(1, eax, ebx, ecx, edx);
			features := SYSTEM.VAL(SET, edx);
			features2 := SYSTEM.VAL(SET, ecx);

			(* 	The code below is used to determine the number of threads per processor core (hyperthreading). This is required
				since processors supporting hyperthreading are listed only once in the MP tables, so we need to know the
				exact number of threads per processor to start the processor correctly *)

			IF (HTT IN features) THEN (* multithreading supported by CPU *)
				(* logical processor count = number of cores * number of threads per core = total number of threads supported *)
				logicalProcessorCount := SYSTEM.VAL(LONGINT, SYSTEM.LSH(ebx * {16..23}, -16));
				IF (vendor = "GenuineIntel") THEN
					IF (largestFunction >= 4) THEN
						(* CPUID standard function 4 returns: number of processor cores -1 on this die eax[26.31] *)
						ecx := SYSTEM.VAL(SET, 0); (* input parameter - must be set to 0 *)
						CPUID(4, eax, ebx, ecx, edx);
						coresPerProcessor := SYSTEM.VAL(LONGINT, SYSTEM.LSH(eax * {26..31}, -26)) + 1;
						threadsPerCore := logicalProcessorCount DIV coresPerProcessor;
					ELSE
						threadsPerCore := logicalProcessorCount;
					END;
				ELSIF (vendor = "AuthenticAMD") THEN
					(* CPUID extended function 1 returns: largest extended function *)
					CPUID(SHORT(80000000H), eax, ebx, ecx, edx);
					largestFunction := SYSTEM.VAL(LONGINT, eax - {31}); (* remove sign *)
					IF (largestFunction >= 8) THEN
						(* CPUID extended function 8 returns: *)
						CPUID(SHORT(80000008H), eax, ebx, ecx, edx);
						coresPerProcessor := SYSTEM.VAL(LONGINT, ecx * {0..7}) + 1;
						threadsPerCore := logicalProcessorCount DIV coresPerProcessor;
					ELSIF (largestFunction >= 1) THEN
						(* CPUID extended function 1 returns CmpLegacy bit in ecx *)
						CPUID(SHORT(80000001H), eax, ebx, ecx, edx);
						IF 1 IN ecx THEN (* CmpLegacy bit set -> no hyperthreading *)
							coresPerProcessor := logicalProcessorCount;
							threadsPerCore := 1;
						END;
					ELSE
						(* single-core, single-thread *)
					END;
				ELSE
					Trace.String("Machine: "); Trace.Yellow; Trace.String("Warning: Cannot detect hyperthreading, unknown CPU vendor ");
					Trace.String(vendor); Trace.Ln; Trace.Default;
				END;
			END;
		END;
	END;
	Trace.String("Machine: "); Trace.Int(coresPerProcessor, 0); Trace.String(" cores per physical package, ");
	Trace.Int(threadsPerCore, 0); Trace.String(" threads per core.");
	Trace.Ln;
	InitFPU;
	fcr := (FCR () - {0, 2, 3, 10, 11}) + {0 .. 5, 8, 9};	(* default FCR RC=00B *)
	bootID := 0; map[0] := 0;
	idAdr := SYSTEM.ADR (bootID);
	(* allow user to specify GetTimer rate, for tracing purposes *)
	GetConfig ("MHz", u);
	i := 0; mhz := StrToInt (i, u);
END InitBoot;

(** -- Configuration and bootstrapping -- *)

(** Return the value of the configuration string specified by parameter name in parameter val. Returns val = "" if the string was not found, or has an empty value. *)
PROCEDURE GetConfig* (CONST name: ARRAY OF CHAR; VAR val: ARRAY OF CHAR);
VAR i, src: LONGINT; ch: CHAR;
BEGIN
	ASSERT (name[0] # "=");	(* no longer supported, use GetInit instead *)
	src := 0;
	LOOP
		ch := config[src];
		IF ch = 0X THEN EXIT END;
		i := 0;
		LOOP
			ch := config[src];
			IF (ch # name[i]) OR (name[i] = 0X) THEN EXIT END;
			INC (i); INC (src)
		END;
		IF (ch = 0X) & (name[i] = 0X) THEN	(* found: (src^ = 0X) & (name[i] = 0X) *)
			i := 0;
			REPEAT
				INC (src); ch := config[src]; val[i] := ch; INC (i);
				IF i = LEN(val) THEN val[i - 1] := 0X; RETURN END	(* val too short *)
			UNTIL ch = 0X;
			val[i] := 0X; RETURN
		ELSE
			WHILE ch # 0X DO	(* skip to end of name *)
				INC (src); ch := config[src]
			END;
			INC (src);
			REPEAT	(* skip to end of value *)
				ch := config[src]; INC (src)
			UNTIL ch = 0X
		END
	END;
	val[0] := 0X
END GetConfig;

(** Get CHS parameters of first two BIOS-supported hard disks. *)
PROCEDURE GetDiskCHS* (d: LONGINT; VAR cyls, hds, spt: LONGINT);
BEGIN
	cyls := chs[d].cyls; hds := chs[d].hds; spt := chs[d].spt
END GetDiskCHS;

(** Get parameter values from Init string. If n = 0, return val = ASH(bx, 16) + ax, and if n = 1, return val = ASH(dx, 16) + cx, where ax, bx, cx, dx are the register values after the OBL boot loader or noboot.exe have executed the 16-bit x86 code in the Init string. *)
PROCEDURE GetInit* (n: LONGINT; VAR val: LONGINT);
BEGIN
	val := initRegs[n]
END GetInit;

(** Convert a string to an integer. Parameter i specifies where in the string scanning should begin (usually 0 in the first call). Scanning stops at the first non-valid character, and i returns the updated position. Parameter s is the string to be scanned. The value is returned as result, or 0 if not valid. Syntax: number = ["-"] digit {digit} ["H" | "h"] . digit = "0" | ... "9" | "A" .. "F" | "a" .. "f" . If the number contains any hexdecimal letter, or if it ends in "H" or "h", it is interpreted as hexadecimal. *)
PROCEDURE StrToInt* (VAR i: LONGINT; CONST s: ARRAY OF CHAR): LONGINT;
VAR vd, vh, sgn, d: LONGINT; hex: BOOLEAN;
BEGIN
	vd := 0; vh := 0; hex := FALSE;
	IF s[i] = "-" THEN sgn := -1; INC (i) ELSE sgn := 1 END;
	LOOP
		IF (s[i] >= "0") & (s[i] <= "9") THEN d := ORD (s[i])-ORD ("0")
		ELSIF (CAP (s[i]) >= "A") & (CAP (s[i]) <= "F") THEN d := ORD (CAP (s[i]))-ORD ("A") + 10; hex := TRUE
		ELSE EXIT
		END;
		vd := 10*vd + d; vh := 16*vh + d;
		INC (i)
	END;
	IF CAP (s[i]) = "H" THEN hex := TRUE; INC (i) END;	(* optional H *)
	IF hex THEN vd := vh END;
	RETURN sgn * vd
END StrToInt;

(* Delay for IO *)
PROCEDURE -Wait*;
CODE {SYSTEM.i386}
	JMP 0
	JMP 0
	JMP 0
END Wait;

(* Reset processor by causing a double fault. *)
PROCEDURE Reboot;
CODE {SYSTEM.i386, SYSTEM.Privileged}
	PUSH 0
	PUSH 0
	LIDT [ESP]
	INT 3
END Reboot;

PROCEDURE -Cli*;
CODE{SYSTEM.i386}
	CLI
END Cli;

PROCEDURE -Sti*;
CODE{SYSTEM.i386}
	STI
END Sti;


(** Shut down the system. If parameter reboot is set, attempt to reboot the system. *)
PROCEDURE Shutdown* (reboot: BOOLEAN);
VAR i: LONGINT;
BEGIN
	Cli;
	IF reboot THEN	(* attempt reboot *)
		Portout8 (70H, 8FX);			(* Reset type: p. 5-37 AT Tech. Ref. *)
		Wait; Portout8 (71H, 0X);		(* Note: soft boot flag was set in InitMemory *)
		Wait; Portout8 (70H, 0DX);
		Wait; Portout8 (64H, 0FEX);	(* reset CPU *)
		FOR i := 1 TO 10000 DO END;
		Reboot
	END;
	LOOP END
END Shutdown;

(* Get hard disk parameters. *)
PROCEDURE GetPar (p: SYSTEM.ADDRESS; ofs: LONGINT): LONGINT;
VAR ch: CHAR;
BEGIN
	SYSTEM.GET (p + 12 + ofs, ch);
	RETURN ORD (ch)
END GetPar;

(* Read boot table. *)
PROCEDURE ReadBootTable (bt: SYSTEM.ADDRESS);
VAR i, p: SYSTEM.ADDRESS; j, d, type, addr, size, heapSize: LONGINT; ch: CHAR;
BEGIN
	heapSize := 0; lowTop := 0;
	p := bt; d := 0;
	LOOP
		SYSTEM.GET (p, type);
		IF type = -1 THEN
			EXIT	(* end *)
		ELSIF type = 3 THEN	(* boot memory/top of low memory *)
			SYSTEM.GET (p + 8, addr); SYSTEM.GET (p + 12, size);
			lowTop := addr + size
		ELSIF type = 4 THEN	(* free memory/extended memory size *)
			SYSTEM.GET (p + 8, addr); SYSTEM.GET (p + 12, size);
			IF addr = HeapAdr THEN heapSize := size END
		ELSIF type = 5 THEN	(* HD config *)
			IF d < MaxDisks THEN
				chs[d].cyls := GetPar (p, 0) + 100H * GetPar (p, 1);
				chs[d].hds := GetPar (p, 2); chs[d].spt := GetPar (p, 14);
				INC (d)
			END
		ELSIF type = 8 THEN	(* config strings *)
			i := p + 8; j := 0;	(* copy the config strings over *)
			LOOP
				SYSTEM.GET (i, ch); config[j] := ch; INC (i); INC (j);
				IF ch = 0X THEN EXIT END;
				REPEAT SYSTEM.GET (i, ch); config[j] := ch; INC (i); INC (j) UNTIL ch = 0X;	(* end of name *)
				REPEAT SYSTEM.GET (i, ch); config[j] := ch; INC (i); INC (j) UNTIL ch = 0X	(* end of value *)
			END
		END;
		SYSTEM.GET (p + 4, size); INC (p, size)
	END;
	ASSERT((heapSize # 0) & (lowTop # 0));
	memTop := HeapAdr + heapSize
END ReadBootTable;

(** Read a byte from the non-volatile setup memory. *)
PROCEDURE GetNVByte* (ofs: LONGINT): CHAR;
VAR c: CHAR;
BEGIN
	Portout8 (70H, CHR(ofs)); Wait; Portin8(71H, c);
	RETURN c
END GetNVByte;

(** Write a byte to the non-volatile setup memory. *)
PROCEDURE PutNVByte* (ofs: LONGINT; val: CHAR);
BEGIN
	Portout8 (70H, CHR(ofs)); Wait; Portout8 (71H, val)
END PutNVByte;

(** Compute a checksum for the Intel SMP spec floating pointer structure. *)
PROCEDURE ChecksumMP* (adr: SYSTEM.ADDRESS; size: SYSTEM.SIZE): LONGINT;
VAR sum: LONGINT; x: SYSTEM.ADDRESS; ch: CHAR;
BEGIN
	sum := 0;
	FOR x := adr TO adr + size-1 DO
		SYSTEM.GET (x, ch);
		sum := (sum + ORD(ch)) MOD 256
	END;
	RETURN sum
END ChecksumMP;

(* Search for MP floating pointer structure. *)
PROCEDURE SearchMem (adr: SYSTEM.ADDRESS; size: SYSTEM.SIZE): SYSTEM.ADDRESS;
VAR x, len: LONGINT; ch: CHAR;
BEGIN
	WHILE size > 0 DO
		SYSTEM.GET (adr, x);
		IF x = 05F504D5FH THEN	(* "_MP_" found *)
			SYSTEM.GET (adr + 8, ch); len := ORD(ch)*16;
			IF len > 0 THEN
				SYSTEM.GET (adr + 9, ch);
				IF (ch = 1X) OR (ch >= 4X) THEN	(* version 1.1 or 1.4 or higher *)
					IF ChecksumMP(adr, len) = 0 THEN
						RETURN adr		(* found *)
					END
				END
			END
		END;
		INC (adr, 16); DEC (size, 16)
	END;
	RETURN NilAdr	(* not found *)
END SearchMem;

(* Search for MP spec info. *)
PROCEDURE SearchMP;
VAR adr: SYSTEM.ADDRESS;
BEGIN
	adr := 0;
	SYSTEM.GET (040EH, SYSTEM.VAL (INTEGER, adr));	(* EBDA address *)
	adr := adr*16;
	IF adr < 100000H THEN adr := SearchMem(adr, 1024)	(* 1. look in EBDA *)
	ELSE adr := NilAdr
	END;
	IF adr = NilAdr THEN	(* 2. look in last kb of base memory *)
		adr := SearchMem(lowTop + (-lowTop) MOD 10000H - 1024, 1024);
		IF adr = NilAdr THEN	(* 3. look at top of physical memory *)
			adr := SearchMem(memTop - 1024, 1024);
			IF adr = NilAdr THEN	(* 4. look in BIOS ROM space *)
				adr := SearchMem(0E0000H, 20000H)
			END
		END
	END;
	IF adr = NilAdr THEN
		revMP := 0X; configMP := NilAdr
	ELSE
		SYSTEM.GET (adr + 9, revMP);
		SYSTEM.MOVE(adr + 11, SYSTEM.ADR(featureMP[0]), 5);	(* feature bytes *)
		configMP := SYSTEM.GET32 (adr + 4);	(* physical address outside reported RAM (spec 1.4 p. 4-2) *)
		IF configMP = 0 THEN configMP := NilAdr END
	END
END SearchMP;

(* Allocate area for ISA DMA. *)
PROCEDURE AllocateDMA;
VAR old: SYSTEM.ADDRESS;
BEGIN
	old := lowTop;
	dmaSize := DefaultDMASize*1024;
	ASSERT((dmaSize >= 0) & (dmaSize <= 65536));
	IF (lowTop-dmaSize) DIV 65536 # (lowTop-1) DIV 65536 THEN	(* crosses 64KB boundary *)
		DEC (lowTop, lowTop MOD 65536)	(* round down to 64KB boundary *)
	END;
	DEC (lowTop, dmaSize);	(* allocate memory *)
	dmaSize := old - lowTop	(* how much was allocated (including rounding) *)
END AllocateDMA;

(* Check if the specified address is RAM. *)
PROCEDURE IsRAM(adr: SYSTEM.ADDRESS): BOOLEAN;
CONST Pattern1 = SHORT(0BEEFC0DEH); Pattern2 = SHORT(0AA55FF00H);
VAR save, x: LONGINT; ok: BOOLEAN;
BEGIN
	ok := FALSE;
	SYSTEM.GET (adr, save);
	SYSTEM.PUT (adr, Pattern1);	(* attempt 1st write *)
	x := Pattern2;				(* write something else *)
	SYSTEM.GET (adr, x);		(* attempt 1st read *)
	IF x = Pattern1 THEN		(* first test passed *)
		SYSTEM.PUT (adr, Pattern2);	(* attempt 2nd write *)
		x := Pattern1;			(* write something else *)
		SYSTEM.GET (adr, x);	(* attempt 2nd read *)
		ok := (x = Pattern2)
	END;
	SYSTEM.PUT (adr, save);
	RETURN ok
END IsRAM;

(* Check amount of memory available and update memTop. *)
PROCEDURE CheckMemory;
CONST M = 100000H; ExtMemAdr = M; Step = M;
VAR s: ARRAY 16 OF CHAR; adr: SYSTEM.ADDRESS; i: LONGINT;
BEGIN
	GetConfig("ExtMemSize", s);	(* in MB *)
	IF s[0] # 0X THEN	(* override detection *)
		i := 0; memTop := ExtMemAdr + StrToInt(i, s) * M;
		Trace.String("Machine: Memory: ");
	ELSE
		Trace.String("Machine: Detecting memory... ");
		IF memTop >= 15*M THEN	(* search for more memory (ignore aliasing) *)
			adr := memTop-4;
			WHILE (SYSTEM.LSH(memTop, -12) < SYSTEM.LSH(MaxMemTop, -12)) & IsRAM(adr) DO
				memTop := adr + 4;
				INC (adr, Step)
			END;
			IF (memTop <= 0) THEN memTop := 2047 * M ; END;
		END
	END;
	Trace.Green; Trace.IntSuffix(memTop, 0, "B"); Trace.Ln; Trace.Default;
END CheckMemory;

(* Initialize locks. *)
PROCEDURE InitLocks;
VAR i: LONGINT; s: ARRAY 12 OF CHAR;
BEGIN
	IF TimeCount # 0 THEN
		GetConfig("LockTimeout", s);
		i := 0; maxTime := StrToInt(i, s);
		IF maxTime > MAX(LONGINT) DIV 1000000 THEN
			maxTime := MAX(LONGINT)
		ELSE
			maxTime := maxTime * 1000000
		END
	END;
	FOR i := 0 TO MaxCPU-1 DO
		proc[i].locksHeld := {}; proc[i].preemptCount := 0
	END;
	FOR i := 0 TO MaxLocks-1 DO
		lock[i].locked := FALSE
	END
END InitLocks;

(* Return flags state. *)
PROCEDURE -GetFlags (): SET;
CODE {SYSTEM.i386}
	PUSHFD
	POP EAX
END GetFlags;

(* Set flags state. *)
PROCEDURE -SetFlags (s: SET);
CODE {SYSTEM.i386}
	POPFD
END SetFlags;

PROCEDURE -PushFlags*;
CODE {SYSTEM.i386}
	PUSHFD
END PushFlags;

PROCEDURE -PopFlags*;
CODE {SYSTEM.i386}
	POPFD
END PopFlags;

(** Disable preemption on the current processor (increment the preemption counter). Returns the current processor ID as side effect. *)
PROCEDURE AcquirePreemption* (): LONGINT;
VAR id: LONGINT;
BEGIN
	PushFlags; Cli;
	id := ID ();
	INC (proc[id].preemptCount);
	PopFlags;
	RETURN id
END AcquirePreemption;

(** Enable preemption on the current processor (decrement the preemption counter). *)
PROCEDURE ReleasePreemption*;
VAR id: LONGINT;
BEGIN
	PushFlags; Cli;
	id := ID ();
	IF StrongChecks THEN
		ASSERT(proc[id].preemptCount > 0)
	END;
	DEC (proc[id].preemptCount);
	PopFlags
END ReleasePreemption;

(** Return the preemption counter of the current processor (specified in parameter). *)
PROCEDURE PreemptCount* (id: LONGINT): LONGINT;
BEGIN
	IF StrongChecks THEN
		(*ASSERT(~(9 IN GetFlags ()));*)	(* interrupts off *)	(* commented out because check is too strong *)
		ASSERT(id = ID ())	(* caller must specify current processor *)
	END;
	RETURN proc[id].preemptCount
END PreemptCount;

(* Spin waiting for a lock. Return AL = 1X iff timed out. *)
PROCEDURE AcquireSpinTimeout(VAR locked: BOOLEAN; count: LONGINT; flags: SET): CHAR;
CODE {SYSTEM.i386}
	MOV ESI, [EBP+flags]		; ESI := flags
	MOV EDI, [EBP+count]	; EDI := count
	MOV EBX, [EBP+locked]	; EBX := ADR(locked)
	MOV AL, 1				; AL := 1
	CLI						; switch interrupts off before acquiring lock

test:
	CMP [EBX], AL			; locked? { AL = 1 }
	JE wait					; yes, go wait
	XCHG [EBX], AL			; set and read the lock atomically. LOCK prefix implicit.
	CMP AL, 1				; was locked?
	JNE exit					; no, we have it now, interrupts are off, and AL # 1

wait:
; ASSERT(AL = 1)
	XOR ECX, ECX			; just in case some processor interprets REP this way
	REP NOP				; PAUSE instruction; see SpinHint

	TEST ESI, 200H			; bit 9 - IF
	JZ intoff
	STI						; restore interrupt state quickly to allow pending interrupts (e.g. Processors.StopAll/Broadcast)
	NOP					; NOP required, otherwise STI; CLI not interruptable
	CLI						; disable interrupts
intoff:

	DEC EDI				; counter
	JNZ test					; not timed out yet
	OR EDI, [EBP+count]		; re-fetch original value & set flags
	JZ test					; if count = 0, retry forever
; timed out (AL = 1)

exit:
END AcquireSpinTimeout;

(** Acquire a spin-lock and disable interrupts. *)
PROCEDURE  Acquire* (level: LONGINT);
VAR id, i: LONGINT; flags: SET; start: HUGEINT;
BEGIN
	id := AcquirePreemption ();
	flags := GetFlags ();	(* store state of interrupt flag *)
	IF StrongChecks THEN
		ASSERT(~(9 IN flags) OR (proc[id].locksHeld = {}));	(* interrupts enabled => no locks held *)
		ASSERT(~(level IN proc[id].locksHeld))	(* recursive locks not allowed *)
	END;
	IF (TimeCount = 0) OR (maxTime = 0) THEN
		IF AcquireSpinTimeout(lock[level].locked, 0, flags) = 0X THEN END;	(* {interrupts off} *)
	ELSE
		start := GetTimer ();
		WHILE AcquireSpinTimeout(lock[level].locked, TimeCount, flags) = 1X DO
			IF GetTimer () - start > maxTime THEN
				trapState := proc;
				trapLocksBusy := {};
				FOR i := 0 TO MaxLocks-1 DO
					IF lock[i].locked THEN INCL(trapLocksBusy, i) END
				END;
				HALT(1301)	(* Lock timeout - see Traps *)
			END
		END
	END;
	IF proc[id].locksHeld = {} THEN
		proc[id].state := flags
	END;
	INCL(proc[id].locksHeld, level);	(* we now hold the lock *)
	IF StrongChecks THEN	(* no lower-level locks currently held by this processor *)
		ASSERT((level = 0) OR (proc[id].locksHeld * {0..level-1} = {}))
	END
END Acquire;

(** Release a spin-lock. Switch on interrupts when last lock released. *)
PROCEDURE Release* (level: LONGINT);
VAR id: LONGINT; flags: SET;
BEGIN	(* {interrupts off} *)
	id := ID ();
	IF StrongChecks THEN
		ASSERT(~(9 IN GetFlags ()));	(* {interrupts off} *)
		ASSERT(lock[level].locked);
		ASSERT(level IN proc[id].locksHeld)
	END;
	EXCL(proc[id].locksHeld, level);
	IF proc[id].locksHeld = {} THEN
		flags := proc[id].state ELSE flags := GetFlags ()
	END;
	lock[level].locked := FALSE;
	SetFlags(flags);
	ReleasePreemption
END Release;

(** Acquire all locks. Only for exceptional cases. *)
PROCEDURE AcquireAll*;
VAR lock: LONGINT;
BEGIN
	FOR lock := HighestLock TO LowestLock BY -1 DO Acquire(lock) END
END AcquireAll;

(** Release all locks. Reverse of AcquireAll. *)
PROCEDURE ReleaseAll*;
VAR lock: LONGINT;
BEGIN
	FOR lock := LowestLock TO HighestLock DO Release(lock) END
END ReleaseAll;

(** Break all locks held by current processor (for exception handling). Returns levels released. *)
PROCEDURE BreakAll* (): SET;
VAR id, level: LONGINT; released: SET;
BEGIN
	id := AcquirePreemption ();
	PushFlags; Cli;
	released := {};
	FOR level := 0 TO MaxLocks-1 DO
		IF level IN proc[id].locksHeld THEN
			lock[level].locked := FALSE;	(* break the lock *)
			EXCL(proc[id].locksHeld, level);
			INCL(released, level)
		END
	END;
	IF proc[id].preemptCount > 1 THEN INCL(released, Preemption) END;
	proc[id].preemptCount := 0;	(* clear preemption flag *)
	PopFlags;
	RETURN released
END BreakAll;

(** Acquire a fine-grained lock on an active object. *)
PROCEDURE AcquireObject* (VAR locked: BOOLEAN);
CODE {SYSTEM.i386}
	PUSHFD
	MOV EBX, [EBP+locked]	; EBX := ADR(locked)
	MOV AL, 1
test:
	CMP [EBX], AL			; locked? { AL = 1 }
	JNE try
	XOR ECX, ECX			; just in case some processor interprets REP this way
	STI
	REP NOP				; PAUSE instruction; see SpinHint
	CLI
	JMP test
try:
	XCHG [EBX], AL			; set and read the lock atomically. LOCK prefix implicit.
	CMP AL, 1				; was locked?
	JE test					; yes, try again
	POPFD
END AcquireObject;

(** Release an active object lock. *)
PROCEDURE ReleaseObject* (VAR locked: BOOLEAN);
CODE {SYSTEM.i386}
	MOV EBX, [EBP+locked]	; EBX := ADR(locked)
	MOV BYTE [EBX], 0
END ReleaseObject;

(* Load global descriptor table *)
PROCEDURE LoadGDT(base: SYSTEM.ADDRESS; size: SYSTEM.SIZE);
CODE {SYSTEM.i386, SYSTEM.Privileged}
	SHL DWORD [EBP+size], 16
	MOV EBX, 2
	LGDT [EBP+EBX+size]
END LoadGDT;

(* Load segment registers *)
PROCEDURE LoadSegRegs(data: LONGINT);
CODE {SYSTEM.i386}
	MOV EAX, [EBP+data]
	MOV DS, AX
	MOV ES, AX
	XOR EAX, EAX
	MOV FS, AX
	MOV GS, AX
END LoadSegRegs;

(* Return CS. *)
PROCEDURE -CS* (): LONGINT;
CODE {SYSTEM.i386}
	XOR EAX, EAX
	MOV AX, CS
END CS;

(** -- Memory management -- *)

(* Allocate a physical page below 1M. Parameter adr returns physical and virtual address (or NilAdr).*)
PROCEDURE NewLowPage(VAR adr: SYSTEM.ADDRESS);
BEGIN
	adr := freeLowPage;
	IF freeLowPage # NilAdr THEN
		SYSTEM.GET (freeLowPage, freeLowPage);	(* freeLowPage := freeLowPage.next *)
		DEC(freeLowPages)
	END
END NewLowPage;

(* Allocate a directly-mapped page. Parameter adr returns physical and virtual address (or NilAdr). *)
PROCEDURE NewDirectPage(VAR adr: SYSTEM.ADDRESS);
BEGIN
	IF pageHeapAdr # heapEndAdr THEN
		DEC(pageHeapAdr, PS); adr := pageHeapAdr;
		DEC(freeHighPages)
	ELSE
		adr := NilAdr
	END
END NewDirectPage;

(* Allocate a physical page. *)
PROCEDURE NewPage(VAR physAdr: SYSTEM.ADDRESS);
VAR sp, prev: SYSTEM.ADDRESS;
BEGIN
	SYSTEM.GET(pageStackAdr + NodeSP, sp);
	ASSERT((sp >= MinSP) & (sp <= MaxSP) & (sp MOD AddressSize = 0));	(* index check *)
	IF sp > MinSP THEN	(* stack not empty, pop entry *)
		DEC(sp, AddressSize);
		SYSTEM.GET (pageStackAdr+sp, physAdr);
		SYSTEM.PUT (pageStackAdr+NodeSP, sp);
		SYSTEM.GET (pageStackAdr+NodePrev, prev);
		IF (sp = MinSP) & (prev # NilAdr) THEN
			pageStackAdr := prev
		END;
		DEC(freeHighPages)
	ELSE
		NewDirectPage(physAdr)
	END
END NewPage;

(* Deallocate a physical page. *)
PROCEDURE DisposePage(physAdr: SYSTEM.ADDRESS);
VAR sp, next, newAdr: SYSTEM.ADDRESS;
BEGIN
	SYSTEM.GET (pageStackAdr + NodeSP, sp);
	ASSERT((sp >= MinSP) & (sp <= MaxSP) & (sp MOD AddressSize = 0));	(* index check *)
	IF sp = MaxSP THEN	(* current stack full *)
		SYSTEM.GET (pageStackAdr + NodeNext, next);
		IF next # NilAdr THEN	(* next stack exists, make it current *)
			pageStackAdr := next;
			SYSTEM.GET (pageStackAdr+NodeSP, sp);
			ASSERT(sp = MinSP)	(* must be empty *)
		ELSE	(* allocate next stack *)
			NewDirectPage(newAdr);
			IF newAdr = NilAdr THEN
				NewLowPage(newAdr);	(* try again from reserve *)
				IF newAdr = NilAdr THEN
					IF Stats THEN INC(NlostPages) END;
					RETURN	(* give up (the disposed page is lost) *)
				ELSE
					IF Stats THEN INC(NreservePagesUsed) END
				END
			END;
			sp := MinSP;	(* will be written to NodeSP below *)
			SYSTEM.PUT (newAdr + NodeNext, next);
			SYSTEM.PUT (newAdr + NodePrev, pageStackAdr);
			pageStackAdr := newAdr
		END
	END;
		(* push entry on current stack *)
	SYSTEM.PUT (pageStackAdr + sp, physAdr);
	SYSTEM.PUT (pageStackAdr + NodeSP, sp + AddressSize);
	INC(freeHighPages)
END DisposePage;

(* Allocate virtual address space for mapping. Parameter size must be multiple of page size. Parameter virtAdr returns virtual address or NilAdr on failure. *)
PROCEDURE NewVirtual(VAR virtAdr: SYSTEM.ADDRESS; size: SYSTEM.SIZE);
BEGIN
	ASSERT(size MOD PS = 0);
	IF mapTop+size > MapAreaAdr+MapAreaSize THEN
		virtAdr := NilAdr		(* out of virtual space *)
	ELSE
		virtAdr := mapTop;
		INC(mapTop, size)
	END
END NewVirtual;

PROCEDURE DisposeVirtual(virtAdr: SYSTEM.ADDRESS; size: SYSTEM.SIZE);
	(* to do *)
END DisposeVirtual;

(* Map a physical page into the virtual address space. Parameter virtAdr is mapped address and phys is mapping value. Returns TRUE iff mapping successful. *)
PROCEDURE MapPage(virtAdr, phys: SYSTEM.ADDRESS): BOOLEAN;
VAR i, pt: SYSTEM.ADDRESS;
BEGIN
	i := virtAdr DIV RS MOD PTEs;
	SYSTEM.GET (kernelPD + AddressSize*i, pt);
	IF ODD(pt) THEN	(* pt present *)
		DEC(pt, pt MOD PS)
	ELSE
		NewPage(pt);
		IF pt = NilAdr THEN RETURN FALSE END;
		SYSTEM.PUT (kernelPD + AddressSize*i, pt + UserPage);
		Fill32(pt, PTEs*AddressSize, PageNotPresent)
	END;
	SYSTEM.PUT (pt + AddressSize*(virtAdr DIV PS MOD PTEs), phys);
	RETURN TRUE
END MapPage;

(* Return mapped page address for a given virtual address (ODD if mapped) *)
PROCEDURE MappedPage(virtAdr: SYSTEM.ADDRESS): SYSTEM.ADDRESS;
VAR pt: SYSTEM.ADDRESS;
BEGIN
	SYSTEM.GET (kernelPD + AddressSize*(virtAdr DIV RS MOD PTEs), pt);
	IF ODD(pt) THEN	(* pt present *)
		SYSTEM.GET (pt - pt MOD PS + AddressSize*(virtAdr DIV PS MOD PTEs), pt);
		RETURN pt
	ELSE
		RETURN 0	(* ~ODD *)
	END
END MappedPage;

(* Unmap a page and return the previous mapping, like MappedPage (). Caller must flush TLB. *)
PROCEDURE UnmapPage(virtAdr: SYSTEM.ADDRESS): SYSTEM.ADDRESS;
VAR t, pt: SYSTEM.ADDRESS;
BEGIN
	SYSTEM.GET (kernelPD + AddressSize*(virtAdr DIV RS MOD PTEs), pt);
	IF ODD(pt) THEN	(* pt present *)
		pt := pt - pt MOD PS + AddressSize*(virtAdr DIV PS MOD PTEs);
		SYSTEM.GET (pt, t);
		SYSTEM.PUT (pt, NIL);	(* unmap *)
			(* could use INVLPG here, but it is not supported equally on all processors *)
		RETURN t
	ELSE
		RETURN 0	(* ~ODD *)
	END
END UnmapPage;

(* Map area [virtAdr..virtAdr+size) directly to area [Adr(phys)..Adr(phys)+size). Returns TRUE iff successful. *)
PROCEDURE MapDirect(virtAdr: SYSTEM.ADDRESS; size: SYSTEM.SIZE; phys: SYSTEM.ADDRESS): BOOLEAN;
BEGIN
(*
	Trace.String("MapDirect "); Trace.Address (virtAdr); Trace.Char(' '); Trace.Address (phys); Trace.Char (' '); Trace.Int (size, 0);
	Trace.Int(size DIV PS, 8); Trace.Ln;
*)
	ASSERT((virtAdr MOD PS = 0) & (size MOD PS = 0));
	WHILE size # 0 DO
		IF ~ODD(MappedPage(virtAdr)) THEN
			IF ~MapPage(virtAdr, phys) THEN RETURN FALSE END
		END;
		INC(virtAdr, PS); INC(phys, PS); DEC(size, PS)
	END;
	RETURN TRUE
END MapDirect;

(* Policy decision for heap expansion. NewBlock for the same block has failed try times. *)
PROCEDURE ExpandNow(try: LONGINT): BOOLEAN;
VAR size: SYSTEM.SIZE;
BEGIN
	size := SYSTEM.LSH(memBlockTail.endBlockAdr - memBlockHead.beginBlockAdr, -10);	(* heap size in KB *)
	RETURN (~ODD(try) OR (size < heapMinKB)) & (size < heapMaxKB)
END ExpandNow;

(* Try to expand the heap by at least "size" bytes *)
PROCEDURE ExpandHeap*(try: LONGINT; size: SYSTEM.SIZE; VAR memBlock: MemoryBlock; VAR beginBlockAdr, endBlockAdr: SYSTEM.ADDRESS);
BEGIN
	IF ExpandNow(try) THEN
		IF size < expandMin THEN size := expandMin END;
		beginBlockAdr := memBlockHead.endBlockAdr;
		endBlockAdr := beginBlockAdr;
		INC(endBlockAdr, size);
		SetHeapEndAdr(endBlockAdr);	(* in/out parameter *)
		memBlock := memBlockHead;
		(* endBlockAdr of memory block is set by caller after free block has been set in memory block - this process is part of lock-free heap expansion *)
	ELSE
		beginBlockAdr := memBlockHead.endBlockAdr;
		endBlockAdr := memBlockHead.endBlockAdr;
		memBlock := NIL
	END
END ExpandHeap;

(* Set memory block end address *)
PROCEDURE SetMemoryBlockEndAddress*(memBlock: MemoryBlock; endBlockAdr: SYSTEM.ADDRESS);
BEGIN
	ASSERT(endBlockAdr >= memBlock.beginBlockAdr);
	memBlock.endBlockAdr := endBlockAdr
END SetMemoryBlockEndAddress;

(* Free unused memory block *)
PROCEDURE FreeMemBlock*(memBlock: MemoryBlock);
BEGIN
	HALT(515) (* impossible to free heap in I386 native A2 version *)
END FreeMemBlock;

(** Attempt to set the heap end address to the specified address. The returned value is the actual new end address (never smaller than previous value). *)
PROCEDURE SetHeapEndAdr(VAR endAdr: SYSTEM.ADDRESS);
VAR n, m: SYSTEM.SIZE;
BEGIN
	Acquire(Memory);
	n := SYSTEM.LSH(endAdr+(PS-1), -PSlog2) - SYSTEM.LSH(heapEndAdr, -PSlog2);	(* pages requested *)
	m := SYSTEM.LSH(pageHeapAdr, -PSlog2) - SYSTEM.LSH(heapEndAdr, -PSlog2) - ReservedPages;	(* max pages *)
	IF n > m THEN n := m END;
	IF n > 0 THEN INC(heapEndAdr, n*PS); DEC(freeHighPages, n) END;
	endAdr := heapEndAdr;
	Release(Memory)
END SetHeapEndAdr;

(** Map a physical memory area (physAdr..physAdr+size-1) into the virtual address space. Parameter virtAdr returns the virtual address of mapped region, or NilAdr on failure. *)
PROCEDURE MapPhysical*(physAdr: SYSTEM.ADDRESS; size: SYSTEM.SIZE; VAR virtAdr: SYSTEM.ADDRESS);
VAR ofs: SYSTEM.ADDRESS;
BEGIN
	IF (SYSTEM.LSH(physAdr, -PSlog2) <= topPageNum) &
			(SYSTEM.LSH(physAdr+size-1, -PSlog2) <= topPageNum) &
			(SYSTEM.LSH(physAdr, -PSlog2) >= SYSTEM.LSH(LowAdr, -PSlog2)) THEN
		virtAdr := physAdr	(* directly mapped *)
	ELSE
		ofs := physAdr MOD PS;
		DEC(physAdr, ofs); INC(size, ofs);	(* align start to page boundary *)
		INC(size, (-size) MOD PS);	(* align end to page boundary *)
		Acquire(Memory);
		NewVirtual(virtAdr, size);
		IF virtAdr # NilAdr THEN
			IF ~MapDirect(virtAdr, size, physAdr + UserPage) THEN
				DisposeVirtual(virtAdr, size);
				virtAdr := NilAdr
			END
		END;
		Release(Memory);
		IF TraceVerbose THEN
			Acquire (TraceOutput);
			Trace.String("Mapping ");
			Trace.IntSuffix(size, 1, "B"); Trace.String(" at ");
			Trace.Address (physAdr); Trace.String (" - "); Trace.Address (physAdr+size-1);
			IF virtAdr = NilAdr THEN
				Trace.String(" failed")
			ELSE
				Trace.String (" to "); Trace.Address (virtAdr);
				IF ofs # 0 THEN Trace.String (", offset "); Trace.Int(ofs, 0) END
			END;
			Trace.Ln;
			Release (TraceOutput);
		END;
		IF virtAdr # NilAdr THEN INC(virtAdr, ofs) END	(* adapt virtual address to correct offset *)
	END
END MapPhysical;

(** Unmap an area previously mapped with MapPhysical. *)
PROCEDURE UnmapPhysical*(virtAdr: SYSTEM.ADDRESS; size: SYSTEM.SIZE);
	(* to do *)
END UnmapPhysical;

(** Return the physical address of the specified range of memory, or NilAdr if the range is not contiguous. It is the caller's responsibility to assure the range remains allocated during the time it is in use. *)
PROCEDURE PhysicalAdr*(adr: SYSTEM.ADDRESS; size: SYSTEM.SIZE): SYSTEM.ADDRESS;
VAR physAdr, mapped, expected: SYSTEM.ADDRESS;
BEGIN
	IF (SYSTEM.LSH(adr, -PSlog2) <= topPageNum) & (SYSTEM.LSH(adr+size-1, -PSlog2) <= topPageNum) THEN
		RETURN adr	(* directly mapped *)
	ELSE
		Acquire(Memory);
		mapped := MappedPage(adr);
		Release(Memory);
		IF ODD(mapped) & (size > 0) THEN	(* mapped, and range not empty or too big *)
			physAdr := mapped - mapped MOD PS + adr MOD PS;	(* strip paging bits and add page offset *)
			(* now check if whole range is physically contiguous *)
			DEC(size, PS - adr MOD PS);	(* subtract distance to current page end *)
			IF size > 0 THEN	(* range crosses current page end *)
				expected := SYSTEM.LSH(mapped, -PSlog2)+1;	(* expected physical page *)
				LOOP
					INC(adr, PS);	(* step to next page *)
					Acquire(Memory);
					mapped := MappedPage(adr);
					Release(Memory);
					IF ~ODD(mapped) OR (SYSTEM.LSH(mapped, -PSlog2) # expected) THEN
						physAdr := NilAdr; EXIT
					END;
					DEC(size, PS);
					IF size <= 0 THEN EXIT END;	(* ok *)
					INC(expected)
				END
			ELSE
				(* ok, skip *)
			END
		ELSE
			physAdr := NilAdr
		END;
		RETURN physAdr
	END
END PhysicalAdr;

(** Translate a virtual address range to num ranges of physical address. num returns 0 on error. *)
PROCEDURE TranslateVirtual*(virtAdr: SYSTEM.ADDRESS; size: SYSTEM.SIZE;  VAR num: LONGINT; VAR physAdr: ARRAY OF Range);
VAR ofs, phys1: SYSTEM.ADDRESS; size1: SYSTEM.SIZE;
BEGIN
	Acquire(Memory);
	num := 0;
	LOOP
		IF size = 0 THEN EXIT END;
		IF num = LEN(physAdr) THEN num := 0; EXIT END;	(* index check *)
		ofs := virtAdr MOD PS;	(* offset in page *)
		size1 := PS - ofs;	(* distance to next page boundary *)
		IF size1 > size THEN size1 := size END;
		phys1 := MappedPage(virtAdr);
		IF ~ODD(phys1) THEN num := 0; EXIT END;	(* page not present *)
		physAdr[num].adr := phys1 - phys1 MOD PS + ofs;
		physAdr[num].size := size1; INC(num);
		INC(virtAdr, size1); DEC(size, size1)
	END;
	IF num = 0 THEN physAdr[0].adr := NilAdr; physAdr[0].size := 0 END;
	Release(Memory)
END TranslateVirtual;

(** Return information on free memory in Kbytes. *)
PROCEDURE GetFreeK*(VAR total, lowFree, highFree: SYSTEM.SIZE);
CONST KperPage = PS DIV 1024;
BEGIN
	Acquire(Memory);
	total := totalPages * KperPage;
	lowFree := freeLowPages * KperPage;
	highFree := freeHighPages * KperPage;
	Release(Memory)
END GetFreeK;

(** -- Stack -- *)

(** Extend the stack to include the specified address, if possible. Returns TRUE iff ok. *)
PROCEDURE ExtendStack*(VAR s: Stack; virtAdr: SYSTEM.ADDRESS): BOOLEAN;
VAR phys: SYSTEM.ADDRESS; ok: BOOLEAN;
BEGIN
	Acquire(Memory);
	ok := FALSE;
	IF (virtAdr < s.high) & (virtAdr >= s.low) THEN
		DEC(virtAdr, virtAdr MOD PS);	(* round down to page boundary *)
		IF Stats & (virtAdr < s.adr-PS) THEN INC(Nbigskips) END;
		IF ODD(MappedPage(virtAdr)) THEN	(* already mapped *)
			ok := TRUE
		ELSE
			NewPage(phys);
			IF phys # NilAdr THEN
				IF MapPage(virtAdr, phys + UserPage) THEN
					IF virtAdr < s.adr THEN
						s.adr := virtAdr
					ELSE
						IF Stats THEN INC(Nfilled) END
					END;
					ok := TRUE
				ELSE
					DisposePage(phys)
				END
			END
		END
	END;
	Release(Memory);
	RETURN ok
END ExtendStack;

(** Allocate a stack. Parameter initSP returns initial stack pointer value. *)
PROCEDURE NewStack*(VAR s: Stack; process: ANY; VAR initSP: SYSTEM.ADDRESS);
VAR adr, phys: SYSTEM.ADDRESS; old: LONGINT; free: SET;
BEGIN
	ASSERT(InitUserStackSize = PS);	(* for now *)
	Acquire(Memory);
	IF Stats THEN INC(NnewStacks) END;
	old := freeStackIndex;
	LOOP
		IF Stats THEN INC(NnewStackLoops) END;
		free := freeStack[freeStackIndex];
		IF free # {} THEN
			adr := 0; WHILE ~(adr IN free) DO INC(adr) END;	(* BTW: BSF instruction is not faster *)
			IF Stats THEN INC(NnewStackInnerLoops, adr+1) END;
			EXCL(freeStack[freeStackIndex], adr);
			adr := StackAreaAdr + (freeStackIndex*SetSize + adr)*MaxUserStackSize;
			EXIT
		END;
		INC(freeStackIndex);
		IF freeStackIndex = LEN(freeStack) THEN freeStackIndex := 0 END;
		IF freeStackIndex = old THEN HALT(1503) END	(* out of stack space *)
	END;
	NewPage(phys); ASSERT(phys # NilAdr);	(* allocate one physical page at first *)
	s.high := adr + MaxUserStackSize; s.low := adr + UserStackGuardSize;
	s.adr := s.high - InitUserStackSize;	(* at the top of the virtual area *)
	initSP := s.high-AddressSize;
	IF ~MapPage(s.adr, phys + UserPage) THEN HALT(99) END;
	SYSTEM.PUT (initSP, process);
	Release(Memory)
END NewStack;

(** Return the process pointer set when the current user stack was created (must be running on user stack). *)
PROCEDURE  -GetProcessPtr* (): ANY;
CONST Mask = -MaxUserStackSize; Ofs = MaxUserStackSize-4;
CODE {SYSTEM.i386}
	MOV EAX, Mask
	AND EAX, ESP
	MOV EAX, [EAX+Ofs]
	POP EBX; pointer returned via stack!
	MOV [EBX], EAX
END GetProcessPtr;

(** True iff current process works on a kernel stack *)
PROCEDURE WorkingOnKernelStack* (): BOOLEAN;
VAR id: LONGINT; sp: SYSTEM.ADDRESS;
BEGIN
	ASSERT(KernelStackSize # MaxUserStackSize - UserStackGuardSize); (* detection does only work with this assumption *)
	sp := CurrentSP ();
	id := ID ();
	RETURN (sp >= procm[id].stack.low) & (sp <= procm[id].stack.high)
END WorkingOnKernelStack;

(** Deallocate a stack. Current thread should not dispose its own stack. Uses privileged instructions. *)
PROCEDURE DisposeStack*(CONST s: Stack);
VAR adr, phys: SYSTEM.ADDRESS;
BEGIN
	(* First make sure there are no references to virtual addresses of the old stack in the TLBs. This is required because we are freeing the pages, and they could be remapped later at different virtual addresses. DisposeStack will only be called from the thread finalizer, which ensures that the user will no longer be referencing this memory. Therefore we can make this upcall from outside the locked region, avoiding potential deadlock. *)
	GlobalFlushTLB;	(* finalizers are only called after Processors has initialized this upcall *)
	Acquire(Memory);
	IF Stats THEN INC(NdisposeStacks) END;
	adr := s.adr;	(* unmap and deallocate all pages of stack *)
	REPEAT
		phys := UnmapPage(adr);	(* TLB was flushed and no intermediate references possible to unreachable stack *)
		IF ODD(phys) THEN DisposePage(phys - phys MOD PS) END;
		INC(adr, PS)
	UNTIL adr = s.high;
	adr := (adr - MaxUserStackSize - StackAreaAdr) DIV MaxUserStackSize;
	INCL(freeStack[adr DIV 32], adr MOD 32);
	Release(Memory)
END DisposeStack;

(** Check if the specified stack is valid. *)
PROCEDURE ValidStack*(CONST s: Stack; sp: SYSTEM.ADDRESS): BOOLEAN;
VAR valid: BOOLEAN;
BEGIN
	Acquire(Memory);
	valid := (sp MOD 4 = 0) & (sp >= s.adr) & (sp <= s.high);
	WHILE valid & (sp < s.high) DO
		valid := ODD(MappedPage(sp));
		INC(sp, PS)
	END;
	Release(Memory);
	RETURN valid
END ValidStack;

(** Update the stack snapshot of the current processor. (for Processors) *)
PROCEDURE UpdateState*;
VAR id: LONGINT;
BEGIN
	ASSERT(CS () MOD 4 = 0);	(* to get kernel stack pointer *)
	id := ID ();
	ASSERT(procm[id].stack.high # 0);	(* current processor stack has been assigned *)
	procm[id].sp := CurrentBP ()	(* instead of ESP, just fetch EBP of current procedure (does not contain pointers) *)
END UpdateState;

(** Get kernel stack regions for garbage collection. (for Heaps) *)
PROCEDURE GetKernelStacks*(VAR stack: ARRAY OF Stack);
VAR i: LONGINT;
BEGIN	(* {UpdateState has been called by each processor} *)
	FOR i := 0 TO MaxCPU-1 DO
		stack[i].adr := procm[i].sp;
		stack[i].high := procm[i].stack.high
	END
END GetKernelStacks;

(* Init page tables (paging still disabled until EnableMM is called). *)
PROCEDURE InitPages;
VAR i, j, phys, lTop, mTop: SYSTEM.ADDRESS;
BEGIN
	(* get top of high and low memory *)
	mTop := memTop;
	DEC(mTop, mTop MOD PS);	(* mTop MOD PS = 0 *)
	topPageNum := SYSTEM.LSH(mTop-1, -PSlog2);
	lTop := lowTop;
	DEC(lTop, lTop MOD PS);	(* lTop MOD PS = 0 *)
	(* initialize NewDirectPage and SetHeapEndAdr (get kernel range) *)
	SYSTEM.GET (LinkAdr + EndBlockOfs, heapEndAdr);
(* ug *) (*
	SYSTEM.PUT (heapEndAdr, NIL);	(* set tag to NIL *)
	INC(heapEndAdr, AddressSize);	(* space for NIL *)
*)
(* ug: not needed, extension of heap done in GetStaticHeap anyway
	INC(heapEndAdr, K); (* space for free heap block descriptor of type Heaps.HeapBlockDesc at heapEndAdr, initialization is done in Heaps *)
	INC(heapEndAdr, (-heapEndAdr) MOD PS);	(* round up to page size *)
*)
	pageHeapAdr := mTop;
	freeHighPages := SYSTEM.LSH(pageHeapAdr, -PSlog2) - SYSTEM.LSH(heapEndAdr, -PSlog2);
	IF TraceVerbose THEN
		Trace.String("Kernel: "); Trace.Address (LinkAdr); Trace.String(" .. ");
		Trace.Address (heapEndAdr-1); Trace.Ln;
		Trace.String ("High: "); Trace.Address (heapEndAdr); Trace.String(" .. ");
		Trace.Address (pageHeapAdr-1); Trace.String(" = "); Trace.Int (freeHighPages, 0);
		Trace.StringLn (" free pages")
	END;
	(* initialize empty free page stack *)
	NewDirectPage(pageStackAdr); ASSERT(pageStackAdr # NilAdr);
	SYSTEM.PUT (pageStackAdr+NodeSP, SYSTEM.VAL (SYSTEM.ADDRESS, MinSP));
	SYSTEM.PUT (pageStackAdr+NodeNext, SYSTEM.VAL (SYSTEM.ADDRESS, NilAdr));
	SYSTEM.PUT (pageStackAdr+NodePrev, SYSTEM.VAL (SYSTEM.ADDRESS, NilAdr));
	(* free low pages *)
	freeLowPage := NilAdr; freeLowPages := 0;
	i := lTop DIV PS; j := LowAdr DIV PS;
	IF TraceVerbose THEN
		Trace.String("Low: "); Trace.Address (j*PS); Trace.String (".."); Trace.Address (i*PS-1)
	END;
	REPEAT
		DEC(i); phys := i*PS;
		SYSTEM.PUT (phys, freeLowPage);	(* phys.next := freeLowPage *)
		freeLowPage := phys; INC(freeLowPages)
	UNTIL i = j;
	IF TraceVerbose THEN
		Trace.String(" = "); Trace.Int(freeLowPages, 1); Trace.StringLn (" free pages")
	END;
	totalPages := SYSTEM.LSH(memTop - M + lowTop + dmaSize + PS, -PSlog2);	(* what BIOS gave us *)
	(* stacks *)
	ASSERT((StackAreaAdr MOD MaxUserStackSize = 0) & (StackAreaSize MOD MaxUserStackSize = 0));
	FOR i := 0 TO LEN(freeStack)-1 DO freeStack[i] := {0..SetSize-1} END;
	FOR i := MaxUserStacks TO LEN(freeStack)*SetSize-1 DO EXCL(freeStack[i DIV SetSize], i MOD SetSize) END;
	freeStackIndex := 0;
	(* mappings *)
	mapTop := MapAreaAdr;
	(* create the address space *)
	NewPage(kernelPD); ASSERT(kernelPD # NilAdr);
	Fill32(kernelPD, PTEs*4, PageNotPresent);
	IF ~MapDirect(LowAdr, mTop-LowAdr, LowAdr + UserPage) THEN HALT(99) END	(* map heap direct *)
END InitPages;

(* Generate a memory segment descriptor. type IN {0..7} & dpl IN {0..3}.

type
0	data, expand-up, read-only
1	data, expand-up, read-write
2	data, expand-down, read-only
3	data, expand-down, read-write
4	code, non-conforming, execute-only
5	code, non-conforming, execute-read
6	code, conforming, execute-only
7	code, conforming, execute-read
*)

PROCEDURE GenMemSegDesc(type, base, limit, dpl: LONGINT; page: BOOLEAN; VAR sd: SegDesc);
VAR s: SET;
BEGIN
	sd.low := ASH(base MOD 10000H, 16) + limit MOD 10000H;
	s := SYSTEM.VAL (SET, ASH(ASH(base, -24), 24) + ASH(ASH(limit, -16), 16) +
		ASH(dpl, 13) + ASH(type, 9) + ASH(base, -16) MOD 100H);
	s := s + {12, 15, 22};	(* code/data=1, present=1, 32-bit=1, A=0, AVL=0 *)
	IF page THEN INCL(s, 23) END;	(* page granularity *)
	sd.high := SYSTEM.VAL (LONGINT, s)
END GenMemSegDesc;

(* Generate a TSS descriptor. *)
PROCEDURE GenTSSDesc(base: SYSTEM.ADDRESS; limit, dpl: LONGINT; VAR sd: SegDesc);
VAR s: SET;
BEGIN
	sd.low := ASH(base MOD 10000H, 16) + limit MOD 10000H;
	s := SYSTEM.VAL (SET, ASH(ASH(base, -24), 24) + ASH(ASH(limit, -16), 16) +
		ASH(dpl, 13) + ASH(base, -16) MOD 100H);
	s := s + {8, 11, 15};	(* type=non-busy TSS, present=1, AVL=0, 32-bit=0 *)
	sd.high := SYSTEM.VAL (LONGINT, s)
END GenTSSDesc;

(* Initialize segmentation. *)
PROCEDURE InitSegments;
VAR i: LONGINT;
BEGIN
	(* GDT 0: Null segment *)
	gdt[0].low := 0; gdt[0].high := 0;
	(* GDT 1: Kernel code: non-conforming, execute-read, base 0, limit 4G, PL 0 *)
	GenMemSegDesc(5, 0, M-1, 0, TRUE, gdt[1]);
	(* GDT 2: Kernel stack: expand-up, read-write, base 0, limit 4G, PL 0 *)
	GenMemSegDesc(1, 0, M-1, 0, TRUE, gdt[2]);
	(* GDT 3: User code: conforming, execute-read, base 0, limit 4G, PL 0 *)
	GenMemSegDesc(7, 0, M-1, 0, TRUE, gdt[3]);
	(* GDT 4: User/Kernel data: expand-up, read-write, base 0, limit 4G, PL 3 *)
	GenMemSegDesc(1, 0, M-1, 3, TRUE, gdt[4]);
	(* GDT 5: User stack: expand-down, read-write, base 0, limit 1M, PL 3 *)
	GenMemSegDesc(3, 0, M DIV PS, 3, TRUE, gdt[5]);
	(* GDT TSSOfs..n: Kernel TSS *)
	FOR i := 0 TO MaxCPU-1 DO
		GenTSSDesc(SYSTEM.ADR(procm[i].tss), SYSTEM.SIZEOF(TSSDesc)-1, 0, gdt[TSSOfs+i]);
		procm[i].sp := 0; procm[i].stack.high := 0
	END
END InitSegments;

(* Enable segmentation on the current processor. *)
PROCEDURE EnableSegments;
BEGIN
	LoadGDT(SYSTEM.ADR(gdt[0]), SYSTEM.SIZEOF(GDT)-1);
	LoadSegRegs(DataSel)
END EnableSegments;

(* Allocate a kernel stack. *)
PROCEDURE NewKernelStack(VAR stack: Stack);
VAR phys, virt: SYSTEM.ADDRESS; size: SYSTEM.SIZE;
BEGIN
	size := KernelStackSize;
	NewVirtual(virt, size + PS);	(* add one page for overflow protection *)
	ASSERT(virt # NilAdr, 1502);
	INC(virt, PS);	(* leave page open at bottom *)
	stack.low := virt;
	stack.adr := virt;	(* return stack *)
	REPEAT
		NewPage(phys); ASSERT(phys # NilAdr);
		IF ~MapPage(virt, phys + KernelPage) THEN HALT(99) END;
		DEC(size, PS); INC(virt, PS)
	UNTIL size = 0;
	stack.high := virt
END NewKernelStack;

(* Set task register *)
PROCEDURE -SetTR(tr: SYSTEM.ADDRESS);
CODE {SYSTEM.i386, SYSTEM.Privileged}
	POP EAX
	LTR AX
END SetTR;

(* Enable memory management and switch to new stack in virtual space.

	Stack layout:
			caller1 return
			caller1 EBP	<-- caller0 EBP
			[caller0 locals]
	04	caller0 return
	00	caller0 EBP	<-- EBP
			locals	<-- ESP
*)

PROCEDURE -EnableMM(pd, esp: SYSTEM.ADDRESS);
CODE {SYSTEM.Pentium, SYSTEM.Privileged}
	POP EBX				; esp
	POP EAX				; pd
	MOV CR3, EAX			; page directory ptr

	MOV ECX, [EBP+4]		; caller0 return
	MOV EDX, [EBP]		; caller0 EBP
	MOV EDX, [EDX+4]		; caller1 return

	MOV EAX, CR0
	OR EAX, 80000000H	; set PG bit
	MOV CR0, EAX			; enable virtual addressing (old stack no longer usable)
	JMP 0					; flush queue
	WBINVD

	MOV DWORD  [EBX-4], 0	; not UserStackSel (cf. GetUserStack)
	MOV  [EBX-8], EDX	; caller1 return on new stack
	MOV DWORD  [EBX-12], 0	; caller1 EBP on new stack
	LEA EBP,  [EBX-12]		; new stack top
	MOV ESP, EBP

	JMP ECX				; back to caller0 (whose locals are now inaccessible!)
END EnableMM;

(** -- Initialization -- *)

(** Initialize memory management.
	o every processor calls this once during initialization
	o mutual exclusion with other processors must be guaranteed by the caller
	o interrupts must be off
	o segmentation and paging is enabled
	o return is on the new stack => caller must have no local variables
*)

PROCEDURE InitMemory*;
VAR id: LONGINT;
BEGIN
	EnableSegments;
	(* allocate stack *)
	id := ID ();
	NewKernelStack(procm[id].stack);
	procm[id].sp := 0;
	(* initialize TSS *)
	Fill32(SYSTEM.ADR(procm[id].tss), SYSTEM.SIZEOF(TSSDesc), 0);
	procm[id].tss.ESP0 := procm[id].stack.high;	(* kernel stack org *)
	procm[id].tss.ESS0 := KernelStackSel;
	procm[id].tss.IOBitmapOffset := -1;	(* no bitmap *)
	(* enable paging and switch stack *)
	SetTR(KernelTR + id*8);
	EnableMM(kernelPD, procm[id].tss.ESP0)
END InitMemory;

(** Initialize a boot page for MP booting. Parameter physAdr returns the physical address of a low page. *)
PROCEDURE InitBootPage*(start: Startup; VAR physAdr: SYSTEM.ADDRESS);
CONST BootOfs = 800H;
VAR adr, a: SYSTEM.ADDRESS;
BEGIN
	Acquire(Memory);
	NewLowPage(physAdr);
	Release(Memory);
	ASSERT((physAdr # NilAdr) & (physAdr >= 0) & (physAdr < M) & (physAdr MOD PS = 0));
	adr := physAdr + BootOfs;
	a := adr;	(* from SMP.Asm - generated with BinToCode.Kernel smp.bin 800H *)
	SYSTEM.PUT32(a, 0100012EBH); INC(a, 4); SYSTEM.PUT32(a, 000080000H); INC(a, 4);
	SYSTEM.PUT32(a, 000000000H); INC(a, 4); SYSTEM.PUT32(a, 000170000H); INC(a, 4);
	SYSTEM.PUT32(a, 000000000H); INC(a, 4); SYSTEM.PUT32(a, 0010F2EFAH); INC(a, 4);
	SYSTEM.PUT32(a, 02E08081EH); INC(a, 4); SYSTEM.PUT32(a, 00E16010FH); INC(a, 4);
	SYSTEM.PUT32(a, 0E0010F08H); INC(a, 4); SYSTEM.PUT32(a, 0010F010CH); INC(a, 4);
	SYSTEM.PUT32(a, 0B800EBF0H); INC(a, 4); SYSTEM.PUT32(a, 0D08E0010H); INC(a, 4);
	SYSTEM.PUT32(a, 0C08ED88EH); INC(a, 4); SYSTEM.PUT32(a, 00800BC66H); INC(a, 4);
	SYSTEM.PUT32(a, 033660000H); INC(a, 4); SYSTEM.PUT32(a, 0FF2E66C0H); INC(a, 4);
	SYSTEM.PUT32(a, 09008022EH); INC(a, 4);
	(* these offsets are from the last two dwords in SMP.Asm *)
	SYSTEM.PUT32(adr+2, SYSTEM.VAL (LONGINT, start));	(* not a method *)
	SYSTEM.PUT32(adr+16, SYSTEM.ADR(gdt[0]));
	(* jump at start *)
	SYSTEM.PUT8(physAdr, 0EAX);	(* jmp far *)
	SYSTEM.PUT32(physAdr + 1, ASH(physAdr, 16-4) + BootOfs)	(* seg:ofs *)
END InitBootPage;

(** The BP in a MP system calls this to map the APIC physical address directly. *)
PROCEDURE InitAPICArea*(adr: SYSTEM.ADDRESS; size: SYSTEM.SIZE);
BEGIN
	ASSERT((size = PS) & (adr >= IntelAreaAdr) & (adr+size-1 < IntelAreaAdr+IntelAreaSize));
	IF ~MapDirect(adr, size, adr + UserPage) THEN HALT(99) END
END InitAPICArea;

(* Set machine-dependent parameters gcThreshold, expandMin, heapMinKB and heapMaxKB *)
PROCEDURE SetGCParams*;
VAR size, t: SYSTEM.SIZE;
BEGIN
	GetFreeK(size, t, t);	(* size is total memory size in KB *)
	heapMinKB := size * HeapMin DIV 100;
	heapMaxKB := size * HeapMax DIV 100;
	expandMin := size * ExpandRate DIV 100 * 1024;
	IF expandMin < 0 THEN expandMin := MAX(LONGINT) END;
	gcThreshold := size * Threshold DIV 100 * 1024;
	IF gcThreshold < 0 THEN gcThreshold := MAX(LONGINT) END
END SetGCParams;

(** Get first memory block and first free address, heap area in first memory block is automatically expanded to account for the first
      few calls to NEW  *)
PROCEDURE GetStaticHeap*(VAR beginBlockAdr, endBlockAdr, freeBlockAdr: SYSTEM.ADDRESS);
VAR memBlockAdr, endAdr: SYSTEM.ADDRESS;
BEGIN
	SYSTEM.GET(LinkAdr + EndBlockOfs, freeBlockAdr);
	ASSERT(freeBlockAdr MOD StaticBlockSize = 0);
	memBlockAdr := LinkAdr + HeaderSize + MemoryBlockOfs;
	memBlockHead := SYSTEM.VAL(MemoryBlock, memBlockAdr);
	ASSERT(memBlockHead.beginBlockAdr MOD StaticBlockSize = 0);
	endAdr := memBlockHead.endBlockAdr;
	INC(endAdr, InitialHeapIncrement);
	SetHeapEndAdr(endAdr);	(* in/out parameter *)
	memBlockHead.endBlockAdr := endAdr;
	ASSERT((memBlockHead.endBlockAdr - memBlockHead.beginBlockAdr) MOD StaticBlockSize = 0);
	memBlockTail := memBlockHead;
	beginBlockAdr := memBlockHead.beginBlockAdr;
	endBlockAdr := memBlockHead.endBlockAdr
END GetStaticHeap;

(* returns if an address is a currently allocated heap address *)
PROCEDURE ValidHeapAddress*(p: SYSTEM.ADDRESS): BOOLEAN;
BEGIN
	RETURN GreaterOrEqual(p,memBlockHead.beginBlockAdr) & LessOrEqual(p,memBlockTail.endBlockAdr)
END ValidHeapAddress;

(** Jump from kernel to user mode. Every processor calls this during initialization. *)
PROCEDURE JumpToUserLevel*(userEBP: SYSTEM.ADDRESS);
CODE {SYSTEM.i386}
	PUSH UserStackSel		; SS3
	PUSH DWORD [EBP+userEBP]		; ESP3
	PUSHFD	; EFLAGS3
	PUSH UserCodeSel		; CS3
	CALL L1	; PUSH L1 (EIP3)
L1:
	ADD DWORD [ESP], BYTE 5; adjust EIP3 to L2 (L2-L1 = 5)
	IRETD					; switch to level 3 and continue at following instruction
L2:
	POP EBP				; from level 3 stack (refer to Objects.NewProcess)
	RET 8					; jump to body of first active object
END JumpToUserLevel;

PROCEDURE Ensure32BitAddress*(adr: SYSTEM.ADDRESS): LONGINT;
BEGIN
	RETURN adr
END Ensure32BitAddress;

PROCEDURE Is32BitAddress*(adr: SYSTEM.ADDRESS): BOOLEAN;
BEGIN RETURN SYSTEM.VAL (Address32, adr) = adr;
END Is32BitAddress;

(* Unexpected - Default interrupt handler *)
PROCEDURE Unexpected(VAR state: State);
VAR int: LONGINT; isr, irr: CHAR;
BEGIN
	int := state.INT;
	IF HandleSpurious & ((int >= IRQ0) & (int <= MaxIRQ) OR (int = MPSPU)) THEN	(* unexpected IRQ, get more info *)
		IF (int >= IRQ8) & (int <= IRQ15) THEN
			Portout8 (IntB0, 0BX); Portin8(IntB0, isr);
			Portout8 (IntB0, 0AX); Portin8(IntB0, irr)
		ELSIF (int >= IRQ0) & (int <= IRQ7) THEN
			Portout8 (IntA0, 0BX); Portin8(IntA0, isr);
			Portout8 (IntA0, 0AX); Portin8(IntA0, irr)
		ELSE
			isr := 0X; irr := 0X
		END;
		IF TraceSpurious THEN
			Acquire (TraceOutput);
			Trace.String("INT"); Trace.Int(int, 1);
			Trace.Hex(ORD(isr), -3); Trace.Hex(ORD(irr), -2); Trace.Ln;
			Release (TraceOutput);
		END
	ELSE
		Acquire (TraceOutput);
		Trace.StringLn ("Unexpected interrupt");
		Trace.Memory(SYSTEM.ADR(state), SYSTEM.SIZEOF(State)-4*4);	(* exclude last 4 fields *)
		IF int = 3 THEN	(* was a HALT or ASSERT *)
			(* It seems that no trap handler is installed (Traps not linked), so wait endlessly, while holding trace lock. This should quiten down the system, although other processors may possibly still run processes. *)
			LOOP END
		ELSE
			Release (TraceOutput);
			SetEAX(int);
			HALT(1801)	(* unexpected interrupt *)
		END
	END
END Unexpected;

(* InEnableIRQ - Enable a hardware interrupt (caller must hold module lock). *)
PROCEDURE -InEnableIRQ (int: LONGINT);
CODE {SYSTEM.i386}
	POP EBX
	CMP EBX, IRQ7
	JG cont2
	IN AL, IntA1
	SUB EBX, IRQ0
	BTR EAX, EBX
	OUT IntA1, AL
	JMP end
cont2:
	IN AL, IntB1
	SUB EBX, IRQ8
	BTR EAX, EBX
	OUT IntB1, AL
end:
END InEnableIRQ;

(* InDisableIRQ - Disable a hardware interrupt (caller must hold module lock). *)
PROCEDURE  -InDisableIRQ (int: LONGINT);
CODE {SYSTEM.i386}
	POP EBX
	CMP EBX, IRQ7
	JG cont2
	IN AL, IntA1
	SUB EBX, IRQ0
	BTS EAX, EBX
	OUT IntA1, AL
	JMP end
cont2:
	IN AL, IntB1
	SUB EBX, IRQ8
	BTS EAX, EBX
	OUT IntB1, AL
end:
END InDisableIRQ;

(** EnableIRQ - Enable a hardware interrupt (also done automatically by InstallHandler). *)
PROCEDURE EnableIRQ* (int: LONGINT);
BEGIN
	ASSERT((int >= IRQ0) & (int <= IRQ15) & (int # IRQ2));
	Acquire(Interrupts);	(* protect interrupt mask register *)
	InEnableIRQ(int);
	Release(Interrupts)
END EnableIRQ;

(** DisableIRQ - Disable a hardware interrupt. *)
PROCEDURE DisableIRQ* (int: LONGINT);
BEGIN
	ASSERT((int >= IRQ0) & (int <= IRQ15) & (int # IRQ2));
	Acquire(Interrupts);	(* protect interrupt mask register *)
	InDisableIRQ(int);
	Release(Interrupts)
END DisableIRQ;

(** InstallHandler - Install interrupt handler & enable IRQ if necessary.
	On entry to h interrupts are disabled and may be enabled with Sti. After handling the interrupt
	the state of interrupts are restored. The acknowledgement of a hardware interrupt is done automatically.
	IRQs are mapped from IRQ0 to MaxIRQ. *)
PROCEDURE InstallHandler* (h: Handler; int: LONGINT);
VAR (* n: HandlerList; *) i: LONGINT; unexpected: Handler;
BEGIN
	ASSERT(default.valid);	(* initialized *)
	ASSERT(int # IRQ2);	(* IRQ2 is used for cascading and remapped to IRQ9 *)
	Acquire(Interrupts);
		(* FieldInterrupt may traverse list while it is being modified *)
	i := 0;
	unexpected := Unexpected;
	IF intHandler[int, 0].handler # unexpected THEN
		WHILE (i < MaxNumHandlers - 1) & intHandler[int, i].valid DO
			INC(i)
		END;
		IF i < MaxNumHandlers - 1 THEN
			intHandler[int, i].valid := TRUE;
			intHandler[int, i].handler := h;
		ELSE
			Acquire(TraceOutput);
			Trace.String("Machine.InstallHandler: handler could not be installed for interrupt "); Trace.Int(int,  0);
			Trace.String(" - too many handlers per interrupt number"); Trace.Ln;
			Release(TraceOutput)
		END
	ELSE
		intHandler[int, 0].handler := h;
		IF (int >= IRQ0) & (int <= IRQ15) THEN InEnableIRQ(int) END
	END;
	Release(Interrupts)
END InstallHandler;

(** RemoveHandler - Uninstall interrupt handler & disable IRQ if necessary *)
PROCEDURE RemoveHandler* (h: Handler; int: LONGINT);
VAR (* p, c: HandlerList; *) i, j, foundIndex: LONGINT;
BEGIN
	ASSERT(default.valid);	(* initialized *)
	Acquire(Interrupts);
	(* find h *)
	i := 0;
	foundIndex := -1;
	WHILE  (i < MaxNumHandlers - 1) & intHandler[int, i].valid DO
		IF intHandler[int, i].handler = h THEN foundIndex := i END;
		INC(i)
	END;
	IF foundIndex # -1 THEN
		(* h found -> copy interrupt handlers higher than foundIndex *)
		FOR j := foundIndex TO i - 2 DO
			intHandler[int, j] := intHandler[int, j + 1]
		END
	END;
	IF ~intHandler[int, 0].valid THEN
		(* handler h was the only interrupt handler for interrupt int -> install the default handler *)
		intHandler[int, 0] := default;
		IF (int >= IRQ0) & (int <= IRQ15) THEN DisableIRQ(int) END
	END;
	Release(Interrupts)
END RemoveHandler;

(*
PROCEDURE ListIntHandlers*;
VAR i, j, highest: LONGINT; handler: Handler;
BEGIN
	highest := 0;
	FOR i := 0 TO IDTSize - 1 DO
		j := 0;
		WHILE (j < MaxNumHandlers - 1) & intHandler[i, j].valid DO INC(j) END;
		Trace.String("int = "); Trace.Int(i, 3); Trace.String(" # installed handlers = "); Trace.Int(j, 0);
		IF j = 1 THEN
			handler := Unexpected;
			IF intHandler[i, 0].handler = handler THEN
				Trace.String(" default handler installed")
			END
		END;
		Trace.Ln;
		IF j > highest THEN highest := j END;
	END;
	Trace.String("highest # installed handlers = "); Trace.Int(highest, 0); Trace.Ln
END ListIntHandlers;
*)

(* Get control registers. *)
PROCEDURE GetCR0to4(VAR cr: ARRAY OF LONGINT);
CODE {SYSTEM.Pentium, SYSTEM.Privileged}
	MOV EDI, [EBP+cr]
	MOV EAX, CR0
	XOR EBX, EBX		; CR1 is not documented
	MOV ECX, CR2
	MOV EDX, CR3
	MOV [EDI], EAX
	MOV [EDI+4], EBX
	MOV [EDI+8], ECX
	MOV [EDI+12], EDX
	MOV EAX, CR4		; Pentium only
	MOV [EDI+16], EAX
END GetCR0to4;

(* GetDR0to7 - Get debug registers. *)
PROCEDURE GetDR0to7(VAR dr: ARRAY OF LONGINT);
CODE {SYSTEM.i386, SYSTEM.Privileged}
	MOV EDI, [EBP+dr]
	MOV EAX, DR0
	MOV EBX, DR1
	MOV ECX, DR2
	MOV EDX, DR3
	MOV [EDI], EAX
	MOV [EDI+4], EBX
	MOV [EDI+8], ECX
	MOV [EDI+12], EDX
	XOR EAX, EAX		; DR4 is not documented
	XOR EBX, EBX		; DR5 is not documented
	MOV ECX, DR6
	MOV EDX, DR7
	MOV [EDI+16], EAX
	MOV [EDI+20], EBX
	MOV [EDI+24], ECX
	MOV [EDI+28], EDX
END GetDR0to7;

(* GetSegments - Get segment registers. *)
PROCEDURE GetSegments(VAR ss, es, ds, fs, gs: LONGINT);
CODE {SYSTEM.i386}
	XOR EAX, EAX
	MOV EBX, [EBP+ss]
	MOV AX, SS
	MOV [EBX], EAX
	MOV EBX, [EBP+es]
	MOV AX, ES
	MOV [EBX], EAX
	MOV EBX, [EBP+ds]
	MOV AX, DS
	MOV [EBX], EAX
	MOV EBX, [EBP+fs]
	MOV AX, FS
	MOV [EBX], EAX
	MOV EBX, [EBP+gs]
	MOV AX, GS
	MOV [EBX], EAX
END GetSegments;

(* CLTS - Clear task-switched flag. *)
PROCEDURE -CLTS;
CODE {SYSTEM.i386, SYSTEM.Privileged}
	CLTS
END CLTS;

(* GetFPU - Store floating-point environment (28 bytes) and mask all floating-point exceptions. *)
PROCEDURE  -GetFPU(adr: SYSTEM.ADDRESS);
CODE {SYSTEM.i386, SYSTEM.FPU}
	POP EBX
	FNSTENV [EBX]	; also masks all exceptions
	FWAIT
END GetFPU;

(* CR2 - Get page fault address. *)
PROCEDURE -CR2* (): SYSTEM.ADDRESS;
CODE {SYSTEM.i386, SYSTEM.Privileged}
	MOV EAX, CR2
END CR2;

(** GetExceptionState - Get exception state from interrupt state (and switch on interrupts). *)
PROCEDURE GetExceptionState* (VAR int: State; VAR exc: ExceptionState);
VAR id: LONGINT; level0: BOOLEAN;
BEGIN
	(* save all state information while interrupts are still disabled *)
	exc.halt := -int.INT; id := ID ();
	IF int.INT = PF THEN exc.pf := CR2 () ELSE exc.pf := 0 END;
	GetCR0to4(exc.CR);
	GetDR0to7(exc.DR);
	CLTS;	(* ignore task switch flag *)
	IF int.INT = MF THEN
		GetFPU(SYSTEM.ADR(exc.FPU[0]));
		int.PC := SYSTEM.VAL (SYSTEM.ADDRESS, exc.FPU[3]);	(* modify PC according to FPU info *)
		(* set halt code according to FPU info *)
		IF 2 IN exc.FPU[1] THEN exc.halt := -32		(* division by 0 *)
		ELSIF 3 IN exc.FPU[1] THEN exc.halt := -33	(* overflow *)
		ELSIF 0 IN exc.FPU[1] THEN exc.halt := -34	(* operation invalid *)
		ELSIF 6 IN exc.FPU[1] THEN exc.halt := -35	(* stack fault *)
		ELSIF 1 IN exc.FPU[1] THEN exc.halt := -36	(* denormalized *)
		ELSIF 4 IN exc.FPU[1] THEN exc.halt := -37	(* underflow *)
		ELSIF 5 IN exc.FPU[1] THEN exc.halt := -38	(* precision loss *)
		ELSE	(* {exc.halt = -16} *)
		END
	ELSE
		Fill32(SYSTEM.ADR(exc.FPU[0]), LEN(exc.FPU)*SYSTEM.SIZEOF(SET), 0)
	END;
	SetupFPU;
	level0 := (int.CS MOD 4 = KernelLevel);
	IF int.INT = BP THEN	(* breakpoint (HALT) *)
		IF level0 THEN
			exc.halt := int.SP	(* get halt code *)
			(* if HALT(MAX(INTEGER)), leave halt code on stack when returning, but not serious problem.*)
		ELSE
			SYSTEM.GET (int.SP, exc.halt);	(* get halt code from outer stack *)
			IF exc.halt >= MAX(INTEGER) THEN INC (int.SP, AddressSize) END	(* pop halt code from outer stack *)
		END;
		IF exc.halt < MAX(INTEGER) THEN DEC (int.PC) END;	(* point to the INT 3 instruction (assume 0CCX, not 0CDX 3X) *)
	ELSIF int.INT = OVF THEN	(* overflow *)
		DEC (int.PC)	(* point to the INTO instruction (assume 0CEX, not 0CDX 4X) *)
	ELSIF int.INT = PF THEN	(* page fault *)
		IF int.PC = 0 THEN	(* reset PC to return address of indirect CALL to 0 *)
			IF level0 THEN int.PC := int.SP (* ret adr *) ELSE SYSTEM.GET (int.SP, int.PC) END
		END
	END;
	(* get segment registers *)
	GetSegments(exc.SS, exc.ES, exc.DS, exc.FS, exc.GS);
	IF level0 THEN	(* from same level, no ESP, SS etc. on stack *)
		exc.SP := SYSTEM.ADR(int.SP)	(* stack was here when interrupt happened *)
	ELSE	(* from outer level *)
		exc.SP := int.SP; exc.SS := int.SS
	END
END GetExceptionState;

(* FieldInterrupt and FieldIRQ *)

(*
	At entry to a Handler procedure the stack is as follows:
		72	--	.GS
		68	--	.FS
		64	--	.DS
		60	--	.ES	; or haltcode
		-- if (VMBit IN .FLAGS) --
		56	--	.SS
		52	--	.ESP	; or haltcode
		-- (VMBit IN .EFLAGS) OR (CS MOD 4 < .CS MOD 4) --
		48	--	.EFLAGS
		44	--	.CS
		40	--	.EIP	; rest popped by IRETD
		36	--	.ERR/EBP	; pushed by processor or glue code, popped by POP EBP
		32	--	.INT	<-- .ESP0	; pushed by glue code, popped by POP EBP
		28	--	.EAX
		24	--	.ECX
		20	--	.EDX
		16	--	.EBX
		12	--	.ESP0
		08	--	.BP/ERR	; exchanged by glue code
		04	--	.ESI
		00	24	.EDI	<--- state: State
		--	20	ptr
		--	16	object pointer for DELEGATE
		--	12	TAG(state)
		--	08	ADR(state)
		--	04	EIP'	(RET to FieldInterrupt)
		--	00	EBP'	<-- EBP
		--	--	locals	<-- ESP
*)

PROCEDURE FieldInterrupt;
CODE {SYSTEM.i386}	; 3 bytes implicit code skipped: PUSH EBP; MOV EBP, ESP
entry:
	PUSHAD			; save all registers (EBP = error code)
	LEA EBP, [ESP+36]	; procedure link (for correct tracing of interrupt procedures)

	MOV EBX, [ESP+32]	; EBX = int number
	IMUL EBX, EBX, MaxNumHandlers
	IMUL EBX, EBX, 12
	LEA EAX, intHandler
	ADD EAX, EBX		; address of intHandler[int, 0]

loop:					; call all handlers for the interrupt
	MOV ECX, ESP
	PUSH EAX			; save ptr for table
	PUSH stateTag		; TAG(state)
	PUSH ECX			; ADR(state)
	MOV EBX, [EAX+8]
	CMP EBX, 0
	JE nodelegate
	PUSH EBX			; object pointer for DELEGATE
nodelegate:
	CALL DWORD [EAX+4]	; call handler
	CLI					; handler may have re-enabled interrupts
	POP EAX
	ADD EAX, 12
	MOV EBX, [EAX]
	CMP EBX, 0
	JNE loop

	POPAD				; now EBP = error code
	POP EBP			; now EBP = INT
	POP EBP			; now EBP = caller EBP
	IRETD
END FieldInterrupt;

PROCEDURE FieldIRQ;
CODE {SYSTEM.i386}	; 3 bytes implicit code skipped: PUSH EBP; MOV EBP, ESP
entry:
	PUSHAD			; save all registers (EBP = error code)
	LEA EBP, [ESP+36]	; procedure link (for correct tracing of interrupt procedures)

;	PUSH [ESP+32]		; int number
;	CALL traceInterruptIn

	MOV EBX, [ESP+32]	; EBX = int number
	IMUL EBX, EBX, MaxNumHandlers
	IMUL EBX, EBX, 12
	LEA EAX, intHandler
	ADD EAX, EBX		; address of intHandler[int, 0]

loop:					; call all handlers for the interrupt
	MOV ECX, ESP
	PUSH EAX			; save ptr for linked list
	PUSH stateTag		; TAG(state)
	PUSH ECX			; ADR(state)
	MOV EBX, [EAX+8]
	CMP EBX, 0
	JE nodelegate
	PUSH EBX			; object pointer for DELEGATE
nodelegate:
	CALL DWORD [EAX+4]	; call handler
	CLI					; handler may have re-enabled interrupts
	POP EAX
	ADD EAX, 12
	MOV EBX, [EAX]
	CMP EBX, 0
	JNE loop

;	PUSH [ESP+32]		; int number
;	CALL traceInterruptOut

; ack interrupt
	MOV AL, 20H		; undoc PC ed. 2 p. 1018
	CMP BYTE [ESP+32], IRQ8
	JB irq0
	OUT IntB0, AL		; 2nd controller
irq0:
	OUT IntA0, AL		; 1st controller

	POPAD				; now EBP = error code
	POP EBP			; now EBP = INT
	POP EBP			; now EBP = caller EBP
	IRETD
END FieldIRQ;

(* LoadIDT - Load interrupt descriptor table *)
PROCEDURE LoadIDT(base: SYSTEM.ADDRESS; size: SYSTEM.SIZE);
CODE {SYSTEM.i386, SYSTEM.Privileged}
	SHL DWORD [EBP+size], 16
	MOV EBX, 2
	LIDT [EBP+EBX+size]
END LoadIDT;

(** Init - Initialize interrupt handling. Called once during initialization. Uses NEW. *)

(*
	The glue code is:
	entry0:	; entry point for interrupts without error code
		PUSH 0	; fake error code
	entry1:	; entry point for interrupts with error code
		XCHG [ESP], EBP	; exchange error code and caller EBP
		PUSH int	; interrupt number
		JMP FieldInterrupt:entry
*)

PROCEDURE InitInterrupts*;
VAR a: SYSTEM.ADDRESS; o, i: LONGINT; p: PROCEDURE; mask: SET;
BEGIN
	stateTag := SYSTEM.TYPECODE(State);
	(* initialise 8259 interrupt controller chips *)
	Portout8 (IntA0, 11X); Portout8 (IntA1, CHR(IRQ0));
	Portout8 (IntA1, 4X); Portout8 (IntA1, 1X); Portout8 (IntA1, 0FFX);
	Portout8 (IntB0, 11X); Portout8 (IntB1, CHR(IRQ8));
	Portout8 (IntB1, 2X); Portout8 (IntB1, 1X); Portout8 (IntB1, 0FFX);
	(* enable interrupts from second interrupt controller, chained to line 2 of controller 1 *)
	Portin8(IntA1, SYSTEM.VAL (CHAR, mask));
	EXCL(mask, IRQ2-IRQ0);
	Portout8 (IntA1, SYSTEM.VAL (CHAR, mask));

(*
	NEW(default); default.next := NIL; default.handler := Unexpected;
*)
(*
	newrec (SYSTEM.VAL (ANY, default), SYSTEM.TYPECODE (HandlerList));
*)
(*	default.next := NIL; default.handler := Unexpected; *)
	default.valid := TRUE; default.handler := Unexpected;

	FOR i := 0 TO IDTSize-1 DO	(* set up glue code *)
		intHandler[i, 0] := default; o := 0;
		(* PUSH error code, int num & regs *)
		glue[i][o] := 6AX; INC (o); glue[i][o] := 0X; INC (o);	(* PUSH 0	; {o = 2} *)
		glue[i][o] := 87X; INC (o); glue[i][o] := 2CX; INC (o); glue[i][o] := 24X; INC (o);	(* XCHG [ESP], EBP *)
		glue[i][o] := 6AX; INC (o); glue[i][o] := CHR(i); INC (o);	(* PUSH i *)
		IF (i >= IRQ0) & (i <= IRQ15) THEN p := FieldIRQ ELSE p := FieldInterrupt END;
		a := SYSTEM.VAL (SYSTEM.ADDRESS, p) + 3 - (SYSTEM.ADR(glue[i][o]) + 5);
		glue[i][o] := 0E9X; INC (o);	(* JMP FieldInterrupt.entry *)
		SYSTEM.PUT32 (SYSTEM.ADR(glue[i][o]), a);

		(* set up IDT entry *)
		IF (i > 31) OR ~(i IN {8, 10..14, 17}) THEN a := SYSTEM.ADR(glue[i][0])	(* include PUSH 0 *)
		ELSE a := SYSTEM.ADR(glue[i][2])	(* skip PUSH 0, processor supplies error code *)
		END;
		idt[i].offsetBits0to15 := SHORT(a MOD 10000H);
		(* IRQ0 must be at level 0 because time slicing in Objects needs to set interrupted process' ESP *)
		(* all irq's are handled at level 0, because of priority experiment in Objects.FieldIRQ *)
		IF TRUE (* (i < IRQ0) OR (i > IRQ15) OR (i = IRQ0) OR (i = IRQ0 + 1)*) THEN
			idt[i].selector := KernelCodeSel;	(* gdt[1] -> non-conformant segment => level 0 *)
			idt[i].gateType := SYSTEM.VAL (INTEGER, 0EE00H)	(* present, DPL 3, system, 386 interrupt *)
		ELSE	(* {IRQ0..IRQ15} - {IRQ0 + 1} *)
			idt[i].selector := UserCodeSel;	(* gdt[3] -> conformant segment => level 0 or 3 *)
			idt[i].gateType := SYSTEM.VAL (INTEGER, 08E00H)	(* present, DPL 0, system, 386 interrupt *)
		END;
		idt[i].offsetBits16to31 := SHORT(a DIV 10000H)
	END
END InitInterrupts;

(** Start - Start handling interrupts. Every processor calls this once during initialization. *)
PROCEDURE Start*;
BEGIN
	ASSERT(default.valid);	(* initialized *)
	LoadIDT(SYSTEM.ADR(idt[0]), SYSTEM.SIZEOF(IDT)-1);
	Sti
END Start;

(* Return current instruction pointer *)
PROCEDURE CurrentPC* (): SYSTEM.ADDRESS;
CODE {SYSTEM.i386}
	MOV EAX, [EBP+4]
END CurrentPC;

(* Return current frame pointer *)
PROCEDURE -CurrentBP* (): SYSTEM.ADDRESS;
CODE {SYSTEM.i386}
	MOV EAX, EBP
END CurrentBP;

(* Set current frame pointer *)
PROCEDURE -SetBP* (bp: SYSTEM.ADDRESS);
CODE {SYSTEM.i386}
	POP EBP
END SetBP;

(* Return current stack pointer *)
PROCEDURE -CurrentSP* (): SYSTEM.ADDRESS;
CODE {SYSTEM.i386}
	MOV EAX, ESP
END CurrentSP;

(* Set current stack pointer *)
PROCEDURE -SetSP* (sp: SYSTEM.ADDRESS);
CODE {SYSTEM.i386}
	POP ESP
END SetSP;

(* Save minimal FPU state (for synchronous process switches). *)
(* saving FPU state takes 108 bytes memory space, no alignment required *)
PROCEDURE -FPUSaveMin* (VAR state: SSEState);
CODE {SYSTEM.i386, SYSTEM.FPU}
	POP EAX
	FNSTCW [EAX]	; control word is at state[0]
	FWAIT
END FPUSaveMin;

(* Restore minimal FPU state. *)
PROCEDURE -FPURestoreMin* (VAR state: SSEState);
CODE {SYSTEM.i386, SYSTEM.FPU}
	POP EAX
	FLDCW [EAX]	; control word is at state[0]
END FPURestoreMin;

(* Save full FPU state (for asynchronous process switches). *)
PROCEDURE -FPUSaveFull* (VAR state: SSEState);
CODE {SYSTEM.i386, SYSTEM.FPU}
	POP EAX
	FSAVE [EAX]
END FPUSaveFull;

(* Restore full FPU state. *)
PROCEDURE -FPURestoreFull* (VAR state: SSEState);
CODE {SYSTEM.i386, SYSTEM.FPU}
	POP EAX
	FRSTOR [EAX]
END FPURestoreFull;

(* stateAdr must be the address of a 16-byte aligned memory area of at least 512 bytes *)
PROCEDURE -SSESaveFull* (stateAdr: SYSTEM.ADDRESS);
CODE {SYSTEM.P2, SYSTEM.FPU, SYSTEM.SSE2}
	POP EAX
	FXSAVE [EAX]
	FWAIT
	FNINIT
END SSESaveFull;

PROCEDURE -SSERestoreFull* (stateAdr: SYSTEM.ADDRESS);
CODE {SYSTEM.P2, SYSTEM.FPU, SYSTEM.SSE2}
	POP EAX
	FXRSTOR [EAX]
END SSERestoreFull;

PROCEDURE -SSESaveMin* (stateAdr: SYSTEM.ADDRESS);
CODE {SYSTEM.i386, SYSTEM.FPU, SYSTEM.SSE2}
	POP EAX
	FNSTCW [EAX]
	FWAIT
	STMXCSR [EAX+24]
END SSESaveMin;

PROCEDURE -SSERestoreMin* (stateAdr: SYSTEM.ADDRESS);
CODE {SYSTEM.i386, SYSTEM.FPU, SYSTEM.SSE2}
	POP EAX
	FLDCW [EAX]
	LDMXCSR [EAX+24]
END SSERestoreMin;

(* Helper functions for SwitchTo. *)
PROCEDURE -PushState* (CONST state: State);
CODE {SYSTEM.i386}
	POP EAX	; ADR (state)
	POP EBX	; TYPECODE (state), ignored
	PUSH DWORD [EAX+48]	; FLAGS
	PUSH DWORD [EAX+44]	; CS
	PUSH DWORD [EAX+40]	; PC
	PUSH DWORD [EAX+28]	; EAX
	PUSH DWORD [EAX+24]	; ECX
	PUSH DWORD [EAX+20]	; EDX
	PUSH DWORD [EAX+16]	; EBX
	PUSH DWORD 0	; ignored
	PUSH DWORD [EAX+36]	; BP
	PUSH DWORD [EAX+4]	; ESI
	PUSH DWORD [EAX+0]	; EDI
END PushState;

PROCEDURE -JumpState*;
CODE {SYSTEM.i386}
	POPAD
	IRETD
END JumpState;

PROCEDURE -CallLocalIPC*;
CODE {SYSTEM.i386}
	INT MPIPCLocal
END CallLocalIPC;

PROCEDURE -HLT*;
CODE {SYSTEM.i386, SYSTEM.Privileged}
	STI	; (* required according to ACPI 2.0 spec section 8.2.2 *)
	HLT
END HLT;

PROCEDURE -GetEAX*(): LONGINT;
CODE{SYSTEM.i386}
END GetEAX;

PROCEDURE -GetECX*(): LONGINT;
CODE{SYSTEM.i386}
	MOV EAX,ECX
END GetECX;

PROCEDURE -GetESI*(): LONGINT;
CODE{SYSTEM.i386}
	MOV EAX,ESI
END GetESI;

PROCEDURE -GetEDI*(): LONGINT;
CODE{SYSTEM.i386}
	MOV EAX,EDI
END GetEDI;


PROCEDURE -SetEAX*(n: LONGINT);
CODE{SYSTEM.i386}	POP EAX
END SetEAX;

PROCEDURE -SetEBX*(n: LONGINT);
CODE{SYSTEM.i386}
	POP EBX
END SetEBX;

PROCEDURE -SetECX*(n: LONGINT);
CODE{SYSTEM.i386}
	POP ECX
END SetECX;

PROCEDURE -SetEDX*(n: LONGINT);
CODE{SYSTEM.i386}
	POP EDX
END SetEDX;

PROCEDURE -SetESI*(n: LONGINT);
CODE{SYSTEM.i386}
	POP ESI
END SetESI;

PROCEDURE -SetEDI*(n: LONGINT);
CODE{SYSTEM.i386}
	POP EDI
END SetEDI;

PROCEDURE  Portin8*(port: LONGINT; VAR val: CHAR);
CODE{SYSTEM.i386}
	MOV EDX,[EBP+port]
	IN AL, DX
	MOV ECX, [EBP+val]
	MOV [ECX], AL
END Portin8;

PROCEDURE  Portin16*(port: LONGINT; VAR val: INTEGER);
CODE{SYSTEM.i386}
	MOV EDX,[EBP+port]
	IN AX, DX
	MOV ECX, [EBP+val]
	MOV [ECX], AX
END Portin16;

PROCEDURE  Portin32*(port: LONGINT; VAR val: LONGINT);
CODE{SYSTEM.i386}
	MOV EDX,[EBP+port]
	IN EAX, DX
	MOV ECX, [EBP+val]
	MOV [ECX], EAX
END Portin32;

PROCEDURE  Portout8*(port: LONGINT; val: CHAR);
CODE{SYSTEM.i386}
	MOV AL,[EBP+val]
	MOV EDX,[EBP+port]
	OUT DX,AL
END Portout8;

PROCEDURE  Portout16*(port: LONGINT; val: INTEGER);
CODE{SYSTEM.i386}
	MOV AX,[EBP+val]
	MOV EDX,[EBP+port]
	OUT DX,AX
END Portout16;

PROCEDURE  Portout32*(port: LONGINT; val: LONGINT);
CODE{SYSTEM.i386}
	MOV EAX,[EBP+val]
	MOV EDX,[EBP+port]
	OUT DX,EAX
END Portout32;

(* Kernel mode upcall to perform global processor halt. *)
PROCEDURE KernelCallHLT*;
CODE {SYSTEM.i386}
	MOV EAX, 2
	INT MPKC
END KernelCallHLT;

(* Parse processor entry in MP config table. *)
PROCEDURE CPUID1*(): LONGINT;
CODE {SYSTEM.i386, SYSTEM.Pentium}
	MOV EAX, 1
	CPUID
	MOV EAX, EBX
END CPUID1;

(** -- Atomic operations -- *)

(** Atomic INC(x). *)
PROCEDURE -AtomicInc*(VAR x: LONGINT);
CODE {SYSTEM.i386}
	POP EAX
	LOCK
	INC DWORD [EAX]
END AtomicInc;

(** Atomic DEC(x). *)
PROCEDURE -AtomicDec*(VAR x: LONGINT);
CODE {SYSTEM.i386}
	POP EAX
	LOCK
	DEC DWORD [EAX]
END AtomicDec;

(** Atomic EXCL. *)
PROCEDURE  AtomicExcl* (VAR s: SET; bit: LONGINT);
CODE {SYSTEM.i386}
	MOV EAX, [EBP+bit]
	MOV EBX, [EBP+s]
	LOCK
	BTR [EBX], EAX
END AtomicExcl;

(** Atomic INC(x, y). *)
PROCEDURE -AtomicAdd*(VAR x: LONGINT; y: LONGINT);
CODE {SYSTEM.i386}
	POP EBX
	POP EAX
	LOCK
	ADD DWORD [EAX], EBX
END AtomicAdd;

(** Atomic test-and-set. Set x = TRUE and return old value of x. *)
PROCEDURE -AtomicTestSet*(VAR x: BOOLEAN): BOOLEAN;
CODE {SYSTEM.i386}
	POP EBX
	MOV AL, 1
	XCHG [EBX], AL
END AtomicTestSet;

(* Atomic compare-and-swap. Set x = new if x = old and return old value of x *)
PROCEDURE -AtomicCAS* (VAR x: LONGINT; old, new: LONGINT): LONGINT;
CODE {SYSTEM.i386}
	POP EBX		; new
	POP EAX		; old
	POP ECX		; address of x
	DB 0F0X, 00FX, 0B1X, 019X	; LOCK CMPXCHG [ECX], EBX; atomicly compare x with old and set it to new if equal
END AtomicCAS;

PROCEDURE CopyState* (CONST from: State; VAR to: State);
BEGIN
	to.EDI := from.EDI; to.ESI := from.ESI;
	to.EBX := from.EBX; to.EDX := from.EDX;
	to.ECX := from.ECX; to.EAX := from.EAX;
	to.BP := from.BP; to.PC := from.PC;
	to.CS := from.CS; to.FLAGS := from.FLAGS;
	to.SP := from.SP
END CopyState;


(* function returning the number of processors that are available to Aos *)
PROCEDURE NumberOfProcessors*( ): LONGINT;
BEGIN
	RETURN numberOfProcessors
END NumberOfProcessors;

(*! non portable code, for native Aos only *)
PROCEDURE SetNumberOfProcessors*(num: LONGINT);
BEGIN
	numberOfProcessors := num;
END SetNumberOfProcessors;

(* function for changing byte order *)
PROCEDURE ChangeByteOrder* (n: LONGINT): LONGINT;
CODE { SYSTEM.Pentium }
	MOV EAX, [EBP+n]				; load n in eax
	BSWAP EAX						; swap byte order
END ChangeByteOrder;

(* Write a value to the APIC. *)
PROCEDURE  ApicPut(ofs: SYSTEM.SIZE; val: SET);
BEGIN
	IF TraceApic THEN
		Acquire(TraceOutput);
		Trace.Hex(ofs, SYSTEM.SIZEOF(SYSTEM.SIZE)*2); Trace.String(" := "); Trace.Hex(SYSTEM.VAL (LONGINT, val), 9); Trace.Ln;
		Release(TraceOutput);
	END;
	SYSTEM.PUT(localAPIC+ofs, SYSTEM.VAL (LONGINT, val))
END ApicPut;

(* Read a value from the APIC. *)
PROCEDURE ApicGet(ofs: SYSTEM.SIZE): SET;
VAR val: SET;
BEGIN
	SYSTEM.GET(localAPIC+ofs, SYSTEM.VAL (LONGINT, val));
	IF TraceApic THEN
		Acquire(TraceOutput);
		Trace.String(" ("); Trace.Hex(ofs, SYSTEM.SIZEOF(SYSTEM.SIZE)*2); Trace.String(" = ");
		Trace.Hex(SYSTEM.VAL(LONGINT, val), 9); Trace.StringLn (")");
		Release(TraceOutput);
	END;
	RETURN val
END ApicGet;

(* Handle interprocessor interrupt. During upcall interrupts are off and processor is at kernel level. *)
PROCEDURE HandleIPC(VAR state: State);
VAR id: LONGINT;
BEGIN
	id := ID();
	IF ~TraceProcessor OR (id IN allProcessors) THEN
		IF FrontBarrier IN ipcFlags THEN
			AtomicExcl(ipcFrontBarrier, id);
			WHILE ipcFrontBarrier # {} DO SpinHint END	(* wait for all *)
		END;
		ipcHandler(id, state, ipcMessage);	(* interrupts off and at kernel level *)
		IF BackBarrier IN ipcFlags THEN
			AtomicExcl(ipcBackBarrier, id);
			WHILE ipcBackBarrier # {} DO SpinHint END	(* wait for all *)
		END;
		AtomicExcl(ipcBusy, id)	(* ack - after this point we do not access shared variables for this broadcast *)
	END;
	IF state.INT = MPIPC THEN
		ApicPut(0B0H, {})	(* EOI (not needed for NMI or local call, see 7.4.10.6) *)
	END
END HandleIPC;

(* Handle MP error interrupt. *)
PROCEDURE HandleError(VAR state: State);
VAR esr: SET; (* int: LONGINT; *)
BEGIN
	(* int := state.INT; *) esr := ApicGet(280H);
	ApicPut(0B0H, {});	(* EOI *)
	HALT(2302)	(* SMP error *)
END HandleError;

(* Interprocessor broadcasting. Lock level SMP. *)
PROCEDURE LocalBroadcast(h: BroadcastHandler; msg: Message; flags: SET);
BEGIN
	IF Self IN flags THEN ipcBusy := allProcessors
	ELSE ipcBusy := allProcessors - {ID()}
	END;
	ipcFrontBarrier := ipcBusy; ipcBackBarrier := ipcBusy;
	ipcHandler := h; ipcMessage := msg; ipcFlags := flags;
	IF numProcessors > 1 THEN	(* ICR: Fixed, Physical, Edge, All Excl. Self, INT IPC *)
		ApicPut(300H, {18..19} + SYSTEM.VAL (SET, MPIPC));
		(*REPEAT UNTIL ~(12 IN ApicGet(300H))*)	(* wait for send to finish *)
	END;
	IF Self IN flags THEN CallLocalIPC END;	(* "send" to self also *)
	WHILE ipcBusy # {} DO SpinHint END;	(* wait for all to ack before we release locks *)
	ipcHandler := NIL; ipcMessage := NIL	(* no race, because we have IPC lock *)
END LocalBroadcast;

(** Broadcast an operation to all processors. *)
PROCEDURE Broadcast* (h: BroadcastHandler; msg: Message; flags: SET);
BEGIN
	Acquire(Processors);
	LocalBroadcast(h, msg, flags);
	Release(Processors)
END Broadcast;

(* Start all halted processors. *)	(* Lock level Processors. *)
PROCEDURE StartAll*;
BEGIN
	Acquire(Processors);	(* wait for any pending Stops to finish, and disallow further Stops *)
	ASSERT(stopped & (ipcBusy = {}));
	ipcBusy := allProcessors - {ID()};
	stopped := FALSE;
	WHILE ipcBusy # {} DO SpinHint END;	(* wait for all to ack *)
	Release(Processors)
END StartAll;

PROCEDURE HandleFlushTLB(id: LONGINT; CONST state: State; msg: Message);
CODE {SYSTEM.i386, SYSTEM.Privileged}
	MOV EAX, CR3
	MOV CR3, EAX
END HandleFlushTLB;

(** Flush the TLBs on all processors (multiprocessor-safe). *)
PROCEDURE GlobalFlushTLB;
BEGIN
	Acquire(Processors);
	LocalBroadcast(HandleFlushTLB, NIL, {Self, FrontBarrier, BackBarrier});
	Release(Processors)
END GlobalFlushTLB;

PROCEDURE HandleFlushCache(id: LONGINT; CONST state: State; msg: Message);
CODE {SYSTEM.Pentium, SYSTEM.Privileged}
	WBINVD	; write back and invalidate internal cache and initiate write back and invalidation of external caches
END HandleFlushCache;

(** Flush the caches on all processors (multiprocessor-safe). *)
PROCEDURE GlobalFlushCache;
BEGIN
	Acquire(Processors);
	LocalBroadcast(HandleFlushCache, NIL, {Self, FrontBarrier, BackBarrier});
	Release(Processors)
END GlobalFlushCache;

(* Activate the garbage collector in single-processor mode. Lock level ALL. *)
PROCEDURE HandleKernelCall(VAR state: State);
BEGIN	(* level 0 *)
	IF IFBit IN state.FLAGS THEN
		Sti	(* re-enable interrupts *)
	END;
	CASE state.EAX OF	(* see KernelCall* *)
		|2:	(* HLT *)
			IF IFBit IN state.FLAGS THEN
				HLT
			END
	END
END HandleKernelCall;

(*
(** Activate the garbage collector immediately (multiprocessor-safe). *)
PROCEDURE GlobalGC*;
BEGIN
	Acquire(Processors);
	gcBarrier := allProcessors;
	LocalBroadcast(HandleGC, NIL, {Self, BackBarrier});
	Release(Processors);
END GlobalGC;
*)

PROCEDURE HandleGetTimestamp(id: LONGINT; CONST state: State; msg: Message);
BEGIN
	time[id] := GetTimer()
END HandleGetTimestamp;

(** Get timestamp on all processors (for testing). *)
PROCEDURE GlobalGetTimestamp;
VAR t: TimeArray; i: LONGINT; mean, var, n: HUGEINT;
BEGIN
	Acquire(Processors);
	LocalBroadcast(HandleGetTimestamp, NIL, {Self, FrontBarrier});
	LocalBroadcast(HandleGetTimestamp, NIL, {Self, FrontBarrier});
	t := time;
	Release(Processors);
	Acquire (TraceOutput);
	FOR i := 0 TO numProcessors-1 DO Trace.HIntHex(t[i], 17) END;
	IF numProcessors > 1 THEN
		mean := 0;
		n := numProcessors;
		FOR i := 0 TO numProcessors-1 DO
			INC (mean, t[i])
		END;
		mean := DivH(mean, n);
		var := 0;
		FOR i := 0 TO numProcessors-1 DO
			n := t[i] - mean;
			INC (var, MulH(n, n))
		END;
		var := DivH(var, numProcessors - 1);
		Trace.String(" mean="); Trace.HIntHex(mean, 16);
		Trace.String(" var="); Trace.HIntHex(var, 16);
		Trace.String(" var="); Trace.Int(SHORT (var), 1);
		Trace.String(" diff:");
		FOR i := 0 TO numProcessors-1 DO
			Trace.Int(SHORT (t[i] - mean), 1); Trace.Char(" ")
		END
	END;
	Release (TraceOutput);
END GlobalGetTimestamp;

PROCEDURE ParseProcessor(adr: SYSTEM.ADDRESS);
VAR id, idx, signature, family, feat, ver, log: LONGINT; flags: SET; string : ARRAY 8 OF CHAR;
BEGIN
	SYSTEM.GET(adr, SYSTEM.VAL (LONGINT, flags));
	id := ASH(SYSTEM.VAL (LONGINT, flags * {8..15}), -8);
	ver := ASH(SYSTEM.VAL (LONGINT, flags * {16..23}), -16);
	SYSTEM.GET (adr+4, signature);
	family := ASH(signature, -8) MOD 10H;
	SYSTEM.GET (adr+8, feat);
	idx := -1;
	IF (family # 0) & (signature MOD 1000H # 0FFFH) & (24 IN flags) & (id < LEN(idMap)) & (idMap[id] = -1) THEN
		IF 25 IN flags THEN idx := 0	(* boot processor *)
		ELSIF numProcessors < maxProcessors THEN idx := numProcessors; INC(numProcessors)
		ELSE	(* skip *)
		END
	END;
	IF idx # -1 THEN apicVer[idx] := ver; idMap[id] := SHORT(SHORT(idx)) END;
	Trace.String("   Processor "); Trace.Int(id, 1);
	Trace.String(", APIC"); Trace.Hex(ver, -3);
	Trace.String(", ver "); Trace.Int(family, 1);
	Trace.Char("."); Trace.Int(ASH(signature, -4) MOD 10H, 1);
	Trace.Char("."); Trace.Int(signature MOD 10H, 1);
	Trace.String(", features "); Trace.Hex(feat, 9);
	Trace.String(", ID "); Trace.Int(idx, 1);
	IF (threadsPerCore > 1) THEN Trace.String(" ("); Trace.Int(threadsPerCore, 0); Trace.String(" threads)"); END;
	Trace.Ln;
	IF (threadsPerCore > 1)  THEN
		GetConfig("DisableHyperthreading", string);
		IF (string = "1") THEN
			Trace.String("Machine: Hyperthreading disabled."); Trace.Ln;
			RETURN;
		END;
		log := (SYSTEM.LSH(CPUID1(), -16) MOD 256);
		WHILE log > 1 DO
			INC(id); DEC(log);
			IF numProcessors < maxProcessors THEN
				idx := numProcessors; INC(numProcessors);
				apicVer[idx] := ver; idMap[id] := SHORT(SHORT(idx))
			END
		END
	END
END ParseProcessor;

(* Parse MP configuration table. *)
PROCEDURE ParseMPConfig;
VAR adr, x: SYSTEM.ADDRESS; i: LONGINT; entries: INTEGER; ch: CHAR; s: SET; str: ARRAY 8 OF CHAR;
BEGIN
	localAPIC := 0; numProcessors := 1; allProcessors := {0};
	FOR i := 0 TO LEN(idMap)-1 DO idMap[i] := -1 END;	(* all unassigned *)
	FOR i := 0 TO MaxCPU-1 DO started[i] := FALSE END;
	adr := configMP;
	GetConfig("MaxProcs", str);
	i := 0; maxProcessors := StrToInt(i, str);
	IF maxProcessors = 0 THEN maxProcessors := MaxCPU END;
	IF (maxProcessors > 0) & (adr # NilAdr) THEN	(* MP config table present, possible multi-processor *)
		Trace.String("Machine: Intel MP Spec "); Trace.Int(ORD(revMP) DIV 10H + 1, 1);
		Trace.Char("."); Trace.Int(ORD(revMP) MOD 10H, 1); Trace.Ln;
		IF TraceVerbose THEN
			IF ODD(ASH(ORD(featureMP[1]), -7)) THEN
				Trace.StringLn (" PIC mode");
				(* to do: enable SymIO *)
			ELSE
				Trace.StringLn (" Virtual wire mode");
			END
		END;
		IF featureMP[0] # 0X THEN	(* pre-defined configuration *)
			Trace.String("   Default config "); Trace.Int(ORD(featureMP[0]), 1); Trace.Ln;
			localAPIC := SHORT(0FEE00000H);
			apicVer[0] := 0; apicVer[1] := 0
		ELSE	(* configuration defined in table *)
			MapPhysical(adr, 68*1024, adr);	(* 64K + 4K header *)
			SYSTEM.GET (adr, x); ASSERT(x = 504D4350H);	(* check signature *)
			SYSTEM.GET (adr+4, x);	(* length *)
			ASSERT(ChecksumMP(adr, x MOD 10000H) = 0);
			IF TraceVerbose THEN
				Trace.String(" ID: ");
				FOR x := adr+8 TO adr+27 DO
					SYSTEM.GET (x, ch); Trace.Char(ch);
					IF x = adr+15 THEN Trace.Char(" ") END
				END;
				Trace.Ln
			END;
			localAPIC := 0; SYSTEM.GET(adr+36, SYSTEM.VAL (LONGINT, localAPIC));
			IF TraceVerbose THEN Trace.String("  Local APIC:"); Trace.Address (localAPIC); Trace.Ln END;
			SYSTEM.GET (adr+34, entries);
			INC(adr, 44);	(* skip header *)
			WHILE entries > 0 DO
				SYSTEM.GET (adr, ch);	(* type *)
				CASE ORD(ch) OF
					0:	(* processor *)
						ParseProcessor(adr);
						INC(adr, 20)
					|1:	(* bus *)
						IF TraceVerbose THEN
							SYSTEM.GET (adr+1, ch);
							Trace.String(" Bus "); Trace.Int(ORD(ch), 1); Trace.String(": ");
							FOR x := adr+2 TO adr+7 DO SYSTEM.GET (x, ch); Trace.Char(ch) END;
							Trace.Ln
						END;
						INC(adr, 8)
					|2:	(* IO APIC *)
						IF TraceVerbose THEN
							SYSTEM.GET (adr+1, ch); Trace.String(" IO APIC ID:"); Trace.Hex(ORD(ch), -3);
							SYSTEM.GET (adr+2, ch); Trace.String(", version "); Trace.Int(ORD(ch), 1);
							SYSTEM.GET(adr, SYSTEM.VAL (LONGINT, s)); IF ~(24 IN s) THEN Trace.String(" (disabled)") END;
							Trace.Ln
						END;
						INC(adr, 8)
					|3:	(* IO interrupt assignment *)
						INC(adr, 8)
					|4:	(* Local interrupt assignment *)
						INC(adr, 8)
				END;	(* CASE *)
				DEC(entries)
			END
		END
	END;
	IF localAPIC = 0 THEN	(* single processor *)
		Trace.StringLn ("Machine: Single-processor");
		apicVer[0] := 0
	END;
	started[0] := TRUE;
	FOR i := 0 TO MaxCPU-1 DO revIDmap[i] := -1 END;
	FOR i := 0 TO LEN(idMap)-1 DO
		x := idMap[i];
		IF x # -1 THEN
			ASSERT(revIDmap[x] = -1);	(* no duplicate APIC ids *)
			revIDmap[x] := SHORT(SHORT(i))
		END
	END;
	(* timer configuration *)
	GetConfig("TimerRate", str);
	i := 0; timerRate := StrToInt(i, str);
	IF timerRate = 0 THEN timerRate := 1000 END;
	IF TraceProcessor THEN
		GetConfig("TraceProc", str);
		i := 0; traceProcessor := StrToInt(i, str) # 0
	END
END ParseMPConfig;

(* Return the current average measured bus clock speed in Hz. *)
PROCEDURE GetBusClockRate(): LONGINT;
VAR timer: LONGINT; t: LONGINT;
BEGIN
	t := ticks;
	REPEAT UNTIL ticks # t;	(* wait for edge *)
	timer := ticks + ClockRateDelay;
	ApicPut(380H, SYSTEM.VAL (SET, MAX(LONGINT)));	(* initial count *)
	REPEAT UNTIL timer - ticks <= 0;
	t := MAX(LONGINT) - SYSTEM.VAL (LONGINT, ApicGet(390H));	(* current count *)
	IF t <= MAX(LONGINT) DIV 1000 THEN
		RETURN 1000 * t DIV ClockRateDelay
	ELSE
		RETURN t DIV ClockRateDelay * 1000
	END
END GetBusClockRate;

(* Initialize APIC timer for timeslicing. *)
PROCEDURE InitMPTimer;
VAR rate: LONGINT;
BEGIN
	IF timerRate > 0 THEN
		ApicPut(3E0H, {0,1,3});	(* divide by 1 *)
		ApicPut(320H, {16} + SYSTEM.VAL (SET, MPTMR));	(* masked, one-shot *)
		rate := GetBusClockRate();
		busHz0[ID()] := rate;
		rate := (rate+500000) DIV 1000000 * 1000000;	(* round to nearest MHz *)
		busHz1[ID()] := rate;
		ApicPut(320H, {17} + SYSTEM.VAL (SET, MPTMR));	(* unmasked, periodic *)
		ApicPut(380H, SYSTEM.VAL (SET, rate DIV timerRate))	(* initial count *)
	END
END InitMPTimer;

(* Handle multiprocessor timer interrupt. *)
PROCEDURE HandleMPTimer(VAR state: State);
BEGIN	(* {interrupts off} *)
	timer(ID(), state);
	ApicPut(0B0H, {});	(* EOI *)
	Sti;	(* enable interrupts before acquiring locks below - to avoid deadlock with StopAll. *)
	Timeslice(state)	(* fixme: check recursive interrupt *)
END HandleMPTimer;

(* Handle uniprocessor timer interrupt. *)
PROCEDURE HandleUPTimer(VAR state: State);
BEGIN	(* {interrupts off} *)
	timer(0, state);
	Sti;	(* enable interrupts before acquiring locks below - to avoid deadlock with StopAll. *)
	Timeslice(state)
END HandleUPTimer;

PROCEDURE DummyEvent(id: LONGINT; CONST state: State);
END DummyEvent;

(** Install a processor timer event handler. *)
PROCEDURE InstallEventHandler* (h: EventHandler);
BEGIN
	IF h # NIL THEN timer := h ELSE timer := DummyEvent END
END InstallEventHandler;

(* Initialize APIC for current processor. *)
PROCEDURE InitAPIC;
BEGIN
	(* enable APIC, set focus checking & set spurious interrupt handler *)
	ASSERT(MPSPU MOD 16 = 15);	(* low 4 bits set, p. 7-29 *)
	ApicPut(0F0H, {8} + SYSTEM.VAL (SET, MPSPU));
	(* set error interrupt handler *)
	ApicPut(370H, SYSTEM.VAL (SET, MPERR));
	InitMPTimer
END InitAPIC;

(* Start processor activity. *)
PROCEDURE StartMP;
VAR id: LONGINT; state: State;
BEGIN	(* running at kernel level with interrupts on *)
	InitAPIC;
	id := ID();	(* timeslicing is disabled, as we are running at kernel level *)
	Acquire (TraceOutput);
	Trace.String ("   P"); Trace.Int(id, 1); Trace.StringLn (" running");
	Release (TraceOutput);
	IF TraceProcessor & traceProcessor & (id = numProcessors-1) THEN
		DEC(numProcessors)	(* exclude from rest of activity *)
	ELSE
		INCL(allProcessors, id)
	END;
	(* synchronize with boot processor - end of mutual exclusion *)
	started[id] := TRUE;
	IF TraceProcessor & ~(id IN allProcessors) THEN
		Acquire (TraceOutput);
		Trace.String ("   P"); Trace.Int(id, 1); Trace.StringLn (" tracing");
		Release (TraceOutput);
		LOOP
			IF traceProcessorProc # NIL THEN traceProcessorProc(id, state) END;
			SpinHint
		END
	END;
	(* wait until woken up *)
	WHILE stopped DO SpinHint END;
	(* now fully functional, including storage allocation *)
	AtomicExcl(ipcBusy, id);	(* ack *)
	Acquire (TraceOutput);
	Trace.String ("   P"); Trace.Int(id, 1); Trace.StringLn(" scheduling");
	Release (TraceOutput);
	ASSERT(id = ID());	(* still running on same processor *)
	start;
END StartMP;

(* Subsequent processors start executing here. *)
PROCEDURE EnterMP;
(* no local variables allowed, because stack is switched. *)
BEGIN	(* running at kernel level with interrupts off *)
	InitProcessor;
	InitMemory;	(* switch stack *)
	Start;
	StartMP
END EnterMP;

(* Start another processor. *)
PROCEDURE StartProcessor(phys: SYSTEM.ADDRESS; apicid: LONGINT; startup: BOOLEAN);
VAR j, k: LONGINT; s: SET; timer: LONGINT;
BEGIN
	(* clear APIC errors *)
	ApicPut(280H, {}); s := ApicGet(280H);
	(* assert INIT *)
	ApicPut(310H, SYSTEM.VAL (SET, ASH(apicid, 24)));	(* set destination *)
	ApicPut(300H, {8, 10, 14, 15});	(* set Dest, INIT, Phys, Assert, Level *)
	timer := ticks + 5;		(* > 200us *)
	REPEAT UNTIL timer - ticks <= 0;
	(* deassert INIT *)
	ApicPut(310H, SYSTEM.VAL (SET, ASH(apicid, 24)));	(* set destination *)
	ApicPut(300H, {8, 10, 15});	(* set Dest, INIT, Deassert, Phys, Level *)
	IF startup THEN	(* send STARTUP if required *)
		j := 0; k := 2;
		WHILE j # k DO
			ApicPut(280H, {});
			ApicPut(310H, SYSTEM.VAL (SET, ASH(apicid, 24)));	(* set destination *)
			(* set Dest, Startup, Deassert, Phys, Edge *)
			ApicPut(300H, {9, 10} + SYSTEM.VAL (SET, phys DIV 4096 MOD 256));
			timer := ticks + 10;	(* ~10ms *)
			REPEAT UNTIL timer - ticks <= 0;
			IF ~(12 IN ApicGet(300H)) THEN	(* idle *)
				IF ApicGet(280H) * {0..3, 5..7} = {} THEN k := j	(* ESR success, exit *)
				ELSE INC(j)	(* retry *)
				END
			ELSE INC(j)	(* retry *)
			END
		END
	END
END StartProcessor;

(* Boot other processors, one at a time. *)
PROCEDURE BootMP;
VAR phys, page0Adr: SYSTEM.ADDRESS; i: LONGINT; timer: LONGINT;
BEGIN
	stopped := TRUE; ipcBusy := {};	(* other processors can be woken with StartAll *)
	InitBootPage(EnterMP, phys);
	MapPhysical(0, 4096, page0Adr);	(* map in BIOS data area *)
	Acquire(TraceOutput); Trace.String("Machine: Booting processors... "); Trace.Ln; Release(TraceOutput);
	FOR i := 1 TO numProcessors-1 DO
		(* set up booting for old processor types that reset on INIT & don't understand STARTUP *)
		SYSTEM.PUT (page0Adr + 467H, ASH(phys, 16-4));
		PutNVByte(15, 0AX);	(* shutdown status byte *)
		(* attempt to start another processor *)
		Acquire(TraceOutput); Trace.String("   P0 starting P"); Trace.Int(i, 1); Trace.Ln; Release(TraceOutput);
		StartProcessor(phys, revIDmap[i], apicVer[i] >= 10H);	(* try booting processor i *)
		(* wait for CPU to become active *)
		timer := ticks + 5000;	(* ~5s timeout *)
		REPEAT SpinHint UNTIL started[i] OR (timer - ticks <= 0);
		(* end of mutual exclusion *)
		Acquire(TraceOutput);
		IF started[i] THEN
			Trace.String("   P0 recognized P"); Trace.Int(i, 1);
		ELSE
			Trace.String("   P0 timeout on P"); Trace.Int(i, 1);
		END;
		Trace.Ln;
		Release(TraceOutput);
	END;
	SYSTEM.PUT (page0Adr + 467H, SYSTEM.VAL (LONGINT, 0));
	UnmapPhysical(page0Adr, 4096);
	PutNVByte(15, 0X)	(* restore shutdown status *)
END BootMP;

(* Timer interrupt handler. *)
PROCEDURE TimerInterruptHandler(VAR state: State);
BEGIN
	INC(ticks);
	DEC(eventCount);
	IF eventCount = 0 THEN
		eventCount := eventMax; event(state)
	END
END TimerInterruptHandler;

PROCEDURE Dummy(VAR state: State);
END Dummy;

PROCEDURE InitTicks;
CONST Div = (2*TimerClock + Second) DIV (2*Second);	(* timer clock divisor *)
BEGIN
	eventCount := 0; eventMax := 0; event := Dummy;
	(* initialize timer hardware *)
	ASSERT(Div <= 65535);
	Portout8(43H, 34X); Wait;	(* mode 2, rate generator *)
	Portout8(40H, CHR(Div MOD 100H)); Wait;
	Portout8(40H, CHR(ASH(Div, -8)));
	InstallHandler(TimerInterruptHandler, IRQ0)
END InitTicks;

(* Set timer upcall. The handler procedure will be called at a rate of Second/divisor Hz. *)
PROCEDURE InstallTickHandler(handler: Handler; divisor: LONGINT);
BEGIN
	eventMax := divisor; event := handler;
	eventCount := eventMax
END InstallTickHandler;

(* Initialize processors *)
PROCEDURE InitProcessors*;
BEGIN
	traceProcessor := FALSE; traceProcessorProc := NIL;
	ASSERT(Second = 1000);	(* use of Machine.ticks *)
	InitTicks;
	timer := DummyEvent;
	ParseMPConfig;
	InstallHandler(HandleIPC, MPIPCLocal);
	IF localAPIC # 0 THEN	(* APIC present *)
		InitAPICArea(localAPIC, 4096);
		InitAPICIDAdr(localAPIC+20H, idMap);
		ASSERT(MPSPU MOD 16 = 15);	(* use default handler (see 7.4.11.1) *)
		InstallHandler(HandleError, MPERR);
		InstallHandler(HandleMPTimer, MPTMR);
		InstallHandler(HandleIPC, MPIPC);
		InitAPIC;
		IF numProcessors > 1 THEN BootMP END
	ELSE
		IF timerRate > 0 THEN
			InstallTickHandler(HandleUPTimer, Second DIV timerRate)
		END
	END;
	InstallHandler(HandleKernelCall, MPKC);
END InitProcessors;

(* Send and print character *)
PROCEDURE TraceChar (c: CHAR);
VAR status: SHORTINT;

	(* Scroll the screen by one line. *)
	PROCEDURE Scroll;
	VAR adr: SYSTEM.ADDRESS; off: SYSTEM.SIZE;
	BEGIN
		adr := traceBase + TraceLen;
		SYSTEM.MOVE (adr, adr - TraceLen, TraceSize - TraceLen);
		adr := traceBase + TraceSize - TraceLen;
		FOR off := 0 TO TraceLen - SYSTEM.SIZEOF(INTEGER) BY SYSTEM.SIZEOF(INTEGER) DO SYSTEM.PUT16 (adr + off, 100H * 7H + 32) END
	END Scroll;

BEGIN
	IF TraceV24 IN traceMode THEN
		REPEAT	(* wait until port is ready to accept a character *)
		Portin8 (SHORT(tracePort + 5), SYSTEM.VAL(CHAR,status))
		UNTIL ODD (status DIV 20H);	(* THR empty *)
		Portout8 (SHORT(tracePort), c);
	END;
	IF TraceScreen IN traceMode THEN
		IF c = 9X THEN c := 20X END;
		IF c = 0DX THEN	(* CR *)
			DEC (tracePos, tracePos MOD TraceLen)
		ELSIF c = 0AX THEN	(* LF *)
			IF tracePos < TraceSize THEN
				INC (tracePos, TraceLen)	(* down to next line *)
			ELSE
				Scroll
			END
		ELSE
			IF tracePos >= TraceSize THEN
				Scroll;
				DEC (tracePos, TraceLen)
			END;
			SYSTEM.PUT16 (traceBase + tracePos, 100H * traceColor + ORD (c));
			INC (tracePos, SYSTEM.SIZEOF(INTEGER))
		END
	END
END TraceChar;

(* Change color *)
PROCEDURE TraceColor (c: SHORTINT);
BEGIN traceColor := c;
END TraceColor;

(* Initialise tracing. *)
PROCEDURE InitTrace;
CONST MaxPorts = 8;
VAR i, p, bps: LONGINT; off: SYSTEM.SIZE; s, name: ARRAY 32 OF CHAR;
	baselist: ARRAY MaxPorts OF LONGINT;
BEGIN
	GetConfig ("TraceMode", s);
	p := 0; traceMode := SYSTEM.VAL (SET, StrToInt (p, s));
	IF TraceScreen IN traceMode THEN
		GetConfig ("TraceMem", s);
		p := 0; traceBase := SYSTEM.VAL (SYSTEM.ADDRESS, StrToInt (p, s));
		IF traceBase = 0 THEN traceBase := 0B8000H END;	(* default screen buffer *)
		FOR off := 0 TO TraceSize - SYSTEM.SIZEOF(INTEGER) BY SYSTEM.SIZEOF(INTEGER) DO SYSTEM.PUT16 (traceBase + off, 100H * 7H + 32) END;
		tracePos := 0;
		Portout8(3D4H, 0EX);
		Portout8(3D5H, CHR((TraceWidth*TraceHeight) DIV 100H));
		Portout8(3D4H, 0FX);
		Portout8(3D5H, CHR((TraceWidth*TraceHeight) MOD 100H))
	END;
	IF TraceV24 IN traceMode THEN
		FOR i := 0 TO MaxPorts - 1 DO
			COPY ("COMx", name); name[3] := CHR (ORD ("1") + i);
			GetConfig (name, s); p := 0; baselist[i] := StrToInt (p, s);
		END;
		IF baselist[0] = 0 THEN baselist[0] := 3F8H END;	(* COM1 port default values *)
		IF baselist[1] = 0 THEN baselist[1] := 2F8H END;	(* COM2 port default values *)
		GetConfig("TracePort", s); p := 0; p := StrToInt(p, s); DEC(p);
		IF (p >= 0) & (p < MaxPorts) THEN tracePort := baselist[p] ELSE tracePort := baselist[0] END;
		ASSERT(tracePort > 0);
		GetConfig("TraceBPS", s); p := 0; bps := StrToInt(p, s);
		IF bps <= 0 THEN bps := 38400 END;
		Portout8 (SHORT(tracePort + 3), 80X);	(* Set the Divisor Latch Bit - DLAB = 1 *)
		bps := 115200 DIV bps;	(* compiler DIV/PORTOUT bug workaround *)
		Portout8 (SHORT(tracePort + 1), CHR (bps DIV 100H));	(* Set the Divisor Latch MSB *)
		Portout8 (SHORT(tracePort), CHR (bps MOD 100H));	(* Set the Divisor Latch LSB *)
		Portout8 (SHORT(tracePort + 3), 3X);	(* 8N1 *)
		Portout8 (SHORT(tracePort + 4), 3X);	(* Set DTR, RTS on in the MCR *)
		Portout8 (SHORT(tracePort + 1), 0X);	(* Disable receive interrupts *)
	END;
	traceColor := 7; Trace.Char := TraceChar; Trace.Color := TraceColor;
END InitTrace;

PROCEDURE InitHandlers;
VAR i,j: LONGINT;
BEGIN
	FOR i := 0 TO IDTSize - 1 DO
		FOR j := 0 TO MaxNumHandlers - 1 DO
			intHandler[i, j].valid := FALSE;
			intHandler[i, j].handler := NIL
		END
	END;
END InitHandlers;

PROCEDURE - PushBootValues;
CODE{SYSTEM.i386}
	PUSH EDI
	PUSH ESI
	PUSH EAX
END PushBootValues;

PROCEDURE - GetBootValue(): LONGINT;
CODE{SYSTEM.i386}
	POP EAX
END GetBootValue;

(* Init code called from OBL. EAX = boot table offset. ESI, EDI=initRegs. 2k stack is available. No trap handling. *)
BEGIN
	PushBootValues;
	bootFlag := GetBootValue();
	initRegs[0] := GetBootValue();
	initRegs[1] := GetBootValue();
	(* registers 6 and 7 get converted to 32 bit, cf. PCB.Assigne
	SYSTEM.GETREG(6, initRegs[0]); SYSTEM.GETREG(7, initRegs[1]);	(* initRegs0 & initRegs1 *)
	*)
	SYSTEM.PUT16(0472H, 01234H);	(* soft boot flag, for when we reboot *)
	ReadBootTable(bootFlag);
	InitTrace;
	Trace.String("Machine: "); Trace.Blue;Trace.StringLn (Version); Trace.Default;
	CheckMemory;
	SearchMP;
	AllocateDMA;	(* must be called after SearchMP, as lowTop is modified *)
	version := Version;
	InitBoot;
	InitProcessor;

	InitLocks;

	NmaxUserStacks := MaxUserStacks;
	ASSERT(ASH(1, PSlog2) = PS);
	Trace.String("Machine: Enabling MMU... ");
	InitSegments;	(* enable flat segments *)
	InitPages;	(* create page tables *)
	InitMemory;	(* switch on segmentation, paging and switch stack *)
	Trace.Green; Trace.StringLn("Ok"); Trace.Default;
	InitHandlers; (* no FOR loops here as they create local variables, collision with GetEAX in first statement *)
	default.valid := FALSE;	(* initialized later *)
END Machine.

(*
03.03.1998	pjm	First version
30.06.1999	pjm	ProcessorID moved to AosProcessor
*)

(**
Notes

This module defines an interface to the boot environment of the system. The facilities provided here are only intended for the lowest levels of the system, and should never be directly imported by user modules (exceptions are noted below). They are highly specific to the system hardware and firmware architecture.

Typically a machine has some type of firmware that performs initial testing and setup of the system. The firmware initiates the operating system bootstrap loader, which loads the boot file. This module is the first module in the statically linked boot file that gets control.

There are two more-or-less general procedures in this module: GetConfig and StrToInt. GetConfig is used to query low-level system settings, e.g., the location of the boot file system. StrToInt is a utility procedure that parses numeric strings.

Config strings:

ExtMemSize	Specifies size of extended memory (above 1MB) in MB. This value is not checked for validity. Setting it false may cause the system to fail, possible after running for some time. The memory size is usually detected automatically, but if the detection does not work for some reason, or if you want to limit the amount of memory detected, this string can be set. For example, if the machine has 64MB of memory, this value can be set as ExtMemSize="63".
*)