summaryrefslogtreecommitdiffstats
path: root/private/ntos/nthals/halcbus/i386/cbus1bt.asm
diff options
context:
space:
mode:
Diffstat (limited to 'private/ntos/nthals/halcbus/i386/cbus1bt.asm')
-rw-r--r--private/ntos/nthals/halcbus/i386/cbus1bt.asm314
1 files changed, 314 insertions, 0 deletions
diff --git a/private/ntos/nthals/halcbus/i386/cbus1bt.asm b/private/ntos/nthals/halcbus/i386/cbus1bt.asm
new file mode 100644
index 000000000..5f36b92ad
--- /dev/null
+++ b/private/ntos/nthals/halcbus/i386/cbus1bt.asm
@@ -0,0 +1,314 @@
+ title "MP primitives for the Corollary Cbus machines"
+;++
+;
+;Copyright (c) 1992, 1993, 1994 Corollary Inc.
+;
+;Module Name:
+;
+; cbus1bt.asm
+;
+;Abstract:
+;
+; Corollary Cbus1 Boot Code
+;
+; This module implements the low-level, highly
+; cache-architecture-dependent code that boots the additional
+; processors in Corollary Cbus1-based machines.
+
+; This consists of two functions which are exactly the
+; same (Cbus1Boot1 & Cbus1Boot2). The calling code
+; determines which one is safe to call (depending on the
+; linker, sometimes both may be ok). The reason for this
+; is that the boot processor fills in the reset vector at
+; 0xFFFFFFF0 for the next processor and that cache line
+; must not be inadvertently flushed before the next processor
+; gets out of reset to see where to go (it's filled in with
+; a real-mode jmp cs:ip). Note that this code is highly
+; dependent on the linker placing all this code contiguous
+; and the hardware architecture of the Corollary L2 caches.
+; unless the system is fully populated, memory will not exist
+; at 0xFFFFFFF0. hence, we must ensure that the cacheline is
+; not evicted until the processor has done the jump!
+
+
+; the order of Cbus1Boot1, ciboot, and Cbus1Boot2 is critical.
+; Cbus1Boot1 and Cbus1Boot2 must be separated by Cbus1BootCPU;
+; Cbus1Boot1 must be defined before Cbus1Boot2.
+; the size of all three must be less than 4K.
+
+; WARNING!!! WARNING!!! WARNING!!!
+
+; do not put any routines between Cbus1Boot1 and Cbus1Boot2. there
+; are tricky games being played with the write back caches so
+; that StartVector[] does not get flushed.
+
+;
+;Author:
+;
+; Landy Wang (landy@corollary.com) 23-Jun-1993
+;
+;Environment:
+; Kernel mode.
+;
+;--
+
+
+
+.386p
+ .xlist
+include hal386.inc
+include callconv.inc ; calling convention macros
+
+ .list
+
+INIT SEGMENT DWORD PUBLIC 'CODE' ; Start 32 bit code
+ ASSUME DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING
+
+;++
+;
+; VOID
+; Cbus1Boot1 (
+; IN ULONG Processor,
+; IN PQUAD Dest,
+; IN PQUAD Source,
+; IN ULONG ResetAddress,
+; IN ULONG ResetValue
+; )
+;
+;
+; Routine Description:
+;
+; Clear reset on the specified logical processor number, setting his
+; reset vector to point at the specified code location. The Dest
+; generally points at a reset vector, and thus, unless the system is
+; fully populated, memory will not exist at that address. hence, we
+; must ensure that the cacheline is not evicted until the processor
+; has done the jump!
+;
+; Arguments:
+;
+; Processor - Supplies a logical processor number
+;
+; Dest - Supplies the address of the reset vector where the code below
+; will go.
+;
+; Source - Supplies startup code for this processor, currently a 5 byte
+; intersegment (far) jump, i.e. "jmp cs:ip"
+;
+; Note the reset vector length is hardcoded here to 8 bytes. (ie: Dest
+; and Source must point at arrays of 8 bytes each).
+;
+; ResetAddress - Supplies the address to poke to clear reset
+;
+; ResetValue - Supplies the value to poke to clear reset
+;
+; Return Value:
+;
+; None.
+;--
+
+ProcessorNumber equ dword ptr [ebp+8] ; zero based; not referenced by Cbus1Boot1/Cbus1Boot2
+Destination equ dword ptr [ebp+12] ; address of the 8-byte reset vector to fill in
+Source equ dword ptr [ebp+16] ; address of the 8 bytes of startup code to copy
+ResetAddress equ dword ptr [ebp+20] ; address that is poked to clear reset
+ResetValue equ dword ptr [ebp+24] ; value to poke; only its low byte is stored
+
+cPublicProc _Cbus1Boot1 ,5 ; fill reset vector, clear reset, stall ~40us
+ push ebp ; standard frame: arguments start at [ebp+8]
+ mov ebp, esp
+ push ebx ; ebx, esi and edi are overwritten below
+ push esi ; and restored before returning
+ push edi
+
+ ; everything needed after the reset vector is written is loaded
+ ; into registers first: eax = stall loop count, edx = reset
+ ; address, bl = reset value, ecx/esi = the 8 bytes of startup
+ ; code, edi = destination. after the fill, the only data memory
+ ; reference made before the stall loop ends is the reset poke
+ ; itself. see the comment before the fill below.
+
+ mov eax, PCR[PcStallScaleFactor] ; get per microsecond
+ ; loop count for the processor
+
+ mov ecx, 40 ; 40 microsecond stall
+ mul ecx ; (eax) = desired loop count; mul also overwrites edx
+
+ mov edx, ResetAddress ; (edx) = address to poke to clear reset
+ mov ebx, ResetValue ; (bl) = value to poke; only the low byte is stored
+
+ mov esi, Source ; point at the source code
+
+ mov ecx, dword ptr [esi] ; get first dword into a reg
+ mov esi, dword ptr [esi+4] ; and 2nd dword into a reg (esi is no longer a pointer)
+
+ mov edi, Destination ; (edi) = reset vector to be filled in
+
+ ;
+ ; now start filling in the cache line for the processor coming out
+ ; of reset. no memory references which may flush this cache line
+ ; can be made after the below fill UNTIL the booting processor
+ ; has read the line. (the only memory references made here in this
+ ; critical time period is the code fetching, but our caller has
+ ; already determined that none of the code in this function could
+ ; cause the cache line to be flushed).
+ ;
+
+ mov dword ptr [edi], ecx ; 1st dword now in the cacheline
+ mov dword ptr [edi+4], esi ; and 2nd dword now in
+
+ ; cache line is initialized, we must let it get flushed now, or
+ ; the additional processor will fly blind.
+ ; NOTE(review): the module header says this line must NOT be evicted
+ ; before the new processor jumps; "must let" may mean "must not let" - confirm.
+
+ mov byte ptr [edx], bl ; clear reset
+
+ ;
+ ; wait approximately 40 microseconds, but don't call
+ ; KeStallExecutionProcessor() as this might flush the
+ ; cache line prematurely. inline the function instead.
+ ;
+
+ align 4 ; align the top of the stall loop
+@@:
+ sub eax, 1 ; (eax) = (eax) - 1
+ jnz short @b ; spin until zero (a zero start count would wrap to 2^32 iterations)
+
+
+ pop edi ; restore the registers saved on entry
+ pop esi
+ pop ebx
+ mov esp, ebp ; tear down the frame
+ pop ebp
+ stdRET _Cbus1Boot1
+
+stdENDP _Cbus1Boot1
+
+ ;
+ ; force enough spacing between the two boot functions so
+ ; that at least one of them will always be safe to call.
+ ; currently that would be 16 bytes (the current cache line
+ ; size), but make it bigger so any of our OEMs will be safe
+ ; even if they modify the size of the cache line.
+ ;
+
+ public _Cbus1Boot1End ; make the end-of-Cbus1Boot1 marker visible to other modules
+_Cbus1Boot1End label byte ; names the first byte following Cbus1Boot1
+ db 64 dup (?) ; 64 bytes of padding separating the two boot functions
+
+;++
+;
+; VOID
+; Cbus1Boot2 (
+; IN ULONG Processor,
+; IN PQUAD Dest,
+; IN PQUAD Source,
+; IN ULONG ResetAddress,
+; IN ULONG ResetValue
+; )
+;
+;
+; Routine Description:
+;
+; Clear reset on the specified logical processor number, setting his
+; reset vector to point at the specified code location. The Dest
+; generally points at a reset vector, and thus, unless the system is
+; fully populated, memory will not exist at that address. hence, we
+; must ensure that the cacheline is not evicted until the processor
+; has done the jump!
+;
+; Arguments:
+;
+; Processor - Supplies a logical processor number
+;
+; Dest - Supplies the address of the reset vector where the code below
+; will go.
+;
+; Source - Supplies startup code for this processor, currently a 5 byte
+; intersegment (far) jump, i.e. "jmp cs:ip"
+;
+; Note the reset vector length is hardcoded here to 8 bytes. (ie: Dest
+; and Source must point at arrays of 8 bytes each).
+;
+; ResetAddress - Supplies the address to poke to clear reset
+;
+; ResetValue - Supplies the value to poke to clear reset
+;
+; Return Value:
+;
+; None.
+;--
+
+cPublicProc _Cbus1Boot2 ,5 ; same body as Cbus1Boot1, at a different code address
+ push ebp ; standard frame: arguments start at [ebp+8]
+ mov ebp, esp
+ push ebx ; ebx, esi and edi are overwritten below
+ push esi ; and restored before returning
+ push edi
+
+ ; preload every value used once the reset vector has been written:
+ ; eax = stall loop count, edx = reset address, bl = reset value,
+ ; ecx/esi = the 8 bytes of startup code, edi = destination.
+ ; between the fill and the end of the stall loop the only data
+ ; memory reference is the store that clears reset.
+ ; see the comment before the fill below.
+
+ mov eax, PCR[PcStallScaleFactor] ; get per microsecond
+ ; loop count for the processor
+
+ mov ecx, 40 ; 40 microsecond stall
+ mul ecx ; (eax) = desired loop count; mul also overwrites edx
+
+ mov edx, ResetAddress ; (edx) = address to poke to clear reset
+ mov ebx, ResetValue ; (bl) = value to poke; only the low byte is stored
+
+ mov esi, Source ; point at the source code
+
+ mov ecx, dword ptr [esi] ; get first dword into a reg
+ mov esi, dword ptr [esi+4] ; and 2nd dword into a reg (esi is no longer a pointer)
+
+ mov edi, Destination ; (edi) = reset vector to be filled in
+
+ ;
+ ; now start filling in the cache line for the processor coming out
+ ; of reset. no memory references which may flush this cache line
+ ; can be made after the below fill UNTIL the booting processor
+ ; has read the line. (the only memory references made here in this
+ ; critical time period is the code fetching, but our caller has
+ ; already determined that none of the code in this function could
+ ; cause the cache line to be flushed).
+ ;
+
+ mov dword ptr [edi], ecx ; 1st dword now in the cacheline
+ mov dword ptr [edi+4], esi ; and 2nd dword now in
+
+ ; cache line is initialized, we must let it get flushed now, or
+ ; the additional processor will fly blind.
+ ; NOTE(review): the module header says this line must NOT be evicted
+ ; before the new processor jumps; "must let" may mean "must not let" - confirm.
+
+ mov byte ptr [edx], bl ; clear reset
+
+ ;
+ ; wait approximately 40 microseconds, but don't call
+ ; KeStallExecutionProcessor() as this might flush the
+ ; cache line prematurely. inline the function instead.
+ ;
+
+ align 4 ; align the top of the stall loop
+@@:
+ sub eax, 1 ; (eax) = (eax) - 1
+ jnz short @b ; spin until zero (a zero start count would wrap to 2^32 iterations)
+
+
+ pop edi ; restore the registers saved on entry
+ pop esi
+ pop ebx
+ mov esp, ebp ; tear down the frame
+ pop ebp
+ stdRET _Cbus1Boot2
+
+stdENDP _Cbus1Boot2
+
+INIT ends ; end 32 bit code
+ end