diff options
Diffstat (limited to 'private/ntos/nthals/halcbus/i386/cbus1bt.asm')
-rw-r--r-- | private/ntos/nthals/halcbus/i386/cbus1bt.asm | 314 |
1 files changed, 314 insertions, 0 deletions
diff --git a/private/ntos/nthals/halcbus/i386/cbus1bt.asm b/private/ntos/nthals/halcbus/i386/cbus1bt.asm new file mode 100644 index 000000000..5f36b92ad --- /dev/null +++ b/private/ntos/nthals/halcbus/i386/cbus1bt.asm @@ -0,0 +1,314 @@ + title "MP primitives for the Corollary Cbus machines" +;++ +; +;Copyright (c) 1992, 1993, 1994 Corollary Inc. +; +;Module Name: +; +; cbus1bt.asm +; +;Abstract: +; +; Corollary Cbus1 Boot Code +; +; This module implements the low-level, highly +; cache-architecture-dependent code to boot the additional +; processors in the Corollary Cbus1 based machines. + +; This consists of two functions which are exactly the +; same (Cbus1Boot1 & Cbus1Boot2). The calling code +; determines which one is safe to call (depending on the +; linker, sometimes both may be ok). The reason for this +; is that the boot processor fills in the reset vector at +; 0xFFFFFFF0 for the next processor and that cache line +; must not be inadvertently flushed before the next processor +; gets out of reset to see where to go (it's filled in with +; a real-mode jmp cs:ip). Note that this code is highly +; dependent on the linker placing all this code contiguously +; and on the hardware architecture of the Corollary L2 caches. +; Unless the system is fully populated, memory will not exist +; at 0xFFFFFFF0. Hence, we must ensure that the cacheline is +; not evicted until the processor has done the jump! + + +; The order of Cbus1Boot1, ciboot, and Cbus1Boot2 is critical. +; Cbus1Boot1 and Cbus1Boot2 must be separated by Cbus1BootCPU; +; Cbus1Boot1 must be defined before Cbus1Boot2. +; The size of all three must be less than 4K. + +; WARNING!!! WARNING!!! WARNING!!! + +; Do not put any routines between Cbus1Boot1 and Cbus1Boot2. There +; are tricky games being played with the write-back caches so +; that StartVector[] does not get flushed. + +; +;Author: +; +; Landy Wang (landy@corollary.com) 23-Jun-1993 +; +;Environment: +; Kernel mode. 
+; +;-- + + + +.386p + .xlist +include hal386.inc +include callconv.inc ; calling convention macros + + .list + +INIT SEGMENT DWORD PUBLIC 'CODE' ; Start 32 bit code + ASSUME DS:FLAT, ES:FLAT, SS:NOTHING, FS:NOTHING, GS:NOTHING + +;++ +; +; VOID +; Cbus1Boot1 ( +; IN ULONG Processor, +; IN PQUAD Dest, +; IN PQUAD Source, +; IN ULONG ResetAddress, +; IN ULONG ResetValue +; ) +; +; +; Routine Description: +; +; Clear reset on the specified logical processor number, setting its +; reset vector to point at the specified code location. The Dest +; generally points at a reset vector, and thus, unless the system is +; fully populated, memory will not exist at that address. Hence, we +; must ensure that the cacheline is not evicted until the processor +; has done the jump! +; +; Arguments: +; +; Processor - Supplies a logical processor number (not read by the +; code below) +; +; Dest - Supplies the address of the reset vector where the code below +; will go. +; +; Source - Supplies startup code for this processor, currently a 5 byte +; intersegment (far) jump, ie: "jmp cs:ip" +; +; Note the reset vector length is hardcoded here to 8 bytes. (ie: Dest +; and Source must point at arrays of 8 bytes each). +; +; ResetAddress - Supplies the address to poke to clear reset +; +; ResetValue - Supplies the value to poke to clear reset (only the +; low byte is written) +; +; Return Value: +; +; None. +;-- + +ProcessorNumber equ dword ptr [ebp+8] ; zero based +Destination equ dword ptr [ebp+12] +Source equ dword ptr [ebp+16] +ResetAddress equ dword ptr [ebp+20] +ResetValue equ dword ptr [ebp+24] + +cPublicProc _Cbus1Boot1 ,5 + push ebp + mov ebp, esp + push ebx ; ebx/esi/edi are used as register variables below + push esi + push edi + + ; + ; set up all variables to be used after the cache line + ; initialization. this is because we want to load up + ; our register variables with these values and avoid + ; memory references. see the comment below. 
+ ; + + mov eax, PCR[PcStallScaleFactor] ; get per microsecond + ; loop count for the processor + + mov ecx, 40 ; 40 microsecond stall + mul ecx ; (eax) = desired loop count + ; (mul also overwrites edx, so + ; edx is only loaded after it) + + mov edx, ResetAddress + mov ebx, ResetValue + + mov esi, Source ; point at the source code + + mov ecx, dword ptr [esi] ; get first dword into a reg + mov esi, dword ptr [esi+4] ; and 2nd dword into a reg + + mov edi, Destination + + ; + ; now start filling in the cache line for the processor coming out + ; of reset. no memory references which may flush this cache line + ; can be made after the below fill UNTIL the booting processor + ; has read the line. (other than the reset poke itself, the only + ; memory references made here in this critical time period are the + ; code fetches, but our caller has + ; already determined that none of the code in this function could + ; cause the cache line to be flushed). + ; + + mov dword ptr [edi], ecx ; 1st dword now in the cacheline + mov dword ptr [edi+4], esi ; and 2nd dword now in + + ; + ; cache line is initialized, we must not let it get flushed now, or + ; the additional processor will fly blind. + ; + + mov byte ptr [edx], bl ; clear reset + + ; + ; wait approximately 40 microseconds, but don't call + ; KeStallExecutionProcessor() as this might flush the + ; cache line prematurely. inline the function instead. + ; + ; NOTE(review): if the computed count in eax were zero, the loop + ; below would wrap and run 2^32 times - presumably + ; PcStallScaleFactor is never zero; confirm. + ; + + align 4 +@@: + sub eax, 1 ; (eax) = (eax) - 1 + jnz short @b ; spin until the count reaches zero + + + ; stall is over - restore the saved registers + pop edi + pop esi + pop ebx + mov esp, ebp + pop ebp + stdRET _Cbus1Boot1 + +stdENDP _Cbus1Boot1 + + ; + ; force enough spacing between the two boot functions so + ; that at least one of them will always be safe to call. 
+ ; currently that would be 16 bytes (the current cache line + ; size), but make it bigger so any of our OEMs will be safe + ; even if they modify the size of the cache line. + ; + + public _Cbus1Boot1End +_Cbus1Boot1End label byte + db 64 dup (?) ; 64 bytes of spacing are reserved here 
+ +;++ +; +; VOID +; Cbus1Boot2 ( +; IN ULONG Processor, +; IN PQUAD Dest, +; IN PQUAD Source, +; IN ULONG ResetAddress, +; IN ULONG ResetValue +; ) +; +; +; Routine Description: +; +; Clear reset on the specified logical processor number, setting its +; reset vector to point at the specified code location. The Dest +; generally points at a reset vector, and thus, unless the system is +; fully populated, memory will not exist at that address. Hence, we +; must ensure that the cacheline is not evicted until the processor +; has done the jump! +; +; This routine is a deliberate duplicate of Cbus1Boot1; the calling +; code determines which of the two copies is safe to call. +; +; Arguments: +; +; Processor - Supplies a logical processor number (not read by the +; code below) +; +; Dest - Supplies the address of the reset vector where the code below +; will go. +; +; Source - Supplies startup code for this processor, currently a 5 byte +; intersegment (far) jump, ie: "jmp cs:ip" +; +; Note the reset vector length is hardcoded here to 8 bytes. (ie: Dest +; and Source must point at arrays of 8 bytes each). +; +; ResetAddress - Supplies the address to poke to clear reset +; +; ResetValue - Supplies the value to poke to clear reset (only the +; low byte is written) +; +; Return Value: +; +; None. +;-- + +cPublicProc _Cbus1Boot2 ,5 + push ebp + mov ebp, esp + push ebx ; ebx/esi/edi are used as register variables below + push esi + push edi + + ; + ; set up all variables to be used after the cache line + ; initialization. this is because we want to load up + ; our register variables with these values and avoid + ; memory references. see the comment below. 
+ ; + + mov eax, PCR[PcStallScaleFactor] ; get per microsecond + ; loop count for the processor + + mov ecx, 40 ; 40 microsecond stall + mul ecx ; (eax) = desired loop count + ; (mul also overwrites edx, so + ; edx is only loaded after it) + + mov edx, ResetAddress + mov ebx, ResetValue + + mov esi, Source ; point at the source code + + mov ecx, dword ptr [esi] ; get first dword into a reg + mov esi, dword ptr [esi+4] ; and 2nd dword into a reg + + mov edi, Destination + + ; + ; now start filling in the cache line for the processor coming out + ; of reset. 
no memory references which may flush this cache line + ; can be made after the below fill UNTIL the booting processor + ; has read the line. (other than the reset poke itself, the only + ; memory references made here in this critical time period are the + ; code fetches, but our caller has + ; already determined that none of the code in this function could + ; cause the cache line to be flushed). + ; + + mov dword ptr [edi], ecx ; 1st dword now in the cacheline + mov dword ptr [edi+4], esi ; and 2nd dword now in + + ; + ; cache line is initialized, we must not let it get flushed now, or + ; the additional processor will fly blind. + ; + + mov byte ptr [edx], bl ; clear reset + + ; + ; wait approximately 40 microseconds, but don't call + ; KeStallExecutionProcessor() as this might flush the + ; cache line prematurely. inline the function instead. + ; + ; NOTE(review): if the computed count in eax were zero, the loop + ; below would wrap and run 2^32 times - presumably + ; PcStallScaleFactor is never zero; confirm. + ; + + align 4 +@@: + sub eax, 1 ; (eax) = (eax) - 1 + jnz short @b ; spin until the count reaches zero + + + ; stall is over - restore the saved registers + pop edi + pop esi + pop ebx + mov esp, ebp + pop ebp + stdRET _Cbus1Boot2 + +stdENDP _Cbus1Boot2 + +INIT ends ; end 32 bit code + end |