diff options
author | Adam <you@example.com> | 2020-05-17 05:51:50 +0200 |
---|---|---|
committer | Adam <you@example.com> | 2020-05-17 05:51:50 +0200 |
commit | e611b132f9b8abe35b362e5870b74bce94a1e58e (patch) | |
tree | a5781d2ec0e085eeca33cf350cf878f2efea6fe5 /private/ntos/nthals/halalpha/ev5mchk.c | |
download | NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar.gz NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar.bz2 NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar.lz NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar.xz NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar.zst NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.zip |
Diffstat (limited to 'private/ntos/nthals/halalpha/ev5mchk.c')
-rw-r--r-- | private/ntos/nthals/halalpha/ev5mchk.c | 677 |
1 files changed, 677 insertions, 0 deletions
diff --git a/private/ntos/nthals/halalpha/ev5mchk.c b/private/ntos/nthals/halalpha/ev5mchk.c new file mode 100644 index 000000000..400c2a95c --- /dev/null +++ b/private/ntos/nthals/halalpha/ev5mchk.c @@ -0,0 +1,677 @@ +/*++ + +Copyright (c) 1994 Digital Equipment Corporation + +Module Name: + + ev5mchk.c + +Abstract: + + This module implements generalized machine check handling for + platforms based on the DECchip 21164 (EV5) microprocessor. + +Author: + + Joe Notarangelo 30-Jun-1994 + +Environment: + + Kernel mode only. + +Revision History: + +--*/ + +#include "halp.h" +#include "axp21164.h" +#include "stdio.h" + + +// +// Declare the extern variable UncorrectableError declared in +// inithal.c. +// +extern PERROR_FRAME PUncorrectableError; + + +VOID +HalpDisplayLogout21164( + IN PLOGOUT_FRAME_21164 LogoutFrame ); + +BOOLEAN +HalpPlatformMachineCheck( + IN PEXCEPTION_RECORD ExceptionRecord, + IN PKEXCEPTION_FRAME ExceptionFrame, + IN PKTRAP_FRAME TrapFrame + ); + +VOID +HalpUpdateMces( + IN BOOLEAN ClearMachineCheck, + IN BOOLEAN ClearCorrectableError + ); + +// +// System-wide controls for machine check reporting. +// + +ProcessorCorrectableDisable = FALSE; +SystemCorrectableDisable = FALSE; +MachineCheckDisable = FALSE; + +// +// Error counts. +// + +ULONG CorrectableErrors = 0; +ULONG RetryableErrors = 0; + +VOID +HalpSetMachineCheckEnables( + IN BOOLEAN DisableMachineChecks, + IN BOOLEAN DisableProcessorCorrectables, + IN BOOLEAN DisableSystemCorrectables + ) +/*++ + +Routine Description: + + This function sets the enables that define which machine check + errors will be signaled by the processor. + + N.B. - The system has the capability to ignore all machine checks + by indicating DisableMachineChecks = TRUE. This is intended + for debugging purposes on broken hardware. If you disable + this you will get no machine check no matter what error the + system/processor detects. Consider the consequences. + +Arguments: + + DisableMachineChecks - Supplies a boolean which indicates if all + machine checks should be disabled and not + reported. (see note above). + + DisableProcessorCorrectables - Supplies a boolean which indicates if + processor correctable error reporting + should be disabled. + DisableSystemCorrectables - Supplies a boolean which indicates if + system correctable error reporting + should be disabled. + +Return Value: + + None. + +--*/ +{ + + + ProcessorCorrectableDisable = DisableProcessorCorrectables; + SystemCorrectableDisable = DisableSystemCorrectables; + MachineCheckDisable = DisableMachineChecks; + + HalpUpdateMces( FALSE, FALSE ); + + return; +} + +VOID +HalpUpdateMces( + IN BOOLEAN ClearMachineCheck, + IN BOOLEAN ClearCorrectableError + ) +/*++ + +Routine Description: + + This function updates the state of the MCES internal processor + register. + +Arguments: + + ClearMachineCheck - Supplies a boolean that indicates if the machine + check indicator in the MCES should be cleared. + + ClearCorrectableError - Supplies a boolean that indicates if the + correctable error indicators in the MCES should + be cleared. + +Return Value: + + None. + +--*/ +{ + MCES Mces; + + Mces.MachineCheck = ClearMachineCheck; + Mces.SystemCorrectable = ClearCorrectableError; + Mces.ProcessorCorrectable = ClearCorrectableError; + Mces.DisableProcessorCorrectable = ProcessorCorrectableDisable; + Mces.DisableSystemCorrectable = SystemCorrectableDisable; + Mces.DisableMachineChecks = MachineCheckDisable; + + HalpWriteMces( Mces ); + +} + + +BOOLEAN +HalMachineCheck ( + IN PEXCEPTION_RECORD ExceptionRecord, + IN PKEXCEPTION_FRAME ExceptionFrame, + IN PKTRAP_FRAME TrapFrame + ) +/*++ + +Routine Description: + + This function fields machine check for 21164-based machines. + +Arguments: + + ExceptionRecord - Supplies a pointer to the exception record for the + machine check. Included in the exception information + is the pointer to the logout frame. + + ExceptionFrame - Supplies a pointer to the kernel exception frame. + + TrapFrame - Supplies a pointer to the kernel trap frame. + +Return Value: + + A value of TRUE is returned if the machine check has been + handled by the HAL. If it has been handled then execution may + resume at the faulting address. Otherwise, a value of FALSE + is returned. + + N.B. - Under some circumstances this routine may not return at + all. + +--*/ + +{ + + BOOLEAN Handled; + PLOGOUT_FRAME_21164 LogoutFrame; + PMCHK_STATUS MachineCheckStatus; + MCES Mces; + PICPERR_STAT_21164 icPerrStat; + PDC_PERR_STAT_21164 dcPerrStat; + PSC_STAT_21164 scStat; + PEI_STAT_21164 eiStat; + BOOLEAN UnhandledPlatformError = FALSE; + + PUNCORRECTABLE_ERROR uncorrerr = NULL; + PPROCESSOR_EV5_UNCORRECTABLE ev5uncorr = NULL; + + // + // Check for retryable errors. These are usually I-stream parity + // errors, which may be retried following a cache flush (the cache + // flush is handled by the PAL). + // + + MachineCheckStatus = + (PMCHK_STATUS)&ExceptionRecord->ExceptionInformation[0]; + + // + // Handle any retryable errors. + // + + if( MachineCheckStatus->Retryable == 1 ){ + + // + // Log the error. + // + + RetryableErrors += 1; + +#if (DBG) || (HALDBG) + + if( (RetryableErrors % 32) == 0 ){ + DbgPrint( "HAL Retryable Errors = %d\n", RetryableErrors ); + } + +#endif //DBG || HALDBG + + // + // Acknowledge receipt of the retryable machine check. + // + + HalpUpdateMces( TRUE, TRUE ); + + return TRUE; + + } + + // + // Capture the logout frame pointer. + // + + LogoutFrame = + (PLOGOUT_FRAME_21164)ExceptionRecord->ExceptionInformation[1]; + + // + // Check for any hard errors that cannot be dismissed. + // They are: + // Tag parity error + // Tag control parity error + // Multiple external errors + // Fill ECC error + // Fill parity error + // Multiple fill errors + // + + icPerrStat = (PICPERR_STAT_21164)&LogoutFrame->IcPerrStat; + dcPerrStat = (PDC_PERR_STAT_21164)&LogoutFrame->DcPerrStat; + scStat = (PSC_STAT_21164)&LogoutFrame->ScStat; + eiStat = (PEI_STAT_21164)&LogoutFrame->EiStat; + + if(PUncorrectableError) { + // + // Fill in the processor specific uncorrectable error frame + // + uncorrerr = (PUNCORRECTABLE_ERROR) + &PUncorrectableError->UncorrectableFrame; + + // + // first fill in some generic processor Information. + // For the Current (Reporting) Processor. + // + HalpGetProcessorInfo(&uncorrerr->ReportingProcessor); + uncorrerr->Flags.ProcessorInformationValid = 1; + + ev5uncorr = (PPROCESSOR_EV5_UNCORRECTABLE) + uncorrerr->RawProcessorInformation; + } + if(ev5uncorr){ + ev5uncorr->IcPerrStat = LogoutFrame->IcPerrStat.all; + ev5uncorr->DcPerrStat = LogoutFrame->DcPerrStat.all; + ev5uncorr->ScStat = LogoutFrame->ScStat.all; + ev5uncorr->ScAddr = LogoutFrame->ScAddr.all; + ev5uncorr->EiStat = LogoutFrame->EiStat.all; + ev5uncorr->BcTagAddr = LogoutFrame->BcTagAddr.all; + ev5uncorr->EiAddr = LogoutFrame->EiAddr.all; + ev5uncorr->FillSyn = LogoutFrame->FillSyn.all; + ev5uncorr->BcConfig = LogoutFrame->BcConfig.all; + ev5uncorr->BcControl = LogoutFrame->BcControl.all; + } + +// +// SjBfix. The External parity error checking is disabled due to bug +// Rattler chipset on Gamma which causes the parity error on +// machine checks due to reads to PCI config space. (fixed in pass 2) +// + + if ( icPerrStat->Dpe == 1 || icPerrStat->Tpe == 1 || + icPerrStat->Tmr == 1 || dcPerrStat->Lock == 1 || + scStat->ScTperr == 1 || scStat->ScDperr == 1 || + eiStat->BcTperr == 1 || eiStat->BcTcperr == 1 || +// eiStat->UncEccErr == 1 || eiStat->EiParErr == 1 || + eiStat->SeoHrdErr == 1 || scStat->ScScndErr == 1 ){ + + // + // A serious, uncorrectable error has occured, under no circumstances + // can it be simply dismissed. + // + + goto FatalError; + + } + + // + // It is possible that the system has experienced a hard error and + // that nonetheless the error is recoverable. This is a system-specific + // decision - allow it to be handled as such. + // + + UnhandledPlatformError = TRUE; + if( (Handled = HalpPlatformMachineCheck( + ExceptionRecord, + ExceptionFrame, + TrapFrame )) == TRUE ){ + + // + // The system-specific code has handled the error. Dismiss + // the error and continue execution. + // + + HalpUpdateMces( TRUE, TRUE ); + + return TRUE; + + } + +// +// The system has experienced a fatal error that cannot be corrected. +// Print any possible relevant information and crash the system. +// +// N.B. - In the future some of these fatal errors could be potential +// recovered. Example, a user process gets a fatal error on one +// of its pages - we kill the user process, mark the page as bad +// and continue executing. +// + +FatalError: + + uncorrerr->Flags.ErrorStringValid = 1; + sprintf(uncorrerr->ErrorString,"Uncorrectable Error From " + "Processor Detected"); + // + // Begin the error output if this is a processor error. If this is + // an unhandled platform error than that code is responsible for + // beginning the error output. + // + + if( UnhandledPlatformError == FALSE ){ + + // + // Acquire ownership of the display. This is done here in case we take + // a machine check before the display has been taken away from the HAL. + // When the HAL begins displaying strings after it has lost the + // display ownership then the HAL will be careful not to scroll + // information off of the screen. + // + + HalAcquireDisplayOwnership(NULL); + + // + // Display the dreaded banner. + // + + HalDisplayString( "\nFatal system hardware error.\n\n" ); + + } + + // + // Display the EV5 logout frame. + // + + HalpDisplayLogout21164( LogoutFrame ); + + // + // Bugcheck to dump the rest of the machine state, this will help + // if the machine check is software-related. + // + + KeBugCheckEx( DATA_BUS_ERROR, + (ULONG)MachineCheckStatus->Correctable, + (ULONG)MachineCheckStatus->Retryable, + 0, + (ULONG)PUncorrectableError ); + +} + +#define MAX_ERROR_STRING 100 + +VOID +HalpDisplayLogout21164 ( + IN PLOGOUT_FRAME_21164 LogoutFrame + ) + +/*++ + +Routine Description: + + This function displays the logout frame for a 21164. + +Arguments: + + LogoutFrame - Supplies a pointer to the logout frame generated + by the 21164. +Return Value: + + None. + +--*/ + +{ + UCHAR OutBuffer[ MAX_ERROR_STRING ]; + + sprintf( OutBuffer, "ICSR : %016Lx ICPERR_STAT : %016Lx\n", + LogoutFrame->Icsr.all, LogoutFrame->IcPerrStat.all ); + + HalDisplayString( OutBuffer ); + + sprintf( OutBuffer, "MM_STAT : %016Lx DC_PERR_STAT : %016Lx\n", + LogoutFrame->MmStat.all, + LogoutFrame->DcPerrStat.all ); + + HalDisplayString( OutBuffer ); + + + sprintf( OutBuffer, "PS : %016Lx VA : %016Lx VA_FORM : %016Lx\n", + LogoutFrame->Ps, + LogoutFrame->Va, + LogoutFrame->VaForm ); + + HalDisplayString( OutBuffer ); + + + sprintf( OutBuffer, "ISR : %016Lx IPL : %016Lx INTID : %016Lx\n", + LogoutFrame->Isr.all, + LogoutFrame->Ipl, + LogoutFrame->IntId ); + + HalDisplayString( OutBuffer ); + + + sprintf( OutBuffer, "SC_STAT : %016Lx SC_CTL : %016Lx SC_ADDR : %016Lx\n", + LogoutFrame->ScStat.all, + LogoutFrame->ScCtl.all, + LogoutFrame->ScAddr.all ); + + HalDisplayString( OutBuffer ); + + + sprintf( OutBuffer, "EI_STAT : %016Lx EI_ADDR : %016Lx\n", + LogoutFrame->EiStat.all, LogoutFrame->EiAddr.all ); + + HalDisplayString( OutBuffer ); + + + sprintf( OutBuffer, "BC_TAG_ADDR : %016Lx FILL_SYN : %016Lx\n", + LogoutFrame->BcTagAddr.all, LogoutFrame->FillSyn.all ); + + + HalDisplayString( OutBuffer ); + + + sprintf( OutBuffer, "BC_CONTROL : %016Lx BC_CONFIG : %016Lx\n", + LogoutFrame->BcControl.all, LogoutFrame->BcConfig.all ); + + HalDisplayString( OutBuffer ); + + + sprintf( OutBuffer, "EXC_ADDR : %016Lx PAL_BASE : %016Lx\n", + LogoutFrame->ExcAddr, LogoutFrame->PalBase ); + + HalDisplayString( OutBuffer ); + + // + // Print out interpretation of the error. + // + + HalDisplayString( "\n" ); + + // + // Check for tag parity error. + // + + if ( LogoutFrame->IcPerrStat.Dpe == 1 || + LogoutFrame->IcPerrStat.Tpe == 1 ){ + + // + // Note: The excAddr may contain the address of the instruction + // the caused the parity error but it is not guaranteed: + // + sprintf( OutBuffer, "Icache %s parity error, Addr: %x\n", + LogoutFrame->IcPerrStat.Dpe ? "Data" : "Tag", + LogoutFrame->ExcAddr ); + + HalDisplayString( OutBuffer ); + + } else if ( LogoutFrame->DcPerrStat.Lock == 1 ){ + + sprintf( OutBuffer, "Dcache %s parity error, Addr: %x\n", + LogoutFrame->DcPerrStat.Dp0 || LogoutFrame->DcPerrStat.Dp1 ? + "Data" : "Tag", + LogoutFrame->Va ); + + HalDisplayString( OutBuffer ); + + } else if ( LogoutFrame->ScStat.ScTperr != 0 ) { + + sprintf( OutBuffer, + "Scache Tag parity error, Addr: %x Tag: %x Cmd: %x\n", + LogoutFrame->ScAddr.ScAddr, + LogoutFrame->ScStat.ScTperr, + LogoutFrame->ScStat.CboxCmd); + + HalDisplayString( OutBuffer ); + + + } else if ( LogoutFrame->ScStat.ScDperr != 0 ) { + + sprintf( OutBuffer, + "Scache Data parity error, Addr: %x Tag: %x Cmd: %x\n", + LogoutFrame->ScAddr.ScAddr, + LogoutFrame->ScStat.ScDperr, + LogoutFrame->ScStat.CboxCmd); + + HalDisplayString( OutBuffer ); + + + } else if ( LogoutFrame->EiStat.BcTperr == 1 || + LogoutFrame->EiStat.BcTcperr == 1 ){ + + sprintf( OutBuffer, + "Bcache Tag Parity error, Addr: %x Tag: %x\n", + LogoutFrame->EiAddr.EiAddr, + LogoutFrame->BcTagAddr.Tag1); + + HalDisplayString( OutBuffer ); + + } + + // + // Check for timeout reset error: + // + + if ( LogoutFrame->IcPerrStat.Tmr == 1 ){ + + sprintf( OutBuffer, "Timeout Reset Error\n" ); + + HalDisplayString( OutBuffer ); + } + + // + // Check for fill ECC errors. + // + + if( LogoutFrame->EiStat.UncEccErr == 1 ){ + + sprintf( OutBuffer, "Uncorrectable ECC error: %s\n", + LogoutFrame->EiStat.FilIrd ? "Icache Fill" : "Dcache Fill" ); + + HalDisplayString( OutBuffer ); + + sprintf( OutBuffer, + "PA: %16Lx Longword0: %x Longword1: %x\n", + LogoutFrame->EiAddr.EiAddr, + LogoutFrame->FillSyn.Lo, + LogoutFrame->FillSyn.Hi ); + + HalDisplayString( OutBuffer ); + + } + + // + // Check for address/command parity error + // + + if( LogoutFrame->EiStat.EiParErr == 1 ){ + + sprintf( OutBuffer, "Address/Command parity error, Addr=%x\n", + LogoutFrame->EiAddr.EiAddr ); + + HalDisplayString( OutBuffer ); + + } + + // + // Check for multiple hard errors. + // + + if ( LogoutFrame->ScStat.ScScndErr == 1 ){ + + HalDisplayString( "Multiple Scache parity errors detected.\n" ); + } + + if( LogoutFrame->EiStat.SeoHrdErr == 1 ){ + + HalDisplayString( "Multiple external/tag errors detected.\n" ); + + } + + return; +} + + +BOOLEAN +Halp21164CorrectedErrorInterrupt ( + VOID + ) + +/*++ + +Routine Description: + + This is the interrupt handler for the 21164 processor corrected error + interrupt. + +Arguments: + + None. + +Return Value: + + None. + +--*/ + +{ + // + // Handle any processor correctable errors. + // + + + // + // Log the error. + // + // simply assume this was a fill ecc correctable for now, print + // a debug message periodically + + CorrectableErrors += 1; + +#if 0 //jnfix +#if (DBG) || (HALDBG) + + if( (CorrectableErrors % 32) == 0 ){ + DbgPrint( "Correctable errors = %d\n", CorrectableErrors ); + } + +#endif //DBG || HALDBG +#endif //0 jnfix + + // + // Acknowledge receipt of the correctable error by clearing + // the error in the MCES register. + // + + HalpUpdateMces( FALSE, TRUE ); + + return TRUE; + +} |