summaryrefslogtreecommitdiffstats
path: root/private/ntos/nthals/halalpha/ev5mchk.c
diff options
context:
space:
mode:
authorAdam <you@example.com>2020-05-17 05:51:50 +0200
committerAdam <you@example.com>2020-05-17 05:51:50 +0200
commite611b132f9b8abe35b362e5870b74bce94a1e58e (patch)
treea5781d2ec0e085eeca33cf350cf878f2efea6fe5 /private/ntos/nthals/halalpha/ev5mchk.c
downloadNT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar
NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar.gz
NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar.bz2
NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar.lz
NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar.xz
NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.tar.zst
NT4.0-e611b132f9b8abe35b362e5870b74bce94a1e58e.zip
Diffstat (limited to 'private/ntos/nthals/halalpha/ev5mchk.c')
-rw-r--r--private/ntos/nthals/halalpha/ev5mchk.c677
1 files changed, 677 insertions, 0 deletions
diff --git a/private/ntos/nthals/halalpha/ev5mchk.c b/private/ntos/nthals/halalpha/ev5mchk.c
new file mode 100644
index 000000000..400c2a95c
--- /dev/null
+++ b/private/ntos/nthals/halalpha/ev5mchk.c
@@ -0,0 +1,677 @@
+/*++
+
+Copyright (c) 1994 Digital Equipment Corporation
+
+Module Name:
+
+ ev5mchk.c
+
+Abstract:
+
+ This module implements generalized machine check handling for
+ platforms based on the DECchip 21164 (EV5) microprocessor.
+
+Author:
+
+ Joe Notarangelo 30-Jun-1994
+
+Environment:
+
+ Kernel mode only.
+
+Revision History:
+
+--*/
+
+#include "halp.h"
+#include "axp21164.h"
+#include "stdio.h"
+
+
+//
+// Declare the extern variable UncorrectableError declared in
+// inithal.c.
+//
+extern PERROR_FRAME PUncorrectableError;
+
+
+VOID
+HalpDisplayLogout21164(
+ IN PLOGOUT_FRAME_21164 LogoutFrame );
+
+BOOLEAN
+HalpPlatformMachineCheck(
+ IN PEXCEPTION_RECORD ExceptionRecord,
+ IN PKEXCEPTION_FRAME ExceptionFrame,
+ IN PKTRAP_FRAME TrapFrame
+ );
+
+VOID
+HalpUpdateMces(
+ IN BOOLEAN ClearMachineCheck,
+ IN BOOLEAN ClearCorrectableError
+ );
+
+//
+// System-wide controls for machine check reporting.
+//
+
+ProcessorCorrectableDisable = FALSE;
+SystemCorrectableDisable = FALSE;
+MachineCheckDisable = FALSE;
+
+//
+// Error counts.
+//
+
+ULONG CorrectableErrors = 0;
+ULONG RetryableErrors = 0;
+
+VOID
+HalpSetMachineCheckEnables(
+ IN BOOLEAN DisableMachineChecks,
+ IN BOOLEAN DisableProcessorCorrectables,
+ IN BOOLEAN DisableSystemCorrectables
+ )
+/*++
+
+Routine Description:
+
+ This function sets the enables that define which machine check
+ errors will be signaled by the processor.
+
+ N.B. - The system has the capability to ignore all machine checks
+ by indicating DisableMachineChecks = TRUE. This is intended
+ for debugging purposes on broken hardware. If you disable
+ this you will get no machine check no matter what error the
+ system/processor detects. Consider the consequences.
+
+Arguments:
+
+ DisableMachineChecks - Supplies a boolean which indicates if all
+ machine checks should be disabled and not
+ reported. (see note above).
+
+ DisableProcessorCorrectables - Supplies a boolean which indicates if
+ processor correctable error reporting
+ should be disabled.
+ DisableSystemCorrectables - Supplies a boolean which indicates if
+ system correctable error reporting
+ should be disabled.
+
+Return Value:
+
+ None.
+
+--*/
+{
+
+
+ ProcessorCorrectableDisable = DisableProcessorCorrectables;
+ SystemCorrectableDisable = DisableSystemCorrectables;
+ MachineCheckDisable = DisableMachineChecks;
+
+ HalpUpdateMces( FALSE, FALSE );
+
+ return;
+}
+
+VOID
+HalpUpdateMces(
+ IN BOOLEAN ClearMachineCheck,
+ IN BOOLEAN ClearCorrectableError
+ )
+/*++
+
+Routine Description:
+
+ This function updates the state of the MCES internal processor
+ register.
+
+Arguments:
+
+ ClearMachineCheck - Supplies a boolean that indicates if the machine
+ check indicator in the MCES should be cleared.
+
+ ClearCorrectableError - Supplies a boolean that indicates if the
+ correctable error indicators in the MCES should
+ be cleared.
+
+Return Value:
+
+ None.
+
+--*/
+{
+ MCES Mces;
+
+ Mces.MachineCheck = ClearMachineCheck;
+ Mces.SystemCorrectable = ClearCorrectableError;
+ Mces.ProcessorCorrectable = ClearCorrectableError;
+ Mces.DisableProcessorCorrectable = ProcessorCorrectableDisable;
+ Mces.DisableSystemCorrectable = SystemCorrectableDisable;
+ Mces.DisableMachineChecks = MachineCheckDisable;
+
+ HalpWriteMces( Mces );
+
+}
+
+
+BOOLEAN
+HalMachineCheck (
+ IN PEXCEPTION_RECORD ExceptionRecord,
+ IN PKEXCEPTION_FRAME ExceptionFrame,
+ IN PKTRAP_FRAME TrapFrame
+ )
+/*++
+
+Routine Description:
+
+ This function fields machine check for 21164-based machines.
+
+Arguments:
+
+ ExceptionRecord - Supplies a pointer to the exception record for the
+ machine check. Included in the exception information
+ is the pointer to the logout frame.
+
+ ExceptionFrame - Supplies a pointer to the kernel exception frame.
+
+ TrapFrame - Supplies a pointer to the kernel trap frame.
+
+Return Value:
+
+ A value of TRUE is returned if the machine check has been
+ handled by the HAL. If it has been handled then execution may
+ resume at the faulting address. Otherwise, a value of FALSE
+ is returned.
+
+ N.B. - Under some circumstances this routine may not return at
+ all.
+
+--*/
+
+{
+
+ BOOLEAN Handled;
+ PLOGOUT_FRAME_21164 LogoutFrame;
+ PMCHK_STATUS MachineCheckStatus;
+ MCES Mces;
+ PICPERR_STAT_21164 icPerrStat;
+ PDC_PERR_STAT_21164 dcPerrStat;
+ PSC_STAT_21164 scStat;
+ PEI_STAT_21164 eiStat;
+ BOOLEAN UnhandledPlatformError = FALSE;
+
+ PUNCORRECTABLE_ERROR uncorrerr = NULL;
+ PPROCESSOR_EV5_UNCORRECTABLE ev5uncorr = NULL;
+
+ //
+ // Check for retryable errors. These are usually I-stream parity
+ // errors, which may be retried following a cache flush (the cache
+ // flush is handled by the PAL).
+ //
+
+ MachineCheckStatus =
+ (PMCHK_STATUS)&ExceptionRecord->ExceptionInformation[0];
+
+ //
+ // Handle any retryable errors.
+ //
+
+ if( MachineCheckStatus->Retryable == 1 ){
+
+ //
+ // Log the error.
+ //
+
+ RetryableErrors += 1;
+
+#if (DBG) || (HALDBG)
+
+ if( (RetryableErrors % 32) == 0 ){
+ DbgPrint( "HAL Retryable Errors = %d\n", RetryableErrors );
+ }
+
+#endif //DBG || HALDBG
+
+ //
+ // Acknowledge receipt of the retryable machine check.
+ //
+
+ HalpUpdateMces( TRUE, TRUE );
+
+ return TRUE;
+
+ }
+
+ //
+ // Capture the logout frame pointer.
+ //
+
+ LogoutFrame =
+ (PLOGOUT_FRAME_21164)ExceptionRecord->ExceptionInformation[1];
+
+ //
+ // Check for any hard errors that cannot be dismissed.
+ // They are:
+ // Tag parity error
+ // Tag control parity error
+ // Multiple external errors
+ // Fill ECC error
+ // Fill parity error
+ // Multiple fill errors
+ //
+
+ icPerrStat = (PICPERR_STAT_21164)&LogoutFrame->IcPerrStat;
+ dcPerrStat = (PDC_PERR_STAT_21164)&LogoutFrame->DcPerrStat;
+ scStat = (PSC_STAT_21164)&LogoutFrame->ScStat;
+ eiStat = (PEI_STAT_21164)&LogoutFrame->EiStat;
+
+ if(PUncorrectableError) {
+ //
+ // Fill in the processor specific uncorrectable error frame
+ //
+ uncorrerr = (PUNCORRECTABLE_ERROR)
+ &PUncorrectableError->UncorrectableFrame;
+
+ //
+ // first fill in some generic processor Information.
+ // For the Current (Reporting) Processor.
+ //
+ HalpGetProcessorInfo(&uncorrerr->ReportingProcessor);
+ uncorrerr->Flags.ProcessorInformationValid = 1;
+
+ ev5uncorr = (PPROCESSOR_EV5_UNCORRECTABLE)
+ uncorrerr->RawProcessorInformation;
+ }
+ if(ev5uncorr){
+ ev5uncorr->IcPerrStat = LogoutFrame->IcPerrStat.all;
+ ev5uncorr->DcPerrStat = LogoutFrame->DcPerrStat.all;
+ ev5uncorr->ScStat = LogoutFrame->ScStat.all;
+ ev5uncorr->ScAddr = LogoutFrame->ScAddr.all;
+ ev5uncorr->EiStat = LogoutFrame->EiStat.all;
+ ev5uncorr->BcTagAddr = LogoutFrame->BcTagAddr.all;
+ ev5uncorr->EiAddr = LogoutFrame->EiAddr.all;
+ ev5uncorr->FillSyn = LogoutFrame->FillSyn.all;
+ ev5uncorr->BcConfig = LogoutFrame->BcConfig.all;
+ ev5uncorr->BcControl = LogoutFrame->BcControl.all;
+ }
+
+//
+// SjBfix. The External parity error checking is disabled due to bug
+// Rattler chipset on Gamma which causes the parity error on
+// machine checks due to reads to PCI config space. (fixed in pass 2)
+//
+
+ if ( icPerrStat->Dpe == 1 || icPerrStat->Tpe == 1 ||
+ icPerrStat->Tmr == 1 || dcPerrStat->Lock == 1 ||
+ scStat->ScTperr == 1 || scStat->ScDperr == 1 ||
+ eiStat->BcTperr == 1 || eiStat->BcTcperr == 1 ||
+// eiStat->UncEccErr == 1 || eiStat->EiParErr == 1 ||
+ eiStat->SeoHrdErr == 1 || scStat->ScScndErr == 1 ){
+
+ //
+ // A serious, uncorrectable error has occured, under no circumstances
+ // can it be simply dismissed.
+ //
+
+ goto FatalError;
+
+ }
+
+ //
+ // It is possible that the system has experienced a hard error and
+ // that nonetheless the error is recoverable. This is a system-specific
+ // decision - allow it to be handled as such.
+ //
+
+ UnhandledPlatformError = TRUE;
+ if( (Handled = HalpPlatformMachineCheck(
+ ExceptionRecord,
+ ExceptionFrame,
+ TrapFrame )) == TRUE ){
+
+ //
+ // The system-specific code has handled the error. Dismiss
+ // the error and continue execution.
+ //
+
+ HalpUpdateMces( TRUE, TRUE );
+
+ return TRUE;
+
+ }
+
+//
+// The system has experienced a fatal error that cannot be corrected.
+// Print any possible relevant information and crash the system.
+//
+// N.B. - In the future some of these fatal errors could be potential
+// recovered. Example, a user process gets a fatal error on one
+// of its pages - we kill the user process, mark the page as bad
+// and continue executing.
+//
+
+FatalError:
+
+ uncorrerr->Flags.ErrorStringValid = 1;
+ sprintf(uncorrerr->ErrorString,"Uncorrectable Error From "
+ "Processor Detected");
+ //
+ // Begin the error output if this is a processor error. If this is
+ // an unhandled platform error than that code is responsible for
+ // beginning the error output.
+ //
+
+ if( UnhandledPlatformError == FALSE ){
+
+ //
+ // Acquire ownership of the display. This is done here in case we take
+ // a machine check before the display has been taken away from the HAL.
+ // When the HAL begins displaying strings after it has lost the
+ // display ownership then the HAL will be careful not to scroll
+ // information off of the screen.
+ //
+
+ HalAcquireDisplayOwnership(NULL);
+
+ //
+ // Display the dreaded banner.
+ //
+
+ HalDisplayString( "\nFatal system hardware error.\n\n" );
+
+ }
+
+ //
+ // Display the EV5 logout frame.
+ //
+
+ HalpDisplayLogout21164( LogoutFrame );
+
+ //
+ // Bugcheck to dump the rest of the machine state, this will help
+ // if the machine check is software-related.
+ //
+
+ KeBugCheckEx( DATA_BUS_ERROR,
+ (ULONG)MachineCheckStatus->Correctable,
+ (ULONG)MachineCheckStatus->Retryable,
+ 0,
+ (ULONG)PUncorrectableError );
+
+}
+
+#define MAX_ERROR_STRING 100
+
+VOID
+HalpDisplayLogout21164 (
+ IN PLOGOUT_FRAME_21164 LogoutFrame
+ )
+
+/*++
+
+Routine Description:
+
+ This function displays the logout frame for a 21164.
+
+Arguments:
+
+ LogoutFrame - Supplies a pointer to the logout frame generated
+ by the 21164.
+Return Value:
+
+ None.
+
+--*/
+
+{
+ UCHAR OutBuffer[ MAX_ERROR_STRING ];
+
+ sprintf( OutBuffer, "ICSR : %016Lx ICPERR_STAT : %016Lx\n",
+ LogoutFrame->Icsr.all, LogoutFrame->IcPerrStat.all );
+
+ HalDisplayString( OutBuffer );
+
+ sprintf( OutBuffer, "MM_STAT : %016Lx DC_PERR_STAT : %016Lx\n",
+ LogoutFrame->MmStat.all,
+ LogoutFrame->DcPerrStat.all );
+
+ HalDisplayString( OutBuffer );
+
+
+ sprintf( OutBuffer, "PS : %016Lx VA : %016Lx VA_FORM : %016Lx\n",
+ LogoutFrame->Ps,
+ LogoutFrame->Va,
+ LogoutFrame->VaForm );
+
+ HalDisplayString( OutBuffer );
+
+
+ sprintf( OutBuffer, "ISR : %016Lx IPL : %016Lx INTID : %016Lx\n",
+ LogoutFrame->Isr.all,
+ LogoutFrame->Ipl,
+ LogoutFrame->IntId );
+
+ HalDisplayString( OutBuffer );
+
+
+ sprintf( OutBuffer, "SC_STAT : %016Lx SC_CTL : %016Lx SC_ADDR : %016Lx\n",
+ LogoutFrame->ScStat.all,
+ LogoutFrame->ScCtl.all,
+ LogoutFrame->ScAddr.all );
+
+ HalDisplayString( OutBuffer );
+
+
+ sprintf( OutBuffer, "EI_STAT : %016Lx EI_ADDR : %016Lx\n",
+ LogoutFrame->EiStat.all, LogoutFrame->EiAddr.all );
+
+ HalDisplayString( OutBuffer );
+
+
+ sprintf( OutBuffer, "BC_TAG_ADDR : %016Lx FILL_SYN : %016Lx\n",
+ LogoutFrame->BcTagAddr.all, LogoutFrame->FillSyn.all );
+
+
+ HalDisplayString( OutBuffer );
+
+
+ sprintf( OutBuffer, "BC_CONTROL : %016Lx BC_CONFIG : %016Lx\n",
+ LogoutFrame->BcControl.all, LogoutFrame->BcConfig.all );
+
+ HalDisplayString( OutBuffer );
+
+
+ sprintf( OutBuffer, "EXC_ADDR : %016Lx PAL_BASE : %016Lx\n",
+ LogoutFrame->ExcAddr, LogoutFrame->PalBase );
+
+ HalDisplayString( OutBuffer );
+
+ //
+ // Print out interpretation of the error.
+ //
+
+ HalDisplayString( "\n" );
+
+ //
+ // Check for tag parity error.
+ //
+
+ if ( LogoutFrame->IcPerrStat.Dpe == 1 ||
+ LogoutFrame->IcPerrStat.Tpe == 1 ){
+
+ //
+ // Note: The excAddr may contain the address of the instruction
+ // the caused the parity error but it is not guaranteed:
+ //
+ sprintf( OutBuffer, "Icache %s parity error, Addr: %x\n",
+ LogoutFrame->IcPerrStat.Dpe ? "Data" : "Tag",
+ LogoutFrame->ExcAddr );
+
+ HalDisplayString( OutBuffer );
+
+ } else if ( LogoutFrame->DcPerrStat.Lock == 1 ){
+
+ sprintf( OutBuffer, "Dcache %s parity error, Addr: %x\n",
+ LogoutFrame->DcPerrStat.Dp0 || LogoutFrame->DcPerrStat.Dp1 ?
+ "Data" : "Tag",
+ LogoutFrame->Va );
+
+ HalDisplayString( OutBuffer );
+
+ } else if ( LogoutFrame->ScStat.ScTperr != 0 ) {
+
+ sprintf( OutBuffer,
+ "Scache Tag parity error, Addr: %x Tag: %x Cmd: %x\n",
+ LogoutFrame->ScAddr.ScAddr,
+ LogoutFrame->ScStat.ScTperr,
+ LogoutFrame->ScStat.CboxCmd);
+
+ HalDisplayString( OutBuffer );
+
+
+ } else if ( LogoutFrame->ScStat.ScDperr != 0 ) {
+
+ sprintf( OutBuffer,
+ "Scache Data parity error, Addr: %x Tag: %x Cmd: %x\n",
+ LogoutFrame->ScAddr.ScAddr,
+ LogoutFrame->ScStat.ScDperr,
+ LogoutFrame->ScStat.CboxCmd);
+
+ HalDisplayString( OutBuffer );
+
+
+ } else if ( LogoutFrame->EiStat.BcTperr == 1 ||
+ LogoutFrame->EiStat.BcTcperr == 1 ){
+
+ sprintf( OutBuffer,
+ "Bcache Tag Parity error, Addr: %x Tag: %x\n",
+ LogoutFrame->EiAddr.EiAddr,
+ LogoutFrame->BcTagAddr.Tag1);
+
+ HalDisplayString( OutBuffer );
+
+ }
+
+ //
+ // Check for timeout reset error:
+ //
+
+ if ( LogoutFrame->IcPerrStat.Tmr == 1 ){
+
+ sprintf( OutBuffer, "Timeout Reset Error\n" );
+
+ HalDisplayString( OutBuffer );
+ }
+
+ //
+ // Check for fill ECC errors.
+ //
+
+ if( LogoutFrame->EiStat.UncEccErr == 1 ){
+
+ sprintf( OutBuffer, "Uncorrectable ECC error: %s\n",
+ LogoutFrame->EiStat.FilIrd ? "Icache Fill" : "Dcache Fill" );
+
+ HalDisplayString( OutBuffer );
+
+ sprintf( OutBuffer,
+ "PA: %16Lx Longword0: %x Longword1: %x\n",
+ LogoutFrame->EiAddr.EiAddr,
+ LogoutFrame->FillSyn.Lo,
+ LogoutFrame->FillSyn.Hi );
+
+ HalDisplayString( OutBuffer );
+
+ }
+
+ //
+ // Check for address/command parity error
+ //
+
+ if( LogoutFrame->EiStat.EiParErr == 1 ){
+
+ sprintf( OutBuffer, "Address/Command parity error, Addr=%x\n",
+ LogoutFrame->EiAddr.EiAddr );
+
+ HalDisplayString( OutBuffer );
+
+ }
+
+ //
+ // Check for multiple hard errors.
+ //
+
+ if ( LogoutFrame->ScStat.ScScndErr == 1 ){
+
+ HalDisplayString( "Multiple Scache parity errors detected.\n" );
+ }
+
+ if( LogoutFrame->EiStat.SeoHrdErr == 1 ){
+
+ HalDisplayString( "Multiple external/tag errors detected.\n" );
+
+ }
+
+ return;
+}
+
+
+BOOLEAN
+Halp21164CorrectedErrorInterrupt (
+ VOID
+ )
+
+/*++
+
+Routine Description:
+
+ This is the interrupt handler for the 21164 processor corrected error
+ interrupt.
+
+Arguments:
+
+ None.
+
+Return Value:
+
+ None.
+
+--*/
+
+{
+ //
+ // Handle any processor correctable errors.
+ //
+
+
+ //
+ // Log the error.
+ //
+ // simply assume this was a fill ecc correctable for now, print
+ // a debug message periodically
+
+ CorrectableErrors += 1;
+
+#if 0 //jnfix
+#if (DBG) || (HALDBG)
+
+ if( (CorrectableErrors % 32) == 0 ){
+ DbgPrint( "Correctable errors = %d\n", CorrectableErrors );
+ }
+
+#endif //DBG || HALDBG
+#endif //0 jnfix
+
+ //
+ // Acknowledge receipt of the correctable error by clearing
+ // the error in the MCES register.
+ //
+
+ HalpUpdateMces( FALSE, TRUE );
+
+ return TRUE;
+
+}