From e611b132f9b8abe35b362e5870b74bce94a1e58e Mon Sep 17 00:00:00 2001 From: Adam Date: Sat, 16 May 2020 20:51:50 -0700 Subject: initial commit --- private/ntos/nthals/halalpha/errframe.h | 697 ++++++++++++++++++++++++++++++++ 1 file changed, 697 insertions(+) create mode 100644 private/ntos/nthals/halalpha/errframe.h (limited to 'private/ntos/nthals/halalpha/errframe.h') diff --git a/private/ntos/nthals/halalpha/errframe.h b/private/ntos/nthals/halalpha/errframe.h new file mode 100644 index 000000000..ced43eb08 --- /dev/null +++ b/private/ntos/nthals/halalpha/errframe.h @@ -0,0 +1,697 @@ +/*++ + +Copyright (c) 1995,1996 Digital Equipment Corporation + +Module Name: + + errframe.h + +Abstract: + + Definitions for both the correctable and uncorrectable error + frames. + +Author: + + Joe Notarangelo 10-Mar-1995 + Chao Chen 24-Apr-1995 + +Environment: + + Kernel mode only. + +Revision History: + +0.1 28-Feb-1995 Joe Notarangelo Initial version. + +0.2 10-Mar-1995 Joe Notarangelo Incorporate initial review comments from + 6-Mar-95 review with: C. Chen, S. Jenness, + Bala, E. Rehm. + +0.3 24-Apr-1995 Chao Chen Made into .h file for inclusion by other + modules. + +0.4 16-Apr-1996 Gene Morgan Add fields to SystemError struct for PSU, + Wachdog events. + +--*/ + +#ifndef ERRFRAME_H +#define ERRFRAME_H + +#include "errplat.h" + +/* + * + * Common defines. + * + */ + +#define ERROR_FRAME_VERSION 0x1 + +#define UNIDENTIFIED 0x0 +#define MEMORY_SPACE 0x1 +#define IO_SPACE 0x2 +#define PROCESSOR_CACHE 0x2 +#define SYSTEM_CACHE 0x3 +#define PROCESSOR_MEMORY 0x4 +#define SYSTEM_MEMORY 0x5 + +#define CACHE_ERROR_MASK 0x2 +#define MEMORY_ERROR_MASK 0x4 + +#define BUS_DMA_READ 0x1 +#define BUS_DMA_WRITE 0x2 +#define BUS_DMA_OP 0x3 +#define BUS_IO_READ 0x4 +#define BUS_IO_WRITE 0x5 +#define BUS_IO_OP 0x6 +#define TLB_MISS_READ 0x7 +#define TLB_MISS_WRITE 0x8 +#define VICTIM_WRITE 0x9 + +#define MAX_UNCORRERR_STRLEN 128 + +#define ERROR_FRAME_SIGNATURE 0xE22F2F2A // ERRORFRA R=2 (18%16) + +/* + * + * Definitions for the correctable error frame. + * + */ + +// +// Correctable Error Flags +// + +typedef struct _CORRECTABLE_FLAGS{ + + // + // Address space + // Unidentified: 00 + // Memory Space: 01 + // I/O Space: 10 + // Reserved: 11 + // + + ULONGLONG AddressSpace: 2; + + // + // Error Interpretation Validity. + // + + ULONGLONG ServerManagementInformationValid: 1; + ULONGLONG PhysicalAddressValid: 1; + ULONGLONG ErrorBitMasksValid: 1; + ULONGLONG ExtendedErrorValid: 1; + ULONGLONG ProcessorInformationValid: 1; + ULONGLONG SystemInformationValid: 1; + + // + // Memory Space Error Source: + // + // Unidentified: 000 + // ProcessorCache: 010 + // SystemCache: 011 + // ProcessorMemory: 100 + // SystemMemory: 101 + // + + ULONGLONG MemoryErrorSource: 4; + + // + // Driver Actions. + // + + ULONGLONG ScrubError: 1; + + // + // Lost errors. + // + + ULONGLONG LostCorrectable: 1; + ULONGLONG LostAddressSpace: 2; + ULONGLONG LostMemoryErrorSource: 4; + +} CORRECTABLE_FLAGS, *PCORRECTABLE_FLAGS; + + // + // Description of CORRECTABLE_FLAG structure: + // + // AddressSpace + // + // Identifies the system address space that was the source of the + // correctable error. + // + // PhysicalAddressValid + // + // Indicates if the physical address in the CORRECTABLE_ERROR + // structure is valid. A value of 1 indicates the physical address + // is valid, a value of 0 indicates the physical address is not + // valid. + // + // ErrorBitMasksValid + // + // Indicates if the error bit mask fields in the CORRECTABLE_ERROR + // structure are valid. A value of 1 indicates the DataBitErrorMask + // and the CheckBitErrorMask are valid, a value of 0 indicates they + // are not valid. + // + // ExtendedErrorValid + // + // Indicates if the extended error information structure in the + // CORRECTABLE_ERROR structure is valid. A value of 1 indicates the + // extended error information structure is valid, a value of 0 + // indicates that it is not. + // + // ProcessorInformationValid + // + // Indicates if the raw processor information pointer in the + // CORRECTABLE_ERROR structure is valid. A value of 1 indicates the + // processor information is valid, a value of 0 indicates it is not. + // + // SystemInformationValid + // + // Indicates if the raw system information pointer in the + // CORRECTABLE_ERROR structure is valid. A value of 1 indicates the + // system information is valid, a value of 0 indicates it is not. + // + // ServerManagementInformationValid + // + // Indicates that the server management information in the extended + // error information structure is valid. The server management + // information relays information about failed fans or high + // temperature in the system. + // + // + // MemoryErrorSource + // + // Identifies the source of a memory error as either main error + // or cache for either a system or processor. + // + // ScrubError + // + // Instructs the driver to scrub the correctable error. If the + // value is 1 the driver should scrub the error, if the value is + // 0 the driver must not scrub the error. + // + // LostCorrectable + // + // Identifies if a lost correctable error has been reported. A + // lost error is an error that reported by the hardware while + // correctable error handling for a previous error was in progress. + // A value of 1 indicates that a correctable error report was lost. + // + // LostAddressSpace + // + // Identifies the system address space that was the source of the + // correctable error. Valid only if LostCorrectable == 1. + // + // LostMemoryErrorSource + // + // Identifies the source of a memory error as either main error + // or cache for either a system or processor. Valid only if + // LostCorrectable == 1. + // + +// +// Processor information. +// + +typedef struct _PROCESSOR_INFO{ + + ULONG ProcessorType; + ULONG ProcessorRevision; + ULONG PhysicalProcessorNumber; + ULONG LogicalProcessorNumber; + +} PROCESSOR_INFO, *PPROCESSOR_INFO; + + // + // Description of PROCESSOR_INFO structure: + // + // ProcessorType + // + // Identifies the type of processor running on the system + // (eg. 21064, 21066, 21164). Note that the type of processor + // is expected to be consistent across the system for MP machines. + // + // ProcessorRevision + // + // Identifies the revision number of the processor running on + // the system. Note that the revision is expected to be consistent + // across the system for MP machines. + // + // PhysicalProcessorNumber + // + // The physical processor number as numbered in the hardware + // specifications. + // + // LogicalProcessorNumber + // + // The logical processor number assigned to the processor by NT. + // + +// +// System Information. +// + +typedef struct _SYSTEM_INFORMATION{ + + UCHAR SystemType[8]; + ULONG ClockSpeed; + ULONG OsRevisionId; + ULONG PalMajorVersion; + ULONG PalMinorVersion; + UCHAR FirmwareRevisionId[16]; + ULONG SystemVariant; + ULONG SystemRevision; + UCHAR SystemSerialNumber[16]; + ULONG ModuleVariant; + ULONG ModuleRevision; + ULONG ModuleSerialNumber; +} SYSTEM_INFORMATION, *PSYSTEM_INFORMATION; + + // + // Description of SYSTEM_INFORMATION structure: + // + // SystemType + // + // Identifies the type of system that reported the error + // (eg. "Sable", "Gamma"). The actual format and value of the + // SystemType string is system-specific. + // + // OsRevisionId + // + // A numeric value that identifies the OS revision executing + // on the system that reported the fault. + // + // PalRevisionId + // + // A numeric value that identifies the pal revision executing + // on the system that reported the fault. + // + // FirmwareRevisionId + // + // A numeric value that identifies the firmware revision executing + // on the system that reported the fault. + // + // SystemVariant + // + // A numeric value used to distinguish variants of the same system + // type. The values and their interpretation are system_specific. + // + // SystemRevision + // + // A numeric value used to distinguish revisions of the same system + // type. The values and their interpretation are system_specific. + // + // + +// +// Extended Error Information. +// + +typedef union _EXTENDED_ERROR{ + + struct{ + struct{ + ULONG CacheLevelValid: 1; + ULONG CacheBoardValid: 1; + ULONG CacheSimmValid: 1; + } Flags; + PROCESSOR_INFO ProcessorInfo; + ULONG CacheLevel; + ULONG CacheBoardNumber; + ULONG CacheSimm; + ULONG TransferType; + ULONG Reserved; + } CacheError; + + struct{ + struct{ + ULONG MemoryBoardValid: 1; + ULONG MemorySimmValid: 1; + } Flags; + PROCESSOR_INFO ProcessorInfo; + ULONG MemoryBoard; + ULONG MemorySimm; + ULONG TransferType; + ULONG Reserved[2]; + } MemoryError; + + struct{ + INTERFACE_TYPE Interface; + ULONG BusNumber; + PHYSICAL_ADDRESS BusAddress; + ULONG TransferType; + ULONG Reserved[5]; + } IoError; + + struct{ + struct{ + ULONG FanNumberValid: 1; + ULONG TempSensorNumberValid: 1; + ULONG PowerSupplyNumberValid: 1; + ULONG WatchDogExpiredValid: 1; + } Flags; + ULONG FanNumber; + ULONG TempSensorNumber; + ULONG PowerSupplyNumber; + ULONG WatchdogExpiration; + ULONG Reserved[5]; + } SystemError; + +} EXTENDED_ERROR, *PEXTENDED_ERROR; + + // + // Description of EXTENDED_ERROR union: + // + // The EXTENDED_ERROR union has different interpretation depending + // upon the AddressSpace and MemoryErrorSource fields of the + // CORRECTABLE_FLAGS structure according to the following table: + // + // 1. AddressSpace=MemorySpace + // MemoryErrorSource = 01x use CacheError structure + // + // 2. AddressSpace=MemorySpace + // MemoryErrorSource = 10x use MemoryError structure + // + // 3. AddressSpace=IoSpace use IoError + // + // 4. AddressSpace=Unidentified use SystemError + // MemoryErrorSource = 0x0 (note: ServerManagementInformationValid + // should be set) + // + // + // CacheError.Flags + // + // Identifies which fields of the CacheError structure are valid. + // CacheLevelValid = 1 indicates CacheLevel is valid. + // CacheBoardValid = 1 indicates CacheBoardNumber is valid. + // CacheSimmValid = 1 indicates CacheSimm is valid. + // + // CacheError.ProcessorInfo + // + // Identifies the processor associated with the error. Most + // frequently will identify the processor that experienced and + // reported the error. However, it is possible that the processor + // that is reporting has experienced the error from another + // processor's cache. This field is valid only if the + // MemoryErrorSource = 010 . + // + // CacheError.CacheLevel + // + // Identifies the level of the cache that caused the error. Primary + // caches are Level 1, Secondary are Level 2, etc.. This field + // only valid if CacheError.Flags.CacheLevelValid == 1. + // + // CacheError.CacheBoardNumber + // + // Identifies the board number of the cache that caused the error. + // This field only valid if + // CacheError.Flags.CacheBoardNumberValid == 1. + // + // CacheError.CacheSimm + // + // Identifies the Cache Simm that caused the error. + // This field only valid if CacheError.Flags.CacheSimmValid == 1. + // + // + // MemoryError.Flags + // + // Identifies which fields of the CacheError structure are valid. + // CacheLevelValid = 1 indicates CacheLevel is valid. + // CacheBoardValid = 1 indicates CacheBoardNumber is valid. + // CacheSimmValid = 1 indicates CacheSimm is valid. + // + // MemoryError.ProcessorInfo + // + // Identifies the processor associated with the error. Most + // frequently will identify the processor that experienced and + // reported the error. However, it is possible that the processor + // that is reporting has experienced the error from another + // processor's cache. This field is valid only if the + // MemoryErrorSource = 010 . + // + // MemoryError.MemoryBoardNumber + // + // Identifies the board number of the cache that caused the error. + // This field only valid if MemoryError.Flags.MemoryBoardValid == 1. + // + // MemoryError.MemorySimm + // + // Identifies the memory SIMM that caused the error. + // This field only valid if MemoryError.Flags.MemorySimmValid == 1. + // + // + // IoError.Interface + // + // Identifies the bus interface type (eg. PCI) of the bus that caused + // the correctable error. + // + // IoError.BusNumber + // + // Identifies the bus number of the bus that caused the correctable + // error. + // + // IoError.BusAddress + // + // Identifies the bus address of the bus that caused the correctable + // error. + // + +// +// Correctable Error Frame. +// + +typedef struct _CORRECTABLE_ERROR{ + + CORRECTABLE_FLAGS Flags; + ULONGLONG PhysicalAddress; + ULONGLONG DataBitErrorMask; + ULONGLONG CheckBitErrorMask; + EXTENDED_ERROR ErrorInformation; + PROCESSOR_INFO ReportingProcessor; + SYSTEM_INFORMATION System; + ULONG RawProcessorInformationLength; + PVOID RawProcessorInformation; + ULONG RawSystemInformationLength; + PVOID RawSystemInformation; + ULONG Reserved; + +} CORRECTABLE_ERROR, *PCORRECTABLE_ERROR; + + // + // Description of CORRECTABLE_ERROR structure: + // + // Flags + // + // The flags describe the various aspects of the error report. The + // CORRECTABLE_FLAGS structure is described below. + // + // PhysicalAddress + // + // The physical CPU address of the quadword that caused the correctable + // error report. This value is optional, its validiity is indicated + // in the Flags. + // + // DataBitErrorMask + // + // A mask that describes which data bits in the corrected word were + // in error. A value of one in the mask indicates that the + // corresponding bit in the data word were in error. + // + // CheckBitErrorMask + // + // A mask that describes which check bits in the corrected word were + // in error. A value of one in the mask indicates that the + // corresponding bit in the check bits were in error. + // + // ErrorInformation + // + // A structure that desribes interpretation of the error. The values + // in the structure are optional, the EXTENDED_ERROR structure is + // described below. + // + // ReportingProcessor + // + // A structure that describes the processor type on the system and + // the processor that reported the error. PROCESSOR_INFO structure + // is described below. + // + // RawProcessorInformationLength + // + // The length of the raw processor error information structure in + // bytes. + // + // RawProcessorInformation + // + // A pointer to the raw processor error information structure. The + // definition of the structure is processor-specific. The definitions + // for the known processors is defined in Appendix A below. + // + // System + // + // A structure that describes the type of system for which the + // error was reported. SYSTEM_INFORMATION structure is described below. + // + // RawSystemInformationLength + // + // The length of the raw processor error information structure in + // bytes. + // + // RawSystemInformation + // + // A pointer to the raw system error information structure. The + // definition of the structure is system/ASIC-specific. The + // definitions for the known systems/ASICs is defined in Appendix B. + // + // + + +/* + * + * Definitions for the uncorrectable error frame. + * + */ + +// +// Uncorrectable Error Flags. +// + +typedef struct _UNCORRECTABLE_FLAGS{ + + // + // Address space + // Unidentified: 00 + // Memory Space: 01 + // I/O Space: 10 + // Reserved: 11 + // + + ULONGLONG AddressSpace: 2; + + // + // Error Interpretation Validity. + // + + ULONGLONG ErrorStringValid: 1; + ULONGLONG PhysicalAddressValid: 1; + ULONGLONG ErrorBitMasksValid: 1; + ULONGLONG ExtendedErrorValid: 1; + ULONGLONG ProcessorInformationValid: 1; + ULONGLONG SystemInformationValid: 1; + + // + // Memory Space Error Source: + // + // Unidentified: 000 + // ProcessorCache: 010 + // SystemCache: 011 + // ProcessorMemory: 100 + // SystemMemory: 101 + // + + ULONGLONG MemoryErrorSource: 4; + +} UNCORRECTABLE_FLAGS, *PUNCORRECTABLE_FLAGS; + + // + // The extended error information, processor information and system + // information structures are identical for correctable and uncorrectable + // errors. The rules for printing the failing FRU are also identical + // to the rules for the correctable errors. + // + +// +// Uncorrectable Error Frame. +// + +typedef struct _UNCORRECTABLE_ERROR{ + + UNCORRECTABLE_FLAGS Flags; + CHAR ErrorString[MAX_UNCORRERR_STRLEN]; + ULONGLONG PhysicalAddress; + ULONGLONG DataBitErrorMask; + ULONGLONG CheckBitErrorMask; + EXTENDED_ERROR ErrorInformation; + PROCESSOR_INFO ReportingProcessor; + SYSTEM_INFORMATION System; + ULONG RawProcessorInformationLength; + PVOID RawProcessorInformation; + ULONG RawSystemInformationLength; + PVOID RawSystemInformation; + ULONG Reserved; + +} UNCORRECTABLE_ERROR, *PUNCORRECTABLE_ERROR; + + +/* + * + * Generic definitions for the error frame. + * + */ + +// +// Generic error frame. +// + +typedef enum _FRAME_TYPE{ + CorrectableFrame, + UncorrectableFrame +} FRAME_TYPE, *PFRAME_TYPE; + +typedef struct _ERROR_FRAME{ + + ULONG Signature; // Needed to make sure that the buffer is infact + // an error frame. + ULONG LengthOfEntireErrorFrame; + FRAME_TYPE FrameType; + ULONG VersionNumber; + ULONG SequenceNumber; + ULONGLONG PerformanceCounterValue; + + union { + CORRECTABLE_ERROR CorrectableFrame; + UNCORRECTABLE_ERROR UncorrectableFrame; + }; + +} ERROR_FRAME, *PERROR_FRAME; + + // + // Description of the generic error frame structure: + // + // FrameType + // + // Specify which frame type we have. Either correctable or + // uncorrectable. + // + // VersionNumber + // + // Defines the version of the error structure. Current version + // number = 1. + // + // SequenceNumber + // + // A number that identifies a particular error frame. + // + // PerformanceCounterValue + // + // The value of the system performance counter when the error was + // reported. The value is determined during error processing by the + // HAL and so may be captured significantly after the hardware + // detected the error. The value cannot be used for fine grained + // timing of when errors occurred but can be used for coarse grained + // timing and approximate timing. + // + // CorrectableFrame + // + // Shared common area for a correctable frame. + // + // UncorrectableFrame + // + // Shared common area for an uncorrectable frame. + // + +#endif //ERRFRAME_H -- cgit v1.2.3