From e611b132f9b8abe35b362e5870b74bce94a1e58e Mon Sep 17 00:00:00 2001 From: Adam Date: Sat, 16 May 2020 20:51:50 -0700 Subject: initial commit --- private/ntos/cache/cachedat.c | 183 ++ private/ntos/cache/cachesub.c | 5633 ++++++++++++++++++++++++++++++++++++++++ private/ntos/cache/cc.h | 1746 +++++++++++++ private/ntos/cache/copysup.c | 2117 +++++++++++++++ private/ntos/cache/dirs | 24 + private/ntos/cache/fssup.c | 3343 ++++++++++++++++++++++++ private/ntos/cache/lazyrite.c | 732 ++++++ private/ntos/cache/logsup.c | 548 ++++ private/ntos/cache/mdlsup.c | 999 +++++++ private/ntos/cache/mp/makefile | 6 + private/ntos/cache/mp/sources | 29 + private/ntos/cache/pinsup.c | 1274 +++++++++ private/ntos/cache/sources.inc | 53 + private/ntos/cache/up/makefile | 6 + private/ntos/cache/up/sources | 27 + private/ntos/cache/vacbsup.c | 1421 ++++++++++ 16 files changed, 18141 insertions(+) create mode 100644 private/ntos/cache/cachedat.c create mode 100644 private/ntos/cache/cachesub.c create mode 100644 private/ntos/cache/cc.h create mode 100644 private/ntos/cache/copysup.c create mode 100644 private/ntos/cache/dirs create mode 100644 private/ntos/cache/fssup.c create mode 100644 private/ntos/cache/lazyrite.c create mode 100644 private/ntos/cache/logsup.c create mode 100644 private/ntos/cache/mdlsup.c create mode 100644 private/ntos/cache/mp/makefile create mode 100644 private/ntos/cache/mp/sources create mode 100644 private/ntos/cache/pinsup.c create mode 100644 private/ntos/cache/sources.inc create mode 100644 private/ntos/cache/up/makefile create mode 100644 private/ntos/cache/up/sources create mode 100644 private/ntos/cache/vacbsup.c (limited to 'private/ntos/cache') diff --git a/private/ntos/cache/cachedat.c b/private/ntos/cache/cachedat.c new file mode 100644 index 000000000..e6755d915 --- /dev/null +++ b/private/ntos/cache/cachedat.c @@ -0,0 +1,183 @@ +/*++ + +Copyright (c) 1990 Microsoft Corporation + +Module Name: + + cachedat.c + +Abstract: + + This module implements the Memory Management based cache management + routines for the common Cache subsystem. + +Author: + + Tom Miller [TomM] 4-May-1990 + +Revision History: + +--*/ + +#include "cc.h" + +// +// Global SharedCacheMap lists and resource to synchronize access to it. +// +// + +extern KSPIN_LOCK CcMasterSpinLock; +LIST_ENTRY CcCleanSharedCacheMapList; +SHARED_CACHE_MAP_LIST_CURSOR CcDirtySharedCacheMapList; +SHARED_CACHE_MAP_LIST_CURSOR CcLazyWriterCursor; + +// +// Worker thread structures: +// +// A spinlock to synchronize all three lists. +// A count of the number of worker threads Cc will use +// A listhead for preinitialized executive work items for Cc use. +// A listhead for an express queue of WORK_QUEUE_ENTRYs +// A listhead for a regular queue of WORK_QUEUE_ENTRYs +// + +extern KSPIN_LOCK CcWorkQueueSpinlock; +ULONG CcNumberWorkerThreads = 0; +LIST_ENTRY CcIdleWorkerThreadList; +LIST_ENTRY CcExpressWorkQueue; +LIST_ENTRY CcRegularWorkQueue; + +// +// Store the current idle delay and target time to clean all. +// + +LARGE_INTEGER CcNoDelay; +LARGE_INTEGER CcFirstDelay = {(ULONG)-(3*LAZY_WRITER_IDLE_DELAY), -1}; +LARGE_INTEGER CcIdleDelay = {(ULONG)-LAZY_WRITER_IDLE_DELAY, -1}; +LARGE_INTEGER CcCollisionDelay = {(ULONG)-LAZY_WRITER_COLLISION_DELAY, -1}; +LARGE_INTEGER CcTargetCleanDelay = {(ULONG)-(LONG)(LAZY_WRITER_IDLE_DELAY * (LAZY_WRITER_MAX_AGE_TARGET + 1)), -1}; + +// +// Spinlock for controlling access to Vacb and related global structures, +// and a counter indicating how many Vcbs are active. 
+// + +extern KSPIN_LOCK CcVacbSpinLock; +ULONG CcNumberVacbs; + +// +// Pointer to the global Vacb vector. +// + +PVACB CcVacbs; +PVACB CcBeyondVacbs; +PVACB CcNextVictimVacb; + +// +// Deferred write list and respective Thresholds +// + +extern KSPIN_LOCK CcDeferredWriteSpinLock; +LIST_ENTRY CcDeferredWrites; +ULONG CcDirtyPageThreshold; +ULONG CcDirtyPageTarget; +ULONG CcPagesYetToWrite; +ULONG CcPagesWrittenLastTime = 0; +ULONG CcDirtyPagesLastScan = 0; +ULONG CcAvailablePagesThreshold = 100; +ULONG CcTotalDirtyPages = 0; + +// +// Captured system size +// + +MM_SYSTEMSIZE CcCapturedSystemSize; + +// +// Tuning options du Jour +// + +ULONG CcTune = 0; + +// +// Global structure controlling lazy writer algorithms +// + +LAZY_WRITER LazyWriter; + +NPAGED_LOOKASIDE_LIST CcTwilightLookasideList; + +#ifdef CCDBG + +LONG CcDebugTraceLevel = 0; +LONG CcDebugTraceIndent = 0; + +#ifdef CCDBG_LOCK +extern KSPIN_LOCK CcDebugTraceLock; +#endif // def CCDBG_LOCK + +#endif + +// +// Global list of pinned Bcbs which may be examined for debug purposes +// + +#if DBG + +ULONG CcBcbCount; +LIST_ENTRY CcBcbList; +extern KSPIN_LOCK CcBcbSpinLock; + +#endif + +// +// Throw away miss counter. +// + +ULONG CcThrowAway; + +// +// Performance Counters +// + +ULONG CcFastReadNoWait; +ULONG CcFastReadWait; +ULONG CcFastReadResourceMiss; +ULONG CcFastReadNotPossible; + +ULONG CcFastMdlReadNoWait; +ULONG CcFastMdlReadWait; +ULONG CcFastMdlReadResourceMiss; +ULONG CcFastMdlReadNotPossible; + +ULONG CcMapDataNoWait; +ULONG CcMapDataWait; +ULONG CcMapDataNoWaitMiss; +ULONG CcMapDataWaitMiss; + +ULONG CcPinMappedDataCount; + +ULONG CcPinReadNoWait; +ULONG CcPinReadWait; +ULONG CcPinReadNoWaitMiss; +ULONG CcPinReadWaitMiss; + +ULONG CcCopyReadNoWait; +ULONG CcCopyReadWait; +ULONG CcCopyReadNoWaitMiss; +ULONG CcCopyReadWaitMiss; + +ULONG CcMdlReadNoWait; +ULONG CcMdlReadWait; +ULONG CcMdlReadNoWaitMiss; +ULONG CcMdlReadWaitMiss; + +ULONG CcReadAheadIos; + +ULONG CcLazyWriteHotSpots; +ULONG CcLazyWriteIos; +ULONG CcLazyWritePages; +ULONG CcDataFlushes; +ULONG CcDataPages; + +PULONG CcMissCounter = &CcThrowAway; diff --git a/private/ntos/cache/cachesub.c b/private/ntos/cache/cachesub.c new file mode 100644 index 000000000..bbbcb88d9 --- /dev/null +++ b/private/ntos/cache/cachesub.c @@ -0,0 +1,5633 @@ +/*++ + +Copyright (c) 1990 Microsoft Corporation + +Module Name: + + cachesub.c + +Abstract: + + This module implements the common subroutines for the Cache subsystem. 
+ +Author: + + Tom Miller [TomM] 4-May-1990 + +Revision History: + +--*/ + +#include "cc.h" + +extern POBJECT_TYPE IoFileObjectType; + +// +// The Bug check file id for this module +// + +#define BugCheckFileId (CACHE_BUG_CHECK_CACHESUB) + +// +// Define our debug constant +// + +#define me 0x00000002 + +// +// Define those errors which should be retried +// + +#define RetryError(STS) (((STS) == STATUS_VERIFY_REQUIRED) || ((STS) == STATUS_FILE_LOCK_CONFLICT)) + +ULONG CcMaxDirtyWrite = 0x10000; + +// +// Local support routines +// + +BOOLEAN +CcFindBcb ( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN PLARGE_INTEGER FileOffset, + IN OUT PLARGE_INTEGER BeyondLastByte, + OUT PBCB *Bcb + ); + +PBCB +CcAllocateInitializeBcb ( + IN OUT PSHARED_CACHE_MAP SharedCacheMap OPTIONAL, + IN OUT PBCB AfterBcb, + IN PLARGE_INTEGER FileOffset, + IN PLARGE_INTEGER Length + ); + +NTSTATUS +CcSetValidData( + IN PFILE_OBJECT FileObject, + IN PLARGE_INTEGER ValidDataLength + ); + +BOOLEAN +CcAcquireByteRangeForWrite ( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN PLARGE_INTEGER TargetOffset OPTIONAL, + IN ULONG TargetLength, + OUT PLARGE_INTEGER FileOffset, + OUT PULONG Length, + OUT PBCB *FirstBcb + ); + +VOID +CcReleaseByteRangeFromWrite ( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN PLARGE_INTEGER FileOffset, + IN ULONG Length, + IN PBCB FirstBcb, + IN BOOLEAN VerifyRequired + ); + + +// +// Internal support routine +// + +BOOLEAN +CcPinFileData ( + IN PFILE_OBJECT FileObject, + IN PLARGE_INTEGER FileOffset, + IN ULONG Length, + IN BOOLEAN ReadOnly, + IN BOOLEAN WriteOnly, + IN BOOLEAN Wait, + OUT PBCB *Bcb, + OUT PVOID *BaseAddress, + OUT PLARGE_INTEGER BeyondLastByte + ) + +/*++ + +Routine Description: + + This routine locks the specified range of file data into memory. + + Note that the data desired by the caller (or the first part of it) + may be in one of three states: + + No Bcb exists which describes the data + + A Bcb exists describing the data, but it is not mapped + (BcbOut->BaseAddress == NULL) + + A Bcb exists describing the data, and it is mapped + + Given the above three states, and given that the caller may call + with either Wait == FALSE or Wait == TRUE, this routine has basically + six cases. What has to be done, and the order in which things must be + done varies quite a bit with each of these six cases. The most + straight-forward implementation of this routine, with the least amount + of branching, is achieved by determining which of the six cases applies, + and dispatching fairly directly to that case. The handling of the + cases is summarized in the following table: + + Wait == TRUE Wait == FALSE + ------------ ------------- + + no Bcb Case 1: Case 2: + + CcAllocateInitializeBcb CcMapAndRead (exit if FALSE) + Acquire Bcb Exclusive CcAllocateInitializeBcb + Release BcbList SpinLock Acquire Bcb Shared if not ReadOnly + CcMapAndRead w/ Wait Release BcbList SpinLock + Convert/Release Bcb Resource + + Bcb not Case 3: Case 4: + mapped + Increment PinCount Acquire Bcb Exclusive (exit if FALSE) + Release BcbList SpinLock CcMapAndRead (exit if FALSE) + Acquire Bcb Excl. 
w/ Wait Increment PinCount + if still not mapped Convert/Release Bcb Resource + CcMapAndRead w/ Wait Release BcbList SpinLock + Convert/Release Bcb Resource + + Bcb mapped Case 5: Case 6: + + Increment PinCount if not ReadOnly + Release BcbList SpinLock Acquire Bcb shared (exit if FALSE) + if not ReadOnly Increment PinCount + Acquire Bcb Shared Release BcbList SpinLock + + It is important to note that most changes to this routine will affect + multiple cases from above. + +Arguments: + + FileObject - Pointer to File Object for file + + FileOffset - Offset in file at which map should begin + + Length - Length of desired map in bytes + + ReadOnly - Supplies TRUE if caller will only read the mapped data (i.e., + TRUE for CcCopyRead, CcMapData and CcMdlRead and FALSE for + everyone else) + + WriteOnly - The specified range of bytes will only be written. + + Wait - Supplies TRUE if it is ok to block the caller's thread + Supplies 3 if it is ok to block the caller's thread and the Bcb should + be exclusive + Supplies FALSE if it is not ok to block the caller's thread + + Bcb - Returns a pointer to the Bcb representing the pinned data. + + BaseAddress - Returns base address of desired data + + BeyondLastByte - Returns the File Offset of the first byte beyond the + last accessible byte. + +Return Value: + + FALSE - if Wait was supplied as TRUE, and it was impossible to lock all + of the data without blocking + TRUE - if the desired data, is being returned + +Raises: + + STATUS_INSUFFICIENT_RESOURCES - If a pool allocation failure occurs. + This can only occur if Wait was specified as TRUE. (If Wait is + specified as FALSE, and an allocation failure occurs, this + routine simply returns FALSE.) + +--*/ + +{ + PSHARED_CACHE_MAP SharedCacheMap; + LARGE_INTEGER TrialBound; + KIRQL OldIrql; + PBCB BcbOut = NULL; + ULONG ZeroFlags = 0; + BOOLEAN SpinLockAcquired = FALSE; + BOOLEAN UnmapBcb = FALSE; + BOOLEAN Result = FALSE; + + ULONG ActivePage; + ULONG PageIsDirty; + PVACB ActiveVacb = NULL; + + DebugTrace(+1, me, "CcPinFileData:\n", 0 ); + DebugTrace( 0, me, " FileObject = %08lx\n", FileObject ); + DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart, + FileOffset->HighPart ); + DebugTrace( 0, me, " Length = %08lx\n", Length ); + DebugTrace( 0, me, " Wait = %02lx\n", Wait ); + + // + // Get pointer to SharedCacheMap via File Object. + // + + SharedCacheMap = *(PSHARED_CACHE_MAP *)((PCHAR)FileObject->SectionObjectPointer + + sizeof(PVOID)); + + // + // See if we have an active Vacb, that we need to free. + // + + GetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, PageIsDirty ); + + // + // If there is an end of a page to be zeroed, then free that page now, + // so it does not cause our data to get zeroed. If there is an active + // page, free it so we have the correct ValidDataGoal. + // + + if ((ActiveVacb != NULL) || (SharedCacheMap->NeedToZero != NULL)) { + + CcFreeActiveVacb( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty ); + } + + // + // Make sure the calling file system is not asking to map beyond the + // end of the section, for example, that it did not forget to do + // CcExtendCacheSection. 
+ // + + ASSERT( ( FileOffset->QuadPart + (LONGLONG)Length ) <= + SharedCacheMap->SectionSize.QuadPart ); + + // + // Initially clear output + // + + *Bcb = NULL; + *BaseAddress = NULL; + + // + // Acquire Bcb List Exclusive to look for Bcb + // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + SpinLockAcquired = TRUE; + + // + // Use try to guarantee cleanup on the way out. + // + + try { + + BOOLEAN Found; + LARGE_INTEGER FOffset; + LARGE_INTEGER TLength; + PVOID BAddress; + PVACB Vacb; + + // + // Search for Bcb describing the largest matching "prefix" byte range, + // or where to insert it. + // + + TrialBound.QuadPart = FileOffset->QuadPart + (LONGLONG)Length; + Found = CcFindBcb( SharedCacheMap, FileOffset, &TrialBound, &BcbOut ); + + + // + // Cases 1 and 2 - Bcb was not found. + // + // First caculate data to pin down. + // + + if (!Found) { + + // + // Not found, calculate data to pin down. + // + // Round local copy of FileOffset down to page boundary, and + // round copies of size and minimum size up. Also make sure that + // we keep the length from crossing the end of the SharedCacheMap. + // + + FOffset = *FileOffset; + TLength.QuadPart = TrialBound.QuadPart - FOffset.QuadPart; + + TLength.LowPart += FOffset.LowPart & (PAGE_SIZE - 1); + + // + // At this point we can calculate the ReadOnly flag for + // the purposes of whether to use the Bcb resource, and + // we can calculate the ZeroFlags. + // + + if ((!ReadOnly && !FlagOn(SharedCacheMap->Flags, PIN_ACCESS)) || WriteOnly) { + + // + // We can always zero middle pages, if any. + // + + ZeroFlags = ZERO_MIDDLE_PAGES; + + if (((FOffset.LowPart & (PAGE_SIZE - 1)) == 0) && + (Length >= PAGE_SIZE)) { + ZeroFlags |= ZERO_FIRST_PAGE; + } + + if ((TLength.LowPart & (PAGE_SIZE - 1)) == 0) { + ZeroFlags |= ZERO_LAST_PAGE; + } + } + + // + // We treat Bcbs as ReadOnly (do not acquire resource) if they + // are in sections for which we have not disabled modified writing. + // + + if (!FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) { + ReadOnly = TRUE; + } + + TLength.LowPart = ROUND_TO_PAGES( TLength.LowPart ); + + FOffset.LowPart &= ~(PAGE_SIZE - 1); + + // + // Even if we are readonly, we can still zero pages entirely + // beyond valid data length. + // + + if (FOffset.QuadPart >= SharedCacheMap->ValidDataGoal.QuadPart) { + + ZeroFlags |= ZERO_FIRST_PAGE | ZERO_MIDDLE_PAGES | ZERO_LAST_PAGE; + + } else if ((FOffset.QuadPart + (LONGLONG)PAGE_SIZE) >= + SharedCacheMap->ValidDataGoal.QuadPart) { + + ZeroFlags |= ZERO_MIDDLE_PAGES | ZERO_LAST_PAGE; + } + + // + // We will get into trouble if we try to read more than we + // can map by one Vacb. So make sure that our lengths stay + // within a Vacb. + // + + if (TLength.LowPart > VACB_MAPPING_GRANULARITY) { + + TLength.LowPart = VACB_MAPPING_GRANULARITY; + } + + if ((FOffset.LowPart & ~(VACB_MAPPING_GRANULARITY - 1)) + + != + + ((FOffset.LowPart + TLength.LowPart - 1) & + ~(VACB_MAPPING_GRANULARITY - 1))) { + + TLength.LowPart = VACB_MAPPING_GRANULARITY - + (FOffset.LowPart & (VACB_MAPPING_GRANULARITY - 1)); + } + + + // + // Case 1 - Bcb was not found and Wait is TRUE. + // + // Note that it is important to minimize the time that the Bcb + // List spin lock is held, as well as guarantee we do not take + // any faults while holding this lock. + // + // If we can (and perhaps will) wait, then it is important to + // allocate the Bcb acquire it exclusive and free the Bcb List. 
+ // We then procede to read in the data, and anyone else finding + // our Bcb will have to wait shared to insure that the data is + // in. + // + + if (Wait) { + + BcbOut = CcAllocateInitializeBcb ( SharedCacheMap, + BcbOut, + &FOffset, + &TLength ); + + if (BcbOut == NULL) { + DebugTrace( 0, 0, "Bcb allocation failure\n", 0 ); + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + SpinLockAcquired = FALSE; + ExRaiseStatus( STATUS_INSUFFICIENT_RESOURCES ); + } + + // + // Now just acquire the newly-allocated Bcb shared, and + // release the spin lock. + // + + if (!ReadOnly) { + if (Wait == 3) { + (VOID)ExAcquireResourceExclusive( &BcbOut->Resource, TRUE ); + } else { + (VOID)ExAcquireSharedStarveExclusive( &BcbOut->Resource, TRUE ); + } + } + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + SpinLockAcquired = FALSE; + + // + // Now read in the data. + // + // We set UnmapBcb to be TRUE for the duration of this call, + // so that if we get an exception, we will call CcUnpinFileData + // and probably delete the Bcb. + // + + UnmapBcb = TRUE; + (VOID)CcMapAndRead( SharedCacheMap, + &FOffset, + TLength.LowPart, + ZeroFlags, + TRUE, + &Vacb, + &BAddress ); + + UnmapBcb = FALSE; + + // + // Now we have to reacquire the Bcb List spinlock to load + // up the mapping if we are the first one, else we collided + // with someone else who loaded the mapping first, and we + // will just free our mapping. It is guaranteed that the + // data will be mapped to the same place. + // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + + if (BcbOut->BaseAddress == NULL) { + + BcbOut->BaseAddress = BAddress; + BcbOut->Vacb = Vacb; + + } else { + CcFreeVirtualAddress( Vacb ); + } + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + + // + // Calculate Base Address of the data we want. + // + + *BaseAddress = (PCHAR)BcbOut->BaseAddress + + (ULONG)( FileOffset->QuadPart - BcbOut->FileOffset.QuadPart ); + + // + // Success! + // + + try_return( Result = TRUE ); + } + + + // + // Case 2 - Bcb was not found and Wait is FALSE + // + // If we cannot wait, then we go immediately see if the data is + // there (CcMapAndRead), and then only set up the Bcb and release + // the spin lock if the data is there. Note here we call + // CcMapAndRead while holding the spin lock, because we know we + // will not fault and not block before returning. + // + + else { + + // + // Now try to allocate and initialize the Bcb. If we + // fail to allocate one, then return FALSE, since we know that + // Wait = FALSE. The caller may get lucky if he calls + // us back with Wait = TRUE. + // + + BcbOut = CcAllocateInitializeBcb ( SharedCacheMap, + BcbOut, + &FOffset, + &TLength ); + + if (BcbOut == NULL) { + + try_return( Result = FALSE ); + } + + // + // If we are not ReadOnly, we must acquire the newly-allocated + // resource shared, and then we can free the spin lock. + // + + if (!ReadOnly) { + ExAcquireSharedStarveExclusive( &BcbOut->Resource, TRUE ); + } + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + SpinLockAcquired = FALSE; + + // + // Note that since this call has Wait = FALSE, it cannot + // get an exception (see procedure header). 
+ // + + UnmapBcb = TRUE; + if (!CcMapAndRead( SharedCacheMap, + &FOffset, + TLength.LowPart, + ZeroFlags, + FALSE, + &Vacb, + &BAddress )) { + + try_return( Result = FALSE ); + } + UnmapBcb = FALSE; + + // + // Now we have to reacquire the Bcb List spinlock to load + // up the mapping if we are the first one, else we collided + // with someone else who loaded the mapping first, and we + // will just free our mapping. It is guaranteed that the + // data will be mapped to the same place. + // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + + if (BcbOut->BaseAddress == NULL) { + + BcbOut->BaseAddress = BAddress; + BcbOut->Vacb = Vacb; + + } else { + CcFreeVirtualAddress( Vacb ); + } + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + + // + // Calculate Base Address of the data we want. + // + + *BaseAddress = (PCHAR)BcbOut->BaseAddress + + (ULONG)( FileOffset->QuadPart - BcbOut->FileOffset.QuadPart ); + + // + // Success! + // + + try_return( Result = TRUE ); + } + + } else { + + // + // We treat Bcbs as ReadOnly (do not acquire resource) if they + // are in sections for which we have not disabled modified writing. + // + + if (!FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) { + ReadOnly = TRUE; + } + } + + + // + // Cases 3 and 4 - Bcb is there but not mapped + // + + if (BcbOut->BaseAddress == NULL) { + + // + // It is too complicated to attempt to calculate any ZeroFlags in this + // case, because we have to not only do the tests above, but also + // compare to the byte range in the Bcb since we will be passing + // those parameters to CcMapAndRead. Also, the probability of hitting + // some window where zeroing is of any advantage is quite small. + // + + // + // Set up to just reread the Bcb exactly as the data in it is + // described. + // + + FOffset = BcbOut->FileOffset; + TLength.QuadPart = (LONGLONG)BcbOut->ByteLength; + + // + // Case 3 - Bcb is there but not mapped and Wait is TRUE + // + // Increment the PinCount, and then release the BcbList + // SpinLock so that we can wait to acquire the Bcb exclusive. + // Once we have the Bcb exclusive, map and read it in if no + // one beats us to it. Someone may have beat us to it since + // we had to release the SpinLock above. + // + + if (Wait) { + + BcbOut->PinCount += 1; + + // + // Now we have to release the BcbList SpinLock in order to + // acquire the Bcb shared. + // + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + SpinLockAcquired = FALSE; + if (!ReadOnly) { + if (Wait == 3) { + (VOID)ExAcquireResourceExclusive( &BcbOut->Resource, TRUE ); + } else { + (VOID)ExAcquireSharedStarveExclusive( &BcbOut->Resource, TRUE ); + } + } + + // + // Now procede to map and read the data in. + // + // Now read in the data. + // + // We set UnmapBcb to be TRUE for the duration of this call, + // so that if we get an exception, we will call CcUnpinFileData + // and probably delete the Bcb. + // + + UnmapBcb = TRUE; + (VOID)CcMapAndRead( SharedCacheMap, + &FOffset, + TLength.LowPart, + ZeroFlags, + TRUE, + &Vacb, + &BAddress ); + UnmapBcb = FALSE; + + // + // Now we have to reacquire the Bcb List spinlock to load + // up the mapping if we are the first one, else we collided + // with someone else who loaded the mapping first, and we + // will just free our mapping. It is guaranteed that the + // data will be mapped to the same place. 
+ // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + + if (BcbOut->BaseAddress == NULL) { + + BcbOut->BaseAddress = BAddress; + BcbOut->Vacb = Vacb; + + } else { + CcFreeVirtualAddress( Vacb ); + } + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + + // + // + // Calculate Base Address of the data we want. + // + + *BaseAddress = (PCHAR)BcbOut->BaseAddress + + (ULONG)( FileOffset->QuadPart - BcbOut->FileOffset.QuadPart ); + + // + // Success! + // + + try_return( Result = TRUE ); + } + + + // + // Case 4 - Bcb is there but not mapped, and Wait is FALSE + // + // Since we cannot wait, we go immediately see if the data is + // there (CcMapAndRead), and then only set up the Bcb and release + // the spin lock if the data is there. Note here we call + // CcMapAndRead while holding the spin lock, because we know we + // will not fault and not block before returning. + // + + else { + + if (!ReadOnly && !ExAcquireSharedStarveExclusive( &BcbOut->Resource, FALSE )) { + try_return( Result = FALSE ); + } + + BcbOut->PinCount += 1; + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + SpinLockAcquired = FALSE; + + // + // Note that since this call has Wait = FALSE, it cannot + // get an exception (see procedure header). + // + + UnmapBcb = TRUE; + if (!CcMapAndRead( SharedCacheMap, + &BcbOut->FileOffset, + BcbOut->ByteLength, + ZeroFlags, + FALSE, + &Vacb, + &BAddress )) { + + try_return( Result = FALSE ); + } + UnmapBcb = FALSE; + + // + // Now we have to reacquire the Bcb List spinlock to load + // up the mapping if we are the first one, else we collided + // with someone else who loaded the mapping first, and we + // will just free our mapping. It is guaranteed that the + // data will be mapped to the same place. + // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + + if (BcbOut->BaseAddress == NULL) { + + BcbOut->BaseAddress = BAddress; + BcbOut->Vacb = Vacb; + + } else { + CcFreeVirtualAddress( Vacb ); + } + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + + // + // Calculate Base Address of the data we want. + // + + *BaseAddress = (PCHAR)BcbOut->BaseAddress + + (ULONG)( FileOffset->QuadPart - BcbOut->FileOffset.QuadPart ); + + // + // Success! + // + + try_return( Result = TRUE ); + } + } + + + // + // Cases 5 and 6 - Bcb is there and it is mapped + // + + else { + + // + // Case 5 - Bcb is there and mapped, and Wait is TRUE + // + // We can just increment the PinCount, release the SpinLock + // and then acquire the Bcb Shared if we are not ReadOnly. + // + + if (Wait) { + + BcbOut->PinCount += 1; + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + SpinLockAcquired = FALSE; + + // + // Acquire Bcb Resource shared to insure that it is in memory. + // + + if (!ReadOnly) { + if (Wait == 3) { + (VOID)ExAcquireResourceExclusive( &BcbOut->Resource, TRUE ); + } else { + (VOID)ExAcquireSharedStarveExclusive( &BcbOut->Resource, TRUE ); + } + } + } + + // + // Case 6 - Bcb is there and mapped, and Wait is FALSE + // + // If we are not ReadOnly, we have to first see if we can + // acquire the Bcb shared before incrmenting the PinCount, + // since we will have to return FALSE if we cannot acquire the + // resource. + // + + else { + + // + // Acquire Bcb Resource shared to insure that it is in memory. 
+ // + + if (!ReadOnly && !ExAcquireSharedStarveExclusive( &BcbOut->Resource, FALSE )) { + try_return( Result = FALSE ); + } + BcbOut->PinCount += 1; + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + SpinLockAcquired = FALSE; + } + + // + // Calculate Base Address of the data we want. + // + + *BaseAddress = (PCHAR)BcbOut->BaseAddress + + (ULONG)( FileOffset->QuadPart - BcbOut->FileOffset.QuadPart ); + + // + // Success! + // + + try_return( Result = TRUE ); + } + + + try_exit: NOTHING; + + } + + finally { + + // + // Release the spinlock if it is acquired. + // + + if (SpinLockAcquired) { + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + } + + // + // An abnormal termination can occur on an allocation failure, + // or on a failure to map and read the buffer. The latter + // operation is performed with UnmapBcb = TRUE, so that we + // know to make the unmap call. + // + + if (UnmapBcb) { + CcUnpinFileData( BcbOut, ReadOnly, UNPIN ); + BcbOut = NULL; + } + + if (Result) { + + *Bcb = BcbOut; + if (BcbOut != NULL) { + *BeyondLastByte = BcbOut->BeyondLastByte; + } + else { + *BeyondLastByte = *FileOffset; + } + } + + DebugTrace( 0, me, " %02lx\n", Result ); + } + + return Result; +} + + +// +// Internal Support Routine +// + +VOID +FASTCALL +CcUnpinFileData ( + IN OUT PBCB Bcb, + IN BOOLEAN ReadOnly, + IN UNMAP_ACTIONS UnmapAction + ) + +/*++ + +Routine Description: + + This routine umaps and unlocks the specified buffer, which was previously + locked and mapped by calling CcPinFileData. + +Arguments: + + Bcb - Pointer previously returned from CcPinFileData. As may be + seen above, this pointer may be either a Bcb or a Vacb. + + ReadOnly - must specify same value as when data was mapped + + UnmapAction - UNPIN or SET_CLEAN + +Return Value: + + None + +--*/ + +{ + KIRQL OldIrql; + PSHARED_CACHE_MAP SharedCacheMap; + + DebugTrace(+1, me, "CcUnpinFileData >Bcb = %08lx\n", Bcb ); + + // + // Note, since we have to allocate so many Vacbs, we do not use + // a node type code. However, the Vacb starts with a BaseAddress, + // so we assume that the low byte of the Bcb node type code has + // some bits set, which a page-aligned Base Address cannot. + // + + ASSERT( (CACHE_NTC_BCB & 0xFF) != 0 ); + + if (Bcb->NodeTypeCode != CACHE_NTC_BCB) { + + ASSERT(((PVACB)Bcb)->SharedCacheMap->NodeTypeCode == CACHE_NTC_SHARED_CACHE_MAP); + + CcFreeVirtualAddress( (PVACB)Bcb ); + + DebugTrace(-1, me, "CcUnpinFileData -> VOID (simple release)\n", 0 ); + + return; + } + + SharedCacheMap = Bcb->SharedCacheMap; + + // + // We treat Bcbs as ReadOnly (do not acquire resource) if they + // are in sections for which we have not disabled modified writing. + // + + if (!FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) { + ReadOnly = TRUE; + } + + // + // Synchronize + // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + + switch (UnmapAction) { + + case UNPIN: + + ASSERT( Bcb->PinCount > 0 ); + + Bcb->PinCount -= 1; + break; + + case SET_CLEAN: + + if (Bcb->Dirty) { + + ULONG Pages = Bcb->ByteLength >> PAGE_SHIFT; + + // + // Reverse the rest of the actions taken when the Bcb was set dirty. + // + + Bcb->Dirty = FALSE; + SharedCacheMap->DirtyPages -= Pages; + CcTotalDirtyPages -= Pages; + + // + // Normally we need to reduce CcPagesYetToWrite appropriately. + // + + if (CcPagesYetToWrite > Pages) { + CcPagesYetToWrite -= Pages; + } else { + CcPagesYetToWrite = 0; + } + + // + // Remove SharedCacheMap from dirty list if nothing more dirty, + // and someone still has the cache map opened. 
+ // + + if ((SharedCacheMap->DirtyPages == 0) && + (SharedCacheMap->OpenCount != 0)) { + + RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks ); + InsertTailList( &CcCleanSharedCacheMapList, + &SharedCacheMap->SharedCacheMapLinks ); + } + } + + break; + + default: + CcBugCheck( UnmapAction, 0, 0 ); + } + + // + // If we brought it to 0, then we have to kill it. + // + + if (Bcb->PinCount == 0) { + + // + // If the Bcb is Dirty, we only release the resource and unmap now. + // + + if (Bcb->Dirty) { + + if (Bcb->BaseAddress != NULL) { + + // + // Capture CcFreeVirtualAddress parameters to locals so that we can + // reset Bcb->BaseAddress and release the spinlock before + // unmapping. + // + + PVOID BaseAddress = Bcb->BaseAddress; + ULONG ByteLength = Bcb->ByteLength; + PVACB Vacb = Bcb->Vacb; + + Bcb->BaseAddress = NULL; + Bcb->Vacb = NULL; + + if (!ReadOnly) { + ExReleaseResource( &Bcb->Resource ); + } + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + + CcFreeVirtualAddress( Vacb ); + } + else { + + if (!ReadOnly) { + ExReleaseResource( &Bcb->Resource ); + } + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + } + } + + // + // Otherwise, we also delete the Bcb. + // + + else { + + RemoveEntryList( &Bcb->BcbLinks ); + + if (Bcb->BaseAddress != NULL) { + + CcFreeVirtualAddress( Bcb->Vacb ); + } + + // + // Debug routines used to remove Bcbs from the global list + // + +#if LIST_DBG + + ExAcquireSpinLockAtDpcLevel( &CcBcbSpinLock ); + + if (Bcb->CcBcbLinks.Flink != NULL) { + + RemoveEntryList( &Bcb->CcBcbLinks ); + CcBcbCount -= 1; + } + + ExReleaseSpinLockFromDpcLevel( &CcBcbSpinLock ); + +#endif +#if DBG + if (!ReadOnly) { + ExReleaseResource( &Bcb->Resource ); + } + + // + // ASSERT that the resource is unowned. + // + + ASSERT( Bcb->Resource.ActiveCount == 0 ); +#endif + CcDeallocateBcb( Bcb ); + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + } + } + + // + // Else we just have to release our Shared access, if we are not + // readonly. We don't need to do this above, since we deallocate + // the entire Bcb there. + // + + else { + + if (!ReadOnly) { + ExReleaseResource( &Bcb->Resource ); + } + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + } + + DebugTrace(-1, me, "CcUnpinFileData -> VOID\n", 0 ); + + return; +} + + +VOID +CcSetReadAheadGranularity ( + IN PFILE_OBJECT FileObject, + IN ULONG Granularity + ) + +/*++ + +Routine Description: + + This routine may be called to set the read ahead granularity used by + the Cache Manager. The default is PAGE_SIZE. The number is decremented + and stored as a mask. + +Arguments: + + FileObject - File Object for which granularity shall be set + + Granularity - new granularity, which must be an even power of 2 and + >= PAGE_SIZE + +Return Value: + + None +--*/ + +{ + ((PPRIVATE_CACHE_MAP)FileObject->PrivateCacheMap)->ReadAheadMask = Granularity - 1; +} + + +VOID +CcScheduleReadAhead ( + IN PFILE_OBJECT FileObject, + IN PLARGE_INTEGER FileOffset, + IN ULONG Length + ) + +/*++ + +Routine Description: + + This routine is called by Copy Read and Mdl Read file system routines to + perform common Read Ahead processing. The input parameters describe + the current read which has just been completed, or perhaps only started + in the case of Mdl Reads. Based on these parameters, an + assessment is made on how much data should be read ahead, and whether + that data has already been read ahead. 
+ + The processing is divided into two parts: + + CALCULATE READ AHEAD REQUIREMENTS (CcScheduleReadAhead) + + PERFORM READ AHEAD (CcPerformReadAhead) + + File systems should always call CcReadAhead, which will conditionally + call CcScheduleReadAhead (if the read is large enough). If such a call + determines that there is read ahead work to do, and no read ahead is + currently active, then it will set ReadAheadActive and schedule read + ahead to be peformed by the Lazy Writer, who will call CcPeformReadAhead. + +Arguments: + + FileObject - supplies pointer to FileObject on which readahead should be + considered. + + FileOffset - supplies the FileOffset at which the last read just occurred. + + Length - supplies the length of the last read. + +Return Value: + + None +--*/ + +{ + LARGE_INTEGER NewOffset; + LARGE_INTEGER NewBeyond; + LARGE_INTEGER FileOffset1, FileOffset2; + KIRQL OldIrql; + PSHARED_CACHE_MAP SharedCacheMap; + PPRIVATE_CACHE_MAP PrivateCacheMap; + PWORK_QUEUE_ENTRY WorkQueueEntry; + ULONG ReadAheadSize; + BOOLEAN Changed = FALSE; + + DebugTrace(+1, me, "CcScheduleReadAhead:\n", 0 ); + DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart, + FileOffset->HighPart ); + DebugTrace( 0, me, " Length = %08lx\n", Length ); + + SharedCacheMap = *(PSHARED_CACHE_MAP *)((PCHAR)FileObject->SectionObjectPointer + + sizeof(PVOID)); + PrivateCacheMap = FileObject->PrivateCacheMap; + + if ((PrivateCacheMap == NULL) || + (SharedCacheMap == NULL) || + FlagOn(SharedCacheMap->Flags, DISABLE_READ_AHEAD)) { + + DebugTrace(-1, me, "CcScheduleReadAhead -> VOID (Nooped)\n", 0 ); + + return; + } + + // + // Round boundaries of transfer up to some greater granularity, so that + // sequential reads will be recognized even if a few bytes are skipped + // between records. + // + + NewOffset = *FileOffset; + NewBeyond.QuadPart = FileOffset->QuadPart + (LONGLONG)Length; + + // + // Find the next read ahead boundary beyond the current read. + // + + ReadAheadSize = (Length + PrivateCacheMap->ReadAheadMask) & ~PrivateCacheMap->ReadAheadMask; + FileOffset2.QuadPart = NewBeyond.QuadPart + (LONGLONG)ReadAheadSize; + FileOffset2.LowPart &= ~PrivateCacheMap->ReadAheadMask; + + // + // CALCULATE READ AHEAD REQUIREMENTS + // + + // + // Take out the ReadAhead spinlock to synchronize our read ahead decision. + // + + ExAcquireSpinLock( &PrivateCacheMap->ReadAheadSpinLock, &OldIrql ); + + // + // Read Ahead Case 0. + // + // Sequential-only hint in the file object. For this case we will + // try and always keep two read ahead granularities read ahead from + // and including the end of the current transfer. This case has the + // lowest overhead, and the code is completely immune to how the + // caller skips around. Sequential files use ReadAheadOffset[1] in + // the PrivateCacheMap as their "high water mark". + // + + if (FlagOn(FileObject->Flags, FO_SEQUENTIAL_ONLY)) { + + // + // If the next boundary is greater than or equal to the high-water mark, + // then read ahead. + // + + if (FileOffset2.QuadPart >= PrivateCacheMap->ReadAheadOffset[1].QuadPart) { + + // + // On the first read if we are using a large read ahead granularity, + // and the read did not get it all, we will just get the rest of the + // first data we want. 
+ // + + if ((FileOffset->QuadPart == 0) + + && + + (PrivateCacheMap->ReadAheadMask > (PAGE_SIZE - 1)) + + && + + ((Length + PAGE_SIZE - 1) <= PrivateCacheMap->ReadAheadMask)) { + + FileOffset1.QuadPart = (LONGLONG)( ROUND_TO_PAGES(Length) ); + PrivateCacheMap->ReadAheadLength[0] = ReadAheadSize - FileOffset1.LowPart; + FileOffset2.QuadPart = (LONGLONG)ReadAheadSize; + + // + // Calculate the next read ahead boundary. + // + + } else { + + FileOffset1.QuadPart = PrivateCacheMap->ReadAheadOffset[1].QuadPart + + (LONGLONG)ReadAheadSize; + + // + // If the end of the current read is actually beyond where we would + // normally do our read ahead, then we have fallen behind, and we must + // advance to that spot. + // + + if (FileOffset2.QuadPart > FileOffset1.QuadPart) { + FileOffset1 = FileOffset2; + } + PrivateCacheMap->ReadAheadLength[0] = ReadAheadSize; + FileOffset2.QuadPart = FileOffset1.QuadPart + (LONGLONG)ReadAheadSize; + } + + // + // Now issue the next two read aheads. + // + + PrivateCacheMap->ReadAheadOffset[0] = FileOffset1; + + PrivateCacheMap->ReadAheadOffset[1] = FileOffset2; + PrivateCacheMap->ReadAheadLength[1] = ReadAheadSize; + + Changed = TRUE; + } + + // + // Read Ahead Case 1. + // + // If this is the third of three sequential reads, then we will see if + // we can read ahead. Note that if the first read to a file is to + // offset 0, it passes this test. + // + + } else if ((NewOffset.HighPart == PrivateCacheMap->BeyondLastByte2.HighPart) + + && + + ((NewOffset.LowPart & ~NOISE_BITS) + == (PrivateCacheMap->BeyondLastByte2.LowPart & ~NOISE_BITS)) + + && + + (PrivateCacheMap->FileOffset2.HighPart + == PrivateCacheMap->BeyondLastByte1.HighPart) + + && + + ((PrivateCacheMap->FileOffset2.LowPart & ~NOISE_BITS) + == (PrivateCacheMap->BeyondLastByte1.LowPart & ~NOISE_BITS))) { + + // + // On the first read if we are using a large read ahead granularity, + // and the read did not get it all, we will just get the rest of the + // first data we want. + // + + if ((FileOffset->QuadPart == 0) + + && + + (PrivateCacheMap->ReadAheadMask > (PAGE_SIZE - 1)) + + && + + ((Length + PAGE_SIZE - 1) <= PrivateCacheMap->ReadAheadMask)) { + + FileOffset2.QuadPart = (LONGLONG)( ROUND_TO_PAGES(Length) ); + } + + // + // Round read offset to next read ahead boundary. + // + + else { + FileOffset2.QuadPart = NewBeyond.QuadPart + (LONGLONG)ReadAheadSize; + + FileOffset2.LowPart &= ~PrivateCacheMap->ReadAheadMask; + } + + // + // Set read ahead length to be the same as for the most recent read, + // up to our max. + // + + if (FileOffset2.QuadPart != PrivateCacheMap->ReadAheadOffset[1].QuadPart) { + + ASSERT( FileOffset2.HighPart >= 0 ); + + Changed = TRUE; + PrivateCacheMap->ReadAheadOffset[1] = FileOffset2; + PrivateCacheMap->ReadAheadLength[1] = ReadAheadSize; + } + } + + // + // Read Ahead Case 2. + // + // If this is the third read following a particular stride, then we + // will see if we can read ahead. One example of an application that + // might do this is a spreadsheet. Note that this code even works + // for negative strides. 
+ // + + else if ( ( NewOffset.QuadPart - + PrivateCacheMap->FileOffset2.QuadPart ) == + ( PrivateCacheMap->FileOffset2.QuadPart - + PrivateCacheMap->FileOffset1.QuadPart )) { + + // + // According to the current stride, the next offset will be: + // + // NewOffset + (NewOffset - FileOffset2) + // + // which is the same as: + // + // (NewOffset * 2) - FileOffset2 + // + + FileOffset2.QuadPart = ( NewOffset.QuadPart << 1 ) - PrivateCacheMap->FileOffset2.QuadPart; + + // + // If our stride is going backwards through the file, we + // have to detect the case where the next step would wrap. + // + + if (FileOffset2.HighPart >= 0) { + + // + // The read ahead length must be extended by the same amount that + // we will round the PrivateCacheMap->ReadAheadOffset down. + // + + Length += FileOffset2.LowPart & (PAGE_SIZE - 1); + + // + // Now round the PrivateCacheMap->ReadAheadOffset down. + // + + FileOffset2.LowPart &= ~(PAGE_SIZE - 1); + PrivateCacheMap->ReadAheadOffset[1] = FileOffset2; + + // + // Round to page boundary. + // + + PrivateCacheMap->ReadAheadLength[1] = ROUND_TO_PAGES(Length); + Changed = TRUE; + } + } + + // + // Get out if the ReadAhead requirements did not change. + // + + if (!Changed || PrivateCacheMap->ReadAheadActive) { + + DebugTrace( 0, me, "Read ahead already in progress or no change\n", 0 ); + + ExReleaseSpinLock( &PrivateCacheMap->ReadAheadSpinLock, OldIrql ); + return; + } + + // + // Otherwise, we will proceed and try to schedule the read ahead + // ourselves. + // + + PrivateCacheMap->ReadAheadActive = TRUE; + + // + // Release spin lock on way out + // + + ExReleaseSpinLock( &PrivateCacheMap->ReadAheadSpinLock, OldIrql ); + + // + // Queue the read ahead request to the Lazy Writer's work queue. + // + + DebugTrace( 0, me, "Queueing read ahead to worker thread\n", 0 ); + + WorkQueueEntry = CcAllocateWorkQueueEntry(); + + // + // If we failed to allocate a work queue entry, then, we will + // quietly bag it. Read ahead is only an optimization, and + // no one ever requires that it occur. + // + + if (WorkQueueEntry != NULL) { + + // + // We must reference this file object so that it cannot go away + // until we finish Read Ahead processing in the Worker Thread. + // + + ObReferenceObject ( FileObject ); + + // + // Increment open count to make sure the SharedCacheMap stays around. + // + + ExAcquireFastLock( &CcMasterSpinLock, &OldIrql ); + SharedCacheMap->OpenCount += 1; + ExReleaseFastLock( &CcMasterSpinLock, OldIrql ); + + WorkQueueEntry->Function = (UCHAR)ReadAhead; + WorkQueueEntry->Parameters.Read.FileObject = FileObject; + + CcPostWorkQueue( WorkQueueEntry, &CcExpressWorkQueue ); + } + + // + // If we failed to allocate a Work Queue Entry, or all of the pages + // are resident we must set the active flag false. + // + + else { + + ExAcquireFastLock( &PrivateCacheMap->ReadAheadSpinLock, &OldIrql ); + PrivateCacheMap->ReadAheadActive = FALSE; + ExReleaseFastLock( &PrivateCacheMap->ReadAheadSpinLock, OldIrql ); + } + + DebugTrace(-1, me, "CcScheduleReadAhead -> VOID\n", 0 ); + + return; +} + + +VOID +FASTCALL +CcPerformReadAhead ( + IN PFILE_OBJECT FileObject + ) + +/*++ + +Routine Description: + + This routine is called by the Lazy Writer to perform read ahead which + has been scheduled for this file by CcScheduleReadAhead. + +Arguments: + + FileObject - supplies pointer to FileObject on which readahead should be + considered. 
+ +Return Value: + + None +--*/ + +{ + KIRQL OldIrql; + PSHARED_CACHE_MAP SharedCacheMap; + PPRIVATE_CACHE_MAP PrivateCacheMap; + ULONG i; + LARGE_INTEGER ReadAheadOffset[2]; + ULONG ReadAheadLength[2]; + PCACHE_MANAGER_CALLBACKS Callbacks; + PVOID Context; + ULONG SavedState; + BOOLEAN Done; + BOOLEAN HitEof = FALSE; + BOOLEAN ReadAheadPerformed = FALSE; + BOOLEAN FaultOccurred = FALSE; + PETHREAD Thread = PsGetCurrentThread(); + PVACB Vacb = NULL; + + DebugTrace(+1, me, "CcPerformReadAhead:\n", 0 ); + DebugTrace( 0, me, " FileObject = %08lx\n", FileObject ); + + MmSavePageFaultReadAhead( Thread, &SavedState ); + + try { + + // + // Since we have the open count biased, we can safely access the + // SharedCacheMap. + // + + SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap; + + Callbacks = SharedCacheMap->Callbacks; + Context = SharedCacheMap->LazyWriteContext; + + // + // After the first time, keep looping as long as there are new + // read ahead requirements. (We will skip out below.) + // + + while (TRUE) { + + // + // Get SharedCacheMap and PrivateCacheMap. If either are now NULL, get + // out. + // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + + PrivateCacheMap = FileObject->PrivateCacheMap; + + // + // Now capture the information that we need, so that we can drop the + // SharedList Resource. This information is advisory only anyway, and + // the caller must guarantee that the FileObject is referenced. + // + + if (PrivateCacheMap != NULL) { + + ExAcquireSpinLockAtDpcLevel( &PrivateCacheMap->ReadAheadSpinLock ); + + // + // We are done when the lengths are 0 + // + + Done = ((PrivateCacheMap->ReadAheadLength[0] | + PrivateCacheMap->ReadAheadLength[1]) == 0); + + ReadAheadOffset[0] = PrivateCacheMap->ReadAheadOffset[0]; + ReadAheadOffset[1] = PrivateCacheMap->ReadAheadOffset[1]; + ReadAheadLength[0] = PrivateCacheMap->ReadAheadLength[0]; + ReadAheadLength[1] = PrivateCacheMap->ReadAheadLength[1]; + PrivateCacheMap->ReadAheadLength[0] = 0; + PrivateCacheMap->ReadAheadLength[1] = 0; + + ExReleaseSpinLockFromDpcLevel( &PrivateCacheMap->ReadAheadSpinLock ); + } + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + + // + // Acquire the file shared. + // + + (*Callbacks->AcquireForReadAhead)( Context, TRUE ); + + if ((PrivateCacheMap == NULL) || Done) { + + try_return( NOTHING ); + } + + // + // PERFORM READ AHEAD + // + // + // Now loop until everything is read in. The Read ahead is accomplished + // by touching the pages with an appropriate ReadAhead parameter in MM. + // + + i = 0; + + do { + + LARGE_INTEGER Offset, SavedOffset; + ULONG Length, SavedLength; + + Offset = ReadAheadOffset[i]; + Length = ReadAheadLength[i]; + SavedOffset = Offset; + SavedLength = Length; + + if ((Length != 0) + + && + + ( Offset.QuadPart <= SharedCacheMap->FileSize.QuadPart )) { + + ReadAheadPerformed = TRUE; + + // + // Keep length within file and MAX_READ_AHEAD + // + + if ( ( Offset.QuadPart + (LONGLONG)Length ) >= SharedCacheMap->FileSize.QuadPart ) { + + Length = (ULONG)( SharedCacheMap->FileSize.QuadPart - Offset.QuadPart ); + HitEof = TRUE; + + } + if (Length > MAX_READ_AHEAD) { + Length = MAX_READ_AHEAD; + } + + // + // Now loop to read all of the desired data in. This loop + // is more or less like the same loop to read data in + // CcCopyRead, except that we do not copy anything, just + // unmap as soon as it is in. 
+ // + + while (Length != 0) { + + ULONG ReceivedLength; + PVOID CacheBuffer; + ULONG PagesToGo; + + // + // Call local routine to Map or Access the file data. + // If we cannot map the data because of a Wait condition, + // return FALSE. + // + // Since this routine is intended to be called from + // the finally handler from file system read modules, + // it is imperative that it not raise any exceptions. + // Therefore, if any expected exception is raised, we + // will simply get out. + // + + CacheBuffer = CcGetVirtualAddress( SharedCacheMap, + Offset, + &Vacb, + &ReceivedLength ); + + // + // If we got more than we need, make sure to only transfer + // the right amount. + // + + if (ReceivedLength > Length) { + ReceivedLength = Length; + } + + // + // Now loop to touch all of the pages, calling MM to insure + // that if we fault, we take in exactly the number of pages + // we need. + // + + PagesToGo = COMPUTE_PAGES_SPANNED( CacheBuffer, + ReceivedLength ); + + CcMissCounter = &CcReadAheadIos; + + while (PagesToGo) { + + MmSetPageFaultReadAhead( Thread, (PagesToGo - 1) ); + FaultOccurred = (BOOLEAN)!MmCheckCachedPageState(CacheBuffer, FALSE); + + CacheBuffer = (PCHAR)CacheBuffer + PAGE_SIZE; + PagesToGo -= 1; + } + CcMissCounter = &CcThrowAway; + + // + // Calculate how much data we have left to go. + // + + Length -= ReceivedLength; + + // + // Assume we did not get all the data we wanted, and set + // Offset to the end of the returned data. + // + + Offset.QuadPart = Offset.QuadPart + (LONGLONG)ReceivedLength; + + // + // It was only a page, so we can just leave this loop + // After freeing the address. + // + + CcFreeVirtualAddress( Vacb ); + Vacb = NULL; + } + } + i += 1; + } while (i <= 1); + + // + // Release the file + // + + (*Callbacks->ReleaseFromReadAhead)( Context ); + } + + try_exit: NOTHING; + } + finally { + + MmResetPageFaultReadAhead(Thread, SavedState); + CcMissCounter = &CcThrowAway; + + // + // If we got an error faulting a single page in, release the Vacb + // here. It is important to free any mapping before dropping the + // resource to prevent purge problems. + // + + if (Vacb != NULL) { + CcFreeVirtualAddress( Vacb ); + } + + // + // Release the file + // + + (*Callbacks->ReleaseFromReadAhead)( Context ); + + // + // To show we are done, we must make sure the PrivateCacheMap is + // still there. + // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + + PrivateCacheMap = FileObject->PrivateCacheMap; + + // + // Show readahead is going inactive. + // + + if (PrivateCacheMap != NULL) { + + ExAcquireSpinLockAtDpcLevel( &PrivateCacheMap->ReadAheadSpinLock ); + PrivateCacheMap->ReadAheadActive = FALSE; + + // + // If he said sequential only and we smashed into Eof, then + // let's reset the highwater mark in case he wants to read the + // file sequentially again. + // + + if (HitEof && FlagOn(FileObject->Flags, FO_SEQUENTIAL_ONLY)) { + PrivateCacheMap->ReadAheadOffset[1].LowPart = + PrivateCacheMap->ReadAheadOffset[1].HighPart = 0; + } + + // + // If no faults occurred, turn read ahead off. + // + + if (ReadAheadPerformed && !FaultOccurred) { + PrivateCacheMap->ReadAheadEnabled = FALSE; + } + + ExReleaseSpinLockFromDpcLevel( &PrivateCacheMap->ReadAheadSpinLock ); + } + + // + // Free SharedCacheMap list + // + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + + ObDereferenceObject( FileObject ); + + // + // Serialize again to decrement the open count. 
+ // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + + SharedCacheMap->OpenCount -= 1; + + if ((SharedCacheMap->OpenCount == 0) && + !FlagOn(SharedCacheMap->Flags, WRITE_QUEUED) && + (SharedCacheMap->DirtyPages == 0)) { + + // + // Move to the dirty list. + // + + RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks ); + InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks, + &SharedCacheMap->SharedCacheMapLinks ); + + // + // Make sure the Lazy Writer will wake up, because we + // want him to delete this SharedCacheMap. + // + + LazyWriter.OtherWork = TRUE; + if (!LazyWriter.ScanActive) { + CcScheduleLazyWriteScan(); + } + } + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + } + + DebugTrace(-1, me, "CcPerformReadAhead -> VOID\n", 0 ); + + return; +} + + +VOID +CcSetDirtyInMask ( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN PLARGE_INTEGER FileOffset, + IN ULONG Length + ) + +/*++ + +Routine Description: + + This routine may be called to set a range of pages dirty in a user data + file, by just setting the corresponding bits in the mask bcb. + +Arguments: + + SharedCacheMap - SharedCacheMap where the pages are to be set dirty. + + FileOffset - FileOffset of first page to set dirty + + Length - Used in conjunction with FileOffset to determine how many pages + to set dirty. + +Return Value: + + None + +--*/ + +{ + KIRQL OldIrql; + PULONG MaskPtr; + ULONG Mask; + PMBCB Mbcb; + ULONG FirstPage; + ULONG LastPage; + LARGE_INTEGER BeyondLastByte; + + // + // Here is the maximum size file supported by this implementation. + // + + ASSERT((FileOffset->HighPart & ~(PAGE_SIZE - 1)) == 0); + + // + // Initialize our locals. + // + + FirstPage = (ULONG)((FileOffset->LowPart >> PAGE_SHIFT) | + (FileOffset->HighPart << (32 - PAGE_SHIFT))); + LastPage = FirstPage + + ((ULONG)((FileOffset->LowPart & (PAGE_SIZE - 1)) + Length - 1) >> PAGE_SHIFT); + BeyondLastByte.LowPart = (LastPage + 1) << PAGE_SHIFT; + BeyondLastByte.HighPart = (LONG)(LastPage >> (32 - PAGE_SHIFT)); + + // + // We have to acquire the shared cache map list, because we + // may be changing lists. + // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + + // + // If there is no Mbcb, or it is not big enough, we will have to allocate one. + // + + Mbcb = SharedCacheMap->Mbcb; + if ((Mbcb == NULL) || (LastPage >= (Mbcb->Bitmap.SizeOfBitMap - 1))) { + + PMBCB NewMbcb; + ULONG RoundedBcbSize = ((sizeof(BCB) + 7) & ~7); + ULONG SizeInBytes = ((LastPage + 1 + 1 + 7) / 8) + sizeof(MBCB); + + // + // If the size needed is not larger than a Bcb, then get one from the + // Bcb zone. + // + + if (SizeInBytes <= RoundedBcbSize) { + + NewMbcb = (PMBCB)CcAllocateInitializeBcb( NULL, NULL, NULL, NULL ); + + if (NewMbcb != NULL) { + NewMbcb->Bitmap.SizeOfBitMap = (RoundedBcbSize - sizeof(MBCB)) * 8; + } else { + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + ExRaiseStatus( STATUS_INSUFFICIENT_RESOURCES ); + } + + // + // Otherwise, we will allocate one from the pool. We throw in a fudge + // factor of 1 below to account for any bits that may shift off the end, + // plus 4 to insure a long word of 0's at the end for scanning, and then + // round up to a quad word boundary that we will get anyway. 
+ // + + } else { + + ULONG SizeToAllocate = (ULONG)(((SharedCacheMap->SectionSize.LowPart >> (PAGE_SHIFT + 3)) | + (SharedCacheMap->SectionSize.HighPart << (32 - (PAGE_SHIFT + 3)))) + + sizeof(MBCB) + 1 + 7) & ~7; + + NewMbcb = ExAllocatePool( NonPagedPool, SizeToAllocate ); + + if (NewMbcb != NULL) { + RtlZeroMemory( NewMbcb, SizeToAllocate ); + NewMbcb->Bitmap.SizeOfBitMap = (SizeToAllocate - sizeof(MBCB)) * 8; + } else { + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + ExRaiseStatus( STATUS_INSUFFICIENT_RESOURCES ); + } + } + + // + // Set in the node type, "empty" FirstDirtyPage state, and the address + // of the bitmap. + // + + NewMbcb->NodeTypeCode = CACHE_NTC_MBCB; + NewMbcb->FirstDirtyPage = MAXULONG; + NewMbcb->Bitmap.Buffer = (PULONG)(NewMbcb + 1); + + // + // If there already was an Mbcb, we need to copy the relevant data from + // it and deallocate it. + // + + if (Mbcb != NULL) { + + NewMbcb->DirtyPages = Mbcb->DirtyPages; + NewMbcb->FirstDirtyPage = Mbcb->FirstDirtyPage; + NewMbcb->LastDirtyPage = Mbcb->LastDirtyPage; + NewMbcb->ResumeWritePage = Mbcb->ResumeWritePage; + RtlCopyMemory( NewMbcb + 1, Mbcb + 1, Mbcb->Bitmap.SizeOfBitMap / 8 ); + + CcDeallocateBcb( (PBCB)Mbcb ); + } + + // + // Finally, set to use our new Mbcb. + // + + SharedCacheMap->Mbcb = Mbcb = NewMbcb; + } + + // + // If this is the first dirty page for this cache map, there is some work + // to do. + // + + if (SharedCacheMap->DirtyPages == 0) { + + // + // If the lazy write scan is not active, then start it. + // + + if (!LazyWriter.ScanActive) { + CcScheduleLazyWriteScan(); + } + + // + // Move to the dirty list. + // + + RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks ); + InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks, + &SharedCacheMap->SharedCacheMapLinks ); + + Mbcb->ResumeWritePage = FirstPage; + } + + // + // Now update the first and last dirty page indices and the bitmap. + // + + if (FirstPage < Mbcb->FirstDirtyPage) { + Mbcb->FirstDirtyPage = FirstPage; + } + + if (LastPage > Mbcb->LastDirtyPage) { + Mbcb->LastDirtyPage = LastPage; + } + + MaskPtr = &Mbcb->Bitmap.Buffer[FirstPage / 32]; + Mask = 1 << (FirstPage % 32); + + // + // Loop to set all of the bits and adjust the DirtyPage totals. + // + + for ( ; FirstPage <= LastPage; FirstPage++) { + + if ((*MaskPtr & Mask) == 0) { + + CcTotalDirtyPages += 1; + SharedCacheMap->DirtyPages += 1; + Mbcb->DirtyPages += 1; + *MaskPtr |= Mask; + } + + Mask <<= 1; + + if (Mask == 0) { + + MaskPtr += 1; + Mask = 1; + } + } + + // + // See if we need to advance our goal for ValidDataLength. + // + + BeyondLastByte.QuadPart = FileOffset->QuadPart + (LONGLONG)Length; + + if ( BeyondLastByte.QuadPart > SharedCacheMap->ValidDataGoal.QuadPart ) { + + SharedCacheMap->ValidDataGoal = BeyondLastByte; + } + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); +} + + +VOID +CcSetDirtyPinnedData ( + IN PVOID BcbVoid, + IN PLARGE_INTEGER Lsn OPTIONAL + ) + +/*++ + +Routine Description: + + This routine may be called to set a Bcb (returned by CcPinFileData) + dirty, and a candidate for the Lazy Writer. All Bcbs should be set + dirty by calling this routine, even if they are to be flushed + another way. + +Arguments: + + Bcb - Supplies a pointer to a pinned (by CcPinFileData) Bcb, to + be set dirty. + + Lsn - Lsn to be remembered with page. 
+ +Return Value: + + None + +--*/ + +{ + PBCB Bcbs[2]; + PBCB *BcbPtrPtr; + KIRQL OldIrql; + PSHARED_CACHE_MAP SharedCacheMap; + + DebugTrace(+1, me, "CcSetDirtyPinnedData: Bcb = %08lx\n", BcbVoid ); + + // + // Assume this is a normal Bcb, and set up for loop below. + // + + Bcbs[0] = (PBCB)BcbVoid; + Bcbs[1] = NULL; + BcbPtrPtr = &Bcbs[0]; + + // + // If it is an overlap Bcb, then point into the Bcb vector + // for the loop. + // + + if (Bcbs[0]->NodeTypeCode == CACHE_NTC_OBCB) { + BcbPtrPtr = &((POBCB)Bcbs[0])->Bcbs[0]; + } + + // + // Loop to set all Bcbs dirty + // + + while (*BcbPtrPtr != NULL) { + + Bcbs[0] = *(BcbPtrPtr++); + + // + // Should be no ReadOnly Bcbs + // + + ASSERT(((ULONG)Bcbs[0] & 1) != 1); + + SharedCacheMap = Bcbs[0]->SharedCacheMap; + + // + // We have to acquire the shared cache map list, because we + // may be changing lists. + // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + + if (!Bcbs[0]->Dirty) { + + ULONG Pages = Bcbs[0]->ByteLength >> PAGE_SHIFT; + + // + // Set dirty to keep the Bcb from going away until + // it is set Undirty, and assign the next modification time stamp. + // + + Bcbs[0]->Dirty = TRUE; + + // + // Initialize the OldestLsn field. + // + + if (ARGUMENT_PRESENT(Lsn)) { + Bcbs[0]->OldestLsn = *Lsn; + Bcbs[0]->NewestLsn = *Lsn; + } + + // + // Move it to the dirty list if these are the first dirty pages, + // and this is not disabled for write behind. + // + // Increase the count of dirty bytes in the shared cache map. + // + + if ((SharedCacheMap->DirtyPages == 0) && + !FlagOn(SharedCacheMap->Flags, DISABLE_WRITE_BEHIND)) { + + // + // If the lazy write scan is not active, then start it. + // + + if (!LazyWriter.ScanActive) { + CcScheduleLazyWriteScan(); + } + + RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks ); + InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks, + &SharedCacheMap->SharedCacheMapLinks ); + } + + SharedCacheMap->DirtyPages += Pages; + CcTotalDirtyPages += Pages; + } + + // + // If this Lsn happens to be older/newer than the ones we have stored, then + // change it. + // + + if (ARGUMENT_PRESENT(Lsn)) { + + if ((Bcbs[0]->OldestLsn.QuadPart == 0) || (Lsn->QuadPart < Bcbs[0]->OldestLsn.QuadPart)) { + Bcbs[0]->OldestLsn = *Lsn; + } + + if (Lsn->QuadPart > Bcbs[0]->NewestLsn.QuadPart) { + Bcbs[0]->NewestLsn = *Lsn; + } + } + + // + // See if we need to advance our goal for ValidDataLength. + // + + if ( Bcbs[0]->BeyondLastByte.QuadPart > SharedCacheMap->ValidDataGoal.QuadPart ) { + + SharedCacheMap->ValidDataGoal = Bcbs[0]->BeyondLastByte; + } + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + } + + DebugTrace(-1, me, "CcSetDirtyPinnedData -> VOID\n", 0 ); +} + + +NTSTATUS +CcSetValidData( + IN PFILE_OBJECT FileObject, + IN PLARGE_INTEGER ValidDataLength + ) + +/*++ + +Routine Description: + + This routine is used to call the File System to update ValidDataLength + for a file. + +Arguments: + + FileObject - A pointer to a referenced file object describing which file + the read should be performed from. + + ValidDataLength - Pointer to new ValidDataLength. + +Return Value: + + Status of operation. 
+ +--*/ + +{ + PIO_STACK_LOCATION IrpSp; + PDEVICE_OBJECT DeviceObject; + NTSTATUS Status; + FILE_END_OF_FILE_INFORMATION Buffer; + IO_STATUS_BLOCK IoStatus; + KEVENT Event; + PIRP Irp; + + DebugTrace(+1, me, "CcSetValidData:\n", 0 ); + DebugTrace( 0, me, " FileObject = %08lx\n", FileObject ); + DebugTrace2(0, me, " ValidDataLength = %08lx, %08lx\n", + ValidDataLength->LowPart, ValidDataLength->HighPart ); + + // + // Copy ValidDataLength to our buffer. + // + + Buffer.EndOfFile = *ValidDataLength; + + // + // Initialize the event. + // + + KeInitializeEvent( &Event, NotificationEvent, FALSE ); + + // + // Begin by getting a pointer to the device object that the file resides + // on. + // + + DeviceObject = IoGetRelatedDeviceObject( FileObject ); + + // + // Allocate an I/O Request Packet (IRP) for this in-page operation. + // + + Irp = IoAllocateIrp( DeviceObject->StackSize, FALSE ); + if (Irp == NULL) { + + DebugTrace(-1, me, "CcSetValidData-> STATUS_INSUFFICIENT_RESOURCES\n", 0 ); + + return STATUS_INSUFFICIENT_RESOURCES; + } + + // + // Get a pointer to the first stack location in the packet. This location + // will be used to pass the function codes and parameters to the first + // driver. + // + + IrpSp = IoGetNextIrpStackLocation( Irp ); + + // + // Fill in the IRP according to this request, setting the flags to + // just cause IO to set the event and deallocate the Irp. + // + + Irp->Flags = IRP_PAGING_IO | IRP_SYNCHRONOUS_PAGING_IO; + Irp->RequestorMode = KernelMode; + Irp->UserIosb = &IoStatus; + Irp->UserEvent = &Event; + Irp->Tail.Overlay.OriginalFileObject = FileObject; + Irp->Tail.Overlay.Thread = PsGetCurrentThread(); + Irp->AssociatedIrp.SystemBuffer = &Buffer; + + // + // Fill in the normal read parameters. + // + + IrpSp->MajorFunction = IRP_MJ_SET_INFORMATION; + IrpSp->FileObject = FileObject; + IrpSp->DeviceObject = DeviceObject; + IrpSp->Parameters.SetFile.Length = sizeof(FILE_END_OF_FILE_INFORMATION); + IrpSp->Parameters.SetFile.FileInformationClass = FileEndOfFileInformation; + IrpSp->Parameters.SetFile.FileObject = NULL; + IrpSp->Parameters.SetFile.AdvanceOnly = TRUE; + + // + // Queue the packet to the appropriate driver based on whether or not there + // is a VPB associated with the device. This routine should not raise. + // + + Status = IoCallDriver( DeviceObject, Irp ); + + // + // If pending is returned (which is a successful status), + // we must wait for the request to complete. + // + + if (Status == STATUS_PENDING) { + KeWaitForSingleObject( &Event, + Executive, + KernelMode, + FALSE, + (PLARGE_INTEGER)NULL); + } + + // + // If we got an error back in Status, then the Iosb + // was not written, so we will just copy the status + // there, then test the final status after that. + // + + if (!NT_SUCCESS(Status)) { + IoStatus.Status = Status; + } + + DebugTrace(-1, me, "CcSetValidData-> %08lx\n", IoStatus.Status ); + + return IoStatus.Status; +} + + +// +// Internal Support Routine +// + +BOOLEAN +CcAcquireByteRangeForWrite ( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN PLARGE_INTEGER TargetOffset OPTIONAL, + IN ULONG TargetLength, + OUT PLARGE_INTEGER FileOffset, + OUT PULONG Length, + OUT PBCB *FirstBcb + ) + +/*++ + +Routine Description: + + This routine is called by the Lazy Writer to try to find a contiguous + range of bytes from the specified SharedCacheMap that are dirty and + should be flushed. After flushing, these bytes should be released + by calling CcReleaseByteRangeFromWrite. 
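+
+    A typical caller drives this routine in a loop, along the following
+    lines (a sketch only - hot spot, error and target-range handling are
+    omitted; see CcFlushCache below for the real usage):
+
+        while (CcAcquireByteRangeForWrite( SharedCacheMap, NULL, 0,
+                                           &FileOffset, &Length, &FirstBcb )) {
+
+            MmFlushSection( SharedCacheMap->FileObject->SectionObjectPointer,
+                            &FileOffset, Length, &IoStatus, FALSE );
+
+            CcReleaseByteRangeFromWrite( SharedCacheMap, &FileOffset, Length,
+                                         FirstBcb, FALSE );
+        }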
+ +Arguments: + + SharedCacheMap - for the file for which the dirty byte range is sought + + TargetOffset - If specified, then only the specified range is + to be flushed. + + TargetLength - If target offset specified, this completes the range. + In any case, this field is zero for the Lazy Writer, + and nonzero for explicit flush calls. + + FileOffset - Returns the offset for the beginning of the dirty byte + range to flush + + Length - Returns the length of bytes in the range. + + FirstBcb - Returns the first Bcb in the list for the range, to be used + when calling CcReleaseByteRangeFromWrite, or NULL if dirty + pages were found in the mask Bcb. + +Return Value: + + FALSE - if no dirty byte range could be found to match the necessary + criteria. + + TRUE - if a dirty byte range is being returned. + +--*/ + +{ + KIRQL OldIrql; + PMBCB Mbcb; + PBCB Bcb; + LARGE_INTEGER LsnToFlushTo = {0, 0}; + + DebugTrace(+1, me, "CcAcquireByteRangeForWrite:\n", 0); + DebugTrace( 0, me, " SharedCacheMap = %08lx\n", SharedCacheMap); + + // + // Initially clear outputs. + // + + FileOffset->QuadPart = 0; + *Length = 0; + + // + // We must acquire the CcMasterSpinLock. + // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + + // + // See if there is a simple Mask Bcb, and if there is anything dirty in + // it. If so we will simply handle that case here by processing the bitmap. + // + + Mbcb = SharedCacheMap->Mbcb; + + if ((Mbcb != NULL) && + (Mbcb->DirtyPages != 0) && + ((Mbcb->PagesToWrite != 0) || (TargetLength != 0))) { + + PULONG EndPtr; + PULONG MaskPtr; + ULONG Mask; + ULONG FirstDirtyPage; + ULONG OriginalFirstDirtyPage; + + // + // If a target range was specified (outside call to CcFlush for a range), + // then calculate FirstPage and EndPtr based on these inputs. + // + + if (ARGUMENT_PRESENT(TargetOffset)) { + + FirstDirtyPage = (ULONG)(TargetOffset->QuadPart >> PAGE_SHIFT); + EndPtr = &Mbcb->Bitmap.Buffer[(ULONG)((TargetOffset->QuadPart + TargetLength - 1) >> PAGE_SHIFT) / 32]; + + // + // We do not grow the bitmap with the file, only as we set dirty + // pages, so it is possible that the caller is off the end. If + // If even the first page is off the end, we will catch it below. + // + + if (EndPtr > &Mbcb->Bitmap.Buffer[Mbcb->LastDirtyPage / 32]) { + + EndPtr = &Mbcb->Bitmap.Buffer[Mbcb->LastDirtyPage / 32]; + } + + // + // Otherwise, for the Lazy Writer pick up where we left off. + // + + } else { + + // + // If a length was specified, then it is an explicit flush, and + // we want to start with the first dirty page. + // + + FirstDirtyPage = Mbcb->FirstDirtyPage; + + // + // Otherwise, it is the Lazy Writer, so pick up at the resume + // point so long as that is beyond the FirstDirtyPage. + // + + if ((TargetLength == 0) && (Mbcb->ResumeWritePage >= FirstDirtyPage)) { + FirstDirtyPage = Mbcb->ResumeWritePage; + } + EndPtr = &Mbcb->Bitmap.Buffer[Mbcb->LastDirtyPage / 32]; + } + + // + // Form a few other inputs for our dirty page scan. + // + + MaskPtr = &Mbcb->Bitmap.Buffer[FirstDirtyPage / 32]; + Mask = (ULONG)(-1 << (FirstDirtyPage % 32)); + OriginalFirstDirtyPage = FirstDirtyPage; + + // + // Because of the possibility of getting stuck on a "hot spot" which gets + // modified over and over, we want to be very careful to resume exactly + // at the recorded resume point. If there is nothing there, then we + // fall into the loop below to scan for nozero long words in the bitmap, + // starting at the next longword. 
+ // + + if ((MaskPtr > EndPtr) || (*MaskPtr & Mask) == 0) { + + MaskPtr += 1; + Mask = (ULONG)-1; + FirstDirtyPage = (FirstDirtyPage + 32) & ~31; + + // + // If we go beyond the end, then we must wrap back to the first + // dirty page. We will just go back to the start of the first + // longword. + // + + if (MaskPtr > EndPtr) { + + // + // If this is an explicit flush, get out when we hit the end + // of the range. + // + + if (TargetLength != 0) { + + goto Scan_Bcbs; + } + + MaskPtr = &Mbcb->Bitmap.Buffer[Mbcb->FirstDirtyPage / 32]; + FirstDirtyPage = Mbcb->FirstDirtyPage & ~31; + OriginalFirstDirtyPage = Mbcb->FirstDirtyPage; + + // + // We can also backup the last dirty page hint to our + // resume point. + // + + ASSERT(Mbcb->ResumeWritePage >= Mbcb->FirstDirtyPage); + + Mbcb->LastDirtyPage = Mbcb->ResumeWritePage - 1; + } + + // + // To scan the bitmap faster, we scan for entire long words which are + // nonzero. + // + + while (*MaskPtr == 0) { + + MaskPtr += 1; + FirstDirtyPage += 32; + + // + // If we go beyond the end, then we must wrap back to the first + // dirty page. We will just go back to the start of the first + // longword. + // + + if (MaskPtr > EndPtr) { + + // + // If this is an explicit flush, get out when we hit the end + // of the range. + // + + if (TargetLength != 0) { + + goto Scan_Bcbs; + } + + MaskPtr = &Mbcb->Bitmap.Buffer[Mbcb->FirstDirtyPage / 32]; + FirstDirtyPage = Mbcb->FirstDirtyPage & ~31; + OriginalFirstDirtyPage = Mbcb->FirstDirtyPage; + + // + // We can also backup the last dirty page hint to our + // resume point. + // + + ASSERT(Mbcb->ResumeWritePage >= Mbcb->FirstDirtyPage); + + Mbcb->LastDirtyPage = Mbcb->ResumeWritePage - 1; + } + } + } + + // + // Calculate the first set bit in the mask that we hit on. + // + + Mask = ~Mask + 1; + + // + // Now loop to find the first set bit. + // + + while ((*MaskPtr & Mask) == 0) { + + Mask <<= 1; + FirstDirtyPage += 1; + } + + // + // If a TargetOffset was specified, then make sure we do not start + // beyond the specified range. + // + + if (ARGUMENT_PRESENT(TargetOffset) && + (FirstDirtyPage >= ((TargetOffset->QuadPart + TargetLength + PAGE_SIZE - 1) >> PAGE_SHIFT))) { + + goto Scan_Bcbs; + } + + // + // Now loop to count the set bits at that point, clearing them as we + // go because we plan to write the corresponding pages. Stop as soon + // as we find a clean page, or we reach our maximum write size. Of + // course we want to ignore long word boundaries and keep trying to + // extend the write. We do not check for wrapping around the end of + // the bitmap here, because we guarantee some zero bits at the end + // in CcSetDirtyInMask. + // + + while (((*MaskPtr & Mask) != 0) && (*Length < (MAX_WRITE_BEHIND / PAGE_SIZE)) && + (!ARGUMENT_PRESENT(TargetOffset) || ((FirstDirtyPage + *Length) < + (ULONG)((TargetOffset->QuadPart + TargetLength + PAGE_SIZE - 1) >> PAGE_SHIFT)))) { + + ASSERT(MaskPtr <= (&Mbcb->Bitmap.Buffer[Mbcb->LastDirtyPage / 32])); + + *MaskPtr -= Mask; + *Length += 1; + Mask <<= 1; + + if (Mask == 0) { + + MaskPtr += 1; + Mask = 1; + + if (MaskPtr > EndPtr) { + break; + } + } + } + + // + // Now reduce the count of pages we were supposed to write this time, + // possibly clearing this count. + // + + if (*Length < Mbcb->PagesToWrite) { + + Mbcb->PagesToWrite -= *Length; + + } else { + + Mbcb->PagesToWrite = 0; + } + + // + // Reduce the dirty page counts by the number of pages we just cleared. 
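+        //
+        // (The same three counters that were raised when the bits were
+        // set dirty - CcTotalDirtyPages, the SharedCacheMap total and the
+        // Mbcb total - are lowered here.  Note that *Length is still a
+        // page count at this point; it is only converted to a byte count
+        // when the outputs are formed below.)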
+ // + + ASSERT(Mbcb->DirtyPages >= *Length); + + CcTotalDirtyPages -= *Length; + SharedCacheMap->DirtyPages -= *Length; + Mbcb->DirtyPages -= *Length; + + // + // Normally we need to reduce CcPagesYetToWrite appropriately. + // + + if (CcPagesYetToWrite > *Length) { + CcPagesYetToWrite -= *Length; + } else { + CcPagesYetToWrite = 0; + } + + // + // If we took out the last dirty page, then move the SharedCacheMap + // back to the clean list. + // + + if (SharedCacheMap->DirtyPages == 0) { + + RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks ); + InsertTailList( &CcCleanSharedCacheMapList, + &SharedCacheMap->SharedCacheMapLinks ); + } + + // + // If the number of dirty pages for the Mcb went to zero, we can reset + // our hint fields now. + // + + if (Mbcb->DirtyPages == 0) { + + Mbcb->FirstDirtyPage = MAXULONG; + Mbcb->LastDirtyPage = 0; + Mbcb->ResumeWritePage = 0; + + // + // Otherwise we have to update the hint fields. + // + + } else { + + // + // Advance the first dirty page hint if we can. + // + + if (Mbcb->FirstDirtyPage == OriginalFirstDirtyPage) { + + Mbcb->FirstDirtyPage = FirstDirtyPage + *Length; + } + + // + // Set to resume the next scan at the next bit for + // the Lazy Writer. + // + + if (TargetLength == 0) { + + Mbcb->ResumeWritePage = FirstDirtyPage + *Length; + } + } + + // + // We can save a callback by letting our caller know when + // we have no more pages to write. + // + + if (IsListEmpty(&SharedCacheMap->BcbList)) { + SharedCacheMap->PagesToWrite = Mbcb->PagesToWrite; + } + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + + // + // Now form all of our outputs. We calculated *Length as a page count, + // but our caller wants it in bytes. + // + + *Length <<= PAGE_SHIFT; + FileOffset->QuadPart = (LONGLONG)FirstDirtyPage << PAGE_SHIFT; + *FirstBcb = NULL; + + DebugTrace2(0, me, " LowPart, + FileOffset->HighPart ); + DebugTrace( 0, me, " TRUE\n", 0 ); + + return TRUE; + } + + // + // We get here if there is no Mbcb or no dirty pages in it. Note that we + // wouldn't even be here if there were no dirty pages in this SharedCacheMap. + // + + // + // Now point to last Bcb in List, and loop until we hit one of the + // breaks below or the beginning of the list. + // + +Scan_Bcbs: + + // + // Use while TRUE to handle case where the current target range wraps + // (escape is at the bottom). + // + + while (TRUE) { + + Bcb = CONTAINING_RECORD( SharedCacheMap->BcbList.Blink, BCB, BcbLinks ); + + // + // If this is a large file, and we are to resume from a nonzero FileOffset, + // call CcFindBcb to get a quicker start. + // + + if ((SharedCacheMap->SectionSize.QuadPart > BEGIN_BCB_LIST_ARRAY) && + !ARGUMENT_PRESENT(TargetOffset) && + (SharedCacheMap->BeyondLastFlush != 0)) { + + LARGE_INTEGER TempQ; + + TempQ.QuadPart = SharedCacheMap->BeyondLastFlush + PAGE_SIZE; + + // + // Position ourselves. If we did not find a Bcb for the BeyondLastFlush + // page, then a lower FileOffset was returned, so we want to move forward + // one. + // + + if (!CcFindBcb( SharedCacheMap, + (PLARGE_INTEGER)&SharedCacheMap->BeyondLastFlush, + &TempQ, + &Bcb )) { + Bcb = CONTAINING_RECORD( Bcb->BcbLinks.Blink, BCB, BcbLinks ); + } + } + + while (&Bcb->BcbLinks != &SharedCacheMap->BcbList) { + + // + // Skip over this item if it is a listhead. + // + + if (Bcb->NodeTypeCode != CACHE_NTC_BCB) { + + Bcb = CONTAINING_RECORD( Bcb->BcbLinks.Blink, BCB, BcbLinks ); + continue; + } + + // + // If we are doing a specified range, then get out if we hit a + // higher Bcb. 
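+            //
+            // (The Bcb list is ordered by descending file offset and this
+            // scan walks it backwards, i.e. in ascending offset order, so
+            // the first Bcb which starts at or beyond the end of the
+            // target range also ends the scan.)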
+ // + + if (ARGUMENT_PRESENT(TargetOffset) && + ((TargetOffset->QuadPart + TargetLength) <= Bcb->FileOffset.QuadPart)) { + + break; + } + + // + // If we have not started a run, then see if this Bcb is a candidate + // to start one. + // + + if (*Length == 0) { + + // + // Else see if the Bcb is dirty, and is in our specified range, if + // there is one. + // + + if (!Bcb->Dirty || + (ARGUMENT_PRESENT(TargetOffset) && (TargetOffset->QuadPart >= Bcb->BeyondLastByte.QuadPart)) || + (!ARGUMENT_PRESENT(TargetOffset) && (Bcb->FileOffset.QuadPart < SharedCacheMap->BeyondLastFlush))) { + + Bcb = CONTAINING_RECORD( Bcb->BcbLinks.Blink, BCB, BcbLinks ); + continue; + } + } + + // + // Else, if we have started a run, then if this guy cannot be + // appended to the run, then break. Note that we ignore the + // Bcb's modification time stamp here to simplify the test. + // + // If the Bcb is currently pinned, then there is no sense in causing + // contention, so we will skip over this guy as well. + // + + else { + if (!Bcb->Dirty || ( Bcb->FileOffset.QuadPart != ( FileOffset->QuadPart + (LONGLONG)*Length)) + || (*Length + Bcb->ByteLength > MAX_WRITE_BEHIND) + || (Bcb->PinCount != 0)) { + + break; + } + } + + // + // Increment PinCount to prevent Bcb from going away once the + // SpinLock is released, or we set it clean for the case where + // modified write is allowed. + // + + Bcb->PinCount += 1; + + // + // Release the SpinLock before waiting on the resource. + // + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + + if (FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED) && + !FlagOn(SharedCacheMap->Flags, DISABLE_WRITE_BEHIND)) { + + // + // Now acquire the Bcb exclusive, so that we know that nobody + // has it pinned and thus no one can be modifying the described + // buffer. To acquire the first Bcb in a run, we can afford + // to wait, because we are not holding any resources. However + // if we already have a Bcb, then we better not wait, because + // someone could have this Bcb pinned, and then wait for the + // Bcb we already have exclusive. + // + // For streams for which we have not disabled modified page + // writing, we do not need to acquire this resource, and the + // foreground processing will not be acquiring the Bcb either. + // + + if (!ExAcquireResourceExclusive( &Bcb->Resource, + (BOOLEAN)(*Length == 0) )) { + + DebugTrace( 0, me, "Could not acquire 2nd Bcb\n", 0 ); + + // + // Release the Bcb count we took out above. We say + // ReadOnly = TRUE since we do not own the resource, + // and SetClean = FALSE because we just want to decement + // the count. + // + + CcUnpinFileData( Bcb, TRUE, UNPIN ); + + // + // When we leave the loop, we have to have the spin lock + // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + break; + } + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + + // + // If someone has the file open WriteThrough, then the Bcb may no + // longer be dirty. If so, call CcUnpinFileData to decrement the + // PinCount we incremented and free the resource. + // + + if (!Bcb->Dirty) { + + // + // Release the spinlock so that we can call CcUnpinFileData + // + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + + CcUnpinFileData( Bcb, FALSE, UNPIN ); + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + + // + // Now if we already have some data we can just break to return + // it, otherwise we have to restart the scan, since our Bcb + // may have gone away. 
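+                //
+                // (Our own pin count was all that kept the Bcb from being
+                // deleted while the spin lock was dropped, so once that
+                // count has been given up the pointer cannot safely be
+                // followed any more.)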
+ // + + if (*Length != 0) { + break; + } + else { + + Bcb = CONTAINING_RECORD( SharedCacheMap->BcbList.Blink, BCB, BcbLinks ); + continue; + } + } + + // + // If we are not in the disable modified write mode (normal user data) + // then we must set the buffer clean before doing the write, since we + // are unsynchronized with anyone producing dirty data. That way if we, + // for example, are writing data out while it is actively being changed, + // at least the changer will mark the buffer dirty afterwards and cause + // us to write it again later. + // + + } else { + + CcUnpinFileData( Bcb, TRUE, SET_CLEAN ); + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + } + + DebugTrace( 0, me, "Adding Bcb = %08lx to run\n", Bcb ); + + // + // Update all of our return values. Note that FirstBcb refers to the + // FirstBcb in terms of how the Bcb list is ordered. Since the Bcb list + // is ordered by descending file offsets, FirstBcb will actually return + // the Bcb with the highest FileOffset. + // + + if (*Length == 0) { + *FileOffset = Bcb->FileOffset; + } + *FirstBcb = Bcb; + *Length += Bcb->ByteLength; + + // + // If there is a log file flush callback for this stream, then we must + // remember the largest Lsn we are about to flush. + // + + if ((SharedCacheMap->FlushToLsnRoutine != NULL) && + (Bcb->NewestLsn.QuadPart > LsnToFlushTo.QuadPart)) { + + LsnToFlushTo = Bcb->NewestLsn; + } + + Bcb = CONTAINING_RECORD( Bcb->BcbLinks.Blink, BCB, BcbLinks ); + } + + // + // If we found something, update our range last flush range and reduce + // PagesToWrite. + // + + if (*Length != 0) { + + // + // If this is the Lazy Writer, then update BeyondLastFlush and + // the PagesToWrite target. + // + + if (!ARGUMENT_PRESENT(TargetOffset)) { + + SharedCacheMap->BeyondLastFlush = FileOffset->QuadPart + *Length; + + if (SharedCacheMap->PagesToWrite > (*Length >> PAGE_SHIFT)) { + SharedCacheMap->PagesToWrite -= (*Length >> PAGE_SHIFT); + } else { + SharedCacheMap->PagesToWrite = 0; + } + } + + break; + + // + // Else, if we scanned the entire file, get out - nothing to write now. + // + + } else if ((SharedCacheMap->BeyondLastFlush == 0) || ARGUMENT_PRESENT(TargetOffset)) { + break; + } + + // + // Otherwise, we may have not found anything because there is nothing + // beyond the last flush. In that case it is time to wrap back to 0 + // and keep scanning. + // + + SharedCacheMap->BeyondLastFlush = 0; + } + + + + // + // Now release the spinlock file while we go off and do the I/O + // + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + + // + // If we need to flush to some Lsn, this is the time to do it now + // that we have found the largest Lsn and freed the spin lock. + // + + if (LsnToFlushTo.QuadPart != 0) { + + try { + + (*SharedCacheMap->FlushToLsnRoutine) ( SharedCacheMap->LogHandle, + LsnToFlushTo ); + } except( CcExceptionFilter( GetExceptionCode() )) { + + // + // If there was an error, it will be raised. We cannot + // write anything until we successfully flush the log + // file, so we will release everything here and just + // return with 0 bytes. + // + + LARGE_INTEGER LastOffset; + PBCB NextBcb; + + // + // Now loop to free up all of the Bcbs. Set the time + // stamps to 0, so that we are guaranteed to try to + // flush them again on the next sweep. + // + + do { + NextBcb = CONTAINING_RECORD( (*FirstBcb)->BcbLinks.Flink, BCB, BcbLinks ); + + // + // Skip over any listheads. 
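+                //
+                // (For large files additional listheads are embedded in
+                // the Bcb list itself - see CcFindBcb - so anything whose
+                // NodeTypeCode is not CACHE_NTC_BCB is simply stepped
+                // over rather than unpinned.)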
+ // + + if ((*FirstBcb)->NodeTypeCode == CACHE_NTC_BCB) { + + LastOffset = (*FirstBcb)->FileOffset; + + CcUnpinFileData( *FirstBcb, FALSE, UNPIN ); + } + + *FirstBcb = NextBcb; + } while (FileOffset->QuadPart != LastOffset.QuadPart); + + // + // Show we did not acquire anything. + // + + *Length = 0; + } + } + + // + // If we got anything, return TRUE. + // + + DebugTrace2(0, me, " LowPart, + FileOffset->HighPart ); + DebugTrace( 0, me, " %02lx\n", *Length != 0 ); + + return ((BOOLEAN)(*Length != 0)); +} + + +// +// Internal Support Routine +// + +VOID +CcReleaseByteRangeFromWrite ( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN PLARGE_INTEGER FileOffset, + IN ULONG Length, + IN PBCB FirstBcb, + IN BOOLEAN VerifyRequired + ) + +/*++ + +Routine Description: + + This routine is called by the Lazy Writer to free a range of bytes and + clear all dirty bits, for a byte range returned by CcAcquireByteRangeForWrite. + +Arguments: + + SharedCacheMap - As supplied to CcAcquireByteRangeForWrite + + FileOffset - As returned from CcAcquireByteRangeForWrite + + Length - As returned from CcAcquirebyteRangeForWrite + + FirstBcb - As returned from CcAcquireByteRangeForWrite + + VerifyRequired - supplied as TRUE if a verify required error was received. + In this case we must mark/leave the data dirty so that + we will try to write it again. + +Return Value: + + None + +--*/ + +{ + LARGE_INTEGER LastOffset; + PBCB NextBcb; + + DebugTrace(+1, me, "CcReleaseByteRangeFromWrite:\n", 0); + DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart, + FileOffset->HighPart ); + + // + // If it is a mask Mbcb we are getting, then we only have to check + // for VerifyRequired. + // + + if (FirstBcb == NULL) { + + ASSERT(Length != 0); + + if (VerifyRequired) { + CcSetDirtyInMask( SharedCacheMap, FileOffset, Length ); + } + + DebugTrace(-1, me, "CcReleaseByteRangeFromWrite -> VOID\n", 0); + + return; + } + + // + // Now loop to free up all of the Bcbs. If modified writing is disabled + // for each Bcb, then we are to set it clean here, since we are synchronized + // with callers who set the data dirty. Otherwise we only have the Bcb pinned + // so it will not go away, and we only unpin it here. + // + + do { + NextBcb = CONTAINING_RECORD( FirstBcb->BcbLinks.Flink, BCB, BcbLinks ); + + // + // Skip over any listheads. + // + + if (FirstBcb->NodeTypeCode == CACHE_NTC_BCB) { + + LastOffset = FirstBcb->FileOffset; + + // + // If this is file system metadata (we disabled modified writing), + // then this is the time to mark the buffer clean, so long as we + // did not get verify required. + // + + if (FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) { + + CcUnpinFileData( FirstBcb, + BooleanFlagOn(SharedCacheMap->Flags, DISABLE_WRITE_BEHIND), + SET_CLEAN ); + } + + // + // If we got verify required, we have to mark the buffer dirty again + // so we will try again later. Note we have to make this call again + // to make sure the right thing happens with time stamps. + // + + if (VerifyRequired) { + CcSetDirtyPinnedData( FirstBcb, NULL ); + } + + // + // Finally remove a pin count left over from CcAcquireByteRangeForWrite. 
+ // + + CcUnpinFileData( FirstBcb, TRUE, UNPIN ); + } + + FirstBcb = NextBcb; + } while (FileOffset->QuadPart != LastOffset.QuadPart); + + DebugTrace(-1, me, "CcReleaseByteRangeFromWrite -> VOID\n", 0); +} + + +// +// Internal Support Routine +// + +NTSTATUS +FASTCALL +CcWriteBehind ( + IN PSHARED_CACHE_MAP SharedCacheMap + ) + +/*++ + +Routine Description: + + This routine may be called with Wait = FALSE to see if write behind + is required, or with Wait = TRUE to perform write behind as required. + + The code is very similar to the the code that the Lazy Writer performs + for each SharedCacheMap. The main difference is in the call to + CcAcquireByteRangeForWrite. Write Behind does not care about time + stamps (passing ULONG to accept all time stamps), but it will never + dump the first (highest byte offset) buffer in the list if the last + byte of that buffer is not yet written. The Lazy Writer does exactly + the opposite, in the sense that it is totally time-driven, and will + even dump a partially modified buffer if it sits around long enough. + +Arguments: + + SharedCacheMap - Pointer to SharedCacheMap to be written + +Return Value: + + FALSE - if write behind is required, but the caller supplied + Wait = FALSE + + TRUE - if write behind is complete or not required + +--*/ + +{ + IO_STATUS_BLOCK IoStatus; + KIRQL OldIrql; + ULONG ActivePage; + ULONG PageIsDirty; + PMBCB Mbcb; + NTSTATUS Status; + ULONG FileExclusive = FALSE; + PVACB ActiveVacb = NULL; + + DebugTrace(+1, me, "CcWriteBehind\n", 0 ); + DebugTrace( 0, me, " SharedCacheMap = %08lx\n", SharedCacheMap ); + + // + // First we have to acquire the file for LazyWrite, to avoid + // deadlocking with writers to the file. We do this via the + // CallBack procedure specified to CcInitializeCacheMap. + // + + (*SharedCacheMap->Callbacks->AcquireForLazyWrite) + ( SharedCacheMap->LazyWriteContext, TRUE ); + + // + // See if there is a previous active page to clean up, but only + // do so now if it is the last dirty page or no users have the + // file open. We will free it below after dropping the spinlock. + // + + ExAcquireFastLock( &CcMasterSpinLock, &OldIrql ); + + if ((SharedCacheMap->DirtyPages <= 1) || (SharedCacheMap->OpenCount == 0)) { + GetActiveVacbAtDpcLevel( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty ); + } + + // + // Increment open count so that our caller's views stay available + // for CcGetVacbMiss. We could be tying up all of the views, and + // still need to write file sizes. + // + + SharedCacheMap->OpenCount += 1; + + // + // If there is a mask bcb, then we need to establish a target for + // it to flush. + // + + if ((Mbcb = SharedCacheMap->Mbcb) != 0) { + + // + // Set a target of pages to write, assuming that any Active + // Vacb will increase the number. + // + + Mbcb->PagesToWrite = Mbcb->DirtyPages + ((ActiveVacb != NULL) ? 1 : 0); + + if (Mbcb->PagesToWrite > CcPagesYetToWrite) { + + Mbcb->PagesToWrite = CcPagesYetToWrite; + } + } + + ExReleaseFastLock( &CcMasterSpinLock, OldIrql ); + + // + // Now free the active Vacb, if we found one. + // + + if (ActiveVacb != NULL) { + + CcFreeActiveVacb( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty ); + } + + // + // Now perform the lazy writing for this file via a special call + // to CcFlushCache. He recognizes us by the &CcNoDelay input to + // FileOffset, which signifies a Lazy Write, but is subsequently + // ignored. 
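+    //
+    // (CcFlushCache checks for pointer identity with &CcNoDelay, sets its
+    // IsLazyWriter flag, and then behaves as though no FileOffset had been
+    // supplied at all, so the flush resumes from wherever the last lazy
+    // write pass left off in the file.)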
+ // + + CcFlushCache( SharedCacheMap->FileObject->SectionObjectPointer, + &CcNoDelay, + 1, + &IoStatus ); + + // + // No need for the Lazy Write resource now. + // + + (*SharedCacheMap->Callbacks->ReleaseFromLazyWrite) + ( SharedCacheMap->LazyWriteContext ); + + // + // Check if we need to put up a popup. + // + + if (!NT_SUCCESS(IoStatus.Status) && !RetryError(IoStatus.Status)) { + + // + // We lost writebehind data. Try to get the filename. If we can't, + // then just raise the error returned by the failing write + // + + POBJECT_NAME_INFORMATION FileNameInfo; + NTSTATUS QueryStatus; + ULONG whocares; + + FileNameInfo = ExAllocatePool(PagedPool,1024); + + if ( FileNameInfo ) { + QueryStatus = ObQueryNameString( SharedCacheMap->FileObject, + FileNameInfo, + 1024, + &whocares ); + + if ( !NT_SUCCESS(QueryStatus) ) { + ExFreePool(FileNameInfo); + FileNameInfo = NULL; + } + } + + if ( FileNameInfo ) { + IoRaiseInformationalHardError( STATUS_LOST_WRITEBEHIND_DATA,&FileNameInfo->Name, NULL ); + ExFreePool(FileNameInfo); + } else { + if ( SharedCacheMap->FileObject->FileName.Length && + SharedCacheMap->FileObject->FileName.MaximumLength && + SharedCacheMap->FileObject->FileName.Buffer ) { + + IoRaiseInformationalHardError( STATUS_LOST_WRITEBEHIND_DATA,&SharedCacheMap->FileObject->FileName, NULL ); + } + } + + // + // See if there is any deferred writes we can post. + // + + } else if (!IsListEmpty(&CcDeferredWrites)) { + CcPostDeferredWrites(); + } + + // + // Now acquire CcMasterSpinLock again to + // see if we need to call CcUninitialize before returning. + // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + + // + // If the the current ValidDataGoal is greater (or equal) than ValidDataLength, + // then we must see if we have advanced beyond the current ValidDataLength. + // + // If we have NEVER written anything out from this shared cache map, then + // there is no need to check anything associtated with valid data length + // here. We will come by here again when, and if, anybody actually + // modifies the file and we lazy write some data. + // + + Status = STATUS_SUCCESS; + if (FlagOn(SharedCacheMap->Flags, LAZY_WRITE_OCCURRED) && + (SharedCacheMap->ValidDataGoal.QuadPart >= SharedCacheMap->ValidDataLength.QuadPart) && + (SharedCacheMap->ValidDataLength.QuadPart != MAXLONGLONG) && + (SharedCacheMap->FileSize.QuadPart != 0)) { + + LARGE_INTEGER NewValidDataLength = {0,0}; + + // + // If the Bcb List is completely empty, then we must have written + // everything, and then new ValidDataLength is equal to ValidDataGoal. + // + + if (SharedCacheMap->DirtyPages == 0) { + + NewValidDataLength = SharedCacheMap->ValidDataGoal; + } + + // + // Else we will look at the last Bcb in the descending-order Bcb + // list, and see if it describes data beyond ValidDataGoal. + // + // (This test is logically too conservative. For example, the last Bcb + // may not even be dirty (in which case we should look at its + // predecessor), or we may have earlier written valid data to this + // byte range (which also means if we knew this we could look at + // the predessor). This simply means that the Lazy Writer may not + // successfully get ValidDataLength updated in a file being randomly + // accessed until the level of file access dies down, or at the latest + // until the file is closed. However, security will never be + // compromised.) 
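+        //
+        // (In effect the candidate computed below is the lowest file
+        // offset at which dirty data is still outstanding - the first
+        // dirty page recorded in the Mbcb, reduced further if a dirty Bcb
+        // is found in front of it - since everything in front of that
+        // point must already have made it to disk.)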
+ // + + else { + + PBCB LastBcb; + PMBCB Mbcb = SharedCacheMap->Mbcb; + + if ((Mbcb != NULL) && (Mbcb->DirtyPages != 0)) { + + NewValidDataLength.QuadPart = (LONGLONG)Mbcb->FirstDirtyPage << PAGE_SHIFT; + } + + LastBcb = CONTAINING_RECORD( SharedCacheMap->BcbList.Flink, + BCB, + BcbLinks ); + + while (&LastBcb->BcbLinks != &SharedCacheMap->BcbList) { + + if ((LastBcb->NodeTypeCode == CACHE_NTC_BCB) && LastBcb->Dirty) { + break; + } + + LastBcb = CONTAINING_RECORD( LastBcb->BcbLinks.Flink, + BCB, + BcbLinks ); + } + + // + // Check the Base of the last entry. + // + + if ((&LastBcb->BcbLinks != &SharedCacheMap->BcbList) && + (LastBcb->FileOffset.QuadPart < NewValidDataLength.QuadPart )) { + + NewValidDataLength = LastBcb->FileOffset; + } + } + + // + // If New ValidDataLength has been written, then we have to + // call the file system back to update it. We must temporarily + // drop our global list while we do this, which is safe to do since + // we have not cleared WRITE_QUEUED. + // + // Note we keep calling any time we wrote the last page of the file, + // to solve the "famous" AFS Server problem. The file system will + // truncate our valid data call to whatever is currently valid. But + // then if he writes a little more, we do not want to stop calling + // back. + // + + if ( NewValidDataLength.QuadPart >= SharedCacheMap->ValidDataLength.QuadPart ) { + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + + // + // Call file system to set new valid data. We have no + // one to tell if this doesn't work. + // + + Status = CcSetValidData( SharedCacheMap->FileObject, + &NewValidDataLength ); + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + if (NT_SUCCESS(Status)) { + SharedCacheMap->ValidDataLength = NewValidDataLength; +#ifdef TOMM + } else if ((Status != STATUS_INSUFFICIENT_RESOURCES) && !RetryError(Status)) { + DbgPrint("Unexpected status from CcSetValidData: %08lx, FileObject: %08lx\n", + Status, + SharedCacheMap->FileObject); + DbgBreakPoint(); +#endif TOMM + } + } + } + + // + // Show we are done. + // + + SharedCacheMap->OpenCount -= 1; + + // + // Make an approximate guess about whether we will call CcDeleteSharedCacheMap or not + // to truncate the file. If we fail to acquire here, then we will not delete below, + // and just catch it on a subsequent pass. + // + + if (FlagOn(SharedCacheMap->Flags, TRUNCATE_REQUIRED) && (SharedCacheMap->OpenCount == 0)) { + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + FsRtlAcquireFileExclusive( SharedCacheMap->FileObject ); + FileExclusive = TRUE; + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + } + + // + // Otherwise see if we are to delete this SharedCacheMap. Note + // we go ahead and release the Resource first, because with + // OpenCount == 0 and an empty Bcb list, no one will be trying + // to access this SharedCacheMap but us. Also, by releasing first + // we avoid a deadlock with the file system when the FileObject is + // dereferenced. Note that CcDeleteSharedCacheMap requires that + // the CcMasterSpinLock already be acquired, and it + // releases it. We have to clear the indirect pointer in this + // case, because no one else will do it. + // + // Also do not delete the SharedCacheMap if we got an error on + // the ValidDataLength callback. If we get a resource allocation + // failure or a retryable error (due to log file full?), we have + // no one to tell, so we must just loop back and try again. Of + // course all I/O errors are just too bad. 
+ // + + if ((SharedCacheMap->OpenCount == 0) + + && + + ((SharedCacheMap->DirtyPages == 0) || ((SharedCacheMap->FileSize.QuadPart == 0) && + !FlagOn(SharedCacheMap->Flags, PIN_ACCESS))) + + && + + (FileExclusive || !FlagOn(SharedCacheMap->Flags, TRUNCATE_REQUIRED)) + + && + + (NT_SUCCESS(Status) || ((Status != STATUS_INSUFFICIENT_RESOURCES) && !RetryError(Status)))) { + + CcDeleteSharedCacheMap( SharedCacheMap, OldIrql, FileExclusive ); + } + + // + // In the normal case, we just release the resource on the way out. + // + + else { + + // + // Now release the file if we have it. + // + + if (FileExclusive) { + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + FsRtlReleaseFile( SharedCacheMap->FileObject ); + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + } + + ClearFlag(SharedCacheMap->Flags, WRITE_QUEUED); + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + } + + DebugTrace(-1, me, "CcWriteBehind->VOID\n", 0 ); + + return IoStatus.Status; +} + + +VOID +CcFlushCache ( + IN PSECTION_OBJECT_POINTERS SectionObjectPointer, + IN PLARGE_INTEGER FileOffset OPTIONAL, + IN ULONG Length, + OUT PIO_STATUS_BLOCK IoStatus OPTIONAL + ) + +/*++ + +Routine Description: + + This routine may be called to flush dirty data from the cache to the + cached file on disk. Any byte range within the file may be flushed, + or the entire file may be flushed by omitting the FileOffset parameter. + + This routine does not take a Wait parameter; the caller should assume + that it will always block. + +Arguments: + + SectionObjectPointer - A pointer to the Section Object Pointers + structure in the nonpaged Fcb. + + + FileOffset - If this parameter is supplied (not NULL), then only the + byte range specified by FileOffset and Length are flushed. + If &CcNoDelay is specified, then this signifies the call + from the Lazy Writer, and the lazy write scan should resume + as normal from the last spot where it left off in the file. + + Length - Defines the length of the byte range to flush, starting at + FileOffset. This parameter is ignored if FileOffset is + specified as NULL. + + IoStatus - The I/O status resulting from the flush operation. + +Return Value: + + None. + +--*/ + +{ + LARGE_INTEGER NextFileOffset, TargetOffset; + ULONG NextLength; + PBCB FirstBcb; + KIRQL OldIrql; + PSHARED_CACHE_MAP SharedCacheMap; + IO_STATUS_BLOCK TrashStatus; + PVOID TempVa; + ULONG RemainingLength, TempLength; + NTSTATUS PopupStatus; + BOOLEAN HotSpot; + ULONG BytesWritten = 0; + BOOLEAN PopupRequired = FALSE; + BOOLEAN VerifyRequired = FALSE; + BOOLEAN IsLazyWriter = FALSE; + BOOLEAN FreeActiveVacb = FALSE; + PVACB ActiveVacb = NULL; + NTSTATUS Status = STATUS_SUCCESS; + + DebugTrace(+1, me, "CcFlushCache:\n", 0 ); + DebugTrace( 0, mm, " SectionObjectPointer = %08lx\n", SectionObjectPointer ); + DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", + ARGUMENT_PRESENT(FileOffset) ? FileOffset->LowPart + : 0, + ARGUMENT_PRESENT(FileOffset) ? FileOffset->HighPart + : 0 ); + DebugTrace( 0, me, " Length = %08lx\n", Length ); + + // + // If IoStatus passed a Null pointer, set up to through status away. + // + + if (!ARGUMENT_PRESENT(IoStatus)) { + IoStatus = &TrashStatus; + } + IoStatus->Status = STATUS_SUCCESS; + IoStatus->Information = 0; + + // + // See if this is the Lazy Writer. Since he wants to use this common + // routine, which is also a public routine callable by file systems, + // the Lazy Writer shows his call by specifying CcNoDelay as the file offset! 
+ // + // Also, in case we do not write anything because we see only HotSpot(s), + // initialize the Status to indicate a retryable error, so CcWorkerThread + // knows we did not make any progress. Of course any actual flush will + // overwrite this code. + // + + if (FileOffset == &CcNoDelay) { + IoStatus->Status = STATUS_VERIFY_REQUIRED; + IsLazyWriter = TRUE; + FileOffset = NULL; + } + + // + // If there is nothing to do, return here. + // + + if (ARGUMENT_PRESENT(FileOffset) && (Length == 0)) { + + DebugTrace(-1, me, "CcFlushCache -> VOID\n", 0 ); + return; + } + + // + // See if the file is cached. + // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + + SharedCacheMap = SectionObjectPointer->SharedCacheMap; + + if (SharedCacheMap != NULL) { + + // + // Increment the open count to keep it from going away. + // + + SharedCacheMap->OpenCount += 1; + + if ((SharedCacheMap->NeedToZero != NULL) || (SharedCacheMap->ActiveVacb != NULL)) { + + ULONG FirstPage = 0; + ULONG LastPage = MAXULONG; + + if (ARGUMENT_PRESENT(FileOffset)) { + + FirstPage = (ULONG)(FileOffset->QuadPart >> PAGE_SHIFT); + LastPage = (ULONG)((FileOffset->QuadPart + Length - 1) >> PAGE_SHIFT); + } + + // + // Make sure we do not flush the active page without zeroing any + // uninitialized data. Also, it is very important to free the active + // page if it is the one to be flushed, so that we get the dirty + // bit out to the Pfn. + // + + if (((((LONGLONG)LastPage + 1) << PAGE_SHIFT) > SharedCacheMap->ValidDataGoal.QuadPart) || + + ((SharedCacheMap->NeedToZero != NULL) && + (FirstPage <= SharedCacheMap->NeedToZeroPage) && + (LastPage >= SharedCacheMap->NeedToZeroPage)) || + + ((SharedCacheMap->ActiveVacb != NULL) && + (FirstPage <= SharedCacheMap->ActivePage) && + (LastPage >= SharedCacheMap->ActivePage))) { + + GetActiveVacbAtDpcLevel( SharedCacheMap, ActiveVacb, RemainingLength, TempLength ); + FreeActiveVacb = TRUE; + } + } + } + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + + if (FreeActiveVacb) { + CcFreeActiveVacb( SharedCacheMap, ActiveVacb, RemainingLength, TempLength ); + } + + // + // Scan for dirty pages if there is a shared cache map. + // + + if (SharedCacheMap != NULL) { + + // + // If FileOffset was not specified then set to flush entire region + // and set valid data length to the goal so that we will not get + // any more call backs. + // + + if (!IsLazyWriter && !ARGUMENT_PRESENT(FileOffset)) { + + SharedCacheMap->ValidDataLength = SharedCacheMap->ValidDataGoal; + } + + // + // If this is an explicit flush, initialize our offset to scan for. + // + + if (ARGUMENT_PRESENT(FileOffset)) { + TargetOffset = *FileOffset; + } + + // + // Assume we want to pass the explicit flush flag in Length. + // But overwrite it if a length really was specified. On + // subsequent loops, NextLength will have some nonzero value. + // + + NextLength = 1; + if (Length != 0) { + NextLength = Length; + } + + // + // Loop as long as we find buffers to flush for this + // SharedCacheMap, and we are not trying to delete the guy. + // + + while (((SharedCacheMap->PagesToWrite != 0) || !IsLazyWriter) + + && + ((SharedCacheMap->FileSize.QuadPart != 0) || + FlagOn(SharedCacheMap->Flags, PIN_ACCESS)) + + && + + !VerifyRequired + + && + + CcAcquireByteRangeForWrite ( SharedCacheMap, + IsLazyWriter ? NULL : (ARGUMENT_PRESENT(FileOffset) ? + &TargetOffset : NULL), + IsLazyWriter ? 0: NextLength, + &NextFileOffset, + &NextLength, + &FirstBcb )) { + + // + // Assume this range is not a hot spot. 
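+            //
+            // (A range is considered a hot spot if Mm reports that its
+            // dirty bit came back on since the data was captured, i.e. it
+            // is apparently still being modified.  For ordinary user data
+            // within ValidDataLength the Lazy Writer then skips the write
+            // on most passes and just marks the range dirty again when it
+            // is released, rather than rewriting the same pages over and
+            // over.)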
+ // + + HotSpot = FALSE; + + // + // We defer calling Mm to set address range modified until here, to take + // overhead out of the main line path, and to reduce the number of TBIS + // on a multiprocessor. + // + + RemainingLength = NextLength; + + do { + + // + // See if the next file offset is mapped. (If not, the dirty bit + // was propagated on the unmap.) + // + + if ((TempVa = CcGetVirtualAddressIfMapped( SharedCacheMap, + NextFileOffset.QuadPart + NextLength - RemainingLength, + &ActiveVacb, + &TempLength)) != NULL) { + + // + // Reduce TempLength to RemainingLength if necessary, and + // call MM. + // + + if (TempLength > RemainingLength) { + TempLength = RemainingLength; + } + + // + // Clear the Dirty bit (if set) in the PTE and set the + // Pfn modified. Assume if the Pte was dirty, that this may + // be a hot spot. Do not do hot spots for metadata, and unless + // they are within ValidDataLength as reported to the file system + // via CcSetValidData. + // + + HotSpot = (BOOLEAN)((MmSetAddressRangeModified(TempVa, TempLength) || HotSpot) && + ((NextFileOffset.QuadPart + NextLength) < + (SharedCacheMap->ValidDataLength.QuadPart)) && + ((SharedCacheMap->LazyWritePassCount & 0xF) != 0) && IsLazyWriter) && + !FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED); + + CcFreeVirtualAddress( ActiveVacb ); + + } else { + + // + // Reduce TempLength to RemainingLength if necessary. + // + + if (TempLength > RemainingLength) { + TempLength = RemainingLength; + } + } + + // + // Reduce RemainingLength by what we processed. + // + + RemainingLength -= TempLength; + + // + // Loop until done. + // + + } while (RemainingLength != 0); + + CcLazyWriteHotSpots += HotSpot; + + // + // Now flush, now flush if we do not think it is a hot spot. + // + + if (!HotSpot) { + + MmFlushSection( SharedCacheMap->FileObject->SectionObjectPointer, + &NextFileOffset, + NextLength, + IoStatus, + !IsLazyWriter ); + + if (NT_SUCCESS(IoStatus->Status)) { + + ExAcquireFastLock( &CcMasterSpinLock, &OldIrql ); + SetFlag(SharedCacheMap->Flags, LAZY_WRITE_OCCURRED); + ExReleaseFastLock( &CcMasterSpinLock, OldIrql ); + + // + // Increment performance counters + // + + if (IsLazyWriter) { + + CcLazyWriteIos += 1; + CcLazyWritePages += (NextLength + PAGE_SIZE - 1) >> PAGE_SHIFT; + } + + } else { + + LARGE_INTEGER Offset = NextFileOffset; + ULONG RetryLength = NextLength; + + DebugTrace2( 0, 0, "I/O Error on Cache Flush: %08lx, %08lx\n", + IoStatus->Status, IoStatus->Information ); + + if (RetryError(IoStatus->Status)) { + + VerifyRequired = TRUE; + + // + // Loop to write each page individually, starting with one + // more try on the page that got the error, in case that page + // or any page beyond it can be successfully written + // individually. Note that Offset and RetryLength are + // guaranteed to be in integral pages, but the Information + // field from the failed request is not. + // + // We ignore errors now, and give it one last shot, before + // setting the pages clean (see below). 
+ // + + } else { + + do { + + DebugTrace2( 0, 0, "Trying page at offset %08lx, %08lx\n", + Offset.LowPart, Offset.HighPart ); + + MmFlushSection ( SharedCacheMap->FileObject->SectionObjectPointer, + &Offset, + PAGE_SIZE, + IoStatus, + !IsLazyWriter ); + + DebugTrace2( 0, 0, "I/O status = %08lx, %08lx\n", + IoStatus->Status, IoStatus->Information ); + + if (NT_SUCCESS(IoStatus->Status)) { + ExAcquireFastLock( &CcMasterSpinLock, &OldIrql ); + SetFlag(SharedCacheMap->Flags, LAZY_WRITE_OCCURRED); + ExReleaseFastLock( &CcMasterSpinLock, OldIrql ); + } + + if ((!NT_SUCCESS(IoStatus->Status)) && !RetryError(IoStatus->Status)) { + + PopupRequired = TRUE; + PopupStatus = IoStatus->Status; + } + + VerifyRequired = VerifyRequired || RetryError(IoStatus->Status); + + Offset.QuadPart = Offset.QuadPart + (LONGLONG)PAGE_SIZE; + RetryLength -= PAGE_SIZE; + + } while(RetryLength > 0); + } + } + } + + // + // Now release the Bcb resources and set them clean. Note we do not check + // here for errors, and just returned in the I/O status. Errors on writes + // are rare to begin with. Nonetheless, our strategy is to rely on + // one or more of the following (depending on the file system) to prevent + // errors from getting to us. + // + // - Retries and/or other forms of error recovery in the disk driver + // - Mirroring driver + // - Hot fixing in the noncached path of the file system + // + // In the unexpected case that a write error does get through, we + // *currently* just set the Bcbs clean anyway, rather than let + // Bcbs and pages accumulate which cannot be written. Note we did + // a popup above to at least notify the guy. + // + // Set the pages dirty again if we either saw a HotSpot or got + // verify required. + // + + CcReleaseByteRangeFromWrite ( SharedCacheMap, + &NextFileOffset, + NextLength, + FirstBcb, + (BOOLEAN)(HotSpot || VerifyRequired) ); + + // + // See if there is any deferred writes we should post. + // + + BytesWritten += NextLength; + if ((BytesWritten >= 0x40000) && !IsListEmpty(&CcDeferredWrites)) { + CcPostDeferredWrites(); + BytesWritten = 0; + } + + // + // Now for explicit flushes, we should advance our range. + // + + if (ARGUMENT_PRESENT(FileOffset)) { + + NextFileOffset.QuadPart += NextLength; + + // + // Done yet? + // + + if ((FileOffset->QuadPart + Length) <= NextFileOffset.QuadPart) { + break; + } + + // + // Calculate new target range + // + + NextLength = (ULONG)((FileOffset->QuadPart + Length) - NextFileOffset.QuadPart); + TargetOffset = NextFileOffset; + } + } + } + + // + // If there is a user-mapped file, then we perform the "service" of + // flushing even data not written via the file system. To do this + // we simply reissue the original flush, sigh. + // + + if ((SharedCacheMap == NULL) + + || + + FlagOn(((PFSRTL_COMMON_FCB_HEADER)(SharedCacheMap->FileObject->FsContext))->Flags, + FSRTL_FLAG_USER_MAPPED_FILE) && !IsLazyWriter) { + + // + // Call MM to flush the section through our view. + // + + DebugTrace( 0, mm, "MmFlushSection:\n", 0 ); + DebugTrace( 0, mm, " SectionObjectPointer = %08lx\n", SectionObjectPointer ); + DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", + ARGUMENT_PRESENT(FileOffset) ? FileOffset->LowPart + : 0, + ARGUMENT_PRESENT(FileOffset) ? 
FileOffset->HighPart + : 0 ); + DebugTrace( 0, mm, " RegionSize = %08lx\n", Length ); + + try { + + Status = MmFlushSection( SectionObjectPointer, + FileOffset, + Length, + IoStatus, + TRUE ); + + } except( CcExceptionFilter( IoStatus->Status = GetExceptionCode() )) { + + KdPrint(("CACHE MANAGER: MmFlushSection raised %08lx\n", IoStatus->Status)); + } + + DebugTrace2(0, mm, " Status, IoStatus->Information ); + } + + // + // Now we can get rid of the open count, and clean up as required. + // + + if (SharedCacheMap != NULL) { + + // + // Serialize again to decrement the open count. + // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + + SharedCacheMap->OpenCount -= 1; + + if ((SharedCacheMap->OpenCount == 0) && + !FlagOn(SharedCacheMap->Flags, WRITE_QUEUED) && + (SharedCacheMap->DirtyPages == 0)) { + + // + // Move to the dirty list. + // + + RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks ); + InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks, + &SharedCacheMap->SharedCacheMapLinks ); + + // + // Make sure the Lazy Writer will wake up, because we + // want him to delete this SharedCacheMap. + // + + LazyWriter.OtherWork = TRUE; + if (!LazyWriter.ScanActive) { + CcScheduleLazyWriteScan(); + } + } + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + } + + // + // Make sure and return the first error to our caller. In the + // case of the Lazy Writer, a popup will be issued. + // + + if (PopupRequired) { + IoStatus->Status = PopupStatus; + } + + // + // Let the Lazy writer know if we did anything, so he can + + DebugTrace(-1, me, "CcFlushCache -> VOID\n", 0 ); + + return; +} + + +VOID +CcRepinBcb ( + IN PVOID Bcb + ) + +/*++ + +Routine Description: + + This routine may be called by a file system to pin a Bcb an additional + time in order to reserve it for Write Through or error recovery. + Typically the file system would do this the first time that it sets a + pinned buffer dirty while processing a WriteThrough request, or any + time that it determines that a buffer will be required for WriteThrough. + + The call to this routine must be followed by a call to CcUnpinRepinnedBcb. + CcUnpinRepinnedBcb should normally be called during request completion + after all other resources have been released. CcUnpinRepinnedBcb + synchronously writes the buffer (for WriteThrough requests) and performs + the matching unpin for this call. + +Arguments: + + Bcb - Supplies a pointer to a previously pinned Bcb + +Return Value: + + None. + +--*/ + +{ + KIRQL OldIrql; + + ExAcquireFastLock( &CcMasterSpinLock, &OldIrql ); + + ((PBCB)Bcb)->PinCount += 1; + + ExReleaseFastLock( &CcMasterSpinLock, OldIrql ); +} + + +VOID +CcUnpinRepinnedBcb ( + IN PVOID Bcb, + IN BOOLEAN WriteThrough, + OUT PIO_STATUS_BLOCK IoStatus + ) + +/*++ + +Routine Description: + + This routine may be called to Write a previously pinned buffer + through to the file. It must have been preceded by a call to + CcRepinBcb. As this routine must acquire the Bcb + resource exclusive, the caller must be extremely careful to avoid + deadlocks. Ideally the caller owns no resources at all when it + calls this routine, or else the caller should guarantee that it + has nothing else pinned in this same file. (The latter rule is + the one used to avoid deadlocks in calls from CcCopyWrite and + CcMdlWrite.) + +Arguments: + + Bcb - Pointer to a Bcb which was previously specified in a call + to CcRepinBcb. + + WriteThrough - TRUE if the Bcb should be written through. + + IoStatus - Returns the I/O status for the operation. 
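+
+    For example (an illustrative sequence only), a file system processing
+    a write-through request might do:
+
+        CcRepinBcb( Bcb );
+
+            ... mark the data dirty and finish its own processing ...
+
+        CcUnpinRepinnedBcb( Bcb, TRUE, &IoStatus );
+
+    where the second call writes the buffer through and removes the extra
+    reference taken by CcRepinBcb.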
+ +Return Value: + + None. + +--*/ + +{ + PSHARED_CACHE_MAP SharedCacheMap = ((PBCB)Bcb)->SharedCacheMap; + + DebugTrace(+1, me, "CcUnpinRepinnedBcb\n", 0 ); + DebugTrace( 0, me, " Bcb = %08lx\n", Bcb ); + DebugTrace( 0, me, " WriteThrough = %02lx\n", WriteThrough ); + + // + // Set status to success for non write through case. + // + + IoStatus->Status = STATUS_SUCCESS; + + if (WriteThrough) { + + // + // Acquire Bcb exclusive to eliminate possible modifiers of the buffer, + // since we are about to write its buffer. + // + + if (FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) { + ExAcquireResourceExclusive( &((PBCB)Bcb)->Resource, TRUE ); + } + + // + // Now, there is a chance that the LazyWriter has already written + // it, since the resource was free. We will only write it if it + // is still dirty. + // + + if (((PBCB)Bcb)->Dirty) { + + // + // First we make sure that the dirty bit in the PFN database is set. + // + + ASSERT( ((PBCB)Bcb)->BaseAddress != NULL ); + MmSetAddressRangeModified( ((PBCB)Bcb)->BaseAddress, + ((PBCB)Bcb)->ByteLength ); + + // + // Now release the Bcb resource and set it clean. Note we do not check + // here for errors, and just return the I/O status. Errors on writes + // are rare to begin with. Nonetheless, our strategy is to rely on + // one or more of the following (depending on the file system) to prevent + // errors from getting to us. + // + // - Retries and/or other forms of error recovery in the disk driver + // - Mirroring driver + // - Hot fixing in the noncached path of the file system + // + // In the unexpected case that a write error does get through, we + // report it to our caller, but go ahead and set the Bcb clean. There + // seems to be no point in letting Bcbs (and pages in physical memory) + // accumulate which can never go away because we get an unrecoverable I/O + // error. + // + + // + // We specify TRUE here for ReadOnly so that we will keep the + // resource during the flush. + // + + CcUnpinFileData( (PBCB)Bcb, TRUE, SET_CLEAN ); + + // + // Write it out. + // + + MmFlushSection( ((PBCB)Bcb)->SharedCacheMap->FileObject->SectionObjectPointer, + &((PBCB)Bcb)->FileOffset, + ((PBCB)Bcb)->ByteLength, + IoStatus, + TRUE ); + + // + // If we got verify required, we have to mark the buffer dirty again + // so we will try again later. + // + + if (RetryError(IoStatus->Status)) { + CcSetDirtyPinnedData( (PBCB)Bcb, NULL ); + } + + // + // Now remove the final pin count now that we have set it clean. + // + + CcUnpinFileData( (PBCB)Bcb, FALSE, UNPIN ); + + // + // See if there is any deferred writes we can post. + // + + if (!IsListEmpty(&CcDeferredWrites)) { + CcPostDeferredWrites(); + } + } + else { + + // + // Lazy Writer got there first, just free the resource and unpin. + // + + CcUnpinFileData( (PBCB)Bcb, FALSE, UNPIN ); + + } + + DebugTrace2(0, me, " Status, + IoStatus->Information ); + } + + // + // Non-WriteThrough case + // + + else { + + CcUnpinFileData( (PBCB)Bcb, TRUE, UNPIN ); + + // + // Set status to success for non write through case. + // + + IoStatus->Status = STATUS_SUCCESS; + } + + DebugTrace(-1, me, "CcUnpinRepinnedBcb -> VOID\n", 0 ); +} + + +// +// Internal Support Routine +// + +BOOLEAN +CcFindBcb ( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN PLARGE_INTEGER FileOffset, + IN OUT PLARGE_INTEGER BeyondLastByte, + OUT PBCB *Bcb + ) + +/*++ + +Routine Description: + + This routine is called to find a Bcb describing the specified byte range + of a file. 
It returns TRUE if it could at least find a Bcb which describes + the beginning of the specified byte range, or else FALSE if the first + part of the byte range is not present. In the latter case, the requested + byte range (TrialLength) is truncated if there is currently a Bcb which + describes bytes beyond the beginning of the byte range. + + The caller may see if the entire byte range is being returned by examining + the Bcb, and the caller (or caller's caller) may then make subsequent + calls if the data is not all returned. + + The BcbList SpinLock must be currently acquired. + +Arguments: + + SharedCacheMap - Supplies a pointer to the SharedCacheMap for the file + in which the byte range is desired. + + FileOffset - Supplies the file offset for the beginning of the desired + byte range. + + BeyondLastByte - Supplies the file offset of the ending of the desired + byte range + 1. Note that this offset will be truncated + on return if the Bcb was not found, but bytes beyond the + beginning of the Bcb are contained in another Bcb. + + Bcb - returns a Bcb describing the beginning of the byte range if also + returning TRUE, or else the point in the Bcb list to insert after. + +Return Value: + + FALSE - if no Bcb describes the beginning of the desired byte range + + TRUE - if a Bcb is being returned describing at least an initial + part of the byte range. + +--*/ + +{ + PLIST_ENTRY BcbList; + PBCB Bcbt; + BOOLEAN Found = FALSE; + + DebugTrace(+1, me, "CcFindBcb:\n", 0 ); + DebugTrace( 0, me, " SharedCacheMap = %08lx\n", SharedCacheMap ); + DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart, + FileOffset->HighPart ); + DebugTrace2(0, me, " TrialLength = %08lx, %08lx\n", TrialLength->LowPart, + TrialLength->HighPart ); + + // + // We want to terminate scans by testing the NodeTypeCode field from the + // BcbLinks, so we want to see the SharedCacheMap signature from the same + // offset. + // + + ASSERT(FIELD_OFFSET(SHARED_CACHE_MAP, BcbList) == FIELD_OFFSET(BCB, BcbLinks)); + + // + // Similarly, when we hit one of the BcbListHeads in the array, small negative + // offsets are all structure pointers, so we are counting on the Bcb signature + // to have some non-Ulong address bits set. + // + + ASSERT((CACHE_NTC_BCB & 3) != 0); + + // + // Get address of Bcb listhead that is *after* the Bcb we are looking for, + // for backwards scan. + // + + BcbList = &SharedCacheMap->BcbList; + if ((FileOffset->QuadPart + SIZE_PER_BCB_LIST) < SharedCacheMap->SectionSize.QuadPart) { + BcbList = GetBcbListHead( SharedCacheMap, FileOffset->QuadPart + SIZE_PER_BCB_LIST ); + } + + // + // Search for an entry that overlaps the specified range, or until we hit + // a listhead. + // + + Bcbt = CONTAINING_RECORD(BcbList->Flink, BCB, BcbLinks); + + // + // First see if we really have to do Large arithmetic or not, and + // then use either a 32-bit loop or a 64-bit loop to search for + // the Bcb. + // + + if (FileOffset->HighPart == 0) { + + // + // 32-bit - loop until we get back to a listhead. + // + + while (Bcbt->NodeTypeCode == CACHE_NTC_BCB) { + + // + // Since the Bcb list is in descending order, we first check + // if we are completely beyond the current entry, and if so + // get out. + // + + if (FileOffset->LowPart >= Bcbt->BeyondLastByte.LowPart) { + break; + } + + // + // Next check if the first byte we are looking for is + // contained in the current Bcb. If so, we either have + // a partial hit and must truncate to the exact amount + // we have found, or we may have a complete hit. 
In + // either case we break with Found == TRUE. + // + + if (FileOffset->LowPart >= Bcbt->FileOffset.LowPart) { + Found = TRUE; + break; + } + + // + // Now we know we must loop back and keep looking, but we + // still must check for the case where the tail end of the + // bytes we are looking for are described by the current + // Bcb. If so we must truncate what we are looking for, + // because this routine is only supposed to return bytes + // from the start of the desired range. + // + + if (BeyondLastByte->LowPart >= Bcbt->FileOffset.LowPart) { + BeyondLastByte->LowPart = Bcbt->FileOffset.LowPart; + } + + // + // Advance to next entry in list (which is possibly back to + // the listhead) and loop back. + // + + Bcbt = CONTAINING_RECORD( Bcbt->BcbLinks.Flink, + BCB, + BcbLinks ); + + } + + } else { + + // + // 64-bit - Loop until we get back to a listhead. + // + + while (Bcbt->NodeTypeCode == CACHE_NTC_BCB) { + + // + // Since the Bcb list is in descending order, we first check + // if we are completely beyond the current entry, and if so + // get out. + // + + if (FileOffset->QuadPart >= Bcbt->BeyondLastByte.QuadPart) { + break; + } + + // + // Next check if the first byte we are looking for is + // contained in the current Bcb. If so, we either have + // a partial hit and must truncate to the exact amount + // we have found, or we may have a complete hit. In + // either case we break with Found == TRUE. + // + + if (FileOffset->QuadPart >= Bcbt->FileOffset.QuadPart) { + Found = TRUE; + break; + } + + // + // Now we know we must loop back and keep looking, but we + // still must check for the case where the tail end of the + // bytes we are looking for are described by the current + // Bcb. If so we must truncate what we are looking for, + // because this routine is only supposed to return bytes + // from the start of the desired range. + // + + if (BeyondLastByte->QuadPart >= Bcbt->FileOffset.QuadPart) { + BeyondLastByte->QuadPart = Bcbt->FileOffset.QuadPart; + } + + // + // Advance to next entry in list (which is possibly back to + // the listhead) and loop back. + // + + Bcbt = CONTAINING_RECORD( Bcbt->BcbLinks.Flink, + BCB, + BcbLinks ); + + } + } + + *Bcb = Bcbt; + + DebugTrace2(0, me, " LowPart, + TrialLength->HighPart ); + DebugTrace( 0, me, " %02lx\n", Found ); + + return Found; +} + + +// +// Internal Support Routine +// + +PBCB +CcAllocateInitializeBcb ( + IN OUT PSHARED_CACHE_MAP SharedCacheMap OPTIONAL, + IN OUT PBCB AfterBcb, + IN PLARGE_INTEGER FileOffset, + IN PLARGE_INTEGER TrialLength + ) + +/*++ + +Routine Description: + + This routine allocates and initializes a Bcb to describe the specified + byte range, and inserts it into the Bcb List of the specified Shared + Cache Map. The Bcb List spin lock must currently be acquired. + + CcMasterSpinLock must be acquired on entry. + +Arguments: + + SharedCacheMap - Supplies the SharedCacheMap for the new Bcb. + + AfterBcb - Supplies where in the descending-order BcbList the new Bcb + should be inserted: either the ListHead (masquerading as + a Bcb) or a Bcb. + + FileOffset - Supplies File Offset for the desired data. + + TrialLength - Supplies length of desired data. 
+ +Return Value: + + Address of the allocated and initialized Bcb + +--*/ + +{ + PBCB Bcb; + CSHORT NodeIsInZone; + ULONG RoundedBcbSize = (sizeof(BCB) + 7) & ~7; + + // + // Loop until we have a new Work Queue Entry + // + + while (TRUE) { + + PVOID Segment; + ULONG SegmentSize; + + Bcb = ExAllocateFromZone( &LazyWriter.BcbZone ); + + if (Bcb != NULL) { + NodeIsInZone = 1; + break; + } + + // + // Allocation failure - on large systems, extend zone + // + + if ( MmQuerySystemSize() == MmLargeSystem ) { + + SegmentSize = sizeof(ZONE_SEGMENT_HEADER) + RoundedBcbSize * 32; + + if ((Segment = ExAllocatePool( NonPagedPool, SegmentSize)) == NULL) { + + return NULL; + } + + if (!NT_SUCCESS(ExExtendZone( &LazyWriter.BcbZone, Segment, SegmentSize ))) { + CcBugCheck( 0, 0, 0 ); + } + } else { + if ((Bcb = ExAllocatePool( NonPagedPool, sizeof(BCB))) == NULL) { + return NULL; + } + NodeIsInZone = 0; + break; + } + } + + // + // Initialize the newly allocated Bcb. First zero it, then fill in + // nonzero fields. + // + + RtlZeroMemory( Bcb, RoundedBcbSize ); + + Bcb->NodeIsInZone = NodeIsInZone; + + // + // For Mbcb's, SharedCacheMap is NULL, and the rest of this initialization + // is not desired. + // + + if (SharedCacheMap != NULL) { + + Bcb->NodeTypeCode = CACHE_NTC_BCB; + Bcb->FileOffset = *FileOffset; + Bcb->ByteLength = TrialLength->LowPart; + Bcb->BeyondLastByte.QuadPart = FileOffset->QuadPart + TrialLength->QuadPart; + Bcb->PinCount += 1; + ExInitializeResource( &Bcb->Resource ); + Bcb->SharedCacheMap = SharedCacheMap; + + // + // Now insert the Bcb in the Bcb List + // + + InsertTailList( &AfterBcb->BcbLinks, &Bcb->BcbLinks ); + + // + // If this resource was no write behind, let Ex know that the + // resource will never be acquired exclusive. Also disable + // boost (I know this is useless, but KenR said I had to do it). + // + + if (SharedCacheMap && + FlagOn(SharedCacheMap->Flags, DISABLE_WRITE_BEHIND)) { +#if DBG + SetFlag(Bcb->Resource.Flag, ResourceNeverExclusive); +#endif + ExDisableResourceBoost( &Bcb->Resource ); + } + + + } + + return Bcb; +} + + +// +// Internal support routine +// + +VOID +FASTCALL +CcDeallocateBcb ( + IN PBCB Bcb + ) + +/*++ + +Routine Description: + + This routine deallocates a Bcb to the BcbZone. It must + already be removed from the BcbList. + + CcMasterSpinLock must be acquired on entry. + +Arguments: + + Bcb - the Bcb to deallocate + +Return Value: + + None + +--*/ + +{ + // + // Deallocate Resource structures + // + + if (Bcb->NodeTypeCode == CACHE_NTC_BCB) { + + ExDeleteResource( &Bcb->Resource ); + } + + if ( Bcb->NodeIsInZone ) { + + // + // Synchronize access to the BcbZone + // + + ExFreeToZone( &LazyWriter.BcbZone, + Bcb ); + } else { + ExFreePool(Bcb); + } + return; +} + + +// +// Internal Support Routine +// + +BOOLEAN +CcMapAndRead( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN PLARGE_INTEGER FileOffset, + IN ULONG Length, + IN ULONG ZeroFlags, + IN BOOLEAN Wait, + OUT PVACB *Vacb, + OUT PVOID *BaseAddress + ) + +/*++ + +Routine Description: + + This routine may be called to insure that the specified data is mapped, + read into memory and locked. If TRUE is returned, then the + correct I/O status for the transfer is also returned, along with + a system-space address for the data. + +Arguments: + + SharedCacheMap - Supplies the address of the SharedCacheMap for the + data. + + FileOffset - Supplies the file offset of the desired data. + + Length - Supplies the total amount of data desired. 
+ + ZeroFlags - Defines which pages may be zeroed if not resident. + + Wait - Supplies FALSE if the caller is not willing to block for the + data, or TRUE if the caller is willing to block. + + Vacb - Returns the address of the Vacb which is mapping the enclosing + virtual address range. + + BaseAddress - Returns the system base address at which the data may + be accessed. + +Return Value: + + FALSE - if the caller supplied Wait = FALSE and the data could not + be returned without blocking. + + TRUE - if the data is being returned. + + Note: this routine may raise an exception due to a map or read failure, + however, this can only happen if Wait was specified as TRUE, since + mapping and reading will not be performed if the caller cannot wait. + +--*/ + +{ + ULONG ReceivedLength; + ULONG ZeroCase; + ULONG SavedState; + BOOLEAN Result = FALSE; + PETHREAD Thread = PsGetCurrentThread(); + + DebugTrace(+1, me, "CcMapAndRead:\n", 0 ); + DebugTrace( 0, me, " SharedCacheMap = %08lx\n", SharedCacheMap ); + DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart, + FileOffset->HighPart ); + DebugTrace( 0, me, " Length = %08lx\n", Length ); + + *BaseAddress = NULL; + *Vacb = NULL; + + *BaseAddress = CcGetVirtualAddress( SharedCacheMap, + *FileOffset, + Vacb, + &ReceivedLength ); + + ASSERT( ReceivedLength >= Length ); + + MmSavePageFaultReadAhead( Thread, &SavedState ); + + + // + // try around everything for cleanup. + // + + try { + + PVOID CacheBuffer; + ULONG PagesToGo; + + // + // If we got more than we need, make sure to only use + // the right amount. + // + + if (ReceivedLength > Length) { + ReceivedLength = Length; + } + + // + // Now loop to touch all of the pages, calling MM to insure + // that if we fault, we take in exactly the number of pages + // we need. + // + + CacheBuffer = *BaseAddress; + PagesToGo = COMPUTE_PAGES_SPANNED( CacheBuffer, + ReceivedLength ); + + // + // Loop to touch or zero the pages. + // + + ZeroCase = ZERO_FIRST_PAGE; + + while (PagesToGo) { + + // + // If we cannot zero this page, or Mm failed to return + // a zeroed page, then just fault it in. + // + + MmSetPageFaultReadAhead( Thread, (PagesToGo - 1) ); + + if (!FlagOn(ZeroFlags, ZeroCase) || + !MmCheckCachedPageState(CacheBuffer, TRUE)) { + + // + // If we get here, it is almost certainly due to the fact + // that we can not take a zero page. MmCheckCachedPageState + // will so rarely return FALSE, that we will not worry + // about it. We will only check if the page is there if + // Wait is FALSE, so that we can do the right thing. + // + + if (!MmCheckCachedPageState(CacheBuffer, FALSE) && !Wait) { + try_return( Result = FALSE ); + } + } + + CacheBuffer = (PCHAR)CacheBuffer + PAGE_SIZE; + PagesToGo -= 1; + + if (PagesToGo == 1) { + ZeroCase = ZERO_LAST_PAGE; + } else { + ZeroCase = ZERO_MIDDLE_PAGES; + } + } + + try_return( Result = TRUE ); + + try_exit: NOTHING; + } + + // + // Cleanup on the way out. + // + + finally { + + MmResetPageFaultReadAhead(Thread, SavedState); + + // + // If not successful, cleanup on the way out. Most of the errors + // can only occur as the result of an abnormal termination after + // successfully checking and locking the pages. 
+ // + + if (Result == FALSE) { + + CcFreeVirtualAddress( *Vacb ); + *Vacb = NULL; + *BaseAddress = NULL; + } + } + + DebugTrace( 0, me, " %02lx\n", Result ); + + return Result; +} + + +// +// Internal Support Routine +// + +VOID +CcFreeActiveVacb ( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN PVACB ActiveVacb OPTIONAL, + IN ULONG ActivePage, + IN ULONG PageIsDirty + ) + +/*++ + +Routine Description: + + This routine may be called to zero the end of a locked page or + free the ActiveVacb for a Shared Cache Map, if there is one. + Note that some callers are not synchronized with foreground + activity, and may therefore not have an ActiveVacb. Examples + of unsynchronized callers are CcZeroEndOfLastPage (which is + called by MM) and any flushing done by CcWriteBehind. + +Arguments: + + SharedCacheMap - SharedCacheMap to examine for page to be zeroed. + + ActiveVacb - Vacb to free + + ActivePage - Page that was used + + PageIsDirty - ACTIVE_PAGE_IS_DIRTY if the active page is dirty + +Return Value: + + None + +--*/ + +{ + LARGE_INTEGER ActiveOffset; + PVOID ActiveAddress; + ULONG BytesLeftInPage; + KIRQL OldIrql; + + // + // If the page was locked, then unlock it. + // + + if (SharedCacheMap->NeedToZero != NULL) { + + // + // Zero the rest of the page under spinlock control, + // and then clear the address field. This field makes + // zero->nonzero transitions only when the file is exclusive, + // but it can make nonzero->zero transitions any time the + // spinlock is not held. + // + + ExAcquireFastLock( &SharedCacheMap->ActiveVacbSpinLock, &OldIrql ); + + // + // The address could already be gone. + // + + ActiveAddress = SharedCacheMap->NeedToZero; + if (ActiveAddress != NULL) { + + BytesLeftInPage = PAGE_SIZE - ((((ULONG)ActiveAddress - 1) & (PAGE_SIZE - 1)) + 1); + RtlZeroBytes( ActiveAddress, BytesLeftInPage ); + SharedCacheMap->NeedToZero = NULL; + } + ExReleaseFastLock( &SharedCacheMap->ActiveVacbSpinLock, OldIrql ); + + // + // Now call MM to unlock the address. Note we will never store the + // address at the start of the page, but we can sometimes store + // the start of the next page when we have exactly filled the page. + // + + if (ActiveAddress != NULL) { + MmUnlockCachedPage( (PVOID)((PCHAR)ActiveAddress - 1) ); + } + } + + // + // See if caller actually has an ActiveVacb + // + + if (ActiveVacb != NULL) { + + // + // See if the page is dirty + // + + if (PageIsDirty) { + + ActiveOffset.QuadPart = (LONGLONG)ActivePage << PAGE_SHIFT; + ActiveAddress = (PVOID)((PCHAR)ActiveVacb->BaseAddress + + (ActiveOffset.LowPart & (VACB_MAPPING_GRANULARITY - 1))); + + // + // Tell the Lazy Writer to write the page. + // + + CcSetDirtyInMask( SharedCacheMap, &ActiveOffset, PAGE_SIZE ); + + // + // Now we need to clear the flag and decrement some counts if there is + // no other active Vacb which snuck in. + // + + ExAcquireFastLock( &CcMasterSpinLock, &OldIrql ); + ExAcquireSpinLockAtDpcLevel( &SharedCacheMap->ActiveVacbSpinLock ); + if ((SharedCacheMap->ActiveVacb == NULL) && + FlagOn(SharedCacheMap->Flags, ACTIVE_PAGE_IS_DIRTY)) { + + ClearFlag(SharedCacheMap->Flags, ACTIVE_PAGE_IS_DIRTY); + SharedCacheMap->DirtyPages -= 1; + CcTotalDirtyPages -= 1; + } + ExReleaseSpinLockFromDpcLevel( &SharedCacheMap->ActiveVacbSpinLock ); + ExReleaseFastLock( &CcMasterSpinLock, OldIrql ); + } + + // + // Now free the Vacb. 
+ // + + CcFreeVirtualAddress( ActiveVacb ); + } +} + + +// +// Internal Support Routine +// + +VOID +CcMapAndCopy( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN PVOID UserBuffer, + IN PLARGE_INTEGER FileOffset, + IN ULONG Length, + IN ULONG ZeroFlags, + IN BOOLEAN WriteThrough + ) + +/*++ + +Routine Description: + + This routine may be called to copy the specified user data to the + cache via a special Mm routine which copies the data to uninitialized + pages and returns. + +Arguments: + + SharedCacheMap - Supplies the address of the SharedCacheMap for the + data. + + UserBuffer - unsafe buffer supplying the user's data to be written + + FileOffset - Supplies the file offset to be modified + + Length - Supplies the total amount of data + + ZeroFlags - Defines which pages may be zeroed if not resident. + + WriteThrough - Supplies whether the data is to be written through or not + +Return Value: + + None + +--*/ + +{ + ULONG ReceivedLength; + ULONG ZeroCase; + PVOID CacheBuffer; + PVOID SavedMappedBuffer; + ULONG SavedMappedLength; + ULONG ActivePage; + KIRQL OldIrql; + LARGE_INTEGER PFileOffset; + IO_STATUS_BLOCK IoStatus; + NTSTATUS Status; + ULONG SavedState; + BOOLEAN MorePages; + ULONG SavedTotalLength = Length; + LARGE_INTEGER LocalOffset = *FileOffset; + ULONG PageOffset = FileOffset->LowPart & (PAGE_SIZE - 1); + PVACB Vacb = NULL; + PETHREAD Thread = PsGetCurrentThread(); + + // + // Initialize SavePage to TRUE to skip the finally clause on zero-length + // writes. + // + + BOOLEAN SavePage = TRUE; + + DebugTrace(+1, me, "CcMapAndCopy:\n", 0 ); + DebugTrace( 0, me, " SharedCacheMap = %08lx\n", SharedCacheMap ); + DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart, + FileOffset->HighPart ); + DebugTrace( 0, me, " Length = %08lx\n", Length ); + + MmSavePageFaultReadAhead( Thread, &SavedState ); + + // + // try around everything for cleanup. + // + + try { + + while (Length != 0) { + + CacheBuffer = CcGetVirtualAddress( SharedCacheMap, + LocalOffset, + &Vacb, + &ReceivedLength ); + + // + // If we got more than we need, make sure to only use + // the right amount. + // + + if (ReceivedLength > Length) { + ReceivedLength = Length; + } + SavedMappedBuffer = CacheBuffer; + SavedMappedLength = ReceivedLength; + Length -= ReceivedLength; + + // + // Now loop to touch all of the pages, calling MM to insure + // that if we fault, we take in exactly the number of pages + // we need. + // + + CacheBuffer = (PVOID)((PCHAR)CacheBuffer - PageOffset); + ReceivedLength += PageOffset; + + // + // Loop to touch or zero the pages. + // + + ZeroCase = ZERO_FIRST_PAGE; + + // + // Set up offset to page for use below. + // + + PFileOffset = LocalOffset; + PFileOffset.LowPart -= PageOffset; + + while (TRUE) { + + // + // Calculate whether we wish to save an active page + // or not. + // + + SavePage = ((Length == 0) && + (ReceivedLength < PAGE_SIZE) && + (SavedTotalLength <= (PAGE_SIZE / 2)) && + !WriteThrough && + (SharedCacheMap->FileObject->SectionObjectPointer->ImageSectionObject == NULL) && + (SharedCacheMap->Mbcb != NULL) && + ((ULONG)((ULONGLONG)PFileOffset.QuadPart >> PAGE_SHIFT) < + (SharedCacheMap->Mbcb->Bitmap.SizeOfBitMap - 1))); + + MorePages = (ReceivedLength > PAGE_SIZE); + + // + // Copy the data to the user buffer. + // + + try { + + // + // It is possible that there is a locked page + // hanging around, and so we need to nuke it here. 
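+                //
+                // (Passing a NULL ActiveVacb here means CcFreeActiveVacb only
+                // performs the zero-to-end-of-page and MmUnlockCachedPage step
+                // for the NeedToZero address; no Vacb is actually freed.)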
+ // + + if (SharedCacheMap->NeedToZero != NULL) { + CcFreeActiveVacb( SharedCacheMap, NULL, 0, 0 ); + } + + Status = STATUS_SUCCESS; + if (FlagOn(ZeroFlags, ZeroCase)) { + + Status = MmCopyToCachedPage( CacheBuffer, + UserBuffer, + PageOffset, + MorePages ? + (PAGE_SIZE - PageOffset) : + (ReceivedLength - PageOffset), + SavePage ); + + if (!NT_SUCCESS(Status)) { + + ExRaiseStatus( FsRtlNormalizeNtstatus( Status, + STATUS_INVALID_USER_BUFFER )); + } + + // + // Otherwise, we have to actually copy the data ourselves. + // + + } else { + + MmSetPageFaultReadAhead( Thread, + (MorePages && FlagOn(ZeroFlags, ZERO_LAST_PAGE)) ? 1 : 0); + + RtlCopyBytes( (PVOID)((PCHAR)CacheBuffer + PageOffset), + UserBuffer, + MorePages ? + (PAGE_SIZE - PageOffset) : + (ReceivedLength - PageOffset) ); + + MmResetPageFaultReadAhead( Thread, SavedState ); + + } + + } except( CcCopyReadExceptionFilter( GetExceptionInformation(), + &Status ) ) { + + // + // If we got an access violation, then the user buffer went + // away. Otherwise we must have gotten an I/O error trying + // to bring the data in. + // + + if (Status == STATUS_ACCESS_VIOLATION) { + ExRaiseStatus( STATUS_INVALID_USER_BUFFER ); + } + else { + ExRaiseStatus( FsRtlNormalizeNtstatus( Status, + STATUS_UNEXPECTED_IO_ERROR )); + } + } + + // + // Now get out quickly if it is a small write and we want + // to save the page. + // + + if (SavePage) { + + ActivePage = (ULONG)( (ULONGLONG)Vacb->Overlay.FileOffset.QuadPart >> PAGE_SHIFT ) + + (((PCHAR)CacheBuffer - (PCHAR)Vacb->BaseAddress) >> + PAGE_SHIFT); + + PFileOffset.LowPart += ReceivedLength; + + // + // If the cache page was not locked, then clear the address + // to zero from. + // + + if (Status == STATUS_CACHE_PAGE_LOCKED) { + + ExAcquireFastLock( &SharedCacheMap->ActiveVacbSpinLock, &OldIrql ); + + ASSERT(SharedCacheMap->NeedToZero == NULL); + + SharedCacheMap->NeedToZero = (PVOID)((PCHAR)CacheBuffer + + (PFileOffset.LowPart & (PAGE_SIZE - 1))); + SharedCacheMap->NeedToZeroPage = ActivePage; + ExReleaseFastLock( &SharedCacheMap->ActiveVacbSpinLock, OldIrql ); + } + + SetActiveVacb( SharedCacheMap, + OldIrql, + Vacb, + ActivePage, + ACTIVE_PAGE_IS_DIRTY ); + + try_return( NOTHING ); + } + + // + // If it looks like we may save a page and exit on the next loop, + // then we must make sure to mark the current page dirty. Note + // that Cc[Fast]CopyWrite will finish the last part of any page + // before allowing us to free the Active Vacb above, therefore + // this case only occurs for a small random write. + // + + if ((SavedTotalLength <= (PAGE_SIZE / 2)) && !WriteThrough) { + + CcSetDirtyInMask( SharedCacheMap, &PFileOffset, ReceivedLength ); + } + + UserBuffer = (PVOID)((PCHAR)UserBuffer + (PAGE_SIZE - PageOffset)); + PageOffset = 0; + + // + // If there is more than a page to go (including what we just + // copied), then adjust our buffer pointer and counts, and + // determine if we are to the last page yet. + // + + if (MorePages) { + + CacheBuffer = (PCHAR)CacheBuffer + PAGE_SIZE; + ReceivedLength -= PAGE_SIZE; + + // + // Update our offset to the page. Note that 32-bit + // add is ok since we cannot cross a Vacb boundary + // and we reinitialize this offset before entering + // this loop again. + // + + PFileOffset.LowPart += PAGE_SIZE; + + if (ReceivedLength > PAGE_SIZE) { + ZeroCase = ZERO_MIDDLE_PAGES; + } else { + ZeroCase = ZERO_LAST_PAGE; + } + + } else { + + break; + } + } + + // + // If there is still more to write (ie. 
we are going to step + // onto the next vacb) AND we just dirtied more than 64K, then + // do a vicarious MmFlushSection here. This prevents us from + // creating unlimited dirty pages while holding the file + // resource exclusive. We also do not need to set the pages + // dirty in the mask in this case. + // + + if (Length > CcMaxDirtyWrite) { + + MmSetAddressRangeModified( SavedMappedBuffer, SavedMappedLength ); + MmFlushSection( SharedCacheMap->FileObject->SectionObjectPointer, + &LocalOffset, + SavedMappedLength, + &IoStatus, + TRUE ); + + if (!NT_SUCCESS(IoStatus.Status)) { + ExRaiseStatus( FsRtlNormalizeNtstatus( IoStatus.Status, + STATUS_UNEXPECTED_IO_ERROR )); + } + + // + // For write through files, call Mm to propagate the dirty bits + // here while we have the view mapped, so we know the flush will + // work below. Again - do not set dirty in the mask. + // + + } else if (WriteThrough) { + + MmSetAddressRangeModified( SavedMappedBuffer, SavedMappedLength ); + + // + // For the normal case, just set the pages dirty for the Lazy Writer + // now. + // + + } else { + + CcSetDirtyInMask( SharedCacheMap, &LocalOffset, SavedMappedLength ); + } + + CcFreeVirtualAddress( Vacb ); + Vacb = NULL; + + // + // If we have to loop back to get at least a page, it will be ok to + // zero the first page. If we are not getting at least a page, we + // must make sure we clear the ZeroFlags if we cannot zero the last + // page. + // + + if (Length >= PAGE_SIZE) { + ZeroFlags |= ZERO_FIRST_PAGE; + } else if ((ZeroFlags & ZERO_LAST_PAGE) == 0) { + ZeroFlags = 0; + } + + // + // Note that if ReceivedLength (and therefore SavedMappedLength) + // was truncated to the transfer size then the new LocalOffset + // computed below is not correct. This is not an issue since + // in that case (Length == 0) and we would never get here. + // + + LocalOffset.QuadPart = LocalOffset.QuadPart + (LONGLONG)SavedMappedLength; + } + try_exit: NOTHING; + } + + // + // Cleanup on the way out. + // + + finally { + + MmResetPageFaultReadAhead( Thread, SavedState ); + + // + // We have no work to do if we have squirreled away the Vacb. + // + + if (!SavePage || AbnormalTermination()) { + + // + // Make sure we do not leave anything mapped or dirty in the PTE + // on the way out. + // + + if (Vacb != NULL) { + + CcFreeVirtualAddress( Vacb ); + } + + // + // Either flush the whole range because of write through, or + // mark it dirty for the lazy writer. + // + + if (WriteThrough) { + + MmFlushSection ( SharedCacheMap->FileObject->SectionObjectPointer, + FileOffset, + SavedTotalLength, + &IoStatus, + TRUE ); + + if (!NT_SUCCESS(IoStatus.Status)) { + ExRaiseStatus( FsRtlNormalizeNtstatus( IoStatus.Status, + STATUS_UNEXPECTED_IO_ERROR )); + } + + // + // Advance ValidDataGoal + // + + LocalOffset.QuadPart = FileOffset->QuadPart + (LONGLONG)SavedTotalLength; + if (LocalOffset.QuadPart > SharedCacheMap->ValidDataGoal.QuadPart) { + SharedCacheMap->ValidDataGoal = LocalOffset; + } + } + } + } + + DebugTrace(-1, me, "CcMapAndCopy -> %02lx\n", Result ); + + return; +} + + +#ifdef CCDBG +VOID +CcDump ( + IN PVOID Ptr + ) + +{ + PVOID Junk = Ptr; +} +#endif diff --git a/private/ntos/cache/cc.h b/private/ntos/cache/cc.h new file mode 100644 index 000000000..aff15f746 --- /dev/null +++ b/private/ntos/cache/cc.h @@ -0,0 +1,1746 @@ +/*++ + +Copyright (c) 1990 Microsoft Corporation + +Module Name: + + cc.h + +Abstract: + + This module is a header file for the Memory Management based cache + management routines for the common Cache subsystem. 
+ +Author: + + Tom Miller [TomM] 4-May-1990 + +Revision History: + +--*/ + +#ifndef _CCh_ +#define _CCh_ + +#include + +#ifdef MEMPRINT +#include +#endif + +// +// This turns on the Bcb list debugging in a debug system. Set value +// to 0 to turn off. +// + +#if DBG +#define LIST_DBG 1 +#endif + +#include + +#include + +// +// Tag all of our allocations if tagging is turned on +// + +#undef FsRtlAllocatePool +#undef FsRtlAllocatePoolWithQuota + +#define FsRtlAllocatePool(a,b) FsRtlAllocatePoolWithTag(a,b,' cC') +#define FsRtlAllocatePoolWithQuota(a,b) FsRtlAllocatePoolWithQuotaTag(a,b,' cC') + +#undef ExAllocatePool +#undef ExAllocatePoolWithQuota + +#define ExAllocatePool(a,b) ExAllocatePoolWithTag(a,b,' cC') +#define ExAllocatePoolWithQuota(a,b) ExAllocatePoolWithQuotaTag(a,b,' cC') + +// +// Peek at number of available pages. +// + +extern ULONG MmAvailablePages; + +#if DBG +// #define MIPS_PREFILL 0 +#endif + +#ifdef MIPS +#ifdef MIPS_PREFILL +VOID +KeSweepDcache ( + IN BOOLEAN AllProcessors + ); +#endif +#endif + +// +// Define our node type codes. +// + +#define CACHE_NTC_SHARED_CACHE_MAP (0x2FF) +#define CACHE_NTC_PRIVATE_CACHE_MAP (0x2FE) +#define CACHE_NTC_BCB (0x2FD) +#define CACHE_NTC_DEFERRED_WRITE (0x2FC) +#define CACHE_NTC_MBCB (0x2FB) +#define CACHE_NTC_OBCB (0x2FA) + +// +// The following definitions are used to generate meaningful blue bugcheck +// screens. On a bugcheck the file system can output 4 ulongs of useful +// information. The first ulong will have encoded in it a source file id +// (in the high word) and the line number of the bugcheck (in the low word). +// The other values can be whatever the caller of the bugcheck routine deems +// necessary. +// +// Each individual file that calls bugcheck needs to have defined at the +// start of the file a constant called BugCheckFileId with one of the +// CACHE_BUG_CHECK_ values defined below and then use CcBugCheck to bugcheck +// the system. +// + +#define CACHE_BUG_CHECK_CACHEDAT (0x00010000) +#define CACHE_BUG_CHECK_CACHESUB (0x00020000) +#define CACHE_BUG_CHECK_COPYSUP (0x00030000) +#define CACHE_BUG_CHECK_FSSUP (0x00040000) +#define CACHE_BUG_CHECK_LAZYRITE (0x00050000) +#define CACHE_BUG_CHECK_LOGSUP (0x00060000) +#define CACHE_BUG_CHECK_MDLSUP (0x00070000) +#define CACHE_BUG_CHECK_PINSUP (0x00080000) +#define CACHE_BUG_CHECK_VACBSUP (0x00090000) + +#define CcBugCheck(A,B,C) { KeBugCheckEx(CACHE_MANAGER, BugCheckFileId | __LINE__, A, B, C ); } + +// +// Define maximum View Size (These constants are currently so chosen so +// as to be exactly a page worth of PTEs. +// + +#define DEFAULT_CREATE_MODULO ((ULONG)(0x00100000)) +#define DEFAULT_EXTEND_MODULO ((ULONG)(0x00100000)) + +// +// For FO_SEQUENTIAL_ONLY files, define how far we go before umapping +// views. +// + +#define SEQUENTIAL_ONLY_MAP_LIMIT ((ULONG)(0x00080000)) + +// +// Define some constants to drive read ahead +// + +// +// Set max read ahead (some drivers, such as AT break up transfers >= 128kb) +// + +#define MAX_READ_AHEAD (MM_MAXIMUM_DISK_IO_SIZE) + +// +// Set maximum write behind / lazy write (most drivers break up transfers >= 64kb) +// + +#define MAX_WRITE_BEHIND (MM_MAXIMUM_DISK_IO_SIZE) + +// +// Define constants to control zeroing of file data: one constant to control +// how much data we will actually zero ahead in the cache, and another to +// control what the maximum transfer size is that we will use to write zeros. 
+// + +#define MAX_ZERO_TRANSFER (MM_MAXIMUM_DISK_IO_SIZE) +#define MAX_ZEROS_IN_CACHE (0x10000) + +// +// Define constants controlling when the Bcb list is broken into a +// pendaflex-style array of listheads, and how the correct listhead +// is found. Begin when file size exceeds 2MB, and cover 512KB per +// listhead. At 512KB per listhead, the BcbListArray is the same +// size as the Vacb array, i.e., it doubles the size. +// + +#define BEGIN_BCB_LIST_ARRAY (0x200000) +#define SIZE_PER_BCB_LIST (VACB_MAPPING_GRANULARITY * 2) +#define BCB_LIST_SHIFT (VACB_OFFSET_SHIFT + 1) + +#define GetBcbListHead(SCM,OFF) ( \ + (((SCM)->SectionSize.QuadPart > BEGIN_BCB_LIST_ARRAY) && \ + FlagOn((SCM)->Flags, MODIFIED_WRITE_DISABLED)) ? \ + ((PLIST_ENTRY)((SCM)->Vacbs) + (((SCM)->SectionSize.QuadPart + (OFF)) >> BCB_LIST_SHIFT)) : \ + &(SCM)->BcbList \ + ) + +// +// NOISE_BITS defines how many bits are masked off when testing for +// sequential reads. This allows the reader to skip up to 7 bytes +// for alignment purposes, and we still consider the next read to be +// sequential. Starting and ending addresses are masked by this pattern +// before comparison. +// + +#define NOISE_BITS (0x7) + +// +// Define some constants to drive the Lazy Writer +// + +#define LAZY_WRITER_IDLE_DELAY ((LONG)(10000000)) +#define LAZY_WRITER_COLLISION_DELAY ((LONG)(1000000)) + +// +// The following target should best be a power of 2 +// + +#define LAZY_WRITER_MAX_AGE_TARGET ((ULONG)(8)) + +// +// The global Cache Manager debug level variable, its values are: +// +// 0x00000000 Always gets printed (used when about to bug check) +// +// 0x00000001 FsSup +// 0x00000002 CacheSub +// 0x00000004 CopySup +// 0x00000008 PinSup +// +// 0x00000010 MdlSup +// 0x00000020 LazyRite +// 0x00000040 +// 0x00000080 +// +// 0x00000100 Trace all Mm calls +// + +#define mm (0x100) + +// +// Miscellaneous support macros. +// +// ULONG +// FlagOn ( +// IN ULONG Flags, +// IN ULONG SingleFlag +// ); +// +// BOOLEAN +// BooleanFlagOn ( +// IN ULONG Flags, +// IN ULONG SingleFlag +// ); +// +// VOID +// SetFlag ( +// IN ULONG Flags, +// IN ULONG SingleFlag +// ); +// +// VOID +// ClearFlag ( +// IN ULONG Flags, +// IN ULONG SingleFlag +// ); +// + +#define FlagOn(F,SF) ( \ + (((F) & (SF))) \ +) + +#define BooleanFlagOn(F,SF) ( \ + (BOOLEAN)(((F) & (SF)) != 0) \ +) + +#define SetFlag(F,SF) { \ + (F) |= (SF); \ +} + +#define ClearFlag(F,SF) { \ + (F) &= ~(SF); \ +} + + +// +// Define the Virtual Address Control Block, which controls all mapping +// performed by the Cache Manager. +// + +// +// First some constants +// + +#define PREALLOCATED_VACBS (4) + +// +// Virtual Address Control Block +// + +typedef struct _VACB { + + // + // Base Address for this control block. + // + + PVOID BaseAddress; + + // + // Pointer to the Shared Cache Map using this Vacb. + // + + struct _SHARED_CACHE_MAP *SharedCacheMap; + + // + // Overlay for remembering mapped offset within the Shared Cache Map, + // and the count of the number of times this Vacb is in use. + // + + union { + + // + // File Offset within Shared Cache Map + // + + LARGE_INTEGER FileOffset; + + // + // Count of number of times this Vacb is in use. The size of this + // count is calculated to be adequate, while never large enough to + // overwrite nonzero bits of the MappedOffset, which is a multiple + // of VACB_MAPPING_GRANULARITY. 
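+        //
+        // For example, with VACB_MAPPING_GRANULARITY at 256KB (0x40000, the
+        // value implied by the "512KB per listhead" comment on
+        // SIZE_PER_BCB_LIST above), the low 16 bits of a mapped
+        // FileOffset.LowPart are always zero, so a USHORT count can share
+        // that storage: a Vacb mapping file offset 0x000C0000 with three
+        // active users would read back LowPart == 0x000C0003 (little-endian),
+        // and the pure offset reappears once the count drops back to zero.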
+ // + + USHORT ActiveCount; + + } Overlay; + +} VACB, *PVACB; + + +// +// The Private Cache Map is a structure pointed to by the File Object, whenever +// a file is opened with caching enabled (default). +// + +typedef struct _PRIVATE_CACHE_MAP { + + // + // Type and size of this record + // + + CSHORT NodeTypeCode; + CSHORT NodeByteSize; + + // + // Pointer to FileObject for this PrivateCacheMap. + // + + PFILE_OBJECT FileObject; + + // + // READ AHEAD CONTROL + // + // Read ahead history for determining when read ahead might be + // beneficial. + // + + LARGE_INTEGER FileOffset1; + LARGE_INTEGER BeyondLastByte1; + + LARGE_INTEGER FileOffset2; + LARGE_INTEGER BeyondLastByte2; + + // + // Current read ahead requirements. + // + // Array element 0 is optionally used for recording remaining bytes + // required for satisfying a large Mdl read. + // + // Array element 1 is used for predicted read ahead. + // + + LARGE_INTEGER ReadAheadOffset[2]; + ULONG ReadAheadLength[2]; + + // + // SpinLock controlling access to following fields + // + + KSPIN_LOCK ReadAheadSpinLock; + + // + // Read Ahead mask formed from Read Ahead granularity - 1 + // + + ULONG ReadAheadMask; + + // + // Links for list of all PrivateCacheMaps linked to the same + // SharedCacheMap. + // + + LIST_ENTRY PrivateLinks; + + // + // This flag says read ahead is currently active, which means either + // a file system call to CcReadAhead is still determining if the + // desired data is already resident, or else a request to do read ahead + // has been queued to a worker thread. + // + + BOOLEAN ReadAheadActive; + + // + // Flag to say whether read ahead is currently enabled for this + // FileObject/PrivateCacheMap. On read misses it is enabled on + // read ahead hits it will be disabled. Initially disabled. + // + + BOOLEAN ReadAheadEnabled; + +} PRIVATE_CACHE_MAP; + +typedef PRIVATE_CACHE_MAP *PPRIVATE_CACHE_MAP; + + +// +// The Shared Cache Map is a per-file structure pointed to indirectly by +// each File Object. The File Object points to a pointer in a single +// FS-private structure for the file (Fcb). The SharedCacheMap maps the +// first part of the file for common access by all callers. +// + +typedef struct _SHARED_CACHE_MAP { + + // + // Type and size of this record + // + + CSHORT NodeTypeCode; + CSHORT NodeByteSize; + + // + // Number of times this file has been opened cached. + // + + ULONG OpenCount; + + // + // Actual size of file, primarily for restricting Read Ahead. Initialized + // on creation and maintained by extend and truncate operations. + // + // NOTE: This field may never be moved, thanks to the late DavidGoe, + // who should have written this comment himself :-( cache.h + // exports a macro which "knows" that FileSize is the second + // longword in the Cache Map! + // + + LARGE_INTEGER FileSize; + + // + // Bcb Listhead. The BcbList is ordered by descending + // FileOffsets, to optimize misses in the sequential I/O case. + // + + LIST_ENTRY BcbList; + + // + // Size of section created. + // + + LARGE_INTEGER SectionSize; + + // + // ValidDataLength for file, as currently stored by the file system. + // + + LARGE_INTEGER ValidDataLength; + + // + // Goal for ValidDataLength, when current dirty data is written. + // + + LARGE_INTEGER ValidDataGoal; + + // + // Pointer to a contiguous array of Vacb pointers which control mapping + // to this file, along with Vacbs (currently) for a 1MB file. 
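+    //
+    // (PREALLOCATED_VACBS is 4, so at the 256KB of mapping per Vacb implied
+    // above the inline InitialVacbs array covers exactly the 1MB case
+    // mentioned here; larger sections presumably switch Vacbs to a
+    // pool-allocated array via CcCreateVacbArray / CcExtendVacbArray,
+    // declared later in this header.)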
+ // + + PVACB InitialVacbs[PREALLOCATED_VACBS]; + PVACB * Vacbs; + + // + // Referenced pointer to original File Object on which the SharedCacheMap + // was created. + // + + PFILE_OBJECT FileObject; + + // + // Describe Active Vacb and Page for copysup optimizations. + // + + volatile PVACB ActiveVacb; + ULONG ActivePage; + + // + // Virtual address needing zero to end of page + // + + volatile PVOID NeedToZero; + ULONG NeedToZeroPage; + + // + // Fields for synchronizing on active requests. + // + + KSPIN_LOCK ActiveVacbSpinLock; + ULONG VacbActiveCount; + + // + // THE NEXT TWO FIELDS MUST BE ADJACENT, TO SUPPORT + // SHARED_CACHE_MAP_LIST_CURSOR! + // + // Links for Global SharedCacheMap List + // + + LIST_ENTRY SharedCacheMapLinks; + + // + // Shared Cache Map flags (defined below) + // + + ULONG Flags; + + // + // Mask Bcb for this SharedCacheMap, if there is one. + // + + struct _MBCB *Mbcb; + + // + // Number of dirty pages in this SharedCacheMap. Used to trigger + // write behind. + // + + ULONG DirtyPages; + + // + // Pointer to the common Section Object used by the file system. + // + + PVOID Section; + + // + // Status variable set by creator of SharedCacheMap + // + + NTSTATUS Status; + + // + // This event pointer is used to handle creation collisions. + // If a second thread tries to call CcInitializeCacheMap for the + // same file, while BeingCreated (below) is TRUE, then that thread + // will allocate an event store it here (if not already allocated), + // and wait on it. The first creator will set this event when it + // is done. The event is not deleted until CcUninitializedCacheMap + // is called, to avoid possible race conditions. (Note that normally + // the event never has to be allocated. + // + + PKEVENT CreateEvent; + + // + // This points to an event used to wait for active count to go to zero + // + + PKEVENT WaitOnActiveCount; + + // + // These two fields control the writing of large metadata + // streams. The first field gives a target for the current + // flush interval, and the second field stores the end of + // the last flush that occurred on this file. + // + + ULONG PagesToWrite; + LONGLONG BeyondLastFlush; + + // + // Pointer to structure of routines used by the Lazy Writer to Acquire + // and Release the file for Lazy Write and Close, to avoid deadlocks, + // and the context to call them with. + // + + PCACHE_MANAGER_CALLBACKS Callbacks; + + PVOID LazyWriteContext; + + // + // Listhead of all PrivateCacheMaps linked to this SharedCacheMap. + // + + LIST_ENTRY PrivateList; + + // + // Log handle specified for this shared cache map, for support of routines + // in logsup.c + // + + PVOID LogHandle; + + // + // Callback routine specified for flushing to Lsn. + // + + PFLUSH_TO_LSN FlushToLsnRoutine; + + // + // Dirty Page Threshold for this stream + // + + ULONG DirtyPageThreshold; + + // + // Lazy Writer pass count. Used by the Lazy Writer for + // no modified write streams, which are not serviced on + // every pass in order to avoid contention with foreground + // activity. + // + + ULONG LazyWritePassCount; + + // + // This event pointer is used to allow a file system to be notified when + // the deletion of a shared cache map. + // + // This has to be provided here because the cache manager may decide to + // "Lazy Delete" the shared cache map, and some network file systems + // will want to know when the lazy delete completes. 
+ // + + PCACHE_UNINITIALIZE_EVENT UninitializeEvent; + + // + // Reserved for alignment + // + + ULONG Reserved; + + // + // This is a scratch event which can be used either for + // a CreateEvent or a WaitOnActiveCount event. It is + // difficult to share this event, because of the very + // careful semantics by which they are cleared. On the + // other hand, both events are relatively rarely used + // (especially the CreateEvent), so it will be rare that + // we will actually use both for the same file, and have + // to allocate one. + // + + KEVENT Event; + + // + // Preallocate on PrivateCacheMap to reduce pool allocations. + // + + PRIVATE_CACHE_MAP PrivateCacheMap; + +} SHARED_CACHE_MAP; + +typedef SHARED_CACHE_MAP *PSHARED_CACHE_MAP; + +// +// Shared Cache Map Flags +// + +// +// Read ahead has been disabled on this file. +// + +#define DISABLE_READ_AHEAD 0x0001 + +// +// Write behind has been disabled on this file. +// + +#define DISABLE_WRITE_BEHIND 0x0002 + +// +// This flag indicates whether CcInitializeCacheMap was called with +// PinAccess = TRUE. +// + +#define PIN_ACCESS 0x0004 + +// +// This flag indicates that a truncate is required when OpenCount +// goes to 0. +// + +#define TRUNCATE_REQUIRED 0x0010 + +// +// This flag indicates that a LazyWrite request is queued. +// + +#define WRITE_QUEUED 0x0020 + +// +// This flag indicates that we have never seen anyone cache +// the file except for with FO_SEQUENTIAL_ONLY, so we should +// tell MM to dump pages when we unmap. +// + +#define ONLY_SEQUENTIAL_ONLY_SEEN 0x0040 + +// +// Active Page is locked +// + +#define ACTIVE_PAGE_IS_DIRTY 0x0080 + +// +// Flag to say that a create is in progress. +// + +#define BEING_CREATED 0x0100 + +// +// Flag to say that modified write was disabled on the section. +// + +#define MODIFIED_WRITE_DISABLED 0x0200 + +// +// Flag that indicates if a lazy write ever occurred on this file. +// + +#define LAZY_WRITE_OCCURRED 0x0400 + +// +// Flag that indicates this structure is only a cursor, only the +// SharedCacheMapLinks and Flags are valid! +// + +#define IS_CURSOR 0x0800 + +// +// Cursor structure for traversing the SharedCacheMap lists. Anyone +// scanning these lists must verify that the IS_CURSOR flag is clear +// before looking at other SharedCacheMap fields. +// + + +typedef struct _SHARED_CACHE_MAP_LIST_CURSOR { + + // + // Links for Global SharedCacheMap List + // + + LIST_ENTRY SharedCacheMapLinks; + + // + // Shared Cache Map flags, IS_CURSOR must be set. + // + + ULONG Flags; + +} SHARED_CACHE_MAP_LIST_CURSOR, *PSHARED_CACHE_MAP_LIST_CURSOR; + + + +// +// This structure is a "mask" Bcb. For fast simple write operations, +// a mask Bcb is used so that we basically only have to set bits to remember +// where the dirty data is. +// + +typedef struct _MBCB { + + // + // Type and size of this record + // + + CSHORT NodeTypeCode; + CSHORT NodeIsInZone; + + // + // Number of dirty pages (set bits) in the bitmap below. + // + + ULONG DirtyPages; + + // + // First and last dirty pages + // + + ULONG FirstDirtyPage; + ULONG LastDirtyPage; + + // + // This is a hint on where to resume writing, since we will not + // always write all of the dirty data at once. + // + + ULONG ResumeWritePage; + + // + // This field is used as a scratch area for the Lazy Writer to + // guide how much he will write each time he wakes up. + // + + ULONG PagesToWrite; + + // + // Rtl Bitmap structure to describe the bits to follow. 
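+    //
+    // (Each bit tracks one page of the stream; the (FileOffset >> PAGE_SHIFT)
+    // test against Bitmap.SizeOfBitMap in CcMapAndCopy suggests bit N simply
+    // corresponds to file page N, so dirtying bytes 0x5000-0x6FFF on a
+    // 4KB-page system would set bits 5 and 6.)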
+ // + + RTL_BITMAP Bitmap; + +} MBCB; + +typedef MBCB *PMBCB; + + +// +// This is the Buffer Control Block structure for representing data which +// is "pinned" in memory by one or more active requests and/or dirty. This +// structure is created the first time that a call to CcPinFileData specifies +// a particular integral range of pages. It is deallocated whenever the Pin +// Count reaches 0 and the Bcb is not Dirty. +// +// NOTE: The first four fields must be the same as the PUBLIC_BCB. +// + +typedef struct _BCB { + + // + // Type and size of this record + // + + CSHORT NodeTypeCode; + CSHORT NodeIsInZone; + + // + // Byte FileOffset and and length of entire buffer + // + + ULONG ByteLength; + LARGE_INTEGER FileOffset; + + // + // Links for BcbList in SharedCacheMap + // + + LIST_ENTRY BcbLinks; + + // + // Byte FileOffset of last byte in buffer (used for searching) + // + + LARGE_INTEGER BeyondLastByte; + + // + // Oldest Lsn (if specified) when this buffer was set dirty. + // + + LARGE_INTEGER OldestLsn; + + // + // Most recent Lsn specified when this buffer was set dirty. + // The FlushToLsnRoutine is called with this Lsn. + // + + LARGE_INTEGER NewestLsn; + + // + // Pointer to Vacb via which this Bcb is mapped. + // + + PVACB Vacb; + + // + // Links and caller addresses for the global Bcb list (for debug only) + // + +#if LIST_DBG + LIST_ENTRY CcBcbLinks; + PVOID CallerAddress; + PVOID CallersCallerAddress; +#endif + + // + // Count of threads actively using this Bcb to process a request. + // This must be manipulated under protection of the BcbListSpinLock + // in the SharedCacheMap. + // + + ULONG PinCount; + + // + // Resource to synchronize buffer access. Pinning Readers and all Writers + // of the described buffer take out shared access (synchronization of + // buffer modifications is strictly up to the caller). Note that pinning + // readers do not declare if they are going to modify the buffer or not. + // Anyone writing to disk takes out exclusive access, to prevent the buffer + // from changing while it is being written out. + // + + ERESOURCE Resource; + + // + // Pointer to SharedCacheMap for this Bcb. + // + + PSHARED_CACHE_MAP SharedCacheMap; + + // + // This is the Base Address at which the buffer can be seen in + // system space. All access to buffer data should go through this + // address. + // + + PVOID BaseAddress; + + // + // Flags + // + + BOOLEAN Dirty; + +} BCB; + +typedef BCB *PBCB; + +// +// This is the Overlap Buffer Control Block structure for representing data which +// is "pinned" in memory and must be represented by multiple Bcbs due to overlaps. +// +// NOTE: The first four fields must be the same as the PUBLIC_BCB. +// + +typedef struct _OBCB { + + // + // Type and size of this record + // + + CSHORT NodeTypeCode; + CSHORT NodeByteSize; + + // + // Byte FileOffset and and length of entire buffer + // + + ULONG ByteLength; + LARGE_INTEGER FileOffset; + + // + // Vector of Bcb pointers. + // + + PBCB Bcbs[ANYSIZE_ARRAY]; + +} OBCB; + +typedef OBCB *POBCB; + + +// +// Struct for remembering deferred writes for later posting. +// + +typedef struct _DEFERRED_WRITE { + + // + // Type and size of this record + // + + CSHORT NodeTypeCode; + CSHORT NodeByteSize; + + // + // The file to be written. + // + + PFILE_OBJECT FileObject; + + // + // Number of bytes the caller intends to write + // + + ULONG BytesToWrite; + + // + // Links for the deferred write queue. 
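+    //
+    // (These links presumably thread the entry onto the global
+    // CcDeferredWrites list; CcUnpinRepinnedBcb, for example, calls
+    // CcPostDeferredWrites to drain that list after flushing a
+    // write-through buffer.)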
+ // + + LIST_ENTRY DeferredWriteLinks; + + // + // If this event pointer is not NULL, then this event will + // be signalled when the write is ok, rather than calling + // the PostRoutine below. + // + + PKEVENT Event; + + // + // The posting routine and its parameters + // + + PCC_POST_DEFERRED_WRITE PostRoutine; + PVOID Context1; + PVOID Context2; + + BOOLEAN LimitModifiedPages; + +} DEFERRED_WRITE, *PDEFERRED_WRITE; + + +// +// Struct controlling the Lazy Writer algorithms +// + +typedef struct _LAZY_WRITER { + + // + // A few Mm routines still require a process. + // + + PEPROCESS OurProcess; + + // + // Work queue. + // + + LIST_ENTRY WorkQueue; + + // + // Zone for Bcbs. + // + + ZONE_HEADER BcbZone; + + // + // Dpc and Timer Structures used for activating periodic scan when active. + // + + KDPC ScanDpc; + KTIMER ScanTimer; + + // + // Boolean to say whether Lazy Writer scan is active or not. + // + + BOOLEAN ScanActive; + + // + // Boolean indicating if there is any other reason for Lazy Writer to + // wake up. + // + + BOOLEAN OtherWork; + +} LAZY_WRITER; + + +// +// Work queue entry for the worker threads, with an enumerated +// function code. +// +// NOTE: THIS STRUCTURE MUST REMAIN 64-bit ALIGNED IN SIZE, SINCE +// IT IS ZONE ALLOCATED. +// + +typedef enum _WORKER_FUNCTION { + Noop = 0, + ReadAhead, + WriteBehind, + LazyWriteScan + } WORKER_FUNCTION; + +typedef struct _WORK_QUEUE_ENTRY { + + // + // List entry for our work queues. + // + + LIST_ENTRY WorkQueueLinks; + + // + // Define a union to contain function-specific parameters. + // + + union { + + // + // Read parameters (for read ahead) + // + + struct { + PFILE_OBJECT FileObject; + } Read; + + // + // Write parameters (for write behind) + // + + struct { + PSHARED_CACHE_MAP SharedCacheMap; + } Write; + + } Parameters; + + // + // Function code for this entry: + // + + UCHAR Function; + +} WORK_QUEUE_ENTRY, *PWORK_QUEUE_ENTRY; + +// +// This is a structure apended to the end of an MDL +// + +typedef struct _MDL_WRITE { + + // + // This field is for the use of the Server to stash anything interesting + // + + PVOID ServerContext; + + // + // This is the resource to release when the write is complete. + // + + PERESOURCE Resource; + + // + // This is thread caller's thread, and the thread that must release + // the resource. + // + + ERESOURCE_THREAD Thread; + + // + // This links all the pending MDLs through the shared cache map. + // + + LIST_ENTRY MdlLinks; + +} MDL_WRITE, *PMDL_WRITE; + + +// +// Common Private routine definitions for the Cache Manager +// + +#define GetActiveVacb(SCM,IRQ,V,P,D) { \ + ExAcquireFastLock(&(SCM)->ActiveVacbSpinLock, &(IRQ)); \ + (V) = (SCM)->ActiveVacb; \ + if ((V) != NULL) { \ + (P) = (SCM)->ActivePage; \ + (SCM)->ActiveVacb = NULL; \ + (D) = (SCM)->Flags & ACTIVE_PAGE_IS_DIRTY; \ + } \ + ExReleaseFastLock(&(SCM)->ActiveVacbSpinLock, (IRQ)); \ +} + +#define GetActiveVacbAtDpcLevel(SCM,V,P,D) { \ + ExAcquireSpinLockAtDpcLevel(&(SCM)->ActiveVacbSpinLock); \ + (V) = (SCM)->ActiveVacb; \ + if ((V) != NULL) { \ + (P) = (SCM)->ActivePage; \ + (SCM)->ActiveVacb = NULL; \ + (D) = (SCM)->Flags & ACTIVE_PAGE_IS_DIRTY; \ + } \ + ExReleaseSpinLockFromDpcLevel(&(SCM)->ActiveVacbSpinLock); \ +} + +// +// When setting dirty, when we set ACTIVE_PAGE_IS_DIRTY the first time, +// we increment the dirty counts, and they never get decremented until +// CcFreeActiveVacb. 
If we are trying to set and there is already an +// active Vacb *or* we are trying to set a clean one and the flag above +// is set, we do not allow it, and we just free the vacb (we only want +// to handle the clean transition in one place). +// +// MP & UP cases are separately defined, because I do not trust the compiler +// to otherwise generate the optimal UP code. +// + + +// +// In the MP case, we test if we are setting the page dirty, because then +// we must acquire CcMasterSpinLock to diddle CcDirtyPages. +// + +#if !defined(NT_UP) \ + +#define SetActiveVacb(SCM,IRQ,V,P,D) { \ + if (D) { \ + ExAcquireSpinLock(&CcMasterSpinLock, &(IRQ)); \ + ExAcquireSpinLockAtDpcLevel(&(SCM)->ActiveVacbSpinLock); \ + } else { \ + ExAcquireSpinLock(&(SCM)->ActiveVacbSpinLock, &(IRQ)); \ + } \ + do { \ + if ((SCM)->ActiveVacb == NULL) { \ + if (((SCM)->Flags & ACTIVE_PAGE_IS_DIRTY) != (D)) { \ + if (D) { \ + (SCM)->ActiveVacb = (V); \ + (SCM)->ActivePage = (P); \ + (V) = NULL; \ + SetFlag((SCM)->Flags, ACTIVE_PAGE_IS_DIRTY); \ + CcTotalDirtyPages += 1; \ + (SCM)->DirtyPages += 1; \ + if ((SCM)->DirtyPages == 1) { \ + PLIST_ENTRY Blink; \ + PLIST_ENTRY Entry; \ + PLIST_ENTRY Flink; \ + PLIST_ENTRY Head; \ + Entry = &(SCM)->SharedCacheMapLinks; \ + Blink = Entry->Blink; \ + Flink = Entry->Flink; \ + Blink->Flink = Flink; \ + Flink->Blink = Blink; \ + Head = &CcDirtySharedCacheMapList.SharedCacheMapLinks; \ + Blink = Head->Blink; \ + Entry->Flink = Head; \ + Entry->Blink = Blink; \ + Blink->Flink = Entry; \ + Head->Blink = Entry; \ + if (!LazyWriter.ScanActive) { \ + LazyWriter.ScanActive = TRUE; \ + ExReleaseSpinLockFromDpcLevel(&(SCM)->ActiveVacbSpinLock); \ + ExReleaseSpinLock(&CcMasterSpinLock, (IRQ)); \ + KeSetTimer( &LazyWriter.ScanTimer, \ + CcFirstDelay, \ + &LazyWriter.ScanDpc ); \ + break; \ + } \ + } \ + } \ + } else { \ + (SCM)->ActiveVacb = (V); \ + (SCM)->ActivePage = (P); \ + (V) = NULL; \ + } \ + } \ + if (D) { \ + ExReleaseSpinLockFromDpcLevel(&(SCM)->ActiveVacbSpinLock); \ + ExReleaseSpinLock(&CcMasterSpinLock, (IRQ)); \ + } else { \ + ExReleaseSpinLock(&(SCM)->ActiveVacbSpinLock, (IRQ)); \ + } \ + if ((V) != NULL) { \ + CcFreeActiveVacb( (SCM), (V), (P), (D)); \ + } \ + } while (FALSE); \ +} + +// +// In the UP case, any FastLock will do, so we just use the ActiveVacb lock, and do not +// explicitly acquire CcMasterSpinLock. 
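+//
+// Both variants are consumed through the same caller pattern (see CcCopyRead
+// and CcMapAndCopy); a rough sketch of that pattern, with placeholder locals
+// and all error/boundary handling omitted, is:
+//
+//     GetActiveVacb( SharedCacheMap, OldIrql, Vacb, ActivePage, PageIsDirty );
+//
+//     if ((Vacb == NULL) /* or the wrong page */) {
+//         /* fall back to CcGetVirtualAddress / CcMapAndRead as usual */
+//     }
+//
+//     /* ... copy to or from the mapped page ... */
+//
+//     SetActiveVacb( SharedCacheMap, OldIrql, Vacb, ActivePage,
+//                    Writing ? ACTIVE_PAGE_IS_DIRTY : 0 );
+//
+// so a run of small sequential copies keeps reusing one mapped page rather
+// than remapping a view (and repeating the clean-to-dirty bookkeeping) on
+// every call.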
+// + +#else + +#define SetActiveVacb(SCM,IRQ,V,P,D) { \ + ExAcquireFastLock(&(SCM)->ActiveVacbSpinLock, &(IRQ)); \ + do { \ + if ((SCM)->ActiveVacb == NULL) { \ + if (((SCM)->Flags & ACTIVE_PAGE_IS_DIRTY) != (D)) { \ + if (D) { \ + (SCM)->ActiveVacb = (V); \ + (SCM)->ActivePage = (P); \ + (V) = NULL; \ + SetFlag((SCM)->Flags, ACTIVE_PAGE_IS_DIRTY); \ + CcTotalDirtyPages += 1; \ + (SCM)->DirtyPages += 1; \ + if ((SCM)->DirtyPages == 1) { \ + PLIST_ENTRY Blink; \ + PLIST_ENTRY Entry; \ + PLIST_ENTRY Flink; \ + PLIST_ENTRY Head; \ + Entry = &(SCM)->SharedCacheMapLinks; \ + Blink = Entry->Blink; \ + Flink = Entry->Flink; \ + Blink->Flink = Flink; \ + Flink->Blink = Blink; \ + Head = &CcDirtySharedCacheMapList.SharedCacheMapLinks; \ + Blink = Head->Blink; \ + Entry->Flink = Head; \ + Entry->Blink = Blink; \ + Blink->Flink = Entry; \ + Head->Blink = Entry; \ + if (!LazyWriter.ScanActive) { \ + LazyWriter.ScanActive = TRUE; \ + ExReleaseFastLock(&(SCM)->ActiveVacbSpinLock, (IRQ)); \ + KeSetTimer( &LazyWriter.ScanTimer, \ + CcFirstDelay, \ + &LazyWriter.ScanDpc ); \ + break; \ + } \ + } \ + } \ + } else { \ + (SCM)->ActiveVacb = (V); \ + (SCM)->ActivePage = (P); \ + (V) = NULL; \ + } \ + } \ + ExReleaseFastLock(&(SCM)->ActiveVacbSpinLock, (IRQ)); \ + if ((V) != NULL) { \ + CcFreeActiveVacb( (SCM), (V), (P), (D)); \ + } \ + } while (FALSE); \ +} + +#endif + +VOID +CcPostDeferredWrites ( + ); + +BOOLEAN +CcPinFileData ( + IN PFILE_OBJECT FileObject, + IN PLARGE_INTEGER FileOffset, + IN ULONG Length, + IN BOOLEAN ReadOnly, + IN BOOLEAN WriteOnly, + IN BOOLEAN Wait, + OUT PBCB *Bcb, + OUT PVOID *BaseAddress, + OUT PLARGE_INTEGER BeyondLastByte + ); + +typedef enum { + UNPIN, + SET_CLEAN +} UNMAP_ACTIONS; + +VOID +FASTCALL +CcUnpinFileData ( + IN OUT PBCB Bcb, + IN BOOLEAN ReadOnly, + IN UNMAP_ACTIONS UnmapAction + ); + +VOID +FASTCALL +CcDeallocateBcb ( + IN PBCB Bcb + ); + +VOID +FASTCALL +CcPerformReadAhead ( + IN PFILE_OBJECT FileObject + ); + +VOID +CcSetDirtyInMask ( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN PLARGE_INTEGER FileOffset, + IN ULONG Length + ); + +NTSTATUS +FASTCALL +CcWriteBehind ( + IN PSHARED_CACHE_MAP SharedCacheMap + ); + +#define ZERO_FIRST_PAGE 1 +#define ZERO_MIDDLE_PAGES 2 +#define ZERO_LAST_PAGE 4 + +BOOLEAN +CcMapAndRead( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN PLARGE_INTEGER FileOffset, + IN ULONG Length, + IN ULONG ZeroFlags, + IN BOOLEAN Wait, + OUT PVACB *Vacb, + OUT PVOID *BaseAddress + ); + +VOID +CcFreeActiveVacb ( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN PVACB ActiveVacb OPTIONAL, + IN ULONG ActivePage, + IN ULONG PageIsDirty + ); + +VOID +CcMapAndCopy( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN PVOID UserBuffer, + IN PLARGE_INTEGER FileOffset, + IN ULONG Length, + IN ULONG ZeroFlags, + IN BOOLEAN WriteThrough + ); + +VOID +CcScanDpc ( + IN PKDPC Dpc, + IN PVOID DeferredContext, + IN PVOID SystemArgument1, + IN PVOID SystemArgument2 + ); + +VOID +CcScheduleLazyWriteScan ( + ); + +VOID +CcStartLazyWriter ( + IN PVOID NotUsed + ); + +#define CcAllocateWorkQueueEntry() \ + (PWORK_QUEUE_ENTRY)ExAllocateFromNPagedLookasideList(&CcTwilightLookasideList) + +#define CcFreeWorkQueueEntry(_entry_) \ + ExFreeToNPagedLookasideList(&CcTwilightLookasideList, (_entry_)) + +VOID +FASTCALL +CcPostWorkQueue ( + IN PWORK_QUEUE_ENTRY WorkQueueEntry, + IN PLIST_ENTRY WorkQueue + ); + +VOID +CcWorkerThread ( + PVOID ExWorkQueueItem + ); + +VOID +FASTCALL +CcDeleteSharedCacheMap ( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN KIRQL ListIrql, + IN ULONG ReleaseFile + ); + +// +// 
This exception filter handles STATUS_IN_PAGE_ERROR correctly +// + +LONG +CcCopyReadExceptionFilter( + IN PEXCEPTION_POINTERS ExceptionPointer, + IN PNTSTATUS ExceptionCode + ); + +// +// Exception filter for Worker Threads in lazyrite.c +// + +LONG +CcExceptionFilter ( + IN NTSTATUS ExceptionCode + ); + +#ifdef CCDBG +VOID +CcDump ( + IN PVOID Ptr + ); +#endif + +// +// Vacb routines +// + +VOID +CcInitializeVacbs( + ); + +PVOID +CcGetVirtualAddressIfMapped ( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN LONGLONG FileOffset, + OUT PVACB *Vacb, + OUT PULONG ReceivedLength + ); + +PVOID +CcGetVirtualAddress ( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN LARGE_INTEGER FileOffset, + OUT PVACB *Vacb, + OUT PULONG ReceivedLength + ); + +VOID +FASTCALL +CcFreeVirtualAddress ( + IN PVACB Vacb + ); + +VOID +CcWaitOnActiveCount ( + IN PSHARED_CACHE_MAP SharedCacheMap + ); + +VOID +FASTCALL +CcCreateVacbArray ( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN LARGE_INTEGER NewSectionSize + ); + +VOID +CcExtendVacbArray ( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN LARGE_INTEGER NewSectionSize + ); + +BOOLEAN +FASTCALL +CcUnmapVacbArray ( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN PLARGE_INTEGER FileOffset OPTIONAL, + IN ULONG Length + ); + +// +// Define references to global data +// + +extern KSPIN_LOCK CcMasterSpinLock; +extern LIST_ENTRY CcCleanSharedCacheMapList; +extern SHARED_CACHE_MAP_LIST_CURSOR CcDirtySharedCacheMapList; +extern SHARED_CACHE_MAP_LIST_CURSOR CcLazyWriterCursor; +extern NPAGED_LOOKASIDE_LIST CcTwilightLookasideList; +extern KSPIN_LOCK CcWorkQueueSpinlock; +extern ULONG CcNumberWorkerThreads; +extern LIST_ENTRY CcIdleWorkerThreadList; +extern LIST_ENTRY CcExpressWorkQueue; +extern LIST_ENTRY CcRegularWorkQueue; +extern LARGE_INTEGER CcNoDelay; +extern LARGE_INTEGER CcFirstDelay; +extern LARGE_INTEGER CcIdleDelay; +extern LARGE_INTEGER CcCollisionDelay; +extern LARGE_INTEGER CcTargetCleanDelay; +extern LAZY_WRITER LazyWriter; +extern KSPIN_LOCK CcVacbSpinLock; +extern ULONG CcNumberVacbs; +extern PVACB CcVacbs; +extern PVACB CcBeyondVacbs; +extern PVACB CcNextVictimVacb; +extern KSPIN_LOCK CcDeferredWriteSpinLock; +extern LIST_ENTRY CcDeferredWrites; +extern ULONG CcDirtyPageThreshold; +extern ULONG CcDirtyPageTarget; +extern ULONG CcDirtyPagesLastScan; +extern ULONG CcPagesYetToWrite; +extern ULONG CcPagesWrittenLastTime; +extern ULONG CcAvailablePagesThreshold; +extern ULONG CcTotalDirtyPages; +extern ULONG CcTune; +extern ULONG CcLazyWriteHotSpots; +extern MM_SYSTEMSIZE CcCapturedSystemSize; + + +// +// Here is a page of macros stolen directly from Pinball... +// + +// +// The following macros are used to establish the semantics needed +// to do a return from within a try-finally clause. As a rule every +// try clause must end with a label call try_exit. For example, +// +// try { +// : +// : +// +// try_exit: NOTHING; +// } finally { +// +// : +// : +// } +// +// Every return statement executed inside of a try clause should use the +// try_return macro. 
If the compiler fully supports the try-finally construct +// then the macro should be +// +// #define try_return(S) { return(S); } +// +// If the compiler does not support the try-finally construct then the macro +// should be +// +// #define try_return(S) { S; goto try_exit; } +// + +#define try_return(S) { S; goto try_exit; } + +#ifdef CCDBG + +extern LONG CcDebugTraceLevel; +extern LONG CcDebugTraceIndent; + +#ifndef CCDBG_LOCK + +#define DebugTrace(INDENT,LEVEL,X,Y) { \ + LONG _i; \ + if (((LEVEL) == 0) || (CcDebugTraceLevel & (LEVEL))) { \ + _i = (ULONG)PsGetCurrentThread(); \ + DbgPrint("%08lx:",_i); \ + if ((INDENT) < 0) { \ + CcDebugTraceIndent += (INDENT); \ + } \ + if (CcDebugTraceIndent < 0) { \ + CcDebugTraceIndent = 0; \ + } \ + for (_i=0; _i 0) { \ + CcDebugTraceIndent += (INDENT); \ + } \ + } \ +} + +#define DebugTrace2(INDENT,LEVEL,X,Y,Z) { \ + LONG _i; \ + if (((LEVEL) == 0) || (CcDebugTraceLevel & (LEVEL))) { \ + _i = (ULONG)PsGetCurrentThread(); \ + DbgPrint("%08lx:",_i); \ + if ((INDENT) < 0) { \ + CcDebugTraceIndent += (INDENT); \ + } \ + if (CcDebugTraceIndent < 0) { \ + CcDebugTraceIndent = 0; \ + } \ + for (_i=0; _i 0) { \ + CcDebugTraceIndent += (INDENT); \ + } \ + } \ +} + +#define DebugDump(STR,LEVEL,PTR) { \ + LONG _i; \ + VOID CcDump(); \ + if (((LEVEL) == 0) || (CcDebugTraceLevel & (LEVEL))) { \ + _i = (ULONG)PsGetCurrentThread(); \ + DbgPrint("%08lx:",_i); \ + DbgPrint(STR); \ + if (PTR != NULL) {CcDump(PTR);} \ + DbgBreakPoint(); \ + } \ +} + +#else // ndef CCDBG_LOCK + +extern KSPIN_LOCK CcDebugTraceLock; + +#define DebugTrace(INDENT,LEVEL,X,Y) { \ + LONG _i; \ + KIRQL _oldIrql; \ + if (((LEVEL) == 0) || (CcDebugTraceLevel & (LEVEL))) { \ + _i = (ULONG)PsGetCurrentThread(); \ + ExAcquireSpinLock( &CcDebugTraceLock, &_oldIrql ); \ + DbgPrint("%08lx:",_i); \ + if ((INDENT) < 0) { \ + CcDebugTraceIndent += (INDENT); \ + } \ + if (CcDebugTraceIndent < 0) { \ + CcDebugTraceIndent = 0; \ + } \ + for (_i=0; _i 0) { \ + CcDebugTraceIndent += (INDENT); \ + } \ + ExReleaseSpinLock( &CcDebugTraceLock, _oldIrql ); \ + } \ +} + +#define DebugTrace2(INDENT,LEVEL,X,Y,Z) { \ + LONG _i; \ + KIRQL _oldIrql; \ + if (((LEVEL) == 0) || (CcDebugTraceLevel & (LEVEL))) { \ + _i = (ULONG)PsGetCurrentThread(); \ + ExAcquireSpinLock( &CcDebugTraceLock, &_oldIrql ); \ + DbgPrint("%08lx:",_i); \ + if ((INDENT) < 0) { \ + CcDebugTraceIndent += (INDENT); \ + } \ + if (CcDebugTraceIndent < 0) { \ + CcDebugTraceIndent = 0; \ + } \ + for (_i=0; _i 0) { \ + CcDebugTraceIndent += (INDENT); \ + } \ + ExReleaseSpinLock( &CcDebugTraceLock, _oldIrql ); \ + } \ +} + +#define DebugDump(STR,LEVEL,PTR) { \ + LONG _i; \ + KIRQL _oldIrql; \ + VOID CcDump(); \ + if (((LEVEL) == 0) || (CcDebugTraceLevel & (LEVEL))) { \ + _i = (ULONG)PsGetCurrentThread(); \ + ExAcquireSpinLock( &CcDebugTraceLock, &_oldIrql ); \ + DbgPrint("%08lx:",_i); \ + DbgPrint(STR); \ + if (PTR != NULL) {CcDump(PTR);} \ + DbgBreakPoint(); \ + ExReleaseSpinLock( &CcDebugTraceLock, _oldIrql ); \ + } \ +} + +#endif // else ndef CCDBG_LOCK + +#else + +#undef CCDBG_LOCK + +#define DebugTrace(INDENT,LEVEL,X,Y) {NOTHING;} + +#define DebugTrace2(INDENT,LEVEL,X,Y,Z) {NOTHING;} + +#define DebugDump(STR,LEVEL,PTR) {NOTHING;} + +#endif // CCDBG + +// +// Global list of pinned Bcbs which may be examined for debug purposes +// + +#if DBG + +extern ULONG CcBcbCount; +extern LIST_ENTRY CcBcbList; +extern KSPIN_LOCK CcBcbSpinLock; + +#endif + +#endif // _CCh_ diff --git a/private/ntos/cache/copysup.c b/private/ntos/cache/copysup.c new file mode 
100644 index 000000000..e462014b8 --- /dev/null +++ b/private/ntos/cache/copysup.c @@ -0,0 +1,2117 @@ +/*++ + +Copyright (c) 1990 Microsoft Corporation + +Module Name: + + copysup.c + +Abstract: + + This module implements the copy support routines for the Cache subsystem. + +Author: + + Tom Miller [TomM] 4-May-1990 + +Revision History: + +--*/ + +#include "cc.h" + +// +// Define our debug constant +// + +#define me 0x00000004 + + +BOOLEAN +CcCopyRead ( + IN PFILE_OBJECT FileObject, + IN PLARGE_INTEGER FileOffset, + IN ULONG Length, + IN BOOLEAN Wait, + OUT PVOID Buffer, + OUT PIO_STATUS_BLOCK IoStatus + ) + +/*++ + +Routine Description: + + This routine attempts to copy the specified file data from the cache + into the output buffer, and deliver the correct I/O status. It is *not* + safe to call this routine from Dpc level. + + If the caller does not want to block (such as for disk I/O), then + Wait should be supplied as FALSE. If Wait was supplied as FALSE and + it is currently impossible to supply all of the requested data without + blocking, then this routine will return FALSE. However, if the + data is immediately accessible in the cache and no blocking is + required, this routine copies the data and returns TRUE. + + If the caller supplies Wait as TRUE, then this routine is guaranteed + to copy the data and return TRUE. If the data is immediately + accessible in the cache, then no blocking will occur. Otherwise, + the the data transfer from the file into the cache will be initiated, + and the caller will be blocked until the data can be returned. + + File system Fsd's should typically supply Wait = TRUE if they are + processing a synchronous I/O requests, or Wait = FALSE if they are + processing an asynchronous request. + + File system or Server Fsp threads should supply Wait = TRUE. + +Arguments: + + FileObject - Pointer to the file object for a file which was + opened with NO_INTERMEDIATE_BUFFERING clear, i.e., for + which CcInitializeCacheMap was called by the file system. + + FileOffset - Byte offset in file for desired data. + + Length - Length of desired data in bytes. + + Wait - FALSE if caller may not block, TRUE otherwise (see description + above) + + Buffer - Pointer to output buffer to which data should be copied. + + IoStatus - Pointer to standard I/O status block to receive the status + for the transfer. (STATUS_SUCCESS guaranteed for cache + hits, otherwise the actual I/O status is returned.) + + Note that even if FALSE is returned, the IoStatus.Information + field will return the count of any bytes successfully + transferred before a blocking condition occured. The caller + may either choose to ignore this information, or resume + the copy later accounting for bytes transferred. 
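To make the calling pattern concrete, a file system read path might use this routine roughly as follows (an editor's sketch, not part of the original source; Irp, SystemBuffer, ByteOffset, Wait and the MyFsPostRequest helper are hypothetical):

    //
    // Fast path of a hypothetical FSD read handler.
    //
    if (!CcCopyRead( FileObject,
                     &ByteOffset,
                     Length,
                     Wait,                  // FALSE on the non-blocking path
                     SystemBuffer,
                     &Irp->IoStatus )) {

        //
        // Wait was FALSE and the data was not resident; post the
        // request to a worker thread and retry with Wait == TRUE.
        //

        return MyFsPostRequest( Irp );      // hypothetical helper
    }
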
+ +Return Value: + + FALSE - if Wait was supplied as FALSE and the data was not delivered + + TRUE - if the data is being delivered + +--*/ + +{ + PSHARED_CACHE_MAP SharedCacheMap; + PPRIVATE_CACHE_MAP PrivateCacheMap; + PVOID CacheBuffer; + LARGE_INTEGER FOffset; + PVACB Vacb; + PBCB Bcb; + PVACB ActiveVacb; + ULONG ActivePage; + ULONG PageIsDirty; + ULONG SavedState; + KIRQL OldIrql; + NTSTATUS Status; + ULONG OriginalLength = Length; + ULONG PageCount = COMPUTE_PAGES_SPANNED(((PVOID)FileOffset->LowPart), Length); + PETHREAD Thread = PsGetCurrentThread(); + BOOLEAN GotAMiss = FALSE; + + DebugTrace(+1, me, "CcCopyRead\n", 0 ); + + MmSavePageFaultReadAhead( Thread, &SavedState ); + + // + // Get pointer to shared and private cache maps + // + + SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap; + PrivateCacheMap = FileObject->PrivateCacheMap; + + // + // Check for read past file size, the caller must filter this case out. + // + + ASSERT( ( FileOffset->QuadPart + (LONGLONG)Length) <= SharedCacheMap->FileSize.QuadPart ); + + // + // If read ahead is enabled, then do the read ahead here so it + // overlaps with the copy (otherwise we will do it below). + // Note that we are assuming that we will not get ahead of our + // current transfer - if read ahead is working it should either + // already be in memory or else underway. + // + + if (PrivateCacheMap->ReadAheadEnabled && (PrivateCacheMap->ReadAheadLength[1] == 0)) { + CcScheduleReadAhead( FileObject, FileOffset, Length ); + } + + FOffset = *FileOffset; + + // + // Increment performance counters + // + + if (Wait) { + HOT_STATISTIC(CcCopyReadWait) += 1; + + // + // This is not an exact solution, but when IoPageRead gets a miss, + // it cannot tell whether it was CcCopyRead or CcMdlRead, but since + // the miss should occur very soon, by loading the pointer here + // probably the right counter will get incremented, and in any case, + // we hope the errrors average out! + // + + CcMissCounter = &CcCopyReadWaitMiss; + + } else { + HOT_STATISTIC(CcCopyReadNoWait) += 1; + } + + // + // See if we have an active Vacb, that we can just copy to. + // + + GetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, PageIsDirty ); + + if (ActiveVacb != NULL) { + + if ((ULONG)(FOffset.QuadPart >> VACB_OFFSET_SHIFT) == (ActivePage >> (VACB_OFFSET_SHIFT - PAGE_SHIFT))) { + + ULONG LengthToCopy = VACB_MAPPING_GRANULARITY - (FOffset.LowPart & (VACB_MAPPING_GRANULARITY - 1)); + + if (SharedCacheMap->NeedToZero != NULL) { + + PVOID NeedToZero; + + ExAcquireFastLock( &SharedCacheMap->ActiveVacbSpinLock, &OldIrql ); + + // + // Note that the NeedToZero could be cleared, since we + // tested it without the spinlock. + // + + NeedToZero = SharedCacheMap->NeedToZero; + if (NeedToZero != NULL) { + + RtlZeroMemory( NeedToZero, PAGE_SIZE - ((((ULONG)NeedToZero - 1) & (PAGE_SIZE - 1)) + 1) ); + SharedCacheMap->NeedToZero = NULL; + } + + ExReleaseFastLock( &SharedCacheMap->ActiveVacbSpinLock, OldIrql ); + + if (NeedToZero != NULL) { + MmUnlockCachedPage( (PVOID)((PCHAR)NeedToZero - 1) ); + } + } + + // + // Reduce LengthToCopy if it is greater than our caller's length. + // + + if (LengthToCopy > Length) { + LengthToCopy = Length; + } + + // + // Copy the data to the user buffer. 
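The PageCount computed above feeds the fault-clustering hint used just below; a rough worked example (editorial only, assuming a 4KB page size and hypothetical values):

    //
    // A 0x3000 byte read starting at file offset 0x1000 touches three
    // pages, so PageCount == 3 and the first copy runs with
    // MmSetPageFaultReadAhead( Thread, 2 ): if the first page faults,
    // Mm is asked to bring in the remaining two pages in the same I/O.
    //
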
+ // + + try { + + MmSetPageFaultReadAhead( Thread, PageCount - 1 ); + RtlCopyBytes( Buffer, + (PVOID)((PCHAR)ActiveVacb->BaseAddress + + (FOffset.LowPart & (VACB_MAPPING_GRANULARITY - 1))), + LengthToCopy ); + + } except( CcCopyReadExceptionFilter( GetExceptionInformation(), + &Status ) ) { + + MmResetPageFaultReadAhead( Thread, SavedState ); + + SetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, PageIsDirty ); + + // + // If we got an access violation, then the user buffer went + // away. Otherwise we must have gotten an I/O error trying + // to bring the data in. + // + + if (Status == STATUS_ACCESS_VIOLATION) { + ExRaiseStatus( STATUS_INVALID_USER_BUFFER ); + } + else { + ExRaiseStatus( FsRtlNormalizeNtstatus( Status, + STATUS_UNEXPECTED_IO_ERROR )); + } + } + + // + // Now adjust FOffset and Length by what we copied. + // + + Buffer = (PVOID)((PCHAR)Buffer + LengthToCopy); + FOffset.QuadPart = FOffset.QuadPart + (LONGLONG)LengthToCopy; + Length -= LengthToCopy; + + } + + // + // If that was all the data, then remember the Vacb + // + + if (Length == 0) { + + SetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, PageIsDirty ); + + // + // Otherwise we must free it because we will map other vacbs below. + // + + } else { + + CcFreeActiveVacb( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty ); + } + } + + // + // Not all of the transfer will come back at once, so we have to loop + // until the entire transfer is complete. + // + + while (Length != 0) { + + ULONG ReceivedLength; + LARGE_INTEGER BeyondLastByte; + + // + // Call local routine to Map or Access the file data, then move the data, + // then call another local routine to free the data. If we cannot map + // the data because of a Wait condition, return FALSE. + // + // Note that this call may result in an exception, however, if it + // does no Bcb is returned and this routine has absolutely no + // cleanup to perform. Therefore, we do not have a try-finally + // and we allow the possibility that we will simply be unwound + // without notice. + // + + if (Wait) { + + CacheBuffer = CcGetVirtualAddress( SharedCacheMap, + FOffset, + &Vacb, + &ReceivedLength ); + + BeyondLastByte.QuadPart = FOffset.QuadPart + (LONGLONG)ReceivedLength; + + } else if (!CcPinFileData( FileObject, + &FOffset, + Length, + TRUE, + FALSE, + FALSE, + &Bcb, + &CacheBuffer, + &BeyondLastByte )) { + + DebugTrace(-1, me, "CcCopyRead -> FALSE\n", 0 ); + + HOT_STATISTIC(CcCopyReadNoWaitMiss) += 1; + + // + // Enable ReadAhead if we missed. + // + + PrivateCacheMap->ReadAheadEnabled = TRUE; + + return FALSE; + + } else { + + // + // Calculate how much data is described by Bcb starting at our desired + // file offset. + // + + ReceivedLength = (ULONG)(BeyondLastByte.QuadPart - FOffset.QuadPart); + } + + // + // If we got more than we need, make sure to only transfer + // the right amount. + // + + if (ReceivedLength > Length) { + ReceivedLength = Length; + } + + // + // It is possible for the user buffer to become no longer accessible + // since it was last checked by the I/O system. If we fail to access + // the buffer we must raise a status that the caller's exception + // filter considers as "expected". Also we unmap the Bcb here, since + // we otherwise would have no other reason to put a try-finally around + // this loop. 
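The "expected" status convention referred to above can be illustrated with a sketch of a file-system-side filter (editorial, not from the original source; the routine name is hypothetical, and FsRtlIsNtstatusExpected is the usual FsRtl helper for this test):

    LONG
    MyFsExceptionFilter (
        IN PEXCEPTION_POINTERS ExceptionPointer
        )
    {
        NTSTATUS Status = ExceptionPointer->ExceptionRecord->ExceptionCode;

        //
        // Statuses such as STATUS_INVALID_USER_BUFFER raised by the
        // cache copy routines are "expected" and handled locally;
        // anything else continues to unwind.
        //

        return FsRtlIsNtstatusExpected( Status ) ? EXCEPTION_EXECUTE_HANDLER
                                                 : EXCEPTION_CONTINUE_SEARCH;
    }
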
+ // + + try { + + ULONG PagesToGo = COMPUTE_PAGES_SPANNED( CacheBuffer, + ReceivedLength ) - 1; + + // + // We know exactly how much we want to read here, and we do not + // want to read any more in case the caller is doing random access. + // Our read ahead logic takes care of detecting sequential reads, + // and tends to do large asynchronous read aheads. So far we have + // only mapped the data and we have not forced any in. What we + // do now is get into a loop where we copy a page at a time and + // just prior to each move, we tell MM how many additional pages + // we would like to have read in, in the event that we take a + // fault. With this strategy, for cache hits we never make a single + // expensive call to MM to guarantee that the data is in, yet if we + // do take a fault, we are guaranteed to only take one fault because + // we will read all of the data in for the rest of the transfer. + // + // We test first for the multiple page case, to keep the small + // reads faster. + // + + if (PagesToGo != 0) { + + ULONG MoveLength; + ULONG LengthToGo = ReceivedLength; + + while (LengthToGo != 0) { + + MoveLength = (PCHAR)(ROUND_TO_PAGES(((PCHAR)CacheBuffer + 1))) - + (PCHAR)CacheBuffer; + + if (MoveLength > LengthToGo) { + MoveLength = LengthToGo; + } + + // + // Here's hoping that it is cheaper to call Mm to see if + // the page is valid. If not let Mm know how many pages + // we are after before doing the move. + // + + MmSetPageFaultReadAhead( Thread, PagesToGo ); + GotAMiss = (BOOLEAN)!MmCheckCachedPageState( CacheBuffer, FALSE ); + + RtlCopyBytes( Buffer, CacheBuffer, MoveLength ); + + PagesToGo -= 1; + + LengthToGo -= MoveLength; + Buffer = (PCHAR)Buffer + MoveLength; + CacheBuffer = (PCHAR)CacheBuffer + MoveLength; + } + + // + // Handle the read here that stays on a single page. + // + + } else { + + // + // Here's hoping that it is cheaper to call Mm to see if + // the page is valid. If not let Mm know how many pages + // we are after before doing the move. + // + + MmSetPageFaultReadAhead( Thread, 0 ); + GotAMiss = (BOOLEAN)!MmCheckCachedPageState( CacheBuffer, FALSE ); + + RtlCopyBytes( Buffer, CacheBuffer, ReceivedLength ); + + Buffer = (PCHAR)Buffer + ReceivedLength; + } + + } + except( CcCopyReadExceptionFilter( GetExceptionInformation(), + &Status ) ) { + + CcMissCounter = &CcThrowAway; + + // + // If we get an exception, then we have to renable page fault + // clustering and unmap on the way out. + // + + MmResetPageFaultReadAhead( Thread, SavedState ); + + + if (Wait) { + CcFreeVirtualAddress( Vacb ); + } else { + CcUnpinFileData( Bcb, TRUE, UNPIN ); + } + + // + // If we got an access violation, then the user buffer went + // away. Otherwise we must have gotten an I/O error trying + // to bring the data in. + // + + if (Status == STATUS_ACCESS_VIOLATION) { + ExRaiseStatus( STATUS_INVALID_USER_BUFFER ); + } + else { + ExRaiseStatus( FsRtlNormalizeNtstatus( Status, + STATUS_UNEXPECTED_IO_ERROR )); + } + } + + // + // Update number of bytes transferred. + // + + Length -= ReceivedLength; + + // + // Unmap the data now, and calculate length left to transfer. + // + + if (Wait) { + + // + // If there is more to go, just free this vacb. + // + + if (Length != 0) { + + CcFreeVirtualAddress( Vacb ); + + // + // Otherwise save it for the next time through. 
+ // + + } else { + + SetActiveVacb( SharedCacheMap, OldIrql, Vacb, (ULONG)(FOffset.QuadPart >> PAGE_SHIFT), 0 ); + break; + } + + } else { + CcUnpinFileData( Bcb, TRUE, UNPIN ); + } + + // + // Assume we did not get all the data we wanted, and set FOffset + // to the end of the returned data. + // + + FOffset = BeyondLastByte; + } + + MmResetPageFaultReadAhead( Thread, SavedState ); + + CcMissCounter = &CcThrowAway; + + // + // Now enable read ahead if it looks like we got any misses, and do + // the first one. + // + + if (GotAMiss && !PrivateCacheMap->ReadAheadEnabled) { + + PrivateCacheMap->ReadAheadEnabled = TRUE; + CcScheduleReadAhead( FileObject, FileOffset, OriginalLength ); + } + + // + // Now that we have described our desired read ahead, let's + // shift the read history down. + // + + PrivateCacheMap->FileOffset1 = PrivateCacheMap->FileOffset2; + PrivateCacheMap->BeyondLastByte1 = PrivateCacheMap->BeyondLastByte2; + PrivateCacheMap->FileOffset2 = *FileOffset; + PrivateCacheMap->BeyondLastByte2.QuadPart = + FileOffset->QuadPart + (LONGLONG)OriginalLength; + + IoStatus->Status = STATUS_SUCCESS; + IoStatus->Information = OriginalLength; + + DebugTrace(-1, me, "CcCopyRead -> TRUE\n", 0 ); + + return TRUE; +} + + +VOID +CcFastCopyRead ( + IN PFILE_OBJECT FileObject, + IN ULONG FileOffset, + IN ULONG Length, + IN ULONG PageCount, + OUT PVOID Buffer, + OUT PIO_STATUS_BLOCK IoStatus + ) + +/*++ + +Routine Description: + + This routine attempts to copy the specified file data from the cache + into the output buffer, and deliver the correct I/O status. + + This is a faster version of CcCopyRead which only supports 32-bit file + offsets and synchronicity (Wait = TRUE). + +Arguments: + + FileObject - Pointer to the file object for a file which was + opened with NO_INTERMEDIATE_BUFFERING clear, i.e., for + which CcInitializeCacheMap was called by the file system. + + FileOffset - Byte offset in file for desired data. + + Length - Length of desired data in bytes. + + PageCount - Number of pages spanned by the read. + + Buffer - Pointer to output buffer to which data should be copied. + + IoStatus - Pointer to standard I/O status block to receive the status + for the transfer. (STATUS_SUCCESS guaranteed for cache + hits, otherwise the actual I/O status is returned.) + + Note that even if FALSE is returned, the IoStatus.Information + field will return the count of any bytes successfully + transferred before a blocking condition occured. The caller + may either choose to ignore this information, or resume + the copy later accounting for bytes transferred. + +Return Value: + + None + +--*/ + +{ + PSHARED_CACHE_MAP SharedCacheMap; + PPRIVATE_CACHE_MAP PrivateCacheMap; + PVOID CacheBuffer; + LARGE_INTEGER FOffset; + PVACB Vacb; + PVACB ActiveVacb; + ULONG ActivePage; + ULONG PageIsDirty; + ULONG SavedState; + KIRQL OldIrql; + NTSTATUS Status; + LARGE_INTEGER OriginalOffset; + ULONG OriginalLength = Length; + PETHREAD Thread = PsGetCurrentThread(); + BOOLEAN GotAMiss = FALSE; + + DebugTrace(+1, me, "CcFastCopyRead\n", 0 ); + + MmSavePageFaultReadAhead( Thread, &SavedState ); + + // + // Get pointer to shared and private cache maps + // + + SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap; + PrivateCacheMap = FileObject->PrivateCacheMap; + + // + // Check for read past file size, the caller must filter this case out. 
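Since the caller must filter out reads past end of file, a caller-side check along these lines (an editor's sketch; the Fcb header field is illustrative) keeps the assertion below true:

    //
    // Hypothetical caller-side check, done before calling in.
    //
    if (FileOffset >= Fcb->Header.FileSize.LowPart) {
        return STATUS_END_OF_FILE;                           // nothing to read
    }
    if (FileOffset + Length > Fcb->Header.FileSize.LowPart) {
        Length = Fcb->Header.FileSize.LowPart - FileOffset;  // trim to EOF
    }
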
+ // + + ASSERT( (FileOffset + Length) <= SharedCacheMap->FileSize.LowPart ); + + // + // If read ahead is enabled, then do the read ahead here so it + // overlaps with the copy (otherwise we will do it below). + // Note that we are assuming that we will not get ahead of our + // current transfer - if read ahead is working it should either + // already be in memory or else underway. + // + + OriginalOffset.LowPart = FileOffset; + OriginalOffset.HighPart = 0; + + if (PrivateCacheMap->ReadAheadEnabled && (PrivateCacheMap->ReadAheadLength[1] == 0)) { + CcScheduleReadAhead( FileObject, &OriginalOffset, Length ); + } + + // + // This is not an exact solution, but when IoPageRead gets a miss, + // it cannot tell whether it was CcCopyRead or CcMdlRead, but since + // the miss should occur very soon, by loading the pointer here + // probably the right counter will get incremented, and in any case, + // we hope the errrors average out! + // + + CcMissCounter = &CcCopyReadWaitMiss; + + // + // Increment performance counters + // + + HOT_STATISTIC(CcCopyReadWait) += 1; + + // + // See if we have an active Vacb, that we can just copy to. + // + + GetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, PageIsDirty ); + + if (ActiveVacb != NULL) { + + if ((FileOffset >> VACB_OFFSET_SHIFT) == (ActivePage >> (VACB_OFFSET_SHIFT - PAGE_SHIFT))) { + + ULONG LengthToCopy = VACB_MAPPING_GRANULARITY - (FileOffset & (VACB_MAPPING_GRANULARITY - 1)); + + if (SharedCacheMap->NeedToZero != NULL) { + + PVOID NeedToZero; + + ExAcquireFastLock( &SharedCacheMap->ActiveVacbSpinLock, &OldIrql ); + + // + // Note that the NeedToZero could be cleared, since we + // tested it without the spinlock. + // + + NeedToZero = SharedCacheMap->NeedToZero; + if (NeedToZero != NULL) { + + RtlZeroMemory( NeedToZero, PAGE_SIZE - ((((ULONG)NeedToZero - 1) & (PAGE_SIZE - 1)) + 1) ); + SharedCacheMap->NeedToZero = NULL; + } + + ExReleaseFastLock( &SharedCacheMap->ActiveVacbSpinLock, OldIrql ); + + if (NeedToZero != NULL) { + MmUnlockCachedPage( (PVOID)((PCHAR)NeedToZero - 1) ); + } + } + + // + // Reduce LengthToCopy if it is greater than our caller's length. + // + + if (LengthToCopy > Length) { + LengthToCopy = Length; + } + + // + // Copy the data to the user buffer. + // + + try { + + MmSetPageFaultReadAhead( Thread, PageCount - 1 ); + RtlCopyBytes( Buffer, + (PVOID)((PCHAR)ActiveVacb->BaseAddress + + (FileOffset & (VACB_MAPPING_GRANULARITY - 1))), + LengthToCopy ); + + } except( CcCopyReadExceptionFilter( GetExceptionInformation(), + &Status ) ) { + + MmResetPageFaultReadAhead( Thread, SavedState ); + + + SetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, PageIsDirty ); + + // + // If we got an access violation, then the user buffer went + // away. Otherwise we must have gotten an I/O error trying + // to bring the data in. + // + + if (Status == STATUS_ACCESS_VIOLATION) { + ExRaiseStatus( STATUS_INVALID_USER_BUFFER ); + } + else { + ExRaiseStatus( FsRtlNormalizeNtstatus( Status, + STATUS_UNEXPECTED_IO_ERROR )); + } + } + + // + // Now adjust FileOffset and Length by what we copied. + // + + Buffer = (PVOID)((PCHAR)Buffer + LengthToCopy); + FileOffset += LengthToCopy; + Length -= LengthToCopy; + } + + // + // If that was all the data, then remember the Vacb + // + + if (Length == 0) { + + SetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, PageIsDirty ); + + // + // Otherwise we must free it because we will map other vacbs below. 
+ // + + } else { + + CcFreeActiveVacb( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty ); + } + } + + // + // Not all of the transfer will come back at once, so we have to loop + // until the entire transfer is complete. + // + + FOffset.HighPart = 0; + FOffset.LowPart = FileOffset; + + while (Length != 0) { + + ULONG ReceivedLength; + ULONG BeyondLastByte; + + // + // Call local routine to Map or Access the file data, then move the data, + // then call another local routine to free the data. If we cannot map + // the data because of a Wait condition, return FALSE. + // + // Note that this call may result in an exception, however, if it + // does no Bcb is returned and this routine has absolutely no + // cleanup to perform. Therefore, we do not have a try-finally + // and we allow the possibility that we will simply be unwound + // without notice. + // + + CacheBuffer = CcGetVirtualAddress( SharedCacheMap, + FOffset, + &Vacb, + &ReceivedLength ); + + BeyondLastByte = FOffset.LowPart + ReceivedLength; + + // + // If we got more than we need, make sure to only transfer + // the right amount. + // + + if (ReceivedLength > Length) { + ReceivedLength = Length; + } + + // + // It is possible for the user buffer to become no longer accessible + // since it was last checked by the I/O system. If we fail to access + // the buffer we must raise a status that the caller's exception + // filter considers as "expected". Also we unmap the Bcb here, since + // we otherwise would have no other reason to put a try-finally around + // this loop. + // + + try { + + ULONG PagesToGo = COMPUTE_PAGES_SPANNED( CacheBuffer, + ReceivedLength ) - 1; + + // + // We know exactly how much we want to read here, and we do not + // want to read any more in case the caller is doing random access. + // Our read ahead logic takes care of detecting sequential reads, + // and tends to do large asynchronous read aheads. So far we have + // only mapped the data and we have not forced any in. What we + // do now is get into a loop where we copy a page at a time and + // just prior to each move, we tell MM how many additional pages + // we would like to have read in, in the event that we take a + // fault. With this strategy, for cache hits we never make a single + // expensive call to MM to guarantee that the data is in, yet if we + // do take a fault, we are guaranteed to only take one fault because + // we will read all of the data in for the rest of the transfer. + // + // We test first for the multiple page case, to keep the small + // reads faster. + // + + if (PagesToGo != 0) { + + ULONG MoveLength; + ULONG LengthToGo = ReceivedLength; + + while (LengthToGo != 0) { + + MoveLength = (PCHAR)(ROUND_TO_PAGES(((PCHAR)CacheBuffer + 1))) - + (PCHAR)CacheBuffer; + + if (MoveLength > LengthToGo) { + MoveLength = LengthToGo; + } + + // + // Here's hoping that it is cheaper to call Mm to see if + // the page is valid. If not let Mm know how many pages + // we are after before doing the move. + // + + MmSetPageFaultReadAhead( Thread, PagesToGo ); + GotAMiss = (BOOLEAN)!MmCheckCachedPageState( CacheBuffer, FALSE ); + + RtlCopyBytes( Buffer, CacheBuffer, MoveLength ); + + PagesToGo -= 1; + + LengthToGo -= MoveLength; + Buffer = (PCHAR)Buffer + MoveLength; + CacheBuffer = (PCHAR)CacheBuffer + MoveLength; + } + + // + // Handle the read here that stays on a single page. + // + + } else { + + // + // Here's hoping that it is cheaper to call Mm to see if + // the page is valid. 
If not let Mm know how many pages + // we are after before doing the move. + // + + MmSetPageFaultReadAhead( Thread, 0 ); + GotAMiss = (BOOLEAN)!MmCheckCachedPageState( CacheBuffer, FALSE ); + + RtlCopyBytes( Buffer, CacheBuffer, ReceivedLength ); + + Buffer = (PCHAR)Buffer + ReceivedLength; + } + } + except( CcCopyReadExceptionFilter( GetExceptionInformation(), + &Status ) ) { + + CcMissCounter = &CcThrowAway; + + // + // If we get an exception, then we have to renable page fault + // clustering and unmap on the way out. + // + + MmResetPageFaultReadAhead( Thread, SavedState ); + + + CcFreeVirtualAddress( Vacb ); + + // + // If we got an access violation, then the user buffer went + // away. Otherwise we must have gotten an I/O error trying + // to bring the data in. + // + + if (Status == STATUS_ACCESS_VIOLATION) { + ExRaiseStatus( STATUS_INVALID_USER_BUFFER ); + } + else { + ExRaiseStatus( FsRtlNormalizeNtstatus( Status, + STATUS_UNEXPECTED_IO_ERROR )); + } + } + + // + // Update number of bytes transferred. + // + + Length -= ReceivedLength; + + // + // Unmap the data now, and calculate length left to transfer. + // + + if (Length != 0) { + + // + // If there is more to go, just free this vacb. + // + + CcFreeVirtualAddress( Vacb ); + + } else { + + // + // Otherwise save it for the next time through. + // + + SetActiveVacb( SharedCacheMap, OldIrql, Vacb, (FOffset.LowPart >> PAGE_SHIFT), 0 ); + break; + } + + // + // Assume we did not get all the data we wanted, and set FOffset + // to the end of the returned data. + // + + FOffset.LowPart = BeyondLastByte; + } + + MmResetPageFaultReadAhead( Thread, SavedState ); + + CcMissCounter = &CcThrowAway; + + // + // Now enable read ahead if it looks like we got any misses, and do + // the first one. + // + + if (GotAMiss && !PrivateCacheMap->ReadAheadEnabled) { + + PrivateCacheMap->ReadAheadEnabled = TRUE; + CcScheduleReadAhead( FileObject, &OriginalOffset, OriginalLength ); + } + + // + // Now that we have described our desired read ahead, let's + // shift the read history down. + // + + PrivateCacheMap->FileOffset1.LowPart = PrivateCacheMap->FileOffset2.LowPart; + PrivateCacheMap->BeyondLastByte1.LowPart = PrivateCacheMap->BeyondLastByte2.LowPart; + PrivateCacheMap->FileOffset2.LowPart = OriginalOffset.LowPart; + PrivateCacheMap->BeyondLastByte2.LowPart = OriginalOffset.LowPart + OriginalLength; + + IoStatus->Status = STATUS_SUCCESS; + IoStatus->Information = OriginalLength; + + DebugTrace(-1, me, "CcFastCopyRead -> VOID\n", 0 ); +} + + +BOOLEAN +CcCopyWrite ( + IN PFILE_OBJECT FileObject, + IN PLARGE_INTEGER FileOffset, + IN ULONG Length, + IN BOOLEAN Wait, + IN PVOID Buffer + ) + +/*++ + +Routine Description: + + This routine attempts to copy the specified file data from the specified + buffer into the Cache, and deliver the correct I/O status. It is *not* + safe to call this routine from Dpc level. + + If the caller does not want to block (such as for disk I/O), then + Wait should be supplied as FALSE. If Wait was supplied as FALSE and + it is currently impossible to receive all of the requested data without + blocking, then this routine will return FALSE. However, if the + correct space is immediately accessible in the cache and no blocking is + required, this routine copies the data and returns TRUE. + + If the caller supplies Wait as TRUE, then this routine is guaranteed + to copy the data and return TRUE. If the correct space is immediately + accessible in the cache, then no blocking will occur. 
Otherwise, + the necessary work will be initiated to read and/or free cache data, + and the caller will be blocked until the data can be received. + + File system Fsd's should typically supply Wait = TRUE if they are + processing a synchronous I/O requests, or Wait = FALSE if they are + processing an asynchronous request. + + File system or Server Fsp threads should supply Wait = TRUE. + +Arguments: + + FileObject - Pointer to the file object for a file which was + opened with NO_INTERMEDIATE_BUFFERING clear, i.e., for + which CcInitializeCacheMap was called by the file system. + + FileOffset - Byte offset in file to receive the data. + + Length - Length of data in bytes. + + Wait - FALSE if caller may not block, TRUE otherwise (see description + above) + + Buffer - Pointer to input buffer from which data should be copied. + +Return Value: + + FALSE - if Wait was supplied as FALSE and the data was not copied. + + TRUE - if the data has been copied. + +Raises: + + STATUS_INSUFFICIENT_RESOURCES - If a pool allocation failure occurs. + This can only occur if Wait was specified as TRUE. (If Wait is + specified as FALSE, and an allocation failure occurs, this + routine simply returns FALSE.) + +--*/ + +{ + PSHARED_CACHE_MAP SharedCacheMap; + PVACB ActiveVacb; + ULONG ActivePage; + PVOID ActiveAddress; + ULONG PageIsDirty; + KIRQL OldIrql; + NTSTATUS Status; + PVOID CacheBuffer; + LARGE_INTEGER FOffset; + PBCB Bcb; + ULONG ZeroFlags; + LARGE_INTEGER Temp; + + DebugTrace(+1, me, "CcCopyWrite\n", 0 ); + + // + // If the caller specified Wait == FALSE, but the FileObject is WriteThrough, + // then we need to just get out. + // + + if ((FileObject->Flags & FO_WRITE_THROUGH) && !Wait) { + + DebugTrace(-1, me, "CcCopyWrite->FALSE (WriteThrough && !Wait)\n", 0 ); + + return FALSE; + } + + // + // Get pointer to shared cache map + // + + SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap; + FOffset = *FileOffset; + + // + // See if we have an active Vacb, that we can just copy to. + // + + GetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, PageIsDirty ); + + if (ActiveVacb != NULL) { + + // + // See if the request starts in the ActivePage. WriteThrough requests must + // go the longer route through CcMapAndCopy, where WriteThrough flushes are + // implemented. + // + + if (((ULONG)(FOffset.QuadPart >> PAGE_SHIFT) == ActivePage) && (Length != 0) && + !FlagOn( FileObject->Flags, FO_WRITE_THROUGH )) { + + ULONG LengthToCopy = PAGE_SIZE - (FOffset.LowPart & (PAGE_SIZE - 1)); + + // + // Reduce LengthToCopy if it is greater than our caller's length. + // + + if (LengthToCopy > Length) { + LengthToCopy = Length; + } + + // + // Copy the data to the user buffer. + // + + try { + + // + // If we are copying to a page that is locked down, then + // we have to do it under our spinlock, and update the + // NeedToZero field. + // + + OldIrql = 0xFF; + + CacheBuffer = (PVOID)((PCHAR)ActiveVacb->BaseAddress + + (FOffset.LowPart & (VACB_MAPPING_GRANULARITY - 1))); + + if (SharedCacheMap->NeedToZero != NULL) { + + // + // The FastLock may not write our "flag". + // + + OldIrql = 0; + + ExAcquireFastLock( &SharedCacheMap->ActiveVacbSpinLock, &OldIrql ); + + // + // Note that the NeedToZero could be cleared, since we + // tested it without the spinlock. + // + + ActiveAddress = SharedCacheMap->NeedToZero; + if ((ActiveAddress != NULL) && + (((PCHAR)CacheBuffer + LengthToCopy) > (PCHAR)ActiveAddress)) { + + // + // If we are skipping some bytes in the page, then we need + // to zero them. 
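The NeedToZero bookkeeping handled below can be made concrete with a small worked example (editorial; the offsets are hypothetical and assume a 4KB page):

    //
    // Suppose the locked page is initialized only up to page offset
    // 0x200 (SharedCacheMap->NeedToZero) and the caller now writes
    // 0x100 bytes at page offset 0x300 (CacheBuffer):
    //
    //   - the uninitialized gap [0x200, 0x300) is zeroed here, and
    //   - NeedToZero advances to 0x400 (CacheBuffer + LengthToCopy),
    //
    // so no stale bytes are ever exposed between the old and new data.
    //
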
+ // + + if ((PCHAR)CacheBuffer > (PCHAR)ActiveAddress) { + + RtlZeroMemory( ActiveAddress, (PCHAR)CacheBuffer - (PCHAR)ActiveAddress ); + } + SharedCacheMap->NeedToZero = (PVOID)((PCHAR)CacheBuffer + LengthToCopy); + } + + ExReleaseFastLock( &SharedCacheMap->ActiveVacbSpinLock, OldIrql ); + } + + RtlCopyBytes( CacheBuffer, Buffer, LengthToCopy ); + + } except( CcCopyReadExceptionFilter( GetExceptionInformation(), + &Status ) ) { + + // + // If we failed to overwrite the uninitialized data, + // zero it now (we cannot safely restore NeedToZero). + // + + if (OldIrql != 0xFF) { + RtlZeroBytes( CacheBuffer, LengthToCopy ); + } + + SetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, ACTIVE_PAGE_IS_DIRTY ); + + // + // If we got an access violation, then the user buffer went + // away. Otherwise we must have gotten an I/O error trying + // to bring the data in. + // + + if (Status == STATUS_ACCESS_VIOLATION) { + ExRaiseStatus( STATUS_INVALID_USER_BUFFER ); + } + else { + ExRaiseStatus( FsRtlNormalizeNtstatus( Status, + STATUS_UNEXPECTED_IO_ERROR )); + } + } + + // + // Now adjust FOffset and Length by what we copied. + // + + Buffer = (PVOID)((PCHAR)Buffer + LengthToCopy); + FOffset.QuadPart = FOffset.QuadPart + (LONGLONG)LengthToCopy; + Length -= LengthToCopy; + + // + // If that was all the data, then get outski... + // + + if (Length == 0) { + + SetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, ACTIVE_PAGE_IS_DIRTY ); + return TRUE; + } + + // + // Remember that the page is dirty now. + // + + PageIsDirty |= ACTIVE_PAGE_IS_DIRTY; + } + + CcFreeActiveVacb( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty ); + + // + // Else someone else could have the active page, and may want to zero + // the range we plan to write! + // + + } else if (SharedCacheMap->NeedToZero != NULL) { + + CcFreeActiveVacb( SharedCacheMap, NULL, 0, FALSE ); + } + + // + // At this point we can calculate the ZeroFlags. + // + + // + // We can always zero middle pages, if any. + // + + ZeroFlags = ZERO_MIDDLE_PAGES; + + if (((FOffset.LowPart & (PAGE_SIZE - 1)) == 0) && + (Length >= PAGE_SIZE)) { + ZeroFlags |= ZERO_FIRST_PAGE; + } + + if (((FOffset.LowPart + Length) & (PAGE_SIZE - 1)) == 0) { + ZeroFlags |= ZERO_LAST_PAGE; + } + + Temp = FOffset; + Temp.LowPart &= ~(PAGE_SIZE -1); + Temp.QuadPart = ((PFSRTL_COMMON_FCB_HEADER)FileObject->FsContext)->ValidDataLength.QuadPart - + Temp.QuadPart; + + if (Temp.QuadPart <= 0) { + ZeroFlags |= ZERO_FIRST_PAGE | ZERO_MIDDLE_PAGES | ZERO_LAST_PAGE; + } else if ((Temp.HighPart == 0) && (Temp.LowPart <= PAGE_SIZE)) { + ZeroFlags |= ZERO_MIDDLE_PAGES | ZERO_LAST_PAGE; + } + + // + // Call a routine to map and copy the data in Mm and get out. + // + + if (Wait) { + + CcMapAndCopy( SharedCacheMap, + Buffer, + &FOffset, + Length, + ZeroFlags, + BooleanFlagOn( FileObject->Flags, FO_WRITE_THROUGH )); + + return TRUE; + } + + // + // The rest of this routine is the Wait == FALSE case. + // + // Not all of the transfer will come back at once, so we have to loop + // until the entire transfer is complete. + // + + while (Length != 0) { + + ULONG ReceivedLength; + LARGE_INTEGER BeyondLastByte; + + if (!CcPinFileData( FileObject, + &FOffset, + Length, + FALSE, + TRUE, + FALSE, + &Bcb, + &CacheBuffer, + &BeyondLastByte )) { + + DebugTrace(-1, me, "CcCopyWrite -> FALSE\n", 0 ); + + return FALSE; + + } else { + + // + // Calculate how much data is described by Bcb starting at our desired + // file offset. 
+ // + + ReceivedLength = (ULONG)(BeyondLastByte.QuadPart - FOffset.QuadPart); + + // + // If we got more than we need, make sure to only transfer + // the right amount. + // + + if (ReceivedLength > Length) { + ReceivedLength = Length; + } + } + + // + // It is possible for the user buffer to become no longer accessible + // since it was last checked by the I/O system. If we fail to access + // the buffer we must raise a status that the caller's exception + // filter considers as "expected". Also we unmap the Bcb here, since + // we otherwise would have no other reason to put a try-finally around + // this loop. + // + + try { + + RtlCopyBytes( CacheBuffer, Buffer, ReceivedLength ); + + CcSetDirtyPinnedData( Bcb, NULL ); + CcUnpinFileData( Bcb, FALSE, UNPIN ); + } + except( CcCopyReadExceptionFilter( GetExceptionInformation(), + &Status ) ) { + + CcUnpinFileData( Bcb, TRUE, UNPIN ); + + // + // If we got an access violation, then the user buffer went + // away. Otherwise we must have gotten an I/O error trying + // to bring the data in. + // + + if (Status == STATUS_ACCESS_VIOLATION) { + ExRaiseStatus( STATUS_INVALID_USER_BUFFER ); + } + else { + + ExRaiseStatus(FsRtlNormalizeNtstatus( Status, STATUS_UNEXPECTED_IO_ERROR )); + } + } + + // + // Assume we did not get all the data we wanted, and set FOffset + // to the end of the returned data and adjust the Buffer and Length. + // + + FOffset = BeyondLastByte; + Buffer = (PCHAR)Buffer + ReceivedLength; + Length -= ReceivedLength; + } + + DebugTrace(-1, me, "CcCopyWrite -> TRUE\n", 0 ); + + return TRUE; +} + + +VOID +CcFastCopyWrite ( + IN PFILE_OBJECT FileObject, + IN ULONG FileOffset, + IN ULONG Length, + IN PVOID Buffer + ) + +/*++ + +Routine Description: + + This routine attempts to copy the specified file data from the specified + buffer into the Cache, and deliver the correct I/O status. + + This is a faster version of CcCopyWrite which only supports 32-bit file + offsets and synchronicity (Wait = TRUE) and no Write Through. + +Arguments: + + FileObject - Pointer to the file object for a file which was + opened with NO_INTERMEDIATE_BUFFERING clear, i.e., for + which CcInitializeCacheMap was called by the file system. + + FileOffset - Byte offset in file to receive the data. + + Length - Length of data in bytes. + + Buffer - Pointer to input buffer from which data should be copied. + +Return Value: + + None + +Raises: + + STATUS_INSUFFICIENT_RESOURCES - If a pool allocation failure occurs. + This can only occur if Wait was specified as TRUE. (If Wait is + specified as FALSE, and an allocation failure occurs, this + routine simply returns FALSE.) + +--*/ + +{ + PSHARED_CACHE_MAP SharedCacheMap; + PVOID CacheBuffer; + PVACB ActiveVacb; + ULONG ActivePage; + PVOID ActiveAddress; + ULONG PageIsDirty; + KIRQL OldIrql; + NTSTATUS Status; + ULONG ZeroFlags; + ULONG ValidDataLength; + LARGE_INTEGER FOffset; + + DebugTrace(+1, me, "CcFastCopyWrite\n", 0 ); + + // + // Get pointer to shared cache map and a copy of valid data length + // + + SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap; + + // + // See if we have an active Vacb, that we can just copy to. + // + + GetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, PageIsDirty ); + + if (ActiveVacb != NULL) { + + // + // See if the request starts in the ActivePage. WriteThrough requests must + // go the longer route through CcMapAndCopy, where WriteThrough flushes are + // implemented. 
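Because this routine supports neither write-through nor 64-bit offsets, a fast-I/O caller typically routes requests roughly as follows (an editor's sketch, simplified from what a real fast-I/O write path would check; FileOffset here is the caller's PLARGE_INTEGER):

    if (!FlagOn( FileObject->Flags, FO_WRITE_THROUGH ) &&
        (FileOffset->HighPart == 0) &&
        (FileOffset->LowPart + Length >= FileOffset->LowPart)) {

        //
        // Cached, non-write-through, 32-bit request: take the fast path.
        //
        CcFastCopyWrite( FileObject, FileOffset->LowPart, Length, Buffer );

    } else {

        //
        // Otherwise fall back to the general routine (Wait == TRUE).
        //
        (VOID)CcCopyWrite( FileObject, FileOffset, Length, TRUE, Buffer );
    }
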
+ // + + if (((FileOffset >> PAGE_SHIFT) == ActivePage) && (Length != 0) && + !FlagOn( FileObject->Flags, FO_WRITE_THROUGH )) { + + ULONG LengthToCopy = PAGE_SIZE - (FileOffset & (PAGE_SIZE - 1)); + + // + // Reduce LengthToCopy if it is greater than our caller's length. + // + + if (LengthToCopy > Length) { + LengthToCopy = Length; + } + + // + // Copy the data to the user buffer. + // + + try { + + // + // If we are copying to a page that is locked down, then + // we have to do it under our spinlock, and update the + // NeedToZero field. + // + + OldIrql = 0xFF; + + CacheBuffer = (PVOID)((PCHAR)ActiveVacb->BaseAddress + + (FileOffset & (VACB_MAPPING_GRANULARITY - 1))); + + if (SharedCacheMap->NeedToZero != NULL) { + + // + // The FastLock may not write our "flag". + // + + OldIrql = 0; + + ExAcquireFastLock( &SharedCacheMap->ActiveVacbSpinLock, &OldIrql ); + + // + // Note that the NeedToZero could be cleared, since we + // tested it without the spinlock. + // + + ActiveAddress = SharedCacheMap->NeedToZero; + if ((ActiveAddress != NULL) && + (((PCHAR)CacheBuffer + LengthToCopy) > (PCHAR)ActiveAddress)) { + + // + // If we are skipping some bytes in the page, then we need + // to zero them. + // + + if ((PCHAR)CacheBuffer > (PCHAR)ActiveAddress) { + + RtlZeroMemory( ActiveAddress, (PCHAR)CacheBuffer - (PCHAR)ActiveAddress ); + } + SharedCacheMap->NeedToZero = (PVOID)((PCHAR)CacheBuffer + LengthToCopy); + } + + ExReleaseFastLock( &SharedCacheMap->ActiveVacbSpinLock, OldIrql ); + } + + RtlCopyBytes( CacheBuffer, Buffer, LengthToCopy ); + + } except( CcCopyReadExceptionFilter( GetExceptionInformation(), + &Status ) ) { + + // + // If we failed to overwrite the uninitialized data, + // zero it now (we cannot safely restore NeedToZero). + // + + if (OldIrql != 0xFF) { + RtlZeroBytes( CacheBuffer, LengthToCopy ); + } + + SetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, ACTIVE_PAGE_IS_DIRTY ); + + // + // If we got an access violation, then the user buffer went + // away. Otherwise we must have gotten an I/O error trying + // to bring the data in. + // + + if (Status == STATUS_ACCESS_VIOLATION) { + ExRaiseStatus( STATUS_INVALID_USER_BUFFER ); + } + else { + ExRaiseStatus( FsRtlNormalizeNtstatus( Status, + STATUS_UNEXPECTED_IO_ERROR )); + } + } + + // + // Now adjust FileOffset and Length by what we copied. + // + + Buffer = (PVOID)((PCHAR)Buffer + LengthToCopy); + FileOffset += LengthToCopy; + Length -= LengthToCopy; + + // + // If that was all the data, then get outski... + // + + if (Length == 0) { + + SetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, ACTIVE_PAGE_IS_DIRTY ); + return; + } + + // + // Remember that the page is dirty now. + // + + PageIsDirty |= ACTIVE_PAGE_IS_DIRTY; + } + + CcFreeActiveVacb( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty ); + + // + // Else someone else could have the active page, and may want to zero + // the range we plan to write! 
+ // + + } else if (SharedCacheMap->NeedToZero != NULL) { + + CcFreeActiveVacb( SharedCacheMap, NULL, 0, FALSE ); + } + + // + // Set up for call to CcMapAndCopy + // + + FOffset.LowPart = FileOffset; + FOffset.HighPart = 0; + + ValidDataLength = ((PFSRTL_COMMON_FCB_HEADER)FileObject->FsContext)->ValidDataLength.LowPart; + + ASSERT((ValidDataLength == MAXULONG) || + (((PFSRTL_COMMON_FCB_HEADER)FileObject->FsContext)->ValidDataLength.HighPart == 0)); + + // + // At this point we can calculate the ReadOnly flag for + // the purposes of whether to use the Bcb resource, and + // we can calculate the ZeroFlags. + // + + // + // We can always zero middle pages, if any. + // + + ZeroFlags = ZERO_MIDDLE_PAGES; + + if (((FileOffset & (PAGE_SIZE - 1)) == 0) && + (Length >= PAGE_SIZE)) { + ZeroFlags |= ZERO_FIRST_PAGE; + } + + if (((FileOffset + Length) & (PAGE_SIZE - 1)) == 0) { + ZeroFlags |= ZERO_LAST_PAGE; + } + + if ((FileOffset & ~(PAGE_SIZE - 1)) >= ValidDataLength) { + ZeroFlags |= ZERO_FIRST_PAGE | ZERO_MIDDLE_PAGES | ZERO_LAST_PAGE; + } else if (((FileOffset & ~(PAGE_SIZE - 1)) + PAGE_SIZE) >= ValidDataLength) { + ZeroFlags |= ZERO_MIDDLE_PAGES | ZERO_LAST_PAGE; + } + + // + // Call a routine to map and copy the data in Mm and get out. + // + + CcMapAndCopy( SharedCacheMap, + Buffer, + &FOffset, + Length, + ZeroFlags, + BooleanFlagOn( FileObject->Flags, FO_WRITE_THROUGH )); + + DebugTrace(-1, me, "CcFastCopyWrite -> VOID\n", 0 ); +} + + +LONG +CcCopyReadExceptionFilter( + IN PEXCEPTION_POINTERS ExceptionPointer, + IN PNTSTATUS ExceptionCode + ) + +/*++ + +Routine Description: + + This routine serves as a exception filter and has the special job of + extracting the "real" I/O error when Mm raises STATUS_IN_PAGE_ERROR + beneath us. + +Arguments: + + ExceptionPointer - A pointer to the exception record that contains + the real Io Status. + + ExceptionCode - A pointer to an NTSTATUS that is to receive the real + status. + +Return Value: + + EXCEPTION_EXECUTE_HANDLER + +--*/ + +{ + *ExceptionCode = ExceptionPointer->ExceptionRecord->ExceptionCode; + + if ( (*ExceptionCode == STATUS_IN_PAGE_ERROR) && + (ExceptionPointer->ExceptionRecord->NumberParameters >= 3) ) { + + *ExceptionCode = ExceptionPointer->ExceptionRecord->ExceptionInformation[2]; + } + + ASSERT( !NT_SUCCESS(*ExceptionCode) ); + + return EXCEPTION_EXECUTE_HANDLER; +} + + +BOOLEAN +CcCanIWrite ( + IN PFILE_OBJECT FileObject, + IN ULONG BytesToWrite, + IN BOOLEAN Wait, + IN UCHAR Retrying + ) + +/*++ + +Routine Description: + + This routine tests whether it is ok to do a write to the cache + or not, according to the Thresholds of dirty bytes and available + pages. The first time this routine is called for a request (Retrying + FALSE), we automatically make the new request queue if there are other + requests in the queue. + + Note that the ListEmpty test is important to prevent small requests from sneaking + in and starving large requests. + +Arguments: + + FileObject - for the file to be written + + BytesToWrite - Number of bytes caller wishes to write to the Cache. + + Wait - TRUE if the caller owns no resources, and can block inside this routine + until it is ok to write. + + Retrying - Specified as FALSE when the request is first received, and + otherwise specified as TRUE if this write has already entered + the queue. Special non-zero value of MAXUCHAR indicates that + we were called within the cache manager with a MasterSpinLock held, + so do not attempt to acquire it here. 
MAXUCHAR - 1 means we + were called within the Cache Manager with some other spinlock + held. For either of these two special values, we do not touch + the FsRtl header. + +Return Value: + + TRUE if it is ok to write. + FALSE if the caller should defer the write via a call to CcDeferWrite. + +--*/ + +{ + PSHARED_CACHE_MAP SharedCacheMap; + KEVENT Event; + KIRQL OldIrql; + ULONG PagesToWrite; + BOOLEAN ExceededPerFileThreshold; + DEFERRED_WRITE DeferredWrite; + PSECTION_OBJECT_POINTERS SectionObjectPointers; + + // + // Do a special test here for file objects that keep track of dirty + // pages on a per-file basis. This is used mainly for slow links. + // + + ExceededPerFileThreshold = FALSE; + + PagesToWrite = ((BytesToWrite < 0x40000 ? + BytesToWrite : 0x40000) + (PAGE_SIZE - 1)) / PAGE_SIZE; + + // + // Don't dereference the FsContext field if we were called while holding + // a spinlock. + // + + if ((Retrying >= MAXUCHAR - 1) || + + FlagOn(((PFSRTL_COMMON_FCB_HEADER)(FileObject->FsContext))->Flags, + FSRTL_FLAG_LIMIT_MODIFIED_PAGES)) { + + if (Retrying != MAXUCHAR) { + ExAcquireFastLock( &CcMasterSpinLock, &OldIrql ); + } + + if (((SectionObjectPointers = FileObject->SectionObjectPointer) != NULL) && + ((SharedCacheMap = SectionObjectPointers->SharedCacheMap) != NULL) && + (SharedCacheMap->DirtyPageThreshold != 0) && + (SharedCacheMap->DirtyPages != 0) && + ((PagesToWrite + SharedCacheMap->DirtyPages) > + SharedCacheMap->DirtyPageThreshold)) { + + ExceededPerFileThreshold = TRUE; + } + + if (Retrying != MAXUCHAR) { + ExReleaseFastLock( &CcMasterSpinLock, OldIrql ); + } + } + + // + // See if it is ok to do the write right now + // + + if ((Retrying || IsListEmpty(&CcDeferredWrites)) + + && + + (CcTotalDirtyPages + PagesToWrite < CcDirtyPageThreshold) + + && + + MmEnoughMemoryForWrite() + + && + + !ExceededPerFileThreshold) { + + return TRUE; + + // + // Otherwise, if our caller is synchronous, we will just wait here. + // + + } + + if (IsListEmpty(&CcDeferredWrites) ) { + + // + // Get a write scan to occur NOW + // + + KeSetTimer( &LazyWriter.ScanTimer, CcNoDelay, &LazyWriter.ScanDpc ); + } + + if (Wait) { + + KeInitializeEvent( &Event, NotificationEvent, FALSE ); + + // + // Fill in the block. Note that we can access the Fsrtl Common Header + // even if it's paged because Wait will be FALSE if called from + // within the cache. + // + + DeferredWrite.NodeTypeCode = CACHE_NTC_DEFERRED_WRITE; + DeferredWrite.NodeByteSize = sizeof(DEFERRED_WRITE); + DeferredWrite.FileObject = FileObject; + DeferredWrite.BytesToWrite = BytesToWrite; + DeferredWrite.Event = &Event; + DeferredWrite.LimitModifiedPages = BooleanFlagOn(((PFSRTL_COMMON_FCB_HEADER)(FileObject->FsContext))->Flags, + FSRTL_FLAG_LIMIT_MODIFIED_PAGES); + + // + // Now insert at the appropriate end of the list + // + + if (Retrying) { + (VOID)ExInterlockedInsertHeadList( &CcDeferredWrites, + &DeferredWrite.DeferredWriteLinks, + &CcDeferredWriteSpinLock ); + } else { + (VOID)ExInterlockedInsertTailList( &CcDeferredWrites, + &DeferredWrite.DeferredWriteLinks, + &CcDeferredWriteSpinLock ); + } + + while (TRUE) { + + // + // Now since we really didn't synchronize anything but the insertion, + // we call the post routine to make sure that in some wierd case we + // do not leave anyone hanging with no dirty bytes for the Lazy Writer. + // + + CcPostDeferredWrites(); + + // + // Finally wait until the event is signalled and we can write + // and return to tell the guy he can write. 
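For reference, the caller-side pattern that this routine and CcDeferWrite are designed around looks roughly like this (an editor's sketch; MyFsPostWrite, Context1 and Context2 are hypothetical):

    if (!CcCanIWrite( FileObject, BytesToWrite, FALSE, FALSE )) {

        //
        // Too many dirty pages right now - queue the write and let the
        // Cache Manager post it once the Lazy Writer makes progress.
        //
        CcDeferWrite( FileObject,
                      &MyFsPostWrite,       // hypothetical post routine
                      Context1,
                      Context2,
                      BytesToWrite,
                      FALSE );

        return STATUS_PENDING;
    }
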
+ // + + if (KeWaitForSingleObject( &Event, + Executive, + KernelMode, + FALSE, + &CcIdleDelay ) == STATUS_SUCCESS) { + + + return TRUE; + } + } + + } else { + return FALSE; + } +} + + +VOID +CcDeferWrite ( + IN PFILE_OBJECT FileObject, + IN PCC_POST_DEFERRED_WRITE PostRoutine, + IN PVOID Context1, + IN PVOID Context2, + IN ULONG BytesToWrite, + IN BOOLEAN Retrying + ) + +/*++ + +Routine Description: + + This routine may be called to have the Cache Manager defer posting + of a write until the Lazy Writer makes some progress writing, or + there are more available pages. A file system would normally call + this routine after receiving FALSE from CcCanIWrite, and preparing + the request to be posted. + +Arguments: + + FileObject - for the file to be written + + PostRoutine - Address of the PostRoutine that the Cache Manager can + call to post the request when conditions are right. Note + that it is possible that this routine will be called + immediately from this routine. + + Context1 - First context parameter for the post routine. + + Context2 - Secont parameter for the post routine. + + BytesToWrite - Number of bytes that the request is trying to write + to the cache. + + Retrying - Supplied as FALSE if the request is being posted for the + first time, TRUE otherwise. + +Return Value: + + None + +--*/ + +{ + PDEFERRED_WRITE DeferredWrite; + + // + // Attempt to allocate a deferred write block, and if we do not get + // one, just post it immediately rather than gobbling up must succeed + // pool. + // + + DeferredWrite = ExAllocatePool( NonPagedPool, sizeof(DEFERRED_WRITE) ); + + if (DeferredWrite == NULL) { + (*PostRoutine)( Context1, Context2 ); + return; + } + + // + // Fill in the block. + // + + DeferredWrite->NodeTypeCode = CACHE_NTC_DEFERRED_WRITE; + DeferredWrite->NodeByteSize = sizeof(DEFERRED_WRITE); + DeferredWrite->FileObject = FileObject; + DeferredWrite->BytesToWrite = BytesToWrite; + DeferredWrite->Event = NULL; + DeferredWrite->PostRoutine = PostRoutine; + DeferredWrite->Context1 = Context1; + DeferredWrite->Context2 = Context2; + DeferredWrite->LimitModifiedPages = BooleanFlagOn(((PFSRTL_COMMON_FCB_HEADER)(FileObject->FsContext))->Flags, + FSRTL_FLAG_LIMIT_MODIFIED_PAGES); + + // + // Now insert at the appropriate end of the list + // + + if (Retrying) { + (VOID)ExInterlockedInsertHeadList( &CcDeferredWrites, + &DeferredWrite->DeferredWriteLinks, + &CcDeferredWriteSpinLock ); + } else { + (VOID)ExInterlockedInsertTailList( &CcDeferredWrites, + &DeferredWrite->DeferredWriteLinks, + &CcDeferredWriteSpinLock ); + } + + // + // Now since we really didn't synchronize anything but the insertion, + // we call the post routine to make sure that in some wierd case we + // do not leave anyone hanging with no dirty bytes for the Lazy Writer. + // + + CcPostDeferredWrites(); +} + + +VOID +CcPostDeferredWrites ( + ) + +/*++ + +Routine Description: + + This routine may be called to see if any deferred writes should be posted + now, and to post them. It should be called any time the status of the + queue may have changed, such as when a new entry has been added, or the + Lazy Writer has finished writing out buffers and set them clean. + +Arguments: + + None + +Return Value: + + None + +--*/ + +{ + PDEFERRED_WRITE DeferredWrite; + ULONG TotalBytesLetLoose = 0; + KIRQL OldIrql; + + do { + + // + // Initially clear the deferred write structure pointer + // and syncrhronize. 
+ // + + DeferredWrite = NULL; + + ExAcquireSpinLock( &CcDeferredWriteSpinLock, &OldIrql ); + + // + // If the list is empty we are done. + // + + if (!IsListEmpty(&CcDeferredWrites)) { + + PLIST_ENTRY Entry; + + Entry = CcDeferredWrites.Flink; + + while (Entry != &CcDeferredWrites) { + + DeferredWrite = CONTAINING_RECORD( Entry, + DEFERRED_WRITE, + DeferredWriteLinks ); + + // + // Check for a paranoid case here that TotalBytesLetLoose + // wraps. We stop processing the list at this time. + // + + TotalBytesLetLoose += DeferredWrite->BytesToWrite; + + if (TotalBytesLetLoose < DeferredWrite->BytesToWrite) { + + DeferredWrite = NULL; + break; + } + + // + // If it is now ok to post this write, remove him from + // the list. + // + + if (CcCanIWrite( DeferredWrite->FileObject, + TotalBytesLetLoose, + FALSE, + MAXUCHAR - 1 )) { + + RemoveEntryList( &DeferredWrite->DeferredWriteLinks ); + break; + + // + // Otherwise, it is time to stop processing the list, so + // we clear the pointer again unless we throttled this item + // because of a private dirty page limit. + // + + } else { + + // + // If this was a private throttle, skip over it and + // remove its byte count from the running total. + // + + if (DeferredWrite->LimitModifiedPages) { + + Entry = Entry->Flink; + TotalBytesLetLoose -= DeferredWrite->BytesToWrite; + DeferredWrite = NULL; + continue; + + } else { + + DeferredWrite = NULL; + + break; + } + } + } + } + + ExReleaseSpinLock( &CcDeferredWriteSpinLock, OldIrql ); + + // + // If we got something, set the event or call the post routine + // and deallocate the structure. + // + + if (DeferredWrite != NULL) { + + if (DeferredWrite->Event != NULL) { + + KeSetEvent( DeferredWrite->Event, 0, FALSE ); + + } else { + + (*DeferredWrite->PostRoutine)( DeferredWrite->Context1, + DeferredWrite->Context2 ); + ExFreePool( DeferredWrite ); + } + } + + // + // Loop until we find no more work to do. + // + + } while (DeferredWrite != NULL); +} diff --git a/private/ntos/cache/dirs b/private/ntos/cache/dirs new file mode 100644 index 000000000..a2a38f0fd --- /dev/null +++ b/private/ntos/cache/dirs @@ -0,0 +1,24 @@ +!IF 0 + +Copyright (c) 1989 Microsoft Corporation + +Module Name: + + dirs. + +Abstract: + + This file specifies the subdirectories of the current directory that + contain component makefiles. + + +Author: + + +NOTE: Commented description of this file is in \nt\bak\bin\dirs.tpl + +!ENDIF + +DIRS=up + +OPTIONAL_DIRS=mp diff --git a/private/ntos/cache/fssup.c b/private/ntos/cache/fssup.c new file mode 100644 index 000000000..82990558a --- /dev/null +++ b/private/ntos/cache/fssup.c @@ -0,0 +1,3343 @@ +/*++ + +Copyright (c) 1990 Microsoft Corporation + +Module Name: + + fssup.c + +Abstract: + + This module implements the File System support routines for the + Cache subsystem. + +Author: + + Tom Miller [TomM] 4-May-1990 + +Revision History: + +--*/ + +#include "cc.h" + +// +// The Bug check file id for this module +// + +#define BugCheckFileId (CACHE_BUG_CHECK_FSSUP) + +// +// Define our debug constant +// + +#define me 0x00000001 + +// +// For your debugging pleasure, if the flag doesn't move! 
(Currently not used) +// + +#define IsSyscacheFile(FO) (((FO) != NULL) && \ + (*(PUSHORT)(FO)->FsContext == 0X705) && \ + FlagOn(*(PULONG)((PCHAR)(FO)->FsContext + 0x48), 0x80000000)) + +extern POBJECT_TYPE IoFileObjectType; +extern ULONG MmLargeSystemCache; + +VOID +CcUnmapAndPurge( + IN PSHARED_CACHE_MAP SharedCacheMap + ); + +VOID +CcPurgeAndClearCacheSection ( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN PLARGE_INTEGER FileOffset + ); + +#ifdef ALLOC_PRAGMA +#pragma alloc_text(INIT,CcInitializeCacheManager) +#endif + + +BOOLEAN +CcInitializeCacheManager ( + ) + +/*++ + +Routine Description: + + This routine must be called during system initialization before the + first call to any file system, to allow the Cache Manager to initialize + its global data structures. This routine has no dependencies on other + system components being initialized. + +Arguments: + + None + +Return Value: + + TRUE if initialization was successful + +--*/ + +{ + CLONG i; + USHORT NumberOfItems; + PWORK_QUEUE_ITEM WorkItem; + +#ifdef CCDBG_LOCK + KeInitializeSpinLock( &CcDebugTraceLock ); +#endif + +#if DBG + CcBcbCount = 0; + InitializeListHead( &CcBcbList ); + KeInitializeSpinLock( &CcBcbSpinLock ); +#endif + + // + // Initialize shared cache map list structures + // + + KeInitializeSpinLock( &CcMasterSpinLock ); + InitializeListHead( &CcCleanSharedCacheMapList ); + InitializeListHead( &CcDirtySharedCacheMapList.SharedCacheMapLinks ); + CcDirtySharedCacheMapList.Flags = IS_CURSOR; + InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks, + &CcLazyWriterCursor.SharedCacheMapLinks ); + CcLazyWriterCursor.Flags = IS_CURSOR; + + // + // Initialize worker thread structures + // + + KeInitializeSpinLock( &CcWorkQueueSpinlock ); + InitializeListHead( &CcIdleWorkerThreadList ); + InitializeListHead( &CcExpressWorkQueue ); + InitializeListHead( &CcRegularWorkQueue ); + + // + // Set the number of worker threads based on the system size. + // + + CcCapturedSystemSize = MmQuerySystemSize(); + if (CcNumberWorkerThreads == 0) { + + switch (CcCapturedSystemSize) { + case MmSmallSystem: + CcNumberWorkerThreads = ExCriticalWorkerThreads - 1; + CcDirtyPageThreshold = MmNumberOfPhysicalPages / 8; + break; + + case MmMediumSystem: + CcNumberWorkerThreads = ExCriticalWorkerThreads - 1; + CcDirtyPageThreshold = MmNumberOfPhysicalPages / 4; + break; + + case MmLargeSystem: + CcNumberWorkerThreads = ExCriticalWorkerThreads - 2; + CcDirtyPageThreshold = MmNumberOfPhysicalPages / 4 + + MmNumberOfPhysicalPages / 8; + +#if 0 + // + // Use more memory if we are a large server. + // + + if ((MmLargeSystemCache != 0) && + (CcDirtyPageThreshold < (MmNumberOfPhysicalPages - (0xE00000 / PAGE_SIZE)))) { + + CcDirtyPageThreshold = MmNumberOfPhysicalPages - (0xE00000 / PAGE_SIZE); + } +#endif + break; + + default: + CcNumberWorkerThreads = 1; + CcDirtyPageThreshold = MmNumberOfPhysicalPages / 8; + } + +// CcDirtyPageThreshold = (2*1024*1024)/PAGE_SIZE; + + if (MmSystemCacheWs.MaximumWorkingSetSize > ((4*1024*1024)/PAGE_SIZE)) { + CcDirtyPageThreshold = MmSystemCacheWs.MaximumWorkingSetSize - + ((2*1024*1024)/PAGE_SIZE); + } + + CcDirtyPageTarget = CcDirtyPageThreshold / 2 + + CcDirtyPageThreshold / 4; + } + + // + // Now allocate and initialize the above number of worker thread + // items. + // + + for (i = 0; i < CcNumberWorkerThreads; i++) { + + WorkItem = ExAllocatePool( NonPagedPool, sizeof(WORK_QUEUE_ITEM) ); + + // + // Initialize the work queue item and insert in our queue + // of potential worker threads. 
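To make the sizing above concrete, a rough worked example (editorial; assumes a 4KB page size and ignores the later system-cache working-set adjustment): on a medium system with 8192 physical pages (32MB), CcDirtyPageThreshold becomes 8192 / 4 = 2048 pages (8MB), and CcDirtyPageTarget becomes 2048/2 + 2048/4 = 1536 pages, so the Lazy Writer aims to bring the dirty-page count back down to three quarters of the threshold once it is exceeded.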
+ // + + ExInitializeWorkItem( WorkItem, CcWorkerThread, WorkItem ); + InsertTailList( &CcIdleWorkerThreadList, &WorkItem->List ); + } + + // + // Initialize the Lazy Writer thread structure, and start him up. + // + + RtlZeroMemory( &LazyWriter, sizeof(LAZY_WRITER) ); + + KeInitializeSpinLock( &CcWorkQueueSpinlock ); + InitializeListHead( &LazyWriter.WorkQueue ); + + // + // Store process address + // + + LazyWriter.OurProcess = PsGetCurrentProcess(); + + // + // Initialize the Scan Dpc and Timer. + // + + KeInitializeDpc( &LazyWriter.ScanDpc, &CcScanDpc, NULL ); + KeInitializeTimer( &LazyWriter.ScanTimer ); + + // + // Now initialize the lookaside list for allocating Work Queue entries. + // + + switch ( CcCapturedSystemSize ) { + + // + // ~512 bytes + // + + case MmSmallSystem : + NumberOfItems = 32; + break; + + // + // ~1k bytes + // + + case MmMediumSystem : + NumberOfItems = 64; + break; + + // + // ~2k bytes + // + + case MmLargeSystem : + NumberOfItems = 128; + if (MmIsThisAnNtAsSystem()) { + NumberOfItems += 128; + } + + break; + } + + ExInitializeNPagedLookasideList( &CcTwilightLookasideList, + NULL, + NULL, + 0, + sizeof( WORK_QUEUE_ENTRY ), + 'kwcC', + NumberOfItems ); + + // + // Now initialize the Bcb zone + // + + { + PVOID InitialSegment; + ULONG InitialSegmentSize; + ULONG RoundedBcbSize = (sizeof(BCB) + 7) & ~7; + ULONG NumberOfItems; + + + switch ( CcCapturedSystemSize ) { + + // + // ~1.5k bytes + // + + case MmSmallSystem : + NumberOfItems = 8; + break; + + // + // ~4k bytes + // + + case MmMediumSystem : + NumberOfItems = 20; + break; + + // + // ~12k bytes + // + + case MmLargeSystem : + NumberOfItems = 64; + break; + } + + InitialSegmentSize = sizeof(ZONE_SEGMENT_HEADER) + RoundedBcbSize * NumberOfItems; + + // + // Allocate the initial allocation for the zone. If we cannot get it, + // something must really be wrong, so we will just bugcheck. + // + + if ((InitialSegment = ExAllocatePool( NonPagedPool, + InitialSegmentSize)) == NULL) { + + CcBugCheck( 0, 0, 0 ); + } + + if (!NT_SUCCESS(ExInitializeZone( &LazyWriter.BcbZone, + RoundedBcbSize, + InitialSegment, + InitialSegmentSize ))) { + CcBugCheck( 0, 0, 0 ); + } + } + + // + // Initialize the Deferred Write List. + // + + KeInitializeSpinLock( &CcDeferredWriteSpinLock ); + InitializeListHead( &CcDeferredWrites ); + + // + // Initialize the Vacbs. + // + + CcInitializeVacbs(); + + return TRUE; +} + + +VOID +CcInitializeCacheMap ( + IN PFILE_OBJECT FileObject, + IN PCC_FILE_SIZES FileSizes, + IN BOOLEAN PinAccess, + IN PCACHE_MANAGER_CALLBACKS Callbacks, + IN PVOID LazyWriteContext + ) + +/*++ + +Routine Description: + + This routine is intended to be called by File Systems only. It + initializes the cache maps for data caching. It should be called + every time a file is open or created, and NO_INTERMEDIATE_BUFFERING + was specified as FALSE. + +Arguments: + + FileObject - A pointer to the newly-created file object. + + FileSizes - A pointer to AllocationSize, FileSize and ValidDataLength + for the file. ValidDataLength should contain MAXLONGLONG if + valid data length tracking and callbacks are not desired. + + PinAccess - FALSE if file will be used exclusively for Copy and Mdl + access, or TRUE if file will be used for Pin access. + (Files for Pin access are not limited in size as the caller + must access multiple areas of the file at once.) + + Callbacks - Structure of callbacks used by the Lazy Writer + + LazyWriteContext - Parameter to be passed in to above routine. + +Return Value: + + None. 
If an error occurs, this routine will Raise the status. + +--*/ + +{ + KIRQL OldIrql; + PSHARED_CACHE_MAP SharedCacheMap = NULL; + PVOID CacheMapToFree = NULL; + CC_FILE_SIZES LocalSizes; + BOOLEAN WeSetBeingCreated = FALSE; + BOOLEAN SharedListOwned = FALSE; + BOOLEAN MustUninitialize = FALSE; + BOOLEAN WeCreated = FALSE; + + DebugTrace(+1, me, "CcInitializeCacheMap:\n", 0 ); + DebugTrace( 0, me, " FileObject = %08lx\n", FileObject ); + DebugTrace( 0, me, " FileSizes = %08lx\n", FileSizes ); + + // + // Make a local copy of the passed in file sizes before acquiring + // the spin lock. + // + + LocalSizes = *FileSizes; + + // + // If no FileSize was given, set to one byte before maximizing below. + // + + if (LocalSizes.AllocationSize.QuadPart == 0) { + LocalSizes.AllocationSize.LowPart += 1; + } + + // + // If caller has Write access or will allow write, then round + // size to next create modulo. (***Temp*** there may be too many + // apps that end up allowing shared write, thanks to our Dos heritage, + // to keep that part of the check in.) + // + + if (FileObject->WriteAccess /*|| FileObject->SharedWrite */) { + + LocalSizes.AllocationSize.QuadPart = LocalSizes.AllocationSize.QuadPart + (LONGLONG)(DEFAULT_CREATE_MODULO - 1); + LocalSizes.AllocationSize.LowPart &= ~(DEFAULT_CREATE_MODULO - 1); + + } else { + + LocalSizes.AllocationSize.QuadPart = LocalSizes.AllocationSize.QuadPart + (LONGLONG)(VACB_MAPPING_GRANULARITY - 1); + LocalSizes.AllocationSize.LowPart &= ~(VACB_MAPPING_GRANULARITY - 1); + } + + // + // Do the allocate of the SharedCacheMap, based on an unsafe test, + // while not holding a spinlock. Allocation failures look like we + // never decided to allocate one here! + // + + if (FileObject->SectionObjectPointer->SharedCacheMap == NULL) { + CacheMapToFree = ExAllocatePool( NonPagedPool, sizeof(SHARED_CACHE_MAP) ); + } + + // + // Serialize Creation/Deletion of all Shared CacheMaps + // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + SharedListOwned = TRUE; + + // + // Insure release of our global resource + // + + try { + + // + // Check for second initialization of same file object + // + + if (FileObject->PrivateCacheMap != NULL) { + + DebugTrace( 0, 0, "CacheMap already initialized\n", 0 ); + try_return( NOTHING ); + } + + // + // Get current Shared Cache Map pointer indirectly off of the file object. + // (The actual pointer is typically in a file system data structure, such + // as an Fcb.) + // + + SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap; + + // + // If there is no SharedCacheMap, then we must create a section and + // the SharedCacheMap structure. + // + + if (SharedCacheMap == NULL) { + + // + // After successfully creating the section, allocate the SharedCacheMap. + // + + WeCreated = TRUE; + + if (CacheMapToFree == NULL) { + CacheMapToFree = (PSHARED_CACHE_MAP)ExAllocatePool( NonPagedPool, + sizeof(SHARED_CACHE_MAP) ); + } + + SharedCacheMap = CacheMapToFree; + CacheMapToFree = NULL; + + if (SharedCacheMap == NULL) { + + DebugTrace( 0, 0, "Failed to allocate SharedCacheMap\n", 0 ); + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + SharedListOwned = FALSE; + + ExRaiseStatus( STATUS_INSUFFICIENT_RESOURCES ); + } + + // + // Zero the SharedCacheMap and fill in the nonzero portions later. + // + + RtlZeroMemory( SharedCacheMap, sizeof(SHARED_CACHE_MAP) ); + + // + // Now initialize the Shared Cache Map. 
+ // + + SharedCacheMap->NodeTypeCode = CACHE_NTC_SHARED_CACHE_MAP; + SharedCacheMap->NodeByteSize = sizeof(SHARED_CACHE_MAP); + SharedCacheMap->FileSize = LocalSizes.FileSize; + SharedCacheMap->ValidDataLength = + SharedCacheMap->ValidDataGoal = LocalSizes.ValidDataLength; + SharedCacheMap->FileObject = FileObject; + // SharedCacheMap->Section set below + + // + // Initialize the ActiveVacbSpinLock. + // + + KeInitializeSpinLock( &SharedCacheMap->ActiveVacbSpinLock ); + + if (PinAccess) { + SetFlag(SharedCacheMap->Flags, PIN_ACCESS); + } + + // + // If this file has FO_SEQUENTIAL_ONLY set, then remember that + // in the SharedCacheMap. + // + + if (FlagOn(FileObject->Flags, FO_SEQUENTIAL_ONLY)) { + SetFlag(SharedCacheMap->Flags, ONLY_SEQUENTIAL_ONLY_SEEN); + } + + // + // Do the round-robin allocation of the spinlock for the shared + // cache map. Note the manipulation of the next + // counter is safe, since we have the CcMasterSpinLock + // exclusive. + // + + InitializeListHead( &SharedCacheMap->BcbList ); + SharedCacheMap->Callbacks = Callbacks; + SharedCacheMap->LazyWriteContext = LazyWriteContext; + + // + // Initialize the pointer to the uninitialize event chain. + // + + SharedCacheMap->UninitializeEvent = NULL; + + // + // Initialize listhead for all PrivateCacheMaps + // + + InitializeListHead( &SharedCacheMap->PrivateList ); + + // + // Insert the new Shared Cache Map in the global list + // + + InsertTailList( &CcCleanSharedCacheMapList, + &SharedCacheMap->SharedCacheMapLinks ); + + // + // Finally, store the pointer to the Shared Cache Map back + // via the indirect pointer in the File Object. + // + + FileObject->SectionObjectPointer->SharedCacheMap = SharedCacheMap; + + // + // We must reference this file object so that it cannot go away + // until we do CcUninitializeCacheMap below. Note we cannot + // find or rely on the FileObject that Memory Management has, + // although normally it will be this same one anyway. + // + + ObReferenceObject ( FileObject ); + + } else { + + // + // If this file has FO_SEQUENTIAL_ONLY clear, then remember that + // in the SharedCacheMap. + // + + if (!FlagOn(FileObject->Flags, FO_SEQUENTIAL_ONLY)) { + ClearFlag(SharedCacheMap->Flags, ONLY_SEQUENTIAL_ONLY_SEEN); + } + } + + // + // Make sure that no one is trying to lazy delete it in the case + // that the Cache Map was already there. + // + + ClearFlag(SharedCacheMap->Flags, TRUNCATE_REQUIRED); + + // + // In case there has been a CcUnmapAndPurge call, we check here if we + // if we need to recreate the section and map it. + // + + if ((SharedCacheMap->Vacbs == NULL) && + !FlagOn(SharedCacheMap->Flags, BEING_CREATED)) { + + // + // Increment the OpenCount on the CacheMap. + // + + SharedCacheMap->OpenCount += 1; + MustUninitialize = TRUE; + + // + // We still want anyone else to wait. + // + + SetFlag(SharedCacheMap->Flags, BEING_CREATED); + WeSetBeingCreated = TRUE; + + // + // If there is a create event, then this must be the path where we + // we were only unmapped. We will just clear it here again in case + // someone needs to wait again this time too. + // + + if (SharedCacheMap->CreateEvent != NULL) { + + KeInitializeEvent( SharedCacheMap->CreateEvent, + NotificationEvent, + FALSE ); + } + + // + // Release global resource + // + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + SharedListOwned = FALSE; + + // + // We have to test this, because the section may only be unmapped. 
+ // + + if (SharedCacheMap->Section == NULL) { + + LARGE_INTEGER LargeZero = {0,0}; + + // + // Call MM to create a section for this file, for the calculated + // section size. Note that we have the choice in this service to + // pass in a FileHandle or a FileObject pointer, but not both. + // Naturally we want to pass in the handle. + // + + DebugTrace( 0, mm, "MmCreateSection:\n", 0 ); + DebugTrace2(0, mm, " MaximumSize = %08lx, %08lx\n", + LocalSizes.AllocationSize.LowPart, + LocalSizes.AllocationSize.HighPart ); + DebugTrace( 0, mm, " FileObject = %08lx\n", FileObject ); + + SharedCacheMap->Status = MmCreateSection( &SharedCacheMap->Section, + SECTION_MAP_READ + | SECTION_MAP_WRITE + | SECTION_QUERY, + NULL, + &LocalSizes.AllocationSize, + PAGE_READWRITE, + SEC_COMMIT, + NULL, + FileObject ); + + DebugTrace( 0, mm, "
Section ); + + if (!NT_SUCCESS( SharedCacheMap->Status )){ + DebugTrace( 0, 0, "Error from MmCreateSection = %08lx\n", + SharedCacheMap->Status ); + + SharedCacheMap->Section = NULL; + ExRaiseStatus( FsRtlNormalizeNtstatus( SharedCacheMap->Status, + STATUS_UNEXPECTED_MM_CREATE_ERR )); + } + + ObDeleteCapturedInsertInfo(SharedCacheMap->Section); + + // + // If this is a stream file object, then no user can map it, + // and we should keep the modified page writer out of it. + // + + if (!FlagOn(((PFSRTL_COMMON_FCB_HEADER)FileObject->FsContext)->Flags2, + FSRTL_FLAG2_DO_MODIFIED_WRITE) && + (FileObject->FsContext2 == NULL)) { + + BOOLEAN Disabled; + + Disabled = MmDisableModifiedWriteOfSection( FileObject->SectionObjectPointer ); + ExAcquireFastLock( &CcMasterSpinLock, &OldIrql ); + SetFlag(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED); + ExReleaseFastLock( &CcMasterSpinLock, OldIrql ); + + //**** ASSERT( Disabled ); + } + + // + // Create the Vacb array. + // + + CcCreateVacbArray( SharedCacheMap, LocalSizes.AllocationSize ); + } + + // + // If the section already exists, we still have to call MM to + // extend, in case it is not large enough. + // + + else { + + if ( LocalSizes.AllocationSize.QuadPart > SharedCacheMap->SectionSize.QuadPart ) { + + NTSTATUS Status; + + DebugTrace( 0, mm, "MmExtendSection:\n", 0 ); + DebugTrace( 0, mm, " Section = %08lx\n", SharedCacheMap->Section ); + DebugTrace2(0, mm, " Size = %08lx, %08lx\n", + LocalSizes.AllocationSize.LowPart, + LocalSizes.AllocationSize.HighPart ); + + Status = MmExtendSection( SharedCacheMap->Section, + &LocalSizes.AllocationSize, + TRUE ); + + if (!NT_SUCCESS(Status)) { + + DebugTrace( 0, 0, "Error from MmExtendSection, Status = %08lx\n", + Status ); + + ExRaiseStatus( FsRtlNormalizeNtstatus( Status, + STATUS_UNEXPECTED_MM_EXTEND_ERR )); + } + } + + // + // Extend the Vacb array. + // + + CcExtendVacbArray( SharedCacheMap, LocalSizes.AllocationSize ); + } + + // + // Now show that we are all done and resume any waiters. + // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + ClearFlag(SharedCacheMap->Flags, BEING_CREATED); + WeSetBeingCreated = FALSE; + if (SharedCacheMap->CreateEvent != NULL) { + KeSetEvent( SharedCacheMap->CreateEvent, 0, FALSE ); + } + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + } + + // + // Else if the section is already there, we make sure it is large + // enough by calling CcExtendCacheSection. + // + + else { + + // + // If the SharedCacheMap is currently being created we have + // to optionally create and wait on an event for it. Note that + // the only safe time to delete the event is in + // CcUninitializeCacheMap, because we otherwise have no way of + // knowing when everyone has reached the KeWaitForSingleObject. + // + + if (FlagOn(SharedCacheMap->Flags, BEING_CREATED)) { + if (SharedCacheMap->CreateEvent == NULL) { + + // + // We create for the loacl event with the WaitOnActiveCount + // event, and we synchronize the claiming of that event with + // CcVacbSpinLock. + // + + ExAcquireSpinLockAtDpcLevel( &CcVacbSpinLock ); + + // + // If the local even is not being used as a create event, + // then we can use it. (Should be quite rare that it is in use.) 
+ // + + if (SharedCacheMap->WaitOnActiveCount == NULL) { + + SharedCacheMap->CreateEvent = &SharedCacheMap->Event; + + } else { + + SharedCacheMap->CreateEvent = (PKEVENT)ExAllocatePool( NonPagedPool, sizeof(KEVENT) ); + } + + ExReleaseSpinLockFromDpcLevel( &CcVacbSpinLock ); + + if (SharedCacheMap->CreateEvent == NULL) { + DebugTrace( 0, 0, "Failed to allocate CreateEvent\n", 0 ); + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + SharedListOwned = FALSE; + + ExRaiseStatus(STATUS_INSUFFICIENT_RESOURCES); + } + + KeInitializeEvent( SharedCacheMap->CreateEvent, + NotificationEvent, + FALSE ); + } + + // + // Increment the OpenCount on the CacheMap. + // + + SharedCacheMap->OpenCount += 1; + MustUninitialize = TRUE; + + // + // Release global resource before waiting + // + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + SharedListOwned = FALSE; + + DebugTrace( 0, 0, "Waiting on CreateEvent\n", 0 ); + + KeWaitForSingleObject( SharedCacheMap->CreateEvent, + Executive, + KernelMode, + FALSE, + (PLARGE_INTEGER)NULL); + + // + // If the real creator got an error, then we must bomb + // out too. + // + + if (!NT_SUCCESS(SharedCacheMap->Status)) { + ExRaiseStatus( FsRtlNormalizeNtstatus( SharedCacheMap->Status, + STATUS_UNEXPECTED_MM_CREATE_ERR )); + } + } + else { + + PCACHE_UNINITIALIZE_EVENT CUEvent; + + // + // Increment the OpenCount on the CacheMap. + // + + SharedCacheMap->OpenCount += 1; + MustUninitialize = TRUE; + + // + // If there is a process waiting on an uninitialize on this + // cache map to complete, let the thread that is waiting go, + // since the uninitialize is now complete. + // + CUEvent = SharedCacheMap->UninitializeEvent; + + while (CUEvent != NULL) { + PCACHE_UNINITIALIZE_EVENT EventNext = CUEvent->Next; + KeSetEvent(&CUEvent->Event, 0, FALSE); + CUEvent = EventNext; + } + + SharedCacheMap->UninitializeEvent = NULL; + + // + // Release global resource + // + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + SharedListOwned = FALSE; + } + } + + { + PPRIVATE_CACHE_MAP PrivateCacheMap; + + // + // Now allocate (if local one already in use) and initialize + // the Private Cache Map. + // + + PrivateCacheMap = &SharedCacheMap->PrivateCacheMap; + + // + // See if we should allocate a PrivateCacheMap while not holding + // a spinlock. + // + + if (CacheMapToFree != NULL) { + ExFreePool( CacheMapToFree ); + CacheMapToFree = NULL; + } + + if (PrivateCacheMap->NodeTypeCode != 0) { + CacheMapToFree = ExAllocatePool( NonPagedPool, sizeof(PRIVATE_CACHE_MAP) ); + } + + // + // Insert the new PrivateCacheMap in the list off the SharedCacheMap. + // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + SharedListOwned = TRUE; + + // + // Now make sure there is still no PrivateCacheMap, and if so just get out. + // + + if (FileObject->PrivateCacheMap == NULL) { + + // + // Is the local one already in use? + // + + if (PrivateCacheMap->NodeTypeCode != 0) { + + // + // Use the one allocated above, if there is one, else go to pool now. 
+ // + + if (CacheMapToFree == NULL) { + CacheMapToFree = + (PPRIVATE_CACHE_MAP)ExAllocatePool( NonPagedPool, + sizeof(PRIVATE_CACHE_MAP) ); + } + PrivateCacheMap = CacheMapToFree; + CacheMapToFree = NULL; + } + + if (PrivateCacheMap == NULL) { + + DebugTrace( 0, 0, "Failed to allocate PrivateCacheMap\n", 0 ); + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + SharedListOwned = FALSE; + + ExRaiseStatus(STATUS_INSUFFICIENT_RESOURCES); + } + + RtlZeroMemory( PrivateCacheMap, sizeof(PRIVATE_CACHE_MAP) ); + + PrivateCacheMap->NodeTypeCode = CACHE_NTC_PRIVATE_CACHE_MAP; + PrivateCacheMap->NodeByteSize = sizeof(PRIVATE_CACHE_MAP); + PrivateCacheMap->FileObject = FileObject; + PrivateCacheMap->ReadAheadMask = PAGE_SIZE - 1; + + // + // Initialize the spin lock. + // + + KeInitializeSpinLock( &PrivateCacheMap->ReadAheadSpinLock ); + + InsertTailList( &SharedCacheMap->PrivateList, &PrivateCacheMap->PrivateLinks ); + + FileObject->PrivateCacheMap = PrivateCacheMap; + } + } + + MustUninitialize = FALSE; + try_exit: NOTHING; + } + finally { + + // + // See if we got an error and must uninitialize the SharedCacheMap + // + + if (MustUninitialize) { + + if (!SharedListOwned) { + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + } + if (WeSetBeingCreated) { + if (SharedCacheMap->CreateEvent != NULL) { + KeSetEvent( SharedCacheMap->CreateEvent, 0, FALSE ); + } + ClearFlag(SharedCacheMap->Flags, BEING_CREATED); + } + + // + // Now release our open count. + // + + SharedCacheMap->OpenCount -= 1; + + if ((SharedCacheMap->OpenCount == 0) && + !FlagOn(SharedCacheMap->Flags, WRITE_QUEUED) && + (SharedCacheMap->DirtyPages == 0)) { + + // + // On PinAccess it is safe and necessary to eliminate + // the structure immediately. + // + + if (PinAccess) { + + CcDeleteSharedCacheMap( SharedCacheMap, OldIrql, FALSE ); + + // + // If it is not PinAccess, we must lazy delete, because + // we could get into a deadlock trying to acquire the + // stream exclusive when we dereference the file object. + // + + } else { + + // + // Move it to the dirty list so the lazy write scan will + // see it. + // + + RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks ); + InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks, + &SharedCacheMap->SharedCacheMapLinks ); + + // + // Make sure the Lazy Writer will wake up, because we + // want him to delete this SharedCacheMap. + // + + LazyWriter.OtherWork = TRUE; + if (!LazyWriter.ScanActive) { + CcScheduleLazyWriteScan(); + } + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + } + + } else { + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + } + + SharedListOwned = FALSE; + + // + // If we did not create this SharedCacheMap, then there is a + // possibility that it is in the dirty list. Once we are sure + // we have the spinlock, just make sure it is in the clean list + // if there are no dirty bytes and the open count is nonzero. + // (The latter test is almost guaranteed, of course, but we check + // it to be safe.) 
+ // + + } else if (!WeCreated && + (SharedCacheMap != NULL)) { + + if (!SharedListOwned) { + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + SharedListOwned = TRUE; + } + + if ((SharedCacheMap->DirtyPages == 0) && + (SharedCacheMap->OpenCount != 0)) { + + RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks ); + InsertTailList( &CcCleanSharedCacheMapList, + &SharedCacheMap->SharedCacheMapLinks ); + } + } + + // + // Release global resource + // + + if (SharedListOwned) { + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + } + + if (CacheMapToFree != NULL) { + ExFreePool(CacheMapToFree); + } + + } + + DebugTrace(-1, me, "CcInitializeCacheMap -> VOID\n", 0 ); + + return; +} + + +BOOLEAN +CcUninitializeCacheMap ( + IN PFILE_OBJECT FileObject, + IN PLARGE_INTEGER TruncateSize OPTIONAL, + IN PCACHE_UNINITIALIZE_EVENT UninitializeEvent OPTIONAL + ) + +/*++ + +Routine Description: + + This routine uninitializes the previously initialized Shared and Private + Cache Maps. This routine is only intended to be called by File Systems. + It should be called when the File System receives a cleanup call on the + File Object. + + A File System which supports data caching must always call this routine + whenever it closes a file, whether the caller opened the file with + NO_INTERMEDIATE_BUFFERING as FALSE or not. This is because the final + cleanup of a file related to truncation or deletion of the file, can + only occur on the last close, whether the last closer cached the file + or not. When CcUnitializeCacheMap is called on a file object for which + CcInitializeCacheMap was never called, the call has a benign effect + iff no one has truncated or deleted the file; otherwise the necessary + cleanup relating to the truncate or close is performed. + + In summary, CcUnitializeCacheMap does the following: + + If the caller had Write or Delete access, the cache is flushed. + (This could change with lazy writing.) + + If a Cache Map was initialized on this File Object, it is + unitialized (unmap any views, delete section, and delete + Cache Map structures). + + On the last Cleanup, if the file has been deleted, the + Section is forced closed. If the file has been truncated, then + the truncated pages are purged from the cache. + +Arguments: + + FileObject - File Object which was previously supplied to + CcInitializeCacheMap. + + TruncateSize - If specified, the file was truncated to the specified + size, and the cache should be purged accordingly. + + UninitializeEvent - If specified, then the provided event + will be set to the signalled state when the actual flush is + completed. This is only of interest to file systems that + require that they be notified when a cache flush operation + has completed. Due to network protocol restrictions, it + is critical that network file systems know exactly when + a cache flush operation completes, by specifying this + event, they can be notified when the cache section is + finally purged if the section is "lazy-deleted". + +ReturnValue: + + FALSE if Section was not closed. + TRUE if Section was closed. 
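+
+    As an illustration only (this sketch is not part of the original
+    routine description), a file system cleanup handler might call this
+    routine roughly as follows; the DeletePending flag and where it
+    comes from are hypothetical:
+
+        LARGE_INTEGER TruncateSize;
+        PLARGE_INTEGER TruncateSizePtr = NULL;
+
+        if (DeletePending) {
+
+            //
+            //  Purge everything; the file is going away.
+            //
+
+            TruncateSize.QuadPart = 0;
+            TruncateSizePtr = &TruncateSize;
+        }
+
+        (VOID)CcUninitializeCacheMap( FileObject, TruncateSizePtr, NULL );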
+ +--*/ + +{ + KIRQL OldIrql; + PSHARED_CACHE_MAP SharedCacheMap; + ULONG ActivePage; + ULONG PageIsDirty; + PVACB ActiveVacb = NULL; + BOOLEAN SectionClosed = FALSE; + BOOLEAN SharedListAcquired = FALSE; + PPRIVATE_CACHE_MAP PrivateCacheMap; + + DebugTrace(+1, me, "CcUninitializeCacheMap:\n", 0 ); + DebugTrace( 0, me, " FileObject = %08lx\n", FileObject ); + DebugTrace( 0, me, " &TruncateSize = %08lx\n", TruncateSize ); + + // + // Insure release of resources + // + + try { + + // + // Serialize Creation/Deletion of all Shared CacheMaps + // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + SharedListAcquired = TRUE; + + // + // Get pointer to SharedCacheMap via File Object. + // + + SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap; + PrivateCacheMap = FileObject->PrivateCacheMap; + + // + // Decrement Open Count on SharedCacheMap, if we did a cached open. + // Also unmap PrivateCacheMap if it is mapped and deallocate it. + // + + if (PrivateCacheMap != NULL) { + + SharedCacheMap->OpenCount -= 1; + + // + // Remove PrivateCacheMap from list in SharedCacheMap. + // + + RemoveEntryList( &PrivateCacheMap->PrivateLinks ); + + // + // Free local or allocated PrivateCacheMap + // + + if (PrivateCacheMap == &SharedCacheMap->PrivateCacheMap) { + PrivateCacheMap->NodeTypeCode = 0; + PrivateCacheMap = NULL; + } + + FileObject->PrivateCacheMap = (PPRIVATE_CACHE_MAP)NULL; + } + + // + // Now if we have a SharedCacheMap whose Open Count went to 0, we + // have some additional cleanup. + // + + if (SharedCacheMap != NULL) { + + // + // If a Truncate Size was specified, then remember that we want to + // truncate the FileSize and purge the unneeded pages when OpenCount + // goes to 0. + // + + if (ARGUMENT_PRESENT(TruncateSize)) { + + if ( (TruncateSize->QuadPart == 0) && (SharedCacheMap->FileSize.QuadPart != 0) ) { + SetFlag(SharedCacheMap->Flags, TRUNCATE_REQUIRED); + } + + // + // If this is the last guy, I can drop the file size down + // now. + // + + if (IsListEmpty(&SharedCacheMap->PrivateList)) { + SharedCacheMap->FileSize = *TruncateSize; + } + } + + // + // If other file objects are still using this SharedCacheMap, + // then we are done now. + // + + if (SharedCacheMap->OpenCount != 0) { + + DebugTrace(-1, me, "SharedCacheMap OpenCount != 0\n", 0); + + // + // If the caller specified an event to be set when + // the cache uninitialize is completed, set the event + // now, because the uninitialize is complete for this file. + // (Note, we make him wait if he is the last guy.) + // + + if (ARGUMENT_PRESENT(UninitializeEvent)) { + + if (!IsListEmpty(&SharedCacheMap->PrivateList)) { + KeSetEvent(&UninitializeEvent->Event, 0, FALSE); + } else { + + UninitializeEvent->Next = SharedCacheMap->UninitializeEvent; + SharedCacheMap->UninitializeEvent = UninitializeEvent; + } + } + + try_return( SectionClosed = FALSE ); + } + + // + // Set the "uninitialize complete" in the shared cache map + // so that CcDeleteSharedCacheMap will delete it. + // + + if (ARGUMENT_PRESENT(UninitializeEvent)) { + UninitializeEvent->Next = SharedCacheMap->UninitializeEvent; + SharedCacheMap->UninitializeEvent = UninitializeEvent; + } + + // + // We are in the process of deleting this cache map. If the + // Lazy Writer is active or the Bcb list is not empty or the Lazy + // Writer will hit this SharedCacheMap because we are purging + // the file to 0, then get out and let the Lazy Writer clean + // up. 
+ // + + if ((!FlagOn(SharedCacheMap->Flags, PIN_ACCESS) && + !ARGUMENT_PRESENT(UninitializeEvent)) + + || + + FlagOn(SharedCacheMap->Flags, WRITE_QUEUED) + + || + + (SharedCacheMap->DirtyPages != 0)) { + + // + // Move it to the dirty list so the lazy write scan will + // see it. + // + + if (!FlagOn(SharedCacheMap->Flags, WRITE_QUEUED)) { + RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks ); + InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks, + &SharedCacheMap->SharedCacheMapLinks ); + } + + // + // Make sure the Lazy Writer will wake up, because we + // want him to delete this SharedCacheMap. + // + + LazyWriter.OtherWork = TRUE; + if (!LazyWriter.ScanActive) { + CcScheduleLazyWriteScan(); + } + + // + // Get the active Vacb if we are going to lazy delete, to + // free it for someone who can use it. + // + + GetActiveVacbAtDpcLevel( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty ); + + DebugTrace(-1, me, "SharedCacheMap has Bcbs and not purging to 0\n", 0); + + try_return( SectionClosed = FALSE ); + } + + // + // Now we can delete the SharedCacheMap. If there are any Bcbs, + // then we must be truncating to 0, and they will also be deleted. + // On return the Shared Cache Map List Spinlock will be released. + // + + CcDeleteSharedCacheMap( SharedCacheMap, OldIrql, FALSE ); + + SharedListAcquired = FALSE; + + try_return( SectionClosed = TRUE ); + } + + // + // No Shared Cache Map. To make the file go away, we still need to + // purge the section, if one exists. (And we still need to release + // our global list first to avoid deadlocks.) + // + + else { + if (ARGUMENT_PRESENT(TruncateSize) && + ( TruncateSize->QuadPart == 0 ) && + (*(PCHAR *)FileObject->SectionObjectPointer != NULL)) { + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + SharedListAcquired = FALSE; + + DebugTrace( 0, mm, "MmPurgeSection:\n", 0 ); + DebugTrace( 0, mm, " SectionObjectPointer = %08lx\n", + FileObject->SectionObjectPointer ); + DebugTrace2(0, mm, " Offset = %08lx\n", + TruncateSize->LowPart, + TruncateSize->HighPart ); + + // + // 0 Length means to purge from the TruncateSize on. + // + + CcPurgeCacheSection( FileObject->SectionObjectPointer, + TruncateSize, + 0, + FALSE ); + } + + // + // If the caller specified an event to be set when + // the cache uninitialize is completed, set the event + // now, because the uninitialize is complete for this file. + // + + if (ARGUMENT_PRESENT(UninitializeEvent)) { + KeSetEvent(&UninitializeEvent->Event, 0, FALSE); + } + + } + + try_exit: NOTHING; + } + finally { + + // + // Release global resources + // + + if (SharedListAcquired) { + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + } + + // + // Free the active vacb, if we found one. + // + + if (ActiveVacb != NULL) { + + CcFreeActiveVacb( ActiveVacb->SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty ); + } + + // + // Free PrivateCacheMap now that we no longer have the spinlock. + // + + if (PrivateCacheMap != NULL) { + ExFreePool( PrivateCacheMap ); + } + } + + DebugTrace(-1, me, "CcUnitializeCacheMap -> %02lx\n", SectionClosed ); + + return SectionClosed; + +} + + +// +// Internal support routine. +// + +VOID +FASTCALL +CcDeleteSharedCacheMap ( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN KIRQL ListIrql, + IN ULONG ReleaseFile + ) + +/*++ + +Routine Description: + + The specified SharedCacheMap is removed from the global list of + SharedCacheMap's and deleted with all of its related structures. + Other objects which were referenced in CcInitializeCacheMap are + dereferenced here. 
+ + NOTE: The CcMasterSpinLock must already be acquired + on entry. It is released on return. + +Arguments: + + SharedCacheMap - Pointer to Cache Map to delete + + ListIrql - priority to restore to when releasing shared cache map list + + ReleaseFile - Supplied as nonzero if file was acquired exclusive and + should be released. + +ReturnValue: + + None. + +--*/ + +{ + LIST_ENTRY LocalList; + PFILE_OBJECT FileObject; + PVACB ActiveVacb; + ULONG ActivePage; + ULONG PageIsDirty; + KIRQL OldIrql; + PMBCB Mbcb; + + DebugTrace(+1, me, "CcDeleteSharedCacheMap:\n", 0 ); + DebugTrace( 0, me, " SharedCacheMap = %08lx\n", SharedCacheMap ); + + // + // Remove it from the global list and clear the pointer to it via + // the File Object. + // + + RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks ); + + // + // Zero pointer to SharedCacheMap. Once we have cleared the pointer, + // we can/must release the global list to avoid deadlocks. + // + + FileObject = SharedCacheMap->FileObject; + + FileObject->SectionObjectPointer->SharedCacheMap = (PSHARED_CACHE_MAP)NULL; + SetFlag( SharedCacheMap->Flags, WRITE_QUEUED ); + + // + // The OpenCount is 0, but we still need to flush out any dangling + // cache read or writes. + // + + if ((SharedCacheMap->VacbActiveCount != 0) || (SharedCacheMap->NeedToZero != NULL)) { + + // + // We will put it in a local list and set a flag + // to keep the Lazy Writer away from it, so that we can wrip it out + // below if someone manages to sneak in and set something dirty, etc. + // If the file system does not synchronize cleanup calls with an + // exclusive on the stream, then this case is possible. + // + + InitializeListHead( &LocalList ); + InsertTailList( &LocalList, &SharedCacheMap->SharedCacheMapLinks ); + + // + // If there is an active Vacb, then nuke it now (before waiting!). + // + + GetActiveVacbAtDpcLevel( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty ); + + ExReleaseSpinLock( &CcMasterSpinLock, ListIrql ); + + CcFreeActiveVacb( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty ); + + while (SharedCacheMap->VacbActiveCount != 0) { + CcWaitOnActiveCount( SharedCacheMap ); + } + + // + // Now in case we hit the rare path where someone moved the + // SharedCacheMap again, do a remove again now. It may be + // from our local list or it may be from the dirty list, + // but who cares? The important thing is to remove it in + // the case it was the dirty list, since we will delete it + // below. + // + + ExAcquireSpinLock( &CcMasterSpinLock, &ListIrql ); + RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks ); + } + + // + // If there are Bcbs, then empty the list, asserting that none of them + // can be pinned now if we have gotten this far! + // + + while (!IsListEmpty( &SharedCacheMap->BcbList )) { + + PBCB Bcb; + + Bcb = (PBCB)CONTAINING_RECORD( SharedCacheMap->BcbList.Flink, + BCB, + BcbLinks ); + + RemoveEntryList( &Bcb->BcbLinks ); + + // + // Skip over the pendaflex entries + // + + if (Bcb->NodeTypeCode == CACHE_NTC_BCB) { + + ASSERT( Bcb->PinCount == 0 ); + + // + // If the Bcb is dirty, we have to synchronize with the Lazy Writer + // and reduce the total number of dirty. + // + + if (Bcb->Dirty) { + + CcTotalDirtyPages -= Bcb->ByteLength >> PAGE_SHIFT; + } + + // + // There is a small window where the data could still be mapped + // if (for example) the Lazy Writer collides with a CcCopyWrite + // in the foreground, and then someone calls CcUninitializeCacheMap + // while the Lazy Writer is active. 
This is because the Lazy + // Writer biases the pin count. Deal with that here. + // + + if (Bcb->BaseAddress != NULL) { + CcFreeVirtualAddress( Bcb->Vacb ); + } + + // + // Debug routines used to remove Bcbs from the global list + // + +#if LIST_DBG + + { + KIRQL OldIrql; + + ExAcquireSpinLock( &CcBcbSpinLock, &OldIrql ); + + if (Bcb->CcBcbLinks.Flink != NULL) { + + RemoveEntryList( &Bcb->CcBcbLinks ); + CcBcbCount -= 1; + } + + ExReleaseSpinLock( &CcBcbSpinLock, OldIrql ); + } + +#endif + + CcDeallocateBcb( Bcb ); + } + } + ExReleaseSpinLock( &CcMasterSpinLock, ListIrql ); + + // + // Call local routine to unmap, and purge if necessary. + // + + CcUnmapAndPurge( SharedCacheMap ); + + // + // Now release the file now that the purge is done. + // + + if (ReleaseFile) { + FsRtlReleaseFile( SharedCacheMap->FileObject ); + } + + // + // Dereference our pointer to the Section and FileObject + // (We have to test the Section pointer since CcInitializeCacheMap + // calls this routine for error recovery. Release our global + // resource before dereferencing the FileObject to avoid deadlocks. + // + + if (SharedCacheMap->Section != NULL) { + ObDereferenceObject( SharedCacheMap->Section ); + } + ObDereferenceObject( FileObject ); + + // + // If there is an Mbcb, deduct any dirty pages and deallocate. + // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + Mbcb = SharedCacheMap->Mbcb; + if (Mbcb != NULL) { + + if (Mbcb->DirtyPages != 0) { + + CcTotalDirtyPages -= Mbcb->DirtyPages; + } + + CcDeallocateBcb( (PBCB)Mbcb ); + } + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + + // + // If there was an uninitialize event specified for this shared cache + // map, then set it to the signalled state, indicating that we are + // removing the section and deleting the shared cache map. + // + + if (SharedCacheMap->UninitializeEvent != NULL) { + PCACHE_UNINITIALIZE_EVENT CUEvent = SharedCacheMap->UninitializeEvent; + + while (CUEvent != NULL) { + PCACHE_UNINITIALIZE_EVENT EventNext = CUEvent->Next; + + KeSetEvent(&CUEvent->Event, 0, FALSE); + + CUEvent = EventNext; + } + } + + // + // Now delete the Vacb vector. + // + + if ((SharedCacheMap->Vacbs != &SharedCacheMap->InitialVacbs[0]) + + && + + (SharedCacheMap->Vacbs != NULL)) { + + ExFreePool( SharedCacheMap->Vacbs ); + } + + // + // If an event had to be allocated for this SharedCacheMap, + // deallocate it. + // + + if ((SharedCacheMap->CreateEvent != NULL) && (SharedCacheMap->CreateEvent != &SharedCacheMap->Event)) { + ExFreePool( SharedCacheMap->CreateEvent ); + } + + if ((SharedCacheMap->WaitOnActiveCount != NULL) && (SharedCacheMap->WaitOnActiveCount != &SharedCacheMap->Event)) { + ExFreePool( SharedCacheMap->WaitOnActiveCount ); + } + + // + // Deallocate the storeage for the SharedCacheMap. + // + + ExFreePool( SharedCacheMap ); + + DebugTrace(-1, me, "CcDeleteSharedCacheMap -> VOID\n", 0 ); + + return; + +} + + +VOID +CcSetFileSizes ( + IN PFILE_OBJECT FileObject, + IN PCC_FILE_SIZES FileSizes + ) + +/*++ + +Routine Description: + + This routine must be called whenever a file has been extended to reflect + this extension in the cache maps and underlying section. Calling this + routine has a benign effect if the current size of the section is + already greater than or equal to the new AllocationSize. + + This routine must also be called whenever the FileSize for a file changes + to reflect these changes in the Cache Manager. 
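+
+    As an illustration only (this sketch is not part of the original
+    routine description), a file system that has just extended a file
+    might report the new sizes roughly as follows; the Fcb header field
+    names are assumptions:
+
+        CC_FILE_SIZES FileSizes;
+
+        FileSizes.AllocationSize = Fcb->Header.AllocationSize;
+        FileSizes.FileSize = Fcb->Header.FileSize;
+        FileSizes.ValidDataLength = Fcb->Header.ValidDataLength;
+
+        CcSetFileSizes( FileObject, &FileSizes );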
+ + This routine seems rather large, but in the normal case it only acquires + a spinlock, updates some fields, and exits. Less often it will either + extend the section, or truncate/purge the file, but it would be unexpected + to do both. On the other hand, the idea of this routine is that it does + "everything" required when AllocationSize or FileSize change. + +Arguments: + + FileObject - A file object for which CcInitializeCacheMap has been + previously called. + + FileSizes - A pointer to AllocationSize, FileSize and ValidDataLength + for the file. AllocationSize is ignored if it is not larger + than the current section size (i.e., it is ignored unless it + has grown). ValidDataLength is not used. + + +Return Value: + + None + +--*/ + +{ + LARGE_INTEGER NewSectionSize; + LARGE_INTEGER NewFileSize; + IO_STATUS_BLOCK IoStatus; + PSHARED_CACHE_MAP SharedCacheMap; + NTSTATUS Status; + KIRQL OldIrql; + PVACB ActiveVacb; + ULONG ActivePage; + ULONG PageIsDirty; + + DebugTrace(+1, me, "CcSetFileSizes:\n", 0 ); + DebugTrace( 0, me, " FileObject = %08lx\n", FileObject ); + DebugTrace( 0, me, " FileSizes = %08lx\n", FileSizes ); + + // + // Make a local copy of the new file size and section size. + // + + NewFileSize = FileSizes->FileSize; + NewSectionSize = FileSizes->AllocationSize; + + // + // Serialize Creation/Deletion of all Shared CacheMaps + // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + + // + // Get pointer to SharedCacheMap via File Object. + // + + SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap; + + // + // If the file is not cached, just get out. + // + + if ((SharedCacheMap == NULL) || (SharedCacheMap->Section == NULL)) { + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + + // + // Let's try to purge the file incase this is a truncate. In the + // vast majority of cases when there is no shared cache map, there + // is no data section either, so this call will eventually be + // no-oped in Mm. + // + + // + // First flush the first page we are keeping, if it has data, before + // we throw it away. + // + + if (NewFileSize.LowPart & (PAGE_SIZE - 1)) { + MmFlushSection( FileObject->SectionObjectPointer, &NewFileSize, 1, &IoStatus, FALSE ); + } + + CcPurgeCacheSection( FileObject->SectionObjectPointer, + &NewFileSize, + 0, + FALSE ); + + DebugTrace(-1, me, "CcSetFileSizes -> VOID\n", 0 ); + + return; + } + + // + // Make call a Noop if file is not mapped, or section already big enough. + // + + if ( NewSectionSize.QuadPart > SharedCacheMap->SectionSize.QuadPart ) { + + // + // Increment open count to make sure the SharedCacheMap stays around, + // then release the spinlock so that we can call Mm. + // + + SharedCacheMap->OpenCount += 1; + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + + // + // Round new section size to pages. + // + + NewSectionSize.QuadPart = NewSectionSize.QuadPart + (LONGLONG)(DEFAULT_EXTEND_MODULO - 1); + NewSectionSize.LowPart &= ~(DEFAULT_EXTEND_MODULO - 1); + + // + // Use try-finally to make sure we get the open count decremented. + // + + try { + + // + // Call MM to extend the section. 
+ // + + DebugTrace( 0, mm, "MmExtendSection:\n", 0 ); + DebugTrace( 0, mm, " Section = %08lx\n", SharedCacheMap->Section ); + DebugTrace2(0, mm, " Size = %08lx, %08lx\n", + NewSectionSize.LowPart, NewSectionSize.HighPart ); + + Status = MmExtendSection( SharedCacheMap->Section, &NewSectionSize, TRUE ); + + if (!NT_SUCCESS(Status)) { + + DebugTrace( 0, 0, "Error from MmExtendSection, Status = %08lx\n", + Status ); + + ExRaiseStatus( FsRtlNormalizeNtstatus( Status, + STATUS_UNEXPECTED_MM_EXTEND_ERR )); + } + + // + // Extend the Vacb array. + // + + CcExtendVacbArray( SharedCacheMap, NewSectionSize ); + + } finally { + + // + // Serialize again to decrement the open count. + // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + + SharedCacheMap->OpenCount -= 1; + + if ((SharedCacheMap->OpenCount == 0) && + !FlagOn(SharedCacheMap->Flags, WRITE_QUEUED) && + (SharedCacheMap->DirtyPages == 0)) { + + // + // Move to the dirty list. + // + + RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks ); + InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks, + &SharedCacheMap->SharedCacheMapLinks ); + + // + // Make sure the Lazy Writer will wake up, because we + // want him to delete this SharedCacheMap. + // + + LazyWriter.OtherWork = TRUE; + if (!LazyWriter.ScanActive) { + CcScheduleLazyWriteScan(); + } + } + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + } + + // + // It is now very unlikely that we have any more work to do, but just + // in case we reacquire the spinlock and check again if we are cached. + // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + + // + // Get pointer to SharedCacheMap via File Object. + // + + SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap; + + // + // If the file is not cached, just get out. + // + + if (SharedCacheMap == NULL) { + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + + DebugTrace(-1, me, "CcSetFileSizes -> VOID\n", 0 ); + + return; + } + } + + // + // If we are shrinking either of these two sizes, then we must free the + // active page, since it may be locked. + // + + SharedCacheMap->OpenCount += 1; + + try { + + if ( ( NewFileSize.QuadPart < SharedCacheMap->ValidDataGoal.QuadPart ) || + ( NewFileSize.QuadPart < SharedCacheMap->FileSize.QuadPart )) { + + GetActiveVacbAtDpcLevel( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty ); + + if ((ActiveVacb != NULL) || (SharedCacheMap->NeedToZero != NULL)) { + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + + CcFreeActiveVacb( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty ); + + // + // Serialize again to reduce ValidDataLength. It cannot change + // because the caller must have the file exclusive. + // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + } + } + + // + // If the section did not grow, see if the file system supports ValidDataLength, + // then update the valid data length in the file system. + // + + if ( SharedCacheMap->ValidDataLength.QuadPart != MAXLONGLONG ) { + + if ( NewFileSize.QuadPart < SharedCacheMap->ValidDataLength.QuadPart ) { + SharedCacheMap->ValidDataLength = NewFileSize; + } + + // + // When truncating Valid Data Goal, remember that it must always + // stay rounded to the top of the page, to protect writes of user-mapped + // files. ** no longer rounding ** + // + + if ( NewFileSize.QuadPart < SharedCacheMap->ValidDataGoal.QuadPart ) { + + SharedCacheMap->ValidDataGoal = NewFileSize; + } + } + + // + // On truncate, be nice guys and actually purge away user data from + // the cache. 
However, the PinAccess check is important to avoid deadlocks + // in Ntfs. + // + // It is also important to check the Vacb Active count. The caller + // must have the file exclusive, therefore, no one else can be actively + // doing anything in the file. Normally the Active count will be zero + // (like in a normal call from Set File Info), and we can go ahead and truncate. + // However, if the active count is nonzero, chances are this very thread has + // something pinned or mapped, and we will deadlock if we try to purge and + // wait for the count to go zero. A rare case of this which deadlocked DaveC + // on Christmas Day of 1992, is where Ntfs was trying to convert an attribute + // from resident to nonresident - which is a good example of a case where the + // purge was not needed. + // + + if ( (NewFileSize.QuadPart < SharedCacheMap->FileSize.QuadPart ) && + !FlagOn(SharedCacheMap->Flags, PIN_ACCESS) && + (SharedCacheMap->VacbActiveCount == 0)) { + + // + // If we are actually truncating to zero (a size which has particular + // meaning to the Lazy Writer scan!), then we must reset the Mbcb if + // there is one, so that we do not keep dirty pages around forever. + // + + if ((NewFileSize.QuadPart == 0) && (SharedCacheMap->Mbcb != NULL)) { + + PMBCB Mbcb = SharedCacheMap->Mbcb; + + CcTotalDirtyPages -= Mbcb->DirtyPages; + SharedCacheMap->DirtyPages -= Mbcb->DirtyPages; + Mbcb->DirtyPages = 0; + Mbcb->FirstDirtyPage = MAXULONG; + Mbcb->LastDirtyPage = 0; + Mbcb->ResumeWritePage = 0; + Mbcb->PagesToWrite = 0; + RtlZeroMemory( Mbcb->Bitmap.Buffer, Mbcb->Bitmap.SizeOfBitMap / 8 ); + } + + // + // Increment open count to make sure the SharedCacheMap stays around, + // then release the spinlock so that we can call Mm. + // + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + + CcPurgeAndClearCacheSection( SharedCacheMap, &NewFileSize ); + + // + // Serialize again to decrement the open count. + // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + } + + } finally { + + // + // We should only be raising without owning the spinlock. + // + + if (AbnormalTermination()) { + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + } + + SharedCacheMap->OpenCount -= 1; + + SharedCacheMap->FileSize = NewFileSize; + + if ((SharedCacheMap->OpenCount == 0) && + !FlagOn(SharedCacheMap->Flags, WRITE_QUEUED) && + (SharedCacheMap->DirtyPages == 0)) { + + // + // Move to the dirty list. + // + + RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks ); + InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks, + &SharedCacheMap->SharedCacheMapLinks ); + + // + // Make sure the Lazy Writer will wake up, because we + // want him to delete this SharedCacheMap. + // + + LazyWriter.OtherWork = TRUE; + if (!LazyWriter.ScanActive) { + CcScheduleLazyWriteScan(); + } + } + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + } + + DebugTrace(-1, me, "CcSetFileSizes -> VOID\n", 0 ); + + return; +} + + +VOID +CcPurgeAndClearCacheSection ( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN PLARGE_INTEGER FileOffset + ) + +/*++ + +Routine Description: + + This routine calls CcPurgeCacheSection after zeroing the end any + partial page at the start of the range. If the file is not cached + it flushes this page before the purge. + +Arguments: + + SectionObjectPointer - A pointer to the Section Object Pointers + structure in the nonpaged Fcb. + + FileOffset - Offset from which file should be purged - rounded down + to page boundary. If NULL, purge the entire file. 
+ +ReturnValue: + + FALSE - if the section was not successfully purged + TRUE - if the section was successfully purged + +--*/ + +{ + ULONG TempLength, Length; + LARGE_INTEGER LocalFileOffset; + IO_STATUS_BLOCK IoStatus; + PVOID TempVa; + PVACB Vacb; + + // + // If a range was specified, then we have to see if we need to + // save any user data before purging. + // + + if ((FileOffset->LowPart & (PAGE_SIZE - 1)) != 0) { + + // + // Switch to LocalFileOffset. We do it this way because we + // still pass it on as an optional parameter. + // + + LocalFileOffset = *FileOffset; + FileOffset = &LocalFileOffset; + + // + // If the file is cached, then we can actually zero the data to + // be purged in memory, and not purge those pages. This is a huge + // savings, because sometimes the flushes in the other case cause + // us to kill lots of stack, time and I/O doing CcZeroData in especially + // large user-mapped files. + // + + if ((SharedCacheMap->Section != NULL) && + (SharedCacheMap->Vacbs != NULL)) { + + // + // First zero the first page we are keeping, if it has data, and + // adjust FileOffset and Length to allow it to stay. + // + + TempLength = PAGE_SIZE - (FileOffset->LowPart & (PAGE_SIZE - 1)); + + TempVa = CcGetVirtualAddress( SharedCacheMap, *FileOffset, &Vacb, &Length ); + + try { + + // + // Do not map and zero the page if we are not reducing our notion + // of Valid Data, because that does two bad things. First CcSetDirtyInMask + // will arbitrarily smash up ValidDataGoal (causing a potential invalid + // CcSetValidData call). Secondly, if the Lazy Writer writes the last + // page ahead of another flush through MM, then the file system will + // never see a write from MM, and will not include the last page in + // ValidDataLength on disk. + // + + RtlZeroMemory( TempVa, TempLength ); + + if (FileOffset->QuadPart <= SharedCacheMap->ValidDataGoal.QuadPart) { + + // + // Make sure the Lazy Writer writes it. + // + + CcSetDirtyInMask( SharedCacheMap, FileOffset, TempLength ); + + // + // Otherwise, we are mapped, so make sure at least that Mm + // knows the page is dirty since we zeroed it. + // + + } else { + + MmSetAddressRangeModified( TempVa, 1 ); + } + + FileOffset->QuadPart += (LONGLONG)TempLength; + + // + // If we get any kind of error, like failing to read the page from + // the network, just charge on. Note that we only read it in order + // to zero it and avoid the flush below, so if we cannot read it + // there is really no stale data problem. + // + + } except(EXCEPTION_EXECUTE_HANDLER) { + + NOTHING; + } + + CcFreeVirtualAddress( Vacb ); + + } else { + + // + // First flush the first page we are keeping, if it has data, before + // we throw it away. + // + + MmFlushSection( SharedCacheMap->FileObject->SectionObjectPointer, FileOffset, 1, &IoStatus, FALSE ); + } + } + + CcPurgeCacheSection( SharedCacheMap->FileObject->SectionObjectPointer, + FileOffset, + 0, + FALSE ); +} + + +BOOLEAN +CcPurgeCacheSection ( + IN PSECTION_OBJECT_POINTERS SectionObjectPointer, + IN PLARGE_INTEGER FileOffset, + IN ULONG Length, + IN BOOLEAN UninitializeCacheMaps + ) + +/*++ + +Routine Description: + + This routine may be called to force a purge of the cache section, + even if it is cached. Note, if a user has the file mapped, then the purge + will *not* take effect, and this must be considered part of normal application + interaction. The purpose of purge is to throw away potentially nonzero + data, so that it will be read in again and presumably zeroed. 
This is + not really a security issue, but rather an effort to not confuse the + application when it sees nonzero data. We cannot help the fact that + a user-mapped view forces us to hang on to stale data. + + This routine is intended to be called whenever previously written + data is being truncated from the file, and the file is not being + deleted. + + The file must be acquired exclusive in order to call this routine. + +Arguments: + + SectionObjectPointer - A pointer to the Section Object Pointers + structure in the nonpaged Fcb. + + FileOffset - Offset from which file should be purged - rounded down + to page boundary. If NULL, purge the entire file. + + Length - Defines the length of the byte range to purge, starting at + FileOffset. This parameter is ignored if FileOffset is + specified as NULL. If FileOffset is specified and Length + is 0, then purge from FileOffset to the end of the file. + + UninitializeCacheMaps - If TRUE, we should uninitialize all the private + cache maps before purging the data. + +ReturnValue: + + FALSE - if the section was not successfully purged + TRUE - if the section was successfully purged + +--*/ + +{ + KIRQL OldIrql; + PSHARED_CACHE_MAP SharedCacheMap; + PPRIVATE_CACHE_MAP PrivateCacheMap; + ULONG ActivePage; + ULONG PageIsDirty; + BOOLEAN PurgeWorked = TRUE; + PVACB Vacb = NULL; + + DebugTrace(+1, me, "CcPurgeCacheSection:\n", 0 ); + DebugTrace( 0, mm, " SectionObjectPointer = %08lx\n", SectionObjectPointer ); + DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", + ARGUMENT_PRESENT(FileOffset) ? FileOffset->LowPart + : 0, + ARGUMENT_PRESENT(FileOffset) ? FileOffset->HighPart + : 0 ); + DebugTrace( 0, me, " Length = %08lx\n", Length ); + + + // + // If you want us to uninitialize cache maps, the RtlZeroMemory paths + // below depend on actually having to purge something after zeroing. + // + + ASSERT(!UninitializeCacheMaps || (Length == 0) || (Length >= PAGE_SIZE * 2)); + + // + // Serialize Creation/Deletion of all Shared CacheMaps + // + + ExAcquireFastLock( &CcMasterSpinLock, &OldIrql ); + + // + // Get pointer to SharedCacheMap via File Object. + // + + SharedCacheMap = SectionObjectPointer->SharedCacheMap; + + // + // Increment open count to make sure the SharedCacheMap stays around, + // then release the spinlock so that we can call Mm. + // + + if (SharedCacheMap != NULL) { + + SharedCacheMap->OpenCount += 1; + + // + // If there is an active Vacb, then nuke it now (before waiting!). + // + + GetActiveVacbAtDpcLevel( SharedCacheMap, Vacb, ActivePage, PageIsDirty ); + } + + ExReleaseFastLock( &CcMasterSpinLock, OldIrql ); + + if (Vacb != NULL) { + + CcFreeActiveVacb( SharedCacheMap, Vacb, ActivePage, PageIsDirty ); + } + + // + // Use try-finally to insure cleanup of the Open Count and Vacb on the + // way out. + // + + try { + + // + // Increment open count to make sure the SharedCacheMap stays around, + // then release the spinlock so that we can call Mm. + // + + if (SharedCacheMap != NULL) { + + // + // Now loop to make sure that no one is currently caching the file. + // + + if (UninitializeCacheMaps) { + + while (!IsListEmpty( &SharedCacheMap->PrivateList )) { + + PrivateCacheMap = CONTAINING_RECORD( SharedCacheMap->PrivateList.Flink, + PRIVATE_CACHE_MAP, + PrivateLinks ); + + CcUninitializeCacheMap( PrivateCacheMap->FileObject, NULL, NULL ); + } + } + + // + // Now, let's unmap and purge here. + // + // We still need to wait for any dangling cache read or writes. 
+ // + // In fact we have to loop and wait because the lazy writer can + // sneak in and do an CcGetVirtualAddressIfMapped, and we are not + // synchronized. + // + + while ((SharedCacheMap->Vacbs != NULL) && + !CcUnmapVacbArray( SharedCacheMap, FileOffset, Length )) { + + CcWaitOnActiveCount( SharedCacheMap ); + } + } + + // + // Purge failures are extremely rare if there are no user mapped sections. + // However, it is possible that we will get one from our own mapping, if + // the file is being lazy deleted from a previous open. For that case + // we wait here until the purge succeeds, so that we are not left with + // old user file data. Although Length is actually invariant in this loop, + // we do need to keep checking that we are allowed to truncate in case a + // user maps the file during a delay. + // + + while (!(PurgeWorked = MmPurgeSection(SectionObjectPointer, + FileOffset, + Length, + (BOOLEAN)((SharedCacheMap !=NULL) && + ARGUMENT_PRESENT(FileOffset)))) && + (Length == 0) && + MmCanFileBeTruncated(SectionObjectPointer, FileOffset)) { + + (VOID)KeDelayExecutionThread( KernelMode, FALSE, &CcCollisionDelay ); + } + + } finally { + + // + // Reduce the open count on the SharedCacheMap if there was one. + // + + if (SharedCacheMap != NULL) { + + // + // Serialize again to decrement the open count. + // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + + SharedCacheMap->OpenCount -= 1; + + if ((SharedCacheMap->OpenCount == 0) && + !FlagOn(SharedCacheMap->Flags, WRITE_QUEUED) && + (SharedCacheMap->DirtyPages == 0)) { + + // + // Move to the dirty list. + // + + RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks ); + InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks, + &SharedCacheMap->SharedCacheMapLinks ); + + // + // Make sure the Lazy Writer will wake up, because we + // want him to delete this SharedCacheMap. + // + + LazyWriter.OtherWork = TRUE; + if (!LazyWriter.ScanActive) { + CcScheduleLazyWriteScan(); + } + } + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + } + } + + DebugTrace(-1, me, "CcPurgeCacheSection -> %02lx\n", PurgeWorked ); + + return PurgeWorked; +} + + +// +// Internal support routine. +// + +VOID +CcUnmapAndPurge( + IN PSHARED_CACHE_MAP SharedCacheMap + ) + +/*++ + +Routine Description: + + This routine may be called to unmap and purge a section, causing Memory + Management to throw the pages out and reset his notion of file size. + +Arguments: + + SharedCacheMap - Pointer to SharedCacheMap of section to purge. + +Return Value: + + None. + +--*/ + +{ + PFILE_OBJECT FileObject; + KIRQL OldIrql; + + FileObject = SharedCacheMap->FileObject; + + // + // Unmap all Vacbs + // + + if (SharedCacheMap->Vacbs != NULL) { + (VOID)CcUnmapVacbArray( SharedCacheMap, NULL, 0 ); + } + + // + // Now that the file is unmapped, we can purge the truncated + // pages from memory, if TRUNCATE_REQUIRED. Note that if all + // of the section is being purged (FileSize == 0), the purge + // and subsequent delete of the SharedCacheMap should drop + // all references on the section and file object clearing the + // way for the Close Call and actual file delete to occur + // immediately. 
+ // + + if (FlagOn(SharedCacheMap->Flags, TRUNCATE_REQUIRED)) { + + DebugTrace( 0, mm, "MmPurgeSection:\n", 0 ); + DebugTrace( 0, mm, " SectionObjectPointer = %08lx\n", + FileObject->SectionObjectPointer ); + DebugTrace2(0, mm, " Offset = %08lx\n", + SharedCacheMap->FileSize.LowPart, + SharedCacheMap->FileSize.HighPart ); + + // + // 0 Length means to purge from the TruncateSize on. + // + + CcPurgeCacheSection( FileObject->SectionObjectPointer, + &SharedCacheMap->FileSize, + 0, + FALSE ); + } +} + + +VOID +CcSetDirtyPageThreshold ( + IN PFILE_OBJECT FileObject, + IN ULONG DirtyPageThreshold + ) + +/*++ + +Routine Description: + + This routine may be called to set a dirty page threshold for this + stream. The write throttling will kick in whenever the file system + attempts to exceed the dirty page threshold for this file. + +Arguments: + + FileObject - Supplies file object for the stream + + DirtyPageThreshold - Supplies the dirty page threshold for this stream, + or 0 for no threshold. + +Return Value: + + None + +--*/ + +{ + KIRQL OldIrql; + PSHARED_CACHE_MAP SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap; + + if (SharedCacheMap != NULL) { + + SharedCacheMap->DirtyPageThreshold = DirtyPageThreshold; + + ExAcquireFastLock( &CcMasterSpinLock, &OldIrql ); + SetFlag(((PFSRTL_COMMON_FCB_HEADER)(FileObject->FsContext))->Flags, + FSRTL_FLAG_LIMIT_MODIFIED_PAGES); + ExReleaseFastLock( &CcMasterSpinLock, OldIrql ); + } +} + + +VOID +CcZeroEndOfLastPage ( + IN PFILE_OBJECT FileObject + ) + +/*++ + +Routine Description: + + This routine is only called by Mm before mapping a user view to + a section. If there is an uninitialized page at the end of the + file, we zero it by freeing that page. + +Parameters: + + FileObject - File object for section to be mapped + +Return Value: + + None +--*/ + +{ + PSHARED_CACHE_MAP SharedCacheMap; + ULONG ActivePage; + ULONG PageIsDirty; + KIRQL OldIrql; + PVOID NeedToZero = NULL; + PVACB ActiveVacb = NULL; + + // + // See if we have an active Vacb, that we need to free. + // + + FsRtlAcquireFileExclusive( FileObject ); + ExAcquireFastLock( &CcMasterSpinLock, &OldIrql ); + SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap; + + if (SharedCacheMap != NULL) { + + // + // See if there is an active vacb. + // + + if ((SharedCacheMap->ActiveVacb != NULL) || ((NeedToZero = SharedCacheMap->NeedToZero) != NULL)) { + + SharedCacheMap->OpenCount += 1; + GetActiveVacbAtDpcLevel( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty ); + } + } + + ExReleaseFastLock( &CcMasterSpinLock, OldIrql ); + + // + // Remember in FsRtl header is there is a user section. + // If this is an advanced header then also acquire the mutex to access + // this field. + // + + if (FlagOn( ((PFSRTL_COMMON_FCB_HEADER)FileObject->FsContext)->Flags, + FSRTL_FLAG_ADVANCED_HEADER )) { + + ExAcquireFastMutex( ((PFSRTL_ADVANCED_FCB_HEADER)FileObject->FsContext)->FastMutex ); + + SetFlag( ((PFSRTL_COMMON_FCB_HEADER)FileObject->FsContext)->Flags, + FSRTL_FLAG_USER_MAPPED_FILE ); + + ExReleaseFastMutex( ((PFSRTL_ADVANCED_FCB_HEADER)FileObject->FsContext)->FastMutex ); + + } else { + + SetFlag( ((PFSRTL_COMMON_FCB_HEADER)FileObject->FsContext)->Flags, + FSRTL_FLAG_USER_MAPPED_FILE ); + } + + FsRtlReleaseFile( FileObject ); + + // + // If the file is cached and we have a Vacb to free, we need to + // use the lazy writer callback to synchronize so no one will be + // extending valid data. 
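+ //
+ // For illustration (hypothetical numbers, assuming a 4KB page): with a
+ // file size of 0x1234, bytes 0x234 through 0xFFF of the last cached page
+ // may still be uninitialized -- that is what NeedToZero tracks -- and
+ // freeing the active Vacb below zeroes that tail before a user view of
+ // the section can observe it.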
+ // + + if ((ActiveVacb != NULL) || (NeedToZero != NULL)) { + + CcFreeActiveVacb( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty ); + + // + // Serialize again to decrement the open count. + // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + + SharedCacheMap->OpenCount -= 1; + + if ((SharedCacheMap->OpenCount == 0) && + !FlagOn(SharedCacheMap->Flags, WRITE_QUEUED) && + (SharedCacheMap->DirtyPages == 0)) { + + // + // Move to the dirty list. + // + + RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks ); + InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks, + &SharedCacheMap->SharedCacheMapLinks ); + + // + // Make sure the Lazy Writer will wake up, because we + // want him to delete this SharedCacheMap. + // + + LazyWriter.OtherWork = TRUE; + if (!LazyWriter.ScanActive) { + CcScheduleLazyWriteScan(); + } + } + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + } +} + + +BOOLEAN +CcZeroData ( + IN PFILE_OBJECT FileObject, + IN PLARGE_INTEGER StartOffset, + IN PLARGE_INTEGER EndOffset, + IN BOOLEAN Wait + ) + +/*++ + +Routine Description: + + This routine attempts to zero the specified file data and deliver the + correct I/O status. + + If the caller does not want to block (such as for disk I/O), then + Wait should be supplied as FALSE. If Wait was supplied as FALSE and + it is currently impossible to zero all of the requested data without + blocking, then this routine will return FALSE. However, if the + required space is immediately accessible in the cache and no blocking is + required, this routine zeros the data and returns TRUE. + + If the caller supplies Wait as TRUE, then this routine is guaranteed + to zero the data and return TRUE. If the correct space is immediately + accessible in the cache, then no blocking will occur. Otherwise, + the necessary work will be initiated to read and/or free cache data, + and the caller will be blocked until the data can be received. + + File system Fsd's should typically supply Wait = TRUE if they are + processing a synchronous I/O requests, or Wait = FALSE if they are + processing an asynchronous request. + + File system threads should supply Wait = TRUE. + + IMPORTANT NOTE: File systems which call this routine must be prepared + to handle a special form of a write call where the Mdl is already + supplied. Namely, if Irp->MdlAddress is supplied, the file system + must check the low order bit of Irp->MdlAddress->ByteOffset. If it + is set, that means that the Irp was generated in this routine and + the file system must do two things: + + Decrement Irp->MdlAddress->ByteOffset and Irp->UserBuffer + + Clear Irp->MdlAddress immediately prior to completing the + request, as this routine expects to reuse the Mdl and + ultimately deallocate the Mdl itself. + +Arguments: + + FileObject - pointer to the FileObject for which a range of bytes + is to be zeroed. This FileObject may either be for + a cached file or a noncached file. If the file is + not cached, then WriteThrough must be TRUE and + StartOffset and EndOffset must be on sector boundaries. + + StartOffset - Start offset in file to be zeroed. + + EndOffset - End offset in file to be zeroed. + + Wait - FALSE if caller may not block, TRUE otherwise (see description + above) + +Return Value: + + FALSE - if Wait was supplied as FALSE and the data was not zeroed. + + TRUE - if the data has been zeroed. + +Raises: + + STATUS_INSUFFICIENT_RESOURCES - If a pool allocation failure occurs. + This can only occur if Wait was specified as TRUE. 
(If Wait is + specified as FALSE, and an allocation failure occurs, this + routine simply returns FALSE.) + +--*/ + +{ + PSHARED_CACHE_MAP SharedCacheMap; + PVOID CacheBuffer; + LARGE_INTEGER FOffset; + LARGE_INTEGER ToGo; + ULONG ZeroBytes, ZeroTransfer; + ULONG i; + BOOLEAN WriteThrough; + ULONG SavedState = 0; + ULONG MaxZerosInCache = MAX_ZEROS_IN_CACHE; + + PBCB Bcb = NULL; + PCHAR Zeros = NULL; + PMDL ZeroMdl = NULL; + ULONG MaxBytesMappedInMdl = 0; + BOOLEAN Result = TRUE; + + DebugTrace(+1, me, "CcZeroData\n", 0 ); + + WriteThrough = (BOOLEAN)(((FileObject->Flags & FO_WRITE_THROUGH) != 0) || + (FileObject->PrivateCacheMap == NULL)); + + // + // If the caller specified Wait, but the FileObject is WriteThrough, + // then we need to just get out. + // + + if (WriteThrough && !Wait) { + + DebugTrace(-1, me, "CcZeroData->FALSE (WriteThrough && !Wait)\n", 0 ); + + return FALSE; + } + + SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap; + + FOffset = *StartOffset; + + // + // Calculate how much to zero this time. + // + + ToGo.QuadPart = EndOffset->QuadPart - FOffset.QuadPart; + + // + // We will only do zeroing in the cache if the caller is using a + // cached file object, and did not specify WriteThrough. We are + // willing to zero some data in the cache if our total is not too + // much, or there is sufficient available pages. + // + + if (((ToGo.QuadPart <= 0x2000) || + (MmAvailablePages >= ((MAX_ZEROS_IN_CACHE / PAGE_SIZE) * 4))) && !WriteThrough) { + + try { + + while (MaxZerosInCache != 0) { + + ULONG ReceivedLength; + LARGE_INTEGER BeyondLastByte; + + if ( ToGo.QuadPart > (LONGLONG)MaxZerosInCache ) { + + // + // If Wait == FALSE, then there is no point in getting started, + // because we would have to start all over again zeroing with + // Wait == TRUE, since we would fall out of this loop and + // start synchronously writing pages to disk. + // + + if (!Wait) { + + DebugTrace(-1, me, "CcZeroData -> FALSE\n", 0 ); + + try_return( Result = FALSE ); + } + } + else { + MaxZerosInCache = ToGo.LowPart; + } + + // + // Call local routine to Map or Access the file data, then zero the data, + // then call another local routine to free the data. If we cannot map + // the data because of a Wait condition, return FALSE. + // + // Note that this call may result in an exception, however, if it + // does no Bcb is returned and this routine has absolutely no + // cleanup to perform. Therefore, we do not have a try-finally + // and we allow the possibility that we will simply be unwound + // without notice. + // + + if (!CcPinFileData( FileObject, + &FOffset, + MaxZerosInCache, + FALSE, + TRUE, + Wait, + &Bcb, + &CacheBuffer, + &BeyondLastByte )) { + + DebugTrace(-1, me, "CcZeroData -> FALSE\n", 0 ); + + try_return( Result = FALSE ); + } + + // + // Calculate how much data is described by Bcb starting at our desired + // file offset. If it is more than we need, we will zero the whole thing + // anyway. + // + + ReceivedLength = (ULONG)(BeyondLastByte.QuadPart - FOffset.QuadPart ); + + // + // Now attempt to allocate an Mdl to describe the mapped data. + // + + ZeroMdl = IoAllocateMdl( CacheBuffer, + ReceivedLength, + FALSE, + FALSE, + NULL ); + + if (ZeroMdl == NULL) { + + ExRaiseStatus( STATUS_INSUFFICIENT_RESOURCES ); + } + + // + // It is necessary to probe and lock the pages, or else + // the pages may not still be in memory when we do the + // MmSetAddressRangeModified for the dirty Bcb. 
+ // + + MmDisablePageFaultClustering(&SavedState); + MmProbeAndLockPages( ZeroMdl, KernelMode, IoReadAccess ); + MmEnablePageFaultClustering(SavedState); + SavedState = 0; + + // + // Assume we did not get all the data we wanted, and set FOffset + // to the end of the returned data, and advance buffer pointer. + // + + FOffset = BeyondLastByte; + + // + // Figure out how many bytes we are allowed to zero in the cache. + // Note it is possible we have zeroed a little more than our maximum, + // because we hit an existing Bcb that extended beyond the range. + // + + if (MaxZerosInCache <= ReceivedLength) { + MaxZerosInCache = 0; + } + else { + MaxZerosInCache -= ReceivedLength; + } + + // + // Now set the Bcb dirty. We have to explicitly set the address + // range modified here, because that work otherwise gets deferred + // to the Lazy Writer. + // + + MmSetAddressRangeModified( CacheBuffer, ReceivedLength ); + CcSetDirtyPinnedData( Bcb, NULL ); + + // + // Unmap the data now + // + + CcUnpinFileData( Bcb, FALSE, UNPIN ); + Bcb = NULL; + + // + // Unlock and free the Mdl (we only loop back if we crossed + // a 256KB boundary. + // + + MmUnlockPages( ZeroMdl ); + IoFreeMdl( ZeroMdl ); + ZeroMdl = NULL; + } + + try_exit: NOTHING; + } finally { + + if (SavedState != 0) { + MmEnablePageFaultClustering(SavedState); + } + + // + // Clean up only necessary in abnormal termination. + // + + if (Bcb != NULL) { + + CcUnpinFileData( Bcb, FALSE, UNPIN ); + } + + // + // Since the last thing in the above loop which can + // fail is the MmProbeAndLockPages, we only need to + // free the Mdl here. + // + + if (ZeroMdl != NULL) { + + IoFreeMdl( ZeroMdl ); + } + } + + // + // If hit a wait condition above, return it now. + // + + if (!Result) { + return FALSE; + } + + // + // If we finished, get out nbow. + // + + if ( FOffset.QuadPart >= EndOffset->QuadPart ) { + return TRUE; + } + } + + // + // We either get here because we decided above not to zero anything in + // the cache directly, or else we zeroed up to our maximum and still + // have some left to zero direct to the file on disk. In either case, + // we will now zero from FOffset to *EndOffset, and then flush this + // range in case the file is cached/mapped, and there are modified + // changes in memory. + // + + // + // try-finally to guarantee cleanup. + // + + try { + PULONG Page; + ULONG SavedByteCount; + LARGE_INTEGER SizeLeft; + + // + // Round FOffset and EndOffset up to sector boundaries, since + // we will be doing disk I/O, and calculate size left. + // + + i = IoGetRelatedDeviceObject(FileObject)->SectorSize - 1; + FOffset.QuadPart += (LONGLONG)i; + FOffset.LowPart &= ~i; + SizeLeft.QuadPart = EndOffset->QuadPart + (LONGLONG)i; + SizeLeft.LowPart &= ~i; + SizeLeft.QuadPart -= FOffset.QuadPart; + + if (SizeLeft.QuadPart == 0) { + return TRUE; + } + + // + // Allocate a page to hold the zeros we will write, and + // zero it. + // + + ZeroBytes = MmNumberOfColors * PAGE_SIZE; + + if (SizeLeft.QuadPart < (LONGLONG)ZeroBytes) { + ZeroBytes = SizeLeft.LowPart; + } + + Zeros = (PCHAR)ExAllocatePool( NonPagedPoolCacheAligned, ZeroBytes ); + + if (Zeros != NULL) { + + // + // Allocate and initialize an Mdl to describe the zeros + // we need to transfer. Allocate to cover the maximum + // size required, and we will use and reuse it in the + // loop below, initialized correctly. 
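+ //
+ // For illustration (hypothetical numbers, assuming 4KB pages and
+ // MmNumberOfColors == 2): ZeroBytes is 8KB of zeroed pool, yet the Mdl
+ // built here describes up to MAX_ZERO_TRANSFER bytes -- its page frame
+ // array is filled by replicating the same two physical zero pages, so
+ // one small buffer backs every IoSynchronousPageWrite in the loop below.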
+ // + + ZeroTransfer = MAX_ZERO_TRANSFER; + + if (ZeroBytes < MmNumberOfColors * PAGE_SIZE) { + ZeroTransfer = ZeroBytes; + } + + ZeroMdl = IoAllocateMdl( Zeros, ZeroTransfer, FALSE, FALSE, NULL ); + + if (ZeroMdl == NULL) { + ExRaiseStatus( STATUS_INSUFFICIENT_RESOURCES ); + } + + // + // Now we will temporarily lock the allocated pages + // only, and then replicate the page frame numbers through + // the entire Mdl to keep writing the same pages of zeros. + // + + SavedByteCount = ZeroMdl->ByteCount; + ZeroMdl->ByteCount = ZeroBytes; + MmBuildMdlForNonPagedPool( ZeroMdl ); + + ZeroMdl->MdlFlags &= ~MDL_SOURCE_IS_NONPAGED_POOL; + ZeroMdl->MdlFlags |= MDL_PAGES_LOCKED; + ZeroMdl->MappedSystemVa = NULL; + ZeroMdl->ByteCount = SavedByteCount; + Page = (PULONG)(ZeroMdl + 1); + for (i = MmNumberOfColors; + i < (COMPUTE_PAGES_SPANNED( 0, SavedByteCount )); + i++) { + + *(Page + i) = *(Page + i - MmNumberOfColors); + } + + // + // We failed to allocate the space we wanted, so we will go to + // half of page of must succeed pool. + // + + } else { + + ZeroBytes = PAGE_SIZE / 2; + Zeros = (PCHAR)ExAllocatePool( NonPagedPoolCacheAligned, ZeroBytes ); + + // + // If we cannot get even that much, then let's write a sector at a time. + // + + if (Zeros == NULL) { + ZeroBytes = IoGetRelatedDeviceObject(FileObject)->SectorSize; + Zeros = (PCHAR)ExAllocatePool( NonPagedPoolCacheAligned, ZeroBytes ); + } + + // + // Allocate and initialize an Mdl to describe the zeros + // we need to transfer. Allocate to cover the maximum + // size required, and we will use and reuse it in the + // loop below, initialized correctly. + // + + ZeroTransfer = ZeroBytes; + ZeroMdl = IoAllocateMdl( Zeros, ZeroBytes, FALSE, FALSE, NULL ); + + if ((Zeros == NULL) || (ZeroMdl == NULL)) { + ExRaiseStatus( STATUS_INSUFFICIENT_RESOURCES ); + } + + // + // Now we will lock the allocated pages + // + + MmBuildMdlForNonPagedPool( ZeroMdl ); + } + +#ifdef MIPS +#ifdef MIPS_PREFILL + RtlFillMemory( Zeros, ZeroBytes, 0xDD ); + KeSweepDcache( TRUE ); +#endif +#endif + + // + // Zero the buffer now. + // + + RtlZeroMemory( Zeros, ZeroBytes ); + + // + // Map the full Mdl even if we will only use a part of it. This + // allow the unmapping operation to be deterministic. + // + + (VOID)MmGetSystemAddressForMdl(ZeroMdl); + MaxBytesMappedInMdl = ZeroMdl->ByteCount; + + // + // Now loop to write buffers full of zeros through to the file + // until we reach the starting Vbn for the transfer. + // + + while ( SizeLeft.QuadPart != 0 ) { + + IO_STATUS_BLOCK IoStatus; + NTSTATUS Status; + KEVENT Event; + + // + // See if we really need to write that many zeros, and + // trim the size back if not. + // + + if ( (LONGLONG)ZeroTransfer > SizeLeft.QuadPart ) { + + ZeroTransfer = SizeLeft.LowPart; + } + + // + // (Re)initialize the kernel event to FALSE. + // + + KeInitializeEvent( &Event, NotificationEvent, FALSE ); + + // + // Initiate and wait for the synchronous transfer. + // + + ZeroMdl->ByteCount = ZeroTransfer; + + Status = IoSynchronousPageWrite( FileObject, + ZeroMdl, + &FOffset, + &Event, + &IoStatus ); + + // + // If pending is returned (which is a successful status), + // we must wait for the request to complete. + // + + if (Status == STATUS_PENDING) { + KeWaitForSingleObject( &Event, + Executive, + KernelMode, + FALSE, + (PLARGE_INTEGER)NULL); + } + + + // + // If we got an error back in Status, then the Iosb + // was not written, so we will just copy the status + // there, then test the final status after that. 
+ // + + if (!NT_SUCCESS(Status)) { + ExRaiseStatus( Status ); + } + + if (!NT_SUCCESS(IoStatus.Status)) { + ExRaiseStatus( IoStatus.Status ); + } + + // + // If we succeeded, then update where we are at by how much + // we wrote, and loop back to see if there is more. + // + + FOffset.QuadPart = FOffset.QuadPart + (LONGLONG)ZeroTransfer; + SizeLeft.QuadPart = SizeLeft.QuadPart - (LONGLONG)ZeroTransfer; + } + } + finally{ + + // + // Clean up anything from zeroing pages on a noncached + // write. + // + + if (ZeroMdl != NULL) { + + if ((MaxBytesMappedInMdl != 0) && + !FlagOn(ZeroMdl->MdlFlags, MDL_SOURCE_IS_NONPAGED_POOL)) { + ZeroMdl->ByteCount = MaxBytesMappedInMdl; + MmUnmapLockedPages (ZeroMdl->MappedSystemVa, ZeroMdl); + } + + IoFreeMdl( ZeroMdl ); + } + + if (Zeros != NULL) { + ExFreePool( Zeros ); + } + + DebugTrace(-1, me, "CcZeroData -> TRUE\n", 0 ); + } + + return TRUE; +} + + +PFILE_OBJECT +CcGetFileObjectFromSectionPtrs ( + IN PSECTION_OBJECT_POINTERS SectionObjectPointer + ) + +/*++ + +This routine may be used to retrieve a pointer to the FileObject that the +Cache Manager is using for a given file from the Section Object Pointers +in the nonpaged File System structure Fcb. The use of this function is +intended for exceptional use unrelated to the processing of user requests, +when the File System would otherwise not have a FileObject at its disposal. +An example is for mount verification. + +Note that the File System is responsible for insuring that the File +Object does not go away while in use. It is impossible for the Cache +Manager to guarantee this. + +Arguments: + + SectionObjectPointer - A pointer to the Section Object Pointers + structure in the nonpaged Fcb. + +Return Value: + + Pointer to the File Object, or NULL if the file is not cached or no + longer cached + +--*/ + +{ + KIRQL OldIrql; + PFILE_OBJECT FileObject = NULL; + + // + // Serialize with Creation/Deletion of all Shared CacheMaps + // + + ExAcquireFastLock( &CcMasterSpinLock, &OldIrql ); + + if (SectionObjectPointer->SharedCacheMap != NULL) { + + FileObject = ((PSHARED_CACHE_MAP)SectionObjectPointer->SharedCacheMap)->FileObject; + } + + ExReleaseFastLock( &CcMasterSpinLock, OldIrql ); + + return FileObject; +} + + +PFILE_OBJECT +CcGetFileObjectFromBcb ( + IN PVOID Bcb + ) + +/*++ + +This routine may be used to retrieve a pointer to the FileObject that the +Cache Manager is using for a given file from a Bcb of that file. + +Note that the File System is responsible for insuring that the File +Object does not go away while in use. It is impossible for the Cache +Manager to guarantee this. + +Arguments: + + Bcb - A pointer to the pinned Bcb. + +Return Value: + + Pointer to the File Object, or NULL if the file is not cached or no + longer cached + +--*/ + +{ + return ((PBCB)Bcb)->SharedCacheMap->FileObject; +} diff --git a/private/ntos/cache/lazyrite.c b/private/ntos/cache/lazyrite.c new file mode 100644 index 000000000..d61b0864d --- /dev/null +++ b/private/ntos/cache/lazyrite.c @@ -0,0 +1,732 @@ +/*++ + +Copyright (c) 1990 Microsoft Corporation + +Module Name: + + lazyrite.c + +Abstract: + + This module implements the lazy writer for the Cache subsystem. 
+ +Author: + + Tom Miller [TomM] 22-July-1990 + +Revision History: + +--*/ + +#include "cc.h" + +// +// The Bug check file id for this module +// + +#define BugCheckFileId (CACHE_BUG_CHECK_LAZYRITE) + +// +// Define our debug constant +// + +#define me 0x00000020 + +// +// Local support routines +// + +PWORK_QUEUE_ENTRY +CcReadWorkQueue ( + ); + +VOID +CcLazyWriteScan ( + ); + + +VOID +CcScheduleLazyWriteScan ( + ) + +/*++ + +Routine Description: + + This routine may be called to schedule the next lazy writer scan, + during which lazy write and lazy close activity is posted to other + worker threads. Callers should acquire the lazy writer spin lock + to see if the scan is currently active, and then call this routine + still holding the spin lock if not. One special call is used at + the end of the lazy write scan to propagate lazy write active once + we go active. This call is "the" scan thread, and it can therefore + safely schedule the next scan without taking out the spin lock. + +Arguments: + + None + +Return Value: + + None. + +--*/ + +{ + // + // It is important to set the active flag TRUE first for the propagate + // case, because it is conceivable that once the timer is set, another + // thread could actually run and make the scan go idle before we then + // jam the flag TRUE. + // + // When going from idle to active, we delay a little longer to let the + // app finish saving its file. + // + + if (LazyWriter.ScanActive) { + + KeSetTimer( &LazyWriter.ScanTimer, CcIdleDelay, &LazyWriter.ScanDpc ); + + } else { + + LazyWriter.ScanActive = TRUE; + KeSetTimer( &LazyWriter.ScanTimer, CcFirstDelay, &LazyWriter.ScanDpc ); + } +} + + +VOID +CcScanDpc ( + IN PKDPC Dpc, + IN PVOID DeferredContext, + IN PVOID SystemArgument1, + IN PVOID SystemArgument2 + ) + +/*++ + +Routine Description: + + This is the Dpc routine which runs when the scan timer goes off. It + simply posts an element for an Ex Worker thread to do the scan. + +Arguments: + + (All are ignored) + +Return Value: + + None. + +--*/ + +{ + PWORK_QUEUE_ENTRY WorkQueueEntry; + + UNREFERENCED_PARAMETER(Dpc); + UNREFERENCED_PARAMETER(DeferredContext); + UNREFERENCED_PARAMETER(SystemArgument1); + UNREFERENCED_PARAMETER(SystemArgument2); + + WorkQueueEntry = CcAllocateWorkQueueEntry(); + + // + // If we failed to allocate a WorkQueueEntry, things must + // be in pretty bad shape. However, all we have to do is + // say we are not active, and wait for another event to + // wake things up again. + // + + if (WorkQueueEntry == NULL) { + + LazyWriter.ScanActive = FALSE; + + } else { + + // + // Otherwise post a work queue entry to do the scan. + // + + WorkQueueEntry->Function = (UCHAR)LazyWriteScan; + + CcPostWorkQueue( WorkQueueEntry, &CcRegularWorkQueue ); + } +} + + +VOID +CcLazyWriteScan ( + ) + +/*++ + +Routine Description: + + This routine implements the Lazy Writer scan for dirty data to flush + or any other work to do (lazy close). This routine is scheduled by + calling CcScheduleLazyWriteScan. + +Arguments: + + None. + +Return Value: + + None. + +--*/ + +{ + ULONG PagesToWrite, ForegroundRate, EstimatedDirtyNextInterval; + PSHARED_CACHE_MAP SharedCacheMap, FirstVisited; + KIRQL OldIrql; + ULONG LoopsWithLockHeld = 0; + BOOLEAN AlreadyMoved = FALSE; + + // + // Top of Lazy Writer scan. + // + + try { + + // + // If there is no work to do, then we will go inactive, and return. 
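+ //
+ // (For reference: when the scan does go inactive here, it is re-armed by
+ // the producers of work -- the pattern used elsewhere in this section is
+ // to set LazyWriter.OtherWork, or add dirty pages, and then call
+ // CcScheduleLazyWriteScan while holding CcMasterSpinLock if ScanActive
+ // is FALSE.)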
+ // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + + if ((CcTotalDirtyPages == 0) && !LazyWriter.OtherWork) { + + LazyWriter.ScanActive = FALSE; + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + return; + } + + // + // Acquire the Lazy Writer spinlock, calculate the next sweep time + // stamp, then update all relevant fields for the next time around. + // Also we can clear the OtherWork flag. + // + + LazyWriter.OtherWork = FALSE; + + // + // Assume we will write our usual fraction of dirty pages. Do not do the + // divide if there is not enough dirty pages, or else we will never write + // the last few pages. + // + + PagesToWrite = CcTotalDirtyPages; + if (PagesToWrite > LAZY_WRITER_MAX_AGE_TARGET) { + PagesToWrite /= LAZY_WRITER_MAX_AGE_TARGET; + } + + // + // Estimate the rate of dirty pages being produced in the foreground. + // This is the total number of dirty pages now plus the number of dirty + // pages we scheduled to write last time, minus the number of dirty + // pages we have now. Throw out any cases which would not produce a + // positive rate. + // + + ForegroundRate = 0; + + if ((CcTotalDirtyPages + CcPagesWrittenLastTime) > CcDirtyPagesLastScan) { + ForegroundRate = (CcTotalDirtyPages + CcPagesWrittenLastTime) - + CcDirtyPagesLastScan; + } + + // + // If we estimate that we will exceed our dirty page target by the end + // of this interval, then we must write more. Try to arrive on target. + // + + EstimatedDirtyNextInterval = CcTotalDirtyPages - PagesToWrite + ForegroundRate; + + if (EstimatedDirtyNextInterval > CcDirtyPageTarget) { + PagesToWrite += EstimatedDirtyNextInterval - CcDirtyPageTarget; + } + + // + // Now save away the number of dirty pages and the number of pages we + // just calculated to write. + // + + CcDirtyPagesLastScan = CcTotalDirtyPages; + CcPagesYetToWrite = CcPagesWrittenLastTime = PagesToWrite; + + // + // Loop to flush enough Shared Cache Maps to write the number of pages + // we just calculated. + // + + SharedCacheMap = CONTAINING_RECORD( CcLazyWriterCursor.SharedCacheMapLinks.Flink, + SHARED_CACHE_MAP, + SharedCacheMapLinks ); + + DebugTrace( 0, me, "Start of Lazy Writer Scan\n", 0 ); + + // + // Normally we would just like to visit every Cache Map once on each scan, + // so the scan will terminate normally when we return to FirstVisited. But + // in the off chance that FirstVisited gets deleted, we are guaranteed to stop + // when we get back to our own listhead. + // + + FirstVisited = NULL; + while ((SharedCacheMap != FirstVisited) && + (&SharedCacheMap->SharedCacheMapLinks != &CcLazyWriterCursor.SharedCacheMapLinks)) { + + if (FirstVisited == NULL) { + FirstVisited = SharedCacheMap; + } + + // + // Skip the SharedCacheMap if a write behind request is + // already queued, write behind has been disabled, or + // if there is no work to do (either dirty data to be written + // or a delete is required). + // + // Note that for streams where modified writing is disabled, we + // need to take out Bcbs exclusive, which serializes with foreground + // activity. Therefore we use a special counter in the SharedCacheMap + // to only service these once every n intervals. 
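+ //
+ // Worked example of the pacing computed above (illustrative numbers,
+ // assuming LAZY_WRITER_MAX_AGE_TARGET is 8): with 1600 dirty pages we
+ // start at PagesToWrite = 1600 / 8 = 200; if 1500 pages were dirty at
+ // the last scan and 250 were queued then, ForegroundRate is
+ // (1600 + 250) - 1500 = 350; EstimatedDirtyNextInterval is
+ // 1600 - 200 + 350 = 1750, so with a dirty page target of, say, 1000,
+ // PagesToWrite is raised by 750 to 950.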
+ // + // Skip temporary files unless we currently could not write 196KB + // + + if (!FlagOn(SharedCacheMap->Flags, WRITE_QUEUED | IS_CURSOR) + + && + + (((PagesToWrite != 0) && (SharedCacheMap->DirtyPages != 0) && + (((++SharedCacheMap->LazyWritePassCount & 0xF) == 0) || + !FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED) || + (CcCapturedSystemSize == MmSmallSystem) || + (SharedCacheMap->DirtyPages >= (4 * (MAX_WRITE_BEHIND / PAGE_SIZE)))) && + (!FlagOn(SharedCacheMap->FileObject->Flags, FO_TEMPORARY_FILE) || + !CcCanIWrite(SharedCacheMap->FileObject, 0x30000, FALSE, MAXUCHAR))) + + || + + (SharedCacheMap->OpenCount == 0))) { + + PWORK_QUEUE_ENTRY WorkQueueEntry; + + // + // If this is a metadata stream with at least 4 times + // the maximum write behind I/O size, then let's tell + // this guy to write 1/8 of his dirty data on this pass + // so it doesn't build up. + // + // Else assume we can write everything (PagesToWrite only affects + // metadata streams - otherwise writing is controlled by the Mbcb). + // + + SharedCacheMap->PagesToWrite = SharedCacheMap->DirtyPages; + + if (FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED) && + (SharedCacheMap->PagesToWrite >= (4 * (MAX_WRITE_BEHIND / PAGE_SIZE))) && + (CcCapturedSystemSize != MmSmallSystem)) { + + SharedCacheMap->PagesToWrite /= 8; + } + + // + // See if he exhausts the number of pages to write. (We + // keep going in case there are any closes to do.) + // + + if ((SharedCacheMap->PagesToWrite >= PagesToWrite) && !AlreadyMoved) { + + // + // If we met our write quota on a given SharedCacheMap, then make sure + // we start at him on the next scan, unless it is a metadata stream. + // + + RemoveEntryList( &CcLazyWriterCursor.SharedCacheMapLinks ); + + // + // For Metadata streams, set up to resume on the next stream on the + // next scan. + // + + if (FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) { + InsertHeadList( &SharedCacheMap->SharedCacheMapLinks, &CcLazyWriterCursor.SharedCacheMapLinks ); + + // + // For other streams, set up to resume on the same stream on the + // next scan. + // + + } else { + InsertTailList( &SharedCacheMap->SharedCacheMapLinks, &CcLazyWriterCursor.SharedCacheMapLinks ); + } + + PagesToWrite = 0; + AlreadyMoved = TRUE; + + } else { + + PagesToWrite -= SharedCacheMap->PagesToWrite; + } + + // + // Otherwise show we are actively writing, and keep it in the dirty + // list. + // + + SetFlag(SharedCacheMap->Flags, WRITE_QUEUED); + SharedCacheMap->DirtyPages += 1; + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + + // + // Queue the request to do the work to a worker thread. + // + + WorkQueueEntry = CcAllocateWorkQueueEntry(); + + // + // If we failed to allocate a WorkQueueEntry, things must + // be in pretty bad shape. However, all we have to do is + // break out of our current loop, and try to go back and + // delay a while. Even if the current guy should have gone + // away when we clear WRITE_QUEUED, we will find him again + // in the LW scan. + // + + if (WorkQueueEntry == NULL) { + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + ClearFlag(SharedCacheMap->Flags, WRITE_QUEUED); + SharedCacheMap->DirtyPages -= 1; + break; + } + + WorkQueueEntry->Function = (UCHAR)WriteBehind; + WorkQueueEntry->Parameters.Write.SharedCacheMap = SharedCacheMap; + + // + // Post it to the regular work queue. 
+ // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + SharedCacheMap->DirtyPages -= 1; + CcPostWorkQueue( WorkQueueEntry, &CcRegularWorkQueue ); + + LoopsWithLockHeld = 0; + + // + // Make sure we occassionally drop the lock. Set WRITE_QUEUED + // to keep the guy from going away. + // + + } else if ((++LoopsWithLockHeld >= 20) && + !FlagOn(SharedCacheMap->Flags, WRITE_QUEUED | IS_CURSOR)) { + + SetFlag(SharedCacheMap->Flags, WRITE_QUEUED); + SharedCacheMap->DirtyPages += 1; + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + LoopsWithLockHeld = 0; + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + ClearFlag(SharedCacheMap->Flags, WRITE_QUEUED); + SharedCacheMap->DirtyPages -= 1; + } + + // + // Now loop back. + // + + SharedCacheMap = + CONTAINING_RECORD( SharedCacheMap->SharedCacheMapLinks.Flink, + SHARED_CACHE_MAP, + SharedCacheMapLinks ); + } + + DebugTrace( 0, me, "End of Lazy Writer Scan\n", 0 ); + + // + // Now we can release the global list and loop back, per chance to sleep. + // + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + + // + // Now go ahead and schedule the next scan. + // + + CcScheduleLazyWriteScan(); + + // + // Basically, the Lazy Writer thread should never get an exception, + // so we put a try-except around it that bug checks one way or the other. + // Better we bug check here than worry about what happens if we let one + // get by. + // + + } except( CcExceptionFilter( GetExceptionCode() )) { + + CcBugCheck( GetExceptionCode(), 0, 0 ); + } +} + + +// +// Internal support routine +// + +LONG +CcExceptionFilter ( + IN NTSTATUS ExceptionCode + ) + +/*++ + +Routine Description: + + This is the standard exception filter for worker threads which simply + calls an FsRtl routine to see if an expected status is being raised. + If so, the exception is handled, else we bug check. + +Arguments: + + ExceptionCode - the exception code which was raised. + +Return Value: + + EXCEPTION_EXECUTE_HANDLER if expected, else a Bug Check occurs. + +--*/ + +{ + DebugTrace(0, 0, "CcExceptionFilter %08lx\n", ExceptionCode); +// DbgBreakPoint(); + + if (FsRtlIsNtstatusExpected( ExceptionCode )) { + + return EXCEPTION_EXECUTE_HANDLER; + + } else { + + return EXCEPTION_CONTINUE_SEARCH; + } +} + + + +// +// Internal support routine +// + +VOID +FASTCALL +CcPostWorkQueue ( + IN PWORK_QUEUE_ENTRY WorkQueueEntry, + IN PLIST_ENTRY WorkQueue + ) + +/*++ + +Routine Description: + + This routine queues a WorkQueueEntry, which has been allocated and + initialized by the caller, to the WorkQueue for FIFO processing by + the work threads. + +Arguments: + + WorkQueueEntry - supplies a pointer to the entry to queue + +Return Value: + + None + +--*/ + +{ + KIRQL OldIrql; + PLIST_ENTRY WorkerThreadEntry = NULL; + + ASSERT(FIELD_OFFSET(WORK_QUEUE_ITEM, List) == 0); + + DebugTrace(+1, me, "CcPostWorkQueue:\n", 0 ); + DebugTrace( 0, me, " WorkQueueEntry = %08lx\n", WorkQueueEntry ); + + // + // Queue the entry to the respective work queue. + // + + ExAcquireFastLock( &CcWorkQueueSpinlock, &OldIrql ); + InsertTailList( WorkQueue, &WorkQueueEntry->WorkQueueLinks ); + + // + // Now, if we have any more idle threads we can use, then activate + // one. + // + + if (!IsListEmpty(&CcIdleWorkerThreadList)) { + WorkerThreadEntry = RemoveHeadList( &CcIdleWorkerThreadList ); + } + ExReleaseFastLock( &CcWorkQueueSpinlock, OldIrql ); + + if (WorkerThreadEntry != NULL) { + + // + // I had to peak in the sources to verify that this routine + // is a noop if the Flink is not NULL. Sheeeeit! 
+ // + + ((PWORK_QUEUE_ITEM)WorkerThreadEntry)->List.Flink = NULL; + ExQueueWorkItem( (PWORK_QUEUE_ITEM)WorkerThreadEntry, CriticalWorkQueue ); + } + + // + // And return to our caller + // + + DebugTrace(-1, me, "CcPostWorkQueue -> VOID\n", 0 ); + + return; +} + + +// +// Internal support routine +// + +VOID +CcWorkerThread ( + PVOID ExWorkQueueItem + ) + +/*++ + +Routine Description: + + This is worker thread routine for processing cache manager work queue + entries. + +Arguments: + + ExWorkQueueItem - The work item used for this thread + +Return Value: + + None + +--*/ + +{ + KIRQL OldIrql; + PWORK_QUEUE_ENTRY WorkQueueEntry; + BOOLEAN RescanOk = FALSE; + + ASSERT(FIELD_OFFSET(WORK_QUEUE_ENTRY, WorkQueueLinks) == 0); + + while (TRUE) { + + ExAcquireFastLock( &CcWorkQueueSpinlock, &OldIrql ); + + // + // First see if there is something in the express queue. + // + + if (!IsListEmpty(&CcExpressWorkQueue)) { + WorkQueueEntry = (PWORK_QUEUE_ENTRY)RemoveHeadList( &CcExpressWorkQueue ); + + // + // If there was nothing there, then try the regular queue. + // + + } else if (!IsListEmpty(&CcRegularWorkQueue)) { + WorkQueueEntry = (PWORK_QUEUE_ENTRY)RemoveHeadList( &CcRegularWorkQueue ); + + // + // Else we can break and go idle. + // + + } else { + break; + } + + ExReleaseFastLock( &CcWorkQueueSpinlock, OldIrql ); + + // + // Process the entry within a try-except clause, so that any errors + // will cause us to continue after the called routine has unwound. + // + + try { + + switch (WorkQueueEntry->Function) { + + // + // A read ahead or write behind request has been nooped (but + // left in the queue to keep the semaphore count right). + // + + case Noop: + break; + + // + // Perform read ahead + // + + case ReadAhead: + + DebugTrace( 0, me, "CcWorkerThread Read Ahead FileObject = %08lx\n", + WorkQueueEntry->Parameters.Read.FileObject ); + + CcPerformReadAhead( WorkQueueEntry->Parameters.Read.FileObject ); + + break; + + // + // Perform write behind + // + + case WriteBehind: + + DebugTrace( 0, me, "CcWorkerThread WriteBehind SharedCacheMap = %08lx\n", + WorkQueueEntry->Parameters.Write.SharedCacheMap ); + + RescanOk = (BOOLEAN)NT_SUCCESS(CcWriteBehind( WorkQueueEntry->Parameters.Write.SharedCacheMap )); + break; + + // + // Perform Lazy Write Scan + // + + case LazyWriteScan: + + DebugTrace( 0, me, "CcWorkerThread Lazy Write Scan\n", 0 ); + + CcLazyWriteScan(); + break; + } + + } + except( CcExceptionFilter( GetExceptionCode() )) { + + NOTHING; + } + + CcFreeWorkQueueEntry( WorkQueueEntry ); + } + + // + // No more work. Requeue our worker thread entry and get out. + // + + InsertTailList( &CcIdleWorkerThreadList, + &((PWORK_QUEUE_ITEM)ExWorkQueueItem)->List ); + + ExReleaseFastLock( &CcWorkQueueSpinlock, OldIrql ); + + if (!IsListEmpty(&CcDeferredWrites) && (CcTotalDirtyPages >= 20) && RescanOk) { + CcLazyWriteScan(); + } + + return; +} diff --git a/private/ntos/cache/logsup.c b/private/ntos/cache/logsup.c new file mode 100644 index 000000000..22739c051 --- /dev/null +++ b/private/ntos/cache/logsup.c @@ -0,0 +1,548 @@ +/*++ + +Copyright (c) 1990 Microsoft Corporation + +Module Name: + + logsup.c + +Abstract: + + This module implements the special cache manager support for logging + file systems. 
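+ 
+     For illustration only (a hypothetical client with made-up names): a
+     log-driven file system typically registers its log with
+     CcSetLogHandleForFile( FileObject, LogHandle, MyFlushToLsnRoutine )
+     for each stream it caches, periodically calls
+     OldestLsn = CcGetDirtyPages( LogHandle, MyDirtyPageRoutine, C1, C2 )
+     to learn the oldest Lsn still protecting dirty cached data (and thus
+     how far its log tail may safely advance), and uses
+     CcIsThereDirtyData( Vpb ) at dismount time to decide whether a flush
+     is still required.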
+ +Author: + + Tom Miller [TomM] 30-Jul-1991 + +Revision History: + +--*/ + +#include "cc.h" + +// +// Define our debug constant +// + +#define me 0x0000040 + + +VOID +CcSetAdditionalCacheAttributes ( + IN PFILE_OBJECT FileObject, + IN BOOLEAN DisableReadAhead, + IN BOOLEAN DisableWriteBehind + ) + +/*++ + +Routine Description: + + This routine supports the setting of disable read ahead or disable write + behind flags to control Cache Manager operation. This routine may be + called any time after calling CcInitializeCacheMap. Initially both + read ahead and write behind are enabled. Note that the state of both + of these flags must be specified on each call to this routine. + +Arguments: + + FileObject - File object for which the respective flags are to be set. + + DisableReadAhead - FALSE to enable read ahead, TRUE to disable it. + + DisableWriteBehind - FALSE to enable write behind, TRUE to disable it. + +Return Value: + + None. + +--*/ + +{ + PSHARED_CACHE_MAP SharedCacheMap; + KIRQL OldIrql; + + // + // Get pointer to SharedCacheMap. + // + + SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap; + + // + // Now set the flags and return. + // + + ExAcquireFastLock( &CcMasterSpinLock, &OldIrql ); + if (DisableReadAhead) { + SetFlag(SharedCacheMap->Flags, DISABLE_READ_AHEAD); + } else { + ClearFlag(SharedCacheMap->Flags, DISABLE_READ_AHEAD); + } + if (DisableWriteBehind) { + SetFlag(SharedCacheMap->Flags, DISABLE_WRITE_BEHIND | MODIFIED_WRITE_DISABLED); + } else { + ClearFlag(SharedCacheMap->Flags, DISABLE_WRITE_BEHIND); + } + ExReleaseFastLock( &CcMasterSpinLock, OldIrql ); +} + + +VOID +CcSetLogHandleForFile ( + IN PFILE_OBJECT FileObject, + IN PVOID LogHandle, + IN PFLUSH_TO_LSN FlushToLsnRoutine + ) + +/*++ + +Routine Description: + + This routine may be called to instruct the Cache Manager to store the + specified log handle with the shared cache map for a file, to support + subsequent calls to the other routines in this module which effectively + perform an associative search for files by log handle. + +Arguments: + + FileObject - File for which the log handle should be stored. + + LogHandle - Log Handle to store. + + FlushToLsnRoutine - A routine to call before flushing buffers for this + file, to insure a log file is flushed to the most + recent Lsn for any Bcb being flushed. + +Return Value: + + None. + +--*/ + +{ + PSHARED_CACHE_MAP SharedCacheMap; + + // + // Get pointer to SharedCacheMap. + // + + SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap; + + // + // Now set the log file handle and flush routine + // + + SharedCacheMap->LogHandle = LogHandle; + SharedCacheMap->FlushToLsnRoutine = FlushToLsnRoutine; +} + + +LARGE_INTEGER +CcGetDirtyPages ( + IN PVOID LogHandle, + IN PDIRTY_PAGE_ROUTINE DirtyPageRoutine, + IN PVOID Context1, + IN PVOID Context2 + ) + +/*++ + +Routine Description: + + This routine may be called to return all of the dirty pages in all files + for a given log handle. Each page is returned by an individual call to + the Dirty Page Routine. The Dirty Page Routine is defined by a prototype + in ntos\inc\cache.h. + +Arguments: + + LogHandle - Log Handle which must match the log handle previously stored + for all files which are to be returned. + + DirtyPageRoutine -- The routine to call as each dirty page for this log + handle is found. + + Context1 - First context parameter to be passed to the Dirty Page Routine. + + Context2 - First context parameter to be passed to the Dirty Page Routine. 
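+ 
+     Note (illustrative, with made-up parameter names): as can be seen
+     from the call below, the Dirty Page Routine is invoked as
+ 
+         (*DirtyPageRoutine)( FileObject,
+                              &FileOffset,
+                              ByteLength,
+                              &OldestLsn,
+                              &NewestLsn,
+                              Context1,
+                              Context2 );
+ 
+     the authoritative prototype remains the one in ntos\inc\cache.h.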
+ +Return Value: + + LARGE_INTEGER - Oldest Lsn found of all the dirty pages, or 0 if no dirty pages + +--*/ + +{ + PSHARED_CACHE_MAP SharedCacheMap; + PBCB Bcb, BcbToUnpin; + KIRQL OldIrql; + NTSTATUS ExceptionStatus; + LARGE_INTEGER SavedFileOffset, SavedOldestLsn, SavedNewestLsn; + ULONG SavedByteLength; + ULONG LoopsWithLockHeld = 0; + LARGE_INTEGER OldestLsn = {0,0}; + + // + // Synchronize with changes to the SharedCacheMap list. + // + + ExAcquireFastLock( &CcMasterSpinLock, &OldIrql ); + + SharedCacheMap = CONTAINING_RECORD( CcDirtySharedCacheMapList.SharedCacheMapLinks.Flink, + SHARED_CACHE_MAP, + SharedCacheMapLinks ); + + BcbToUnpin = NULL; + while (&SharedCacheMap->SharedCacheMapLinks != &CcDirtySharedCacheMapList.SharedCacheMapLinks) { + + // + // Skip over cursors, SharedCacheMaps for other LogHandles, and ones with + // no dirty pages + // + + if (!FlagOn(SharedCacheMap->Flags, IS_CURSOR) && (SharedCacheMap->LogHandle == LogHandle) && + (SharedCacheMap->DirtyPages != 0)) { + + // + // This SharedCacheMap should stick around for a while in the dirty list. + // + + SharedCacheMap->OpenCount += 1; + SharedCacheMap->DirtyPages += 1; + + // + // Set our initial resume point and point to first Bcb in List. + // + + Bcb = CONTAINING_RECORD( SharedCacheMap->BcbList.Flink, BCB, BcbLinks ); + + // + // Scan to the end of the Bcb list. + // + + while (&Bcb->BcbLinks != &SharedCacheMap->BcbList) { + + // + // If the Bcb is dirty, then capture the inputs for the + // callback routine so we can call without holding a spinlock. + // + + LoopsWithLockHeld += 1; + if ((Bcb->NodeTypeCode == CACHE_NTC_BCB) && Bcb->Dirty) { + + SavedFileOffset = Bcb->FileOffset; + SavedByteLength = Bcb->ByteLength; + SavedOldestLsn = Bcb->OldestLsn; + SavedNewestLsn = Bcb->NewestLsn; + + // + // Increment PinCount so the Bcb sticks around + // + + Bcb->PinCount += 1; + + ExReleaseFastLock( &CcMasterSpinLock, OldIrql ); + + // + // Any Bcb to unpin from a previous loop? + // + + if (BcbToUnpin != NULL) { + CcUnpinFileData( BcbToUnpin, TRUE, UNPIN ); + BcbToUnpin = NULL; + } + + // + // Call the file system + // + + (*DirtyPageRoutine)( SharedCacheMap->FileObject, + &SavedFileOffset, + SavedByteLength, + &SavedOldestLsn, + &SavedNewestLsn, + Context1, + Context2 ); + + // + // Possibly update OldestLsn + // + + if ((SavedOldestLsn.QuadPart != 0) && + ((OldestLsn.QuadPart == 0) || (SavedOldestLsn.QuadPart < OldestLsn.QuadPart ))) { + OldestLsn = SavedOldestLsn; + } + + // + // Now reacquire the spinlock and scan from the resume point + // point to the next Bcb to return in the descending list. + // + + ExAcquireFastLock( &CcMasterSpinLock, &OldIrql ); + + // + // Normally the Bcb can stay around a while, but if not, + // we will just remember it for the next time we do not + // have the spin lock. We cannot unpin it now, because + // we would lose our place in the list. + // + + if (Bcb->Dirty || (Bcb->PinCount > 1)) { + Bcb->PinCount -= 1; + } else { + BcbToUnpin = Bcb; + } + + // + // Normally the Bcb is not going away now, but if it is + // we need to free it by calling the normal routine + + LoopsWithLockHeld = 0; + } + + Bcb = CONTAINING_RECORD( Bcb->BcbLinks.Flink, BCB, BcbLinks ); + } + + // + // We need to unpin any Bcb we are holding before moving on to + // the next SharedCacheMap, or else CcDeleteSharedCacheMap will + // also delete this Bcb. 
+ // + + if (BcbToUnpin != NULL) { + + ExReleaseFastLock( &CcMasterSpinLock, OldIrql ); + + CcUnpinFileData( BcbToUnpin, TRUE, UNPIN ); + BcbToUnpin = NULL; + + ExAcquireFastLock( &CcMasterSpinLock, &OldIrql ); + } + + // + // Now release the SharedCacheMap, leaving it in the dirty list. + // + + SharedCacheMap->OpenCount -= 1; + SharedCacheMap->DirtyPages -= 1; + } + + // + // Make sure we occassionally drop the lock. Set WRITE_QUEUED + // to keep the guy from going away, and increment DirtyPages to + // keep in in this list. + // + + if ((++LoopsWithLockHeld >= 20) && + !FlagOn(SharedCacheMap->Flags, WRITE_QUEUED | IS_CURSOR)) { + + SetFlag(SharedCacheMap->Flags, WRITE_QUEUED); + SharedCacheMap->DirtyPages += 1; + ExReleaseFastLock( &CcMasterSpinLock, OldIrql ); + LoopsWithLockHeld = 0; + ExAcquireFastLock( &CcMasterSpinLock, &OldIrql ); + ClearFlag(SharedCacheMap->Flags, WRITE_QUEUED); + SharedCacheMap->DirtyPages -= 1; + } + + // + // Now loop back for the next cache map. + // + + SharedCacheMap = + CONTAINING_RECORD( SharedCacheMap->SharedCacheMapLinks.Flink, + SHARED_CACHE_MAP, + SharedCacheMapLinks ); + } + + ExReleaseFastLock( &CcMasterSpinLock, OldIrql ); + + return OldestLsn; +} + + +BOOLEAN +CcIsThereDirtyData ( + IN PVPB Vpb + ) + +/*++ + +Routine Description: + + This routine returns TRUE if the specified Vcb has any unwritten dirty + data in the cache. + +Arguments: + + Vpb - specifies Vpb to check for + +Return Value: + + FALSE - if the Vpb has no dirty data + TRUE - if the Vpb has dirty data + +--*/ + +{ + PSHARED_CACHE_MAP SharedCacheMap; + KIRQL OldIrql; + ULONG LoopsWithLockHeld = 0; + + // + // Synchronize with changes to the SharedCacheMap list. + // + + ExAcquireFastLock( &CcMasterSpinLock, &OldIrql ); + + SharedCacheMap = CONTAINING_RECORD( CcDirtySharedCacheMapList.SharedCacheMapLinks.Flink, + SHARED_CACHE_MAP, + SharedCacheMapLinks ); + + while (&SharedCacheMap->SharedCacheMapLinks != &CcDirtySharedCacheMapList.SharedCacheMapLinks) { + + // + // Look at this one if the Vpb matches and if there is dirty data. + // For what it's worth, don't worry about dirty data in temporary files, + // as that should not concern the caller if it wants to dismount. + // + + if (!FlagOn(SharedCacheMap->Flags, IS_CURSOR) && + (SharedCacheMap->FileObject->Vpb == Vpb) && + (SharedCacheMap->DirtyPages != 0) && + !FlagOn(SharedCacheMap->FileObject->Flags, FO_TEMPORARY_FILE)) { + + ExReleaseFastLock( &CcMasterSpinLock, OldIrql ); + return TRUE; + } + + // + // Make sure we occassionally drop the lock. Set WRITE_QUEUED + // to keep the guy from going away, and increment DirtyPages to + // keep in in this list. + // + + if ((++LoopsWithLockHeld >= 20) && + !FlagOn(SharedCacheMap->Flags, WRITE_QUEUED | IS_CURSOR)) { + + SetFlag(SharedCacheMap->Flags, WRITE_QUEUED); + SharedCacheMap->DirtyPages += 1; + ExReleaseFastLock( &CcMasterSpinLock, OldIrql ); + LoopsWithLockHeld = 0; + ExAcquireFastLock( &CcMasterSpinLock, &OldIrql ); + ClearFlag(SharedCacheMap->Flags, WRITE_QUEUED); + SharedCacheMap->DirtyPages -= 1; + } + + // + // Now loop back for the next cache map. + // + + SharedCacheMap = + CONTAINING_RECORD( SharedCacheMap->SharedCacheMapLinks.Flink, + SHARED_CACHE_MAP, + SharedCacheMapLinks ); + } + + ExReleaseFastLock( &CcMasterSpinLock, OldIrql ); + + return FALSE; +} + +LARGE_INTEGER +CcGetLsnForFileObject( + IN PFILE_OBJECT FileObject, + OUT PLARGE_INTEGER OldestLsn OPTIONAL + ) + +/*++ + +Routine Description: + + This routine returns the oldest and newest LSNs for a file object. 
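+ 
+     For illustration (hypothetical caller): a log file system can do
+ 
+         Newest = CcGetLsnForFileObject( FileObject, &Oldest );
+ 
+     and flush its log at least up to Newest before forcing the stream's
+     dirty pages to disk, in the same spirit as the FlushToLsn routine
+     registered with CcSetLogHandleForFile.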
+ +Arguments: + + FileObject - File for which the log handle should be stored. + + OldestLsn - pointer to location to store oldest LSN for file object. + +Return Value: + + The newest LSN for the file object. + +--*/ + +{ + PBCB Bcb; + KIRQL OldIrql; + LARGE_INTEGER Oldest, Newest; + PSHARED_CACHE_MAP SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap; + + // + // initialize lsn variables + // + + Oldest.LowPart = 0; + Oldest.HighPart = 0; + Newest.LowPart = 0; + Newest.HighPart = 0; + + if(SharedCacheMap == NULL) { + return Oldest; + } + + ExAcquireFastLock(&CcMasterSpinLock, &OldIrql); + + // + // Now point to first Bcb in List, and loop through it. + // + + Bcb = CONTAINING_RECORD( SharedCacheMap->BcbList.Flink, BCB, BcbLinks ); + + while (&Bcb->BcbLinks != &SharedCacheMap->BcbList) { + + // + // If the Bcb is dirty then capture the oldest and newest lsn + // + + + if ((Bcb->NodeTypeCode == CACHE_NTC_BCB) && Bcb->Dirty) { + + LARGE_INTEGER BcbLsn, BcbNewest; + + BcbLsn = Bcb->OldestLsn; + BcbNewest = Bcb->NewestLsn; + + if ((BcbLsn.QuadPart != 0) && + ((Oldest.QuadPart == 0) || + (BcbLsn.QuadPart < Oldest.QuadPart))) { + + Oldest = BcbLsn; + } + + if ((BcbLsn.QuadPart != 0) && (BcbNewest.QuadPart > Newest.QuadPart)) { + + Newest = BcbNewest; + } + } + + + Bcb = CONTAINING_RECORD( Bcb->BcbLinks.Flink, BCB, BcbLinks ); + } + + // + // Now release the spin lock for this Bcb list and generate a callback + // if we got something. + // + + ExReleaseFastLock( &CcMasterSpinLock, OldIrql ); + + if (ARGUMENT_PRESENT(OldestLsn)) { + + *OldestLsn = Oldest; + } + + return Newest; +} diff --git a/private/ntos/cache/mdlsup.c b/private/ntos/cache/mdlsup.c new file mode 100644 index 000000000..0435e7283 --- /dev/null +++ b/private/ntos/cache/mdlsup.c @@ -0,0 +1,999 @@ +/*++ + +Copyright (c) 1990 Microsoft Corporation + +Module Name: + + mdlsup.c + +Abstract: + + This module implements the Mdl support routines for the Cache subsystem. + +Author: + + Tom Miller [TomM] 4-May-1990 + +Revision History: + +--*/ + +#include "cc.h" + +// +// Debug Trace Level +// + +#define me (0x00000010) + +VOID +CcMdlRead ( + IN PFILE_OBJECT FileObject, + IN PLARGE_INTEGER FileOffset, + IN ULONG Length, + OUT PMDL *MdlChain, + OUT PIO_STATUS_BLOCK IoStatus + ) + +/*++ + +Routine Description: + + This routine attempts to lock the specified file data in the cache + and return a description of it in an Mdl along with the correct + I/O status. It is *not* safe to call this routine from Dpc level. + + This routine is synchronous, and raises on errors. + + As each call returns, the pages described by the Mdl are + locked in memory, but not mapped in system space. If the caller + needs the pages mapped in system space, then it must map them. + + Note that each call is a "single shot" which should be followed by + a call to CcMdlReadComplete. To resume an Mdl-based transfer, the + caller must form one or more subsequent calls to CcMdlRead with + appropriately adjusted parameters. + +Arguments: + + FileObject - Pointer to the file object for a file which was + opened with NO_INTERMEDIATE_BUFFERING clear, i.e., for + which CcInitializeCacheMap was called by the file system. + + FileOffset - Byte offset in file for desired data. + + Length - Length of desired data in bytes. + + MdlChain - On output it returns a pointer to an Mdl chain describing + the desired data. Note that even if FALSE is returned, + one or more Mdls may have been allocated, as may be ascertained + by the IoStatus.Information field (see below). 
+ + IoStatus - Pointer to standard I/O status block to receive the status + for the transfer. (STATUS_SUCCESS guaranteed for cache + hits, otherwise the actual I/O status is returned.) The + I/O Information Field indicates how many bytes have been + successfully locked down in the Mdl Chain. + +Return Value: + + None + +Raises: + + STATUS_INSUFFICIENT_RESOURCES - If a pool allocation failure occurs. + +--*/ + +{ + PSHARED_CACHE_MAP SharedCacheMap; + PPRIVATE_CACHE_MAP PrivateCacheMap; + PVOID CacheBuffer; + LARGE_INTEGER FOffset; + PMDL Mdl; + PMDL MdlTemp; + ULONG SavedState = 0; + ULONG OriginalLength = Length; + ULONG Information = 0; + PVACB Vacb = NULL; + ULONG SavedMissCounter = 0; + + KIRQL OldIrql; + ULONG ActivePage; + ULONG PageIsDirty; + PVACB ActiveVacb = NULL; + + DebugTrace(+1, me, "CcMdlRead\n", 0 ); + DebugTrace( 0, me, " FileObject = %08lx\n", FileObject ); + DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart, + FileOffset->HighPart ); + DebugTrace( 0, me, " Length = %08lx\n", Length ); + + // + // Get pointer to SharedCacheMap. + // + + SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap; + PrivateCacheMap = FileObject->PrivateCacheMap; + + // + // See if we have an active Vacb, that we need to free. + // + + GetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, PageIsDirty ); + + // + // If there is an end of a page to be zeroed, then free that page now, + // so we don't send Greg the uninitialized data... + // + + if ((ActiveVacb != NULL) || (SharedCacheMap->NeedToZero != NULL)) { + + CcFreeActiveVacb( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty ); + } + + // + // If read ahead is enabled, then do the read ahead here so it + // overlaps with the copy (otherwise we will do it below). + // Note that we are assuming that we will not get ahead of our + // current transfer - if read ahead is working it should either + // already be in memory or else underway. + // + + if (PrivateCacheMap->ReadAheadEnabled && (PrivateCacheMap->ReadAheadLength[1] == 0)) { + CcScheduleReadAhead( FileObject, FileOffset, Length ); + } + + // + // Increment performance counters + // + + CcMdlReadWait += 1; + + // + // This is not an exact solution, but when IoPageRead gets a miss, + // it cannot tell whether it was CcCopyRead or CcMdlRead, but since + // the miss should occur very soon, by loading the pointer here + // probably the right counter will get incremented, and in any case, + // we hope the errrors average out! + // + + CcMissCounter = &CcMdlReadWaitMiss; + + FOffset = *FileOffset; + + // + // Check for read past file size, the caller must filter this case out. + // + + ASSERT( ( FOffset.QuadPart + (LONGLONG)Length ) <= SharedCacheMap->FileSize.QuadPart ); + + // + // Put try-finally around the loop to deal with any exceptions + // + + try { + + // + // Not all of the transfer will come back at once, so we have to loop + // until the entire transfer is complete. + // + + while (Length != 0) { + + ULONG ReceivedLength; + LARGE_INTEGER BeyondLastByte; + + // + // Map the data and read it in (if necessary) with the + // MmProbeAndLockPages call below. + // + + CacheBuffer = CcGetVirtualAddress( SharedCacheMap, + FOffset, + &Vacb, + &ReceivedLength ); + + if (ReceivedLength > Length) { + ReceivedLength = Length; + } + + BeyondLastByte.QuadPart = FOffset.QuadPart + (LONGLONG)ReceivedLength; + + // + // Now attempt to allocate an Mdl to describe the mapped data. 
+ // + + DebugTrace( 0, mm, "IoAllocateMdl:\n", 0 ); + DebugTrace( 0, mm, " BaseAddress = %08lx\n", CacheBuffer ); + DebugTrace( 0, mm, " Length = %08lx\n", ReceivedLength ); + + Mdl = IoAllocateMdl( CacheBuffer, + ReceivedLength, + FALSE, + FALSE, + NULL ); + + DebugTrace( 0, mm, " Next != NULL) { + MdlTemp = MdlTemp->Next; + } + MdlTemp->Next = Mdl; + } + + // + // Assume we did not get all the data we wanted, and set FOffset + // to the end of the returned data. + // + + FOffset = BeyondLastByte; + + // + // Update number of bytes transferred. + // + + Information += ReceivedLength; + + // + // Calculate length left to transfer. + // + + Length -= ReceivedLength; + } + } + finally { + + CcMissCounter = &CcThrowAway; + + if (AbnormalTermination()) { + + if (SavedState != 0) { + MmEnablePageFaultClustering(SavedState); + } + + // + // We may have failed to allocate an Mdl while still having + // data mapped. + // + + if (Vacb != NULL) { + CcFreeVirtualAddress( Vacb ); + } + + // + // Otherwise loop to deallocate the Mdls + // + + while (*MdlChain != NULL) { + MdlTemp = (*MdlChain)->Next; + + DebugTrace( 0, mm, "MmUnlockPages/IoFreeMdl:\n", 0 ); + DebugTrace( 0, mm, " Mdl = %08lx\n", *MdlChain ); + + MmUnlockPages( *MdlChain ); + IoFreeMdl( *MdlChain ); + + *MdlChain = MdlTemp; + } + + DebugTrace(-1, me, "CcMdlRead -> Unwinding\n", 0 ); + + } + else { + + // + // Now enable read ahead if it looks like we got any misses, and do + // the first one. + // + + if (!PrivateCacheMap->ReadAheadEnabled && (SavedMissCounter != 0)) { + + PrivateCacheMap->ReadAheadEnabled = TRUE; + CcScheduleReadAhead( FileObject, FileOffset, OriginalLength ); + } + + // + // Now that we have described our desired read ahead, let's + // shift the read history down. + // + + PrivateCacheMap->FileOffset1 = PrivateCacheMap->FileOffset2; + PrivateCacheMap->BeyondLastByte1 = PrivateCacheMap->BeyondLastByte2; + PrivateCacheMap->FileOffset2 = *FileOffset; + PrivateCacheMap->BeyondLastByte2.QuadPart = + FileOffset->QuadPart + (LONGLONG)OriginalLength; + + IoStatus->Status = STATUS_SUCCESS; + IoStatus->Information = Information; + } + } + + + DebugTrace( 0, me, " Status, + IoStatus->Information ); + DebugTrace(-1, me, "CcMdlRead -> VOID\n", 0 ); + + return; +} + + +// +// First we have the old routine which checks for an entry in the FastIo vector. +// This routine becomes obsolete for every component that compiles with the new +// definition of FsRtlMdlReadComplete in fsrtl.h. +// + +VOID +CcMdlReadComplete ( + IN PFILE_OBJECT FileObject, + IN PMDL MdlChain + ) + +{ + PDEVICE_OBJECT DeviceObject; + PFAST_IO_DISPATCH FastIoDispatch; + + DeviceObject = IoGetRelatedDeviceObject( FileObject ); + FastIoDispatch = DeviceObject->DriverObject->FastIoDispatch; + + if ((FastIoDispatch != NULL) && + (FastIoDispatch->SizeOfFastIoDispatch > FIELD_OFFSET(FAST_IO_DISPATCH, MdlWriteComplete)) && + (FastIoDispatch->MdlReadComplete != NULL)) { + + FastIoDispatch->MdlReadComplete( FileObject, MdlChain, DeviceObject ); + + } else { + CcMdlReadComplete2( FileObject, MdlChain ); + } +} + +VOID +CcMdlReadComplete2 ( + IN PFILE_OBJECT FileObject, + IN PMDL MdlChain + ) + +/*++ + +Routine Description: + + This routine must be called at IPL0 after a call to CcMdlRead. The + caller must simply supply the address of the MdlChain returned in + CcMdlRead. 
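+ 
+     For illustration (hypothetical caller, names are made up), the
+     typical sequence is
+ 
+         CcMdlRead( FileObject, &FileOffset, Length, &MdlChain, &IoStatus );
+         ... transfer the IoStatus.Information bytes described by MdlChain ...
+         CcMdlReadComplete( FileObject, MdlChain );
+ 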
+ + This call does the following: + + Deletes the MdlChain + +Arguments: + + FileObject - Pointer to the file object for a file which was + opened with NO_INTERMEDIATE_BUFFERING clear, i.e., for + which CcInitializeCacheMap was called by the file system. + + MdlChain - same as returned from corresponding call to CcMdlRead. + +Return Value: + + None. +--*/ + +{ + PMDL MdlNext; + + DebugTrace(+1, me, "CcMdlReadComplete\n", 0 ); + DebugTrace( 0, me, " FileObject = %08lx\n", FileObject ); + DebugTrace( 0, me, " MdlChain = %08lx\n", MdlChain ); + + // + // Deallocate the Mdls + // + + while (MdlChain != NULL) { + + MdlNext = MdlChain->Next; + + DebugTrace( 0, mm, "MmUnlockPages/IoFreeMdl:\n", 0 ); + DebugTrace( 0, mm, " Mdl = %08lx\n", MdlChain ); + + MmUnlockPages( MdlChain ); + + IoFreeMdl( MdlChain ); + + MdlChain = MdlNext; + } + + DebugTrace(-1, me, "CcMdlReadComplete -> VOID\n", 0 ); +} + + +VOID +CcPrepareMdlWrite ( + IN PFILE_OBJECT FileObject, + IN PLARGE_INTEGER FileOffset, + IN ULONG Length, + OUT PMDL *MdlChain, + OUT PIO_STATUS_BLOCK IoStatus + ) + +/*++ + +Routine Description: + + This routine attempts to lock the specified file data in the cache + and return a description of it in an Mdl along with the correct + I/O status. Pages to be completely overwritten may be satisfied + with emtpy pages. It is *not* safe to call this routine from Dpc level. + + This call is synchronous and raises on error. + + When this call returns, the caller may immediately begin + to transfer data into the buffers via the Mdl. + + When the call returns with TRUE, the pages described by the Mdl are + locked in memory, but not mapped in system space. If the caller + needs the pages mapped in system space, then it must map them. + On the subsequent call to CcMdlWriteComplete the pages will be + unmapped if they were mapped, and in any case unlocked and the Mdl + deallocated. + +Arguments: + + FileObject - Pointer to the file object for a file which was + opened with NO_INTERMEDIATE_BUFFERING clear, i.e., for + which CcInitializeCacheMap was called by the file system. + + FileOffset - Byte offset in file for desired data. + + Length - Length of desired data in bytes. + + MdlChain - On output it returns a pointer to an Mdl chain describing + the desired data. Note that even if FALSE is returned, + one or more Mdls may have been allocated, as may be ascertained + by the IoStatus.Information field (see below). + + IoStatus - Pointer to standard I/O status block to receive the status + for the in-transfer of the data. (STATUS_SUCCESS guaranteed + for cache hits, otherwise the actual I/O status is returned.) + The I/O Information Field indicates how many bytes have been + successfully locked down in the Mdl Chain. + +Return Value: + + None + +--*/ + +{ + PSHARED_CACHE_MAP SharedCacheMap; + PVOID CacheBuffer; + LARGE_INTEGER FOffset; + PVACB Vacb; + PMDL Mdl; + PMDL MdlTemp; + LARGE_INTEGER Temp; + ULONG SavedState = 0; + ULONG ZeroFlags = 0; + ULONG Information = 0; + + KIRQL OldIrql; + ULONG ActivePage; + ULONG PageIsDirty; + PVACB ActiveVacb = NULL; + + DebugTrace(+1, me, "CcPrepareMdlWrite\n", 0 ); + DebugTrace( 0, me, " FileObject = %08lx\n", FileObject ); + DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart, + FileOffset->HighPart ); + DebugTrace( 0, me, " Length = %08lx\n", Length ); + + // + // Get pointer to SharedCacheMap. + // + + SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap; + + // + // See if we have an active Vacb, that we need to free. 
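+    //  (This is the same capture-and-free sequence used at the top of
+    //  CcMdlRead above.)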
+ // + + GetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, PageIsDirty ); + + // + // If there is an end of a page to be zeroed, then free that page now, + // so it does not cause our data to get zeroed. If there is an active + // page, free it so we have the correct ValidDataGoal. + // + + if ((ActiveVacb != NULL) || (SharedCacheMap->NeedToZero != NULL)) { + + CcFreeActiveVacb( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty ); + } + + FOffset = *FileOffset; + + // + // Put try-finally around the loop to deal with exceptions + // + + try { + + // + // Not all of the transfer will come back at once, so we have to loop + // until the entire transfer is complete. + // + + while (Length != 0) { + + ULONG ReceivedLength; + LARGE_INTEGER BeyondLastByte; + + // + // Calculate how much we could potentially access at this + // FileOffset, then cut it down if it is more than we need. + // + + ReceivedLength = VACB_MAPPING_GRANULARITY - + (FOffset.LowPart & (VACB_MAPPING_GRANULARITY - 1)); + + if (ReceivedLength > Length) { + ReceivedLength = Length; + } + + BeyondLastByte.QuadPart = FOffset.QuadPart + (LONGLONG)ReceivedLength; + + // + // At this point we can calculate the ZeroFlags. + // + + // + // We can always zero middle pages, if any. + // + + ZeroFlags = ZERO_MIDDLE_PAGES; + + // + // See if we are completely overwriting the first or last page. + // + + if (((FOffset.LowPart & (PAGE_SIZE - 1)) == 0) && + (ReceivedLength >= PAGE_SIZE)) { + ZeroFlags |= ZERO_FIRST_PAGE; + } + + if ((BeyondLastByte.LowPart & (PAGE_SIZE - 1)) == 0) { + ZeroFlags |= ZERO_LAST_PAGE; + } + + // + // See if the entire transfer is beyond valid data length, + // or at least starting from the second page. + // + + Temp = FOffset; + Temp.LowPart &= ~(PAGE_SIZE -1); + Temp.QuadPart = SharedCacheMap->ValidDataGoal.QuadPart - Temp.QuadPart; + + if (Temp.QuadPart <= 0) { + ZeroFlags |= ZERO_FIRST_PAGE | ZERO_MIDDLE_PAGES | ZERO_LAST_PAGE; + } else if ((Temp.HighPart == 0) && (Temp.LowPart <= PAGE_SIZE)) { + ZeroFlags |= ZERO_MIDDLE_PAGES | ZERO_LAST_PAGE; + } + + (VOID)CcMapAndRead( SharedCacheMap, + &FOffset, + ReceivedLength, + ZeroFlags, + TRUE, + &Vacb, + &CacheBuffer ); + + // + // Now attempt to allocate an Mdl to describe the mapped data. + // + + DebugTrace( 0, mm, "IoAllocateMdl:\n", 0 ); + DebugTrace( 0, mm, " BaseAddress = %08lx\n", CacheBuffer ); + DebugTrace( 0, mm, " Length = %08lx\n", ReceivedLength ); + + Mdl = IoAllocateMdl( CacheBuffer, + ReceivedLength, + FALSE, + FALSE, + NULL ); + + DebugTrace( 0, mm, " SharedCacheMap->ValidDataGoal.QuadPart) { + SharedCacheMap->ValidDataGoal = BeyondLastByte; + } + + // + // Unmap the data now, now that the pages are locked down. + // + + CcFreeVirtualAddress( Vacb ); + Vacb = NULL; + + // + // Now link the Mdl into the caller's chain + // + + if ( *MdlChain == NULL ) { + *MdlChain = Mdl; + } else { + MdlTemp = CONTAINING_RECORD( *MdlChain, MDL, Next ); + while (MdlTemp->Next != NULL) { + MdlTemp = MdlTemp->Next; + } + MdlTemp->Next = Mdl; + } + + // + // Assume we did not get all the data we wanted, and set FOffset + // to the end of the returned data. + // + + FOffset = BeyondLastByte; + + // + // Update number of bytes transferred. + // + + Information += ReceivedLength; + + // + // Calculate length left to transfer. 
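+            //  (A purely illustrative example, assuming a view granularity of
+            //   0x40000: a request of 0x50000 bytes starting at file offset
+            //   0x3F000 makes three trips through this loop, receiving 0x1000
+            //   bytes up to the first view boundary, then 0x40000 for a full
+            //   view, and finally the remaining 0xF000.)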
+ // + + Length -= ReceivedLength; + } + } + finally { + + if (AbnormalTermination()) { + + if (SavedState != 0) { + MmEnablePageFaultClustering(SavedState); + } + + if (Vacb != NULL) { + CcFreeVirtualAddress( Vacb ); + } + + // + // Otherwise loop to deallocate the Mdls + // + + FOffset = *FileOffset; + while (*MdlChain != NULL) { + MdlTemp = (*MdlChain)->Next; + + DebugTrace( 0, mm, "MmUnlockPages/IoFreeMdl:\n", 0 ); + DebugTrace( 0, mm, " Mdl = %08lx\n", *MdlChain ); + + MmUnlockPages( *MdlChain ); + + // + // Extract the File Offset for this part of the transfer, and + // tell the lazy writer to write these pages, since we have + // marked them dirty. Ignore the only exception (allocation + // error), and console ourselves for having tried. + // + + // + // try-except does not work on MS compiler. We can accept + // leaving a few good pages dirty... + // + // try { + // CcSetDirtyInMask( SharedCacheMap, &FOffset, (*MdlChain)->ByteCount ); + // } except( CcCopyReadExceptionFilter( GetExceptionInformation(), &ExceptionCode ) ) { + // NOTHING; + // } + + FOffset.QuadPart = FOffset.QuadPart + (LONGLONG)((*MdlChain)->ByteCount); + + IoFreeMdl( *MdlChain ); + + *MdlChain = MdlTemp; + } + + DebugTrace(-1, me, "CcPrepareMdlWrite -> Unwinding\n", 0 ); + } + else { + + IoStatus->Status = STATUS_SUCCESS; + IoStatus->Information = Information; + + // + // Make sure the SharedCacheMap does not go away while + // the Mdl write is in progress. We decrment below. + // + + ExAcquireFastLock( &CcMasterSpinLock, &OldIrql ); + SharedCacheMap->OpenCount += 1; + ExReleaseFastLock( &CcMasterSpinLock, OldIrql ); + } + } + + DebugTrace( 0, me, " VOID\n", 0 ); + + return; +} + + +// +// First we have the old routine which checks for an entry in the FastIo vector. +// This routine becomes obsolete for every component that compiles with the new +// definition of FsRtlMdlWriteComplete in fsrtl.h. +// + +VOID +CcMdlWriteComplete ( + IN PFILE_OBJECT FileObject, + IN PLARGE_INTEGER FileOffset, + IN PMDL MdlChain + ) + +{ + PDEVICE_OBJECT DeviceObject; + PFAST_IO_DISPATCH FastIoDispatch; + + DeviceObject = IoGetRelatedDeviceObject( FileObject ); + FastIoDispatch = DeviceObject->DriverObject->FastIoDispatch; + + if ((FastIoDispatch != NULL) && + (FastIoDispatch->SizeOfFastIoDispatch > FIELD_OFFSET(FAST_IO_DISPATCH, MdlWriteComplete)) && + (FastIoDispatch->MdlWriteComplete != NULL)) { + + FastIoDispatch->MdlWriteComplete( FileObject, FileOffset, MdlChain, DeviceObject ); + + } else { + CcMdlWriteComplete2( FileObject, FileOffset, MdlChain ); + } +} + +VOID +CcMdlWriteComplete2 ( + IN PFILE_OBJECT FileObject, + IN PLARGE_INTEGER FileOffset, + IN PMDL MdlChain + ) + +/*++ + +Routine Description: + + This routine must be called at IPL0 after a call to CcPrepareMdlWrite. + The caller supplies the ActualLength of data that it actually wrote + into the buffer, which may be less than or equal to the Length specified + in CcPrepareMdlWrite. + + This call does the following: + + Makes sure the data up to ActualLength eventually gets written. + If WriteThrough is FALSE, the data will not be written immediately. + If WriteThrough is TRUE, then the data is written synchronously. + + Unmaps the pages (if mapped), unlocks them and deletes the MdlChain + +Arguments: + + FileObject - Pointer to the file object for a file which was + opened with NO_INTERMEDIATE_BUFFERING clear, i.e., for + which CcInitializeCacheMap was called by the file system. + + FileOffset - Original file offset read above. 
+ + MdlChain - same as returned from corresponding call to CcPrepareMdlWrite. + +Return Value: + + None + +--*/ + +{ + PMDL MdlNext; + PSHARED_CACHE_MAP SharedCacheMap; + LARGE_INTEGER FOffset; + IO_STATUS_BLOCK IoStatus; + KIRQL OldIrql; + NTSTATUS StatusToRaise = STATUS_SUCCESS; + + DebugTrace(+1, me, "CcMdlWriteComplete\n", 0 ); + DebugTrace( 0, me, " FileObject = %08lx\n", FileObject ); + DebugTrace( 0, me, " MdlChain = %08lx\n", MdlChain ); + + SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap; + + // + // Deallocate the Mdls + // + + FOffset.QuadPart = *(LONGLONG UNALIGNED *)FileOffset; + while (MdlChain != NULL) { + + MdlNext = MdlChain->Next; + + DebugTrace( 0, mm, "MmUnlockPages/IoFreeMdl:\n", 0 ); + DebugTrace( 0, mm, " Mdl = %08lx\n", MdlChain ); + + // + // Now clear the dirty bits in the Pte and set them in the + // Pfn. + // + + MmUnlockPages( MdlChain ); + + // + // Extract the File Offset for this part of the transfer. + // + + if (FlagOn(FileObject->Flags, FO_WRITE_THROUGH)) { + + MmFlushSection ( FileObject->SectionObjectPointer, + &FOffset, + MdlChain->ByteCount, + &IoStatus, + TRUE ); + + // + // If we got an I/O error, remember it. + // + + if (!NT_SUCCESS(IoStatus.Status)) { + StatusToRaise = IoStatus.Status; + } + + } else { + + NTSTATUS ExceptionCode; + + // + // Ignore the only exception (allocation error), and console + // ourselves for having tried. + // + + try { + CcSetDirtyInMask( SharedCacheMap, &FOffset, MdlChain->ByteCount ); + } except( CcCopyReadExceptionFilter( GetExceptionInformation(), &ExceptionCode ) ) { + StatusToRaise = STATUS_INSUFFICIENT_RESOURCES; + } + } + + FOffset.QuadPart = FOffset.QuadPart + (LONGLONG)(MdlChain->ByteCount); + + IoFreeMdl( MdlChain ); + + MdlChain = MdlNext; + } + + // + // Now release our open count. + // + + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + + SharedCacheMap->OpenCount -= 1; + + if ((SharedCacheMap->OpenCount == 0) && + !FlagOn(SharedCacheMap->Flags, WRITE_QUEUED) && + (SharedCacheMap->DirtyPages == 0)) { + + // + // Move to the dirty list. + // + + RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks ); + InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks, + &SharedCacheMap->SharedCacheMapLinks ); + + // + // Make sure the Lazy Writer will wake up, because we + // want him to delete this SharedCacheMap. + // + + LazyWriter.OtherWork = TRUE; + if (!LazyWriter.ScanActive) { + CcScheduleLazyWriteScan(); + } + } + + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + + // + // If we got an I/O error, raise it now. + // + + if (!NT_SUCCESS(StatusToRaise)) { + FsRtlNormalizeNtstatus( StatusToRaise, + STATUS_UNEXPECTED_IO_ERROR ); + } + + DebugTrace(-1, me, "CcMdlWriteComplete -> TRUE\n", 0 ); + + return; +} + + + diff --git a/private/ntos/cache/mp/makefile b/private/ntos/cache/mp/makefile new file mode 100644 index 000000000..6ee4f43fa --- /dev/null +++ b/private/ntos/cache/mp/makefile @@ -0,0 +1,6 @@ +# +# DO NOT EDIT THIS FILE!!! Edit .\sources. if you want to add a new source +# file to this component. This file merely indirects to the real make file +# that is shared by all the components of NT OS/2 +# +!INCLUDE $(NTMAKEENV)\makefile.def diff --git a/private/ntos/cache/mp/sources b/private/ntos/cache/mp/sources new file mode 100644 index 000000000..dbeb18d62 --- /dev/null +++ b/private/ntos/cache/mp/sources @@ -0,0 +1,29 @@ +!IF 0 + +Copyright (c) 1989 Microsoft Corporation + +Module Name: + + sources. 
+ +Abstract: + + This file specifies the target component being built and the list of + sources files needed to build that component. Also specifies optional + compiler switches and libraries that are unique for the component being + built. + + +Author: + + Steve Wood (stevewo) 12-Apr-1990 + +NOTE: Commented description of this file is in \nt\bak\bin\sources.tpl + +!ENDIF + +NT_UP=0 + +TARGETPATH=..\..\mpobj + +!include ..\sources.inc diff --git a/private/ntos/cache/pinsup.c b/private/ntos/cache/pinsup.c new file mode 100644 index 000000000..142806b1a --- /dev/null +++ b/private/ntos/cache/pinsup.c @@ -0,0 +1,1274 @@ +/*++ + +Copyright (c) 1990 Microsoft Corporation + +Module Name: + + pinsup.c + +Abstract: + + This module implements the pointer-based Pin support routines for the + Cache subsystem. + +Author: + + Tom Miller [TomM] 4-June-1990 + +Revision History: + +--*/ + +#include "cc.h" + +// +// Define our debug constant +// + +#define me 0x00000008 + +#if LIST_DBG + +#define SetCallersAddress(BCB) { \ + RtlGetCallersAddress( &(BCB)->CallerAddress, \ + &(BCB)->CallersCallerAddress ); \ +} + +#endif + +// +// Internal routines +// + +POBCB +CcAllocateObcb ( + IN PLARGE_INTEGER FileOffset, + IN ULONG Length, + IN PBCB FirstBcb + ); + + +BOOLEAN +CcMapData ( + IN PFILE_OBJECT FileObject, + IN PLARGE_INTEGER FileOffset, + IN ULONG Length, + IN BOOLEAN Wait, + OUT PVOID *Bcb, + OUT PVOID *Buffer + ) + +/*++ + +Routine Description: + + This routine attempts to map the specified file data in the cache. + A pointer is returned to the desired data in the cache. + + If the caller does not want to block on this call, then + Wait should be supplied as FALSE. If Wait was supplied as FALSE and + it is currently impossible to supply the requested data without + blocking, then this routine will return FALSE. However, if the + data is immediately accessible in the cache and no blocking is + required, this routine returns TRUE with a pointer to the data. + + Note that a call to this routine with Wait supplied as TRUE is + considerably faster than a call with Wait supplies as FALSE, because + in the Wait TRUE case we only have to make sure the data is mapped + in order to return. + + It is illegal to modify data that is only mapped, and can in fact lead + to serious problems. It is impossible to check for this in all cases, + however CcSetDirtyPinnedData may implement some Assertions to check for + this. If the caller wishes to modify data that it has only mapped, then + it must *first* call CcPinMappedData. + + In any case, the caller MUST subsequently call CcUnpinData. + Naturally if CcPinRead or CcPreparePinWrite were called multiple + times for the same data, CcUnpinData must be called the same number + of times. + + The returned Buffer pointer is valid until the data is unpinned, at + which point it is invalid to use the pointer further. This buffer pointer + will remain valid if CcPinMappedData is called. + + Note that under some circumstances (like Wait supplied as FALSE or more + than a page is requested), this routine may actually pin the data, however + it is not necessary, and in fact not correct, for the caller to be concerned + about this. + +Arguments: + + FileObject - Pointer to the file object for a file which was + opened with NO_INTERMEDIATE_BUFFERING clear, i.e., for + which CcInitializeCacheMap was called by the file system. + + FileOffset - Byte offset in file for desired data. + + Length - Length of desired data in bytes. 
+ + Wait - FALSE if caller may not block, TRUE otherwise (see description + above) + + Bcb - On the first call this returns a pointer to a Bcb + parameter which must be supplied as input on all subsequent + calls, for this buffer + + Buffer - Returns pointer to desired data, valid until the buffer is + unpinned or freed. This pointer will remain valid if CcPinMappedData + is called. + +Return Value: + + FALSE - if Wait was supplied as FALSE and the data was not delivered + + TRUE - if the data is being delivered + +--*/ + +{ + PSHARED_CACHE_MAP SharedCacheMap; + LARGE_INTEGER BeyondLastByte; + ULONG ReceivedLength; + ULONG SavedState; + volatile UCHAR ch; + ULONG PageCount = COMPUTE_PAGES_SPANNED(((PVOID)FileOffset->LowPart), Length); + PETHREAD Thread = PsGetCurrentThread(); + + DebugTrace(+1, me, "CcMapData\n", 0 ); + + MmSavePageFaultReadAhead( Thread, &SavedState ); + + // + // Increment performance counters + // + + if (Wait) { + + CcMapDataWait += 1; + + // + // Initialize the indirect pointer to our miss counter. + // + + CcMissCounter = &CcMapDataWaitMiss; + + } else { + CcMapDataNoWait += 1; + } + + // + // Get pointer to SharedCacheMap. + // + + SharedCacheMap = *(PSHARED_CACHE_MAP *)((PCHAR)FileObject->SectionObjectPointer + + sizeof(PVOID)); + + // + // Call local routine to Map or Access the file data. If we cannot map + // the data because of a Wait condition, return FALSE. + // + + if (Wait) { + + *Buffer = CcGetVirtualAddress( SharedCacheMap, + *FileOffset, + (PVACB *)Bcb, + &ReceivedLength ); + + ASSERT( ReceivedLength >= Length ); + + } else if (!CcPinFileData( FileObject, + FileOffset, + Length, + TRUE, + FALSE, + Wait, + (PBCB *)Bcb, + Buffer, + &BeyondLastByte )) { + + DebugTrace(-1, me, "CcMapData -> FALSE\n", 0 ); + + CcMapDataNoWaitMiss += 1; + + return FALSE; + + } else { + + ASSERT( (BeyondLastByte.QuadPart - FileOffset->QuadPart) >= Length ); + +#if LIST_DBG + { + KIRQL OldIrql; + PBCB BcbTemp = (PBCB)*Bcb; + + ExAcquireSpinLock( &CcBcbSpinLock, &OldIrql ); + + if (BcbTemp->CcBcbLinks.Flink == NULL) { + + InsertTailList( &CcBcbList, &BcbTemp->CcBcbLinks ); + CcBcbCount += 1; + ExReleaseSpinLock( &CcBcbSpinLock, OldIrql ); + SetCallersAddress( BcbTemp ); + + } else { + ExReleaseSpinLock( &CcBcbSpinLock, OldIrql ); + } + + } +#endif + + } + + // + // Now let's just sit here and take the miss(es) like a man (and count them). + // + + try { + + // + // Loop to touch each page + // + + BeyondLastByte.LowPart = 0; + + while (PageCount != 0) { + + MmSetPageFaultReadAhead( Thread, PageCount - 1 ); + + ch = *((volatile UCHAR *)(*Buffer) + BeyondLastByte.LowPart); + + BeyondLastByte.LowPart += PAGE_SIZE; + PageCount -= 1; + } + + } finally { + + MmResetPageFaultReadAhead( Thread, SavedState ); + + if (AbnormalTermination() && (*Bcb != NULL)) { + CcUnpinFileData( (PBCB)*Bcb, TRUE, UNPIN ); + *Bcb = NULL; + } + } + + CcMissCounter = &CcThrowAway; + + // + // Increment the pointer as a reminder that it is read only. + // + + *(PCHAR *)Bcb += 1; + + DebugTrace(-1, me, "CcMapData -> TRUE\n", 0 ); + + return TRUE; +} + + +BOOLEAN +CcPinMappedData ( + IN PFILE_OBJECT FileObject, + IN PLARGE_INTEGER FileOffset, + IN ULONG Length, + IN BOOLEAN Wait, + IN OUT PVOID *Bcb + ) + +/*++ + +Routine Description: + + This routine attempts to pin data that was previously only mapped. + If the routine determines that in fact it was necessary to actually + pin the data when CcMapData was called, then this routine does not + have to do anything. 
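+
+    (A purely illustrative calling sequence, with hypothetical local names,
+    is:
+
+        CcMapData( FileObject, &FileOffset, Length, TRUE, &Bcb, &Buffer );
+
+            ... read the mapped data through Buffer ...
+
+        CcPinMappedData( FileObject, &FileOffset, Length, TRUE, &Bcb );
+
+            ... modify the data through Buffer, then ...
+
+        CcSetDirtyPinnedData( Bcb, NULL );
+        CcUnpinData( Bcb );
+    )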
+ + If the caller does not want to block on this call, then + Wait should be supplied as FALSE. If Wait was supplied as FALSE and + it is currently impossible to supply the requested data without + blocking, then this routine will return FALSE. However, if the + data is immediately accessible in the cache and no blocking is + required, this routine returns TRUE with a pointer to the data. + + If the data is not returned in the first call, the caller + may request the data later with Wait = TRUE. It is not required + that the caller request the data later. + + If the caller subsequently modifies the data, it should call + CcSetDirtyPinnedData. + + In any case, the caller MUST subsequently call CcUnpinData. + Naturally if CcPinRead or CcPreparePinWrite were called multiple + times for the same data, CcUnpinData must be called the same number + of times. + + Note there are no performance counters in this routine, as the misses + will almost always occur on the map above, and there will seldom be a + miss on this conversion. + +Arguments: + + FileObject - Pointer to the file object for a file which was + opened with NO_INTERMEDIATE_BUFFERING clear, i.e., for + which CcInitializeCacheMap was called by the file system. + + FileOffset - Byte offset in file for desired data. + + Length - Length of desired data in bytes. + + Wait - FALSE if caller may not block, TRUE otherwise (see description + above) + + Bcb - On the first call this returns a pointer to a Bcb + parameter which must be supplied as input on all subsequent + calls, for this buffer + +Return Value: + + FALSE - if Wait was supplied as FALSE and the data was not delivered + + TRUE - if the data is being delivered + +--*/ + +{ + PVOID Buffer; + LARGE_INTEGER BeyondLastByte; + PSHARED_CACHE_MAP SharedCacheMap; + LARGE_INTEGER LocalFileOffset = *FileOffset; + POBCB MyBcb = NULL; + PBCB *CurrentBcbPtr = (PBCB *)&MyBcb; + BOOLEAN Result = FALSE; + + DebugTrace(+1, me, "CcPinMappedData\n", 0 ); + + // + // If the Bcb is no longer ReadOnly, then just return. + // + + if ((*(PULONG)Bcb & 1) == 0) { + return TRUE; + } + + // + // Remove the Read Only flag + // + + *(PCHAR *)Bcb -= 1; + + // + // Get pointer to SharedCacheMap. + // + + SharedCacheMap = *(PSHARED_CACHE_MAP *)((PCHAR)FileObject->SectionObjectPointer + + sizeof(PVOID)); + + // + // We only count the calls to this routine, since they are almost guaranteed + // to be hits. + // + + CcPinMappedDataCount += 1; + + // + // Guarantee we will put the flag back if required. + // + + try { + + if (((PBCB)*Bcb)->NodeTypeCode != CACHE_NTC_BCB) { + + // + // Form loop to handle occassional overlapped Bcb case. + // + + do { + + // + // If we have already been through the loop, then adjust + // our file offset and length from the last time. + // + + if (MyBcb != NULL) { + + // + // If this is the second time through the loop, then it is time + // to handle the overlap case and allocate an OBCB. + // + + if (CurrentBcbPtr == (PBCB *)&MyBcb) { + + MyBcb = CcAllocateObcb( FileOffset, Length, (PBCB)MyBcb ); + + // + // Set CurrentBcbPtr to point at the first entry in + // the vector (which is already filled in), before + // advancing it below. + // + + CurrentBcbPtr = &MyBcb->Bcbs[0]; + } + + Length -= (ULONG)(BeyondLastByte.QuadPart - LocalFileOffset.QuadPart); + LocalFileOffset.QuadPart = BeyondLastByte.QuadPart; + CurrentBcbPtr += 1; + } + + // + // Call local routine to Map or Access the file data. If we cannot map + // the data because of a Wait condition, return FALSE. 
+ // + + if (!CcPinFileData( FileObject, + &LocalFileOffset, + Length, + (BOOLEAN)!FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED), + FALSE, + Wait, + CurrentBcbPtr, + &Buffer, + &BeyondLastByte )) { + + try_return( Result = FALSE ); + } + + // + // Continue looping if we did not get everything. + // + + } while((BeyondLastByte.QuadPart - LocalFileOffset.QuadPart) < Length); + + // + // Free the Vacb before going on. + // + + CcFreeVirtualAddress( (PVACB)*Bcb ); + + *Bcb = MyBcb; + + // + // Debug routines used to insert and remove Bcbs from the global list + // + +#if LIST_DBG + { + KIRQL OldIrql; + PBCB BcbTemp = (PBCB)*Bcb; + + ExAcquireSpinLock( &CcBcbSpinLock, &OldIrql ); + + if (BcbTemp->CcBcbLinks.Flink == NULL) { + + InsertTailList( &CcBcbList, &BcbTemp->CcBcbLinks ); + CcBcbCount += 1; + ExReleaseSpinLock( &CcBcbSpinLock, OldIrql ); + SetCallersAddress( BcbTemp ); + + } else { + ExReleaseSpinLock( &CcBcbSpinLock, OldIrql ); + } + + } +#endif + } + + // + // If he really has a Bcb, all we have to do is acquire it shared since he is + // no longer ReadOnly. + // + + else { + + if (!ExAcquireSharedStarveExclusive( &((PBCB)*Bcb)->Resource, Wait )) { + + try_return( Result = FALSE ); + } + } + + Result = TRUE; + + try_exit: NOTHING; + } + finally { + + if (!Result) { + + // + // Put the Read Only flag back + // + + *(PCHAR *)Bcb += 1; + + // + // We may have gotten partway through + // + + if (MyBcb != NULL) { + CcUnpinData( MyBcb ); + } + } + + DebugTrace(-1, me, "CcPinMappedData -> %02lx\n", Result ); + } + return Result; +} + + +BOOLEAN +CcPinRead ( + IN PFILE_OBJECT FileObject, + IN PLARGE_INTEGER FileOffset, + IN ULONG Length, + IN BOOLEAN Wait, + OUT PVOID *Bcb, + OUT PVOID *Buffer + ) + +/*++ + +Routine Description: + + This routine attempts to pin the specified file data in the cache. + A pointer is returned to the desired data in the cache. This routine + is intended for File System support and is not intended to be called + from Dpc level. + + If the caller does not want to block on this call, then + Wait should be supplied as FALSE. If Wait was supplied as FALSE and + it is currently impossible to supply the requested data without + blocking, then this routine will return FALSE. However, if the + data is immediately accessible in the cache and no blocking is + required, this routine returns TRUE with a pointer to the data. + + If the data is not returned in the first call, the caller + may request the data later with Wait = TRUE. It is not required + that the caller request the data later. + + If the caller subsequently modifies the data, it should call + CcSetDirtyPinnedData. + + In any case, the caller MUST subsequently call CcUnpinData. + Naturally if CcPinRead or CcPreparePinWrite were called multiple + times for the same data, CcUnpinData must be called the same number + of times. + + The returned Buffer pointer is valid until the data is unpinned, at + which point it is invalid to use the pointer further. + +Arguments: + + FileObject - Pointer to the file object for a file which was + opened with NO_INTERMEDIATE_BUFFERING clear, i.e., for + which CcInitializeCacheMap was called by the file system. + + FileOffset - Byte offset in file for desired data. + + Length - Length of desired data in bytes. 
+ + Wait - Supplies TRUE if it is ok to block the caller's thread + Supplies 3 if it is ok to block the caller's thread and the Bcb should + be exclusive + Supplies FALSE if it is not ok to block the caller's thread + + Bcb - On the first call this returns a pointer to a Bcb + parameter which must be supplied as input on all subsequent + calls, for this buffer + + Buffer - Returns pointer to desired data, valid until the buffer is + unpinned or freed. + +Return Value: + + FALSE - if Wait was supplied as FALSE and the data was not delivered + + TRUE - if the data is being delivered + +--*/ + +{ + PSHARED_CACHE_MAP SharedCacheMap; + PVOID LocalBuffer; + LARGE_INTEGER BeyondLastByte; + LARGE_INTEGER LocalFileOffset = *FileOffset; + POBCB MyBcb = NULL; + PBCB *CurrentBcbPtr = (PBCB *)&MyBcb; + BOOLEAN Result = FALSE; + + DebugTrace(+1, me, "CcPinRead\n", 0 ); + + // + // Increment performance counters + // + + if (Wait) { + + CcPinReadWait += 1; + + // + // Initialize the indirect pointer to our miss counter. + // + + CcMissCounter = &CcPinReadWaitMiss; + + } else { + CcPinReadNoWait += 1; + } + + // + // Get pointer to SharedCacheMap. + // + + SharedCacheMap = *(PSHARED_CACHE_MAP *)((PCHAR)FileObject->SectionObjectPointer + + sizeof(PVOID)); + + try { + + // + // Form loop to handle occassional overlapped Bcb case. + // + + do { + + // + // If we have already been through the loop, then adjust + // our file offset and length from the last time. + // + + if (MyBcb != NULL) { + + // + // If this is the second time through the loop, then it is time + // to handle the overlap case and allocate an OBCB. + // + + if (CurrentBcbPtr == (PBCB *)&MyBcb) { + + MyBcb = CcAllocateObcb( FileOffset, Length, (PBCB)MyBcb ); + + // + // Set CurrentBcbPtr to point at the first entry in + // the vector (which is already filled in), before + // advancing it below. + // + + CurrentBcbPtr = &MyBcb->Bcbs[0]; + + // + // Also on second time through, return starting Buffer + // + + *Buffer = LocalBuffer; + } + + Length -= (ULONG)(BeyondLastByte.QuadPart - LocalFileOffset.QuadPart); + LocalFileOffset.QuadPart = BeyondLastByte.QuadPart; + CurrentBcbPtr += 1; + } + + // + // Call local routine to Map or Access the file data. If we cannot map + // the data because of a Wait condition, return FALSE. + // + + if (!CcPinFileData( FileObject, + &LocalFileOffset, + Length, + (BOOLEAN)!FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED), + FALSE, + Wait, + CurrentBcbPtr, + &LocalBuffer, + &BeyondLastByte )) { + + CcPinReadNoWaitMiss += 1; + + try_return( Result = FALSE ); + } + + // + // Continue looping if we did not get everything. + // + + } while((BeyondLastByte.QuadPart - LocalFileOffset.QuadPart) < Length); + + *Bcb = MyBcb; + + // + // Debug routines used to insert and remove Bcbs from the global list + // + +#if LIST_DBG + + { + KIRQL OldIrql; + PBCB BcbTemp = (PBCB)*Bcb; + + ExAcquireSpinLock( &CcBcbSpinLock, &OldIrql ); + + if (BcbTemp->CcBcbLinks.Flink == NULL) { + + InsertTailList( &CcBcbList, &BcbTemp->CcBcbLinks ); + CcBcbCount += 1; + ExReleaseSpinLock( &CcBcbSpinLock, OldIrql ); + SetCallersAddress( BcbTemp ); + + } else { + ExReleaseSpinLock( &CcBcbSpinLock, OldIrql ); + } + + } + +#endif + + // + // In the normal (nonoverlapping) case we return the + // correct buffer address here. 
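+        //  (In the overlapped case the starting buffer address was already
+        //  returned on the second trip through the loop above, when the Obcb
+        //  was allocated.)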
+ // + + if (CurrentBcbPtr == (PBCB *)&MyBcb) { + *Buffer = LocalBuffer; + } + + Result = TRUE; + + try_exit: NOTHING; + } + finally { + + CcMissCounter = &CcThrowAway; + + if (!Result) { + + // + // We may have gotten partway through + // + + if (MyBcb != NULL) { + CcUnpinData( MyBcb ); + } + } + + DebugTrace(-1, me, "CcPinRead -> %02lx\n", Result ); + } + + return Result; +} + + +BOOLEAN +CcPreparePinWrite ( + IN PFILE_OBJECT FileObject, + IN PLARGE_INTEGER FileOffset, + IN ULONG Length, + IN BOOLEAN Zero, + IN BOOLEAN Wait, + OUT PVOID *Bcb, + OUT PVOID *Buffer + ) + +/*++ + +Routine Description: + + This routine attempts to lock the specified file data in the cache + and return a pointer to it along with the correct + I/O status. Pages to be completely overwritten may be satisfied + with emtpy pages. + + If not all of the pages can be prepared, and Wait was supplied as + FALSE, then this routine will return FALSE, and its outputs will + be meaningless. The caller may request the data later with + Wait = TRUE. However, it is not required that the caller request + the data later. + + If Wait is supplied as TRUE, and all of the pages can be prepared + without blocking, this call will return TRUE immediately. Otherwise, + this call will block until all of the pages can be prepared, and + then return TRUE. + + When this call returns with TRUE, the caller may immediately begin + to transfer data into the buffers via the Buffer pointer. The + buffer will already be marked dirty. + + The caller MUST subsequently call CcUnpinData. + Naturally if CcPinRead or CcPreparePinWrite were called multiple + times for the same data, CcUnpinData must be called the same number + of times. + + The returned Buffer pointer is valid until the data is unpinned, at + which point it is invalid to use the pointer further. + +Arguments: + + FileObject - Pointer to the file object for a file which was + opened with NO_INTERMEDIATE_BUFFERING clear, i.e., for + which CcInitializeCacheMap was called by the file system. + + FileOffset - Byte offset in file for desired data. + + Length - Length of desired data in bytes. + + Zero - If supplied as TRUE, the buffer will be zeroed on return. + + Wait - FALSE if caller may not block, TRUE otherwise (see description + above) + + Bcb - This returns a pointer to a Bcb parameter which must be + supplied as input to CcPinWriteComplete. + + Buffer - Returns pointer to desired data, valid until the buffer is + unpinned or freed. + +Return Value: + + FALSE - if Wait was supplied as FALSE and the pages were not delivered + + TRUE - if the pages are being delivered + +--*/ + +{ + PSHARED_CACHE_MAP SharedCacheMap; + PVOID LocalBuffer; + LARGE_INTEGER BeyondLastByte; + LARGE_INTEGER LocalFileOffset = *FileOffset; + POBCB MyBcb = NULL; + PBCB *CurrentBcbPtr = (PBCB *)&MyBcb; + ULONG OriginalLength = Length; + BOOLEAN Result = FALSE; + + DebugTrace(+1, me, "CcPreparePinWrite\n", 0 ); + + // + // Get pointer to SharedCacheMap. + // + + SharedCacheMap = *(PSHARED_CACHE_MAP *)((PCHAR)FileObject->SectionObjectPointer + + sizeof(PVOID)); + + try { + + // + // Form loop to handle occassional overlapped Bcb case. + // + + do { + + // + // If we have already been through the loop, then adjust + // our file offset and length from the last time. + // + + if (MyBcb != NULL) { + + // + // If this is the second time through the loop, then it is time + // to handle the overlap case and allocate an OBCB. 
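+                //  (The Obcb simply collects the per-view Bcbs into a single
+                //  NULL-terminated vector, so that CcUnpinData can later
+                //  release each one - see CcAllocateObcb below.)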
+ // + + if (CurrentBcbPtr == (PBCB *)&MyBcb) { + + MyBcb = CcAllocateObcb( FileOffset, Length, (PBCB)MyBcb ); + + // + // Set CurrentBcbPtr to point at the first entry in + // the vector (which is already filled in), before + // advancing it below. + // + + CurrentBcbPtr = &MyBcb->Bcbs[0]; + + // + // Also on second time through, return starting Buffer + // + + *Buffer = LocalBuffer; + } + + Length -= (ULONG)(BeyondLastByte.QuadPart - LocalFileOffset.QuadPart); + LocalFileOffset.QuadPart = BeyondLastByte.QuadPart; + CurrentBcbPtr += 1; + } + + // + // Call local routine to Map or Access the file data. If we cannot map + // the data because of a Wait condition, return FALSE. + // + + if (!CcPinFileData( FileObject, + &LocalFileOffset, + Length, + FALSE, + TRUE, + Wait, + CurrentBcbPtr, + &LocalBuffer, + &BeyondLastByte )) { + + try_return( Result = FALSE ); + } + + // + // Continue looping if we did not get everything. + // + + } while((BeyondLastByte.QuadPart - LocalFileOffset.QuadPart) < Length); + + *Bcb = MyBcb; + + // + // Debug routines used to insert and remove Bcbs from the global list + // + +#if LIST_DBG + + { + KIRQL OldIrql; + PBCB BcbTemp = (PBCB)*Bcb; + + ExAcquireSpinLock( &CcBcbSpinLock, &OldIrql ); + + if (BcbTemp->CcBcbLinks.Flink == NULL) { + + InsertTailList( &CcBcbList, &BcbTemp->CcBcbLinks ); + CcBcbCount += 1; + ExReleaseSpinLock( &CcBcbSpinLock, OldIrql ); + SetCallersAddress( BcbTemp ); + + } else { + ExReleaseSpinLock( &CcBcbSpinLock, OldIrql ); + } + + } + +#endif + + // + // In the normal (nonoverlapping) case we return the + // correct buffer address here. + // + + if (CurrentBcbPtr == (PBCB *)&MyBcb) { + *Buffer = LocalBuffer; + } + + if (Zero) { + RtlZeroMemory( *Buffer, OriginalLength ); + } + + CcSetDirtyPinnedData( MyBcb, NULL ); + + Result = TRUE; + + try_exit: NOTHING; + } + finally { + + CcMissCounter = &CcThrowAway; + + if (!Result) { + + // + // We may have gotten partway through + // + + if (MyBcb != NULL) { + CcUnpinData( MyBcb ); + } + } + + DebugTrace(-1, me, "CcPreparePinWrite -> %02lx\n", Result ); + } + + return Result; +} + + +VOID +CcUnpinData ( + IN PVOID Bcb + ) + +/*++ + +Routine Description: + + This routine must be called at IPL0, some time after calling CcPinRead + or CcPreparePinWrite. It performs any cleanup that is necessary. + +Arguments: + + Bcb - Bcb parameter returned from the last call to CcPinRead. + +Return Value: + + None. + +--*/ + +{ + DebugTrace(+1, me, "CcUnpinData:\n", 0 ); + DebugTrace( 0, me, " >Bcb = %08lx\n", Bcb ); + + // + // Test for ReadOnly and unpin accordingly. + // + + if (((ULONG)Bcb & 1) != 0) { + + // + // Remove the Read Only flag + // + + (PCHAR)Bcb -= 1; + + CcUnpinFileData( (PBCB)Bcb, TRUE, UNPIN ); + + } else { + + // + // Handle the overlapped Bcb case. + // + + if (((POBCB)Bcb)->NodeTypeCode == CACHE_NTC_OBCB) { + + PBCB *BcbPtrPtr = &((POBCB)Bcb)->Bcbs[0]; + + // + // Loop to free all Bcbs with recursive calls + // (rather than dealing with RO for this uncommon case). 
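+            //  The recursion is only one level deep, since the entries in an
+            //  Obcb vector are always plain Bcbs, never other Obcbs.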
+ // + + while (*BcbPtrPtr != NULL) { + CcUnpinData(*(BcbPtrPtr++)); + } + + // + // Then free the pool for the Obcb + // + + ExFreePool( Bcb ); + + // + // Otherwise, it is a normal Bcb + // + + } else { + CcUnpinFileData( (PBCB)Bcb, FALSE, UNPIN ); + } + } + + DebugTrace(-1, me, "CcUnPinData -> VOID\n", 0 ); +} + + +VOID +CcSetBcbOwnerPointer ( + IN PVOID Bcb, + IN PVOID OwnerPointer + ) + +/*++ + +Routine Description: + + This routine may be called to set the resource owner for the Bcb resource, + for cases where another thread will do the unpin *and* the current thread + may exit. + +Arguments: + + Bcb - Bcb parameter returned from the last call to CcPinRead. + + OwnerPointer - A valid resource owner pointer, which means a pointer to + an allocated system address, with the low-order two bits + set. The address may not be deallocated until after the + unpin call. + +Return Value: + + None. + +--*/ + +{ + ASSERT(((ULONG)Bcb & 1) == 0); + + // + // Handle the overlapped Bcb case. + // + + if (((POBCB)Bcb)->NodeTypeCode == CACHE_NTC_OBCB) { + + PBCB *BcbPtrPtr = &((POBCB)Bcb)->Bcbs[0]; + + // + // Loop to set owner for all Bcbs. + // + + while (*BcbPtrPtr != NULL) { + ExSetResourceOwnerPointer( &(*BcbPtrPtr)->Resource, OwnerPointer ); + BcbPtrPtr++; + } + + // + // Otherwise, it is a normal Bcb + // + + } else { + + // + // Handle normal case. + // + + ExSetResourceOwnerPointer( &((PBCB)Bcb)->Resource, OwnerPointer ); + } +} + + +VOID +CcUnpinDataForThread ( + IN PVOID Bcb, + IN ERESOURCE_THREAD ResourceThreadId + ) + +/*++ + +Routine Description: + + This routine must be called at IPL0, some time after calling CcPinRead + or CcPreparePinWrite. It performs any cleanup that is necessary, + releasing the Bcb resource for the given thread. + +Arguments: + + Bcb - Bcb parameter returned from the last call to CcPinRead. + +Return Value: + + None. + +--*/ + +{ + DebugTrace(+1, me, "CcUnpinDataForThread:\n", 0 ); + DebugTrace( 0, me, " >Bcb = %08lx\n", Bcb ); + DebugTrace( 0, me, " >ResoureceThreadId = %08lx\n", ResoureceThreadId ); + + // + // Test for ReadOnly and unpin accordingly. + // + + if (((ULONG)Bcb & 1) != 0) { + + // + // Remove the Read Only flag + // + + (PCHAR)Bcb -= 1; + + CcUnpinFileData( (PBCB)Bcb, TRUE, UNPIN ); + + } else { + + // + // Handle the overlapped Bcb case. + // + + if (((POBCB)Bcb)->NodeTypeCode == CACHE_NTC_OBCB) { + + PBCB *BcbPtrPtr = &((POBCB)Bcb)->Bcbs[0]; + + // + // Loop to free all Bcbs with recursive calls + // (rather than dealing with RO for this uncommon case). + // + + while (*BcbPtrPtr != NULL) { + CcUnpinDataForThread( *(BcbPtrPtr++), ResourceThreadId ); + } + + // + // Then free the pool for the Obcb + // + + ExFreePool( Bcb ); + + // + // Otherwise, it is a normal Bcb + // + + } else { + + // + // If not readonly, we can release the resource for the thread first, + // and then call CcUnpinFileData. Release resource first in case + // Bcb gets deallocated. + // + + ExReleaseResourceForThread( &((PBCB)Bcb)->Resource, ResourceThreadId ); + CcUnpinFileData( (PBCB)Bcb, TRUE, UNPIN ); + } + } + DebugTrace(-1, me, "CcUnpinDataForThread -> VOID\n", 0 ); +} + + +POBCB +CcAllocateObcb ( + IN PLARGE_INTEGER FileOffset, + IN ULONG Length, + IN PBCB FirstBcb + ) + +/*++ + +Routine Description: + + This routine is called by the various pinning routines to allocate and + initialize an overlap Bcb. 
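+
+    The allocation is sized for the worst case of one additional Bcb per page
+    remaining beyond the first Bcb, plus a NULL terminator.  (As a purely
+    illustrative example, assuming a 4KB page size: a 0x5000 byte range whose
+    first Bcb covers 0x1000 bytes leaves 0x4000 bytes, so five pointer slots
+    are added beyond the one built into the OBCB - room for four more Bcbs
+    plus the terminating NULL.)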
+ +Arguments: + + FileOffset - Starting file offset for the Obcb (An Obcb starts with a + public structure, which someone could use) + + Length - Length of the range covered by the Obcb + + FirstBcb - First Bcb already created, which only covers the start of + the desired range (low order bit may be set to indicate ReadOnly) + +Return Value: + + Pointer to the allocated Obcb + +--*/ + +{ + ULONG LengthToAllocate; + POBCB Obcb; + + // + // Allocate according to the worst case, assuming that we + // will need as many additional Bcbs as there are pages + // remaining. (One Bcb pointer is already in OBCB.) Also + // throw in one more pointer to guarantee users of the OBCB + // can always terminate on NULL. + // + + LengthToAllocate = sizeof(OBCB) + + (((Length - ((PBCB)((ULONG)FirstBcb & ~1))->ByteLength + + (2 * PAGE_SIZE) - 1) / PAGE_SIZE) * sizeof(PBCB)); + + Obcb = FsRtlAllocatePool( NonPagedPool, LengthToAllocate ); + RtlZeroMemory( Obcb, LengthToAllocate ); + Obcb->NodeTypeCode = CACHE_NTC_OBCB; + Obcb->NodeByteSize = (USHORT)LengthToAllocate; + Obcb->ByteLength = Length; + Obcb->FileOffset = *FileOffset; + Obcb->Bcbs[0] = FirstBcb; + + return Obcb; +} diff --git a/private/ntos/cache/sources.inc b/private/ntos/cache/sources.inc new file mode 100644 index 000000000..8e6e120b6 --- /dev/null +++ b/private/ntos/cache/sources.inc @@ -0,0 +1,53 @@ +!IF 0 + +Copyright (c) 1989 Microsoft Corporation + +Module Name: + + sources. + +Abstract: + + This file specifies the target component being built and the list of + sources files needed to build that component. Also specifies optional + compiler switches and libraries that are unique for the component being + built. + + +Author: + + Steve Wood (stevewo) 12-Apr-1990 + +NOTE: Commented description of this file is in \nt\bak\bin\sources.tpl + +!ENDIF + +MAJORCOMP=ntos +MINORCOMP=cache + +TARGETNAME=cache +TARGETTYPE=LIBRARY + +INCLUDES=..;..\..\inc +MIPS_OPTIONS=-nodwalign +GPSIZE=32 + +MSC_WARNING_LEVEL=/W3 /WX + +C_DEFINES=$(C_DEFINES) -D_NTSYSTEM_ + +SOURCES=..\cachedat.c \ + ..\cachesub.c \ + ..\copysup.c \ + ..\fssup.c \ + ..\lazyrite.c \ + ..\logsup.c \ + ..\mdlsup.c \ + ..\pinsup.c \ + ..\vacbsup.c + +PRECOMPILED_INCLUDE=..\cc.h +PRECOMPILED_PCH=cc.pch +PRECOMPILED_OBJ=cc.obj + +SOURCES_USED=..\sources.inc diff --git a/private/ntos/cache/up/makefile b/private/ntos/cache/up/makefile new file mode 100644 index 000000000..6ee4f43fa --- /dev/null +++ b/private/ntos/cache/up/makefile @@ -0,0 +1,6 @@ +# +# DO NOT EDIT THIS FILE!!! Edit .\sources. if you want to add a new source +# file to this component. This file merely indirects to the real make file +# that is shared by all the components of NT OS/2 +# +!INCLUDE $(NTMAKEENV)\makefile.def diff --git a/private/ntos/cache/up/sources b/private/ntos/cache/up/sources new file mode 100644 index 000000000..6dca9c583 --- /dev/null +++ b/private/ntos/cache/up/sources @@ -0,0 +1,27 @@ +!IF 0 + +Copyright (c) 1989 Microsoft Corporation + +Module Name: + + sources. + +Abstract: + + This file specifies the target component being built and the list of + sources files needed to build that component. Also specifies optional + compiler switches and libraries that are unique for the component being + built. 
+ + +Author: + + Steve Wood (stevewo) 12-Apr-1990 + +NOTE: Commented description of this file is in \nt\bak\bin\sources.tpl + +!ENDIF + +TARGETPATH=..\..\obj + +!include ..\sources.inc diff --git a/private/ntos/cache/vacbsup.c b/private/ntos/cache/vacbsup.c new file mode 100644 index 000000000..d1e0e09f9 --- /dev/null +++ b/private/ntos/cache/vacbsup.c @@ -0,0 +1,1421 @@ +/*++ + +Copyright (c) 1990 Microsoft Corporation + +Module Name: + + vacbsup.c + +Abstract: + + This module implements the support routines for the Virtual Address + Control Block support for the Cache Manager. These routines are used + to manage a large number of relatively small address windows to map + file data for all forms of cache access. + +Author: + + Tom Miller [TomM] 8-Feb-1992 + +Revision History: + +--*/ + +#include "cc.h" + +// +// Define our debug constant +// + +#define me 0x000000040 + +// +// Define a few macros for manipulating the Vacb array. +// + +#define GetVacb(SCM,OFF) ( \ + ((OFF).HighPart != 0) ? \ + (SCM)->Vacbs[(ULONG)((ULONGLONG)((OFF).QuadPart) >> VACB_OFFSET_SHIFT)] : \ + (SCM)->Vacbs[(OFF).LowPart >> VACB_OFFSET_SHIFT] \ +) + +#define SetVacb(SCM,OFF,VACB) { \ + ASSERT((OFF).HighPart < VACB_MAPPING_GRANULARITY); \ + if ((OFF).HighPart != 0) { \ + (SCM)->Vacbs[(ULONG)((ULONGLONG)((OFF).QuadPart) >> VACB_OFFSET_SHIFT)] = (VACB); \ + } else {(SCM)->Vacbs[(OFF).LowPart >> VACB_OFFSET_SHIFT] = (VACB);} \ +} + +#define SizeOfVacbArray(LSZ) ( \ + ((LSZ).HighPart != 0) ? \ + ((ULONG)((ULONGLONG)((LSZ).QuadPart) >> VACB_OFFSET_SHIFT) * sizeof(PVACB)) : \ + (LSZ).LowPart > (PREALLOCATED_VACBS * VACB_MAPPING_GRANULARITY) ? \ + (((LSZ).LowPart >> VACB_OFFSET_SHIFT) * sizeof(PVACB)) : \ + (PREALLOCATED_VACBS * sizeof(PVACB)) \ +) + +#define CheckedDec(N) { \ + ASSERT((N) != 0); \ + (N) -= 1; \ +} + +// +// Internal Support Routines. +// + +VOID +CcUnmapVacb ( + IN PVACB Vacb, + IN PSHARED_CACHE_MAP SharedCacheMap + ); + +PVACB +CcGetVacbMiss ( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN LARGE_INTEGER FileOffset, + IN OUT PKIRQL OldIrql + ); + +#ifdef ALLOC_PRAGMA +#pragma alloc_text(INIT, CcInitializeVacbs) +#endif + + +VOID +CcInitializeVacbs( +) + +/*++ + +Routine Description: + + This routine must be called during Cache Manager initialization to + initialize the Virtual Address Control Block structures. + +Arguments: + + None. + +Return Value: + + None. + +--*/ + +{ + ULONG VacbBytes; + + CcNumberVacbs = (MmSizeOfSystemCacheInPages >> (VACB_OFFSET_SHIFT - PAGE_SHIFT)) - 2; + VacbBytes = CcNumberVacbs * sizeof(VACB); + + KeInitializeSpinLock( &CcVacbSpinLock ); + CcNextVictimVacb = + CcVacbs = (PVACB)FsRtlAllocatePool( NonPagedPool, VacbBytes ); + CcBeyondVacbs = (PVACB)((PCHAR)CcVacbs + VacbBytes); + RtlZeroMemory( CcVacbs, VacbBytes ); +} + + +PVOID +CcGetVirtualAddressIfMapped ( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN LONGLONG FileOffset, + OUT PVACB *Vacb, + OUT PULONG ReceivedLength + ) + +/*++ + +Routine Description: + + This routine returns a virtual address for the specified FileOffset, + iff it is mapped. Otherwise, it informs the caller that the specified + virtual address was not mapped. In the latter case, it still returns + a ReceivedLength, which may be used to advance to the next view boundary. + +Arguments: + + SharedCacheMap - Supplies a pointer to the Shared Cache Map for the file. + + FileOffset - Supplies the desired FileOffset within the file. + + Vach - Returns a Vacb pointer which must be supplied later to free + this virtual address, or NULL if not mapped. 
+ + ReceivedLength - Returns the number of bytes to the next view boundary, + whether the desired file offset is mapped or not. + +Return Value: + + The virtual address at which the desired data is mapped, or NULL if it + is not mapped. + +--*/ + +{ + KIRQL OldIrql; + ULONG VacbOffset = (ULONG)FileOffset & (VACB_MAPPING_GRANULARITY - 1); + PVOID Value = NULL; + + ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL); + + // + // Generate ReceivedLength return right away. + // + + *ReceivedLength = VACB_MAPPING_GRANULARITY - VacbOffset; + + // + // Acquire the Vacb lock to see if the desired offset is already mapped. + // + + ExAcquireFastLock( &CcVacbSpinLock, &OldIrql ); + + ASSERT( FileOffset <= SharedCacheMap->SectionSize.QuadPart ); + + if ((*Vacb = GetVacb( SharedCacheMap, *(PLARGE_INTEGER)&FileOffset )) != NULL) { + + if ((*Vacb)->Overlay.ActiveCount == 0) { + SharedCacheMap->VacbActiveCount += 1; + } + + (*Vacb)->Overlay.ActiveCount += 1; + + + Value = (PVOID)((PCHAR)(*Vacb)->BaseAddress + VacbOffset); + } + + ExReleaseFastLock( &CcVacbSpinLock, OldIrql ); + return Value; +} + + +PVOID +CcGetVirtualAddress ( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN LARGE_INTEGER FileOffset, + OUT PVACB *Vacb, + OUT PULONG ReceivedLength + ) + +/*++ + +Routine Description: + + This is the main routine for Vacb management. It may be called to acquire + a virtual address for a given file offset. If the desired file offset is + already mapped, this routine does very little work before returning with + the desired virtual address and Vacb pointer (which must be supplied to + free the mapping). + + If the desired virtual address is not currently mapped, then this routine + claims a Vacb from the tail of the Vacb LRU to reuse its mapping. This Vacb + is then unmapped if necessary (normally not required), and mapped to the + desired address. + +Arguments: + + SharedCacheMap - Supplies a pointer to the Shared Cache Map for the file. + + FileOffset - Supplies the desired FileOffset within the file. + + Vacb - Returns a Vacb pointer which must be supplied later to free + this virtual address. + + ReceivedLength - Returns the number of bytes which are contiguously + mapped starting at the virtual address returned. + +Return Value: + + The virtual address at which the desired data is mapped. + +--*/ + +{ + KIRQL OldIrql; + PVACB TempVacb; + ULONG VacbOffset = FileOffset.LowPart & (VACB_MAPPING_GRANULARITY - 1); + + ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL); + + // + // Acquire the Vacb lock to see if the desired offset is already mapped. + // + + ExAcquireSpinLock( &CcVacbSpinLock, &OldIrql ); + + ASSERT( FileOffset.QuadPart <= SharedCacheMap->SectionSize.QuadPart ); + + if ((TempVacb = GetVacb( SharedCacheMap, FileOffset )) == NULL) { + + TempVacb = CcGetVacbMiss( SharedCacheMap, FileOffset, &OldIrql ); + + } else { + + if (TempVacb->Overlay.ActiveCount == 0) { + SharedCacheMap->VacbActiveCount += 1; + } + + TempVacb->Overlay.ActiveCount += 1; + } + + ExReleaseSpinLock( &CcVacbSpinLock, OldIrql ); + + // + // Now form all outputs. + // + + *Vacb = TempVacb; + *ReceivedLength = VACB_MAPPING_GRANULARITY - VacbOffset; + + ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL); + + return (PVOID)((PCHAR)TempVacb->BaseAddress + VacbOffset); +} + + +PVACB +CcGetVacbMiss ( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN LARGE_INTEGER FileOffset, + IN OUT PKIRQL OldIrql + ) + +/*++ + +Routine Description: + + This is the main routine for Vacb management. It may be called to acquire + a virtual address for a given file offset. 
If the desired file offset is + already mapped, this routine does very little work before returning with + the desired virtual address and Vacb pointer (which must be supplied to + free the mapping). + + If the desired virtual address is not currently mapped, then this routine + claims a Vacb from the tail of the Vacb LRU to reuse its mapping. This Vacb + is then unmapped if necessary (normally not required), and mapped to the + desired address. + +Arguments: + + SharedCacheMap - Supplies a pointer to the Shared Cache Map for the file. + + FileOffset - Supplies the desired FileOffset within the file. + + OldIrql - Pointer to the OldIrql variable in the caller + +Return Value: + + The Vacb. + +--*/ + +{ + PSHARED_CACHE_MAP OldSharedCacheMap; + PVACB Vacb, TempVacb; + LARGE_INTEGER MappedLength; + LARGE_INTEGER NormalOffset; + NTSTATUS Status; + ULONG ActivePage; + ULONG PageIsDirty; + PVACB ActiveVacb = NULL; + BOOLEAN MasterAcquired = FALSE; + ULONG VacbOffset = FileOffset.LowPart & (VACB_MAPPING_GRANULARITY - 1); + + NormalOffset = FileOffset; + NormalOffset.LowPart -= VacbOffset; + + // + // For Sequential only files, we periodically unmap unused views + // behind us as we go, to keep from hogging memory. + // + + if (FlagOn(SharedCacheMap->Flags, ONLY_SEQUENTIAL_ONLY_SEEN) && + ((NormalOffset.LowPart & (SEQUENTIAL_ONLY_MAP_LIMIT - 1)) == 0) && + (NormalOffset.QuadPart >= (SEQUENTIAL_ONLY_MAP_LIMIT * 2))) { + + // + // Use MappedLength as a scratch variable to form the offset + // to start unmapping. We are not synchronized with these past + // views, so it is possible that CcUnmapVacbArray will kick out + // early when it sees an active view. That is why we go back + // twice the distance, and effectively try to unmap everything + // twice. The second time should normally do it. If the file + // is truly sequential only, then the only collision expected + // might be the previous view if we are being called from readahead, + // or there is a small chance that we can collide with the + // Lazy Writer during the small window where he briefly maps + // the file to push out the dirty bits. + // + + ExReleaseSpinLock( &CcVacbSpinLock, *OldIrql ); + MappedLength.QuadPart = NormalOffset.QuadPart - (SEQUENTIAL_ONLY_MAP_LIMIT * 2); + CcUnmapVacbArray( SharedCacheMap, &MappedLength, (SEQUENTIAL_ONLY_MAP_LIMIT * 2) ); + ExAcquireSpinLock( &CcVacbSpinLock, OldIrql ); + } + + // + // Scan from the next victim for a free Vacb + // + + Vacb = CcNextVictimVacb; + + while (TRUE) { + + // + // Handle the wrap case + // + + if (Vacb == CcBeyondVacbs) { + Vacb = CcVacbs; + } + + // + // If this guy is not active, break out and use him. Also, if + // it is an Active Vacb, nuke it now, because the reader may be idle and we + // want to clean up. + // + + OldSharedCacheMap = Vacb->SharedCacheMap; + if ((Vacb->Overlay.ActiveCount == 0) || + ((ActiveVacb == NULL) && + (OldSharedCacheMap != NULL) && + (OldSharedCacheMap->ActiveVacb == Vacb))) { + + // + // The normal case is that the Vacb is no longer mapped + // and we can just get out and use it. + // + + if (Vacb->BaseAddress == NULL) { + break; + } + + // + // Else the Vacb is mapped. If we haven't done so + // already, we have to bias the open count so the + // SharedCacheMap (and its section reference) do not + // get away before we complete the unmap. Unfortunately + // we have to free the Vacb lock first to obey our locking + // order. 
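+            //  (The order is CcMasterSpinLock before CcVacbSpinLock, so below
+            //  we drop the Vacb lock, acquire the master lock, and then
+            //  reacquire the Vacb lock at Dpc level.)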
+ // + + if (!MasterAcquired) { + + ExReleaseSpinLock( &CcVacbSpinLock, *OldIrql ); + ExAcquireSpinLock( &CcMasterSpinLock, OldIrql ); + ExAcquireSpinLockAtDpcLevel( &CcVacbSpinLock ); + MasterAcquired = TRUE; + + // + // Reset the next victim on this rare path to allow our guy + // to scan the entire list again. Since we terminate the scan + // when we see we have incremented into this guy, we have cannot + // leave it on the first Vacb! In this case we will terminate + // at CcBeyondVacbs. Third time should be the charm on this fix! + // + + CcNextVictimVacb = Vacb; + if (CcNextVictimVacb == CcVacbs) { + CcNextVictimVacb = CcBeyondVacbs; + } + } + + // + // If this Vacb went active while we had the spin lock + // dropped, then we have to start a new scan! At least + // now we have both locks so that this cannot happen again. + // + + if (Vacb->Overlay.ActiveCount != 0) { + + // + // Most likely we are here to free an Active Vacb from copy + // read. Rather than repeat all the tests from above, we will + // just try to get the active Vacb if we haven't already got + // one. + // + + if ((ActiveVacb == NULL) && (Vacb->SharedCacheMap != NULL)) { + + // + // Get the active Vacb. + // + + GetActiveVacbAtDpcLevel( Vacb->SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty ); + } + + // + // Otherwise we will break out and use this Vacb. If it + // is still mapped we can now safely increment the open + // count. + // + + } else { + + if (Vacb->BaseAddress != NULL) { + + // + // Note that if the SharedCacheMap is currently + // being deleted, we need to skip over + // it, otherwise we will become the second + // deleter. CcDeleteSharedCacheMap clears the + // pointer in the SectionObjectPointer. + // + + if (Vacb->SharedCacheMap->FileObject->SectionObjectPointer->SharedCacheMap == + Vacb->SharedCacheMap) { + + Vacb->SharedCacheMap->OpenCount += 1; + break; + } + + } else { + + break; + } + } + } + + // + // Advance to the next guy and see if we have scanned + // the entire list. + // + + Vacb += 1; + + if (Vacb == CcNextVictimVacb) { + + // + // Release the spinlock(s) acquired above. + // + + if (MasterAcquired) { + + ExReleaseSpinLockFromDpcLevel( &CcVacbSpinLock ); + ExReleaseSpinLock( &CcMasterSpinLock, *OldIrql ); + + } else { + + ExReleaseSpinLock( &CcVacbSpinLock, *OldIrql ); + } + + // + // If we found an active vacb, then free it and go back and + // try again. Else it's time to bail. + // + + if (ActiveVacb != NULL) { + CcFreeActiveVacb( ActiveVacb->SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty ); + ActiveVacb = NULL; + + // + // Reacquire spinlocks to loop back + // + + ExAcquireSpinLock( &CcMasterSpinLock, OldIrql ); + ExAcquireSpinLockAtDpcLevel( &CcVacbSpinLock ); + MasterAcquired = TRUE; + + } else { + ExRaiseStatus( STATUS_INSUFFICIENT_RESOURCES ); + } + } + } + + CcNextVictimVacb = Vacb + 1; + + // + // Unlink it from the other SharedCacheMap, so the other + // guy will not try to use it when we free the spin lock. + // + + if (Vacb->SharedCacheMap != NULL) { + + OldSharedCacheMap = Vacb->SharedCacheMap; + SetVacb( OldSharedCacheMap, Vacb->Overlay.FileOffset, NULL ); + Vacb->SharedCacheMap = NULL; + } + + // + // Mark it in use so no one else will muck with it after + // we release the spin lock. + // + + Vacb->Overlay.ActiveCount = 1; + SharedCacheMap->VacbActiveCount += 1; + + // + // Release the spinlock(s) acquired above. 
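+    //  (From here on the Vacb is protected by its nonzero ActiveCount rather
+    //  than by the spin locks, so it is safe to unmap and remap it below.)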
+ // + + if (MasterAcquired) { + + ExReleaseSpinLockFromDpcLevel( &CcVacbSpinLock ); + ExReleaseSpinLock( &CcMasterSpinLock, *OldIrql ); + + } else { + + ExReleaseSpinLock( &CcVacbSpinLock, *OldIrql ); + } + + // + // If the Vacb is already mapped, then unmap it. + // + + if (Vacb->BaseAddress != NULL) { + + CcUnmapVacb( Vacb, OldSharedCacheMap ); + + // + // Now we can decrement the open count as we normally + // do, possibly deleting the guy. + // + + ExAcquireSpinLock( &CcMasterSpinLock, OldIrql ); + + // + // Now release our open count. + // + + OldSharedCacheMap->OpenCount -= 1; + + if ((OldSharedCacheMap->OpenCount == 0) && + !FlagOn(OldSharedCacheMap->Flags, WRITE_QUEUED) && + (OldSharedCacheMap->DirtyPages == 0)) { + + // + // Move to the dirty list. + // + + RemoveEntryList( &OldSharedCacheMap->SharedCacheMapLinks ); + InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks, + &OldSharedCacheMap->SharedCacheMapLinks ); + + // + // Make sure the Lazy Writer will wake up, because we + // want him to delete this SharedCacheMap. + // + + LazyWriter.OtherWork = TRUE; + if (!LazyWriter.ScanActive) { + CcScheduleLazyWriteScan(); + } + } + + ExReleaseSpinLock( &CcMasterSpinLock, *OldIrql ); + } + + // + // Use try-finally to return this guy to the list if we get an + // exception. + // + + try { + + // + // Assume we are mapping to the end of the section, but + // reduce to our normal mapping granularity if the section + // is too large. + // + + MappedLength.QuadPart = SharedCacheMap->SectionSize.QuadPart - NormalOffset.QuadPart; + + if ((MappedLength.HighPart != 0) || + (MappedLength.LowPart > VACB_MAPPING_GRANULARITY)) { + + MappedLength.LowPart = VACB_MAPPING_GRANULARITY; + } + + // + // Now map this one in the system cache. + // + + DebugTrace( 0, mm, "MmMapViewInSystemCache:\n", 0 ); + DebugTrace( 0, mm, " Section = %08lx\n", SharedCacheMap->Section ); + DebugTrace2(0, mm, " Offset = %08lx, %08lx\n", + NormalOffset.LowPart, + NormalOffset.HighPart ); + DebugTrace( 0, mm, " ViewSize = %08lx\n", MappedLength.LowPart ); + + Status = + MmMapViewInSystemCache( SharedCacheMap->Section, + &Vacb->BaseAddress, + &NormalOffset, + &MappedLength.LowPart ); + + DebugTrace( 0, mm, " BaseAddress ); + DebugTrace( 0, mm, " SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty ); + } + + // + // On abnormal termination, get this guy back in the list. + // + + if (AbnormalTermination()) { + + ExAcquireSpinLock( &CcVacbSpinLock, OldIrql ); + + // + // This is like the unlucky case below. Just back out the stuff + // we did and put the guy at the tail of the list. Basically + // only the Map should fail, and we clear BaseAddress accordingly. + // + + Vacb->BaseAddress = NULL; + + CheckedDec(Vacb->Overlay.ActiveCount); + CheckedDec(SharedCacheMap->VacbActiveCount); + + // + // If there is someone waiting for this count to go to zero, + // wake them here. + // + + if (SharedCacheMap->WaitOnActiveCount != NULL) { + KeSetEvent( SharedCacheMap->WaitOnActiveCount, 0, FALSE ); + } + + ExReleaseSpinLock( &CcVacbSpinLock, *OldIrql ); + } + } + + // + // Finish filling in the Vacb, and store its address in the array in + // the Shared Cache Map. (We have to rewrite the ActiveCount + // since it is overlaid.) To do this we must racquire the + // spin lock one more time. Note we have to check for the unusual + // case that someone beat us to mapping this view, since we had to + // drop the spin lock. 
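+    // (On this path the new view is mapped at Vacb->BaseAddress and the
+    // Vacb still carries the ActiveCount of 1 set above, but it does not
+    // become visible in the SharedCacheMap's Vacb array until the SetVacb
+    // call below.)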
+ // + + ExAcquireSpinLock( &CcVacbSpinLock, OldIrql ); + + if ((TempVacb = GetVacb( SharedCacheMap, NormalOffset )) == NULL) { + + Vacb->SharedCacheMap = SharedCacheMap; + Vacb->Overlay.FileOffset = NormalOffset; + Vacb->Overlay.ActiveCount = 1; + + SetVacb( SharedCacheMap, NormalOffset, Vacb ); + + // + // This is the unlucky case where we collided with someone else + // trying to map the same view. He can get in because we dropped + // the spin lock above. Rather than allocating events and making + // someone wait, considering this case is fairly unlikely, we just + // dump this one at the tail of the list and use the one from the + // guy who beat us. + // + + } else { + + // + // Now we have to increment all of the counts for the one that + // was already there, then ditch the one we had. + // + + if (TempVacb->Overlay.ActiveCount == 0) { + SharedCacheMap->VacbActiveCount += 1; + } + + TempVacb->Overlay.ActiveCount += 1; + + // + // Now unmap the one we mapped and proceed with the other Vacb. + // On this path we have to release the spinlock to do the unmap, + // and then reacquire the spinlock before cleaning up. + // + + ExReleaseSpinLock( &CcVacbSpinLock, *OldIrql ); + + CcUnmapVacb( Vacb, SharedCacheMap ); + + ExAcquireSpinLock( &CcVacbSpinLock, OldIrql ); + CheckedDec(Vacb->Overlay.ActiveCount); + CheckedDec(SharedCacheMap->VacbActiveCount); + Vacb->SharedCacheMap = NULL; + + Vacb = TempVacb; + } + + return Vacb; +} + + +VOID +FASTCALL +CcFreeVirtualAddress ( + IN PVACB Vacb + ) + +/*++ + +Routine Description: + + This routine must be called once for each call to CcGetVirtualAddress, + to free that virtual address. + +Arguments: + + Vacb - Supplies the Vacb which was returned from CcGetVirtualAddress. + +Return Value: + + None. + +--*/ + +{ + KIRQL OldIrql; + PSHARED_CACHE_MAP SharedCacheMap = Vacb->SharedCacheMap; + + ExAcquireSpinLock( &CcVacbSpinLock, &OldIrql ); + + CheckedDec(Vacb->Overlay.ActiveCount); + + // + // If the count goes to zero, then we want to decrement the global + // Active count, and the count in the Scb. + // + + if (Vacb->Overlay.ActiveCount == 0) { + + // + // If the SharedCacheMap address is not NULL, then this one is + // in use by a shared cache map, and we have to decrement his + // count and see if anyone is waiting. + // + + if (SharedCacheMap != NULL) { + + CheckedDec(SharedCacheMap->VacbActiveCount); + + // + // If there is someone waiting for this count to go to zero, + // wake them here. + // + + if (SharedCacheMap->WaitOnActiveCount != NULL) { + KeSetEvent( SharedCacheMap->WaitOnActiveCount, 0, FALSE ); + } + } + } + + ExReleaseSpinLock( &CcVacbSpinLock, OldIrql ); +} + + +VOID +CcWaitOnActiveCount ( + IN PSHARED_CACHE_MAP SharedCacheMap + ) + +/*++ + +Routine Description: + + This routine may be called to wait for outstanding mappings for + a given SharedCacheMap to go inactive. It is intended to be called + from CcUninitializeCacheMap, which is called by the file systems + during cleanup processing. In that case this routine only has to + wait if the user closed a handle without waiting for all I/Os on the + handle to complete. + + This routine returns each time the active count is decremented. The + caller must recheck his wait conditions on return, either waiting for + the ActiveCount to go to 0, or for specific views to go inactive + (CcPurgeCacheSection case). + +Arguments: + + SharedCacheMap - Supplies the Shared Cache Map on whose VacbActiveCount + we wish to wait. + +Return Value: + + None. 
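+
+    As an illustration only (a sketch, not code taken from this module), a
+    caller waiting for all views of a stream to go inactive would recheck
+    its wait condition each time this routine returns:
+
+        while (SharedCacheMap->VacbActiveCount != 0) {
+            CcWaitOnActiveCount( SharedCacheMap );
+        }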
+ +--*/ + +{ + KIRQL OldIrql; + PKEVENT Event; + + // + // In the unusual case that we get a cleanup while I/O is still going + // on, we can wait here. The caller must test the count for nonzero + // before calling this routine. + // + // Since we are being called from cleanup, we cannot afford to + // fail here. + // + + ExAcquireSpinLock( &CcVacbSpinLock, &OldIrql ); + + // + // It is possible that the count went to zero before we acquired the + // spinlock, so we must handle two cases here. + // + + if (SharedCacheMap->VacbActiveCount != 0) { + + if ((Event = SharedCacheMap->WaitOnActiveCount) == NULL) { + + // + // If the local even is not being used as a create event, + // then we can use it. + // + + if (SharedCacheMap->CreateEvent == NULL) { + + Event = &SharedCacheMap->Event; + + } else { + + Event = (PKEVENT)ExAllocatePool( NonPagedPoolMustSucceed, + sizeof(KEVENT) ); + } + } + + KeInitializeEvent( Event, + NotificationEvent, + FALSE ); + + SharedCacheMap->WaitOnActiveCount = Event; + + ExReleaseSpinLock( &CcVacbSpinLock, OldIrql ); + + KeWaitForSingleObject( Event, + Executive, + KernelMode, + FALSE, + (PLARGE_INTEGER)NULL); + } else { + + ExReleaseSpinLock( &CcVacbSpinLock, OldIrql ); + } +} + + +// +// Internal Support Routine. +// + +VOID +CcUnmapVacb ( + IN PVACB Vacb, + IN PSHARED_CACHE_MAP SharedCacheMap + ) + +/*++ + +Routine Description: + + This routine may be called to unmap a previously mapped Vacb, and + clear its BaseAddress field. + +Arguments: + + Vacb - Supplies the Vacb which was returned from CcGetVirtualAddress. + +Return Value: + + None. + +--*/ + +{ + // + // Make sure it is mapped. + // + + ASSERT(SharedCacheMap != NULL); + ASSERT(Vacb->BaseAddress != NULL); + + // + // Call MM to unmap it. + // + + DebugTrace( 0, mm, "MmUnmapViewInSystemCache:\n", 0 ); + DebugTrace( 0, mm, " BaseAddress = %08lx\n", Vacb->BaseAddress ); + + MmUnmapViewInSystemCache( Vacb->BaseAddress, + SharedCacheMap->Section, + FlagOn(SharedCacheMap->Flags, ONLY_SEQUENTIAL_ONLY_SEEN) ); + + Vacb->BaseAddress = NULL; +} + + +VOID +FASTCALL +CcCreateVacbArray ( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN LARGE_INTEGER NewSectionSize + ) + +/*++ + +Routine Description: + + This routine must be called when a SharedCacheMap is created to create + and initialize the initial Vacb array. + +Arguments: + + SharedCacheMap - Supplies the shared cache map for which the array is + to be created. + + NewSectionSize - Supplies the current size of the section which must be + covered by the Vacb array. + +Return Value: + + None. + +--*/ + +{ + PVACB *NewAddresses; + ULONG NewSize, SizeToAllocate; + PLIST_ENTRY BcbListHead; + + NewSize = SizeToAllocate = SizeOfVacbArray(NewSectionSize); + + // + // The following limit is greater than the MM limit + // (i.e., MM actually only supports even smaller sections). + // This limit is required here in order to get the correct + // answer from SizeOfVacbArray. + // + + if (NewSectionSize.HighPart & 0xFFFFC000) { + ExRaiseStatus(STATUS_SECTION_TOO_BIG); + } + + // + // See if we can use the array inside the shared cache map. + // + + if (NewSize == (PREALLOCATED_VACBS * sizeof(PVACB))) { + + NewAddresses = &SharedCacheMap->InitialVacbs[0]; + + // + // Else allocate the array. + // + + } else { + + // + // For large metadata streams, double the size to allocate + // an array of Bcb listheads. Each two Vacb pointers also + // gets its own Bcb listhead, thus requiring double the size. 
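+        // (The allocation is laid out as NewSize bytes of Vacb pointers
+        // followed by an equal-sized array of LIST_ENTRY listheads; since a
+        // LIST_ENTRY is two pointers, one listhead covers two Vacb pointers,
+        // which matches the SIZE_PER_BCB_LIST assertion below.)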
+ // + + ASSERT(SIZE_PER_BCB_LIST == (VACB_MAPPING_GRANULARITY * 2)); + + // + // Does this stream get a Bcb Listhead array? + // + + if (FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED) && + (NewSectionSize.QuadPart > BEGIN_BCB_LIST_ARRAY)) { + + SizeToAllocate *= 2; + } + + NewAddresses = ExAllocatePool( NonPagedPool, SizeToAllocate ); + if (NewAddresses == NULL) { + SharedCacheMap->Status = STATUS_INSUFFICIENT_RESOURCES; + ExRaiseStatus( STATUS_INSUFFICIENT_RESOURCES ); + } + } + + RtlZeroMemory( NewAddresses, NewSize ); + + // + // Loop to insert the Bcb listheads (if any) in the *descending* order + // Bcb list. + // + + if (SizeToAllocate != NewSize) { + + for (BcbListHead = (PLIST_ENTRY)((PCHAR)NewAddresses + NewSize); + BcbListHead < (PLIST_ENTRY)((PCHAR)NewAddresses + SizeToAllocate); + BcbListHead++) { + + InsertHeadList( &SharedCacheMap->BcbList, BcbListHead ); + } + } + + SharedCacheMap->Vacbs = NewAddresses; + SharedCacheMap->SectionSize = NewSectionSize; +} + + +VOID +CcExtendVacbArray ( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN LARGE_INTEGER NewSectionSize + ) + +/*++ + +Routine Description: + + This routine must be called any time the section for a shared cache + map is extended, in order to extend the Vacb array (if necessary). + +Arguments: + + SharedCacheMap - Supplies the shared cache map for which the array is + to be created. + + NewSectionSize - Supplies the new size of the section which must be + covered by the Vacb array. + +Return Value: + + None. + +--*/ + +{ + KIRQL OldIrql; + PVACB *OldAddresses; + PVACB *NewAddresses; + ULONG OldSize; + ULONG NewSize, SizeToAllocate; + ULONG GrowingBcbListHeads = FALSE; + + // + // The following limit is greater than the MM limit + // (i.e., MM actually only supports even smaller sections). + // This limit is required here in order to get the correct + // answer from SizeOfVacbArray. + // + + if (NewSectionSize.HighPart & 0xFFFFC000) { + ExRaiseStatus(STATUS_SECTION_TOO_BIG); + } + + // + // See if we will be growing the Bcb ListHeads, and take out the + // master lock if so. + // + + if (FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED) && + (NewSectionSize.QuadPart > BEGIN_BCB_LIST_ARRAY)) { + + GrowingBcbListHeads = TRUE; + ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); + ExAcquireSpinLockAtDpcLevel( &CcVacbSpinLock ); + + } else { + + // + // Acquire the spin lock to serialize with anyone who might like + // to "steal" one of the mappings we are going to move. + // + + ExAcquireSpinLock( &CcVacbSpinLock, &OldIrql ); + } + + // + // It's all a noop if the new size is not larger... + // + + if (NewSectionSize.QuadPart > SharedCacheMap->SectionSize.QuadPart) { + + NewSize = SizeToAllocate = SizeOfVacbArray(NewSectionSize); + OldSize = SizeOfVacbArray(SharedCacheMap->SectionSize); + + // + // Only do something if the size is growing. + // + + if (NewSize > OldSize) { + + // + // Does this stream get a Bcb Listhead array? 
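+            // (As in CcCreateVacbArray, the Bcb listheads occupy a second
+            // array of the same size immediately following the Vacb
+            // pointers, so the allocation is simply doubled.)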
+ // + + if (GrowingBcbListHeads) { + SizeToAllocate *= 2; + } + + NewAddresses = ExAllocatePool( NonPagedPool, SizeToAllocate ); + + if (NewAddresses == NULL) { + if (GrowingBcbListHeads) { + ExReleaseSpinLockFromDpcLevel( &CcVacbSpinLock ); + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + } else { + ExReleaseSpinLock( &CcVacbSpinLock, OldIrql ); + } + ExRaiseStatus(STATUS_INSUFFICIENT_RESOURCES); + } + + OldAddresses = SharedCacheMap->Vacbs; + if (OldAddresses != NULL) { + RtlCopyMemory( NewAddresses, OldAddresses, OldSize ); + } else { + OldSize = 0; + } + + RtlZeroMemory( (PCHAR)NewAddresses + OldSize, NewSize - OldSize ); + + // + // See if we have to initialize Bcb Listheads. + // + + if (SizeToAllocate != NewSize) { + + LARGE_INTEGER Offset; + PLIST_ENTRY BcbListHeadNew, TempEntry; + + Offset.QuadPart = 0; + BcbListHeadNew = (PLIST_ENTRY)((PCHAR)NewAddresses + NewSize); + + // + // Handle case where the old array had Bcb Listheads. + // + + if ((SharedCacheMap->SectionSize.QuadPart > BEGIN_BCB_LIST_ARRAY) && + (OldAddresses != NULL)) { + + PLIST_ENTRY BcbListHeadOld; + + BcbListHeadOld = (PLIST_ENTRY)((PCHAR)OldAddresses + OldSize); + + // + // Loop to remove each old listhead and insert the new one + // in its place. + // + + do { + TempEntry = BcbListHeadOld->Flink; + RemoveEntryList( BcbListHeadOld ); + InsertTailList( TempEntry, BcbListHeadNew ); + Offset.QuadPart += SIZE_PER_BCB_LIST; + BcbListHeadOld += 1; + BcbListHeadNew += 1; + } while (Offset.QuadPart < SharedCacheMap->SectionSize.QuadPart); + + // + // Otherwise, handle the case where we are adding Bcb + // Listheads. + // + + } else { + + TempEntry = SharedCacheMap->BcbList.Blink; + + // + // Loop through any/all Bcbs to insert the new listheads. + // + + while (TempEntry != &SharedCacheMap->BcbList) { + + // + // Sit on this Bcb until we have inserted all listheads + // that go before it. + // + + while (Offset.QuadPart <= ((PBCB)CONTAINING_RECORD(TempEntry, BCB, BcbLinks))->FileOffset.QuadPart) { + + InsertHeadList(TempEntry, BcbListHeadNew); + Offset.QuadPart += SIZE_PER_BCB_LIST; + BcbListHeadNew += 1; + } + TempEntry = TempEntry->Blink; + } + } + + // + // Now insert the rest of the new listhead entries that were + // not finished in either loop above. + // + + while (Offset.QuadPart < NewSectionSize.QuadPart) { + + InsertHeadList(&SharedCacheMap->BcbList, BcbListHeadNew); + Offset.QuadPart += SIZE_PER_BCB_LIST; + BcbListHeadNew += 1; + } + } + + SharedCacheMap->Vacbs = NewAddresses; + + if ((OldAddresses != &SharedCacheMap->InitialVacbs[0]) && + (OldAddresses != NULL)) { + ExFreePool( OldAddresses ); + } + } + + SharedCacheMap->SectionSize = NewSectionSize; + } + + if (GrowingBcbListHeads) { + ExReleaseSpinLockFromDpcLevel( &CcVacbSpinLock ); + ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); + } else { + ExReleaseSpinLock( &CcVacbSpinLock, OldIrql ); + } +} + + +BOOLEAN +FASTCALL +CcUnmapVacbArray ( + IN PSHARED_CACHE_MAP SharedCacheMap, + IN PLARGE_INTEGER FileOffset OPTIONAL, + IN ULONG Length + ) + +/*++ + +Routine Description: + + This routine must be called to do any unmapping and associated + cleanup for a shared cache map, just before it is deleted. + +Arguments: + + SharedCacheMap - Supplies a pointer to the shared cache map + which is about to be deleted. + + FileOffset - If supplied, only unmap the specified offset and length + + Length - Completes range to unmap if FileOffset specified. If FileOffset + is specified, Length of 0 means unmap to the end of the section. 
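+
+    As an illustration only (a sketch; the caller and names used here are
+    assumed rather than taken from this module), a ranged unmap done for
+    cache coherency could be retried whenever an active view is encountered:
+
+        while (!CcUnmapVacbArray( SharedCacheMap, &PurgeOffset, PurgeLength )) {
+            CcWaitOnActiveCount( SharedCacheMap );
+        }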
+
+Return Value:
+
+    FALSE -- if the unmap was not done due to an active Vacb
+    TRUE -- if the unmap was done
+
+--*/
+
+{
+    PVACB Vacb;
+    KIRQL OldIrql;
+    LARGE_INTEGER StartingFileOffset = {0,0};
+    LARGE_INTEGER EndingFileOffset = SharedCacheMap->SectionSize;
+
+    //
+    // We could be just cleaning up for error recovery.
+    //
+
+    if (SharedCacheMap->Vacbs == NULL) {
+        return TRUE;
+    }
+
+    //
+    // See if a range was specified.
+    //
+
+    if (ARGUMENT_PRESENT(FileOffset)) {
+        StartingFileOffset = *FileOffset;
+        if (Length != 0) {
+            EndingFileOffset.QuadPart = FileOffset->QuadPart + Length;
+        }
+    }
+
+    //
+    // Acquire the spin lock to start the scan.
+    //
+
+    ExAcquireSpinLock( &CcVacbSpinLock, &OldIrql );
+
+    while (StartingFileOffset.QuadPart < EndingFileOffset.QuadPart) {
+
+        //
+        // Note that the caller with an explicit range may be off the
+        // end of the section (example CcPurgeCacheSection for cache
+        // coherency). That is the reason for the first part of the
+        // test below.
+        //
+        // Check the next cell once without the spin lock, it probably will
+        // not change, but we will handle it if it does.
+        //
+
+        if ((StartingFileOffset.QuadPart < SharedCacheMap->SectionSize.QuadPart) &&
+            ((Vacb = GetVacb( SharedCacheMap, StartingFileOffset )) != NULL)) {
+
+            //
+            // Return here if we are unlucky and see an active
+            // Vacb. It could be Purge calling, and the Lazy Writer
+            // may have done a CcGetVirtualAddressIfMapped!
+            //
+
+            if (Vacb->Overlay.ActiveCount != 0) {
+
+                ExReleaseSpinLock( &CcVacbSpinLock, OldIrql );
+                return FALSE;
+            }
+
+            //
+            // Unlink it from the other SharedCacheMap, so the other
+            // guy will not try to use it when we free the spin lock.
+            //
+
+            SetVacb( SharedCacheMap, StartingFileOffset, NULL );
+            Vacb->SharedCacheMap = NULL;
+
+            //
+            // Increment the open count so that no one else will
+            // try to unmap or reuse until we are done.
+            //
+
+            Vacb->Overlay.ActiveCount += 1;
+
+            //
+            // Release the spin lock.
+            //
+
+            ExReleaseSpinLock( &CcVacbSpinLock, OldIrql );
+
+            //
+            // Unmap and free it if we really got it above.
+            //
+
+            CcUnmapVacb( Vacb, SharedCacheMap );
+
+            //
+            // Reacquire the spin lock so that we can decrement the count.
+            //
+
+            ExAcquireSpinLock( &CcVacbSpinLock, &OldIrql );
+            Vacb->Overlay.ActiveCount -= 1;
+        }
+
+        StartingFileOffset.QuadPart = StartingFileOffset.QuadPart + VACB_MAPPING_GRANULARITY;
+    }
+
+    ExReleaseSpinLock( &CcVacbSpinLock, OldIrql );
+
+    return TRUE;
+}
+
+
-- 
cgit v1.2.3