path: root/private/ntos/cache
Diffstat
-rw-r--r--  private/ntos/cache/cachedat.c      183
-rw-r--r--  private/ntos/cache/cachesub.c     5633
-rw-r--r--  private/ntos/cache/cc.h           1746
-rw-r--r--  private/ntos/cache/copysup.c      2117
-rw-r--r--  private/ntos/cache/dirs             24
-rw-r--r--  private/ntos/cache/fssup.c        3343
-rw-r--r--  private/ntos/cache/lazyrite.c      732
-rw-r--r--  private/ntos/cache/logsup.c        548
-rw-r--r--  private/ntos/cache/mdlsup.c        999
-rw-r--r--  private/ntos/cache/mp/makefile       6
-rw-r--r--  private/ntos/cache/mp/sources       29
-rw-r--r--  private/ntos/cache/pinsup.c       1274
-rw-r--r--  private/ntos/cache/sources.inc      53
-rw-r--r--  private/ntos/cache/up/makefile       6
-rw-r--r--  private/ntos/cache/up/sources       27
-rw-r--r--  private/ntos/cache/vacbsup.c      1421
16 files changed, 18141 insertions, 0 deletions
diff --git a/private/ntos/cache/cachedat.c b/private/ntos/cache/cachedat.c
new file mode 100644
index 000000000..e6755d915
--- /dev/null
+++ b/private/ntos/cache/cachedat.c
@@ -0,0 +1,183 @@
+/*++
+
+Copyright (c) 1990 Microsoft Corporation
+
+Module Name:
+
+ cachedat.c
+
+Abstract:
+
+ This module implements the Memory Management based cache management
+ routines for the common Cache subsystem.
+
+Author:
+
+ Tom Miller [TomM] 4-May-1990
+
+Revision History:
+
+--*/
+
+#include "cc.h"
+
+//
+// Global SharedCacheMap lists and the spin lock that synchronizes
+// access to them.
+//
+
+extern KSPIN_LOCK CcMasterSpinLock;
+LIST_ENTRY CcCleanSharedCacheMapList;
+SHARED_CACHE_MAP_LIST_CURSOR CcDirtySharedCacheMapList;
+SHARED_CACHE_MAP_LIST_CURSOR CcLazyWriterCursor;
+
+//
+// Worker thread structures:
+//
+// A spinlock to synchronize all three lists.
+// A count of the number of worker threads Cc will use
+// A listhead for preinitialized executive work items for Cc use.
+// A listhead for an express queue of WORK_QUEUE_ENTRYs
+// A listhead for a regular queue of WORK_QUEUE_ENTRYs
+//
+
+extern KSPIN_LOCK CcWorkQueueSpinlock;
+ULONG CcNumberWorkerThreads = 0;
+LIST_ENTRY CcIdleWorkerThreadList;
+LIST_ENTRY CcExpressWorkQueue;
+LIST_ENTRY CcRegularWorkQueue;
+
+//
+// Store the current idle delay and target time to clean all.
+//
+
+LARGE_INTEGER CcNoDelay;
+LARGE_INTEGER CcFirstDelay = {(ULONG)-(3*LAZY_WRITER_IDLE_DELAY), -1};
+LARGE_INTEGER CcIdleDelay = {(ULONG)-LAZY_WRITER_IDLE_DELAY, -1};
+LARGE_INTEGER CcCollisionDelay = {(ULONG)-LAZY_WRITER_COLLISION_DELAY, -1};
+LARGE_INTEGER CcTargetCleanDelay = {(ULONG)-(LONG)(LAZY_WRITER_IDLE_DELAY * (LAZY_WRITER_MAX_AGE_TARGET + 1)), -1};
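+
+//
+// The negative initializers above follow the kernel timer convention:
+// a negative LARGE_INTEGER passed to routines such as KeSetTimer is a
+// relative timeout expressed in 100-nanosecond units.
+//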
+
+//
+// Spinlock for controlling access to Vacb and related global structures,
+// and a counter indicating how many Vacbs are active.
+//
+
+extern KSPIN_LOCK CcVacbSpinLock;
+ULONG CcNumberVacbs;
+
+//
+// Pointer to the global Vacb vector.
+//
+
+PVACB CcVacbs;
+PVACB CcBeyondVacbs;
+PVACB CcNextVictimVacb;
+
+//
+// Deferred write list and respective Thresholds
+//
+
+extern KSPIN_LOCK CcDeferredWriteSpinLock;
+LIST_ENTRY CcDeferredWrites;
+ULONG CcDirtyPageThreshold;
+ULONG CcDirtyPageTarget;
+ULONG CcPagesYetToWrite;
+ULONG CcPagesWrittenLastTime = 0;
+ULONG CcDirtyPagesLastScan = 0;
+ULONG CcAvailablePagesThreshold = 100;
+ULONG CcTotalDirtyPages = 0;
+
+//
+// Captured system size
+//
+
+MM_SYSTEMSIZE CcCapturedSystemSize;
+
+//
+// Tuning options du Jour
+//
+
+ULONG CcTune = 0;
+
+//
+// Global structure controlling lazy writer algorithms
+//
+
+LAZY_WRITER LazyWriter;
+
+NPAGED_LOOKASIDE_LIST CcTwilightLookasideList;
+
+#ifdef CCDBG
+
+LONG CcDebugTraceLevel = 0;
+LONG CcDebugTraceIndent = 0;
+
+#ifdef CCDBG_LOCK
+extern KSPIN_LOCK CcDebugTraceLock;
+#endif // def CCDBG_LOCK
+
+#endif
+
+//
+// Global list of pinned Bcbs which may be examined for debug purposes
+//
+
+#if DBG
+
+ULONG CcBcbCount;
+LIST_ENTRY CcBcbList;
+extern KSPIN_LOCK CcBcbSpinLock;
+
+#endif
+
+//
+// Throw away miss counter.
+//
+
+ULONG CcThrowAway;
+
+//
+// Performance Counters
+//
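+// The counters below generally come in NoWait/Wait pairs, distinguishing
+// calls made with Wait == FALSE from those made with Wait == TRUE, with
+// corresponding Miss counters for calls that could not be satisfied
+// immediately from the cache.
+//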
+
+ULONG CcFastReadNoWait;
+ULONG CcFastReadWait;
+ULONG CcFastReadResourceMiss;
+ULONG CcFastReadNotPossible;
+
+ULONG CcFastMdlReadNoWait;
+ULONG CcFastMdlReadWait;
+ULONG CcFastMdlReadResourceMiss;
+ULONG CcFastMdlReadNotPossible;
+
+ULONG CcMapDataNoWait;
+ULONG CcMapDataWait;
+ULONG CcMapDataNoWaitMiss;
+ULONG CcMapDataWaitMiss;
+
+ULONG CcPinMappedDataCount;
+
+ULONG CcPinReadNoWait;
+ULONG CcPinReadWait;
+ULONG CcPinReadNoWaitMiss;
+ULONG CcPinReadWaitMiss;
+
+ULONG CcCopyReadNoWait;
+ULONG CcCopyReadWait;
+ULONG CcCopyReadNoWaitMiss;
+ULONG CcCopyReadWaitMiss;
+
+ULONG CcMdlReadNoWait;
+ULONG CcMdlReadWait;
+ULONG CcMdlReadNoWaitMiss;
+ULONG CcMdlReadWaitMiss;
+
+ULONG CcReadAheadIos;
+
+ULONG CcLazyWriteHotSpots;
+ULONG CcLazyWriteIos;
+ULONG CcLazyWritePages;
+ULONG CcDataFlushes;
+ULONG CcDataPages;
+
+PULONG CcMissCounter = &CcThrowAway;
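+
+//
+// CcMissCounter is redirected at one of the counters above for the
+// duration of an operation (for example, read ahead points it at
+// CcReadAheadIos) and otherwise points at CcThrowAway, so that misses
+// recorded outside any measured operation are simply discarded.
+//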
diff --git a/private/ntos/cache/cachesub.c b/private/ntos/cache/cachesub.c
new file mode 100644
index 000000000..bbbcb88d9
--- /dev/null
+++ b/private/ntos/cache/cachesub.c
@@ -0,0 +1,5633 @@
+/*++
+
+Copyright (c) 1990 Microsoft Corporation
+
+Module Name:
+
+ cachesub.c
+
+Abstract:
+
+ This module implements the common subroutines for the Cache subsystem.
+
+Author:
+
+ Tom Miller [TomM] 4-May-1990
+
+Revision History:
+
+--*/
+
+#include "cc.h"
+
+extern POBJECT_TYPE IoFileObjectType;
+
+//
+// The Bug check file id for this module
+//
+
+#define BugCheckFileId (CACHE_BUG_CHECK_CACHESUB)
+
+//
+// Define our debug constant
+//
+
+#define me 0x00000002
+
+//
+// Define those errors which should be retried
+//
+
+#define RetryError(STS) (((STS) == STATUS_VERIFY_REQUIRED) || ((STS) == STATUS_FILE_LOCK_CONFLICT))
+
+ULONG CcMaxDirtyWrite = 0x10000;
+
+//
+// Local support routines
+//
+
+BOOLEAN
+CcFindBcb (
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN PLARGE_INTEGER FileOffset,
+ IN OUT PLARGE_INTEGER BeyondLastByte,
+ OUT PBCB *Bcb
+ );
+
+PBCB
+CcAllocateInitializeBcb (
+ IN OUT PSHARED_CACHE_MAP SharedCacheMap OPTIONAL,
+ IN OUT PBCB AfterBcb,
+ IN PLARGE_INTEGER FileOffset,
+ IN PLARGE_INTEGER Length
+ );
+
+NTSTATUS
+CcSetValidData(
+ IN PFILE_OBJECT FileObject,
+ IN PLARGE_INTEGER ValidDataLength
+ );
+
+BOOLEAN
+CcAcquireByteRangeForWrite (
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN PLARGE_INTEGER TargetOffset OPTIONAL,
+ IN ULONG TargetLength,
+ OUT PLARGE_INTEGER FileOffset,
+ OUT PULONG Length,
+ OUT PBCB *FirstBcb
+ );
+
+VOID
+CcReleaseByteRangeFromWrite (
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN PLARGE_INTEGER FileOffset,
+ IN ULONG Length,
+ IN PBCB FirstBcb,
+ IN BOOLEAN VerifyRequired
+ );
+
+
+//
+// Internal support routine
+//
+
+BOOLEAN
+CcPinFileData (
+ IN PFILE_OBJECT FileObject,
+ IN PLARGE_INTEGER FileOffset,
+ IN ULONG Length,
+ IN BOOLEAN ReadOnly,
+ IN BOOLEAN WriteOnly,
+ IN BOOLEAN Wait,
+ OUT PBCB *Bcb,
+ OUT PVOID *BaseAddress,
+ OUT PLARGE_INTEGER BeyondLastByte
+ )
+
+/*++
+
+Routine Description:
+
+ This routine locks the specified range of file data into memory.
+
+ Note that the data desired by the caller (or the first part of it)
+ may be in one of three states:
+
+ No Bcb exists which describes the data
+
+ A Bcb exists describing the data, but it is not mapped
+ (BcbOut->BaseAddress == NULL)
+
+ A Bcb exists describing the data, and it is mapped
+
+ Given the above three states, and given that the caller may call
+ with either Wait == FALSE or Wait == TRUE, this routine has basically
+ six cases. What has to be done, and the order in which things must be
+ done varies quite a bit with each of these six cases. The most
+ straight-forward implementation of this routine, with the least amount
+ of branching, is achieved by determining which of the six cases applies,
+ and dispatching fairly directly to that case. The handling of the
+ cases is summarized in the following table:
+
+                    Wait == TRUE                   Wait == FALSE
+                    ------------                   -------------
+
+    no Bcb          Case 1:                        Case 2:
+
+                    CcAllocateInitializeBcb        CcMapAndRead (exit if FALSE)
+                    Acquire Bcb Exclusive          CcAllocateInitializeBcb
+                    Release BcbList SpinLock       Acquire Bcb Shared if not ReadOnly
+                    CcMapAndRead w/ Wait           Release BcbList SpinLock
+                    Convert/Release Bcb Resource
+
+    Bcb not         Case 3:                        Case 4:
+    mapped
+                    Increment PinCount             Acquire Bcb Exclusive (exit if FALSE)
+                    Release BcbList SpinLock       CcMapAndRead (exit if FALSE)
+                    Acquire Bcb Excl. w/ Wait      Increment PinCount
+                    if still not mapped            Convert/Release Bcb Resource
+                        CcMapAndRead w/ Wait       Release BcbList SpinLock
+                    Convert/Release Bcb Resource
+
+    Bcb mapped      Case 5:                        Case 6:
+
+                    Increment PinCount             if not ReadOnly
+                    Release BcbList SpinLock           Acquire Bcb shared (exit if FALSE)
+                    if not ReadOnly                Increment PinCount
+                        Acquire Bcb Shared         Release BcbList SpinLock
+
+ It is important to note that most changes to this routine will affect
+ multiple cases from above.
+
+Arguments:
+
+ FileObject - Pointer to File Object for file
+
+ FileOffset - Offset in file at which map should begin
+
+ Length - Length of desired map in bytes
+
+ ReadOnly - Supplies TRUE if caller will only read the mapped data (i.e.,
+ TRUE for CcCopyRead, CcMapData and CcMdlRead and FALSE for
+ everyone else)
+
+ WriteOnly - The specified range of bytes will only be written.
+
+ Wait - Supplies TRUE if it is ok to block the caller's thread
+ Supplies 3 if it is ok to block the caller's thread and the Bcb should
+ be exclusive
+ Supplies FALSE if it is not ok to block the caller's thread
+
+ Bcb - Returns a pointer to the Bcb representing the pinned data.
+
+ BaseAddress - Returns base address of desired data
+
+ BeyondLastByte - Returns the File Offset of the first byte beyond the
+ last accessible byte.
+
+Return Value:
+
+ FALSE - if Wait was supplied as FALSE, and it was impossible to lock all
+ of the data without blocking
+ TRUE - if the desired data is being returned
+
+Raises:
+
+ STATUS_INSUFFICIENT_RESOURCES - If a pool allocation failure occurs.
+ This can only occur if Wait was specified as TRUE. (If Wait is
+ specified as FALSE, and an allocation failure occurs, this
+ routine simply returns FALSE.)
+
+--*/
+
+{
+ PSHARED_CACHE_MAP SharedCacheMap;
+ LARGE_INTEGER TrialBound;
+ KIRQL OldIrql;
+ PBCB BcbOut = NULL;
+ ULONG ZeroFlags = 0;
+ BOOLEAN SpinLockAcquired = FALSE;
+ BOOLEAN UnmapBcb = FALSE;
+ BOOLEAN Result = FALSE;
+
+ ULONG ActivePage;
+ ULONG PageIsDirty;
+ PVACB ActiveVacb = NULL;
+
+ DebugTrace(+1, me, "CcPinFileData:\n", 0 );
+ DebugTrace( 0, me, " FileObject = %08lx\n", FileObject );
+ DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart,
+ FileOffset->HighPart );
+ DebugTrace( 0, me, " Length = %08lx\n", Length );
+ DebugTrace( 0, me, " Wait = %02lx\n", Wait );
+
+ //
+ // Get pointer to SharedCacheMap via File Object.
+ //
+
+ SharedCacheMap = *(PSHARED_CACHE_MAP *)((PCHAR)FileObject->SectionObjectPointer
+ + sizeof(PVOID));
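+
+ //
+ // (The SharedCacheMap is the second pointer-sized field of the
+ // SECTION_OBJECT_POINTERS structure, which is why it is read at
+ // offset sizeof(PVOID) from SectionObjectPointer above.)
+ //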
+
+ //
+ // See if we have an active Vacb, that we need to free.
+ //
+
+ GetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, PageIsDirty );
+
+ //
+ // If there is an end of a page to be zeroed, then free that page now,
+ // so it does not cause our data to get zeroed. If there is an active
+ // page, free it so we have the correct ValidDataGoal.
+ //
+
+ if ((ActiveVacb != NULL) || (SharedCacheMap->NeedToZero != NULL)) {
+
+ CcFreeActiveVacb( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty );
+ }
+
+ //
+ // Make sure the calling file system is not asking to map beyond the
+ // end of the section, for example, that it did not forget to do
+ // CcExtendCacheSection.
+ //
+
+ ASSERT( ( FileOffset->QuadPart + (LONGLONG)Length ) <=
+ SharedCacheMap->SectionSize.QuadPart );
+
+ //
+ // Initially clear output
+ //
+
+ *Bcb = NULL;
+ *BaseAddress = NULL;
+
+ //
+ // Acquire Bcb List Exclusive to look for Bcb
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+ SpinLockAcquired = TRUE;
+
+ //
+ // Use try to guarantee cleanup on the way out.
+ //
+
+ try {
+
+ BOOLEAN Found;
+ LARGE_INTEGER FOffset;
+ LARGE_INTEGER TLength;
+ PVOID BAddress;
+ PVACB Vacb;
+
+ //
+ // Search for Bcb describing the largest matching "prefix" byte range,
+ // or where to insert it.
+ //
+
+ TrialBound.QuadPart = FileOffset->QuadPart + (LONGLONG)Length;
+ Found = CcFindBcb( SharedCacheMap, FileOffset, &TrialBound, &BcbOut );
+
+
+ //
+ // Cases 1 and 2 - Bcb was not found.
+ //
+ // First calculate data to pin down.
+ //
+
+ if (!Found) {
+
+ //
+ // Not found, calculate data to pin down.
+ //
+ // Round local copy of FileOffset down to page boundary, and
+ // round copies of size and minimum size up. Also make sure that
+ // we keep the length from crossing the end of the SharedCacheMap.
+ //
+
+ FOffset = *FileOffset;
+ TLength.QuadPart = TrialBound.QuadPart - FOffset.QuadPart;
+
+ TLength.LowPart += FOffset.LowPart & (PAGE_SIZE - 1);
+
+ //
+ // At this point we can calculate the ReadOnly flag for
+ // the purposes of whether to use the Bcb resource, and
+ // we can calculate the ZeroFlags.
+ //
+
+ if ((!ReadOnly && !FlagOn(SharedCacheMap->Flags, PIN_ACCESS)) || WriteOnly) {
+
+ //
+ // We can always zero middle pages, if any.
+ //
+
+ ZeroFlags = ZERO_MIDDLE_PAGES;
+
+ if (((FOffset.LowPart & (PAGE_SIZE - 1)) == 0) &&
+ (Length >= PAGE_SIZE)) {
+ ZeroFlags |= ZERO_FIRST_PAGE;
+ }
+
+ if ((TLength.LowPart & (PAGE_SIZE - 1)) == 0) {
+ ZeroFlags |= ZERO_LAST_PAGE;
+ }
+ }
+
+ //
+ // We treat Bcbs as ReadOnly (do not acquire resource) if they
+ // are in sections for which we have not disabled modified writing.
+ //
+
+ if (!FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) {
+ ReadOnly = TRUE;
+ }
+
+ TLength.LowPart = ROUND_TO_PAGES( TLength.LowPart );
+
+ FOffset.LowPart &= ~(PAGE_SIZE - 1);
+
+ //
+ // Even if we are readonly, we can still zero pages entirely
+ // beyond valid data length.
+ //
+
+ if (FOffset.QuadPart >= SharedCacheMap->ValidDataGoal.QuadPart) {
+
+ ZeroFlags |= ZERO_FIRST_PAGE | ZERO_MIDDLE_PAGES | ZERO_LAST_PAGE;
+
+ } else if ((FOffset.QuadPart + (LONGLONG)PAGE_SIZE) >=
+ SharedCacheMap->ValidDataGoal.QuadPart) {
+
+ ZeroFlags |= ZERO_MIDDLE_PAGES | ZERO_LAST_PAGE;
+ }
+
+ //
+ // We will get into trouble if we try to read more than we
+ // can map by one Vacb. So make sure that our lengths stay
+ // within a Vacb.
+ //
+
+ if (TLength.LowPart > VACB_MAPPING_GRANULARITY) {
+
+ TLength.LowPart = VACB_MAPPING_GRANULARITY;
+ }
+
+ if ((FOffset.LowPart & ~(VACB_MAPPING_GRANULARITY - 1))
+
+ !=
+
+ ((FOffset.LowPart + TLength.LowPart - 1) &
+ ~(VACB_MAPPING_GRANULARITY - 1))) {
+
+ TLength.LowPart = VACB_MAPPING_GRANULARITY -
+ (FOffset.LowPart & (VACB_MAPPING_GRANULARITY - 1));
+ }
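+
+ //
+ // For example, assuming a 256KB mapping granularity, a pin request
+ // for 0x3000 bytes at file offset 0x3F000 is clamped here to 0x1000
+ // bytes so that it does not cross the 0x40000 Vacb boundary.
+ //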
+
+
+ //
+ // Case 1 - Bcb was not found and Wait is TRUE.
+ //
+ // Note that it is important to minimize the time that the Bcb
+ // List spin lock is held, as well as guarantee we do not take
+ // any faults while holding this lock.
+ //
+ // If we can (and perhaps will) wait, then it is important to
+ // allocate the Bcb, acquire it exclusive, and free the Bcb List.
+ // We then proceed to read in the data, and anyone else finding
+ // our Bcb will have to wait shared to insure that the data is
+ // in.
+ //
+
+ if (Wait) {
+
+ BcbOut = CcAllocateInitializeBcb ( SharedCacheMap,
+ BcbOut,
+ &FOffset,
+ &TLength );
+
+ if (BcbOut == NULL) {
+ DebugTrace( 0, 0, "Bcb allocation failure\n", 0 );
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ SpinLockAcquired = FALSE;
+ ExRaiseStatus( STATUS_INSUFFICIENT_RESOURCES );
+ }
+
+ //
+ // Now just acquire the newly-allocated Bcb (exclusive if Wait is 3,
+ // otherwise shared), and release the spin lock.
+ //
+
+ if (!ReadOnly) {
+ if (Wait == 3) {
+ (VOID)ExAcquireResourceExclusive( &BcbOut->Resource, TRUE );
+ } else {
+ (VOID)ExAcquireSharedStarveExclusive( &BcbOut->Resource, TRUE );
+ }
+ }
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ SpinLockAcquired = FALSE;
+
+ //
+ // Now read in the data.
+ //
+ // We set UnmapBcb to be TRUE for the duration of this call,
+ // so that if we get an exception, we will call CcUnpinFileData
+ // and probably delete the Bcb.
+ //
+
+ UnmapBcb = TRUE;
+ (VOID)CcMapAndRead( SharedCacheMap,
+ &FOffset,
+ TLength.LowPart,
+ ZeroFlags,
+ TRUE,
+ &Vacb,
+ &BAddress );
+
+ UnmapBcb = FALSE;
+
+ //
+ // Now we have to reacquire the Bcb List spinlock to load
+ // up the mapping if we are the first one, else we collided
+ // with someone else who loaded the mapping first, and we
+ // will just free our mapping. It is guaranteed that the
+ // data will be mapped to the same place.
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+
+ if (BcbOut->BaseAddress == NULL) {
+
+ BcbOut->BaseAddress = BAddress;
+ BcbOut->Vacb = Vacb;
+
+ } else {
+ CcFreeVirtualAddress( Vacb );
+ }
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+
+ //
+ // Calculate Base Address of the data we want.
+ //
+
+ *BaseAddress = (PCHAR)BcbOut->BaseAddress +
+ (ULONG)( FileOffset->QuadPart - BcbOut->FileOffset.QuadPart );
+
+ //
+ // Success!
+ //
+
+ try_return( Result = TRUE );
+ }
+
+
+ //
+ // Case 2 - Bcb was not found and Wait is FALSE
+ //
+ // If we cannot wait, then we allocate and set up the Bcb, release
+ // the spin lock, and then immediately go see if the data is there
+ // (CcMapAndRead with Wait == FALSE), returning FALSE if it is not
+ // resident.
+ //
+
+ else {
+
+ //
+ // Now try to allocate and initialize the Bcb. If we
+ // fail to allocate one, then return FALSE, since we know that
+ // Wait = FALSE. The caller may get lucky if he calls
+ // us back with Wait = TRUE.
+ //
+
+ BcbOut = CcAllocateInitializeBcb ( SharedCacheMap,
+ BcbOut,
+ &FOffset,
+ &TLength );
+
+ if (BcbOut == NULL) {
+
+ try_return( Result = FALSE );
+ }
+
+ //
+ // If we are not ReadOnly, we must acquire the newly-allocated
+ // resource shared, and then we can free the spin lock.
+ //
+
+ if (!ReadOnly) {
+ ExAcquireSharedStarveExclusive( &BcbOut->Resource, TRUE );
+ }
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ SpinLockAcquired = FALSE;
+
+ //
+ // Note that since this call has Wait = FALSE, it cannot
+ // get an exception (see procedure header).
+ //
+
+ UnmapBcb = TRUE;
+ if (!CcMapAndRead( SharedCacheMap,
+ &FOffset,
+ TLength.LowPart,
+ ZeroFlags,
+ FALSE,
+ &Vacb,
+ &BAddress )) {
+
+ try_return( Result = FALSE );
+ }
+ UnmapBcb = FALSE;
+
+ //
+ // Now we have to reacquire the Bcb List spinlock to load
+ // up the mapping if we are the first one, else we collided
+ // with someone else who loaded the mapping first, and we
+ // will just free our mapping. It is guaranteed that the
+ // data will be mapped to the same place.
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+
+ if (BcbOut->BaseAddress == NULL) {
+
+ BcbOut->BaseAddress = BAddress;
+ BcbOut->Vacb = Vacb;
+
+ } else {
+ CcFreeVirtualAddress( Vacb );
+ }
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+
+ //
+ // Calculate Base Address of the data we want.
+ //
+
+ *BaseAddress = (PCHAR)BcbOut->BaseAddress +
+ (ULONG)( FileOffset->QuadPart - BcbOut->FileOffset.QuadPart );
+
+ //
+ // Success!
+ //
+
+ try_return( Result = TRUE );
+ }
+
+ } else {
+
+ //
+ // We treat Bcbs as ReadOnly (do not acquire resource) if they
+ // are in sections for which we have not disabled modified writing.
+ //
+
+ if (!FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) {
+ ReadOnly = TRUE;
+ }
+ }
+
+
+ //
+ // Cases 3 and 4 - Bcb is there but not mapped
+ //
+
+ if (BcbOut->BaseAddress == NULL) {
+
+ //
+ // It is too complicated to attempt to calculate any ZeroFlags in this
+ // case, because we have to not only do the tests above, but also
+ // compare to the byte range in the Bcb since we will be passing
+ // those parameters to CcMapAndRead. Also, the probability of hitting
+ // some window where zeroing is of any advantage is quite small.
+ //
+
+ //
+ // Set up to just reread the Bcb exactly as the data in it is
+ // described.
+ //
+
+ FOffset = BcbOut->FileOffset;
+ TLength.QuadPart = (LONGLONG)BcbOut->ByteLength;
+
+ //
+ // Case 3 - Bcb is there but not mapped and Wait is TRUE
+ //
+ // Increment the PinCount, and then release the BcbList
+ // SpinLock so that we can wait to acquire the Bcb exclusive.
+ // Once we have the Bcb exclusive, map and read it in if no
+ // one beats us to it. Someone may have beat us to it since
+ // we had to release the SpinLock above.
+ //
+
+ if (Wait) {
+
+ BcbOut->PinCount += 1;
+
+ //
+ // Now we have to release the BcbList SpinLock in order to
+ // acquire the Bcb shared.
+ //
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ SpinLockAcquired = FALSE;
+ if (!ReadOnly) {
+ if (Wait == 3) {
+ (VOID)ExAcquireResourceExclusive( &BcbOut->Resource, TRUE );
+ } else {
+ (VOID)ExAcquireSharedStarveExclusive( &BcbOut->Resource, TRUE );
+ }
+ }
+
+ //
+ // Now proceed to map and read the data in.
+ //
+ //
+ // We set UnmapBcb to be TRUE for the duration of this call,
+ // so that if we get an exception, we will call CcUnpinFileData
+ // and probably delete the Bcb.
+ //
+
+ UnmapBcb = TRUE;
+ (VOID)CcMapAndRead( SharedCacheMap,
+ &FOffset,
+ TLength.LowPart,
+ ZeroFlags,
+ TRUE,
+ &Vacb,
+ &BAddress );
+ UnmapBcb = FALSE;
+
+ //
+ // Now we have to reacquire the Bcb List spinlock to load
+ // up the mapping if we are the first one, else we collided
+ // with someone else who loaded the mapping first, and we
+ // will just free our mapping. It is guaranteed that the
+ // data will be mapped to the same place.
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+
+ if (BcbOut->BaseAddress == NULL) {
+
+ BcbOut->BaseAddress = BAddress;
+ BcbOut->Vacb = Vacb;
+
+ } else {
+ CcFreeVirtualAddress( Vacb );
+ }
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+
+ //
+ // Calculate Base Address of the data we want.
+ //
+
+ *BaseAddress = (PCHAR)BcbOut->BaseAddress +
+ (ULONG)( FileOffset->QuadPart - BcbOut->FileOffset.QuadPart );
+
+ //
+ // Success!
+ //
+
+ try_return( Result = TRUE );
+ }
+
+
+ //
+ // Case 4 - Bcb is there but not mapped, and Wait is FALSE
+ //
+ // Since we cannot wait, we try to acquire the Bcb resource without
+ // waiting (exiting if we cannot), bump the PinCount, release the
+ // spin lock, and then immediately go see if the data is there
+ // (CcMapAndRead with Wait == FALSE), returning FALSE if it is not
+ // resident.
+ //
+
+ else {
+
+ if (!ReadOnly && !ExAcquireSharedStarveExclusive( &BcbOut->Resource, FALSE )) {
+ try_return( Result = FALSE );
+ }
+
+ BcbOut->PinCount += 1;
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ SpinLockAcquired = FALSE;
+
+ //
+ // Note that since this call has Wait = FALSE, it cannot
+ // get an exception (see procedure header).
+ //
+
+ UnmapBcb = TRUE;
+ if (!CcMapAndRead( SharedCacheMap,
+ &BcbOut->FileOffset,
+ BcbOut->ByteLength,
+ ZeroFlags,
+ FALSE,
+ &Vacb,
+ &BAddress )) {
+
+ try_return( Result = FALSE );
+ }
+ UnmapBcb = FALSE;
+
+ //
+ // Now we have to reacquire the Bcb List spinlock to load
+ // up the mapping if we are the first one, else we collided
+ // with someone else who loaded the mapping first, and we
+ // will just free our mapping. It is guaranteed that the
+ // data will be mapped to the same place.
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+
+ if (BcbOut->BaseAddress == NULL) {
+
+ BcbOut->BaseAddress = BAddress;
+ BcbOut->Vacb = Vacb;
+
+ } else {
+ CcFreeVirtualAddress( Vacb );
+ }
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+
+ //
+ // Calculate Base Address of the data we want.
+ //
+
+ *BaseAddress = (PCHAR)BcbOut->BaseAddress +
+ (ULONG)( FileOffset->QuadPart - BcbOut->FileOffset.QuadPart );
+
+ //
+ // Success!
+ //
+
+ try_return( Result = TRUE );
+ }
+ }
+
+
+ //
+ // Cases 5 and 6 - Bcb is there and it is mapped
+ //
+
+ else {
+
+ //
+ // Case 5 - Bcb is there and mapped, and Wait is TRUE
+ //
+ // We can just increment the PinCount, release the SpinLock
+ // and then acquire the Bcb Shared if we are not ReadOnly.
+ //
+
+ if (Wait) {
+
+ BcbOut->PinCount += 1;
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ SpinLockAcquired = FALSE;
+
+ //
+ // Acquire Bcb Resource shared to insure that it is in memory.
+ //
+
+ if (!ReadOnly) {
+ if (Wait == 3) {
+ (VOID)ExAcquireResourceExclusive( &BcbOut->Resource, TRUE );
+ } else {
+ (VOID)ExAcquireSharedStarveExclusive( &BcbOut->Resource, TRUE );
+ }
+ }
+ }
+
+ //
+ // Case 6 - Bcb is there and mapped, and Wait is FALSE
+ //
+ // If we are not ReadOnly, we have to first see if we can
+ // acquire the Bcb shared before incrementing the PinCount,
+ // since we will have to return FALSE if we cannot acquire the
+ // resource.
+ //
+
+ else {
+
+ //
+ // Acquire Bcb Resource shared to insure that it is in memory.
+ //
+
+ if (!ReadOnly && !ExAcquireSharedStarveExclusive( &BcbOut->Resource, FALSE )) {
+ try_return( Result = FALSE );
+ }
+ BcbOut->PinCount += 1;
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ SpinLockAcquired = FALSE;
+ }
+
+ //
+ // Calculate Base Address of the data we want.
+ //
+
+ *BaseAddress = (PCHAR)BcbOut->BaseAddress +
+ (ULONG)( FileOffset->QuadPart - BcbOut->FileOffset.QuadPart );
+
+ //
+ // Success!
+ //
+
+ try_return( Result = TRUE );
+ }
+
+
+ try_exit: NOTHING;
+
+ }
+
+ finally {
+
+ //
+ // Release the spinlock if it is acquired.
+ //
+
+ if (SpinLockAcquired) {
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ }
+
+ //
+ // An abnormal termination can occur on an allocation failure,
+ // or on a failure to map and read the buffer. The latter
+ // operation is performed with UnmapBcb = TRUE, so that we
+ // know to make the unmap call.
+ //
+
+ if (UnmapBcb) {
+ CcUnpinFileData( BcbOut, ReadOnly, UNPIN );
+ BcbOut = NULL;
+ }
+
+ if (Result) {
+
+ *Bcb = BcbOut;
+ if (BcbOut != NULL) {
+ *BeyondLastByte = BcbOut->BeyondLastByte;
+ }
+ else {
+ *BeyondLastByte = *FileOffset;
+ }
+ }
+
+ DebugTrace( 0, me, " <Bcb = %08lx\n", *Bcb );
+ DebugTrace( 0, me, " <BaseAddress = %08lx\n", *BaseAddress );
+ DebugTrace(-1, me, "CcPinFileData -> %02lx\n", Result );
+ }
+
+ return Result;
+}
+
+
+//
+// Internal Support Routine
+//
+
+VOID
+FASTCALL
+CcUnpinFileData (
+ IN OUT PBCB Bcb,
+ IN BOOLEAN ReadOnly,
+ IN UNMAP_ACTIONS UnmapAction
+ )
+
+/*++
+
+Routine Description:
+
+ This routine unmaps and unlocks the specified buffer, which was previously
+ locked and mapped by calling CcPinFileData.
+
+Arguments:
+
+ Bcb - Pointer previously returned from CcPinFileData. As may be
+ seen above, this pointer may be either a Bcb or a Vacb.
+
+ ReadOnly - must specify same value as when data was mapped
+
+ UnmapAction - UNPIN or SET_CLEAN
+
+Return Value:
+
+ None
+
+--*/
+
+{
+ KIRQL OldIrql;
+ PSHARED_CACHE_MAP SharedCacheMap;
+
+ DebugTrace(+1, me, "CcUnpinFileData >Bcb = %08lx\n", Bcb );
+
+ //
+ // Note, since we have to allocate so many Vacbs, we do not use
+ // a node type code. However, the Vacb starts with a BaseAddress,
+ // so we assume that the low byte of the Bcb node type code has
+ // some bits set, which a page-aligned Base Address cannot.
+ //
+
+ ASSERT( (CACHE_NTC_BCB & 0xFF) != 0 );
+
+ if (Bcb->NodeTypeCode != CACHE_NTC_BCB) {
+
+ ASSERT(((PVACB)Bcb)->SharedCacheMap->NodeTypeCode == CACHE_NTC_SHARED_CACHE_MAP);
+
+ CcFreeVirtualAddress( (PVACB)Bcb );
+
+ DebugTrace(-1, me, "CcUnpinFileData -> VOID (simple release)\n", 0 );
+
+ return;
+ }
+
+ SharedCacheMap = Bcb->SharedCacheMap;
+
+ //
+ // We treat Bcbs as ReadOnly (do not acquire resource) if they
+ // are in sections for which we have not disabled modified writing.
+ //
+
+ if (!FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) {
+ ReadOnly = TRUE;
+ }
+
+ //
+ // Synchronize
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+
+ switch (UnmapAction) {
+
+ case UNPIN:
+
+ ASSERT( Bcb->PinCount > 0 );
+
+ Bcb->PinCount -= 1;
+ break;
+
+ case SET_CLEAN:
+
+ if (Bcb->Dirty) {
+
+ ULONG Pages = Bcb->ByteLength >> PAGE_SHIFT;
+
+ //
+ // Reverse the rest of the actions taken when the Bcb was set dirty.
+ //
+
+ Bcb->Dirty = FALSE;
+ SharedCacheMap->DirtyPages -= Pages;
+ CcTotalDirtyPages -= Pages;
+
+ //
+ // Normally we need to reduce CcPagesYetToWrite appropriately.
+ //
+
+ if (CcPagesYetToWrite > Pages) {
+ CcPagesYetToWrite -= Pages;
+ } else {
+ CcPagesYetToWrite = 0;
+ }
+
+ //
+ // Remove SharedCacheMap from dirty list if nothing more dirty,
+ // and someone still has the cache map opened.
+ //
+
+ if ((SharedCacheMap->DirtyPages == 0) &&
+ (SharedCacheMap->OpenCount != 0)) {
+
+ RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
+ InsertTailList( &CcCleanSharedCacheMapList,
+ &SharedCacheMap->SharedCacheMapLinks );
+ }
+ }
+
+ break;
+
+ default:
+ CcBugCheck( UnmapAction, 0, 0 );
+ }
+
+ //
+ // If we brought it to 0, then we have to kill it.
+ //
+
+ if (Bcb->PinCount == 0) {
+
+ //
+ // If the Bcb is Dirty, we only release the resource and unmap now.
+ //
+
+ if (Bcb->Dirty) {
+
+ if (Bcb->BaseAddress != NULL) {
+
+ //
+ // Capture CcFreeVirtualAddress parameters to locals so that we can
+ // reset Bcb->BaseAddress and release the spinlock before
+ // unmapping.
+ //
+
+ PVOID BaseAddress = Bcb->BaseAddress;
+ ULONG ByteLength = Bcb->ByteLength;
+ PVACB Vacb = Bcb->Vacb;
+
+ Bcb->BaseAddress = NULL;
+ Bcb->Vacb = NULL;
+
+ if (!ReadOnly) {
+ ExReleaseResource( &Bcb->Resource );
+ }
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+
+ CcFreeVirtualAddress( Vacb );
+ }
+ else {
+
+ if (!ReadOnly) {
+ ExReleaseResource( &Bcb->Resource );
+ }
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ }
+ }
+
+ //
+ // Otherwise, we also delete the Bcb.
+ //
+
+ else {
+
+ RemoveEntryList( &Bcb->BcbLinks );
+
+ if (Bcb->BaseAddress != NULL) {
+
+ CcFreeVirtualAddress( Bcb->Vacb );
+ }
+
+ //
+ // Debug routines used to remove Bcbs from the global list
+ //
+
+#if LIST_DBG
+
+ ExAcquireSpinLockAtDpcLevel( &CcBcbSpinLock );
+
+ if (Bcb->CcBcbLinks.Flink != NULL) {
+
+ RemoveEntryList( &Bcb->CcBcbLinks );
+ CcBcbCount -= 1;
+ }
+
+ ExReleaseSpinLockFromDpcLevel( &CcBcbSpinLock );
+
+#endif
+#if DBG
+ if (!ReadOnly) {
+ ExReleaseResource( &Bcb->Resource );
+ }
+
+ //
+ // ASSERT that the resource is unowned.
+ //
+
+ ASSERT( Bcb->Resource.ActiveCount == 0 );
+#endif
+ CcDeallocateBcb( Bcb );
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ }
+ }
+
+ //
+ // Else we just have to release our Shared access, if we are not
+ // readonly. We don't need to do this above, since we deallocate
+ // the entire Bcb there.
+ //
+
+ else {
+
+ if (!ReadOnly) {
+ ExReleaseResource( &Bcb->Resource );
+ }
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ }
+
+ DebugTrace(-1, me, "CcUnpinFileData -> VOID\n", 0 );
+
+ return;
+}
+
+
+VOID
+CcSetReadAheadGranularity (
+ IN PFILE_OBJECT FileObject,
+ IN ULONG Granularity
+ )
+
+/*++
+
+Routine Description:
+
+ This routine may be called to set the read ahead granularity used by
+ the Cache Manager. The default is PAGE_SIZE. The number is decremented
+ and stored as a mask.
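+
+ For example, a file system that expects large sequential transfers
+ might call (illustrative values):
+
+ CcSetReadAheadGranularity( FileObject, 0x10000 );
+
+ which records a ReadAheadMask of 0xFFFF, so that subsequent read
+ ahead is computed on 64KB boundaries.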
+
+Arguments:
+
+ FileObject - File Object for which granularity shall be set
+
+ Granularity - new granularity, which must be an even power of 2 and
+ >= PAGE_SIZE
+
+Return Value:
+
+ None
+--*/
+
+{
+ ((PPRIVATE_CACHE_MAP)FileObject->PrivateCacheMap)->ReadAheadMask = Granularity - 1;
+}
+
+
+VOID
+CcScheduleReadAhead (
+ IN PFILE_OBJECT FileObject,
+ IN PLARGE_INTEGER FileOffset,
+ IN ULONG Length
+ )
+
+/*++
+
+Routine Description:
+
+ This routine is called by Copy Read and Mdl Read file system routines to
+ perform common Read Ahead processing. The input parameters describe
+ the current read which has just been completed, or perhaps only started
+ in the case of Mdl Reads. Based on these parameters, an
+ assessment is made on how much data should be read ahead, and whether
+ that data has already been read ahead.
+
+ The processing is divided into two parts:
+
+ CALCULATE READ AHEAD REQUIREMENTS (CcScheduleReadAhead)
+
+ PERFORM READ AHEAD (CcPerformReadAhead)
+
+ File systems should always call CcReadAhead, which will conditionally
+ call CcScheduleReadAhead (if the read is large enough). If such a call
+ determines that there is read ahead work to do, and no read ahead is
+ currently active, then it will set ReadAheadActive and schedule read
+ ahead to be performed by the Lazy Writer, who will call CcPerformReadAhead.
+
+Arguments:
+
+ FileObject - supplies pointer to FileObject on which readahead should be
+ considered.
+
+ FileOffset - supplies the FileOffset at which the last read just occurred.
+
+ Length - supplies the length of the last read.
+
+Return Value:
+
+ None
+--*/
+
+{
+ LARGE_INTEGER NewOffset;
+ LARGE_INTEGER NewBeyond;
+ LARGE_INTEGER FileOffset1, FileOffset2;
+ KIRQL OldIrql;
+ PSHARED_CACHE_MAP SharedCacheMap;
+ PPRIVATE_CACHE_MAP PrivateCacheMap;
+ PWORK_QUEUE_ENTRY WorkQueueEntry;
+ ULONG ReadAheadSize;
+ BOOLEAN Changed = FALSE;
+
+ DebugTrace(+1, me, "CcScheduleReadAhead:\n", 0 );
+ DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart,
+ FileOffset->HighPart );
+ DebugTrace( 0, me, " Length = %08lx\n", Length );
+
+ SharedCacheMap = *(PSHARED_CACHE_MAP *)((PCHAR)FileObject->SectionObjectPointer
+ + sizeof(PVOID));
+ PrivateCacheMap = FileObject->PrivateCacheMap;
+
+ if ((PrivateCacheMap == NULL) ||
+ (SharedCacheMap == NULL) ||
+ FlagOn(SharedCacheMap->Flags, DISABLE_READ_AHEAD)) {
+
+ DebugTrace(-1, me, "CcScheduleReadAhead -> VOID (Nooped)\n", 0 );
+
+ return;
+ }
+
+ //
+ // Round boundaries of transfer up to some greater granularity, so that
+ // sequential reads will be recognized even if a few bytes are skipped
+ // between records.
+ //
+
+ NewOffset = *FileOffset;
+ NewBeyond.QuadPart = FileOffset->QuadPart + (LONGLONG)Length;
+
+ //
+ // Find the next read ahead boundary beyond the current read.
+ //
+
+ ReadAheadSize = (Length + PrivateCacheMap->ReadAheadMask) & ~PrivateCacheMap->ReadAheadMask;
+ FileOffset2.QuadPart = NewBeyond.QuadPart + (LONGLONG)ReadAheadSize;
+ FileOffset2.LowPart &= ~PrivateCacheMap->ReadAheadMask;
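+
+ //
+ // For example, with a 64KB granularity (ReadAheadMask == 0xFFFF), a
+ // 0x1000 byte read ending at offset 0x12000 yields ReadAheadSize ==
+ // 0x10000 and a next read ahead boundary (FileOffset2) of 0x20000.
+ //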
+
+ //
+ // CALCULATE READ AHEAD REQUIREMENTS
+ //
+
+ //
+ // Take out the ReadAhead spinlock to synchronize our read ahead decision.
+ //
+
+ ExAcquireSpinLock( &PrivateCacheMap->ReadAheadSpinLock, &OldIrql );
+
+ //
+ // Read Ahead Case 0.
+ //
+ // Sequential-only hint in the file object. For this case we will
+ // try and always keep two read ahead granularities read ahead from
+ // and including the end of the current transfer. This case has the
+ // lowest overhead, and the code is completely immune to how the
+ // caller skips around. Sequential files use ReadAheadOffset[1] in
+ // the PrivateCacheMap as their "high water mark".
+ //
+
+ if (FlagOn(FileObject->Flags, FO_SEQUENTIAL_ONLY)) {
+
+ //
+ // If the next boundary is greater than or equal to the high-water mark,
+ // then read ahead.
+ //
+
+ if (FileOffset2.QuadPart >= PrivateCacheMap->ReadAheadOffset[1].QuadPart) {
+
+ //
+ // On the first read if we are using a large read ahead granularity,
+ // and the read did not get it all, we will just get the rest of the
+ // first data we want.
+ //
+
+ if ((FileOffset->QuadPart == 0)
+
+ &&
+
+ (PrivateCacheMap->ReadAheadMask > (PAGE_SIZE - 1))
+
+ &&
+
+ ((Length + PAGE_SIZE - 1) <= PrivateCacheMap->ReadAheadMask)) {
+
+ FileOffset1.QuadPart = (LONGLONG)( ROUND_TO_PAGES(Length) );
+ PrivateCacheMap->ReadAheadLength[0] = ReadAheadSize - FileOffset1.LowPart;
+ FileOffset2.QuadPart = (LONGLONG)ReadAheadSize;
+
+ //
+ // Calculate the next read ahead boundary.
+ //
+
+ } else {
+
+ FileOffset1.QuadPart = PrivateCacheMap->ReadAheadOffset[1].QuadPart +
+ (LONGLONG)ReadAheadSize;
+
+ //
+ // If the end of the current read is actually beyond where we would
+ // normally do our read ahead, then we have fallen behind, and we must
+ // advance to that spot.
+ //
+
+ if (FileOffset2.QuadPart > FileOffset1.QuadPart) {
+ FileOffset1 = FileOffset2;
+ }
+ PrivateCacheMap->ReadAheadLength[0] = ReadAheadSize;
+ FileOffset2.QuadPart = FileOffset1.QuadPart + (LONGLONG)ReadAheadSize;
+ }
+
+ //
+ // Now issue the next two read aheads.
+ //
+
+ PrivateCacheMap->ReadAheadOffset[0] = FileOffset1;
+
+ PrivateCacheMap->ReadAheadOffset[1] = FileOffset2;
+ PrivateCacheMap->ReadAheadLength[1] = ReadAheadSize;
+
+ Changed = TRUE;
+ }
+
+ //
+ // Read Ahead Case 1.
+ //
+ // If this is the third of three sequential reads, then we will see if
+ // we can read ahead. Note that if the first read to a file is to
+ // offset 0, it passes this test.
+ //
+
+ } else if ((NewOffset.HighPart == PrivateCacheMap->BeyondLastByte2.HighPart)
+
+ &&
+
+ ((NewOffset.LowPart & ~NOISE_BITS)
+ == (PrivateCacheMap->BeyondLastByte2.LowPart & ~NOISE_BITS))
+
+ &&
+
+ (PrivateCacheMap->FileOffset2.HighPart
+ == PrivateCacheMap->BeyondLastByte1.HighPart)
+
+ &&
+
+ ((PrivateCacheMap->FileOffset2.LowPart & ~NOISE_BITS)
+ == (PrivateCacheMap->BeyondLastByte1.LowPart & ~NOISE_BITS))) {
+
+ //
+ // On the first read if we are using a large read ahead granularity,
+ // and the read did not get it all, we will just get the rest of the
+ // first data we want.
+ //
+
+ if ((FileOffset->QuadPart == 0)
+
+ &&
+
+ (PrivateCacheMap->ReadAheadMask > (PAGE_SIZE - 1))
+
+ &&
+
+ ((Length + PAGE_SIZE - 1) <= PrivateCacheMap->ReadAheadMask)) {
+
+ FileOffset2.QuadPart = (LONGLONG)( ROUND_TO_PAGES(Length) );
+ }
+
+ //
+ // Round read offset to next read ahead boundary.
+ //
+
+ else {
+ FileOffset2.QuadPart = NewBeyond.QuadPart + (LONGLONG)ReadAheadSize;
+
+ FileOffset2.LowPart &= ~PrivateCacheMap->ReadAheadMask;
+ }
+
+ //
+ // Set read ahead length to be the same as for the most recent read,
+ // up to our max.
+ //
+
+ if (FileOffset2.QuadPart != PrivateCacheMap->ReadAheadOffset[1].QuadPart) {
+
+ ASSERT( FileOffset2.HighPart >= 0 );
+
+ Changed = TRUE;
+ PrivateCacheMap->ReadAheadOffset[1] = FileOffset2;
+ PrivateCacheMap->ReadAheadLength[1] = ReadAheadSize;
+ }
+ }
+
+ //
+ // Read Ahead Case 2.
+ //
+ // If this is the third read following a particular stride, then we
+ // will see if we can read ahead. One example of an application that
+ // might do this is a spreadsheet. Note that this code even works
+ // for negative strides.
+ //
+
+ else if ( ( NewOffset.QuadPart -
+ PrivateCacheMap->FileOffset2.QuadPart ) ==
+ ( PrivateCacheMap->FileOffset2.QuadPart -
+ PrivateCacheMap->FileOffset1.QuadPart )) {
+
+ //
+ // According to the current stride, the next offset will be:
+ //
+ // NewOffset + (NewOffset - FileOffset2)
+ //
+ // which is the same as:
+ //
+ // (NewOffset * 2) - FileOffset2
+ //
+
+ FileOffset2.QuadPart = ( NewOffset.QuadPart << 1 ) - PrivateCacheMap->FileOffset2.QuadPart;
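+
+ //
+ // For example, reads at offsets 0, 0x10000 and 0x20000 establish a
+ // 0x10000 stride, so the offset computed here is 0x30000
+ // (2 * 0x20000 - 0x10000).
+ //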
+
+ //
+ // If our stride is going backwards through the file, we
+ // have to detect the case where the next step would wrap.
+ //
+
+ if (FileOffset2.HighPart >= 0) {
+
+ //
+ // The read ahead length must be extended by the same amount that
+ // we will round the PrivateCacheMap->ReadAheadOffset down.
+ //
+
+ Length += FileOffset2.LowPart & (PAGE_SIZE - 1);
+
+ //
+ // Now round the PrivateCacheMap->ReadAheadOffset down.
+ //
+
+ FileOffset2.LowPart &= ~(PAGE_SIZE - 1);
+ PrivateCacheMap->ReadAheadOffset[1] = FileOffset2;
+
+ //
+ // Round to page boundary.
+ //
+
+ PrivateCacheMap->ReadAheadLength[1] = ROUND_TO_PAGES(Length);
+ Changed = TRUE;
+ }
+ }
+
+ //
+ // Get out if the ReadAhead requirements did not change.
+ //
+
+ if (!Changed || PrivateCacheMap->ReadAheadActive) {
+
+ DebugTrace( 0, me, "Read ahead already in progress or no change\n", 0 );
+
+ ExReleaseSpinLock( &PrivateCacheMap->ReadAheadSpinLock, OldIrql );
+ return;
+ }
+
+ //
+ // Otherwise, we will proceed and try to schedule the read ahead
+ // ourselves.
+ //
+
+ PrivateCacheMap->ReadAheadActive = TRUE;
+
+ //
+ // Release spin lock on way out
+ //
+
+ ExReleaseSpinLock( &PrivateCacheMap->ReadAheadSpinLock, OldIrql );
+
+ //
+ // Queue the read ahead request to the Lazy Writer's work queue.
+ //
+
+ DebugTrace( 0, me, "Queueing read ahead to worker thread\n", 0 );
+
+ WorkQueueEntry = CcAllocateWorkQueueEntry();
+
+ //
+ // If we failed to allocate a work queue entry, then, we will
+ // quietly bag it. Read ahead is only an optimization, and
+ // no one ever requires that it occur.
+ //
+
+ if (WorkQueueEntry != NULL) {
+
+ //
+ // We must reference this file object so that it cannot go away
+ // until we finish Read Ahead processing in the Worker Thread.
+ //
+
+ ObReferenceObject ( FileObject );
+
+ //
+ // Increment open count to make sure the SharedCacheMap stays around.
+ //
+
+ ExAcquireFastLock( &CcMasterSpinLock, &OldIrql );
+ SharedCacheMap->OpenCount += 1;
+ ExReleaseFastLock( &CcMasterSpinLock, OldIrql );
+
+ WorkQueueEntry->Function = (UCHAR)ReadAhead;
+ WorkQueueEntry->Parameters.Read.FileObject = FileObject;
+
+ CcPostWorkQueue( WorkQueueEntry, &CcExpressWorkQueue );
+ }
+
+ //
+ // If we failed to allocate a Work Queue Entry, or all of the pages
+ // are resident we must set the active flag false.
+ //
+
+ else {
+
+ ExAcquireFastLock( &PrivateCacheMap->ReadAheadSpinLock, &OldIrql );
+ PrivateCacheMap->ReadAheadActive = FALSE;
+ ExReleaseFastLock( &PrivateCacheMap->ReadAheadSpinLock, OldIrql );
+ }
+
+ DebugTrace(-1, me, "CcScheduleReadAhead -> VOID\n", 0 );
+
+ return;
+}
+
+
+VOID
+FASTCALL
+CcPerformReadAhead (
+ IN PFILE_OBJECT FileObject
+ )
+
+/*++
+
+Routine Description:
+
+ This routine is called by the Lazy Writer to perform read ahead which
+ has been scheduled for this file by CcScheduleReadAhead.
+
+Arguments:
+
+ FileObject - supplies pointer to FileObject on which readahead should be
+ considered.
+
+Return Value:
+
+ None
+--*/
+
+{
+ KIRQL OldIrql;
+ PSHARED_CACHE_MAP SharedCacheMap;
+ PPRIVATE_CACHE_MAP PrivateCacheMap;
+ ULONG i;
+ LARGE_INTEGER ReadAheadOffset[2];
+ ULONG ReadAheadLength[2];
+ PCACHE_MANAGER_CALLBACKS Callbacks;
+ PVOID Context;
+ ULONG SavedState;
+ BOOLEAN Done;
+ BOOLEAN HitEof = FALSE;
+ BOOLEAN ReadAheadPerformed = FALSE;
+ BOOLEAN FaultOccurred = FALSE;
+ PETHREAD Thread = PsGetCurrentThread();
+ PVACB Vacb = NULL;
+
+ DebugTrace(+1, me, "CcPerformReadAhead:\n", 0 );
+ DebugTrace( 0, me, " FileObject = %08lx\n", FileObject );
+
+ MmSavePageFaultReadAhead( Thread, &SavedState );
+
+ try {
+
+ //
+ // Since we have the open count biased, we can safely access the
+ // SharedCacheMap.
+ //
+
+ SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap;
+
+ Callbacks = SharedCacheMap->Callbacks;
+ Context = SharedCacheMap->LazyWriteContext;
+
+ //
+ // After the first time, keep looping as long as there are new
+ // read ahead requirements. (We will skip out below.)
+ //
+
+ while (TRUE) {
+
+ //
+ // Get SharedCacheMap and PrivateCacheMap. If either are now NULL, get
+ // out.
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+
+ PrivateCacheMap = FileObject->PrivateCacheMap;
+
+ //
+ // Now capture the information that we need, so that we can drop the
+ // SharedList Resource. This information is advisory only anyway, and
+ // the caller must guarantee that the FileObject is referenced.
+ //
+
+ if (PrivateCacheMap != NULL) {
+
+ ExAcquireSpinLockAtDpcLevel( &PrivateCacheMap->ReadAheadSpinLock );
+
+ //
+ // We are done when the lengths are 0
+ //
+
+ Done = ((PrivateCacheMap->ReadAheadLength[0] |
+ PrivateCacheMap->ReadAheadLength[1]) == 0);
+
+ ReadAheadOffset[0] = PrivateCacheMap->ReadAheadOffset[0];
+ ReadAheadOffset[1] = PrivateCacheMap->ReadAheadOffset[1];
+ ReadAheadLength[0] = PrivateCacheMap->ReadAheadLength[0];
+ ReadAheadLength[1] = PrivateCacheMap->ReadAheadLength[1];
+ PrivateCacheMap->ReadAheadLength[0] = 0;
+ PrivateCacheMap->ReadAheadLength[1] = 0;
+
+ ExReleaseSpinLockFromDpcLevel( &PrivateCacheMap->ReadAheadSpinLock );
+ }
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+
+ //
+ // Acquire the file shared.
+ //
+
+ (*Callbacks->AcquireForReadAhead)( Context, TRUE );
+
+ if ((PrivateCacheMap == NULL) || Done) {
+
+ try_return( NOTHING );
+ }
+
+ //
+ // PERFORM READ AHEAD
+ //
+ //
+ // Now loop until everything is read in. The Read ahead is accomplished
+ // by touching the pages with an appropriate ReadAhead parameter in MM.
+ //
+
+ i = 0;
+
+ do {
+
+ LARGE_INTEGER Offset, SavedOffset;
+ ULONG Length, SavedLength;
+
+ Offset = ReadAheadOffset[i];
+ Length = ReadAheadLength[i];
+ SavedOffset = Offset;
+ SavedLength = Length;
+
+ if ((Length != 0)
+
+ &&
+
+ ( Offset.QuadPart <= SharedCacheMap->FileSize.QuadPart )) {
+
+ ReadAheadPerformed = TRUE;
+
+ //
+ // Keep length within file and MAX_READ_AHEAD
+ //
+
+ if ( ( Offset.QuadPart + (LONGLONG)Length ) >= SharedCacheMap->FileSize.QuadPart ) {
+
+ Length = (ULONG)( SharedCacheMap->FileSize.QuadPart - Offset.QuadPart );
+ HitEof = TRUE;
+
+ }
+ if (Length > MAX_READ_AHEAD) {
+ Length = MAX_READ_AHEAD;
+ }
+
+ //
+ // Now loop to read all of the desired data in. This loop
+ // is more or less the same as the loop that reads data in
+ // CcCopyRead, except that we do not copy anything, just
+ // unmap as soon as it is in.
+ //
+
+ while (Length != 0) {
+
+ ULONG ReceivedLength;
+ PVOID CacheBuffer;
+ ULONG PagesToGo;
+
+ //
+ // Call local routine to Map or Access the file data.
+ // If we cannot map the data because of a Wait condition,
+ // return FALSE.
+ //
+ // Since this routine is intended to be called from
+ // the finally handler from file system read modules,
+ // it is imperative that it not raise any exceptions.
+ // Therefore, if any expected exception is raised, we
+ // will simply get out.
+ //
+
+ CacheBuffer = CcGetVirtualAddress( SharedCacheMap,
+ Offset,
+ &Vacb,
+ &ReceivedLength );
+
+ //
+ // If we got more than we need, make sure to only transfer
+ // the right amount.
+ //
+
+ if (ReceivedLength > Length) {
+ ReceivedLength = Length;
+ }
+
+ //
+ // Now loop to touch all of the pages, calling MM to insure
+ // that if we fault, we take in exactly the number of pages
+ // we need.
+ //
+
+ PagesToGo = COMPUTE_PAGES_SPANNED( CacheBuffer,
+ ReceivedLength );
+
+ CcMissCounter = &CcReadAheadIos;
+
+ while (PagesToGo) {
+
+ MmSetPageFaultReadAhead( Thread, (PagesToGo - 1) );
+ FaultOccurred = (BOOLEAN)!MmCheckCachedPageState(CacheBuffer, FALSE);
+
+ CacheBuffer = (PCHAR)CacheBuffer + PAGE_SIZE;
+ PagesToGo -= 1;
+ }
+ CcMissCounter = &CcThrowAway;
+
+ //
+ // Calculate how much data we have left to go.
+ //
+
+ Length -= ReceivedLength;
+
+ //
+ // Assume we did not get all the data we wanted, and set
+ // Offset to the end of the returned data.
+ //
+
+ Offset.QuadPart = Offset.QuadPart + (LONGLONG)ReceivedLength;
+
+ //
+ // Free the virtual address before looping back for any
+ // remaining data.
+ //
+
+ CcFreeVirtualAddress( Vacb );
+ Vacb = NULL;
+ }
+ }
+ i += 1;
+ } while (i <= 1);
+
+ //
+ // Release the file
+ //
+
+ (*Callbacks->ReleaseFromReadAhead)( Context );
+ }
+
+ try_exit: NOTHING;
+ }
+ finally {
+
+ MmResetPageFaultReadAhead(Thread, SavedState);
+ CcMissCounter = &CcThrowAway;
+
+ //
+ // If we got an error faulting a single page in, release the Vacb
+ // here. It is important to free any mapping before dropping the
+ // resource to prevent purge problems.
+ //
+
+ if (Vacb != NULL) {
+ CcFreeVirtualAddress( Vacb );
+ }
+
+ //
+ // Release the file
+ //
+
+ (*Callbacks->ReleaseFromReadAhead)( Context );
+
+ //
+ // To show we are done, we must make sure the PrivateCacheMap is
+ // still there.
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+
+ PrivateCacheMap = FileObject->PrivateCacheMap;
+
+ //
+ // Show readahead is going inactive.
+ //
+
+ if (PrivateCacheMap != NULL) {
+
+ ExAcquireSpinLockAtDpcLevel( &PrivateCacheMap->ReadAheadSpinLock );
+ PrivateCacheMap->ReadAheadActive = FALSE;
+
+ //
+ // If he said sequential only and we smashed into Eof, then
+ // let's reset the highwater mark in case he wants to read the
+ // file sequentially again.
+ //
+
+ if (HitEof && FlagOn(FileObject->Flags, FO_SEQUENTIAL_ONLY)) {
+ PrivateCacheMap->ReadAheadOffset[1].LowPart =
+ PrivateCacheMap->ReadAheadOffset[1].HighPart = 0;
+ }
+
+ //
+ // If no faults occurred, turn read ahead off.
+ //
+
+ if (ReadAheadPerformed && !FaultOccurred) {
+ PrivateCacheMap->ReadAheadEnabled = FALSE;
+ }
+
+ ExReleaseSpinLockFromDpcLevel( &PrivateCacheMap->ReadAheadSpinLock );
+ }
+
+ //
+ // Free SharedCacheMap list
+ //
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+
+ ObDereferenceObject( FileObject );
+
+ //
+ // Serialize again to decrement the open count.
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+
+ SharedCacheMap->OpenCount -= 1;
+
+ if ((SharedCacheMap->OpenCount == 0) &&
+ !FlagOn(SharedCacheMap->Flags, WRITE_QUEUED) &&
+ (SharedCacheMap->DirtyPages == 0)) {
+
+ //
+ // Move to the dirty list.
+ //
+
+ RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
+ InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks,
+ &SharedCacheMap->SharedCacheMapLinks );
+
+ //
+ // Make sure the Lazy Writer will wake up, because we
+ // want him to delete this SharedCacheMap.
+ //
+
+ LazyWriter.OtherWork = TRUE;
+ if (!LazyWriter.ScanActive) {
+ CcScheduleLazyWriteScan();
+ }
+ }
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ }
+
+ DebugTrace(-1, me, "CcPerformReadAhead -> VOID\n", 0 );
+
+ return;
+}
+
+
+VOID
+CcSetDirtyInMask (
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN PLARGE_INTEGER FileOffset,
+ IN ULONG Length
+ )
+
+/*++
+
+Routine Description:
+
+ This routine may be called to set a range of pages dirty in a user data
+ file, by just setting the corresponding bits in the mask bcb.
+
+Arguments:
+
+ SharedCacheMap - SharedCacheMap where the pages are to be set dirty.
+
+ FileOffset - FileOffset of first page to set dirty
+
+ Length - Used in conjunction with FileOffset to determine how many pages
+ to set dirty.
+
+Return Value:
+
+ None
+
+--*/
+
+{
+ KIRQL OldIrql;
+ PULONG MaskPtr;
+ ULONG Mask;
+ PMBCB Mbcb;
+ ULONG FirstPage;
+ ULONG LastPage;
+ LARGE_INTEGER BeyondLastByte;
+
+ //
+ // Here is the maximum size file supported by this implementation.
+ //
+
+ ASSERT((FileOffset->HighPart & ~(PAGE_SIZE - 1)) == 0);
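+
+ //
+ // (With a 4KB page this asserts that the file offset is below 2**44,
+ // i.e. 16TB, so that the page numbers computed below fit in a ULONG.)
+ //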
+
+ //
+ // Initialize our locals.
+ //
+
+ FirstPage = (ULONG)((FileOffset->LowPart >> PAGE_SHIFT) |
+ (FileOffset->HighPart << (32 - PAGE_SHIFT)));
+ LastPage = FirstPage +
+ ((ULONG)((FileOffset->LowPart & (PAGE_SIZE - 1)) + Length - 1) >> PAGE_SHIFT);
+ BeyondLastByte.LowPart = (LastPage + 1) << PAGE_SHIFT;
+ BeyondLastByte.HighPart = (LONG)(LastPage >> (32 - PAGE_SHIFT));
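+
+ //
+ // For example, assuming a 4KB page, a FileOffset of 0x5000 with a
+ // Length of 0x3000 yields FirstPage == 5, LastPage == 7, and
+ // BeyondLastByte == 0x8000.
+ //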
+
+ //
+ // We have to acquire the shared cache map list, because we
+ // may be changing lists.
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+
+ //
+ // If there is no Mbcb, or it is not big enough, we will have to allocate one.
+ //
+
+ Mbcb = SharedCacheMap->Mbcb;
+ if ((Mbcb == NULL) || (LastPage >= (Mbcb->Bitmap.SizeOfBitMap - 1))) {
+
+ PMBCB NewMbcb;
+ ULONG RoundedBcbSize = ((sizeof(BCB) + 7) & ~7);
+ ULONG SizeInBytes = ((LastPage + 1 + 1 + 7) / 8) + sizeof(MBCB);
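+
+ //
+ // The bitmap needs one bit for each page up through LastPage, plus a
+ // spare bit, rounded up to whole bytes, in addition to the MBCB
+ // header itself.
+ //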
+
+ //
+ // If the size needed is not larger than a Bcb, then get one from the
+ // Bcb zone.
+ //
+
+ if (SizeInBytes <= RoundedBcbSize) {
+
+ NewMbcb = (PMBCB)CcAllocateInitializeBcb( NULL, NULL, NULL, NULL );
+
+ if (NewMbcb != NULL) {
+ NewMbcb->Bitmap.SizeOfBitMap = (RoundedBcbSize - sizeof(MBCB)) * 8;
+ } else {
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ ExRaiseStatus( STATUS_INSUFFICIENT_RESOURCES );
+ }
+
+ //
+ // Otherwise, we will allocate one from the pool. We throw in a fudge
+ // factor of 1 below to account for any bits that may shift off the end,
+ // plus 4 to insure a long word of 0's at the end for scanning, and then
+ // round up to a quad word boundary that we will get anyway.
+ //
+
+ } else {
+
+ ULONG SizeToAllocate = (ULONG)(((SharedCacheMap->SectionSize.LowPart >> (PAGE_SHIFT + 3)) |
+ (SharedCacheMap->SectionSize.HighPart << (32 - (PAGE_SHIFT + 3)))) +
+ sizeof(MBCB) + 1 + 7) & ~7;
+
+ NewMbcb = ExAllocatePool( NonPagedPool, SizeToAllocate );
+
+ if (NewMbcb != NULL) {
+ RtlZeroMemory( NewMbcb, SizeToAllocate );
+ NewMbcb->Bitmap.SizeOfBitMap = (SizeToAllocate - sizeof(MBCB)) * 8;
+ } else {
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ ExRaiseStatus( STATUS_INSUFFICIENT_RESOURCES );
+ }
+ }
+
+ //
+ // Set in the node type, "empty" FirstDirtyPage state, and the address
+ // of the bitmap.
+ //
+
+ NewMbcb->NodeTypeCode = CACHE_NTC_MBCB;
+ NewMbcb->FirstDirtyPage = MAXULONG;
+ NewMbcb->Bitmap.Buffer = (PULONG)(NewMbcb + 1);
+
+ //
+ // If there already was an Mbcb, we need to copy the relevant data from
+ // it and deallocate it.
+ //
+
+ if (Mbcb != NULL) {
+
+ NewMbcb->DirtyPages = Mbcb->DirtyPages;
+ NewMbcb->FirstDirtyPage = Mbcb->FirstDirtyPage;
+ NewMbcb->LastDirtyPage = Mbcb->LastDirtyPage;
+ NewMbcb->ResumeWritePage = Mbcb->ResumeWritePage;
+ RtlCopyMemory( NewMbcb + 1, Mbcb + 1, Mbcb->Bitmap.SizeOfBitMap / 8 );
+
+ CcDeallocateBcb( (PBCB)Mbcb );
+ }
+
+ //
+ // Finally, set to use our new Mbcb.
+ //
+
+ SharedCacheMap->Mbcb = Mbcb = NewMbcb;
+ }
+
+ //
+ // If this is the first dirty page for this cache map, there is some work
+ // to do.
+ //
+
+ if (SharedCacheMap->DirtyPages == 0) {
+
+ //
+ // If the lazy write scan is not active, then start it.
+ //
+
+ if (!LazyWriter.ScanActive) {
+ CcScheduleLazyWriteScan();
+ }
+
+ //
+ // Move to the dirty list.
+ //
+
+ RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
+ InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks,
+ &SharedCacheMap->SharedCacheMapLinks );
+
+ Mbcb->ResumeWritePage = FirstPage;
+ }
+
+ //
+ // Now update the first and last dirty page indices and the bitmap.
+ //
+
+ if (FirstPage < Mbcb->FirstDirtyPage) {
+ Mbcb->FirstDirtyPage = FirstPage;
+ }
+
+ if (LastPage > Mbcb->LastDirtyPage) {
+ Mbcb->LastDirtyPage = LastPage;
+ }
+
+ MaskPtr = &Mbcb->Bitmap.Buffer[FirstPage / 32];
+ Mask = 1 << (FirstPage % 32);
+
+ //
+ // Loop to set all of the bits and adjust the DirtyPage totals.
+ //
+
+ for ( ; FirstPage <= LastPage; FirstPage++) {
+
+ if ((*MaskPtr & Mask) == 0) {
+
+ CcTotalDirtyPages += 1;
+ SharedCacheMap->DirtyPages += 1;
+ Mbcb->DirtyPages += 1;
+ *MaskPtr |= Mask;
+ }
+
+ Mask <<= 1;
+
+ if (Mask == 0) {
+
+ MaskPtr += 1;
+ Mask = 1;
+ }
+ }
+
+ //
+ // See if we need to advance our goal for ValidDataLength.
+ //
+
+ BeyondLastByte.QuadPart = FileOffset->QuadPart + (LONGLONG)Length;
+
+ if ( BeyondLastByte.QuadPart > SharedCacheMap->ValidDataGoal.QuadPart ) {
+
+ SharedCacheMap->ValidDataGoal = BeyondLastByte;
+ }
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+}
+
+
+VOID
+CcSetDirtyPinnedData (
+ IN PVOID BcbVoid,
+ IN PLARGE_INTEGER Lsn OPTIONAL
+ )
+
+/*++
+
+Routine Description:
+
+ This routine may be called to set a Bcb (returned by CcPinFileData)
+ dirty, and a candidate for the Lazy Writer. All Bcbs should be set
+ dirty by calling this routine, even if they are to be flushed
+ another way.
+
+Arguments:
+
+ Bcb - Supplies a pointer to a pinned (by CcPinFileData) Bcb, to
+ be set dirty.
+
+ Lsn - Lsn to be remembered with page.
+
+Return Value:
+
+ None
+
+--*/
+
+{
+ PBCB Bcbs[2];
+ PBCB *BcbPtrPtr;
+ KIRQL OldIrql;
+ PSHARED_CACHE_MAP SharedCacheMap;
+
+ DebugTrace(+1, me, "CcSetDirtyPinnedData: Bcb = %08lx\n", BcbVoid );
+
+ //
+ // Assume this is a normal Bcb, and set up for loop below.
+ //
+
+ Bcbs[0] = (PBCB)BcbVoid;
+ Bcbs[1] = NULL;
+ BcbPtrPtr = &Bcbs[0];
+
+ //
+ // If it is an overlap Bcb, then point into the Bcb vector
+ // for the loop.
+ //
+
+ if (Bcbs[0]->NodeTypeCode == CACHE_NTC_OBCB) {
+ BcbPtrPtr = &((POBCB)Bcbs[0])->Bcbs[0];
+ }
+
+ //
+ // Loop to set all Bcbs dirty
+ //
+
+ while (*BcbPtrPtr != NULL) {
+
+ Bcbs[0] = *(BcbPtrPtr++);
+
+ //
+ // Should be no ReadOnly Bcbs
+ //
+
+ ASSERT(((ULONG)Bcbs[0] & 1) != 1);
+
+ SharedCacheMap = Bcbs[0]->SharedCacheMap;
+
+ //
+ // We have to acquire the shared cache map list, because we
+ // may be changing lists.
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+
+ if (!Bcbs[0]->Dirty) {
+
+ ULONG Pages = Bcbs[0]->ByteLength >> PAGE_SHIFT;
+
+ //
+ // Set dirty to keep the Bcb from going away until
+ // it is set Undirty, and assign the next modification time stamp.
+ //
+
+ Bcbs[0]->Dirty = TRUE;
+
+ //
+ // Initialize the OldestLsn field.
+ //
+
+ if (ARGUMENT_PRESENT(Lsn)) {
+ Bcbs[0]->OldestLsn = *Lsn;
+ Bcbs[0]->NewestLsn = *Lsn;
+ }
+
+ //
+ // Move it to the dirty list if these are the first dirty pages,
+ // and this is not disabled for write behind.
+ //
+ // Increase the count of dirty bytes in the shared cache map.
+ //
+
+ if ((SharedCacheMap->DirtyPages == 0) &&
+ !FlagOn(SharedCacheMap->Flags, DISABLE_WRITE_BEHIND)) {
+
+ //
+ // If the lazy write scan is not active, then start it.
+ //
+
+ if (!LazyWriter.ScanActive) {
+ CcScheduleLazyWriteScan();
+ }
+
+ RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
+ InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks,
+ &SharedCacheMap->SharedCacheMapLinks );
+ }
+
+ SharedCacheMap->DirtyPages += Pages;
+ CcTotalDirtyPages += Pages;
+ }
+
+ //
+ // If this Lsn happens to be older/newer than the ones we have stored, then
+ // change it.
+ //
+
+ if (ARGUMENT_PRESENT(Lsn)) {
+
+ if ((Bcbs[0]->OldestLsn.QuadPart == 0) || (Lsn->QuadPart < Bcbs[0]->OldestLsn.QuadPart)) {
+ Bcbs[0]->OldestLsn = *Lsn;
+ }
+
+ if (Lsn->QuadPart > Bcbs[0]->NewestLsn.QuadPart) {
+ Bcbs[0]->NewestLsn = *Lsn;
+ }
+ }
+
+ //
+ // See if we need to advance our goal for ValidDataLength.
+ //
+
+ if ( Bcbs[0]->BeyondLastByte.QuadPart > SharedCacheMap->ValidDataGoal.QuadPart ) {
+
+ SharedCacheMap->ValidDataGoal = Bcbs[0]->BeyondLastByte;
+ }
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ }
+
+ DebugTrace(-1, me, "CcSetDirtyPinnedData -> VOID\n", 0 );
+}
+
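+//
+//  Illustrative sketch (not part of this module):  a file system typically
+//  reaches CcSetDirtyPinnedData through one of the public pinning interfaces,
+//  along these approximate lines.
+//
+//      PVOID Bcb;
+//      PVOID Buffer;
+//
+//      if (CcPinRead( FileObject, &FileOffset, Length, TRUE, &Bcb, &Buffer )) {
+//
+//          //  ... modify the pinned bytes at Buffer ...
+//
+//          CcSetDirtyPinnedData( Bcb, NULL );  //  or pass an Lsn for logged data
+//          CcUnpinData( Bcb );
+//      }
+//
+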
+
+NTSTATUS
+CcSetValidData(
+ IN PFILE_OBJECT FileObject,
+ IN PLARGE_INTEGER ValidDataLength
+ )
+
+/*++
+
+Routine Description:
+
+ This routine is used to call the File System to update ValidDataLength
+ for a file.
+
+Arguments:
+
+ FileObject - A pointer to a referenced file object describing which file
+        the new ValidDataLength applies to.
+
+ ValidDataLength - Pointer to new ValidDataLength.
+
+Return Value:
+
+ Status of operation.
+
+--*/
+
+{
+ PIO_STACK_LOCATION IrpSp;
+ PDEVICE_OBJECT DeviceObject;
+ NTSTATUS Status;
+ FILE_END_OF_FILE_INFORMATION Buffer;
+ IO_STATUS_BLOCK IoStatus;
+ KEVENT Event;
+ PIRP Irp;
+
+ DebugTrace(+1, me, "CcSetValidData:\n", 0 );
+ DebugTrace( 0, me, " FileObject = %08lx\n", FileObject );
+ DebugTrace2(0, me, " ValidDataLength = %08lx, %08lx\n",
+ ValidDataLength->LowPart, ValidDataLength->HighPart );
+
+ //
+ // Copy ValidDataLength to our buffer.
+ //
+
+ Buffer.EndOfFile = *ValidDataLength;
+
+ //
+ // Initialize the event.
+ //
+
+ KeInitializeEvent( &Event, NotificationEvent, FALSE );
+
+ //
+ // Begin by getting a pointer to the device object that the file resides
+ // on.
+ //
+
+ DeviceObject = IoGetRelatedDeviceObject( FileObject );
+
+ //
+    // Allocate an I/O Request Packet (IRP) for this operation.
+ //
+
+ Irp = IoAllocateIrp( DeviceObject->StackSize, FALSE );
+ if (Irp == NULL) {
+
+ DebugTrace(-1, me, "CcSetValidData-> STATUS_INSUFFICIENT_RESOURCES\n", 0 );
+
+ return STATUS_INSUFFICIENT_RESOURCES;
+ }
+
+ //
+ // Get a pointer to the first stack location in the packet. This location
+ // will be used to pass the function codes and parameters to the first
+ // driver.
+ //
+
+ IrpSp = IoGetNextIrpStackLocation( Irp );
+
+ //
+ // Fill in the IRP according to this request, setting the flags to
+ // just cause IO to set the event and deallocate the Irp.
+ //
+
+ Irp->Flags = IRP_PAGING_IO | IRP_SYNCHRONOUS_PAGING_IO;
+ Irp->RequestorMode = KernelMode;
+ Irp->UserIosb = &IoStatus;
+ Irp->UserEvent = &Event;
+ Irp->Tail.Overlay.OriginalFileObject = FileObject;
+ Irp->Tail.Overlay.Thread = PsGetCurrentThread();
+ Irp->AssociatedIrp.SystemBuffer = &Buffer;
+
+ //
+    // Fill in the set file information parameters.
+ //
+
+ IrpSp->MajorFunction = IRP_MJ_SET_INFORMATION;
+ IrpSp->FileObject = FileObject;
+ IrpSp->DeviceObject = DeviceObject;
+ IrpSp->Parameters.SetFile.Length = sizeof(FILE_END_OF_FILE_INFORMATION);
+ IrpSp->Parameters.SetFile.FileInformationClass = FileEndOfFileInformation;
+ IrpSp->Parameters.SetFile.FileObject = NULL;
+ IrpSp->Parameters.SetFile.AdvanceOnly = TRUE;
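+
+    //
+    // AdvanceOnly tells the file system that this set-end-of-file request
+    // comes from the Cache Manager and is only meant to advance
+    // ValidDataLength; the file system should never truncate on its behalf.
+    //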
+
+ //
+ // Queue the packet to the appropriate driver based on whether or not there
+ // is a VPB associated with the device. This routine should not raise.
+ //
+
+ Status = IoCallDriver( DeviceObject, Irp );
+
+ //
+ // If pending is returned (which is a successful status),
+ // we must wait for the request to complete.
+ //
+
+ if (Status == STATUS_PENDING) {
+ KeWaitForSingleObject( &Event,
+ Executive,
+ KernelMode,
+ FALSE,
+ (PLARGE_INTEGER)NULL);
+ }
+
+ //
+ // If we got an error back in Status, then the Iosb
+ // was not written, so we will just copy the status
+ // there, then test the final status after that.
+ //
+
+ if (!NT_SUCCESS(Status)) {
+ IoStatus.Status = Status;
+ }
+
+ DebugTrace(-1, me, "CcSetValidData-> %08lx\n", IoStatus.Status );
+
+ return IoStatus.Status;
+}
+
+
+//
+// Internal Support Routine
+//
+
+BOOLEAN
+CcAcquireByteRangeForWrite (
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN PLARGE_INTEGER TargetOffset OPTIONAL,
+ IN ULONG TargetLength,
+ OUT PLARGE_INTEGER FileOffset,
+ OUT PULONG Length,
+ OUT PBCB *FirstBcb
+ )
+
+/*++
+
+Routine Description:
+
+ This routine is called by the Lazy Writer to try to find a contiguous
+ range of bytes from the specified SharedCacheMap that are dirty and
+ should be flushed. After flushing, these bytes should be released
+ by calling CcReleaseByteRangeFromWrite.
+
+Arguments:
+
+ SharedCacheMap - for the file for which the dirty byte range is sought
+
+ TargetOffset - If specified, then only the specified range is
+ to be flushed.
+
+ TargetLength - If target offset specified, this completes the range.
+ In any case, this field is zero for the Lazy Writer,
+ and nonzero for explicit flush calls.
+
+ FileOffset - Returns the offset for the beginning of the dirty byte
+ range to flush
+
+ Length - Returns the length of bytes in the range.
+
+ FirstBcb - Returns the first Bcb in the list for the range, to be used
+ when calling CcReleaseByteRangeFromWrite, or NULL if dirty
+ pages were found in the mask Bcb.
+
+Return Value:
+
+ FALSE - if no dirty byte range could be found to match the necessary
+ criteria.
+
+ TRUE - if a dirty byte range is being returned.
+
+--*/
+
+{
+ KIRQL OldIrql;
+ PMBCB Mbcb;
+ PBCB Bcb;
+ LARGE_INTEGER LsnToFlushTo = {0, 0};
+
+ DebugTrace(+1, me, "CcAcquireByteRangeForWrite:\n", 0);
+ DebugTrace( 0, me, " SharedCacheMap = %08lx\n", SharedCacheMap);
+
+ //
+ // Initially clear outputs.
+ //
+
+ FileOffset->QuadPart = 0;
+ *Length = 0;
+
+ //
+ // We must acquire the CcMasterSpinLock.
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+
+ //
+ // See if there is a simple Mask Bcb, and if there is anything dirty in
+ // it. If so we will simply handle that case here by processing the bitmap.
+ //
+
+ Mbcb = SharedCacheMap->Mbcb;
+
+ if ((Mbcb != NULL) &&
+ (Mbcb->DirtyPages != 0) &&
+ ((Mbcb->PagesToWrite != 0) || (TargetLength != 0))) {
+
+ PULONG EndPtr;
+ PULONG MaskPtr;
+ ULONG Mask;
+ ULONG FirstDirtyPage;
+ ULONG OriginalFirstDirtyPage;
+
+ //
+        // If a target range was specified (an outside call to CcFlushCache for a range),
+ // then calculate FirstPage and EndPtr based on these inputs.
+ //
+
+ if (ARGUMENT_PRESENT(TargetOffset)) {
+
+ FirstDirtyPage = (ULONG)(TargetOffset->QuadPart >> PAGE_SHIFT);
+ EndPtr = &Mbcb->Bitmap.Buffer[(ULONG)((TargetOffset->QuadPart + TargetLength - 1) >> PAGE_SHIFT) / 32];
+
+ //
+ // We do not grow the bitmap with the file, only as we set dirty
+ // pages, so it is possible that the caller is off the end. If
+ // If even the first page is off the end, we will catch it below.
+ //
+
+ if (EndPtr > &Mbcb->Bitmap.Buffer[Mbcb->LastDirtyPage / 32]) {
+
+ EndPtr = &Mbcb->Bitmap.Buffer[Mbcb->LastDirtyPage / 32];
+ }
+
+ //
+ // Otherwise, for the Lazy Writer pick up where we left off.
+ //
+
+ } else {
+
+ //
+ // If a length was specified, then it is an explicit flush, and
+ // we want to start with the first dirty page.
+ //
+
+ FirstDirtyPage = Mbcb->FirstDirtyPage;
+
+ //
+ // Otherwise, it is the Lazy Writer, so pick up at the resume
+ // point so long as that is beyond the FirstDirtyPage.
+ //
+
+ if ((TargetLength == 0) && (Mbcb->ResumeWritePage >= FirstDirtyPage)) {
+ FirstDirtyPage = Mbcb->ResumeWritePage;
+ }
+ EndPtr = &Mbcb->Bitmap.Buffer[Mbcb->LastDirtyPage / 32];
+ }
+
+ //
+ // Form a few other inputs for our dirty page scan.
+ //
+
+ MaskPtr = &Mbcb->Bitmap.Buffer[FirstDirtyPage / 32];
+ Mask = (ULONG)(-1 << (FirstDirtyPage % 32));
+ OriginalFirstDirtyPage = FirstDirtyPage;
+
+ //
+ // Because of the possibility of getting stuck on a "hot spot" which gets
+ // modified over and over, we want to be very careful to resume exactly
+ // at the recorded resume point. If there is nothing there, then we
+        // fall into the loop below to scan for nonzero long words in the bitmap,
+ // starting at the next longword.
+ //
+
+ if ((MaskPtr > EndPtr) || (*MaskPtr & Mask) == 0) {
+
+ MaskPtr += 1;
+ Mask = (ULONG)-1;
+ FirstDirtyPage = (FirstDirtyPage + 32) & ~31;
+
+ //
+ // If we go beyond the end, then we must wrap back to the first
+ // dirty page. We will just go back to the start of the first
+ // longword.
+ //
+
+ if (MaskPtr > EndPtr) {
+
+ //
+ // If this is an explicit flush, get out when we hit the end
+ // of the range.
+ //
+
+ if (TargetLength != 0) {
+
+ goto Scan_Bcbs;
+ }
+
+ MaskPtr = &Mbcb->Bitmap.Buffer[Mbcb->FirstDirtyPage / 32];
+ FirstDirtyPage = Mbcb->FirstDirtyPage & ~31;
+ OriginalFirstDirtyPage = Mbcb->FirstDirtyPage;
+
+ //
+ // We can also backup the last dirty page hint to our
+ // resume point.
+ //
+
+ ASSERT(Mbcb->ResumeWritePage >= Mbcb->FirstDirtyPage);
+
+ Mbcb->LastDirtyPage = Mbcb->ResumeWritePage - 1;
+ }
+
+ //
+ // To scan the bitmap faster, we scan for entire long words which are
+ // nonzero.
+ //
+
+ while (*MaskPtr == 0) {
+
+ MaskPtr += 1;
+ FirstDirtyPage += 32;
+
+ //
+ // If we go beyond the end, then we must wrap back to the first
+ // dirty page. We will just go back to the start of the first
+ // longword.
+ //
+
+ if (MaskPtr > EndPtr) {
+
+ //
+ // If this is an explicit flush, get out when we hit the end
+ // of the range.
+ //
+
+ if (TargetLength != 0) {
+
+ goto Scan_Bcbs;
+ }
+
+ MaskPtr = &Mbcb->Bitmap.Buffer[Mbcb->FirstDirtyPage / 32];
+ FirstDirtyPage = Mbcb->FirstDirtyPage & ~31;
+ OriginalFirstDirtyPage = Mbcb->FirstDirtyPage;
+
+ //
+ // We can also backup the last dirty page hint to our
+ // resume point.
+ //
+
+ ASSERT(Mbcb->ResumeWritePage >= Mbcb->FirstDirtyPage);
+
+ Mbcb->LastDirtyPage = Mbcb->ResumeWritePage - 1;
+ }
+ }
+ }
+
+ //
+ // Calculate the first set bit in the mask that we hit on.
+ //
+
+ Mask = ~Mask + 1;
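+
+        //
+        // (For example, if Mask was 0xFFFFFF00 on entry, ~Mask + 1 yields
+        // 0x00000100, leaving only the lowest bit of the old mask set.)
+        //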
+
+ //
+ // Now loop to find the first set bit.
+ //
+
+ while ((*MaskPtr & Mask) == 0) {
+
+ Mask <<= 1;
+ FirstDirtyPage += 1;
+ }
+
+ //
+ // If a TargetOffset was specified, then make sure we do not start
+ // beyond the specified range.
+ //
+
+ if (ARGUMENT_PRESENT(TargetOffset) &&
+ (FirstDirtyPage >= ((TargetOffset->QuadPart + TargetLength + PAGE_SIZE - 1) >> PAGE_SHIFT))) {
+
+ goto Scan_Bcbs;
+ }
+
+ //
+ // Now loop to count the set bits at that point, clearing them as we
+ // go because we plan to write the corresponding pages. Stop as soon
+ // as we find a clean page, or we reach our maximum write size. Of
+ // course we want to ignore long word boundaries and keep trying to
+ // extend the write. We do not check for wrapping around the end of
+ // the bitmap here, because we guarantee some zero bits at the end
+ // in CcSetDirtyInMask.
+ //
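+        // If a write later fails with a verify-required class of error,
+        // CcReleaseByteRangeFromWrite simply calls CcSetDirtyInMask again for
+        // this range, so the bits cleared here get set dirty once more and
+        // another attempt is made.
+        //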
+
+ while (((*MaskPtr & Mask) != 0) && (*Length < (MAX_WRITE_BEHIND / PAGE_SIZE)) &&
+ (!ARGUMENT_PRESENT(TargetOffset) || ((FirstDirtyPage + *Length) <
+ (ULONG)((TargetOffset->QuadPart + TargetLength + PAGE_SIZE - 1) >> PAGE_SHIFT)))) {
+
+ ASSERT(MaskPtr <= (&Mbcb->Bitmap.Buffer[Mbcb->LastDirtyPage / 32]));
+
+ *MaskPtr -= Mask;
+ *Length += 1;
+ Mask <<= 1;
+
+ if (Mask == 0) {
+
+ MaskPtr += 1;
+ Mask = 1;
+
+ if (MaskPtr > EndPtr) {
+ break;
+ }
+ }
+ }
+
+ //
+ // Now reduce the count of pages we were supposed to write this time,
+ // possibly clearing this count.
+ //
+
+ if (*Length < Mbcb->PagesToWrite) {
+
+ Mbcb->PagesToWrite -= *Length;
+
+ } else {
+
+ Mbcb->PagesToWrite = 0;
+ }
+
+ //
+ // Reduce the dirty page counts by the number of pages we just cleared.
+ //
+
+ ASSERT(Mbcb->DirtyPages >= *Length);
+
+ CcTotalDirtyPages -= *Length;
+ SharedCacheMap->DirtyPages -= *Length;
+ Mbcb->DirtyPages -= *Length;
+
+ //
+ // Normally we need to reduce CcPagesYetToWrite appropriately.
+ //
+
+ if (CcPagesYetToWrite > *Length) {
+ CcPagesYetToWrite -= *Length;
+ } else {
+ CcPagesYetToWrite = 0;
+ }
+
+ //
+ // If we took out the last dirty page, then move the SharedCacheMap
+ // back to the clean list.
+ //
+
+ if (SharedCacheMap->DirtyPages == 0) {
+
+ RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
+ InsertTailList( &CcCleanSharedCacheMapList,
+ &SharedCacheMap->SharedCacheMapLinks );
+ }
+
+ //
+        // If the number of dirty pages for the Mbcb went to zero, we can reset
+ // our hint fields now.
+ //
+
+ if (Mbcb->DirtyPages == 0) {
+
+ Mbcb->FirstDirtyPage = MAXULONG;
+ Mbcb->LastDirtyPage = 0;
+ Mbcb->ResumeWritePage = 0;
+
+ //
+ // Otherwise we have to update the hint fields.
+ //
+
+ } else {
+
+ //
+ // Advance the first dirty page hint if we can.
+ //
+
+ if (Mbcb->FirstDirtyPage == OriginalFirstDirtyPage) {
+
+ Mbcb->FirstDirtyPage = FirstDirtyPage + *Length;
+ }
+
+ //
+ // Set to resume the next scan at the next bit for
+ // the Lazy Writer.
+ //
+
+ if (TargetLength == 0) {
+
+ Mbcb->ResumeWritePage = FirstDirtyPage + *Length;
+ }
+ }
+
+ //
+ // We can save a callback by letting our caller know when
+ // we have no more pages to write.
+ //
+
+ if (IsListEmpty(&SharedCacheMap->BcbList)) {
+ SharedCacheMap->PagesToWrite = Mbcb->PagesToWrite;
+ }
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+
+ //
+ // Now form all of our outputs. We calculated *Length as a page count,
+ // but our caller wants it in bytes.
+ //
+
+ *Length <<= PAGE_SHIFT;
+ FileOffset->QuadPart = (LONGLONG)FirstDirtyPage << PAGE_SHIFT;
+ *FirstBcb = NULL;
+
+ DebugTrace2(0, me, " <FileOffset = %08lx, %08lx\n", FileOffset->LowPart,
+ FileOffset->HighPart );
+ DebugTrace( 0, me, " <Length = %08lx\n", *Length );
+ DebugTrace(-1, me, "CcAcquireByteRangeForWrite -> TRUE\n", 0 );
+
+ return TRUE;
+ }
+
+ //
+ // We get here if there is no Mbcb or no dirty pages in it. Note that we
+ // wouldn't even be here if there were no dirty pages in this SharedCacheMap.
+ //
+
+ //
+ // Now point to last Bcb in List, and loop until we hit one of the
+ // breaks below or the beginning of the list.
+ //
+
+Scan_Bcbs:
+
+ //
+ // Use while TRUE to handle case where the current target range wraps
+ // (escape is at the bottom).
+ //
+
+ while (TRUE) {
+
+ Bcb = CONTAINING_RECORD( SharedCacheMap->BcbList.Blink, BCB, BcbLinks );
+
+ //
+ // If this is a large file, and we are to resume from a nonzero FileOffset,
+ // call CcFindBcb to get a quicker start.
+ //
+
+ if ((SharedCacheMap->SectionSize.QuadPart > BEGIN_BCB_LIST_ARRAY) &&
+ !ARGUMENT_PRESENT(TargetOffset) &&
+ (SharedCacheMap->BeyondLastFlush != 0)) {
+
+ LARGE_INTEGER TempQ;
+
+ TempQ.QuadPart = SharedCacheMap->BeyondLastFlush + PAGE_SIZE;
+
+ //
+ // Position ourselves. If we did not find a Bcb for the BeyondLastFlush
+ // page, then a lower FileOffset was returned, so we want to move forward
+ // one.
+ //
+
+ if (!CcFindBcb( SharedCacheMap,
+ (PLARGE_INTEGER)&SharedCacheMap->BeyondLastFlush,
+ &TempQ,
+ &Bcb )) {
+ Bcb = CONTAINING_RECORD( Bcb->BcbLinks.Blink, BCB, BcbLinks );
+ }
+ }
+
+ while (&Bcb->BcbLinks != &SharedCacheMap->BcbList) {
+
+ //
+ // Skip over this item if it is a listhead.
+ //
+
+ if (Bcb->NodeTypeCode != CACHE_NTC_BCB) {
+
+ Bcb = CONTAINING_RECORD( Bcb->BcbLinks.Blink, BCB, BcbLinks );
+ continue;
+ }
+
+ //
+ // If we are doing a specified range, then get out if we hit a
+ // higher Bcb.
+ //
+
+ if (ARGUMENT_PRESENT(TargetOffset) &&
+ ((TargetOffset->QuadPart + TargetLength) <= Bcb->FileOffset.QuadPart)) {
+
+ break;
+ }
+
+ //
+ // If we have not started a run, then see if this Bcb is a candidate
+ // to start one.
+ //
+
+ if (*Length == 0) {
+
+ //
+ // Else see if the Bcb is dirty, and is in our specified range, if
+ // there is one.
+ //
+
+ if (!Bcb->Dirty ||
+ (ARGUMENT_PRESENT(TargetOffset) && (TargetOffset->QuadPart >= Bcb->BeyondLastByte.QuadPart)) ||
+ (!ARGUMENT_PRESENT(TargetOffset) && (Bcb->FileOffset.QuadPart < SharedCacheMap->BeyondLastFlush))) {
+
+ Bcb = CONTAINING_RECORD( Bcb->BcbLinks.Blink, BCB, BcbLinks );
+ continue;
+ }
+ }
+
+ //
+ // Else, if we have started a run, then if this guy cannot be
+ // appended to the run, then break. Note that we ignore the
+ // Bcb's modification time stamp here to simplify the test.
+ //
+ // If the Bcb is currently pinned, then there is no sense in causing
+ // contention, so we will skip over this guy as well.
+ //
+
+ else {
+ if (!Bcb->Dirty || ( Bcb->FileOffset.QuadPart != ( FileOffset->QuadPart + (LONGLONG)*Length))
+ || (*Length + Bcb->ByteLength > MAX_WRITE_BEHIND)
+ || (Bcb->PinCount != 0)) {
+
+ break;
+ }
+ }
+
+ //
+ // Increment PinCount to prevent Bcb from going away once the
+ // SpinLock is released, or we set it clean for the case where
+ // modified write is allowed.
+ //
+
+ Bcb->PinCount += 1;
+
+ //
+ // Release the SpinLock before waiting on the resource.
+ //
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+
+ if (FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED) &&
+ !FlagOn(SharedCacheMap->Flags, DISABLE_WRITE_BEHIND)) {
+
+ //
+ // Now acquire the Bcb exclusive, so that we know that nobody
+ // has it pinned and thus no one can be modifying the described
+ // buffer. To acquire the first Bcb in a run, we can afford
+ // to wait, because we are not holding any resources. However
+ // if we already have a Bcb, then we better not wait, because
+ // someone could have this Bcb pinned, and then wait for the
+ // Bcb we already have exclusive.
+ //
+ // For streams for which we have not disabled modified page
+ // writing, we do not need to acquire this resource, and the
+ // foreground processing will not be acquiring the Bcb either.
+ //
+
+ if (!ExAcquireResourceExclusive( &Bcb->Resource,
+ (BOOLEAN)(*Length == 0) )) {
+
+ DebugTrace( 0, me, "Could not acquire 2nd Bcb\n", 0 );
+
+ //
+ // Release the Bcb count we took out above. We say
+ // ReadOnly = TRUE since we do not own the resource,
+                    // and SetClean = FALSE because we just want to decrement
+ // the count.
+ //
+
+ CcUnpinFileData( Bcb, TRUE, UNPIN );
+
+ //
+ // When we leave the loop, we have to have the spin lock
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+ break;
+ }
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+
+ //
+ // If someone has the file open WriteThrough, then the Bcb may no
+ // longer be dirty. If so, call CcUnpinFileData to decrement the
+ // PinCount we incremented and free the resource.
+ //
+
+ if (!Bcb->Dirty) {
+
+ //
+ // Release the spinlock so that we can call CcUnpinFileData
+ //
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+
+ CcUnpinFileData( Bcb, FALSE, UNPIN );
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+
+ //
+ // Now if we already have some data we can just break to return
+ // it, otherwise we have to restart the scan, since our Bcb
+ // may have gone away.
+ //
+
+ if (*Length != 0) {
+ break;
+ }
+ else {
+
+ Bcb = CONTAINING_RECORD( SharedCacheMap->BcbList.Blink, BCB, BcbLinks );
+ continue;
+ }
+ }
+
+ //
+ // If we are not in the disable modified write mode (normal user data)
+ // then we must set the buffer clean before doing the write, since we
+ // are unsynchronized with anyone producing dirty data. That way if we,
+ // for example, are writing data out while it is actively being changed,
+ // at least the changer will mark the buffer dirty afterwards and cause
+ // us to write it again later.
+ //
+
+ } else {
+
+ CcUnpinFileData( Bcb, TRUE, SET_CLEAN );
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+ }
+
+ DebugTrace( 0, me, "Adding Bcb = %08lx to run\n", Bcb );
+
+ //
+ // Update all of our return values. Note that FirstBcb refers to the
+ // FirstBcb in terms of how the Bcb list is ordered. Since the Bcb list
+ // is ordered by descending file offsets, FirstBcb will actually return
+ // the Bcb with the highest FileOffset.
+ //
+
+ if (*Length == 0) {
+ *FileOffset = Bcb->FileOffset;
+ }
+ *FirstBcb = Bcb;
+ *Length += Bcb->ByteLength;
+
+ //
+ // If there is a log file flush callback for this stream, then we must
+ // remember the largest Lsn we are about to flush.
+ //
+
+ if ((SharedCacheMap->FlushToLsnRoutine != NULL) &&
+ (Bcb->NewestLsn.QuadPart > LsnToFlushTo.QuadPart)) {
+
+ LsnToFlushTo = Bcb->NewestLsn;
+ }
+
+ Bcb = CONTAINING_RECORD( Bcb->BcbLinks.Blink, BCB, BcbLinks );
+ }
+
+ //
+        // If we found something, update our last flush range and reduce
+ // PagesToWrite.
+ //
+
+ if (*Length != 0) {
+
+ //
+ // If this is the Lazy Writer, then update BeyondLastFlush and
+ // the PagesToWrite target.
+ //
+
+ if (!ARGUMENT_PRESENT(TargetOffset)) {
+
+ SharedCacheMap->BeyondLastFlush = FileOffset->QuadPart + *Length;
+
+ if (SharedCacheMap->PagesToWrite > (*Length >> PAGE_SHIFT)) {
+ SharedCacheMap->PagesToWrite -= (*Length >> PAGE_SHIFT);
+ } else {
+ SharedCacheMap->PagesToWrite = 0;
+ }
+ }
+
+ break;
+
+ //
+ // Else, if we scanned the entire file, get out - nothing to write now.
+ //
+
+ } else if ((SharedCacheMap->BeyondLastFlush == 0) || ARGUMENT_PRESENT(TargetOffset)) {
+ break;
+ }
+
+ //
+ // Otherwise, we may have not found anything because there is nothing
+ // beyond the last flush. In that case it is time to wrap back to 0
+ // and keep scanning.
+ //
+
+ SharedCacheMap->BeyondLastFlush = 0;
+ }
+
+
+
+ //
+    // Now release the spin lock while we go off and do the I/O
+ //
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+
+ //
+ // If we need to flush to some Lsn, this is the time to do it now
+ // that we have found the largest Lsn and freed the spin lock.
+ //
+
+ if (LsnToFlushTo.QuadPart != 0) {
+
+ try {
+
+ (*SharedCacheMap->FlushToLsnRoutine) ( SharedCacheMap->LogHandle,
+ LsnToFlushTo );
+ } except( CcExceptionFilter( GetExceptionCode() )) {
+
+ //
+ // If there was an error, it will be raised. We cannot
+ // write anything until we successfully flush the log
+ // file, so we will release everything here and just
+ // return with 0 bytes.
+ //
+
+ LARGE_INTEGER LastOffset;
+ PBCB NextBcb;
+
+ //
+ // Now loop to free up all of the Bcbs. Set the time
+ // stamps to 0, so that we are guaranteed to try to
+ // flush them again on the next sweep.
+ //
+
+ do {
+ NextBcb = CONTAINING_RECORD( (*FirstBcb)->BcbLinks.Flink, BCB, BcbLinks );
+
+ //
+ // Skip over any listheads.
+ //
+
+ if ((*FirstBcb)->NodeTypeCode == CACHE_NTC_BCB) {
+
+ LastOffset = (*FirstBcb)->FileOffset;
+
+ CcUnpinFileData( *FirstBcb, FALSE, UNPIN );
+ }
+
+ *FirstBcb = NextBcb;
+ } while (FileOffset->QuadPart != LastOffset.QuadPart);
+
+ //
+ // Show we did not acquire anything.
+ //
+
+ *Length = 0;
+ }
+ }
+
+ //
+ // If we got anything, return TRUE.
+ //
+
+ DebugTrace2(0, me, " <FileOffset = %08lx, %08lx\n", FileOffset->LowPart,
+ FileOffset->HighPart );
+ DebugTrace( 0, me, " <Length = %08lx\n", *Length );
+ DebugTrace(-1, me, "CcAcquireByteRangeForWrite -> %02lx\n", *Length != 0 );
+
+ return ((BOOLEAN)(*Length != 0));
+}
+
+
+//
+// Internal Support Routine
+//
+
+VOID
+CcReleaseByteRangeFromWrite (
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN PLARGE_INTEGER FileOffset,
+ IN ULONG Length,
+ IN PBCB FirstBcb,
+ IN BOOLEAN VerifyRequired
+ )
+
+/*++
+
+Routine Description:
+
+ This routine is called by the Lazy Writer to free a range of bytes and
+ clear all dirty bits, for a byte range returned by CcAcquireByteRangeForWrite.
+
+Arguments:
+
+ SharedCacheMap - As supplied to CcAcquireByteRangeForWrite
+
+ FileOffset - As returned from CcAcquireByteRangeForWrite
+
+    Length - As returned from CcAcquireByteRangeForWrite
+
+ FirstBcb - As returned from CcAcquireByteRangeForWrite
+
+ VerifyRequired - supplied as TRUE if a verify required error was received.
+ In this case we must mark/leave the data dirty so that
+ we will try to write it again.
+
+Return Value:
+
+ None
+
+--*/
+
+{
+ LARGE_INTEGER LastOffset;
+ PBCB NextBcb;
+
+ DebugTrace(+1, me, "CcReleaseByteRangeFromWrite:\n", 0);
+ DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart,
+ FileOffset->HighPart );
+
+ //
+ // If it is a mask Mbcb we are getting, then we only have to check
+ // for VerifyRequired.
+ //
+
+ if (FirstBcb == NULL) {
+
+ ASSERT(Length != 0);
+
+ if (VerifyRequired) {
+ CcSetDirtyInMask( SharedCacheMap, FileOffset, Length );
+ }
+
+ DebugTrace(-1, me, "CcReleaseByteRangeFromWrite -> VOID\n", 0);
+
+ return;
+ }
+
+ //
+ // Now loop to free up all of the Bcbs. If modified writing is disabled
+ // for each Bcb, then we are to set it clean here, since we are synchronized
+ // with callers who set the data dirty. Otherwise we only have the Bcb pinned
+ // so it will not go away, and we only unpin it here.
+ //
+
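+    //
+    // Note (for the reader):  the Bcb list is ordered by descending file
+    // offset and FirstBcb is the highest Bcb of the range, so following the
+    // Flink pointers below walks toward lower offsets; the loop terminates
+    // once the Bcb whose FileOffset equals *FileOffset (the start of the
+    // range) has been processed.
+    //
+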
+ do {
+ NextBcb = CONTAINING_RECORD( FirstBcb->BcbLinks.Flink, BCB, BcbLinks );
+
+ //
+ // Skip over any listheads.
+ //
+
+ if (FirstBcb->NodeTypeCode == CACHE_NTC_BCB) {
+
+ LastOffset = FirstBcb->FileOffset;
+
+ //
+ // If this is file system metadata (we disabled modified writing),
+ // then this is the time to mark the buffer clean, so long as we
+ // did not get verify required.
+ //
+
+ if (FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) {
+
+ CcUnpinFileData( FirstBcb,
+ BooleanFlagOn(SharedCacheMap->Flags, DISABLE_WRITE_BEHIND),
+ SET_CLEAN );
+ }
+
+ //
+ // If we got verify required, we have to mark the buffer dirty again
+ // so we will try again later. Note we have to make this call again
+ // to make sure the right thing happens with time stamps.
+ //
+
+ if (VerifyRequired) {
+ CcSetDirtyPinnedData( FirstBcb, NULL );
+ }
+
+ //
+ // Finally remove a pin count left over from CcAcquireByteRangeForWrite.
+ //
+
+ CcUnpinFileData( FirstBcb, TRUE, UNPIN );
+ }
+
+ FirstBcb = NextBcb;
+ } while (FileOffset->QuadPart != LastOffset.QuadPart);
+
+ DebugTrace(-1, me, "CcReleaseByteRangeFromWrite -> VOID\n", 0);
+}
+
+
+//
+// Internal Support Routine
+//
+
+NTSTATUS
+FASTCALL
+CcWriteBehind (
+ IN PSHARED_CACHE_MAP SharedCacheMap
+ )
+
+/*++
+
+Routine Description:
+
+    This routine is called to perform write behind for the specified
+    SharedCacheMap.
+
+    The code is very similar to the code that the Lazy Writer performs
+ for each SharedCacheMap. The main difference is in the call to
+ CcAcquireByteRangeForWrite. Write Behind does not care about time
+ stamps (passing ULONG to accept all time stamps), but it will never
+ dump the first (highest byte offset) buffer in the list if the last
+ byte of that buffer is not yet written. The Lazy Writer does exactly
+ the opposite, in the sense that it is totally time-driven, and will
+ even dump a partially modified buffer if it sits around long enough.
+
+Arguments:
+
+ SharedCacheMap - Pointer to SharedCacheMap to be written
+
+Return Value:
+
+    The I/O status resulting from the flush of this SharedCacheMap.
+
+--*/
+
+{
+ IO_STATUS_BLOCK IoStatus;
+ KIRQL OldIrql;
+ ULONG ActivePage;
+ ULONG PageIsDirty;
+ PMBCB Mbcb;
+ NTSTATUS Status;
+ ULONG FileExclusive = FALSE;
+ PVACB ActiveVacb = NULL;
+
+ DebugTrace(+1, me, "CcWriteBehind\n", 0 );
+ DebugTrace( 0, me, " SharedCacheMap = %08lx\n", SharedCacheMap );
+
+ //
+ // First we have to acquire the file for LazyWrite, to avoid
+ // deadlocking with writers to the file. We do this via the
+ // CallBack procedure specified to CcInitializeCacheMap.
+ //
+
+ (*SharedCacheMap->Callbacks->AcquireForLazyWrite)
+ ( SharedCacheMap->LazyWriteContext, TRUE );
+
+ //
+ // See if there is a previous active page to clean up, but only
+ // do so now if it is the last dirty page or no users have the
+ // file open. We will free it below after dropping the spinlock.
+ //
+
+ ExAcquireFastLock( &CcMasterSpinLock, &OldIrql );
+
+ if ((SharedCacheMap->DirtyPages <= 1) || (SharedCacheMap->OpenCount == 0)) {
+ GetActiveVacbAtDpcLevel( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty );
+ }
+
+ //
+ // Increment open count so that our caller's views stay available
+ // for CcGetVacbMiss. We could be tying up all of the views, and
+ // still need to write file sizes.
+ //
+
+ SharedCacheMap->OpenCount += 1;
+
+ //
+ // If there is a mask bcb, then we need to establish a target for
+ // it to flush.
+ //
+
+ if ((Mbcb = SharedCacheMap->Mbcb) != 0) {
+
+ //
+ // Set a target of pages to write, assuming that any Active
+ // Vacb will increase the number.
+ //
+
+ Mbcb->PagesToWrite = Mbcb->DirtyPages + ((ActiveVacb != NULL) ? 1 : 0);
+
+ if (Mbcb->PagesToWrite > CcPagesYetToWrite) {
+
+ Mbcb->PagesToWrite = CcPagesYetToWrite;
+ }
+ }
+
+ ExReleaseFastLock( &CcMasterSpinLock, OldIrql );
+
+ //
+ // Now free the active Vacb, if we found one.
+ //
+
+ if (ActiveVacb != NULL) {
+
+ CcFreeActiveVacb( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty );
+ }
+
+ //
+ // Now perform the lazy writing for this file via a special call
+ // to CcFlushCache. He recognizes us by the &CcNoDelay input to
+ // FileOffset, which signifies a Lazy Write, but is subsequently
+ // ignored.
+ //
+
+ CcFlushCache( SharedCacheMap->FileObject->SectionObjectPointer,
+ &CcNoDelay,
+ 1,
+ &IoStatus );
+
+ //
+ // No need for the Lazy Write resource now.
+ //
+
+ (*SharedCacheMap->Callbacks->ReleaseFromLazyWrite)
+ ( SharedCacheMap->LazyWriteContext );
+
+ //
+ // Check if we need to put up a popup.
+ //
+
+ if (!NT_SUCCESS(IoStatus.Status) && !RetryError(IoStatus.Status)) {
+
+ //
+ // We lost writebehind data. Try to get the filename. If we can't,
+ // then just raise the error returned by the failing write
+ //
+
+ POBJECT_NAME_INFORMATION FileNameInfo;
+ NTSTATUS QueryStatus;
+ ULONG whocares;
+
+ FileNameInfo = ExAllocatePool(PagedPool,1024);
+
+ if ( FileNameInfo ) {
+ QueryStatus = ObQueryNameString( SharedCacheMap->FileObject,
+ FileNameInfo,
+ 1024,
+ &whocares );
+
+ if ( !NT_SUCCESS(QueryStatus) ) {
+ ExFreePool(FileNameInfo);
+ FileNameInfo = NULL;
+ }
+ }
+
+ if ( FileNameInfo ) {
+ IoRaiseInformationalHardError( STATUS_LOST_WRITEBEHIND_DATA,&FileNameInfo->Name, NULL );
+ ExFreePool(FileNameInfo);
+ } else {
+ if ( SharedCacheMap->FileObject->FileName.Length &&
+ SharedCacheMap->FileObject->FileName.MaximumLength &&
+ SharedCacheMap->FileObject->FileName.Buffer ) {
+
+ IoRaiseInformationalHardError( STATUS_LOST_WRITEBEHIND_DATA,&SharedCacheMap->FileObject->FileName, NULL );
+ }
+ }
+
+ //
+    // See if there are any deferred writes we can post.
+ //
+
+ } else if (!IsListEmpty(&CcDeferredWrites)) {
+ CcPostDeferredWrites();
+ }
+
+ //
+ // Now acquire CcMasterSpinLock again to
+ // see if we need to call CcUninitialize before returning.
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+
+ //
+    // If the current ValidDataGoal is greater than (or equal to) ValidDataLength,
+    // then we must see if we have advanced beyond the current ValidDataLength.
+    //
+    // If we have NEVER written anything out from this shared cache map, then
+    // there is no need to check anything associated with valid data length
+ // here. We will come by here again when, and if, anybody actually
+ // modifies the file and we lazy write some data.
+ //
+
+ Status = STATUS_SUCCESS;
+ if (FlagOn(SharedCacheMap->Flags, LAZY_WRITE_OCCURRED) &&
+ (SharedCacheMap->ValidDataGoal.QuadPart >= SharedCacheMap->ValidDataLength.QuadPart) &&
+ (SharedCacheMap->ValidDataLength.QuadPart != MAXLONGLONG) &&
+ (SharedCacheMap->FileSize.QuadPart != 0)) {
+
+ LARGE_INTEGER NewValidDataLength = {0,0};
+
+ //
+ // If the Bcb List is completely empty, then we must have written
+        // everything, and the new ValidDataLength is equal to ValidDataGoal.
+ //
+
+ if (SharedCacheMap->DirtyPages == 0) {
+
+ NewValidDataLength = SharedCacheMap->ValidDataGoal;
+ }
+
+ //
+ // Else we will look at the last Bcb in the descending-order Bcb
+ // list, and see if it describes data beyond ValidDataGoal.
+ //
+ // (This test is logically too conservative. For example, the last Bcb
+ // may not even be dirty (in which case we should look at its
+ // predecessor), or we may have earlier written valid data to this
+ // byte range (which also means if we knew this we could look at
+        // the predecessor).  This simply means that the Lazy Writer may not
+ // successfully get ValidDataLength updated in a file being randomly
+ // accessed until the level of file access dies down, or at the latest
+ // until the file is closed. However, security will never be
+ // compromised.)
+ //
+
+ else {
+
+ PBCB LastBcb;
+ PMBCB Mbcb = SharedCacheMap->Mbcb;
+
+ if ((Mbcb != NULL) && (Mbcb->DirtyPages != 0)) {
+
+ NewValidDataLength.QuadPart = (LONGLONG)Mbcb->FirstDirtyPage << PAGE_SHIFT;
+ }
+
+ LastBcb = CONTAINING_RECORD( SharedCacheMap->BcbList.Flink,
+ BCB,
+ BcbLinks );
+
+ while (&LastBcb->BcbLinks != &SharedCacheMap->BcbList) {
+
+ if ((LastBcb->NodeTypeCode == CACHE_NTC_BCB) && LastBcb->Dirty) {
+ break;
+ }
+
+ LastBcb = CONTAINING_RECORD( LastBcb->BcbLinks.Flink,
+ BCB,
+ BcbLinks );
+ }
+
+ //
+ // Check the Base of the last entry.
+ //
+
+ if ((&LastBcb->BcbLinks != &SharedCacheMap->BcbList) &&
+ (LastBcb->FileOffset.QuadPart < NewValidDataLength.QuadPart )) {
+
+ NewValidDataLength = LastBcb->FileOffset;
+ }
+ }
+
+ //
+        // If a new ValidDataLength has been written, then we have to
+        // call the file system back to update it.  We must temporarily
+        // drop our global spin lock while we do this, which is safe to do since
+ // we have not cleared WRITE_QUEUED.
+ //
+ // Note we keep calling any time we wrote the last page of the file,
+ // to solve the "famous" AFS Server problem. The file system will
+ // truncate our valid data call to whatever is currently valid. But
+ // then if he writes a little more, we do not want to stop calling
+ // back.
+ //
+
+ if ( NewValidDataLength.QuadPart >= SharedCacheMap->ValidDataLength.QuadPart ) {
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+
+ //
+ // Call file system to set new valid data. We have no
+ // one to tell if this doesn't work.
+ //
+
+ Status = CcSetValidData( SharedCacheMap->FileObject,
+ &NewValidDataLength );
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+ if (NT_SUCCESS(Status)) {
+ SharedCacheMap->ValidDataLength = NewValidDataLength;
+#ifdef TOMM
+ } else if ((Status != STATUS_INSUFFICIENT_RESOURCES) && !RetryError(Status)) {
+ DbgPrint("Unexpected status from CcSetValidData: %08lx, FileObject: %08lx\n",
+ Status,
+ SharedCacheMap->FileObject);
+ DbgBreakPoint();
+#endif TOMM
+ }
+ }
+ }
+
+ //
+ // Show we are done.
+ //
+
+ SharedCacheMap->OpenCount -= 1;
+
+ //
+ // Make an approximate guess about whether we will call CcDeleteSharedCacheMap or not
+ // to truncate the file. If we fail to acquire here, then we will not delete below,
+ // and just catch it on a subsequent pass.
+ //
+
+ if (FlagOn(SharedCacheMap->Flags, TRUNCATE_REQUIRED) && (SharedCacheMap->OpenCount == 0)) {
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ FsRtlAcquireFileExclusive( SharedCacheMap->FileObject );
+ FileExclusive = TRUE;
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+ }
+
+ //
+ // Otherwise see if we are to delete this SharedCacheMap. Note
+ // we go ahead and release the Resource first, because with
+ // OpenCount == 0 and an empty Bcb list, no one will be trying
+ // to access this SharedCacheMap but us. Also, by releasing first
+ // we avoid a deadlock with the file system when the FileObject is
+ // dereferenced. Note that CcDeleteSharedCacheMap requires that
+ // the CcMasterSpinLock already be acquired, and it
+ // releases it. We have to clear the indirect pointer in this
+ // case, because no one else will do it.
+ //
+ // Also do not delete the SharedCacheMap if we got an error on
+ // the ValidDataLength callback. If we get a resource allocation
+ // failure or a retryable error (due to log file full?), we have
+ // no one to tell, so we must just loop back and try again. Of
+ // course all I/O errors are just too bad.
+ //
+
+ if ((SharedCacheMap->OpenCount == 0)
+
+ &&
+
+ ((SharedCacheMap->DirtyPages == 0) || ((SharedCacheMap->FileSize.QuadPart == 0) &&
+ !FlagOn(SharedCacheMap->Flags, PIN_ACCESS)))
+
+ &&
+
+ (FileExclusive || !FlagOn(SharedCacheMap->Flags, TRUNCATE_REQUIRED))
+
+ &&
+
+ (NT_SUCCESS(Status) || ((Status != STATUS_INSUFFICIENT_RESOURCES) && !RetryError(Status)))) {
+
+ CcDeleteSharedCacheMap( SharedCacheMap, OldIrql, FileExclusive );
+ }
+
+ //
+ // In the normal case, we just release the resource on the way out.
+ //
+
+ else {
+
+ //
+ // Now release the file if we have it.
+ //
+
+ if (FileExclusive) {
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ FsRtlReleaseFile( SharedCacheMap->FileObject );
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+ }
+
+ ClearFlag(SharedCacheMap->Flags, WRITE_QUEUED);
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ }
+
+ DebugTrace(-1, me, "CcWriteBehind->VOID\n", 0 );
+
+ return IoStatus.Status;
+}
+
+
+VOID
+CcFlushCache (
+ IN PSECTION_OBJECT_POINTERS SectionObjectPointer,
+ IN PLARGE_INTEGER FileOffset OPTIONAL,
+ IN ULONG Length,
+ OUT PIO_STATUS_BLOCK IoStatus OPTIONAL
+ )
+
+/*++
+
+Routine Description:
+
+ This routine may be called to flush dirty data from the cache to the
+ cached file on disk. Any byte range within the file may be flushed,
+ or the entire file may be flushed by omitting the FileOffset parameter.
+
+ This routine does not take a Wait parameter; the caller should assume
+ that it will always block.
+
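+    As an illustrative sketch (not a prescription for any particular file
+    system), a caller flushing a dirty range of a cached file might issue:
+
+        IO_STATUS_BLOCK IoStatus;
+
+        CcFlushCache( FileObject->SectionObjectPointer,
+                      &FileOffset,
+                      Length,
+                      &IoStatus );
+
+    and then examine IoStatus.Status for the result of the flush.
+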
+Arguments:
+
+ SectionObjectPointer - A pointer to the Section Object Pointers
+ structure in the nonpaged Fcb.
+
+
+ FileOffset - If this parameter is supplied (not NULL), then only the
+ byte range specified by FileOffset and Length are flushed.
+ If &CcNoDelay is specified, then this signifies the call
+ from the Lazy Writer, and the lazy write scan should resume
+ as normal from the last spot where it left off in the file.
+
+ Length - Defines the length of the byte range to flush, starting at
+ FileOffset. This parameter is ignored if FileOffset is
+ specified as NULL.
+
+ IoStatus - The I/O status resulting from the flush operation.
+
+Return Value:
+
+ None.
+
+--*/
+
+{
+ LARGE_INTEGER NextFileOffset, TargetOffset;
+ ULONG NextLength;
+ PBCB FirstBcb;
+ KIRQL OldIrql;
+ PSHARED_CACHE_MAP SharedCacheMap;
+ IO_STATUS_BLOCK TrashStatus;
+ PVOID TempVa;
+ ULONG RemainingLength, TempLength;
+ NTSTATUS PopupStatus;
+ BOOLEAN HotSpot;
+ ULONG BytesWritten = 0;
+ BOOLEAN PopupRequired = FALSE;
+ BOOLEAN VerifyRequired = FALSE;
+ BOOLEAN IsLazyWriter = FALSE;
+ BOOLEAN FreeActiveVacb = FALSE;
+ PVACB ActiveVacb = NULL;
+ NTSTATUS Status = STATUS_SUCCESS;
+
+ DebugTrace(+1, me, "CcFlushCache:\n", 0 );
+ DebugTrace( 0, mm, " SectionObjectPointer = %08lx\n", SectionObjectPointer );
+ DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n",
+ ARGUMENT_PRESENT(FileOffset) ? FileOffset->LowPart
+ : 0,
+ ARGUMENT_PRESENT(FileOffset) ? FileOffset->HighPart
+ : 0 );
+ DebugTrace( 0, me, " Length = %08lx\n", Length );
+
+ //
+    // If the caller passed a NULL pointer for IoStatus, set up to throw the status away.
+ //
+
+ if (!ARGUMENT_PRESENT(IoStatus)) {
+ IoStatus = &TrashStatus;
+ }
+ IoStatus->Status = STATUS_SUCCESS;
+ IoStatus->Information = 0;
+
+ //
+ // See if this is the Lazy Writer. Since he wants to use this common
+ // routine, which is also a public routine callable by file systems,
+ // the Lazy Writer shows his call by specifying CcNoDelay as the file offset!
+ //
+ // Also, in case we do not write anything because we see only HotSpot(s),
+ // initialize the Status to indicate a retryable error, so CcWorkerThread
+ // knows we did not make any progress. Of course any actual flush will
+ // overwrite this code.
+ //
+
+ if (FileOffset == &CcNoDelay) {
+ IoStatus->Status = STATUS_VERIFY_REQUIRED;
+ IsLazyWriter = TRUE;
+ FileOffset = NULL;
+ }
+
+ //
+ // If there is nothing to do, return here.
+ //
+
+ if (ARGUMENT_PRESENT(FileOffset) && (Length == 0)) {
+
+ DebugTrace(-1, me, "CcFlushCache -> VOID\n", 0 );
+ return;
+ }
+
+ //
+ // See if the file is cached.
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+
+ SharedCacheMap = SectionObjectPointer->SharedCacheMap;
+
+ if (SharedCacheMap != NULL) {
+
+ //
+ // Increment the open count to keep it from going away.
+ //
+
+ SharedCacheMap->OpenCount += 1;
+
+ if ((SharedCacheMap->NeedToZero != NULL) || (SharedCacheMap->ActiveVacb != NULL)) {
+
+ ULONG FirstPage = 0;
+ ULONG LastPage = MAXULONG;
+
+ if (ARGUMENT_PRESENT(FileOffset)) {
+
+ FirstPage = (ULONG)(FileOffset->QuadPart >> PAGE_SHIFT);
+ LastPage = (ULONG)((FileOffset->QuadPart + Length - 1) >> PAGE_SHIFT);
+ }
+
+ //
+ // Make sure we do not flush the active page without zeroing any
+ // uninitialized data. Also, it is very important to free the active
+ // page if it is the one to be flushed, so that we get the dirty
+ // bit out to the Pfn.
+ //
+
+ if (((((LONGLONG)LastPage + 1) << PAGE_SHIFT) > SharedCacheMap->ValidDataGoal.QuadPart) ||
+
+ ((SharedCacheMap->NeedToZero != NULL) &&
+ (FirstPage <= SharedCacheMap->NeedToZeroPage) &&
+ (LastPage >= SharedCacheMap->NeedToZeroPage)) ||
+
+ ((SharedCacheMap->ActiveVacb != NULL) &&
+ (FirstPage <= SharedCacheMap->ActivePage) &&
+ (LastPage >= SharedCacheMap->ActivePage))) {
+
+ GetActiveVacbAtDpcLevel( SharedCacheMap, ActiveVacb, RemainingLength, TempLength );
+ FreeActiveVacb = TRUE;
+ }
+ }
+ }
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+
+ if (FreeActiveVacb) {
+ CcFreeActiveVacb( SharedCacheMap, ActiveVacb, RemainingLength, TempLength );
+ }
+
+ //
+ // Scan for dirty pages if there is a shared cache map.
+ //
+
+ if (SharedCacheMap != NULL) {
+
+ //
+        // If FileOffset was not specified, then set up to flush the entire
+        // region and set ValidDataLength to the goal so that we will not
+        // get any more callbacks.
+ //
+
+ if (!IsLazyWriter && !ARGUMENT_PRESENT(FileOffset)) {
+
+ SharedCacheMap->ValidDataLength = SharedCacheMap->ValidDataGoal;
+ }
+
+ //
+ // If this is an explicit flush, initialize our offset to scan for.
+ //
+
+ if (ARGUMENT_PRESENT(FileOffset)) {
+ TargetOffset = *FileOffset;
+ }
+
+ //
+ // Assume we want to pass the explicit flush flag in Length.
+ // But overwrite it if a length really was specified. On
+ // subsequent loops, NextLength will have some nonzero value.
+ //
+
+ NextLength = 1;
+ if (Length != 0) {
+ NextLength = Length;
+ }
+
+ //
+ // Loop as long as we find buffers to flush for this
+ // SharedCacheMap, and we are not trying to delete the guy.
+ //
+
+ while (((SharedCacheMap->PagesToWrite != 0) || !IsLazyWriter)
+
+ &&
+ ((SharedCacheMap->FileSize.QuadPart != 0) ||
+ FlagOn(SharedCacheMap->Flags, PIN_ACCESS))
+
+ &&
+
+ !VerifyRequired
+
+ &&
+
+ CcAcquireByteRangeForWrite ( SharedCacheMap,
+ IsLazyWriter ? NULL : (ARGUMENT_PRESENT(FileOffset) ?
+ &TargetOffset : NULL),
+ IsLazyWriter ? 0: NextLength,
+ &NextFileOffset,
+ &NextLength,
+ &FirstBcb )) {
+
+ //
+ // Assume this range is not a hot spot.
+ //
+
+ HotSpot = FALSE;
+
+ //
+ // We defer calling Mm to set address range modified until here, to take
+ // overhead out of the main line path, and to reduce the number of TBIS
+ // on a multiprocessor.
+ //
+
+ RemainingLength = NextLength;
+
+ do {
+
+ //
+ // See if the next file offset is mapped. (If not, the dirty bit
+ // was propagated on the unmap.)
+ //
+
+ if ((TempVa = CcGetVirtualAddressIfMapped( SharedCacheMap,
+ NextFileOffset.QuadPart + NextLength - RemainingLength,
+ &ActiveVacb,
+ &TempLength)) != NULL) {
+
+ //
+ // Reduce TempLength to RemainingLength if necessary, and
+ // call MM.
+ //
+
+ if (TempLength > RemainingLength) {
+ TempLength = RemainingLength;
+ }
+
+ //
+ // Clear the Dirty bit (if set) in the PTE and set the
+ // Pfn modified. Assume if the Pte was dirty, that this may
+                    // be a hot spot.  Do not treat metadata as a hot spot, and
+                    // only count ranges that lie within ValidDataLength (as
+                    // reported to the file system via CcSetValidData) as hot spots.
+ //
+
+ HotSpot = (BOOLEAN)((MmSetAddressRangeModified(TempVa, TempLength) || HotSpot) &&
+ ((NextFileOffset.QuadPart + NextLength) <
+ (SharedCacheMap->ValidDataLength.QuadPart)) &&
+ ((SharedCacheMap->LazyWritePassCount & 0xF) != 0) && IsLazyWriter) &&
+ !FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED);
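+
+                    //
+                    // Note that on every sixteenth lazy writer pass the
+                    // (LazyWritePassCount & 0xF) term above is zero, so the
+                    // range is not treated as a hot spot and is written anyway.
+                    //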
+
+ CcFreeVirtualAddress( ActiveVacb );
+
+ } else {
+
+ //
+ // Reduce TempLength to RemainingLength if necessary.
+ //
+
+ if (TempLength > RemainingLength) {
+ TempLength = RemainingLength;
+ }
+ }
+
+ //
+ // Reduce RemainingLength by what we processed.
+ //
+
+ RemainingLength -= TempLength;
+
+ //
+ // Loop until done.
+ //
+
+ } while (RemainingLength != 0);
+
+ CcLazyWriteHotSpots += HotSpot;
+
+ //
+            // Now flush if we do not think it is a hot spot.
+ //
+
+ if (!HotSpot) {
+
+ MmFlushSection( SharedCacheMap->FileObject->SectionObjectPointer,
+ &NextFileOffset,
+ NextLength,
+ IoStatus,
+ !IsLazyWriter );
+
+ if (NT_SUCCESS(IoStatus->Status)) {
+
+ ExAcquireFastLock( &CcMasterSpinLock, &OldIrql );
+ SetFlag(SharedCacheMap->Flags, LAZY_WRITE_OCCURRED);
+ ExReleaseFastLock( &CcMasterSpinLock, OldIrql );
+
+ //
+ // Increment performance counters
+ //
+
+ if (IsLazyWriter) {
+
+ CcLazyWriteIos += 1;
+ CcLazyWritePages += (NextLength + PAGE_SIZE - 1) >> PAGE_SHIFT;
+ }
+
+ } else {
+
+ LARGE_INTEGER Offset = NextFileOffset;
+ ULONG RetryLength = NextLength;
+
+ DebugTrace2( 0, 0, "I/O Error on Cache Flush: %08lx, %08lx\n",
+ IoStatus->Status, IoStatus->Information );
+
+ if (RetryError(IoStatus->Status)) {
+
+ VerifyRequired = TRUE;
+
+ //
+ // Loop to write each page individually, starting with one
+ // more try on the page that got the error, in case that page
+ // or any page beyond it can be successfully written
+ // individually. Note that Offset and RetryLength are
+ // guaranteed to be in integral pages, but the Information
+ // field from the failed request is not.
+ //
+ // We ignore errors now, and give it one last shot, before
+ // setting the pages clean (see below).
+ //
+
+ } else {
+
+ do {
+
+ DebugTrace2( 0, 0, "Trying page at offset %08lx, %08lx\n",
+ Offset.LowPart, Offset.HighPart );
+
+ MmFlushSection ( SharedCacheMap->FileObject->SectionObjectPointer,
+ &Offset,
+ PAGE_SIZE,
+ IoStatus,
+ !IsLazyWriter );
+
+ DebugTrace2( 0, 0, "I/O status = %08lx, %08lx\n",
+ IoStatus->Status, IoStatus->Information );
+
+ if (NT_SUCCESS(IoStatus->Status)) {
+ ExAcquireFastLock( &CcMasterSpinLock, &OldIrql );
+ SetFlag(SharedCacheMap->Flags, LAZY_WRITE_OCCURRED);
+ ExReleaseFastLock( &CcMasterSpinLock, OldIrql );
+ }
+
+ if ((!NT_SUCCESS(IoStatus->Status)) && !RetryError(IoStatus->Status)) {
+
+ PopupRequired = TRUE;
+ PopupStatus = IoStatus->Status;
+ }
+
+ VerifyRequired = VerifyRequired || RetryError(IoStatus->Status);
+
+ Offset.QuadPart = Offset.QuadPart + (LONGLONG)PAGE_SIZE;
+ RetryLength -= PAGE_SIZE;
+
+ } while(RetryLength > 0);
+ }
+ }
+ }
+
+ //
+ // Now release the Bcb resources and set them clean. Note we do not check
+            // here for errors; they are just returned in the I/O status.  Errors on writes
+ // are rare to begin with. Nonetheless, our strategy is to rely on
+ // one or more of the following (depending on the file system) to prevent
+ // errors from getting to us.
+ //
+ // - Retries and/or other forms of error recovery in the disk driver
+ // - Mirroring driver
+ // - Hot fixing in the noncached path of the file system
+ //
+ // In the unexpected case that a write error does get through, we
+ // *currently* just set the Bcbs clean anyway, rather than let
+ // Bcbs and pages accumulate which cannot be written. Note we did
+ // a popup above to at least notify the guy.
+ //
+ // Set the pages dirty again if we either saw a HotSpot or got
+ // verify required.
+ //
+
+ CcReleaseByteRangeFromWrite ( SharedCacheMap,
+ &NextFileOffset,
+ NextLength,
+ FirstBcb,
+ (BOOLEAN)(HotSpot || VerifyRequired) );
+
+ //
+            // See if there are any deferred writes we should post.
+ //
+
+ BytesWritten += NextLength;
+ if ((BytesWritten >= 0x40000) && !IsListEmpty(&CcDeferredWrites)) {
+ CcPostDeferredWrites();
+ BytesWritten = 0;
+ }
+
+ //
+ // Now for explicit flushes, we should advance our range.
+ //
+
+ if (ARGUMENT_PRESENT(FileOffset)) {
+
+ NextFileOffset.QuadPart += NextLength;
+
+ //
+ // Done yet?
+ //
+
+ if ((FileOffset->QuadPart + Length) <= NextFileOffset.QuadPart) {
+ break;
+ }
+
+ //
+ // Calculate new target range
+ //
+
+ NextLength = (ULONG)((FileOffset->QuadPart + Length) - NextFileOffset.QuadPart);
+ TargetOffset = NextFileOffset;
+ }
+ }
+ }
+
+ //
+ // If there is a user-mapped file, then we perform the "service" of
+ // flushing even data not written via the file system. To do this
+ // we simply reissue the original flush, sigh.
+ //
+
+ if ((SharedCacheMap == NULL)
+
+ ||
+
+ FlagOn(((PFSRTL_COMMON_FCB_HEADER)(SharedCacheMap->FileObject->FsContext))->Flags,
+ FSRTL_FLAG_USER_MAPPED_FILE) && !IsLazyWriter) {
+
+ //
+ // Call MM to flush the section through our view.
+ //
+
+ DebugTrace( 0, mm, "MmFlushSection:\n", 0 );
+ DebugTrace( 0, mm, " SectionObjectPointer = %08lx\n", SectionObjectPointer );
+ DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n",
+ ARGUMENT_PRESENT(FileOffset) ? FileOffset->LowPart
+ : 0,
+ ARGUMENT_PRESENT(FileOffset) ? FileOffset->HighPart
+ : 0 );
+ DebugTrace( 0, mm, " RegionSize = %08lx\n", Length );
+
+ try {
+
+ Status = MmFlushSection( SectionObjectPointer,
+ FileOffset,
+ Length,
+ IoStatus,
+ TRUE );
+
+ } except( CcExceptionFilter( IoStatus->Status = GetExceptionCode() )) {
+
+ KdPrint(("CACHE MANAGER: MmFlushSection raised %08lx\n", IoStatus->Status));
+ }
+
+ DebugTrace2(0, mm, " <IoStatus = %08lx, %08lx\n",
+ IoStatus->Status, IoStatus->Information );
+ }
+
+ //
+ // Now we can get rid of the open count, and clean up as required.
+ //
+
+ if (SharedCacheMap != NULL) {
+
+ //
+ // Serialize again to decrement the open count.
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+
+ SharedCacheMap->OpenCount -= 1;
+
+ if ((SharedCacheMap->OpenCount == 0) &&
+ !FlagOn(SharedCacheMap->Flags, WRITE_QUEUED) &&
+ (SharedCacheMap->DirtyPages == 0)) {
+
+ //
+ // Move to the dirty list.
+ //
+
+ RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
+ InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks,
+ &SharedCacheMap->SharedCacheMapLinks );
+
+ //
+ // Make sure the Lazy Writer will wake up, because we
+ // want him to delete this SharedCacheMap.
+ //
+
+ LazyWriter.OtherWork = TRUE;
+ if (!LazyWriter.ScanActive) {
+ CcScheduleLazyWriteScan();
+ }
+ }
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ }
+
+ //
+    // Make sure to return the first error to our caller.  In the
+ // case of the Lazy Writer, a popup will be issued.
+ //
+
+ if (PopupRequired) {
+ IoStatus->Status = PopupStatus;
+ }
+
+    //
+    // Let the Lazy Writer know whether we did anything, so he can tell
+    // if any progress was made (see the initialization of IoStatus->Status
+    // for the Lazy Writer case above).
+    //
+
+ DebugTrace(-1, me, "CcFlushCache -> VOID\n", 0 );
+
+ return;
+}
+
+
+VOID
+CcRepinBcb (
+ IN PVOID Bcb
+ )
+
+/*++
+
+Routine Description:
+
+ This routine may be called by a file system to pin a Bcb an additional
+ time in order to reserve it for Write Through or error recovery.
+ Typically the file system would do this the first time that it sets a
+ pinned buffer dirty while processing a WriteThrough request, or any
+ time that it determines that a buffer will be required for WriteThrough.
+
+ The call to this routine must be followed by a call to CcUnpinRepinnedBcb.
+ CcUnpinRepinnedBcb should normally be called during request completion
+ after all other resources have been released. CcUnpinRepinnedBcb
+ synchronously writes the buffer (for WriteThrough requests) and performs
+ the matching unpin for this call.
+
+Arguments:
+
+ Bcb - Supplies a pointer to a previously pinned Bcb
+
+Return Value:
+
+ None.
+
+--*/
+
+{
+ KIRQL OldIrql;
+
+ ExAcquireFastLock( &CcMasterSpinLock, &OldIrql );
+
+ ((PBCB)Bcb)->PinCount += 1;
+
+ ExReleaseFastLock( &CcMasterSpinLock, OldIrql );
+}
+
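+//
+//  Illustrative sketch (not part of this module):  the repin protocol as a
+//  write-through file system might use it.  The Bcb is kept alive across
+//  request completion, then written and released in one step.
+//
+//      CcSetDirtyPinnedData( Bcb, NULL );
+//      CcRepinBcb( Bcb );
+//      CcUnpinData( Bcb );
+//
+//      //  ... complete the request and release all other resources ...
+//
+//      CcUnpinRepinnedBcb( Bcb, TRUE, &IoStatus );
+//
+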
+
+VOID
+CcUnpinRepinnedBcb (
+ IN PVOID Bcb,
+ IN BOOLEAN WriteThrough,
+ OUT PIO_STATUS_BLOCK IoStatus
+ )
+
+/*++
+
+Routine Description:
+
+ This routine may be called to Write a previously pinned buffer
+ through to the file. It must have been preceded by a call to
+ CcRepinBcb. As this routine must acquire the Bcb
+ resource exclusive, the caller must be extremely careful to avoid
+ deadlocks. Ideally the caller owns no resources at all when it
+ calls this routine, or else the caller should guarantee that it
+ has nothing else pinned in this same file. (The latter rule is
+ the one used to avoid deadlocks in calls from CcCopyWrite and
+ CcMdlWrite.)
+
+Arguments:
+
+ Bcb - Pointer to a Bcb which was previously specified in a call
+ to CcRepinBcb.
+
+ WriteThrough - TRUE if the Bcb should be written through.
+
+ IoStatus - Returns the I/O status for the operation.
+
+Return Value:
+
+ None.
+
+--*/
+
+{
+ PSHARED_CACHE_MAP SharedCacheMap = ((PBCB)Bcb)->SharedCacheMap;
+
+ DebugTrace(+1, me, "CcUnpinRepinnedBcb\n", 0 );
+ DebugTrace( 0, me, " Bcb = %08lx\n", Bcb );
+ DebugTrace( 0, me, " WriteThrough = %02lx\n", WriteThrough );
+
+ //
+ // Set status to success for non write through case.
+ //
+
+ IoStatus->Status = STATUS_SUCCESS;
+
+ if (WriteThrough) {
+
+ //
+ // Acquire Bcb exclusive to eliminate possible modifiers of the buffer,
+ // since we are about to write its buffer.
+ //
+
+ if (FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) {
+ ExAcquireResourceExclusive( &((PBCB)Bcb)->Resource, TRUE );
+ }
+
+ //
+ // Now, there is a chance that the LazyWriter has already written
+ // it, since the resource was free. We will only write it if it
+ // is still dirty.
+ //
+
+ if (((PBCB)Bcb)->Dirty) {
+
+ //
+ // First we make sure that the dirty bit in the PFN database is set.
+ //
+
+ ASSERT( ((PBCB)Bcb)->BaseAddress != NULL );
+ MmSetAddressRangeModified( ((PBCB)Bcb)->BaseAddress,
+ ((PBCB)Bcb)->ByteLength );
+
+ //
+ // Now release the Bcb resource and set it clean. Note we do not check
+ // here for errors, and just return the I/O status. Errors on writes
+ // are rare to begin with. Nonetheless, our strategy is to rely on
+ // one or more of the following (depending on the file system) to prevent
+ // errors from getting to us.
+ //
+ // - Retries and/or other forms of error recovery in the disk driver
+ // - Mirroring driver
+ // - Hot fixing in the noncached path of the file system
+ //
+ // In the unexpected case that a write error does get through, we
+ // report it to our caller, but go ahead and set the Bcb clean. There
+ // seems to be no point in letting Bcbs (and pages in physical memory)
+ // accumulate which can never go away because we get an unrecoverable I/O
+ // error.
+ //
+
+ //
+ // We specify TRUE here for ReadOnly so that we will keep the
+ // resource during the flush.
+ //
+
+ CcUnpinFileData( (PBCB)Bcb, TRUE, SET_CLEAN );
+
+ //
+ // Write it out.
+ //
+
+ MmFlushSection( ((PBCB)Bcb)->SharedCacheMap->FileObject->SectionObjectPointer,
+ &((PBCB)Bcb)->FileOffset,
+ ((PBCB)Bcb)->ByteLength,
+ IoStatus,
+ TRUE );
+
+ //
+ // If we got verify required, we have to mark the buffer dirty again
+ // so we will try again later.
+ //
+
+ if (RetryError(IoStatus->Status)) {
+ CcSetDirtyPinnedData( (PBCB)Bcb, NULL );
+ }
+
+ //
+ // Now remove the final pin count now that we have set it clean.
+ //
+
+ CcUnpinFileData( (PBCB)Bcb, FALSE, UNPIN );
+
+ //
+            // See if there are any deferred writes we can post.
+ //
+
+ if (!IsListEmpty(&CcDeferredWrites)) {
+ CcPostDeferredWrites();
+ }
+ }
+ else {
+
+ //
+ // Lazy Writer got there first, just free the resource and unpin.
+ //
+
+ CcUnpinFileData( (PBCB)Bcb, FALSE, UNPIN );
+
+ }
+
+ DebugTrace2(0, me, " <IoStatus = %08lx, %08lx\n", IoStatus->Status,
+ IoStatus->Information );
+ }
+
+ //
+ // Non-WriteThrough case
+ //
+
+ else {
+
+ CcUnpinFileData( (PBCB)Bcb, TRUE, UNPIN );
+
+ //
+ // Set status to success for non write through case.
+ //
+
+ IoStatus->Status = STATUS_SUCCESS;
+ }
+
+ DebugTrace(-1, me, "CcUnpinRepinnedBcb -> VOID\n", 0 );
+}
+
+
+//
+// Internal Support Routine
+//
+
+BOOLEAN
+CcFindBcb (
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN PLARGE_INTEGER FileOffset,
+ IN OUT PLARGE_INTEGER BeyondLastByte,
+ OUT PBCB *Bcb
+ )
+
+/*++
+
+Routine Description:
+
+ This routine is called to find a Bcb describing the specified byte range
+ of a file. It returns TRUE if it could at least find a Bcb which describes
+ the beginning of the specified byte range, or else FALSE if the first
+ part of the byte range is not present. In the latter case, the requested
+    byte range (BeyondLastByte) is truncated if there is currently a Bcb which
+ describes bytes beyond the beginning of the byte range.
+
+ The caller may see if the entire byte range is being returned by examining
+ the Bcb, and the caller (or caller's caller) may then make subsequent
+ calls if the data is not all returned.
+
+ The BcbList SpinLock must be currently acquired.
+
+Arguments:
+
+ SharedCacheMap - Supplies a pointer to the SharedCacheMap for the file
+ in which the byte range is desired.
+
+ FileOffset - Supplies the file offset for the beginning of the desired
+ byte range.
+
+ BeyondLastByte - Supplies the file offset of the ending of the desired
+ byte range + 1. Note that this offset will be truncated
+ on return if the Bcb was not found, but bytes beyond the
+ beginning of the Bcb are contained in another Bcb.
+
+ Bcb - returns a Bcb describing the beginning of the byte range if also
+ returning TRUE, or else the point in the Bcb list to insert after.
+
+Return Value:
+
+ FALSE - if no Bcb describes the beginning of the desired byte range
+
+ TRUE - if a Bcb is being returned describing at least an initial
+ part of the byte range.
+
+--*/
+
+{
+ PLIST_ENTRY BcbList;
+ PBCB Bcbt;
+ BOOLEAN Found = FALSE;
+
+ DebugTrace(+1, me, "CcFindBcb:\n", 0 );
+ DebugTrace( 0, me, " SharedCacheMap = %08lx\n", SharedCacheMap );
+ DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart,
+ FileOffset->HighPart );
+ DebugTrace2(0, me, " BeyondLastByte = %08lx, %08lx\n", BeyondLastByte->LowPart,
+ BeyondLastByte->HighPart );
+
+ //
+ // We want to terminate scans by testing the NodeTypeCode field from the
+ // BcbLinks, so we want to see the SharedCacheMap signature from the same
+ // offset.
+ //
+
+ ASSERT(FIELD_OFFSET(SHARED_CACHE_MAP, BcbList) == FIELD_OFFSET(BCB, BcbLinks));
+
+ //
+ // Similarly, when we hit one of the BcbListHeads in the array, small negative
+ // offsets are all aligned structure pointers, so we are counting on the Bcb
+ // signature to have some low-order bits set that an aligned pointer never has.
+ //
+
+ ASSERT((CACHE_NTC_BCB & 3) != 0);
+
+ //
+ // Get address of Bcb listhead that is *after* the Bcb we are looking for,
+ // for backwards scan.
+ //
+
+ BcbList = &SharedCacheMap->BcbList;
+ if ((FileOffset->QuadPart + SIZE_PER_BCB_LIST) < SharedCacheMap->SectionSize.QuadPart) {
+ BcbList = GetBcbListHead( SharedCacheMap, FileOffset->QuadPart + SIZE_PER_BCB_LIST );
+ }
+
+ //
+ // Search for an entry that overlaps the specified range, or until we hit
+ // a listhead.
+ //
+
+ Bcbt = CONTAINING_RECORD(BcbList->Flink, BCB, BcbLinks);
+
+ //
+ // First see if we really have to do Large arithmetic or not, and
+ // then use either a 32-bit loop or a 64-bit loop to search for
+ // the Bcb.
+ //
+
+ if (FileOffset->HighPart == 0) {
+
+ //
+ // 32-bit - loop until we get back to a listhead.
+ //
+
+ while (Bcbt->NodeTypeCode == CACHE_NTC_BCB) {
+
+ //
+ // Since the Bcb list is in descending order, we first check
+ // if we are completely beyond the current entry, and if so
+ // get out.
+ //
+
+ if (FileOffset->LowPart >= Bcbt->BeyondLastByte.LowPart) {
+ break;
+ }
+
+ //
+ // Next check if the first byte we are looking for is
+ // contained in the current Bcb. If so, we either have
+ // a partial hit and must truncate to the exact amount
+ // we have found, or we may have a complete hit. In
+ // either case we break with Found == TRUE.
+ //
+
+ if (FileOffset->LowPart >= Bcbt->FileOffset.LowPart) {
+ Found = TRUE;
+ break;
+ }
+
+ //
+ // Now we know we must loop back and keep looking, but we
+ // still must check for the case where the tail end of the
+ // bytes we are looking for are described by the current
+ // Bcb. If so we must truncate what we are looking for,
+ // because this routine is only supposed to return bytes
+ // from the start of the desired range.
+ //
+
+ if (BeyondLastByte->LowPart >= Bcbt->FileOffset.LowPart) {
+ BeyondLastByte->LowPart = Bcbt->FileOffset.LowPart;
+ }
+
+ //
+ // Advance to next entry in list (which is possibly back to
+ // the listhead) and loop back.
+ //
+
+ Bcbt = CONTAINING_RECORD( Bcbt->BcbLinks.Flink,
+ BCB,
+ BcbLinks );
+
+ }
+
+ } else {
+
+ //
+ // 64-bit - Loop until we get back to a listhead.
+ //
+
+ while (Bcbt->NodeTypeCode == CACHE_NTC_BCB) {
+
+ //
+ // Since the Bcb list is in descending order, we first check
+ // if we are completely beyond the current entry, and if so
+ // get out.
+ //
+
+ if (FileOffset->QuadPart >= Bcbt->BeyondLastByte.QuadPart) {
+ break;
+ }
+
+ //
+ // Next check if the first byte we are looking for is
+ // contained in the current Bcb. If so, we either have
+ // a partial hit and must truncate to the exact amount
+ // we have found, or we may have a complete hit. In
+ // either case we break with Found == TRUE.
+ //
+
+ if (FileOffset->QuadPart >= Bcbt->FileOffset.QuadPart) {
+ Found = TRUE;
+ break;
+ }
+
+ //
+ // Now we know we must loop back and keep looking, but we
+ // still must check for the case where the tail end of the
+ // bytes we are looking for are described by the current
+ // Bcb. If so we must truncate what we are looking for,
+ // because this routine is only supposed to return bytes
+ // from the start of the desired range.
+ //
+
+ if (BeyondLastByte->QuadPart >= Bcbt->FileOffset.QuadPart) {
+ BeyondLastByte->QuadPart = Bcbt->FileOffset.QuadPart;
+ }
+
+ //
+ // Advance to next entry in list (which is possibly back to
+ // the listhead) and loop back.
+ //
+
+ Bcbt = CONTAINING_RECORD( Bcbt->BcbLinks.Flink,
+ BCB,
+ BcbLinks );
+
+ }
+ }
+
+ *Bcb = Bcbt;
+
+ DebugTrace2(0, me, " <BeyondLastByte = %08lx, %08lx\n", BeyondLastByte->LowPart,
+ BeyondLastByte->HighPart );
+ DebugTrace( 0, me, " <Bcb = %08lx\n", *Bcb );
+ DebugTrace(-1, me, "CcFindBcb -> %02lx\n", Found );
+
+ return Found;
+}
+
+
+//
+// Internal Support Routine
+//
+
+PBCB
+CcAllocateInitializeBcb (
+ IN OUT PSHARED_CACHE_MAP SharedCacheMap OPTIONAL,
+ IN OUT PBCB AfterBcb,
+ IN PLARGE_INTEGER FileOffset,
+ IN PLARGE_INTEGER TrialLength
+ )
+
+/*++
+
+Routine Description:
+
+ This routine allocates and initializes a Bcb to describe the specified
+ byte range, and inserts it into the Bcb List of the specified Shared
+ Cache Map. The Bcb List spin lock must currently be acquired.
+
+ CcMasterSpinLock must be acquired on entry.
+
+Arguments:
+
+ SharedCacheMap - Supplies the SharedCacheMap for the new Bcb.
+
+ AfterBcb - Supplies where in the descending-order BcbList the new Bcb
+ should be inserted: either the ListHead (masquerading as
+ a Bcb) or a Bcb.
+
+ FileOffset - Supplies File Offset for the desired data.
+
+ TrialLength - Supplies length of desired data.
+
+Return Value:
+
+ Address of the allocated and initialized Bcb
+
+--*/
+
+{
+ PBCB Bcb;
+ CSHORT NodeIsInZone;
+ ULONG RoundedBcbSize = (sizeof(BCB) + 7) & ~7;
+
+ //
+ // Loop until we have a new Bcb.
+ //
+
+ while (TRUE) {
+
+ PVOID Segment;
+ ULONG SegmentSize;
+
+ Bcb = ExAllocateFromZone( &LazyWriter.BcbZone );
+
+ if (Bcb != NULL) {
+ NodeIsInZone = 1;
+ break;
+ }
+
+ //
+ // Allocation failure - on large systems, extend zone
+ //
+
+ if ( MmQuerySystemSize() == MmLargeSystem ) {
+
+ SegmentSize = sizeof(ZONE_SEGMENT_HEADER) + RoundedBcbSize * 32;
+
+ if ((Segment = ExAllocatePool( NonPagedPool, SegmentSize)) == NULL) {
+
+ return NULL;
+ }
+
+ if (!NT_SUCCESS(ExExtendZone( &LazyWriter.BcbZone, Segment, SegmentSize ))) {
+ CcBugCheck( 0, 0, 0 );
+ }
+ } else {
+ if ((Bcb = ExAllocatePool( NonPagedPool, sizeof(BCB))) == NULL) {
+ return NULL;
+ }
+ NodeIsInZone = 0;
+ break;
+ }
+ }
+
+ //
+ // Initialize the newly allocated Bcb. First zero it, then fill in
+ // nonzero fields.
+ //
+
+ RtlZeroMemory( Bcb, RoundedBcbSize );
+
+ Bcb->NodeIsInZone = NodeIsInZone;
+
+ //
+ // For Mbcb's, SharedCacheMap is NULL, and the rest of this initialization
+ // is not desired.
+ //
+
+ if (SharedCacheMap != NULL) {
+
+ Bcb->NodeTypeCode = CACHE_NTC_BCB;
+ Bcb->FileOffset = *FileOffset;
+ Bcb->ByteLength = TrialLength->LowPart;
+ Bcb->BeyondLastByte.QuadPart = FileOffset->QuadPart + TrialLength->QuadPart;
+ Bcb->PinCount += 1;
+ ExInitializeResource( &Bcb->Resource );
+ Bcb->SharedCacheMap = SharedCacheMap;
+
+ //
+ // Now insert the Bcb in the Bcb List
+ //
+
+ InsertTailList( &AfterBcb->BcbLinks, &Bcb->BcbLinks );
+
+ //
+ // If write behind is disabled for this stream, let Ex know that the
+ // resource will never be acquired exclusive. Also disable
+ // boost (I know this is useless, but KenR said I had to do it).
+ //
+
+ if (SharedCacheMap &&
+ FlagOn(SharedCacheMap->Flags, DISABLE_WRITE_BEHIND)) {
+#if DBG
+ SetFlag(Bcb->Resource.Flag, ResourceNeverExclusive);
+#endif
+ ExDisableResourceBoost( &Bcb->Resource );
+ }
+
+
+ }
+
+ return Bcb;
+}
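+
+//
+// For illustration only -- the pin path (CcPinFileData) is expected to
+// combine CcFindBcb and CcAllocateInitializeBcb roughly as follows, with
+// the Bcb list spin lock held; the local names are hypothetical:
+//
+//     if (!CcFindBcb( SharedCacheMap, FileOffset, &BeyondLastByte, &Bcb )) {
+//
+//         //
+//         // No Bcb describes the start of the range; Bcb now points at the
+//         // node (possibly a listhead) to insert after.
+//         //
+//
+//         Bcb = CcAllocateInitializeBcb( SharedCacheMap,
+//                                        Bcb,
+//                                        FileOffset,
+//                                        &TrialLength );
+//     }
+//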
+
+
+//
+// Internal support routine
+//
+
+VOID
+FASTCALL
+CcDeallocateBcb (
+ IN PBCB Bcb
+ )
+
+/*++
+
+Routine Description:
+
+ This routine deallocates a Bcb to the BcbZone. It must
+ already be removed from the BcbList.
+
+ CcMasterSpinLock must be acquired on entry.
+
+Arguments:
+
+ Bcb - the Bcb to deallocate
+
+Return Value:
+
+ None
+
+--*/
+
+{
+ //
+ // Deallocate Resource structures
+ //
+
+ if (Bcb->NodeTypeCode == CACHE_NTC_BCB) {
+
+ ExDeleteResource( &Bcb->Resource );
+ }
+
+ if ( Bcb->NodeIsInZone ) {
+
+ //
+ // Synchronize access to the BcbZone
+ //
+
+ ExFreeToZone( &LazyWriter.BcbZone,
+ Bcb );
+ } else {
+ ExFreePool(Bcb);
+ }
+ return;
+}
+
+
+//
+// Internal Support Routine
+//
+
+BOOLEAN
+CcMapAndRead(
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN PLARGE_INTEGER FileOffset,
+ IN ULONG Length,
+ IN ULONG ZeroFlags,
+ IN BOOLEAN Wait,
+ OUT PVACB *Vacb,
+ OUT PVOID *BaseAddress
+ )
+
+/*++
+
+Routine Description:
+
+ This routine may be called to ensure that the specified data is mapped,
+ read into memory and locked. If TRUE is returned, then the data has been
+ faulted in and locked, and the mapping Vacb is returned along with
+ a system-space address for the data.
+
+Arguments:
+
+ SharedCacheMap - Supplies the address of the SharedCacheMap for the
+ data.
+
+ FileOffset - Supplies the file offset of the desired data.
+
+ Length - Supplies the total amount of data desired.
+
+ ZeroFlags - Defines which pages may be zeroed if not resident.
+
+ Wait - Supplies FALSE if the caller is not willing to block for the
+ data, or TRUE if the caller is willing to block.
+
+ Vacb - Returns the address of the Vacb which is mapping the enclosing
+ virtual address range.
+
+ BaseAddress - Returns the system base address at which the data may
+ be accessed.
+
+Return Value:
+
+ FALSE - if the caller supplied Wait = FALSE and the data could not
+ be returned without blocking.
+
+ TRUE - if the data is being returned.
+
+ Note: this routine may raise an exception due to a map or read failure,
+ however, this can only happen if Wait was specified as TRUE, since
+ mapping and reading will not be performed if the caller cannot wait.
+
+--*/
+
+{
+ ULONG ReceivedLength;
+ ULONG ZeroCase;
+ ULONG SavedState;
+ BOOLEAN Result = FALSE;
+ PETHREAD Thread = PsGetCurrentThread();
+
+ DebugTrace(+1, me, "CcMapAndRead:\n", 0 );
+ DebugTrace( 0, me, " SharedCacheMap = %08lx\n", SharedCacheMap );
+ DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart,
+ FileOffset->HighPart );
+ DebugTrace( 0, me, " Length = %08lx\n", Length );
+
+ *BaseAddress = NULL;
+ *Vacb = NULL;
+
+ *BaseAddress = CcGetVirtualAddress( SharedCacheMap,
+ *FileOffset,
+ Vacb,
+ &ReceivedLength );
+
+ ASSERT( ReceivedLength >= Length );
+
+ MmSavePageFaultReadAhead( Thread, &SavedState );
+
+
+ //
+ // try around everything for cleanup.
+ //
+
+ try {
+
+ PVOID CacheBuffer;
+ ULONG PagesToGo;
+
+ //
+ // If we got more than we need, make sure to only use
+ // the right amount.
+ //
+
+ if (ReceivedLength > Length) {
+ ReceivedLength = Length;
+ }
+
+ //
+ // Now loop to touch all of the pages, calling MM to ensure
+ // that if we fault, we take in exactly the number of pages
+ // we need.
+ //
+
+ CacheBuffer = *BaseAddress;
+ PagesToGo = COMPUTE_PAGES_SPANNED( CacheBuffer,
+ ReceivedLength );
+
+ //
+ // Loop to touch or zero the pages.
+ //
+
+ ZeroCase = ZERO_FIRST_PAGE;
+
+ while (PagesToGo) {
+
+ //
+ // If we cannot zero this page, or Mm failed to return
+ // a zeroed page, then just fault it in.
+ //
+
+ MmSetPageFaultReadAhead( Thread, (PagesToGo - 1) );
+
+ if (!FlagOn(ZeroFlags, ZeroCase) ||
+ !MmCheckCachedPageState(CacheBuffer, TRUE)) {
+
+ //
+ // If we get here, it is almost certainly due to the fact
+ // that we can not take a zero page. MmCheckCachedPageState
+ // will so rarely return FALSE, that we will not worry
+ // about it. We will only check if the page is there if
+ // Wait is FALSE, so that we can do the right thing.
+ //
+
+ if (!MmCheckCachedPageState(CacheBuffer, FALSE) && !Wait) {
+ try_return( Result = FALSE );
+ }
+ }
+
+ CacheBuffer = (PCHAR)CacheBuffer + PAGE_SIZE;
+ PagesToGo -= 1;
+
+ if (PagesToGo == 1) {
+ ZeroCase = ZERO_LAST_PAGE;
+ } else {
+ ZeroCase = ZERO_MIDDLE_PAGES;
+ }
+ }
+
+ try_return( Result = TRUE );
+
+ try_exit: NOTHING;
+ }
+
+ //
+ // Cleanup on the way out.
+ //
+
+ finally {
+
+ MmResetPageFaultReadAhead(Thread, SavedState);
+
+ //
+ // If not successful, cleanup on the way out. Most of the errors
+ // can only occur as the result of an abnormal termination after
+ // successfully checking and locking the pages.
+ //
+
+ if (Result == FALSE) {
+
+ CcFreeVirtualAddress( *Vacb );
+ *Vacb = NULL;
+ *BaseAddress = NULL;
+ }
+ }
+
+ DebugTrace( 0, me, " <Vacb = %08lx\n", *Vacb );
+ DebugTrace( 0, me, " <BaseAddress = %08lx\n", *BaseAddress );
+ DebugTrace(-1, me, "CcMapAndRead -> %02lx\n", Result );
+
+ return Result;
+}
+
+
+//
+// Internal Support Routine
+//
+
+VOID
+CcFreeActiveVacb (
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN PVACB ActiveVacb OPTIONAL,
+ IN ULONG ActivePage,
+ IN ULONG PageIsDirty
+ )
+
+/*++
+
+Routine Description:
+
+ This routine may be called to zero the end of a locked page or
+ free the ActiveVacb for a Shared Cache Map, if there is one.
+ Note that some callers are not synchronized with foreground
+ activity, and may therefore not have an ActiveVacb. Examples
+ of unsynchronized callers are CcZeroEndOfLastPage (which is
+ called by MM) and any flushing done by CcWriteBehind.
+
+Arguments:
+
+ SharedCacheMap - SharedCacheMap to examine for page to be zeroed.
+
+ ActiveVacb - Vacb to free
+
+ ActivePage - Page that was used
+
+ PageIsDirty - ACTIVE_PAGE_IS_DIRTY if the active page is dirty
+
+Return Value:
+
+ None
+
+--*/
+
+{
+ LARGE_INTEGER ActiveOffset;
+ PVOID ActiveAddress;
+ ULONG BytesLeftInPage;
+ KIRQL OldIrql;
+
+ //
+ // If the page was locked, then unlock it.
+ //
+
+ if (SharedCacheMap->NeedToZero != NULL) {
+
+ //
+ // Zero the rest of the page under spinlock control,
+ // and then clear the address field. This field makes
+ // zero->nonzero transitions only when the file is exclusive,
+ // but it can make nonzero->zero transitions any time the
+ // spinlock is not held.
+ //
+
+ ExAcquireFastLock( &SharedCacheMap->ActiveVacbSpinLock, &OldIrql );
+
+ //
+ // The address could already be gone.
+ //
+
+ ActiveAddress = SharedCacheMap->NeedToZero;
+ if (ActiveAddress != NULL) {
+
+ BytesLeftInPage = PAGE_SIZE - ((((ULONG)ActiveAddress - 1) & (PAGE_SIZE - 1)) + 1);
+ RtlZeroBytes( ActiveAddress, BytesLeftInPage );
+ SharedCacheMap->NeedToZero = NULL;
+ }
+ ExReleaseFastLock( &SharedCacheMap->ActiveVacbSpinLock, OldIrql );
+
+ //
+ // Now call MM to unlock the address. Note we will never store the
+ // address at the start of the page, but we can sometimes store
+ // the start of the next page when we have exactly filled the page.
+ //
+
+ if (ActiveAddress != NULL) {
+ MmUnlockCachedPage( (PVOID)((PCHAR)ActiveAddress - 1) );
+ }
+ }
+
+ //
+ // See if caller actually has an ActiveVacb
+ //
+
+ if (ActiveVacb != NULL) {
+
+ //
+ // See if the page is dirty
+ //
+
+ if (PageIsDirty) {
+
+ ActiveOffset.QuadPart = (LONGLONG)ActivePage << PAGE_SHIFT;
+ ActiveAddress = (PVOID)((PCHAR)ActiveVacb->BaseAddress +
+ (ActiveOffset.LowPart & (VACB_MAPPING_GRANULARITY - 1)));
+
+ //
+ // Tell the Lazy Writer to write the page.
+ //
+
+ CcSetDirtyInMask( SharedCacheMap, &ActiveOffset, PAGE_SIZE );
+
+ //
+ // Now we need to clear the flag and decrement some counts if there is
+ // no other active Vacb which snuck in.
+ //
+
+ ExAcquireFastLock( &CcMasterSpinLock, &OldIrql );
+ ExAcquireSpinLockAtDpcLevel( &SharedCacheMap->ActiveVacbSpinLock );
+ if ((SharedCacheMap->ActiveVacb == NULL) &&
+ FlagOn(SharedCacheMap->Flags, ACTIVE_PAGE_IS_DIRTY)) {
+
+ ClearFlag(SharedCacheMap->Flags, ACTIVE_PAGE_IS_DIRTY);
+ SharedCacheMap->DirtyPages -= 1;
+ CcTotalDirtyPages -= 1;
+ }
+ ExReleaseSpinLockFromDpcLevel( &SharedCacheMap->ActiveVacbSpinLock );
+ ExReleaseFastLock( &CcMasterSpinLock, OldIrql );
+ }
+
+ //
+ // Now free the Vacb.
+ //
+
+ CcFreeVirtualAddress( ActiveVacb );
+ }
+}
+
+
+//
+// Internal Support Routine
+//
+
+VOID
+CcMapAndCopy(
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN PVOID UserBuffer,
+ IN PLARGE_INTEGER FileOffset,
+ IN ULONG Length,
+ IN ULONG ZeroFlags,
+ IN BOOLEAN WriteThrough
+ )
+
+/*++
+
+Routine Description:
+
+ This routine may be called to copy the specified user data to the
+ cache via a special Mm routine which copies the data to uninitialized
+ pages and returns.
+
+Arguments:
+
+ SharedCacheMap - Supplies the address of the SharedCacheMap for the
+ data.
+
+ UserBuffer - unsafe buffer supplying the user's data to be written
+
+ FileOffset - Supplies the file offset to be modified
+
+ Length - Supplies the total amount of data
+
+ ZeroFlags - Defines which pages may be zeroed if not resident.
+
+ WriteThrough - Supplies whether the data is to be written through or not
+
+Return Value:
+
+ None
+
+--*/
+
+{
+ ULONG ReceivedLength;
+ ULONG ZeroCase;
+ PVOID CacheBuffer;
+ PVOID SavedMappedBuffer;
+ ULONG SavedMappedLength;
+ ULONG ActivePage;
+ KIRQL OldIrql;
+ LARGE_INTEGER PFileOffset;
+ IO_STATUS_BLOCK IoStatus;
+ NTSTATUS Status;
+ ULONG SavedState;
+ BOOLEAN MorePages;
+ ULONG SavedTotalLength = Length;
+ LARGE_INTEGER LocalOffset = *FileOffset;
+ ULONG PageOffset = FileOffset->LowPart & (PAGE_SIZE - 1);
+ PVACB Vacb = NULL;
+ PETHREAD Thread = PsGetCurrentThread();
+
+ //
+ // Initialize SavePage to TRUE to skip the finally clause on zero-length
+ // writes.
+ //
+
+ BOOLEAN SavePage = TRUE;
+
+ DebugTrace(+1, me, "CcMapAndCopy:\n", 0 );
+ DebugTrace( 0, me, " SharedCacheMap = %08lx\n", SharedCacheMap );
+ DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart,
+ FileOffset->HighPart );
+ DebugTrace( 0, me, " Length = %08lx\n", Length );
+
+ MmSavePageFaultReadAhead( Thread, &SavedState );
+
+ //
+ // try around everything for cleanup.
+ //
+
+ try {
+
+ while (Length != 0) {
+
+ CacheBuffer = CcGetVirtualAddress( SharedCacheMap,
+ LocalOffset,
+ &Vacb,
+ &ReceivedLength );
+
+ //
+ // If we got more than we need, make sure to only use
+ // the right amount.
+ //
+
+ if (ReceivedLength > Length) {
+ ReceivedLength = Length;
+ }
+ SavedMappedBuffer = CacheBuffer;
+ SavedMappedLength = ReceivedLength;
+ Length -= ReceivedLength;
+
+ //
+ // Now loop to touch all of the pages, calling MM to ensure
+ // that if we fault, we take in exactly the number of pages
+ // we need.
+ //
+
+ CacheBuffer = (PVOID)((PCHAR)CacheBuffer - PageOffset);
+ ReceivedLength += PageOffset;
+
+ //
+ // Loop to touch or zero the pages.
+ //
+
+ ZeroCase = ZERO_FIRST_PAGE;
+
+ //
+ // Set up offset to page for use below.
+ //
+
+ PFileOffset = LocalOffset;
+ PFileOffset.LowPart -= PageOffset;
+
+ while (TRUE) {
+
+ //
+ // Calculate whether we wish to save an active page
+ // or not.
+ //
+
+ SavePage = ((Length == 0) &&
+ (ReceivedLength < PAGE_SIZE) &&
+ (SavedTotalLength <= (PAGE_SIZE / 2)) &&
+ !WriteThrough &&
+ (SharedCacheMap->FileObject->SectionObjectPointer->ImageSectionObject == NULL) &&
+ (SharedCacheMap->Mbcb != NULL) &&
+ ((ULONG)((ULONGLONG)PFileOffset.QuadPart >> PAGE_SHIFT) <
+ (SharedCacheMap->Mbcb->Bitmap.SizeOfBitMap - 1)));
+
+ MorePages = (ReceivedLength > PAGE_SIZE);
+
+ //
+ // Copy the data to the user buffer.
+ //
+
+ try {
+
+ //
+ // It is possible that there is a locked page
+ // hanging around, and so we need to nuke it here.
+ //
+
+ if (SharedCacheMap->NeedToZero != NULL) {
+ CcFreeActiveVacb( SharedCacheMap, NULL, 0, 0 );
+ }
+
+ Status = STATUS_SUCCESS;
+ if (FlagOn(ZeroFlags, ZeroCase)) {
+
+ Status = MmCopyToCachedPage( CacheBuffer,
+ UserBuffer,
+ PageOffset,
+ MorePages ?
+ (PAGE_SIZE - PageOffset) :
+ (ReceivedLength - PageOffset),
+ SavePage );
+
+ if (!NT_SUCCESS(Status)) {
+
+ ExRaiseStatus( FsRtlNormalizeNtstatus( Status,
+ STATUS_INVALID_USER_BUFFER ));
+ }
+
+ //
+ // Otherwise, we have to actually copy the data ourselves.
+ //
+
+ } else {
+
+ MmSetPageFaultReadAhead( Thread,
+ (MorePages && FlagOn(ZeroFlags, ZERO_LAST_PAGE)) ? 1 : 0);
+
+ RtlCopyBytes( (PVOID)((PCHAR)CacheBuffer + PageOffset),
+ UserBuffer,
+ MorePages ?
+ (PAGE_SIZE - PageOffset) :
+ (ReceivedLength - PageOffset) );
+
+ MmResetPageFaultReadAhead( Thread, SavedState );
+
+ }
+
+ } except( CcCopyReadExceptionFilter( GetExceptionInformation(),
+ &Status ) ) {
+
+ //
+ // If we got an access violation, then the user buffer went
+ // away. Otherwise we must have gotten an I/O error trying
+ // to bring the data in.
+ //
+
+ if (Status == STATUS_ACCESS_VIOLATION) {
+ ExRaiseStatus( STATUS_INVALID_USER_BUFFER );
+ }
+ else {
+ ExRaiseStatus( FsRtlNormalizeNtstatus( Status,
+ STATUS_UNEXPECTED_IO_ERROR ));
+ }
+ }
+
+ //
+ // Now get out quickly if it is a small write and we want
+ // to save the page.
+ //
+
+ if (SavePage) {
+
+ ActivePage = (ULONG)( (ULONGLONG)Vacb->Overlay.FileOffset.QuadPart >> PAGE_SHIFT ) +
+ (((PCHAR)CacheBuffer - (PCHAR)Vacb->BaseAddress) >>
+ PAGE_SHIFT);
+
+ PFileOffset.LowPart += ReceivedLength;
+
+ //
+ // If the cache page was left locked, then record the address from
+ // which we must later zero to the end of the page.
+ //
+
+ if (Status == STATUS_CACHE_PAGE_LOCKED) {
+
+ ExAcquireFastLock( &SharedCacheMap->ActiveVacbSpinLock, &OldIrql );
+
+ ASSERT(SharedCacheMap->NeedToZero == NULL);
+
+ SharedCacheMap->NeedToZero = (PVOID)((PCHAR)CacheBuffer +
+ (PFileOffset.LowPart & (PAGE_SIZE - 1)));
+ SharedCacheMap->NeedToZeroPage = ActivePage;
+ ExReleaseFastLock( &SharedCacheMap->ActiveVacbSpinLock, OldIrql );
+ }
+
+ SetActiveVacb( SharedCacheMap,
+ OldIrql,
+ Vacb,
+ ActivePage,
+ ACTIVE_PAGE_IS_DIRTY );
+
+ try_return( NOTHING );
+ }
+
+ //
+ // If it looks like we may save a page and exit on the next loop,
+ // then we must make sure to mark the current page dirty. Note
+ // that Cc[Fast]CopyWrite will finish the last part of any page
+ // before allowing us to free the Active Vacb above, therefore
+ // this case only occurs for a small random write.
+ //
+
+ if ((SavedTotalLength <= (PAGE_SIZE / 2)) && !WriteThrough) {
+
+ CcSetDirtyInMask( SharedCacheMap, &PFileOffset, ReceivedLength );
+ }
+
+ UserBuffer = (PVOID)((PCHAR)UserBuffer + (PAGE_SIZE - PageOffset));
+ PageOffset = 0;
+
+ //
+ // If there is more than a page to go (including what we just
+ // copied), then adjust our buffer pointer and counts, and
+ // determine if we are to the last page yet.
+ //
+
+ if (MorePages) {
+
+ CacheBuffer = (PCHAR)CacheBuffer + PAGE_SIZE;
+ ReceivedLength -= PAGE_SIZE;
+
+ //
+ // Update our offset to the page. Note that 32-bit
+ // add is ok since we cannot cross a Vacb boundary
+ // and we reinitialize this offset before entering
+ // this loop again.
+ //
+
+ PFileOffset.LowPart += PAGE_SIZE;
+
+ if (ReceivedLength > PAGE_SIZE) {
+ ZeroCase = ZERO_MIDDLE_PAGES;
+ } else {
+ ZeroCase = ZERO_LAST_PAGE;
+ }
+
+ } else {
+
+ break;
+ }
+ }
+
+ //
+ // If there is still more to write (i.e., we are going to step
+ // onto the next vacb) AND we just dirtied more than 64K, then
+ // do a vicarious MmFlushSection here. This prevents us from
+ // creating unlimited dirty pages while holding the file
+ // resource exclusive. We also do not need to set the pages
+ // dirty in the mask in this case.
+ //
+
+ if (Length > CcMaxDirtyWrite) {
+
+ MmSetAddressRangeModified( SavedMappedBuffer, SavedMappedLength );
+ MmFlushSection( SharedCacheMap->FileObject->SectionObjectPointer,
+ &LocalOffset,
+ SavedMappedLength,
+ &IoStatus,
+ TRUE );
+
+ if (!NT_SUCCESS(IoStatus.Status)) {
+ ExRaiseStatus( FsRtlNormalizeNtstatus( IoStatus.Status,
+ STATUS_UNEXPECTED_IO_ERROR ));
+ }
+
+ //
+ // For write through files, call Mm to propagate the dirty bits
+ // here while we have the view mapped, so we know the flush will
+ // work below. Again - do not set dirty in the mask.
+ //
+
+ } else if (WriteThrough) {
+
+ MmSetAddressRangeModified( SavedMappedBuffer, SavedMappedLength );
+
+ //
+ // For the normal case, just set the pages dirty for the Lazy Writer
+ // now.
+ //
+
+ } else {
+
+ CcSetDirtyInMask( SharedCacheMap, &LocalOffset, SavedMappedLength );
+ }
+
+ CcFreeVirtualAddress( Vacb );
+ Vacb = NULL;
+
+ //
+ // If we have to loop back to get at least a page, it will be ok to
+ // zero the first page. If we are not getting at least a page, we
+ // must make sure we clear the ZeroFlags if we cannot zero the last
+ // page.
+ //
+
+ if (Length >= PAGE_SIZE) {
+ ZeroFlags |= ZERO_FIRST_PAGE;
+ } else if ((ZeroFlags & ZERO_LAST_PAGE) == 0) {
+ ZeroFlags = 0;
+ }
+
+ //
+ // Note that if ReceivedLength (and therefore SavedMappedLength)
+ // was truncated to the transfer size then the new LocalOffset
+ // computed below is not correct. This is not an issue since
+ // in that case (Length == 0) and we would never get here.
+ //
+
+ LocalOffset.QuadPart = LocalOffset.QuadPart + (LONGLONG)SavedMappedLength;
+ }
+ try_exit: NOTHING;
+ }
+
+ //
+ // Cleanup on the way out.
+ //
+
+ finally {
+
+ MmResetPageFaultReadAhead( Thread, SavedState );
+
+ //
+ // We have no work to do if we have squirreled away the Vacb.
+ //
+
+ if (!SavePage || AbnormalTermination()) {
+
+ //
+ // Make sure we do not leave anything mapped or dirty in the PTE
+ // on the way out.
+ //
+
+ if (Vacb != NULL) {
+
+ CcFreeVirtualAddress( Vacb );
+ }
+
+ //
+ // Either flush the whole range because of write through, or
+ // mark it dirty for the lazy writer.
+ //
+
+ if (WriteThrough) {
+
+ MmFlushSection ( SharedCacheMap->FileObject->SectionObjectPointer,
+ FileOffset,
+ SavedTotalLength,
+ &IoStatus,
+ TRUE );
+
+ if (!NT_SUCCESS(IoStatus.Status)) {
+ ExRaiseStatus( FsRtlNormalizeNtstatus( IoStatus.Status,
+ STATUS_UNEXPECTED_IO_ERROR ));
+ }
+
+ //
+ // Advance ValidDataGoal
+ //
+
+ LocalOffset.QuadPart = FileOffset->QuadPart + (LONGLONG)SavedTotalLength;
+ if (LocalOffset.QuadPart > SharedCacheMap->ValidDataGoal.QuadPart) {
+ SharedCacheMap->ValidDataGoal = LocalOffset;
+ }
+ }
+ }
+ }
+
+ DebugTrace(-1, me, "CcMapAndCopy -> VOID\n", 0 );
+
+ return;
+}
+
+
+#ifdef CCDBG
+VOID
+CcDump (
+ IN PVOID Ptr
+ )
+
+{
+ PVOID Junk = Ptr;
+}
+#endif
diff --git a/private/ntos/cache/cc.h b/private/ntos/cache/cc.h
new file mode 100644
index 000000000..aff15f746
--- /dev/null
+++ b/private/ntos/cache/cc.h
@@ -0,0 +1,1746 @@
+/*++
+
+Copyright (c) 1990 Microsoft Corporation
+
+Module Name:
+
+ cc.h
+
+Abstract:
+
+ This module is a header file for the Memory Management based cache
+ management routines for the common Cache subsystem.
+
+Author:
+
+ Tom Miller [TomM] 4-May-1990
+
+Revision History:
+
+--*/
+
+#ifndef _CCh_
+#define _CCh_
+
+#include <ntos.h>
+
+#ifdef MEMPRINT
+#include <memprint.h>
+#endif
+
+//
+// This turns on the Bcb list debugging in a debug system. Set value
+// to 0 to turn off.
+//
+
+#if DBG
+#define LIST_DBG 1
+#endif
+
+#include <FsRtl.h>
+
+#include <string.h>
+
+//
+// Tag all of our allocations if tagging is turned on
+//
+
+#undef FsRtlAllocatePool
+#undef FsRtlAllocatePoolWithQuota
+
+#define FsRtlAllocatePool(a,b) FsRtlAllocatePoolWithTag(a,b,' cC')
+#define FsRtlAllocatePoolWithQuota(a,b) FsRtlAllocatePoolWithQuotaTag(a,b,' cC')
+
+#undef ExAllocatePool
+#undef ExAllocatePoolWithQuota
+
+#define ExAllocatePool(a,b) ExAllocatePoolWithTag(a,b,' cC')
+#define ExAllocatePoolWithQuota(a,b) ExAllocatePoolWithQuotaTag(a,b,' cC')
+
+//
+// Peek at number of available pages.
+//
+
+extern ULONG MmAvailablePages;
+
+#if DBG
+// #define MIPS_PREFILL 0
+#endif
+
+#ifdef MIPS
+#ifdef MIPS_PREFILL
+VOID
+KeSweepDcache (
+ IN BOOLEAN AllProcessors
+ );
+#endif
+#endif
+
+//
+// Define our node type codes.
+//
+
+#define CACHE_NTC_SHARED_CACHE_MAP (0x2FF)
+#define CACHE_NTC_PRIVATE_CACHE_MAP (0x2FE)
+#define CACHE_NTC_BCB (0x2FD)
+#define CACHE_NTC_DEFERRED_WRITE (0x2FC)
+#define CACHE_NTC_MBCB (0x2FB)
+#define CACHE_NTC_OBCB (0x2FA)
+
+//
+// The following definitions are used to generate meaningful blue bugcheck
+// screens. On a bugcheck the file system can output 4 ulongs of useful
+// information. The first ulong will have encoded in it a source file id
+// (in the high word) and the line number of the bugcheck (in the low word).
+// The other values can be whatever the caller of the bugcheck routine deems
+// necessary.
+//
+// Each individual file that calls bugcheck needs to have defined at the
+// start of the file a constant called BugCheckFileId with one of the
+// CACHE_BUG_CHECK_ values defined below and then use CcBugCheck to bugcheck
+// the system.
+//
+
+#define CACHE_BUG_CHECK_CACHEDAT (0x00010000)
+#define CACHE_BUG_CHECK_CACHESUB (0x00020000)
+#define CACHE_BUG_CHECK_COPYSUP (0x00030000)
+#define CACHE_BUG_CHECK_FSSUP (0x00040000)
+#define CACHE_BUG_CHECK_LAZYRITE (0x00050000)
+#define CACHE_BUG_CHECK_LOGSUP (0x00060000)
+#define CACHE_BUG_CHECK_MDLSUP (0x00070000)
+#define CACHE_BUG_CHECK_PINSUP (0x00080000)
+#define CACHE_BUG_CHECK_VACBSUP (0x00090000)
+
+#define CcBugCheck(A,B,C) { KeBugCheckEx(CACHE_MANAGER, BugCheckFileId | __LINE__, A, B, C ); }
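+
+//
+// For illustration only -- a module using the macro above is expected to
+// define its file id and then invoke the macro roughly as follows (the
+// file id and status shown are just examples):
+//
+//     #define BugCheckFileId (CACHE_BUG_CHECK_CACHESUB)
+//
+//     ...
+//
+//     CcBugCheck( Status, 0, 0 );
+//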
+
+//
+// Define maximum View Size. (These constants are currently chosen so
+// as to be exactly a page's worth of PTEs.)
+//
+
+#define DEFAULT_CREATE_MODULO ((ULONG)(0x00100000))
+#define DEFAULT_EXTEND_MODULO ((ULONG)(0x00100000))
+
+//
+// For FO_SEQUENTIAL_ONLY files, define how far we go before unmapping
+// views.
+//
+
+#define SEQUENTIAL_ONLY_MAP_LIMIT ((ULONG)(0x00080000))
+
+//
+// Define some constants to drive read ahead
+//
+
+//
+// Set max read ahead (some drivers, such as the AT disk driver, break up transfers >= 128kb)
+//
+
+#define MAX_READ_AHEAD (MM_MAXIMUM_DISK_IO_SIZE)
+
+//
+// Set maximum write behind / lazy write (most drivers break up transfers >= 64kb)
+//
+
+#define MAX_WRITE_BEHIND (MM_MAXIMUM_DISK_IO_SIZE)
+
+//
+// Define constants to control zeroing of file data: one constant to control
+// how much data we will actually zero ahead in the cache, and another to
+// control what the maximum transfer size is that we will use to write zeros.
+//
+
+#define MAX_ZERO_TRANSFER (MM_MAXIMUM_DISK_IO_SIZE)
+#define MAX_ZEROS_IN_CACHE (0x10000)
+
+//
+// Define constants controlling when the Bcb list is broken into a
+// pendaflex-style array of listheads, and how the correct listhead
+// is found. Begin when file size exceeds 2MB, and cover 512KB per
+// listhead. At 512KB per listhead, the BcbListArray is the same
+// size as the Vacb array, i.e., it doubles the size.
+//
+
+#define BEGIN_BCB_LIST_ARRAY (0x200000)
+#define SIZE_PER_BCB_LIST (VACB_MAPPING_GRANULARITY * 2)
+#define BCB_LIST_SHIFT (VACB_OFFSET_SHIFT + 1)
+
+#define GetBcbListHead(SCM,OFF) ( \
+ (((SCM)->SectionSize.QuadPart > BEGIN_BCB_LIST_ARRAY) && \
+ FlagOn((SCM)->Flags, MODIFIED_WRITE_DISABLED)) ? \
+ ((PLIST_ENTRY)((SCM)->Vacbs) + (((SCM)->SectionSize.QuadPart + (OFF)) >> BCB_LIST_SHIFT)) : \
+ &(SCM)->BcbList \
+ )
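+
+//
+// For illustration only -- a worked example of the index arithmetic above,
+// assuming (as in cache.h) VACB_MAPPING_GRANULARITY is 256KB and
+// VACB_OFFSET_SHIFT is 18, so that BCB_LIST_SHIFT is 19 and
+// SIZE_PER_BCB_LIST is 512KB:
+//
+//     SectionSize = 0x400000 (4MB), OFF = 0x180000
+//
+//     The Vacb pointer array holds 0x400000 / 0x40000 = 16 pointers, which
+//     is the same storage as 8 LIST_ENTRYs. The macro computes
+//     (0x400000 + 0x180000) >> 19 == 11, so the listhead used is the
+//     (0x180000 >> 19) == 3rd Bcb listhead following those 8 slots.
+//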
+
+//
+// NOISE_BITS defines how many bits are masked off when testing for
+// sequential reads. This allows the reader to skip up to 7 bytes
+// for alignment purposes, and we still consider the next read to be
+// sequential. Starting and ending addresses are masked by this pattern
+// before comparison.
+//
+
+#define NOISE_BITS (0x7)
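+
+//
+// For illustration only -- assuming the sequential check compares the new
+// starting offset with the previous read's ending offset after masking both
+// with ~NOISE_BITS: a read that ended at offset 0x1003 followed by a read
+// starting at offset 0x1005 still compares equal (both mask to 0x1000), so
+// the second read is treated as sequential.
+//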
+
+//
+// Define some constants to drive the Lazy Writer
+//
+
+#define LAZY_WRITER_IDLE_DELAY ((LONG)(10000000))
+#define LAZY_WRITER_COLLISION_DELAY ((LONG)(1000000))
+
+//
+// The following target should best be a power of 2
+//
+
+#define LAZY_WRITER_MAX_AGE_TARGET ((ULONG)(8))
+
+//
+// The global Cache Manager debug level variable, its values are:
+//
+// 0x00000000 Always gets printed (used when about to bug check)
+//
+// 0x00000001 FsSup
+// 0x00000002 CacheSub
+// 0x00000004 CopySup
+// 0x00000008 PinSup
+//
+// 0x00000010 MdlSup
+// 0x00000020 LazyRite
+// 0x00000040
+// 0x00000080
+//
+// 0x00000100 Trace all Mm calls
+//
+
+#define mm (0x100)
+
+//
+// Miscellaneous support macros.
+//
+// ULONG
+// FlagOn (
+// IN ULONG Flags,
+// IN ULONG SingleFlag
+// );
+//
+// BOOLEAN
+// BooleanFlagOn (
+// IN ULONG Flags,
+// IN ULONG SingleFlag
+// );
+//
+// VOID
+// SetFlag (
+// IN ULONG Flags,
+// IN ULONG SingleFlag
+// );
+//
+// VOID
+// ClearFlag (
+// IN ULONG Flags,
+// IN ULONG SingleFlag
+// );
+//
+
+#define FlagOn(F,SF) ( \
+ (((F) & (SF))) \
+)
+
+#define BooleanFlagOn(F,SF) ( \
+ (BOOLEAN)(((F) & (SF)) != 0) \
+)
+
+#define SetFlag(F,SF) { \
+ (F) |= (SF); \
+}
+
+#define ClearFlag(F,SF) { \
+ (F) &= ~(SF); \
+}
+
+
+//
+// Define the Virtual Address Control Block, which controls all mapping
+// performed by the Cache Manager.
+//
+
+//
+// First some constants
+//
+
+#define PREALLOCATED_VACBS (4)
+
+//
+// Virtual Address Control Block
+//
+
+typedef struct _VACB {
+
+ //
+ // Base Address for this control block.
+ //
+
+ PVOID BaseAddress;
+
+ //
+ // Pointer to the Shared Cache Map using this Vacb.
+ //
+
+ struct _SHARED_CACHE_MAP *SharedCacheMap;
+
+ //
+ // Overlay for remembering mapped offset within the Shared Cache Map,
+ // and the count of the number of times this Vacb is in use.
+ //
+
+ union {
+
+ //
+ // File Offset within Shared Cache Map
+ //
+
+ LARGE_INTEGER FileOffset;
+
+ //
+ // Count of number of times this Vacb is in use. The size of this
+ // count is calculated to be adequate, while never large enough to
+ // overwrite nonzero bits of the MappedOffset, which is a multiple
+ // of VACB_MAPPING_GRANULARITY.
+ //
+
+ USHORT ActiveCount;
+
+ } Overlay;
+
+} VACB, *PVACB;
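+
+//
+// For illustration: since a mapped FileOffset is always a multiple of
+// VACB_MAPPING_GRANULARITY (assumed here, as in cache.h, to be 256KB), its
+// low-order bits are always zero, so the USHORT ActiveCount can overlay
+// those bits without ever colliding with a nonzero offset bit.
+//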
+
+
+//
+// The Private Cache Map is a structure pointed to by the File Object, whenever
+// a file is opened with caching enabled (default).
+//
+
+typedef struct _PRIVATE_CACHE_MAP {
+
+ //
+ // Type and size of this record
+ //
+
+ CSHORT NodeTypeCode;
+ CSHORT NodeByteSize;
+
+ //
+ // Pointer to FileObject for this PrivateCacheMap.
+ //
+
+ PFILE_OBJECT FileObject;
+
+ //
+ // READ AHEAD CONTROL
+ //
+ // Read ahead history for determining when read ahead might be
+ // beneficial.
+ //
+
+ LARGE_INTEGER FileOffset1;
+ LARGE_INTEGER BeyondLastByte1;
+
+ LARGE_INTEGER FileOffset2;
+ LARGE_INTEGER BeyondLastByte2;
+
+ //
+ // Current read ahead requirements.
+ //
+ // Array element 0 is optionally used for recording remaining bytes
+ // required for satisfying a large Mdl read.
+ //
+ // Array element 1 is used for predicted read ahead.
+ //
+
+ LARGE_INTEGER ReadAheadOffset[2];
+ ULONG ReadAheadLength[2];
+
+ //
+ // SpinLock controlling access to following fields
+ //
+
+ KSPIN_LOCK ReadAheadSpinLock;
+
+ //
+ // Read Ahead mask formed from Read Ahead granularity - 1
+ //
+
+ ULONG ReadAheadMask;
+
+ //
+ // Links for list of all PrivateCacheMaps linked to the same
+ // SharedCacheMap.
+ //
+
+ LIST_ENTRY PrivateLinks;
+
+ //
+ // This flag says read ahead is currently active, which means either
+ // a file system call to CcReadAhead is still determining if the
+ // desired data is already resident, or else a request to do read ahead
+ // has been queued to a worker thread.
+ //
+
+ BOOLEAN ReadAheadActive;
+
+ //
+ // Flag to say whether read ahead is currently enabled for this
+ // FileObject/PrivateCacheMap. On read misses it is enabled on
+ // read ahead hits it will be disabled. Initially disabled.
+ //
+
+ BOOLEAN ReadAheadEnabled;
+
+} PRIVATE_CACHE_MAP;
+
+typedef PRIVATE_CACHE_MAP *PPRIVATE_CACHE_MAP;
+
+
+//
+// The Shared Cache Map is a per-file structure pointed to indirectly by
+// each File Object. The File Object points to a pointer in a single
+// FS-private structure for the file (Fcb). The SharedCacheMap maps the
+// first part of the file for common access by all callers.
+//
+
+typedef struct _SHARED_CACHE_MAP {
+
+ //
+ // Type and size of this record
+ //
+
+ CSHORT NodeTypeCode;
+ CSHORT NodeByteSize;
+
+ //
+ // Number of times this file has been opened cached.
+ //
+
+ ULONG OpenCount;
+
+ //
+ // Actual size of file, primarily for restricting Read Ahead. Initialized
+ // on creation and maintained by extend and truncate operations.
+ //
+ // NOTE: This field may never be moved, thanks to the late DavidGoe,
+ // who should have written this comment himself :-( cache.h
+ // exports a macro which "knows" that FileSize is the second
+ // longword in the Cache Map!
+ //
+
+ LARGE_INTEGER FileSize;
+
+ //
+ // Bcb Listhead. The BcbList is ordered by descending
+ // FileOffsets, to optimize misses in the sequential I/O case.
+ //
+
+ LIST_ENTRY BcbList;
+
+ //
+ // Size of section created.
+ //
+
+ LARGE_INTEGER SectionSize;
+
+ //
+ // ValidDataLength for file, as currently stored by the file system.
+ //
+
+ LARGE_INTEGER ValidDataLength;
+
+ //
+ // Goal for ValidDataLength, when current dirty data is written.
+ //
+
+ LARGE_INTEGER ValidDataGoal;
+
+ //
+ // Pointer to a contiguous array of Vacb pointers which control mapping
+ // to this file, along with Vacbs (currently) for a 1MB file.
+ //
+
+ PVACB InitialVacbs[PREALLOCATED_VACBS];
+ PVACB * Vacbs;
+
+ //
+ // Referenced pointer to original File Object on which the SharedCacheMap
+ // was created.
+ //
+
+ PFILE_OBJECT FileObject;
+
+ //
+ // Describe Active Vacb and Page for copysup optimizations.
+ //
+
+ volatile PVACB ActiveVacb;
+ ULONG ActivePage;
+
+ //
+ // Virtual address needing zero to end of page
+ //
+
+ volatile PVOID NeedToZero;
+ ULONG NeedToZeroPage;
+
+ //
+ // Fields for synchronizing on active requests.
+ //
+
+ KSPIN_LOCK ActiveVacbSpinLock;
+ ULONG VacbActiveCount;
+
+ //
+ // THE NEXT TWO FIELDS MUST BE ADJACENT, TO SUPPORT
+ // SHARED_CACHE_MAP_LIST_CURSOR!
+ //
+ // Links for Global SharedCacheMap List
+ //
+
+ LIST_ENTRY SharedCacheMapLinks;
+
+ //
+ // Shared Cache Map flags (defined below)
+ //
+
+ ULONG Flags;
+
+ //
+ // Mask Bcb for this SharedCacheMap, if there is one.
+ //
+
+ struct _MBCB *Mbcb;
+
+ //
+ // Number of dirty pages in this SharedCacheMap. Used to trigger
+ // write behind.
+ //
+
+ ULONG DirtyPages;
+
+ //
+ // Pointer to the common Section Object used by the file system.
+ //
+
+ PVOID Section;
+
+ //
+ // Status variable set by creator of SharedCacheMap
+ //
+
+ NTSTATUS Status;
+
+ //
+ // This event pointer is used to handle creation collisions.
+ // If a second thread tries to call CcInitializeCacheMap for the
+ // same file, while BeingCreated (below) is TRUE, then that thread
+ // will allocate an event, store it here (if not already allocated),
+ // and wait on it. The first creator will set this event when it
+ // is done. The event is not deleted until CcUninitializeCacheMap
+ // is called, to avoid possible race conditions. (Note that normally
+ // the event never has to be allocated.)
+ //
+
+ PKEVENT CreateEvent;
+
+ //
+ // This points to an event used to wait for active count to go to zero
+ //
+
+ PKEVENT WaitOnActiveCount;
+
+ //
+ // These two fields control the writing of large metadata
+ // streams. The first field gives a target for the current
+ // flush interval, and the second field stores the end of
+ // the last flush that occurred on this file.
+ //
+
+ ULONG PagesToWrite;
+ LONGLONG BeyondLastFlush;
+
+ //
+ // Pointer to structure of routines used by the Lazy Writer to Acquire
+ // and Release the file for Lazy Write and Close, to avoid deadlocks,
+ // and the context to call them with.
+ //
+
+ PCACHE_MANAGER_CALLBACKS Callbacks;
+
+ PVOID LazyWriteContext;
+
+ //
+ // Listhead of all PrivateCacheMaps linked to this SharedCacheMap.
+ //
+
+ LIST_ENTRY PrivateList;
+
+ //
+ // Log handle specified for this shared cache map, for support of routines
+ // in logsup.c
+ //
+
+ PVOID LogHandle;
+
+ //
+ // Callback routine specified for flushing to Lsn.
+ //
+
+ PFLUSH_TO_LSN FlushToLsnRoutine;
+
+ //
+ // Dirty Page Threshold for this stream
+ //
+
+ ULONG DirtyPageThreshold;
+
+ //
+ // Lazy Writer pass count. Used by the Lazy Writer for
+ // no modified write streams, which are not serviced on
+ // every pass in order to avoid contention with foreground
+ // activity.
+ //
+
+ ULONG LazyWritePassCount;
+
+ //
+ // This event pointer is used to allow a file system to be notified upon
+ // the deletion of a shared cache map.
+ //
+ // This has to be provided here because the cache manager may decide to
+ // "Lazy Delete" the shared cache map, and some network file systems
+ // will want to know when the lazy delete completes.
+ //
+
+ PCACHE_UNINITIALIZE_EVENT UninitializeEvent;
+
+ //
+ // Reserved for alignment
+ //
+
+ ULONG Reserved;
+
+ //
+ // This is a scratch event which can be used either for
+ // a CreateEvent or a WaitOnActiveCount event. It is
+ // difficult to share this event, because of the very
+ // careful semantics by which they are cleared. On the
+ // other hand, both events are relatively rarely used
+ // (especially the CreateEvent), so it will be rare that
+ // we will actually use both for the same file, and have
+ // to allocate one.
+ //
+
+ KEVENT Event;
+
+ //
+ // Preallocate on PrivateCacheMap to reduce pool allocations.
+ //
+
+ PRIVATE_CACHE_MAP PrivateCacheMap;
+
+} SHARED_CACHE_MAP;
+
+typedef SHARED_CACHE_MAP *PSHARED_CACHE_MAP;
+
+//
+// Shared Cache Map Flags
+//
+
+//
+// Read ahead has been disabled on this file.
+//
+
+#define DISABLE_READ_AHEAD 0x0001
+
+//
+// Write behind has been disabled on this file.
+//
+
+#define DISABLE_WRITE_BEHIND 0x0002
+
+//
+// This flag indicates whether CcInitializeCacheMap was called with
+// PinAccess = TRUE.
+//
+
+#define PIN_ACCESS 0x0004
+
+//
+// This flag indicates that a truncate is required when OpenCount
+// goes to 0.
+//
+
+#define TRUNCATE_REQUIRED 0x0010
+
+//
+// This flag indicates that a LazyWrite request is queued.
+//
+
+#define WRITE_QUEUED 0x0020
+
+//
+// This flag indicates that we have never seen anyone cache
+// the file except with FO_SEQUENTIAL_ONLY, so we should
+// tell MM to dump pages when we unmap.
+//
+
+#define ONLY_SEQUENTIAL_ONLY_SEEN 0x0040
+
+//
+// Active Page is locked
+//
+
+#define ACTIVE_PAGE_IS_DIRTY 0x0080
+
+//
+// Flag to say that a create is in progress.
+//
+
+#define BEING_CREATED 0x0100
+
+//
+// Flag to say that modified write was disabled on the section.
+//
+
+#define MODIFIED_WRITE_DISABLED 0x0200
+
+//
+// Flag that indicates if a lazy write ever occurred on this file.
+//
+
+#define LAZY_WRITE_OCCURRED 0x0400
+
+//
+// Flag that indicates this structure is only a cursor, only the
+// SharedCacheMapLinks and Flags are valid!
+//
+
+#define IS_CURSOR 0x0800
+
+//
+// Cursor structure for traversing the SharedCacheMap lists. Anyone
+// scanning these lists must verify that the IS_CURSOR flag is clear
+// before looking at other SharedCacheMap fields.
+//
+
+
+typedef struct _SHARED_CACHE_MAP_LIST_CURSOR {
+
+ //
+ // Links for Global SharedCacheMap List
+ //
+
+ LIST_ENTRY SharedCacheMapLinks;
+
+ //
+ // Shared Cache Map flags, IS_CURSOR must be set.
+ //
+
+ ULONG Flags;
+
+} SHARED_CACHE_MAP_LIST_CURSOR, *PSHARED_CACHE_MAP_LIST_CURSOR;
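+
+//
+// For illustration only -- a minimal sketch of how a scan of one of these
+// lists is expected to skip cursors (the real scans live in cachesub.c and
+// lazyrite.c); the local names are hypothetical:
+//
+//     for (Link = CcDirtySharedCacheMapList.SharedCacheMapLinks.Flink;
+//          Link != &CcDirtySharedCacheMapList.SharedCacheMapLinks;
+//          Link = Link->Flink) {
+//
+//         SharedCacheMap = CONTAINING_RECORD( Link,
+//                                             SHARED_CACHE_MAP,
+//                                             SharedCacheMapLinks );
+//
+//         if (FlagOn(SharedCacheMap->Flags, IS_CURSOR)) {
+//             continue;
+//         }
+//
+//         ... only now is it safe to look at the other fields ...
+//     }
+//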
+
+
+
+//
+// This structure is a "mask" Bcb. For fast simple write operations,
+// a mask Bcb is used so that we basically only have to set bits to remember
+// where the dirty data is.
+//
+
+typedef struct _MBCB {
+
+ //
+ // Type and size of this record
+ //
+
+ CSHORT NodeTypeCode;
+ CSHORT NodeIsInZone;
+
+ //
+ // Number of dirty pages (set bits) in the bitmap below.
+ //
+
+ ULONG DirtyPages;
+
+ //
+ // First and last dirty pages
+ //
+
+ ULONG FirstDirtyPage;
+ ULONG LastDirtyPage;
+
+ //
+ // This is a hint on where to resume writing, since we will not
+ // always write all of the dirty data at once.
+ //
+
+ ULONG ResumeWritePage;
+
+ //
+ // This field is used as a scratch area for the Lazy Writer to
+ // guide how much he will write each time he wakes up.
+ //
+
+ ULONG PagesToWrite;
+
+ //
+ // Rtl Bitmap structure to describe the bits to follow.
+ //
+
+ RTL_BITMAP Bitmap;
+
+} MBCB;
+
+typedef MBCB *PMBCB;
+
+
+//
+// This is the Buffer Control Block structure for representing data which
+// is "pinned" in memory by one or more active requests and/or is dirty. This
+// structure is created the first time that a call to CcPinFileData specifies
+// a particular integral range of pages. It is deallocated whenever the Pin
+// Count reaches 0 and the Bcb is not Dirty.
+//
+// NOTE: The first four fields must be the same as the PUBLIC_BCB.
+//
+
+typedef struct _BCB {
+
+ //
+ // Type and size of this record
+ //
+
+ CSHORT NodeTypeCode;
+ CSHORT NodeIsInZone;
+
+ //
+ // Byte FileOffset and length of entire buffer
+ //
+
+ ULONG ByteLength;
+ LARGE_INTEGER FileOffset;
+
+ //
+ // Links for BcbList in SharedCacheMap
+ //
+
+ LIST_ENTRY BcbLinks;
+
+ //
+ // Byte FileOffset of last byte in buffer (used for searching)
+ //
+
+ LARGE_INTEGER BeyondLastByte;
+
+ //
+ // Oldest Lsn (if specified) when this buffer was set dirty.
+ //
+
+ LARGE_INTEGER OldestLsn;
+
+ //
+ // Most recent Lsn specified when this buffer was set dirty.
+ // The FlushToLsnRoutine is called with this Lsn.
+ //
+
+ LARGE_INTEGER NewestLsn;
+
+ //
+ // Pointer to Vacb via which this Bcb is mapped.
+ //
+
+ PVACB Vacb;
+
+ //
+ // Links and caller addresses for the global Bcb list (for debug only)
+ //
+
+#if LIST_DBG
+ LIST_ENTRY CcBcbLinks;
+ PVOID CallerAddress;
+ PVOID CallersCallerAddress;
+#endif
+
+ //
+ // Count of threads actively using this Bcb to process a request.
+ // This must be manipulated under protection of the BcbListSpinLock
+ // in the SharedCacheMap.
+ //
+
+ ULONG PinCount;
+
+ //
+ // Resource to synchronize buffer access. Pinning Readers and all Writers
+ // of the described buffer take out shared access (synchronization of
+ // buffer modifications is strictly up to the caller). Note that pinning
+ // readers do not declare if they are going to modify the buffer or not.
+ // Anyone writing to disk takes out exclusive access, to prevent the buffer
+ // from changing while it is being written out.
+ //
+
+ ERESOURCE Resource;
+
+ //
+ // Pointer to SharedCacheMap for this Bcb.
+ //
+
+ PSHARED_CACHE_MAP SharedCacheMap;
+
+ //
+ // This is the Base Address at which the buffer can be seen in
+ // system space. All access to buffer data should go through this
+ // address.
+ //
+
+ PVOID BaseAddress;
+
+ //
+ // Flags
+ //
+
+ BOOLEAN Dirty;
+
+} BCB;
+
+typedef BCB *PBCB;
+
+//
+// This is the Overlap Buffer Control Block structure for representing data which
+// is "pinned" in memory and must be represented by multiple Bcbs due to overlaps.
+//
+// NOTE: The first four fields must be the same as the PUBLIC_BCB.
+//
+
+typedef struct _OBCB {
+
+ //
+ // Type and size of this record
+ //
+
+ CSHORT NodeTypeCode;
+ CSHORT NodeByteSize;
+
+ //
+ // Byte FileOffset and length of entire buffer
+ //
+
+ ULONG ByteLength;
+ LARGE_INTEGER FileOffset;
+
+ //
+ // Vector of Bcb pointers.
+ //
+
+ PBCB Bcbs[ANYSIZE_ARRAY];
+
+} OBCB;
+
+typedef OBCB *POBCB;
+
+
+//
+// Struct for remembering deferred writes for later posting.
+//
+
+typedef struct _DEFERRED_WRITE {
+
+ //
+ // Type and size of this record
+ //
+
+ CSHORT NodeTypeCode;
+ CSHORT NodeByteSize;
+
+ //
+ // The file to be written.
+ //
+
+ PFILE_OBJECT FileObject;
+
+ //
+ // Number of bytes the caller intends to write
+ //
+
+ ULONG BytesToWrite;
+
+ //
+ // Links for the deferred write queue.
+ //
+
+ LIST_ENTRY DeferredWriteLinks;
+
+ //
+ // If this event pointer is not NULL, then this event will
+ // be signalled when the write is ok, rather than calling
+ // the PostRoutine below.
+ //
+
+ PKEVENT Event;
+
+ //
+ // The posting routine and its parameters
+ //
+
+ PCC_POST_DEFERRED_WRITE PostRoutine;
+ PVOID Context1;
+ PVOID Context2;
+
+ BOOLEAN LimitModifiedPages;
+
+} DEFERRED_WRITE, *PDEFERRED_WRITE;
+
+
+//
+// Struct controlling the Lazy Writer algorithms
+//
+
+typedef struct _LAZY_WRITER {
+
+ //
+ // A few Mm routines still require a process.
+ //
+
+ PEPROCESS OurProcess;
+
+ //
+ // Work queue.
+ //
+
+ LIST_ENTRY WorkQueue;
+
+ //
+ // Zone for Bcbs.
+ //
+
+ ZONE_HEADER BcbZone;
+
+ //
+ // Dpc and Timer Structures used for activating periodic scan when active.
+ //
+
+ KDPC ScanDpc;
+ KTIMER ScanTimer;
+
+ //
+ // Boolean to say whether Lazy Writer scan is active or not.
+ //
+
+ BOOLEAN ScanActive;
+
+ //
+ // Boolean indicating if there is any other reason for Lazy Writer to
+ // wake up.
+ //
+
+ BOOLEAN OtherWork;
+
+} LAZY_WRITER;
+
+
+//
+// Work queue entry for the worker threads, with an enumerated
+// function code.
+//
+// NOTE: THIS STRUCTURE MUST REMAIN 64-bit ALIGNED IN SIZE, SINCE
+// IT IS ZONE ALLOCATED.
+//
+
+typedef enum _WORKER_FUNCTION {
+ Noop = 0,
+ ReadAhead,
+ WriteBehind,
+ LazyWriteScan
+ } WORKER_FUNCTION;
+
+typedef struct _WORK_QUEUE_ENTRY {
+
+ //
+ // List entry for our work queues.
+ //
+
+ LIST_ENTRY WorkQueueLinks;
+
+ //
+ // Define a union to contain function-specific parameters.
+ //
+
+ union {
+
+ //
+ // Read parameters (for read ahead)
+ //
+
+ struct {
+ PFILE_OBJECT FileObject;
+ } Read;
+
+ //
+ // Write parameters (for write behind)
+ //
+
+ struct {
+ PSHARED_CACHE_MAP SharedCacheMap;
+ } Write;
+
+ } Parameters;
+
+ //
+ // Function code for this entry:
+ //
+
+ UCHAR Function;
+
+} WORK_QUEUE_ENTRY, *PWORK_QUEUE_ENTRY;
+
+//
+// This is a structure appended to the end of an MDL
+//
+
+typedef struct _MDL_WRITE {
+
+ //
+ // This field is for the use of the Server to stash anything interesting
+ //
+
+ PVOID ServerContext;
+
+ //
+ // This is the resource to release when the write is complete.
+ //
+
+ PERESOURCE Resource;
+
+ //
+ // This is the caller's thread, and the thread that must release
+ // the resource.
+ //
+
+ ERESOURCE_THREAD Thread;
+
+ //
+ // This links all the pending MDLs through the shared cache map.
+ //
+
+ LIST_ENTRY MdlLinks;
+
+} MDL_WRITE, *PMDL_WRITE;
+
+
+//
+// Common Private routine definitions for the Cache Manager
+//
+
+#define GetActiveVacb(SCM,IRQ,V,P,D) { \
+ ExAcquireFastLock(&(SCM)->ActiveVacbSpinLock, &(IRQ)); \
+ (V) = (SCM)->ActiveVacb; \
+ if ((V) != NULL) { \
+ (P) = (SCM)->ActivePage; \
+ (SCM)->ActiveVacb = NULL; \
+ (D) = (SCM)->Flags & ACTIVE_PAGE_IS_DIRTY; \
+ } \
+ ExReleaseFastLock(&(SCM)->ActiveVacbSpinLock, (IRQ)); \
+}
+
+#define GetActiveVacbAtDpcLevel(SCM,V,P,D) { \
+ ExAcquireSpinLockAtDpcLevel(&(SCM)->ActiveVacbSpinLock); \
+ (V) = (SCM)->ActiveVacb; \
+ if ((V) != NULL) { \
+ (P) = (SCM)->ActivePage; \
+ (SCM)->ActiveVacb = NULL; \
+ (D) = (SCM)->Flags & ACTIVE_PAGE_IS_DIRTY; \
+ } \
+ ExReleaseSpinLockFromDpcLevel(&(SCM)->ActiveVacbSpinLock); \
+}
+
+//
+// When setting dirty, when we set ACTIVE_PAGE_IS_DIRTY the first time,
+// we increment the dirty counts, and they never get decremented until
+// CcFreeActiveVacb. If we are trying to set and there is already an
+// active Vacb *or* we are trying to set a clean one and the flag above
+// is set, we do not allow it, and we just free the vacb (we only want
+// to handle the clean transition in one place).
+//
+// MP & UP cases are separately defined, because I do not trust the compiler
+// to otherwise generate the optimal UP code.
+//
+
+
+//
+// In the MP case, we test if we are setting the page dirty, because then
+// we must acquire CcMasterSpinLock to diddle CcDirtyPages.
+//
+
+#if !defined(NT_UP)
+
+#define SetActiveVacb(SCM,IRQ,V,P,D) { \
+ if (D) { \
+ ExAcquireSpinLock(&CcMasterSpinLock, &(IRQ)); \
+ ExAcquireSpinLockAtDpcLevel(&(SCM)->ActiveVacbSpinLock); \
+ } else { \
+ ExAcquireSpinLock(&(SCM)->ActiveVacbSpinLock, &(IRQ)); \
+ } \
+ do { \
+ if ((SCM)->ActiveVacb == NULL) { \
+ if (((SCM)->Flags & ACTIVE_PAGE_IS_DIRTY) != (D)) { \
+ if (D) { \
+ (SCM)->ActiveVacb = (V); \
+ (SCM)->ActivePage = (P); \
+ (V) = NULL; \
+ SetFlag((SCM)->Flags, ACTIVE_PAGE_IS_DIRTY); \
+ CcTotalDirtyPages += 1; \
+ (SCM)->DirtyPages += 1; \
+ if ((SCM)->DirtyPages == 1) { \
+ PLIST_ENTRY Blink; \
+ PLIST_ENTRY Entry; \
+ PLIST_ENTRY Flink; \
+ PLIST_ENTRY Head; \
+ Entry = &(SCM)->SharedCacheMapLinks; \
+ Blink = Entry->Blink; \
+ Flink = Entry->Flink; \
+ Blink->Flink = Flink; \
+ Flink->Blink = Blink; \
+ Head = &CcDirtySharedCacheMapList.SharedCacheMapLinks; \
+ Blink = Head->Blink; \
+ Entry->Flink = Head; \
+ Entry->Blink = Blink; \
+ Blink->Flink = Entry; \
+ Head->Blink = Entry; \
+ if (!LazyWriter.ScanActive) { \
+ LazyWriter.ScanActive = TRUE; \
+ ExReleaseSpinLockFromDpcLevel(&(SCM)->ActiveVacbSpinLock); \
+ ExReleaseSpinLock(&CcMasterSpinLock, (IRQ)); \
+ KeSetTimer( &LazyWriter.ScanTimer, \
+ CcFirstDelay, \
+ &LazyWriter.ScanDpc ); \
+ break; \
+ } \
+ } \
+ } \
+ } else { \
+ (SCM)->ActiveVacb = (V); \
+ (SCM)->ActivePage = (P); \
+ (V) = NULL; \
+ } \
+ } \
+ if (D) { \
+ ExReleaseSpinLockFromDpcLevel(&(SCM)->ActiveVacbSpinLock); \
+ ExReleaseSpinLock(&CcMasterSpinLock, (IRQ)); \
+ } else { \
+ ExReleaseSpinLock(&(SCM)->ActiveVacbSpinLock, (IRQ)); \
+ } \
+ if ((V) != NULL) { \
+ CcFreeActiveVacb( (SCM), (V), (P), (D)); \
+ } \
+ } while (FALSE); \
+}
+
+//
+// In the UP case, any FastLock will do, so we just use the ActiveVacb lock, and do not
+// explicitly acquire CcMasterSpinLock.
+//
+
+#else
+
+#define SetActiveVacb(SCM,IRQ,V,P,D) { \
+ ExAcquireFastLock(&(SCM)->ActiveVacbSpinLock, &(IRQ)); \
+ do { \
+ if ((SCM)->ActiveVacb == NULL) { \
+ if (((SCM)->Flags & ACTIVE_PAGE_IS_DIRTY) != (D)) { \
+ if (D) { \
+ (SCM)->ActiveVacb = (V); \
+ (SCM)->ActivePage = (P); \
+ (V) = NULL; \
+ SetFlag((SCM)->Flags, ACTIVE_PAGE_IS_DIRTY); \
+ CcTotalDirtyPages += 1; \
+ (SCM)->DirtyPages += 1; \
+ if ((SCM)->DirtyPages == 1) { \
+ PLIST_ENTRY Blink; \
+ PLIST_ENTRY Entry; \
+ PLIST_ENTRY Flink; \
+ PLIST_ENTRY Head; \
+ Entry = &(SCM)->SharedCacheMapLinks; \
+ Blink = Entry->Blink; \
+ Flink = Entry->Flink; \
+ Blink->Flink = Flink; \
+ Flink->Blink = Blink; \
+ Head = &CcDirtySharedCacheMapList.SharedCacheMapLinks; \
+ Blink = Head->Blink; \
+ Entry->Flink = Head; \
+ Entry->Blink = Blink; \
+ Blink->Flink = Entry; \
+ Head->Blink = Entry; \
+ if (!LazyWriter.ScanActive) { \
+ LazyWriter.ScanActive = TRUE; \
+ ExReleaseFastLock(&(SCM)->ActiveVacbSpinLock, (IRQ)); \
+ KeSetTimer( &LazyWriter.ScanTimer, \
+ CcFirstDelay, \
+ &LazyWriter.ScanDpc ); \
+ break; \
+ } \
+ } \
+ } \
+ } else { \
+ (SCM)->ActiveVacb = (V); \
+ (SCM)->ActivePage = (P); \
+ (V) = NULL; \
+ } \
+ } \
+ ExReleaseFastLock(&(SCM)->ActiveVacbSpinLock, (IRQ)); \
+ if ((V) != NULL) { \
+ CcFreeActiveVacb( (SCM), (V), (P), (D)); \
+ } \
+ } while (FALSE); \
+}
+
+#endif
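+
+//
+// For illustration only -- the copy paths (CcMapAndCopy in cachesub.c and
+// the fast copy routines in copysup.c) are expected to use these macros
+// roughly as follows; the local names are hypothetical:
+//
+//     GetActiveVacb( SharedCacheMap, OldIrql, Vacb, ActivePage, PageIsDirty );
+//
+//     if (Vacb != NULL) {
+//
+//         ... copy into or extend the active page ...
+//
+//         SetActiveVacb( SharedCacheMap, OldIrql, Vacb, ActivePage, PageIsDirty );
+//     }
+//
+// SetActiveVacb either stashes the Vacb as the new active Vacb or frees it
+// via CcFreeActiveVacb, as described in the comment above the macros.
+//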
+
+VOID
+CcPostDeferredWrites (
+ );
+
+BOOLEAN
+CcPinFileData (
+ IN PFILE_OBJECT FileObject,
+ IN PLARGE_INTEGER FileOffset,
+ IN ULONG Length,
+ IN BOOLEAN ReadOnly,
+ IN BOOLEAN WriteOnly,
+ IN BOOLEAN Wait,
+ OUT PBCB *Bcb,
+ OUT PVOID *BaseAddress,
+ OUT PLARGE_INTEGER BeyondLastByte
+ );
+
+typedef enum {
+ UNPIN,
+ SET_CLEAN
+} UNMAP_ACTIONS;
+
+VOID
+FASTCALL
+CcUnpinFileData (
+ IN OUT PBCB Bcb,
+ IN BOOLEAN ReadOnly,
+ IN UNMAP_ACTIONS UnmapAction
+ );
+
+VOID
+FASTCALL
+CcDeallocateBcb (
+ IN PBCB Bcb
+ );
+
+VOID
+FASTCALL
+CcPerformReadAhead (
+ IN PFILE_OBJECT FileObject
+ );
+
+VOID
+CcSetDirtyInMask (
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN PLARGE_INTEGER FileOffset,
+ IN ULONG Length
+ );
+
+NTSTATUS
+FASTCALL
+CcWriteBehind (
+ IN PSHARED_CACHE_MAP SharedCacheMap
+ );
+
+#define ZERO_FIRST_PAGE 1
+#define ZERO_MIDDLE_PAGES 2
+#define ZERO_LAST_PAGE 4
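+
+//
+// The ZeroFlags bits above are passed to CcMapAndRead and CcMapAndCopy to
+// say which pages of the transfer may simply be materialized as zeroed
+// pages if they are not resident (typically because the caller is about to
+// overwrite them in full), rather than being faulted in from disk.
+//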
+
+BOOLEAN
+CcMapAndRead(
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN PLARGE_INTEGER FileOffset,
+ IN ULONG Length,
+ IN ULONG ZeroFlags,
+ IN BOOLEAN Wait,
+ OUT PVACB *Vacb,
+ OUT PVOID *BaseAddress
+ );
+
+VOID
+CcFreeActiveVacb (
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN PVACB ActiveVacb OPTIONAL,
+ IN ULONG ActivePage,
+ IN ULONG PageIsDirty
+ );
+
+VOID
+CcMapAndCopy(
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN PVOID UserBuffer,
+ IN PLARGE_INTEGER FileOffset,
+ IN ULONG Length,
+ IN ULONG ZeroFlags,
+ IN BOOLEAN WriteThrough
+ );
+
+VOID
+CcScanDpc (
+ IN PKDPC Dpc,
+ IN PVOID DeferredContext,
+ IN PVOID SystemArgument1,
+ IN PVOID SystemArgument2
+ );
+
+VOID
+CcScheduleLazyWriteScan (
+ );
+
+VOID
+CcStartLazyWriter (
+ IN PVOID NotUsed
+ );
+
+#define CcAllocateWorkQueueEntry() \
+ (PWORK_QUEUE_ENTRY)ExAllocateFromNPagedLookasideList(&CcTwilightLookasideList)
+
+#define CcFreeWorkQueueEntry(_entry_) \
+ ExFreeToNPagedLookasideList(&CcTwilightLookasideList, (_entry_))
+
+VOID
+FASTCALL
+CcPostWorkQueue (
+ IN PWORK_QUEUE_ENTRY WorkQueueEntry,
+ IN PLIST_ENTRY WorkQueue
+ );
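+
+//
+// For illustration only -- a work item is expected to be queued roughly as
+// follows (NULL check omitted; see the read ahead and write behind
+// scheduling in cachesub.c and lazyrite.c for the real code). Whether the
+// express or regular queue is used depends on the caller:
+//
+//     WorkQueueEntry = CcAllocateWorkQueueEntry();
+//     WorkQueueEntry->Function = (UCHAR)ReadAhead;
+//     WorkQueueEntry->Parameters.Read.FileObject = FileObject;
+//
+//     CcPostWorkQueue( WorkQueueEntry, &CcExpressWorkQueue );
+//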
+
+VOID
+CcWorkerThread (
+ PVOID ExWorkQueueItem
+ );
+
+VOID
+FASTCALL
+CcDeleteSharedCacheMap (
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN KIRQL ListIrql,
+ IN ULONG ReleaseFile
+ );
+
+//
+// This exception filter handles STATUS_IN_PAGE_ERROR correctly
+//
+
+LONG
+CcCopyReadExceptionFilter(
+ IN PEXCEPTION_POINTERS ExceptionPointer,
+ IN PNTSTATUS ExceptionCode
+ );
+
+//
+// Exception filter for Worker Threads in lazyrite.c
+//
+
+LONG
+CcExceptionFilter (
+ IN NTSTATUS ExceptionCode
+ );
+
+#ifdef CCDBG
+VOID
+CcDump (
+ IN PVOID Ptr
+ );
+#endif
+
+//
+// Vacb routines
+//
+
+VOID
+CcInitializeVacbs(
+ );
+
+PVOID
+CcGetVirtualAddressIfMapped (
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN LONGLONG FileOffset,
+ OUT PVACB *Vacb,
+ OUT PULONG ReceivedLength
+ );
+
+PVOID
+CcGetVirtualAddress (
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN LARGE_INTEGER FileOffset,
+ OUT PVACB *Vacb,
+ OUT PULONG ReceivedLength
+ );
+
+VOID
+FASTCALL
+CcFreeVirtualAddress (
+ IN PVACB Vacb
+ );
+
+VOID
+CcWaitOnActiveCount (
+ IN PSHARED_CACHE_MAP SharedCacheMap
+ );
+
+VOID
+FASTCALL
+CcCreateVacbArray (
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN LARGE_INTEGER NewSectionSize
+ );
+
+VOID
+CcExtendVacbArray (
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN LARGE_INTEGER NewSectionSize
+ );
+
+BOOLEAN
+FASTCALL
+CcUnmapVacbArray (
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN PLARGE_INTEGER FileOffset OPTIONAL,
+ IN ULONG Length
+ );
+
+//
+// Define references to global data
+//
+
+extern KSPIN_LOCK CcMasterSpinLock;
+extern LIST_ENTRY CcCleanSharedCacheMapList;
+extern SHARED_CACHE_MAP_LIST_CURSOR CcDirtySharedCacheMapList;
+extern SHARED_CACHE_MAP_LIST_CURSOR CcLazyWriterCursor;
+extern NPAGED_LOOKASIDE_LIST CcTwilightLookasideList;
+extern KSPIN_LOCK CcWorkQueueSpinlock;
+extern ULONG CcNumberWorkerThreads;
+extern LIST_ENTRY CcIdleWorkerThreadList;
+extern LIST_ENTRY CcExpressWorkQueue;
+extern LIST_ENTRY CcRegularWorkQueue;
+extern LARGE_INTEGER CcNoDelay;
+extern LARGE_INTEGER CcFirstDelay;
+extern LARGE_INTEGER CcIdleDelay;
+extern LARGE_INTEGER CcCollisionDelay;
+extern LARGE_INTEGER CcTargetCleanDelay;
+extern LAZY_WRITER LazyWriter;
+extern KSPIN_LOCK CcVacbSpinLock;
+extern ULONG CcNumberVacbs;
+extern PVACB CcVacbs;
+extern PVACB CcBeyondVacbs;
+extern PVACB CcNextVictimVacb;
+extern KSPIN_LOCK CcDeferredWriteSpinLock;
+extern LIST_ENTRY CcDeferredWrites;
+extern ULONG CcDirtyPageThreshold;
+extern ULONG CcDirtyPageTarget;
+extern ULONG CcDirtyPagesLastScan;
+extern ULONG CcPagesYetToWrite;
+extern ULONG CcPagesWrittenLastTime;
+extern ULONG CcAvailablePagesThreshold;
+extern ULONG CcTotalDirtyPages;
+extern ULONG CcTune;
+extern ULONG CcLazyWriteHotSpots;
+extern MM_SYSTEMSIZE CcCapturedSystemSize;
+
+
+//
+// Here is a page of macros stolen directly from Pinball...
+//
+
+//
+// The following macros are used to establish the semantics needed
+// to do a return from within a try-finally clause. As a rule every
+// try clause must end with a label call try_exit. For example,
+//
+// try {
+// :
+// :
+//
+// try_exit: NOTHING;
+// } finally {
+//
+// :
+// :
+// }
+//
+// Every return statement executed inside of a try clause should use the
+// try_return macro. If the compiler fully supports the try-finally construct
+// then the macro should be
+//
+// #define try_return(S) { return(S); }
+//
+// If the compiler does not support the try-finally construct then the macro
+// should be
+//
+// #define try_return(S) { S; goto try_exit; }
+//
+
+#define try_return(S) { S; goto try_exit; }
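+
+//
+// Editor's illustrative sketch (not part of the original source): a routine
+// following the try_exit convention described above might be structured as
+// follows, where "DoSomething" and "Cleanup" are hypothetical helpers.
+//
+//     try {
+//
+//         if (!DoSomething()) {
+//             try_return( Status = STATUS_UNSUCCESSFUL );
+//         }
+//
+//         Status = STATUS_SUCCESS;
+//
+//     try_exit: NOTHING;
+//     } finally {
+//
+//         //  Common cleanup runs here whether or not try_return was taken.
+//
+//         Cleanup();
+//     }
+//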
+
+#ifdef CCDBG
+
+extern LONG CcDebugTraceLevel;
+extern LONG CcDebugTraceIndent;
+
+#ifndef CCDBG_LOCK
+
+#define DebugTrace(INDENT,LEVEL,X,Y) { \
+ LONG _i; \
+ if (((LEVEL) == 0) || (CcDebugTraceLevel & (LEVEL))) { \
+ _i = (ULONG)PsGetCurrentThread(); \
+ DbgPrint("%08lx:",_i); \
+ if ((INDENT) < 0) { \
+ CcDebugTraceIndent += (INDENT); \
+ } \
+ if (CcDebugTraceIndent < 0) { \
+ CcDebugTraceIndent = 0; \
+ } \
+ for (_i=0; _i<CcDebugTraceIndent; _i+=1) { \
+ DbgPrint(" "); \
+ } \
+ DbgPrint(X,Y); \
+ if ((INDENT) > 0) { \
+ CcDebugTraceIndent += (INDENT); \
+ } \
+ } \
+}
+
+#define DebugTrace2(INDENT,LEVEL,X,Y,Z) { \
+ LONG _i; \
+ if (((LEVEL) == 0) || (CcDebugTraceLevel & (LEVEL))) { \
+ _i = (ULONG)PsGetCurrentThread(); \
+ DbgPrint("%08lx:",_i); \
+ if ((INDENT) < 0) { \
+ CcDebugTraceIndent += (INDENT); \
+ } \
+ if (CcDebugTraceIndent < 0) { \
+ CcDebugTraceIndent = 0; \
+ } \
+ for (_i=0; _i<CcDebugTraceIndent; _i+=1) { \
+ DbgPrint(" "); \
+ } \
+ DbgPrint(X,Y,Z); \
+ if ((INDENT) > 0) { \
+ CcDebugTraceIndent += (INDENT); \
+ } \
+ } \
+}
+
+#define DebugDump(STR,LEVEL,PTR) { \
+ LONG _i; \
+ VOID CcDump(); \
+ if (((LEVEL) == 0) || (CcDebugTraceLevel & (LEVEL))) { \
+ _i = (ULONG)PsGetCurrentThread(); \
+ DbgPrint("%08lx:",_i); \
+ DbgPrint(STR); \
+ if (PTR != NULL) {CcDump(PTR);} \
+ DbgBreakPoint(); \
+ } \
+}
+
+#else // ndef CCDBG_LOCK
+
+extern KSPIN_LOCK CcDebugTraceLock;
+
+#define DebugTrace(INDENT,LEVEL,X,Y) { \
+ LONG _i; \
+ KIRQL _oldIrql; \
+ if (((LEVEL) == 0) || (CcDebugTraceLevel & (LEVEL))) { \
+ _i = (ULONG)PsGetCurrentThread(); \
+ ExAcquireSpinLock( &CcDebugTraceLock, &_oldIrql ); \
+ DbgPrint("%08lx:",_i); \
+ if ((INDENT) < 0) { \
+ CcDebugTraceIndent += (INDENT); \
+ } \
+ if (CcDebugTraceIndent < 0) { \
+ CcDebugTraceIndent = 0; \
+ } \
+ for (_i=0; _i<CcDebugTraceIndent; _i+=1) { \
+ DbgPrint(" "); \
+ } \
+ DbgPrint(X,Y); \
+ if ((INDENT) > 0) { \
+ CcDebugTraceIndent += (INDENT); \
+ } \
+ ExReleaseSpinLock( &CcDebugTraceLock, _oldIrql ); \
+ } \
+}
+
+#define DebugTrace2(INDENT,LEVEL,X,Y,Z) { \
+ LONG _i; \
+ KIRQL _oldIrql; \
+ if (((LEVEL) == 0) || (CcDebugTraceLevel & (LEVEL))) { \
+ _i = (ULONG)PsGetCurrentThread(); \
+ ExAcquireSpinLock( &CcDebugTraceLock, &_oldIrql ); \
+ DbgPrint("%08lx:",_i); \
+ if ((INDENT) < 0) { \
+ CcDebugTraceIndent += (INDENT); \
+ } \
+ if (CcDebugTraceIndent < 0) { \
+ CcDebugTraceIndent = 0; \
+ } \
+ for (_i=0; _i<CcDebugTraceIndent; _i+=1) { \
+ DbgPrint(" "); \
+ } \
+ DbgPrint(X,Y,Z); \
+ if ((INDENT) > 0) { \
+ CcDebugTraceIndent += (INDENT); \
+ } \
+ ExReleaseSpinLock( &CcDebugTraceLock, _oldIrql ); \
+ } \
+}
+
+#define DebugDump(STR,LEVEL,PTR) { \
+ LONG _i; \
+ KIRQL _oldIrql; \
+ VOID CcDump(); \
+ if (((LEVEL) == 0) || (CcDebugTraceLevel & (LEVEL))) { \
+ _i = (ULONG)PsGetCurrentThread(); \
+ ExAcquireSpinLock( &CcDebugTraceLock, &_oldIrql ); \
+ DbgPrint("%08lx:",_i); \
+ DbgPrint(STR); \
+ if (PTR != NULL) {CcDump(PTR);} \
+ DbgBreakPoint(); \
+ ExReleaseSpinLock( &CcDebugTraceLock, _oldIrql ); \
+ } \
+}
+
+#endif // else ndef CCDBG_LOCK
+
+#else
+
+#undef CCDBG_LOCK
+
+#define DebugTrace(INDENT,LEVEL,X,Y) {NOTHING;}
+
+#define DebugTrace2(INDENT,LEVEL,X,Y,Z) {NOTHING;}
+
+#define DebugDump(STR,LEVEL,PTR) {NOTHING;}
+
+#endif // CCDBG
+
+//
+// Global list of pinned Bcbs which may be examined for debug purposes
+//
+
+#if DBG
+
+extern ULONG CcBcbCount;
+extern LIST_ENTRY CcBcbList;
+extern KSPIN_LOCK CcBcbSpinLock;
+
+#endif
+
+#endif // _CCh_
diff --git a/private/ntos/cache/copysup.c b/private/ntos/cache/copysup.c
new file mode 100644
index 000000000..e462014b8
--- /dev/null
+++ b/private/ntos/cache/copysup.c
@@ -0,0 +1,2117 @@
+/*++
+
+Copyright (c) 1990 Microsoft Corporation
+
+Module Name:
+
+ copysup.c
+
+Abstract:
+
+ This module implements the copy support routines for the Cache subsystem.
+
+Author:
+
+ Tom Miller [TomM] 4-May-1990
+
+Revision History:
+
+--*/
+
+#include "cc.h"
+
+//
+// Define our debug constant
+//
+
+#define me 0x00000004
+
+
+BOOLEAN
+CcCopyRead (
+ IN PFILE_OBJECT FileObject,
+ IN PLARGE_INTEGER FileOffset,
+ IN ULONG Length,
+ IN BOOLEAN Wait,
+ OUT PVOID Buffer,
+ OUT PIO_STATUS_BLOCK IoStatus
+ )
+
+/*++
+
+Routine Description:
+
+ This routine attempts to copy the specified file data from the cache
+ into the output buffer, and deliver the correct I/O status. It is *not*
+ safe to call this routine from Dpc level.
+
+ If the caller does not want to block (such as for disk I/O), then
+ Wait should be supplied as FALSE. If Wait was supplied as FALSE and
+ it is currently impossible to supply all of the requested data without
+ blocking, then this routine will return FALSE. However, if the
+ data is immediately accessible in the cache and no blocking is
+ required, this routine copies the data and returns TRUE.
+
+ If the caller supplies Wait as TRUE, then this routine is guaranteed
+ to copy the data and return TRUE. If the data is immediately
+ accessible in the cache, then no blocking will occur. Otherwise,
+ the data transfer from the file into the cache will be initiated,
+ and the caller will be blocked until the data can be returned.
+
+ File system Fsd's should typically supply Wait = TRUE if they are
+ processing a synchronous I/O request, or Wait = FALSE if they are
+ processing an asynchronous request.
+
+ File system or Server Fsp threads should supply Wait = TRUE.
+
+Arguments:
+
+ FileObject - Pointer to the file object for a file which was
+ opened with NO_INTERMEDIATE_BUFFERING clear, i.e., for
+ which CcInitializeCacheMap was called by the file system.
+
+ FileOffset - Byte offset in file for desired data.
+
+ Length - Length of desired data in bytes.
+
+ Wait - FALSE if caller may not block, TRUE otherwise (see description
+ above)
+
+ Buffer - Pointer to output buffer to which data should be copied.
+
+ IoStatus - Pointer to standard I/O status block to receive the status
+ for the transfer. (STATUS_SUCCESS guaranteed for cache
+ hits, otherwise the actual I/O status is returned.)
+
+ Note that even if FALSE is returned, the IoStatus.Information
+ field will return the count of any bytes successfully
+ transferred before a blocking condition occurred. The caller
+ may either choose to ignore this information, or resume
+ the copy later accounting for bytes transferred.
+
+Return Value:
+
+ FALSE - if Wait was supplied as FALSE and the data was not delivered
+
+ TRUE - if the data is being delivered
+
+--*/
+
+{
+ PSHARED_CACHE_MAP SharedCacheMap;
+ PPRIVATE_CACHE_MAP PrivateCacheMap;
+ PVOID CacheBuffer;
+ LARGE_INTEGER FOffset;
+ PVACB Vacb;
+ PBCB Bcb;
+ PVACB ActiveVacb;
+ ULONG ActivePage;
+ ULONG PageIsDirty;
+ ULONG SavedState;
+ KIRQL OldIrql;
+ NTSTATUS Status;
+ ULONG OriginalLength = Length;
+ ULONG PageCount = COMPUTE_PAGES_SPANNED(((PVOID)FileOffset->LowPart), Length);
+ PETHREAD Thread = PsGetCurrentThread();
+ BOOLEAN GotAMiss = FALSE;
+
+ DebugTrace(+1, me, "CcCopyRead\n", 0 );
+
+ MmSavePageFaultReadAhead( Thread, &SavedState );
+
+ //
+ // Get pointer to shared and private cache maps
+ //
+
+ SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap;
+ PrivateCacheMap = FileObject->PrivateCacheMap;
+
+ //
+ // Check for read past file size, the caller must filter this case out.
+ //
+
+ ASSERT( ( FileOffset->QuadPart + (LONGLONG)Length) <= SharedCacheMap->FileSize.QuadPart );
+
+ //
+ // If read ahead is enabled, then do the read ahead here so it
+ // overlaps with the copy (otherwise we will do it below).
+ // Note that we are assuming that we will not get ahead of our
+ // current transfer - if read ahead is working it should either
+ // already be in memory or else underway.
+ //
+
+ if (PrivateCacheMap->ReadAheadEnabled && (PrivateCacheMap->ReadAheadLength[1] == 0)) {
+ CcScheduleReadAhead( FileObject, FileOffset, Length );
+ }
+
+ FOffset = *FileOffset;
+
+ //
+ // Increment performance counters
+ //
+
+ if (Wait) {
+ HOT_STATISTIC(CcCopyReadWait) += 1;
+
+ //
+ // This is not an exact solution, but when IoPageRead gets a miss,
+ // it cannot tell whether it was CcCopyRead or CcMdlRead, but since
+ // the miss should occur very soon, by loading the pointer here
+ // probably the right counter will get incremented, and in any case,
+ // we hope the errors average out!
+ //
+
+ CcMissCounter = &CcCopyReadWaitMiss;
+
+ } else {
+ HOT_STATISTIC(CcCopyReadNoWait) += 1;
+ }
+
+ //
+ // See if we have an active Vacb, that we can just copy to.
+ //
+
+ GetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, PageIsDirty );
+
+ if (ActiveVacb != NULL) {
+
+ if ((ULONG)(FOffset.QuadPart >> VACB_OFFSET_SHIFT) == (ActivePage >> (VACB_OFFSET_SHIFT - PAGE_SHIFT))) {
+
+ ULONG LengthToCopy = VACB_MAPPING_GRANULARITY - (FOffset.LowPart & (VACB_MAPPING_GRANULARITY - 1));
+
+ if (SharedCacheMap->NeedToZero != NULL) {
+
+ PVOID NeedToZero;
+
+ ExAcquireFastLock( &SharedCacheMap->ActiveVacbSpinLock, &OldIrql );
+
+ //
+ // Note that the NeedToZero could be cleared, since we
+ // tested it without the spinlock.
+ //
+
+ NeedToZero = SharedCacheMap->NeedToZero;
+ if (NeedToZero != NULL) {
+
+ RtlZeroMemory( NeedToZero, PAGE_SIZE - ((((ULONG)NeedToZero - 1) & (PAGE_SIZE - 1)) + 1) );
+ SharedCacheMap->NeedToZero = NULL;
+ }
+
+ ExReleaseFastLock( &SharedCacheMap->ActiveVacbSpinLock, OldIrql );
+
+ if (NeedToZero != NULL) {
+ MmUnlockCachedPage( (PVOID)((PCHAR)NeedToZero - 1) );
+ }
+ }
+
+ //
+ // Reduce LengthToCopy if it is greater than our caller's length.
+ //
+
+ if (LengthToCopy > Length) {
+ LengthToCopy = Length;
+ }
+
+ //
+ // Copy the data to the user buffer.
+ //
+
+ try {
+
+ MmSetPageFaultReadAhead( Thread, PageCount - 1 );
+ RtlCopyBytes( Buffer,
+ (PVOID)((PCHAR)ActiveVacb->BaseAddress +
+ (FOffset.LowPart & (VACB_MAPPING_GRANULARITY - 1))),
+ LengthToCopy );
+
+ } except( CcCopyReadExceptionFilter( GetExceptionInformation(),
+ &Status ) ) {
+
+ MmResetPageFaultReadAhead( Thread, SavedState );
+
+ SetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, PageIsDirty );
+
+ //
+ // If we got an access violation, then the user buffer went
+ // away. Otherwise we must have gotten an I/O error trying
+ // to bring the data in.
+ //
+
+ if (Status == STATUS_ACCESS_VIOLATION) {
+ ExRaiseStatus( STATUS_INVALID_USER_BUFFER );
+ }
+ else {
+ ExRaiseStatus( FsRtlNormalizeNtstatus( Status,
+ STATUS_UNEXPECTED_IO_ERROR ));
+ }
+ }
+
+ //
+ // Now adjust FOffset and Length by what we copied.
+ //
+
+ Buffer = (PVOID)((PCHAR)Buffer + LengthToCopy);
+ FOffset.QuadPart = FOffset.QuadPart + (LONGLONG)LengthToCopy;
+ Length -= LengthToCopy;
+
+ }
+
+ //
+ // If that was all the data, then remember the Vacb
+ //
+
+ if (Length == 0) {
+
+ SetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, PageIsDirty );
+
+ //
+ // Otherwise we must free it because we will map other vacbs below.
+ //
+
+ } else {
+
+ CcFreeActiveVacb( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty );
+ }
+ }
+
+ //
+ // Not all of the transfer will come back at once, so we have to loop
+ // until the entire transfer is complete.
+ //
+
+ while (Length != 0) {
+
+ ULONG ReceivedLength;
+ LARGE_INTEGER BeyondLastByte;
+
+ //
+ // Call local routine to Map or Access the file data, then move the data,
+ // then call another local routine to free the data. If we cannot map
+ // the data because of a Wait condition, return FALSE.
+ //
+ // Note that this call may result in an exception, however, if it
+ // does no Bcb is returned and this routine has absolutely no
+ // cleanup to perform. Therefore, we do not have a try-finally
+ // and we allow the possibility that we will simply be unwound
+ // without notice.
+ //
+
+ if (Wait) {
+
+ CacheBuffer = CcGetVirtualAddress( SharedCacheMap,
+ FOffset,
+ &Vacb,
+ &ReceivedLength );
+
+ BeyondLastByte.QuadPart = FOffset.QuadPart + (LONGLONG)ReceivedLength;
+
+ } else if (!CcPinFileData( FileObject,
+ &FOffset,
+ Length,
+ TRUE,
+ FALSE,
+ FALSE,
+ &Bcb,
+ &CacheBuffer,
+ &BeyondLastByte )) {
+
+ DebugTrace(-1, me, "CcCopyRead -> FALSE\n", 0 );
+
+ HOT_STATISTIC(CcCopyReadNoWaitMiss) += 1;
+
+ //
+ // Enable ReadAhead if we missed.
+ //
+
+ PrivateCacheMap->ReadAheadEnabled = TRUE;
+
+ return FALSE;
+
+ } else {
+
+ //
+ // Calculate how much data is described by Bcb starting at our desired
+ // file offset.
+ //
+
+ ReceivedLength = (ULONG)(BeyondLastByte.QuadPart - FOffset.QuadPart);
+ }
+
+ //
+ // If we got more than we need, make sure to only transfer
+ // the right amount.
+ //
+
+ if (ReceivedLength > Length) {
+ ReceivedLength = Length;
+ }
+
+ //
+ // It is possible for the user buffer to become no longer accessible
+ // since it was last checked by the I/O system. If we fail to access
+ // the buffer we must raise a status that the caller's exception
+ // filter considers as "expected". Also we unmap the Bcb here, since
+ // we otherwise would have no other reason to put a try-finally around
+ // this loop.
+ //
+
+ try {
+
+ ULONG PagesToGo = COMPUTE_PAGES_SPANNED( CacheBuffer,
+ ReceivedLength ) - 1;
+
+ //
+ // We know exactly how much we want to read here, and we do not
+ // want to read any more in case the caller is doing random access.
+ // Our read ahead logic takes care of detecting sequential reads,
+ // and tends to do large asynchronous read aheads. So far we have
+ // only mapped the data and we have not forced any in. What we
+ // do now is get into a loop where we copy a page at a time and
+ // just prior to each move, we tell MM how many additional pages
+ // we would like to have read in, in the event that we take a
+ // fault. With this strategy, for cache hits we never make a single
+ // expensive call to MM to guarantee that the data is in, yet if we
+ // do take a fault, we are guaranteed to only take one fault because
+ // we will read all of the data in for the rest of the transfer.
+ //
+ // We test first for the multiple page case, to keep the small
+ // reads faster.
+ //
+
+ if (PagesToGo != 0) {
+
+ ULONG MoveLength;
+ ULONG LengthToGo = ReceivedLength;
+
+ while (LengthToGo != 0) {
+
+ MoveLength = (PCHAR)(ROUND_TO_PAGES(((PCHAR)CacheBuffer + 1))) -
+ (PCHAR)CacheBuffer;
+
+ if (MoveLength > LengthToGo) {
+ MoveLength = LengthToGo;
+ }
+
+ //
+ // Here's hoping that it is cheaper to call Mm to see if
+ // the page is valid. If not let Mm know how many pages
+ // we are after before doing the move.
+ //
+
+ MmSetPageFaultReadAhead( Thread, PagesToGo );
+ GotAMiss = (BOOLEAN)!MmCheckCachedPageState( CacheBuffer, FALSE );
+
+ RtlCopyBytes( Buffer, CacheBuffer, MoveLength );
+
+ PagesToGo -= 1;
+
+ LengthToGo -= MoveLength;
+ Buffer = (PCHAR)Buffer + MoveLength;
+ CacheBuffer = (PCHAR)CacheBuffer + MoveLength;
+ }
+
+ //
+ // Handle the read here that stays on a single page.
+ //
+
+ } else {
+
+ //
+ // Here's hoping that it is cheaper to call Mm to see if
+ // the page is valid. If not let Mm know how many pages
+ // we are after before doing the move.
+ //
+
+ MmSetPageFaultReadAhead( Thread, 0 );
+ GotAMiss = (BOOLEAN)!MmCheckCachedPageState( CacheBuffer, FALSE );
+
+ RtlCopyBytes( Buffer, CacheBuffer, ReceivedLength );
+
+ Buffer = (PCHAR)Buffer + ReceivedLength;
+ }
+
+ }
+ except( CcCopyReadExceptionFilter( GetExceptionInformation(),
+ &Status ) ) {
+
+ CcMissCounter = &CcThrowAway;
+
+ //
+ // If we get an exception, then we have to re-enable page fault
+ // clustering and unmap on the way out.
+ //
+
+ MmResetPageFaultReadAhead( Thread, SavedState );
+
+
+ if (Wait) {
+ CcFreeVirtualAddress( Vacb );
+ } else {
+ CcUnpinFileData( Bcb, TRUE, UNPIN );
+ }
+
+ //
+ // If we got an access violation, then the user buffer went
+ // away. Otherwise we must have gotten an I/O error trying
+ // to bring the data in.
+ //
+
+ if (Status == STATUS_ACCESS_VIOLATION) {
+ ExRaiseStatus( STATUS_INVALID_USER_BUFFER );
+ }
+ else {
+ ExRaiseStatus( FsRtlNormalizeNtstatus( Status,
+ STATUS_UNEXPECTED_IO_ERROR ));
+ }
+ }
+
+ //
+ // Update number of bytes transferred.
+ //
+
+ Length -= ReceivedLength;
+
+ //
+ // Unmap the data now, and calculate length left to transfer.
+ //
+
+ if (Wait) {
+
+ //
+ // If there is more to go, just free this vacb.
+ //
+
+ if (Length != 0) {
+
+ CcFreeVirtualAddress( Vacb );
+
+ //
+ // Otherwise save it for the next time through.
+ //
+
+ } else {
+
+ SetActiveVacb( SharedCacheMap, OldIrql, Vacb, (ULONG)(FOffset.QuadPart >> PAGE_SHIFT), 0 );
+ break;
+ }
+
+ } else {
+ CcUnpinFileData( Bcb, TRUE, UNPIN );
+ }
+
+ //
+ // Assume we did not get all the data we wanted, and set FOffset
+ // to the end of the returned data.
+ //
+
+ FOffset = BeyondLastByte;
+ }
+
+ MmResetPageFaultReadAhead( Thread, SavedState );
+
+ CcMissCounter = &CcThrowAway;
+
+ //
+ // Now enable read ahead if it looks like we got any misses, and do
+ // the first one.
+ //
+
+ if (GotAMiss && !PrivateCacheMap->ReadAheadEnabled) {
+
+ PrivateCacheMap->ReadAheadEnabled = TRUE;
+ CcScheduleReadAhead( FileObject, FileOffset, OriginalLength );
+ }
+
+ //
+ // Now that we have described our desired read ahead, let's
+ // shift the read history down.
+ //
+
+ PrivateCacheMap->FileOffset1 = PrivateCacheMap->FileOffset2;
+ PrivateCacheMap->BeyondLastByte1 = PrivateCacheMap->BeyondLastByte2;
+ PrivateCacheMap->FileOffset2 = *FileOffset;
+ PrivateCacheMap->BeyondLastByte2.QuadPart =
+ FileOffset->QuadPart + (LONGLONG)OriginalLength;
+
+ IoStatus->Status = STATUS_SUCCESS;
+ IoStatus->Information = OriginalLength;
+
+ DebugTrace(-1, me, "CcCopyRead -> TRUE\n", 0 );
+
+ return TRUE;
+}
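+
+//
+// Editor's illustrative sketch (not part of the original source): a file
+// system's fast I/O read path might call CcCopyRead roughly as follows.
+// "CanWait", "ByteOffset" and the surrounding FsRtl locking are hypothetical
+// caller-side details.
+//
+//     if (CcCopyRead( FileObject, &ByteOffset, Length, CanWait, Buffer, IoStatus )) {
+//
+//         //  The data was copied; IoStatus describes the transfer.
+//
+//     } else {
+//
+//         //  CanWait was FALSE and the data could not be copied without
+//         //  blocking; the request should be posted or sent down the
+//         //  normal IRP path.
+//     }
+//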
+
+
+VOID
+CcFastCopyRead (
+ IN PFILE_OBJECT FileObject,
+ IN ULONG FileOffset,
+ IN ULONG Length,
+ IN ULONG PageCount,
+ OUT PVOID Buffer,
+ OUT PIO_STATUS_BLOCK IoStatus
+ )
+
+/*++
+
+Routine Description:
+
+ This routine attempts to copy the specified file data from the cache
+ into the output buffer, and deliver the correct I/O status.
+
+ This is a faster version of CcCopyRead which only supports 32-bit file
+ offsets and synchronicity (Wait = TRUE).
+
+Arguments:
+
+ FileObject - Pointer to the file object for a file which was
+ opened with NO_INTERMEDIATE_BUFFERING clear, i.e., for
+ which CcInitializeCacheMap was called by the file system.
+
+ FileOffset - Byte offset in file for desired data.
+
+ Length - Length of desired data in bytes.
+
+ PageCount - Number of pages spanned by the read.
+
+ Buffer - Pointer to output buffer to which data should be copied.
+
+ IoStatus - Pointer to standard I/O status block to receive the status
+ for the transfer. (STATUS_SUCCESS guaranteed for cache
+ hits, otherwise the actual I/O status is returned.)
+
+ Note that this routine is always synchronous (the equivalent
+ of Wait = TRUE), so on normal return the entire transfer has
+ completed and IoStatus.Information contains the total count
+ of bytes transferred.
+
+Return Value:
+
+ None
+
+--*/
+
+{
+ PSHARED_CACHE_MAP SharedCacheMap;
+ PPRIVATE_CACHE_MAP PrivateCacheMap;
+ PVOID CacheBuffer;
+ LARGE_INTEGER FOffset;
+ PVACB Vacb;
+ PVACB ActiveVacb;
+ ULONG ActivePage;
+ ULONG PageIsDirty;
+ ULONG SavedState;
+ KIRQL OldIrql;
+ NTSTATUS Status;
+ LARGE_INTEGER OriginalOffset;
+ ULONG OriginalLength = Length;
+ PETHREAD Thread = PsGetCurrentThread();
+ BOOLEAN GotAMiss = FALSE;
+
+ DebugTrace(+1, me, "CcFastCopyRead\n", 0 );
+
+ MmSavePageFaultReadAhead( Thread, &SavedState );
+
+ //
+ // Get pointer to shared and private cache maps
+ //
+
+ SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap;
+ PrivateCacheMap = FileObject->PrivateCacheMap;
+
+ //
+ // Check for read past file size, the caller must filter this case out.
+ //
+
+ ASSERT( (FileOffset + Length) <= SharedCacheMap->FileSize.LowPart );
+
+ //
+ // If read ahead is enabled, then do the read ahead here so it
+ // overlaps with the copy (otherwise we will do it below).
+ // Note that we are assuming that we will not get ahead of our
+ // current transfer - if read ahead is working it should either
+ // already be in memory or else underway.
+ //
+
+ OriginalOffset.LowPart = FileOffset;
+ OriginalOffset.HighPart = 0;
+
+ if (PrivateCacheMap->ReadAheadEnabled && (PrivateCacheMap->ReadAheadLength[1] == 0)) {
+ CcScheduleReadAhead( FileObject, &OriginalOffset, Length );
+ }
+
+ //
+ // This is not an exact solution, but when IoPageRead gets a miss,
+ // it cannot tell whether it was CcCopyRead or CcMdlRead, but since
+ // the miss should occur very soon, by loading the pointer here
+ // probably the right counter will get incremented, and in any case,
+ // we hope the errors average out!
+ //
+
+ CcMissCounter = &CcCopyReadWaitMiss;
+
+ //
+ // Increment performance counters
+ //
+
+ HOT_STATISTIC(CcCopyReadWait) += 1;
+
+ //
+ // See if we have an active Vacb, that we can just copy to.
+ //
+
+ GetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, PageIsDirty );
+
+ if (ActiveVacb != NULL) {
+
+ if ((FileOffset >> VACB_OFFSET_SHIFT) == (ActivePage >> (VACB_OFFSET_SHIFT - PAGE_SHIFT))) {
+
+ ULONG LengthToCopy = VACB_MAPPING_GRANULARITY - (FileOffset & (VACB_MAPPING_GRANULARITY - 1));
+
+ if (SharedCacheMap->NeedToZero != NULL) {
+
+ PVOID NeedToZero;
+
+ ExAcquireFastLock( &SharedCacheMap->ActiveVacbSpinLock, &OldIrql );
+
+ //
+ // Note that the NeedToZero could be cleared, since we
+ // tested it without the spinlock.
+ //
+
+ NeedToZero = SharedCacheMap->NeedToZero;
+ if (NeedToZero != NULL) {
+
+ RtlZeroMemory( NeedToZero, PAGE_SIZE - ((((ULONG)NeedToZero - 1) & (PAGE_SIZE - 1)) + 1) );
+ SharedCacheMap->NeedToZero = NULL;
+ }
+
+ ExReleaseFastLock( &SharedCacheMap->ActiveVacbSpinLock, OldIrql );
+
+ if (NeedToZero != NULL) {
+ MmUnlockCachedPage( (PVOID)((PCHAR)NeedToZero - 1) );
+ }
+ }
+
+ //
+ // Reduce LengthToCopy if it is greater than our caller's length.
+ //
+
+ if (LengthToCopy > Length) {
+ LengthToCopy = Length;
+ }
+
+ //
+ // Copy the data to the user buffer.
+ //
+
+ try {
+
+ MmSetPageFaultReadAhead( Thread, PageCount - 1 );
+ RtlCopyBytes( Buffer,
+ (PVOID)((PCHAR)ActiveVacb->BaseAddress +
+ (FileOffset & (VACB_MAPPING_GRANULARITY - 1))),
+ LengthToCopy );
+
+ } except( CcCopyReadExceptionFilter( GetExceptionInformation(),
+ &Status ) ) {
+
+ MmResetPageFaultReadAhead( Thread, SavedState );
+
+
+ SetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, PageIsDirty );
+
+ //
+ // If we got an access violation, then the user buffer went
+ // away. Otherwise we must have gotten an I/O error trying
+ // to bring the data in.
+ //
+
+ if (Status == STATUS_ACCESS_VIOLATION) {
+ ExRaiseStatus( STATUS_INVALID_USER_BUFFER );
+ }
+ else {
+ ExRaiseStatus( FsRtlNormalizeNtstatus( Status,
+ STATUS_UNEXPECTED_IO_ERROR ));
+ }
+ }
+
+ //
+ // Now adjust FileOffset and Length by what we copied.
+ //
+
+ Buffer = (PVOID)((PCHAR)Buffer + LengthToCopy);
+ FileOffset += LengthToCopy;
+ Length -= LengthToCopy;
+ }
+
+ //
+ // If that was all the data, then remember the Vacb
+ //
+
+ if (Length == 0) {
+
+ SetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, PageIsDirty );
+
+ //
+ // Otherwise we must free it because we will map other vacbs below.
+ //
+
+ } else {
+
+ CcFreeActiveVacb( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty );
+ }
+ }
+
+ //
+ // Not all of the transfer will come back at once, so we have to loop
+ // until the entire transfer is complete.
+ //
+
+ FOffset.HighPart = 0;
+ FOffset.LowPart = FileOffset;
+
+ while (Length != 0) {
+
+ ULONG ReceivedLength;
+ ULONG BeyondLastByte;
+
+ //
+ // Call local routine to Map or Access the file data, then move the data,
+ // then call another local routine to free the data. If we cannot map
+ // the data because of a Wait condition, return FALSE.
+ //
+ // Note that this call may result in an exception, however, if it
+ // does no Bcb is returned and this routine has absolutely no
+ // cleanup to perform. Therefore, we do not have a try-finally
+ // and we allow the possibility that we will simply be unwound
+ // without notice.
+ //
+
+ CacheBuffer = CcGetVirtualAddress( SharedCacheMap,
+ FOffset,
+ &Vacb,
+ &ReceivedLength );
+
+ BeyondLastByte = FOffset.LowPart + ReceivedLength;
+
+ //
+ // If we got more than we need, make sure to only transfer
+ // the right amount.
+ //
+
+ if (ReceivedLength > Length) {
+ ReceivedLength = Length;
+ }
+
+ //
+ // It is possible for the user buffer to become no longer accessible
+ // since it was last checked by the I/O system. If we fail to access
+ // the buffer we must raise a status that the caller's exception
+ // filter considers as "expected". Also we unmap the Bcb here, since
+ // we otherwise would have no other reason to put a try-finally around
+ // this loop.
+ //
+
+ try {
+
+ ULONG PagesToGo = COMPUTE_PAGES_SPANNED( CacheBuffer,
+ ReceivedLength ) - 1;
+
+ //
+ // We know exactly how much we want to read here, and we do not
+ // want to read any more in case the caller is doing random access.
+ // Our read ahead logic takes care of detecting sequential reads,
+ // and tends to do large asynchronous read aheads. So far we have
+ // only mapped the data and we have not forced any in. What we
+ // do now is get into a loop where we copy a page at a time and
+ // just prior to each move, we tell MM how many additional pages
+ // we would like to have read in, in the event that we take a
+ // fault. With this strategy, for cache hits we never make a single
+ // expensive call to MM to guarantee that the data is in, yet if we
+ // do take a fault, we are guaranteed to only take one fault because
+ // we will read all of the data in for the rest of the transfer.
+ //
+ // We test first for the multiple page case, to keep the small
+ // reads faster.
+ //
+
+ if (PagesToGo != 0) {
+
+ ULONG MoveLength;
+ ULONG LengthToGo = ReceivedLength;
+
+ while (LengthToGo != 0) {
+
+ MoveLength = (PCHAR)(ROUND_TO_PAGES(((PCHAR)CacheBuffer + 1))) -
+ (PCHAR)CacheBuffer;
+
+ if (MoveLength > LengthToGo) {
+ MoveLength = LengthToGo;
+ }
+
+ //
+ // Here's hoping that it is cheaper to call Mm to see if
+ // the page is valid. If not let Mm know how many pages
+ // we are after before doing the move.
+ //
+
+ MmSetPageFaultReadAhead( Thread, PagesToGo );
+ GotAMiss = (BOOLEAN)!MmCheckCachedPageState( CacheBuffer, FALSE );
+
+ RtlCopyBytes( Buffer, CacheBuffer, MoveLength );
+
+ PagesToGo -= 1;
+
+ LengthToGo -= MoveLength;
+ Buffer = (PCHAR)Buffer + MoveLength;
+ CacheBuffer = (PCHAR)CacheBuffer + MoveLength;
+ }
+
+ //
+ // Handle the read here that stays on a single page.
+ //
+
+ } else {
+
+ //
+ // Here's hoping that it is cheaper to call Mm to see if
+ // the page is valid. If not let Mm know how many pages
+ // we are after before doing the move.
+ //
+
+ MmSetPageFaultReadAhead( Thread, 0 );
+ GotAMiss = (BOOLEAN)!MmCheckCachedPageState( CacheBuffer, FALSE );
+
+ RtlCopyBytes( Buffer, CacheBuffer, ReceivedLength );
+
+ Buffer = (PCHAR)Buffer + ReceivedLength;
+ }
+ }
+ except( CcCopyReadExceptionFilter( GetExceptionInformation(),
+ &Status ) ) {
+
+ CcMissCounter = &CcThrowAway;
+
+ //
+ // If we get an exception, then we have to re-enable page fault
+ // clustering and unmap on the way out.
+ //
+
+ MmResetPageFaultReadAhead( Thread, SavedState );
+
+
+ CcFreeVirtualAddress( Vacb );
+
+ //
+ // If we got an access violation, then the user buffer went
+ // away. Otherwise we must have gotten an I/O error trying
+ // to bring the data in.
+ //
+
+ if (Status == STATUS_ACCESS_VIOLATION) {
+ ExRaiseStatus( STATUS_INVALID_USER_BUFFER );
+ }
+ else {
+ ExRaiseStatus( FsRtlNormalizeNtstatus( Status,
+ STATUS_UNEXPECTED_IO_ERROR ));
+ }
+ }
+
+ //
+ // Update number of bytes transferred.
+ //
+
+ Length -= ReceivedLength;
+
+ //
+ // Unmap the data now, and calculate length left to transfer.
+ //
+
+ if (Length != 0) {
+
+ //
+ // If there is more to go, just free this vacb.
+ //
+
+ CcFreeVirtualAddress( Vacb );
+
+ } else {
+
+ //
+ // Otherwise save it for the next time through.
+ //
+
+ SetActiveVacb( SharedCacheMap, OldIrql, Vacb, (FOffset.LowPart >> PAGE_SHIFT), 0 );
+ break;
+ }
+
+ //
+ // Assume we did not get all the data we wanted, and set FOffset
+ // to the end of the returned data.
+ //
+
+ FOffset.LowPart = BeyondLastByte;
+ }
+
+ MmResetPageFaultReadAhead( Thread, SavedState );
+
+ CcMissCounter = &CcThrowAway;
+
+ //
+ // Now enable read ahead if it looks like we got any misses, and do
+ // the first one.
+ //
+
+ if (GotAMiss && !PrivateCacheMap->ReadAheadEnabled) {
+
+ PrivateCacheMap->ReadAheadEnabled = TRUE;
+ CcScheduleReadAhead( FileObject, &OriginalOffset, OriginalLength );
+ }
+
+ //
+ // Now that we have described our desired read ahead, let's
+ // shift the read history down.
+ //
+
+ PrivateCacheMap->FileOffset1.LowPart = PrivateCacheMap->FileOffset2.LowPart;
+ PrivateCacheMap->BeyondLastByte1.LowPart = PrivateCacheMap->BeyondLastByte2.LowPart;
+ PrivateCacheMap->FileOffset2.LowPart = OriginalOffset.LowPart;
+ PrivateCacheMap->BeyondLastByte2.LowPart = OriginalOffset.LowPart + OriginalLength;
+
+ IoStatus->Status = STATUS_SUCCESS;
+ IoStatus->Information = OriginalLength;
+
+ DebugTrace(-1, me, "CcFastCopyRead -> VOID\n", 0 );
+}
+
+
+BOOLEAN
+CcCopyWrite (
+ IN PFILE_OBJECT FileObject,
+ IN PLARGE_INTEGER FileOffset,
+ IN ULONG Length,
+ IN BOOLEAN Wait,
+ IN PVOID Buffer
+ )
+
+/*++
+
+Routine Description:
+
+ This routine attempts to copy the specified file data from the specified
+ buffer into the Cache, and deliver the correct I/O status. It is *not*
+ safe to call this routine from Dpc level.
+
+ If the caller does not want to block (such as for disk I/O), then
+ Wait should be supplied as FALSE. If Wait was supplied as FALSE and
+ it is currently impossible to receive all of the requested data without
+ blocking, then this routine will return FALSE. However, if the
+ correct space is immediately accessible in the cache and no blocking is
+ required, this routine copies the data and returns TRUE.
+
+ If the caller supplies Wait as TRUE, then this routine is guaranteed
+ to copy the data and return TRUE. If the correct space is immediately
+ accessible in the cache, then no blocking will occur. Otherwise,
+ the necessary work will be initiated to read and/or free cache data,
+ and the caller will be blocked until the data can be received.
+
+ File system Fsd's should typically supply Wait = TRUE if they are
+ processing a synchronous I/O request, or Wait = FALSE if they are
+ processing an asynchronous request.
+
+ File system or Server Fsp threads should supply Wait = TRUE.
+
+Arguments:
+
+ FileObject - Pointer to the file object for a file which was
+ opened with NO_INTERMEDIATE_BUFFERING clear, i.e., for
+ which CcInitializeCacheMap was called by the file system.
+
+ FileOffset - Byte offset in file to receive the data.
+
+ Length - Length of data in bytes.
+
+ Wait - FALSE if caller may not block, TRUE otherwise (see description
+ above)
+
+ Buffer - Pointer to input buffer from which data should be copied.
+
+Return Value:
+
+ FALSE - if Wait was supplied as FALSE and the data was not copied.
+
+ TRUE - if the data has been copied.
+
+Raises:
+
+ STATUS_INSUFFICIENT_RESOURCES - If a pool allocation failure occurs.
+ This can only occur if Wait was specified as TRUE. (If Wait is
+ specified as FALSE, and an allocation failure occurs, this
+ routine simply returns FALSE.)
+
+--*/
+
+{
+ PSHARED_CACHE_MAP SharedCacheMap;
+ PVACB ActiveVacb;
+ ULONG ActivePage;
+ PVOID ActiveAddress;
+ ULONG PageIsDirty;
+ KIRQL OldIrql;
+ NTSTATUS Status;
+ PVOID CacheBuffer;
+ LARGE_INTEGER FOffset;
+ PBCB Bcb;
+ ULONG ZeroFlags;
+ LARGE_INTEGER Temp;
+
+ DebugTrace(+1, me, "CcCopyWrite\n", 0 );
+
+ //
+ // If the caller specified Wait == FALSE, but the FileObject is WriteThrough,
+ // then we need to just get out.
+ //
+
+ if ((FileObject->Flags & FO_WRITE_THROUGH) && !Wait) {
+
+ DebugTrace(-1, me, "CcCopyWrite->FALSE (WriteThrough && !Wait)\n", 0 );
+
+ return FALSE;
+ }
+
+ //
+ // Get pointer to shared cache map
+ //
+
+ SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap;
+ FOffset = *FileOffset;
+
+ //
+ // See if we have an active Vacb, that we can just copy to.
+ //
+
+ GetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, PageIsDirty );
+
+ if (ActiveVacb != NULL) {
+
+ //
+ // See if the request starts in the ActivePage. WriteThrough requests must
+ // go the longer route through CcMapAndCopy, where WriteThrough flushes are
+ // implemented.
+ //
+
+ if (((ULONG)(FOffset.QuadPart >> PAGE_SHIFT) == ActivePage) && (Length != 0) &&
+ !FlagOn( FileObject->Flags, FO_WRITE_THROUGH )) {
+
+ ULONG LengthToCopy = PAGE_SIZE - (FOffset.LowPart & (PAGE_SIZE - 1));
+
+ //
+ // Reduce LengthToCopy if it is greater than our caller's length.
+ //
+
+ if (LengthToCopy > Length) {
+ LengthToCopy = Length;
+ }
+
+ //
+ // Copy the data to the user buffer.
+ //
+
+ try {
+
+ //
+ // If we are copying to a page that is locked down, then
+ // we have to do it under our spinlock, and update the
+ // NeedToZero field.
+ //
+
+ OldIrql = 0xFF;
+
+ CacheBuffer = (PVOID)((PCHAR)ActiveVacb->BaseAddress +
+ (FOffset.LowPart & (VACB_MAPPING_GRANULARITY - 1)));
+
+ if (SharedCacheMap->NeedToZero != NULL) {
+
+ //
+ // The FastLock may not write our "flag".
+ //
+
+ OldIrql = 0;
+
+ ExAcquireFastLock( &SharedCacheMap->ActiveVacbSpinLock, &OldIrql );
+
+ //
+ // Note that the NeedToZero could be cleared, since we
+ // tested it without the spinlock.
+ //
+
+ ActiveAddress = SharedCacheMap->NeedToZero;
+ if ((ActiveAddress != NULL) &&
+ (((PCHAR)CacheBuffer + LengthToCopy) > (PCHAR)ActiveAddress)) {
+
+ //
+ // If we are skipping some bytes in the page, then we need
+ // to zero them.
+ //
+
+ if ((PCHAR)CacheBuffer > (PCHAR)ActiveAddress) {
+
+ RtlZeroMemory( ActiveAddress, (PCHAR)CacheBuffer - (PCHAR)ActiveAddress );
+ }
+ SharedCacheMap->NeedToZero = (PVOID)((PCHAR)CacheBuffer + LengthToCopy);
+ }
+
+ ExReleaseFastLock( &SharedCacheMap->ActiveVacbSpinLock, OldIrql );
+ }
+
+ RtlCopyBytes( CacheBuffer, Buffer, LengthToCopy );
+
+ } except( CcCopyReadExceptionFilter( GetExceptionInformation(),
+ &Status ) ) {
+
+ //
+ // If we failed to overwrite the uninitialized data,
+ // zero it now (we cannot safely restore NeedToZero).
+ //
+
+ if (OldIrql != 0xFF) {
+ RtlZeroBytes( CacheBuffer, LengthToCopy );
+ }
+
+ SetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, ACTIVE_PAGE_IS_DIRTY );
+
+ //
+ // If we got an access violation, then the user buffer went
+ // away. Otherwise we must have gotten an I/O error trying
+ // to bring the data in.
+ //
+
+ if (Status == STATUS_ACCESS_VIOLATION) {
+ ExRaiseStatus( STATUS_INVALID_USER_BUFFER );
+ }
+ else {
+ ExRaiseStatus( FsRtlNormalizeNtstatus( Status,
+ STATUS_UNEXPECTED_IO_ERROR ));
+ }
+ }
+
+ //
+ // Now adjust FOffset and Length by what we copied.
+ //
+
+ Buffer = (PVOID)((PCHAR)Buffer + LengthToCopy);
+ FOffset.QuadPart = FOffset.QuadPart + (LONGLONG)LengthToCopy;
+ Length -= LengthToCopy;
+
+ //
+ // If that was all the data, then get outski...
+ //
+
+ if (Length == 0) {
+
+ SetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, ACTIVE_PAGE_IS_DIRTY );
+ return TRUE;
+ }
+
+ //
+ // Remember that the page is dirty now.
+ //
+
+ PageIsDirty |= ACTIVE_PAGE_IS_DIRTY;
+ }
+
+ CcFreeActiveVacb( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty );
+
+ //
+ // Else someone else could have the active page, and may want to zero
+ // the range we plan to write!
+ //
+
+ } else if (SharedCacheMap->NeedToZero != NULL) {
+
+ CcFreeActiveVacb( SharedCacheMap, NULL, 0, FALSE );
+ }
+
+ //
+ // At this point we can calculate the ZeroFlags.
+ //
+
+ //
+ // We can always zero middle pages, if any.
+ //
+
+ ZeroFlags = ZERO_MIDDLE_PAGES;
+
+ if (((FOffset.LowPart & (PAGE_SIZE - 1)) == 0) &&
+ (Length >= PAGE_SIZE)) {
+ ZeroFlags |= ZERO_FIRST_PAGE;
+ }
+
+ if (((FOffset.LowPart + Length) & (PAGE_SIZE - 1)) == 0) {
+ ZeroFlags |= ZERO_LAST_PAGE;
+ }
+
+ Temp = FOffset;
+ Temp.LowPart &= ~(PAGE_SIZE -1);
+ Temp.QuadPart = ((PFSRTL_COMMON_FCB_HEADER)FileObject->FsContext)->ValidDataLength.QuadPart -
+ Temp.QuadPart;
+
+ if (Temp.QuadPart <= 0) {
+ ZeroFlags |= ZERO_FIRST_PAGE | ZERO_MIDDLE_PAGES | ZERO_LAST_PAGE;
+ } else if ((Temp.HighPart == 0) && (Temp.LowPart <= PAGE_SIZE)) {
+ ZeroFlags |= ZERO_MIDDLE_PAGES | ZERO_LAST_PAGE;
+ }
+
+ //
+ // Call a routine to map and copy the data in Mm and get out.
+ //
+
+ if (Wait) {
+
+ CcMapAndCopy( SharedCacheMap,
+ Buffer,
+ &FOffset,
+ Length,
+ ZeroFlags,
+ BooleanFlagOn( FileObject->Flags, FO_WRITE_THROUGH ));
+
+ return TRUE;
+ }
+
+ //
+ // The rest of this routine is the Wait == FALSE case.
+ //
+ // Not all of the transfer will come back at once, so we have to loop
+ // until the entire transfer is complete.
+ //
+
+ while (Length != 0) {
+
+ ULONG ReceivedLength;
+ LARGE_INTEGER BeyondLastByte;
+
+ if (!CcPinFileData( FileObject,
+ &FOffset,
+ Length,
+ FALSE,
+ TRUE,
+ FALSE,
+ &Bcb,
+ &CacheBuffer,
+ &BeyondLastByte )) {
+
+ DebugTrace(-1, me, "CcCopyWrite -> FALSE\n", 0 );
+
+ return FALSE;
+
+ } else {
+
+ //
+ // Calculate how much data is described by Bcb starting at our desired
+ // file offset.
+ //
+
+ ReceivedLength = (ULONG)(BeyondLastByte.QuadPart - FOffset.QuadPart);
+
+ //
+ // If we got more than we need, make sure to only transfer
+ // the right amount.
+ //
+
+ if (ReceivedLength > Length) {
+ ReceivedLength = Length;
+ }
+ }
+
+ //
+ // It is possible for the user buffer to become no longer accessible
+ // since it was last checked by the I/O system. If we fail to access
+ // the buffer we must raise a status that the caller's exception
+ // filter considers as "expected". Also we unmap the Bcb here, since
+ // we otherwise would have no other reason to put a try-finally around
+ // this loop.
+ //
+
+ try {
+
+ RtlCopyBytes( CacheBuffer, Buffer, ReceivedLength );
+
+ CcSetDirtyPinnedData( Bcb, NULL );
+ CcUnpinFileData( Bcb, FALSE, UNPIN );
+ }
+ except( CcCopyReadExceptionFilter( GetExceptionInformation(),
+ &Status ) ) {
+
+ CcUnpinFileData( Bcb, TRUE, UNPIN );
+
+ //
+ // If we got an access violation, then the user buffer went
+ // away. Otherwise we must have gotten an I/O error trying
+ // to bring the data in.
+ //
+
+ if (Status == STATUS_ACCESS_VIOLATION) {
+ ExRaiseStatus( STATUS_INVALID_USER_BUFFER );
+ }
+ else {
+
+ ExRaiseStatus(FsRtlNormalizeNtstatus( Status, STATUS_UNEXPECTED_IO_ERROR ));
+ }
+ }
+
+ //
+ // Assume we did not get all the data we wanted, and set FOffset
+ // to the end of the returned data and adjust the Buffer and Length.
+ //
+
+ FOffset = BeyondLastByte;
+ Buffer = (PCHAR)Buffer + ReceivedLength;
+ Length -= ReceivedLength;
+ }
+
+ DebugTrace(-1, me, "CcCopyWrite -> TRUE\n", 0 );
+
+ return TRUE;
+}
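+
+//
+// Editor's illustrative sketch (not part of the original source): a cached
+// write in a file system might pair CcCopyWrite with the write throttling
+// routines later in this module; "CanWait" and "ByteOffset" are hypothetical.
+//
+//     if (!CcCanIWrite( FileObject, Length, CanWait, FALSE )) {
+//
+//         //  Too many dirty pages - defer the write (see CcDeferWrite below).
+//
+//     } else if (CcCopyWrite( FileObject, &ByteOffset, Length, CanWait, Buffer )) {
+//
+//         //  The data is now in the cache and will be lazily written.
+//
+//     } else {
+//
+//         //  CanWait was FALSE and the space was not immediately
+//         //  available; post the request.
+//     }
+//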
+
+
+VOID
+CcFastCopyWrite (
+ IN PFILE_OBJECT FileObject,
+ IN ULONG FileOffset,
+ IN ULONG Length,
+ IN PVOID Buffer
+ )
+
+/*++
+
+Routine Description:
+
+ This routine attempts to copy the specified file data from the specified
+ buffer into the Cache, and deliver the correct I/O status.
+
+ This is a faster version of CcCopyWrite which only supports 32-bit file
+ offsets and synchronicity (Wait = TRUE) and no Write Through.
+
+Arguments:
+
+ FileObject - Pointer to the file object for a file which was
+ opened with NO_INTERMEDIATE_BUFFERING clear, i.e., for
+ which CcInitializeCacheMap was called by the file system.
+
+ FileOffset - Byte offset in file to receive the data.
+
+ Length - Length of data in bytes.
+
+ Buffer - Pointer to input buffer from which data should be copied.
+
+Return Value:
+
+ None
+
+Raises:
+
+ STATUS_INSUFFICIENT_RESOURCES - If a pool allocation failure occurs.
+ This routine is always synchronous (the equivalent of Wait = TRUE),
+ so the failure is raised rather than reported by a return value.
+
+--*/
+
+{
+ PSHARED_CACHE_MAP SharedCacheMap;
+ PVOID CacheBuffer;
+ PVACB ActiveVacb;
+ ULONG ActivePage;
+ PVOID ActiveAddress;
+ ULONG PageIsDirty;
+ KIRQL OldIrql;
+ NTSTATUS Status;
+ ULONG ZeroFlags;
+ ULONG ValidDataLength;
+ LARGE_INTEGER FOffset;
+
+ DebugTrace(+1, me, "CcFastCopyWrite\n", 0 );
+
+ //
+ // Get pointer to shared cache map and a copy of valid data length
+ //
+
+ SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap;
+
+ //
+ // See if we have an active Vacb, that we can just copy to.
+ //
+
+ GetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, PageIsDirty );
+
+ if (ActiveVacb != NULL) {
+
+ //
+ // See if the request starts in the ActivePage. WriteThrough requests must
+ // go the longer route through CcMapAndCopy, where WriteThrough flushes are
+ // implemented.
+ //
+
+ if (((FileOffset >> PAGE_SHIFT) == ActivePage) && (Length != 0) &&
+ !FlagOn( FileObject->Flags, FO_WRITE_THROUGH )) {
+
+ ULONG LengthToCopy = PAGE_SIZE - (FileOffset & (PAGE_SIZE - 1));
+
+ //
+ // Reduce LengthToCopy if it is greater than our caller's length.
+ //
+
+ if (LengthToCopy > Length) {
+ LengthToCopy = Length;
+ }
+
+ //
+ // Copy the data to the user buffer.
+ //
+
+ try {
+
+ //
+ // If we are copying to a page that is locked down, then
+ // we have to do it under our spinlock, and update the
+ // NeedToZero field.
+ //
+
+ OldIrql = 0xFF;
+
+ CacheBuffer = (PVOID)((PCHAR)ActiveVacb->BaseAddress +
+ (FileOffset & (VACB_MAPPING_GRANULARITY - 1)));
+
+ if (SharedCacheMap->NeedToZero != NULL) {
+
+ //
+ // The FastLock may not write our "flag".
+ //
+
+ OldIrql = 0;
+
+ ExAcquireFastLock( &SharedCacheMap->ActiveVacbSpinLock, &OldIrql );
+
+ //
+ // Note that the NeedToZero could be cleared, since we
+ // tested it without the spinlock.
+ //
+
+ ActiveAddress = SharedCacheMap->NeedToZero;
+ if ((ActiveAddress != NULL) &&
+ (((PCHAR)CacheBuffer + LengthToCopy) > (PCHAR)ActiveAddress)) {
+
+ //
+ // If we are skipping some bytes in the page, then we need
+ // to zero them.
+ //
+
+ if ((PCHAR)CacheBuffer > (PCHAR)ActiveAddress) {
+
+ RtlZeroMemory( ActiveAddress, (PCHAR)CacheBuffer - (PCHAR)ActiveAddress );
+ }
+ SharedCacheMap->NeedToZero = (PVOID)((PCHAR)CacheBuffer + LengthToCopy);
+ }
+
+ ExReleaseFastLock( &SharedCacheMap->ActiveVacbSpinLock, OldIrql );
+ }
+
+ RtlCopyBytes( CacheBuffer, Buffer, LengthToCopy );
+
+ } except( CcCopyReadExceptionFilter( GetExceptionInformation(),
+ &Status ) ) {
+
+ //
+ // If we failed to overwrite the uninitialized data,
+ // zero it now (we cannot safely restore NeedToZero).
+ //
+
+ if (OldIrql != 0xFF) {
+ RtlZeroBytes( CacheBuffer, LengthToCopy );
+ }
+
+ SetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, ACTIVE_PAGE_IS_DIRTY );
+
+ //
+ // If we got an access violation, then the user buffer went
+ // away. Otherwise we must have gotten an I/O error trying
+ // to bring the data in.
+ //
+
+ if (Status == STATUS_ACCESS_VIOLATION) {
+ ExRaiseStatus( STATUS_INVALID_USER_BUFFER );
+ }
+ else {
+ ExRaiseStatus( FsRtlNormalizeNtstatus( Status,
+ STATUS_UNEXPECTED_IO_ERROR ));
+ }
+ }
+
+ //
+ // Now adjust FileOffset and Length by what we copied.
+ //
+
+ Buffer = (PVOID)((PCHAR)Buffer + LengthToCopy);
+ FileOffset += LengthToCopy;
+ Length -= LengthToCopy;
+
+ //
+ // If that was all the data, then get outski...
+ //
+
+ if (Length == 0) {
+
+ SetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, ACTIVE_PAGE_IS_DIRTY );
+ return;
+ }
+
+ //
+ // Remember that the page is dirty now.
+ //
+
+ PageIsDirty |= ACTIVE_PAGE_IS_DIRTY;
+ }
+
+ CcFreeActiveVacb( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty );
+
+ //
+ // Else someone else could have the active page, and may want to zero
+ // the range we plan to write!
+ //
+
+ } else if (SharedCacheMap->NeedToZero != NULL) {
+
+ CcFreeActiveVacb( SharedCacheMap, NULL, 0, FALSE );
+ }
+
+ //
+ // Set up for call to CcMapAndCopy
+ //
+
+ FOffset.LowPart = FileOffset;
+ FOffset.HighPart = 0;
+
+ ValidDataLength = ((PFSRTL_COMMON_FCB_HEADER)FileObject->FsContext)->ValidDataLength.LowPart;
+
+ ASSERT((ValidDataLength == MAXULONG) ||
+ (((PFSRTL_COMMON_FCB_HEADER)FileObject->FsContext)->ValidDataLength.HighPart == 0));
+
+ //
+ // At this point we can calculate the ZeroFlags.
+ //
+
+ //
+ // We can always zero middle pages, if any.
+ //
+
+ ZeroFlags = ZERO_MIDDLE_PAGES;
+
+ if (((FileOffset & (PAGE_SIZE - 1)) == 0) &&
+ (Length >= PAGE_SIZE)) {
+ ZeroFlags |= ZERO_FIRST_PAGE;
+ }
+
+ if (((FileOffset + Length) & (PAGE_SIZE - 1)) == 0) {
+ ZeroFlags |= ZERO_LAST_PAGE;
+ }
+
+ if ((FileOffset & ~(PAGE_SIZE - 1)) >= ValidDataLength) {
+ ZeroFlags |= ZERO_FIRST_PAGE | ZERO_MIDDLE_PAGES | ZERO_LAST_PAGE;
+ } else if (((FileOffset & ~(PAGE_SIZE - 1)) + PAGE_SIZE) >= ValidDataLength) {
+ ZeroFlags |= ZERO_MIDDLE_PAGES | ZERO_LAST_PAGE;
+ }
+
+ //
+ // Call a routine to map and copy the data in Mm and get out.
+ //
+
+ CcMapAndCopy( SharedCacheMap,
+ Buffer,
+ &FOffset,
+ Length,
+ ZeroFlags,
+ BooleanFlagOn( FileObject->Flags, FO_WRITE_THROUGH ));
+
+ DebugTrace(-1, me, "CcFastCopyWrite -> VOID\n", 0 );
+}
+
+
+LONG
+CcCopyReadExceptionFilter(
+ IN PEXCEPTION_POINTERS ExceptionPointer,
+ IN PNTSTATUS ExceptionCode
+ )
+
+/*++
+
+Routine Description:
+
+ This routine serves as an exception filter and has the special job of
+ extracting the "real" I/O error when Mm raises STATUS_IN_PAGE_ERROR
+ beneath us.
+
+Arguments:
+
+ ExceptionPointer - A pointer to the exception record that contains
+ the real Io Status.
+
+ ExceptionCode - A pointer to an NTSTATUS that is to receive the real
+ status.
+
+Return Value:
+
+ EXCEPTION_EXECUTE_HANDLER
+
+--*/
+
+{
+ *ExceptionCode = ExceptionPointer->ExceptionRecord->ExceptionCode;
+
+ if ( (*ExceptionCode == STATUS_IN_PAGE_ERROR) &&
+ (ExceptionPointer->ExceptionRecord->NumberParameters >= 3) ) {
+
+ *ExceptionCode = ExceptionPointer->ExceptionRecord->ExceptionInformation[2];
+ }
+
+ ASSERT( !NT_SUCCESS(*ExceptionCode) );
+
+ return EXCEPTION_EXECUTE_HANDLER;
+}
+
+
+BOOLEAN
+CcCanIWrite (
+ IN PFILE_OBJECT FileObject,
+ IN ULONG BytesToWrite,
+ IN BOOLEAN Wait,
+ IN UCHAR Retrying
+ )
+
+/*++
+
+Routine Description:
+
+ This routine tests whether it is ok to do a write to the cache
+ or not, according to the Thresholds of dirty bytes and available
+ pages. The first time this routine is called for a request (Retrying
+ FALSE), we automatically make the new request queue up behind any
+ requests already in the queue.
+
+ Note that the ListEmpty test is important to prevent small requests from sneaking
+ in and starving large requests.
+
+Arguments:
+
+ FileObject - for the file to be written
+
+ BytesToWrite - Number of bytes caller wishes to write to the Cache.
+
+ Wait - TRUE if the caller owns no resources, and can block inside this routine
+ until it is ok to write.
+
+ Retrying - Specified as FALSE when the request is first received, and
+ otherwise specified as TRUE if this write has already entered
+ the queue. Special non-zero value of MAXUCHAR indicates that
+ we were called within the cache manager with a MasterSpinLock held,
+ so do not attempt to acquire it here. MAXUCHAR - 1 means we
+ were called within the Cache Manager with some other spinlock
+ held. For either of these two special values, we do not touch
+ the FsRtl header.
+
+Return Value:
+
+ TRUE if it is ok to write.
+ FALSE if the caller should defer the write via a call to CcDeferWrite.
+
+--*/
+
+{
+ PSHARED_CACHE_MAP SharedCacheMap;
+ KEVENT Event;
+ KIRQL OldIrql;
+ ULONG PagesToWrite;
+ BOOLEAN ExceededPerFileThreshold;
+ DEFERRED_WRITE DeferredWrite;
+ PSECTION_OBJECT_POINTERS SectionObjectPointers;
+
+ //
+ // Do a special test here for file objects that keep track of dirty
+ // pages on a per-file basis. This is used mainly for slow links.
+ //
+
+ ExceededPerFileThreshold = FALSE;
+
+ PagesToWrite = ((BytesToWrite < 0x40000 ?
+ BytesToWrite : 0x40000) + (PAGE_SIZE - 1)) / PAGE_SIZE;
+
+ //
+ // Don't dereference the FsContext field if we were called while holding
+ // a spinlock.
+ //
+
+ if ((Retrying >= MAXUCHAR - 1) ||
+
+ FlagOn(((PFSRTL_COMMON_FCB_HEADER)(FileObject->FsContext))->Flags,
+ FSRTL_FLAG_LIMIT_MODIFIED_PAGES)) {
+
+ if (Retrying != MAXUCHAR) {
+ ExAcquireFastLock( &CcMasterSpinLock, &OldIrql );
+ }
+
+ if (((SectionObjectPointers = FileObject->SectionObjectPointer) != NULL) &&
+ ((SharedCacheMap = SectionObjectPointers->SharedCacheMap) != NULL) &&
+ (SharedCacheMap->DirtyPageThreshold != 0) &&
+ (SharedCacheMap->DirtyPages != 0) &&
+ ((PagesToWrite + SharedCacheMap->DirtyPages) >
+ SharedCacheMap->DirtyPageThreshold)) {
+
+ ExceededPerFileThreshold = TRUE;
+ }
+
+ if (Retrying != MAXUCHAR) {
+ ExReleaseFastLock( &CcMasterSpinLock, OldIrql );
+ }
+ }
+
+ //
+ // See if it is ok to do the write right now
+ //
+
+ if ((Retrying || IsListEmpty(&CcDeferredWrites))
+
+ &&
+
+ (CcTotalDirtyPages + PagesToWrite < CcDirtyPageThreshold)
+
+ &&
+
+ MmEnoughMemoryForWrite()
+
+ &&
+
+ !ExceededPerFileThreshold) {
+
+ return TRUE;
+
+ //
+ // Otherwise, if our caller is synchronous, we will just wait here.
+ //
+
+ }
+
+ if (IsListEmpty(&CcDeferredWrites) ) {
+
+ //
+ // Get a write scan to occur NOW
+ //
+
+ KeSetTimer( &LazyWriter.ScanTimer, CcNoDelay, &LazyWriter.ScanDpc );
+ }
+
+ if (Wait) {
+
+ KeInitializeEvent( &Event, NotificationEvent, FALSE );
+
+ //
+ // Fill in the block. Note that we can access the Fsrtl Common Header
+ // even if it's paged because Wait will be FALSE if called from
+ // within the cache.
+ //
+
+ DeferredWrite.NodeTypeCode = CACHE_NTC_DEFERRED_WRITE;
+ DeferredWrite.NodeByteSize = sizeof(DEFERRED_WRITE);
+ DeferredWrite.FileObject = FileObject;
+ DeferredWrite.BytesToWrite = BytesToWrite;
+ DeferredWrite.Event = &Event;
+ DeferredWrite.LimitModifiedPages = BooleanFlagOn(((PFSRTL_COMMON_FCB_HEADER)(FileObject->FsContext))->Flags,
+ FSRTL_FLAG_LIMIT_MODIFIED_PAGES);
+
+ //
+ // Now insert at the appropriate end of the list
+ //
+
+ if (Retrying) {
+ (VOID)ExInterlockedInsertHeadList( &CcDeferredWrites,
+ &DeferredWrite.DeferredWriteLinks,
+ &CcDeferredWriteSpinLock );
+ } else {
+ (VOID)ExInterlockedInsertTailList( &CcDeferredWrites,
+ &DeferredWrite.DeferredWriteLinks,
+ &CcDeferredWriteSpinLock );
+ }
+
+ while (TRUE) {
+
+ //
+ // Now since we really didn't synchronize anything but the insertion,
+ // we call the post routine to make sure that in some weird case we
+ // do not leave anyone hanging with no dirty bytes for the Lazy Writer.
+ //
+
+ CcPostDeferredWrites();
+
+ //
+ // Finally, wait until the event is signalled and it is ok to write,
+ // then return to tell the caller it may proceed.
+ //
+
+ if (KeWaitForSingleObject( &Event,
+ Executive,
+ KernelMode,
+ FALSE,
+ &CcIdleDelay ) == STATUS_SUCCESS) {
+
+
+ return TRUE;
+ }
+ }
+
+ } else {
+ return FALSE;
+ }
+}
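+
+//
+// Editor's illustrative sketch (not part of the original source): an
+// asynchronous caller that receives FALSE from CcCanIWrite would typically
+// queue the request via CcDeferWrite (defined below); "PostTheWrite" and
+// "Irp" are hypothetical.
+//
+//     if (!CcCanIWrite( FileObject, BytesToWrite, FALSE, Retrying )) {
+//
+//         CcDeferWrite( FileObject,
+//                       &PostTheWrite,     //  called when it is ok to write
+//                       Irp,               //  Context1
+//                       NULL,              //  Context2
+//                       BytesToWrite,
+//                       Retrying );
+//
+//         return STATUS_PENDING;
+//     }
+//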
+
+
+VOID
+CcDeferWrite (
+ IN PFILE_OBJECT FileObject,
+ IN PCC_POST_DEFERRED_WRITE PostRoutine,
+ IN PVOID Context1,
+ IN PVOID Context2,
+ IN ULONG BytesToWrite,
+ IN BOOLEAN Retrying
+ )
+
+/*++
+
+Routine Description:
+
+ This routine may be called to have the Cache Manager defer posting
+ of a write until the Lazy Writer makes some progress writing, or
+ there are more available pages. A file system would normally call
+ this routine after receiving FALSE from CcCanIWrite, and preparing
+ the request to be posted.
+
+Arguments:
+
+ FileObject - for the file to be written
+
+ PostRoutine - Address of the PostRoutine that the Cache Manager can
+ call to post the request when conditions are right. Note
+ that it is possible that this routine will be called
+ immediately from this routine.
+
+ Context1 - First context parameter for the post routine.
+
+ Context2 - Second context parameter for the post routine.
+
+ BytesToWrite - Number of bytes that the request is trying to write
+ to the cache.
+
+ Retrying - Supplied as FALSE if the request is being posted for the
+ first time, TRUE otherwise.
+
+Return Value:
+
+ None
+
+--*/
+
+{
+ PDEFERRED_WRITE DeferredWrite;
+
+ //
+ // Attempt to allocate a deferred write block, and if we do not get
+ // one, just post it immediately rather than gobbling up must succeed
+ // pool.
+ //
+
+ DeferredWrite = ExAllocatePool( NonPagedPool, sizeof(DEFERRED_WRITE) );
+
+ if (DeferredWrite == NULL) {
+ (*PostRoutine)( Context1, Context2 );
+ return;
+ }
+
+ //
+ // Fill in the block.
+ //
+
+ DeferredWrite->NodeTypeCode = CACHE_NTC_DEFERRED_WRITE;
+ DeferredWrite->NodeByteSize = sizeof(DEFERRED_WRITE);
+ DeferredWrite->FileObject = FileObject;
+ DeferredWrite->BytesToWrite = BytesToWrite;
+ DeferredWrite->Event = NULL;
+ DeferredWrite->PostRoutine = PostRoutine;
+ DeferredWrite->Context1 = Context1;
+ DeferredWrite->Context2 = Context2;
+ DeferredWrite->LimitModifiedPages = BooleanFlagOn(((PFSRTL_COMMON_FCB_HEADER)(FileObject->FsContext))->Flags,
+ FSRTL_FLAG_LIMIT_MODIFIED_PAGES);
+
+ //
+ // Now insert at the appropriate end of the list
+ //
+
+ if (Retrying) {
+ (VOID)ExInterlockedInsertHeadList( &CcDeferredWrites,
+ &DeferredWrite->DeferredWriteLinks,
+ &CcDeferredWriteSpinLock );
+ } else {
+ (VOID)ExInterlockedInsertTailList( &CcDeferredWrites,
+ &DeferredWrite->DeferredWriteLinks,
+ &CcDeferredWriteSpinLock );
+ }
+
+ //
+ // Now since we really didn't synchronize anything but the insertion,
+ // we call the post routine to make sure that in some weird case we
+ // do not leave anyone hanging with no dirty bytes for the Lazy Writer.
+ //
+
+ CcPostDeferredWrites();
+}
+
+
+VOID
+CcPostDeferredWrites (
+ )
+
+/*++
+
+Routine Description:
+
+ This routine may be called to see if any deferred writes should be posted
+ now, and to post them. It should be called any time the status of the
+ queue may have changed, such as when a new entry has been added, or the
+ Lazy Writer has finished writing out buffers and set them clean.
+
+Arguments:
+
+ None
+
+Return Value:
+
+ None
+
+--*/
+
+{
+ PDEFERRED_WRITE DeferredWrite;
+ ULONG TotalBytesLetLoose = 0;
+ KIRQL OldIrql;
+
+ do {
+
+ //
+ // Initially clear the deferred write structure pointer
+        // and synchronize.
+ //
+
+ DeferredWrite = NULL;
+
+ ExAcquireSpinLock( &CcDeferredWriteSpinLock, &OldIrql );
+
+ //
+ // If the list is empty we are done.
+ //
+
+ if (!IsListEmpty(&CcDeferredWrites)) {
+
+ PLIST_ENTRY Entry;
+
+ Entry = CcDeferredWrites.Flink;
+
+ while (Entry != &CcDeferredWrites) {
+
+ DeferredWrite = CONTAINING_RECORD( Entry,
+ DEFERRED_WRITE,
+ DeferredWriteLinks );
+
+ //
+ // Check for a paranoid case here that TotalBytesLetLoose
+ // wraps. We stop processing the list at this time.
+ //
+
+ TotalBytesLetLoose += DeferredWrite->BytesToWrite;
+
+ if (TotalBytesLetLoose < DeferredWrite->BytesToWrite) {
+
+ DeferredWrite = NULL;
+ break;
+ }
+
+ //
+ // If it is now ok to post this write, remove him from
+ // the list.
+ //
+
+ if (CcCanIWrite( DeferredWrite->FileObject,
+ TotalBytesLetLoose,
+ FALSE,
+ MAXUCHAR - 1 )) {
+
+ RemoveEntryList( &DeferredWrite->DeferredWriteLinks );
+ break;
+
+ //
+ // Otherwise, it is time to stop processing the list, so
+ // we clear the pointer again unless we throttled this item
+ // because of a private dirty page limit.
+ //
+
+ } else {
+
+ //
+ // If this was a private throttle, skip over it and
+ // remove its byte count from the running total.
+ //
+
+ if (DeferredWrite->LimitModifiedPages) {
+
+ Entry = Entry->Flink;
+ TotalBytesLetLoose -= DeferredWrite->BytesToWrite;
+ DeferredWrite = NULL;
+ continue;
+
+ } else {
+
+ DeferredWrite = NULL;
+
+ break;
+ }
+ }
+ }
+ }
+
+ ExReleaseSpinLock( &CcDeferredWriteSpinLock, OldIrql );
+
+ //
+ // If we got something, set the event or call the post routine
+ // and deallocate the structure.
+ //
+
+ if (DeferredWrite != NULL) {
+
+ if (DeferredWrite->Event != NULL) {
+
+ KeSetEvent( DeferredWrite->Event, 0, FALSE );
+
+ } else {
+
+ (*DeferredWrite->PostRoutine)( DeferredWrite->Context1,
+ DeferredWrite->Context2 );
+ ExFreePool( DeferredWrite );
+ }
+ }
+
+ //
+ // Loop until we find no more work to do.
+ //
+
+ } while (DeferredWrite != NULL);
+}
diff --git a/private/ntos/cache/dirs b/private/ntos/cache/dirs
new file mode 100644
index 000000000..a2a38f0fd
--- /dev/null
+++ b/private/ntos/cache/dirs
@@ -0,0 +1,24 @@
+!IF 0
+
+Copyright (c) 1989 Microsoft Corporation
+
+Module Name:
+
+ dirs.
+
+Abstract:
+
+ This file specifies the subdirectories of the current directory that
+ contain component makefiles.
+
+
+Author:
+
+
+NOTE: Commented description of this file is in \nt\bak\bin\dirs.tpl
+
+!ENDIF
+
+DIRS=up
+
+OPTIONAL_DIRS=mp
diff --git a/private/ntos/cache/fssup.c b/private/ntos/cache/fssup.c
new file mode 100644
index 000000000..82990558a
--- /dev/null
+++ b/private/ntos/cache/fssup.c
@@ -0,0 +1,3343 @@
+/*++
+
+Copyright (c) 1990 Microsoft Corporation
+
+Module Name:
+
+ fssup.c
+
+Abstract:
+
+ This module implements the File System support routines for the
+ Cache subsystem.
+
+Author:
+
+ Tom Miller [TomM] 4-May-1990
+
+Revision History:
+
+--*/
+
+#include "cc.h"
+
+//
+// The Bug check file id for this module
+//
+
+#define BugCheckFileId (CACHE_BUG_CHECK_FSSUP)
+
+//
+// Define our debug constant
+//
+
+#define me 0x00000001
+
+//
+// For your debugging pleasure, if the flag doesn't move! (Currently not used)
+//
+
+#define IsSyscacheFile(FO) (((FO) != NULL) && \
+ (*(PUSHORT)(FO)->FsContext == 0X705) && \
+ FlagOn(*(PULONG)((PCHAR)(FO)->FsContext + 0x48), 0x80000000))
+
+extern POBJECT_TYPE IoFileObjectType;
+extern ULONG MmLargeSystemCache;
+
+VOID
+CcUnmapAndPurge(
+ IN PSHARED_CACHE_MAP SharedCacheMap
+ );
+
+VOID
+CcPurgeAndClearCacheSection (
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN PLARGE_INTEGER FileOffset
+ );
+
+#ifdef ALLOC_PRAGMA
+#pragma alloc_text(INIT,CcInitializeCacheManager)
+#endif
+
+
+BOOLEAN
+CcInitializeCacheManager (
+ )
+
+/*++
+
+Routine Description:
+
+ This routine must be called during system initialization before the
+ first call to any file system, to allow the Cache Manager to initialize
+ its global data structures. This routine has no dependencies on other
+ system components being initialized.
+
+Arguments:
+
+ None
+
+Return Value:
+
+ TRUE if initialization was successful
+
+--*/
+
+{
+ CLONG i;
+ USHORT NumberOfItems;
+ PWORK_QUEUE_ITEM WorkItem;
+
+#ifdef CCDBG_LOCK
+ KeInitializeSpinLock( &CcDebugTraceLock );
+#endif
+
+#if DBG
+ CcBcbCount = 0;
+ InitializeListHead( &CcBcbList );
+ KeInitializeSpinLock( &CcBcbSpinLock );
+#endif
+
+ //
+ // Initialize shared cache map list structures
+ //
+
+ KeInitializeSpinLock( &CcMasterSpinLock );
+ InitializeListHead( &CcCleanSharedCacheMapList );
+ InitializeListHead( &CcDirtySharedCacheMapList.SharedCacheMapLinks );
+ CcDirtySharedCacheMapList.Flags = IS_CURSOR;
+ InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks,
+ &CcLazyWriterCursor.SharedCacheMapLinks );
+ CcLazyWriterCursor.Flags = IS_CURSOR;
+
+ //
+ // Initialize worker thread structures
+ //
+
+ KeInitializeSpinLock( &CcWorkQueueSpinlock );
+ InitializeListHead( &CcIdleWorkerThreadList );
+ InitializeListHead( &CcExpressWorkQueue );
+ InitializeListHead( &CcRegularWorkQueue );
+
+ //
+ // Set the number of worker threads based on the system size.
+ //
+
+ CcCapturedSystemSize = MmQuerySystemSize();
+ if (CcNumberWorkerThreads == 0) {
+
+ switch (CcCapturedSystemSize) {
+ case MmSmallSystem:
+ CcNumberWorkerThreads = ExCriticalWorkerThreads - 1;
+ CcDirtyPageThreshold = MmNumberOfPhysicalPages / 8;
+ break;
+
+ case MmMediumSystem:
+ CcNumberWorkerThreads = ExCriticalWorkerThreads - 1;
+ CcDirtyPageThreshold = MmNumberOfPhysicalPages / 4;
+ break;
+
+ case MmLargeSystem:
+ CcNumberWorkerThreads = ExCriticalWorkerThreads - 2;
+ CcDirtyPageThreshold = MmNumberOfPhysicalPages / 4 +
+ MmNumberOfPhysicalPages / 8;
+
+#if 0
+ //
+ // Use more memory if we are a large server.
+ //
+
+ if ((MmLargeSystemCache != 0) &&
+ (CcDirtyPageThreshold < (MmNumberOfPhysicalPages - (0xE00000 / PAGE_SIZE)))) {
+
+ CcDirtyPageThreshold = MmNumberOfPhysicalPages - (0xE00000 / PAGE_SIZE);
+ }
+#endif
+ break;
+
+ default:
+ CcNumberWorkerThreads = 1;
+ CcDirtyPageThreshold = MmNumberOfPhysicalPages / 8;
+ }
+
+// CcDirtyPageThreshold = (2*1024*1024)/PAGE_SIZE;
+
+ if (MmSystemCacheWs.MaximumWorkingSetSize > ((4*1024*1024)/PAGE_SIZE)) {
+ CcDirtyPageThreshold = MmSystemCacheWs.MaximumWorkingSetSize -
+ ((2*1024*1024)/PAGE_SIZE);
+ }
+
+ CcDirtyPageTarget = CcDirtyPageThreshold / 2 +
+ CcDirtyPageThreshold / 4;
+ }
+
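+    //
+    // Editorial note (illustrative arithmetic for a hypothetical machine):
+    // on a medium system with 64MB of memory and 4KB pages,
+    // MmNumberOfPhysicalPages is 16384, so the quarter-of-memory threshold
+    // set above is 4096 dirty pages (16MB), and CcDirtyPageTarget becomes
+    // 4096/2 + 4096/4 = 3072 pages (ignoring the system cache working set
+    // override just above).
+    //
+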
+ //
+ // Now allocate and initialize the above number of worker thread
+ // items.
+ //
+
+ for (i = 0; i < CcNumberWorkerThreads; i++) {
+
+ WorkItem = ExAllocatePool( NonPagedPool, sizeof(WORK_QUEUE_ITEM) );
+
+ //
+ // Initialize the work queue item and insert in our queue
+ // of potential worker threads.
+ //
+
+ ExInitializeWorkItem( WorkItem, CcWorkerThread, WorkItem );
+ InsertTailList( &CcIdleWorkerThreadList, &WorkItem->List );
+ }
+
+ //
+ // Initialize the Lazy Writer thread structure, and start him up.
+ //
+
+ RtlZeroMemory( &LazyWriter, sizeof(LAZY_WRITER) );
+
+ KeInitializeSpinLock( &CcWorkQueueSpinlock );
+ InitializeListHead( &LazyWriter.WorkQueue );
+
+ //
+ // Store process address
+ //
+
+ LazyWriter.OurProcess = PsGetCurrentProcess();
+
+ //
+ // Initialize the Scan Dpc and Timer.
+ //
+
+ KeInitializeDpc( &LazyWriter.ScanDpc, &CcScanDpc, NULL );
+ KeInitializeTimer( &LazyWriter.ScanTimer );
+
+ //
+ // Now initialize the lookaside list for allocating Work Queue entries.
+ //
+
+ switch ( CcCapturedSystemSize ) {
+
+ //
+ // ~512 bytes
+ //
+
+ case MmSmallSystem :
+ NumberOfItems = 32;
+ break;
+
+ //
+ // ~1k bytes
+ //
+
+ case MmMediumSystem :
+ NumberOfItems = 64;
+ break;
+
+ //
+ // ~2k bytes
+ //
+
+ case MmLargeSystem :
+ NumberOfItems = 128;
+ if (MmIsThisAnNtAsSystem()) {
+ NumberOfItems += 128;
+ }
+
+ break;
+ }
+
+ ExInitializeNPagedLookasideList( &CcTwilightLookasideList,
+ NULL,
+ NULL,
+ 0,
+ sizeof( WORK_QUEUE_ENTRY ),
+ 'kwcC',
+ NumberOfItems );
+
+ //
+ // Now initialize the Bcb zone
+ //
+
+ {
+ PVOID InitialSegment;
+ ULONG InitialSegmentSize;
+ ULONG RoundedBcbSize = (sizeof(BCB) + 7) & ~7;
+ ULONG NumberOfItems;
+
+
+ switch ( CcCapturedSystemSize ) {
+
+ //
+ // ~1.5k bytes
+ //
+
+ case MmSmallSystem :
+ NumberOfItems = 8;
+ break;
+
+ //
+ // ~4k bytes
+ //
+
+ case MmMediumSystem :
+ NumberOfItems = 20;
+ break;
+
+ //
+ // ~12k bytes
+ //
+
+ case MmLargeSystem :
+ NumberOfItems = 64;
+ break;
+ }
+
+ InitialSegmentSize = sizeof(ZONE_SEGMENT_HEADER) + RoundedBcbSize * NumberOfItems;
+
+ //
+ // Allocate the initial allocation for the zone. If we cannot get it,
+ // something must really be wrong, so we will just bugcheck.
+ //
+
+ if ((InitialSegment = ExAllocatePool( NonPagedPool,
+ InitialSegmentSize)) == NULL) {
+
+ CcBugCheck( 0, 0, 0 );
+ }
+
+ if (!NT_SUCCESS(ExInitializeZone( &LazyWriter.BcbZone,
+ RoundedBcbSize,
+ InitialSegment,
+ InitialSegmentSize ))) {
+ CcBugCheck( 0, 0, 0 );
+ }
+ }
+
+ //
+ // Initialize the Deferred Write List.
+ //
+
+ KeInitializeSpinLock( &CcDeferredWriteSpinLock );
+ InitializeListHead( &CcDeferredWrites );
+
+ //
+ // Initialize the Vacbs.
+ //
+
+ CcInitializeVacbs();
+
+ return TRUE;
+}
+
+
+VOID
+CcInitializeCacheMap (
+ IN PFILE_OBJECT FileObject,
+ IN PCC_FILE_SIZES FileSizes,
+ IN BOOLEAN PinAccess,
+ IN PCACHE_MANAGER_CALLBACKS Callbacks,
+ IN PVOID LazyWriteContext
+ )
+
+/*++
+
+Routine Description:
+
+ This routine is intended to be called by File Systems only. It
+ initializes the cache maps for data caching. It should be called
+    every time a file is opened or created, and NO_INTERMEDIATE_BUFFERING
+ was specified as FALSE.
+
+Arguments:
+
+ FileObject - A pointer to the newly-created file object.
+
+ FileSizes - A pointer to AllocationSize, FileSize and ValidDataLength
+ for the file. ValidDataLength should contain MAXLONGLONG if
+ valid data length tracking and callbacks are not desired.
+
+ PinAccess - FALSE if file will be used exclusively for Copy and Mdl
+ access, or TRUE if file will be used for Pin access.
+ (Files for Pin access are not limited in size as the caller
+ must access multiple areas of the file at once.)
+
+ Callbacks - Structure of callbacks used by the Lazy Writer
+
+ LazyWriteContext - Parameter to be passed in to above routine.
+
+Return Value:
+
+ None. If an error occurs, this routine will Raise the status.
+
+--*/
+
+{
+ KIRQL OldIrql;
+ PSHARED_CACHE_MAP SharedCacheMap = NULL;
+ PVOID CacheMapToFree = NULL;
+ CC_FILE_SIZES LocalSizes;
+ BOOLEAN WeSetBeingCreated = FALSE;
+ BOOLEAN SharedListOwned = FALSE;
+ BOOLEAN MustUninitialize = FALSE;
+ BOOLEAN WeCreated = FALSE;
+
+ DebugTrace(+1, me, "CcInitializeCacheMap:\n", 0 );
+ DebugTrace( 0, me, " FileObject = %08lx\n", FileObject );
+ DebugTrace( 0, me, " FileSizes = %08lx\n", FileSizes );
+
+ //
+ // Make a local copy of the passed in file sizes before acquiring
+ // the spin lock.
+ //
+
+ LocalSizes = *FileSizes;
+
+ //
+    //  If no AllocationSize was given, set it to one byte before rounding up below.
+ //
+
+ if (LocalSizes.AllocationSize.QuadPart == 0) {
+ LocalSizes.AllocationSize.LowPart += 1;
+ }
+
+ //
+ // If caller has Write access or will allow write, then round
+ // size to next create modulo. (***Temp*** there may be too many
+ // apps that end up allowing shared write, thanks to our Dos heritage,
+ // to keep that part of the check in.)
+ //
+
+ if (FileObject->WriteAccess /*|| FileObject->SharedWrite */) {
+
+ LocalSizes.AllocationSize.QuadPart = LocalSizes.AllocationSize.QuadPart + (LONGLONG)(DEFAULT_CREATE_MODULO - 1);
+ LocalSizes.AllocationSize.LowPart &= ~(DEFAULT_CREATE_MODULO - 1);
+
+ } else {
+
+ LocalSizes.AllocationSize.QuadPart = LocalSizes.AllocationSize.QuadPart + (LONGLONG)(VACB_MAPPING_GRANULARITY - 1);
+ LocalSizes.AllocationSize.LowPart &= ~(VACB_MAPPING_GRANULARITY - 1);
+ }
+
+ //
+ // Do the allocate of the SharedCacheMap, based on an unsafe test,
+ // while not holding a spinlock. Allocation failures look like we
+ // never decided to allocate one here!
+ //
+
+ if (FileObject->SectionObjectPointer->SharedCacheMap == NULL) {
+ CacheMapToFree = ExAllocatePool( NonPagedPool, sizeof(SHARED_CACHE_MAP) );
+ }
+
+ //
+ // Serialize Creation/Deletion of all Shared CacheMaps
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+ SharedListOwned = TRUE;
+
+ //
+ // Insure release of our global resource
+ //
+
+ try {
+
+ //
+ // Check for second initialization of same file object
+ //
+
+ if (FileObject->PrivateCacheMap != NULL) {
+
+ DebugTrace( 0, 0, "CacheMap already initialized\n", 0 );
+ try_return( NOTHING );
+ }
+
+ //
+ // Get current Shared Cache Map pointer indirectly off of the file object.
+ // (The actual pointer is typically in a file system data structure, such
+ // as an Fcb.)
+ //
+
+ SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap;
+
+ //
+ // If there is no SharedCacheMap, then we must create a section and
+ // the SharedCacheMap structure.
+ //
+
+ if (SharedCacheMap == NULL) {
+
+ //
+ // After successfully creating the section, allocate the SharedCacheMap.
+ //
+
+ WeCreated = TRUE;
+
+ if (CacheMapToFree == NULL) {
+ CacheMapToFree = (PSHARED_CACHE_MAP)ExAllocatePool( NonPagedPool,
+ sizeof(SHARED_CACHE_MAP) );
+ }
+
+ SharedCacheMap = CacheMapToFree;
+ CacheMapToFree = NULL;
+
+ if (SharedCacheMap == NULL) {
+
+ DebugTrace( 0, 0, "Failed to allocate SharedCacheMap\n", 0 );
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ SharedListOwned = FALSE;
+
+ ExRaiseStatus( STATUS_INSUFFICIENT_RESOURCES );
+ }
+
+ //
+ // Zero the SharedCacheMap and fill in the nonzero portions later.
+ //
+
+ RtlZeroMemory( SharedCacheMap, sizeof(SHARED_CACHE_MAP) );
+
+ //
+ // Now initialize the Shared Cache Map.
+ //
+
+ SharedCacheMap->NodeTypeCode = CACHE_NTC_SHARED_CACHE_MAP;
+ SharedCacheMap->NodeByteSize = sizeof(SHARED_CACHE_MAP);
+ SharedCacheMap->FileSize = LocalSizes.FileSize;
+ SharedCacheMap->ValidDataLength =
+ SharedCacheMap->ValidDataGoal = LocalSizes.ValidDataLength;
+ SharedCacheMap->FileObject = FileObject;
+ // SharedCacheMap->Section set below
+
+ //
+ // Initialize the ActiveVacbSpinLock.
+ //
+
+ KeInitializeSpinLock( &SharedCacheMap->ActiveVacbSpinLock );
+
+ if (PinAccess) {
+ SetFlag(SharedCacheMap->Flags, PIN_ACCESS);
+ }
+
+ //
+ // If this file has FO_SEQUENTIAL_ONLY set, then remember that
+ // in the SharedCacheMap.
+ //
+
+ if (FlagOn(FileObject->Flags, FO_SEQUENTIAL_ONLY)) {
+ SetFlag(SharedCacheMap->Flags, ONLY_SEQUENTIAL_ONLY_SEEN);
+ }
+
+ //
+ // Do the round-robin allocation of the spinlock for the shared
+ // cache map. Note the manipulation of the next
+ // counter is safe, since we have the CcMasterSpinLock
+ // exclusive.
+ //
+
+ InitializeListHead( &SharedCacheMap->BcbList );
+ SharedCacheMap->Callbacks = Callbacks;
+ SharedCacheMap->LazyWriteContext = LazyWriteContext;
+
+ //
+ // Initialize the pointer to the uninitialize event chain.
+ //
+
+ SharedCacheMap->UninitializeEvent = NULL;
+
+ //
+ // Initialize listhead for all PrivateCacheMaps
+ //
+
+ InitializeListHead( &SharedCacheMap->PrivateList );
+
+ //
+ // Insert the new Shared Cache Map in the global list
+ //
+
+ InsertTailList( &CcCleanSharedCacheMapList,
+ &SharedCacheMap->SharedCacheMapLinks );
+
+ //
+ // Finally, store the pointer to the Shared Cache Map back
+ // via the indirect pointer in the File Object.
+ //
+
+ FileObject->SectionObjectPointer->SharedCacheMap = SharedCacheMap;
+
+ //
+ // We must reference this file object so that it cannot go away
+ // until we do CcUninitializeCacheMap below. Note we cannot
+ // find or rely on the FileObject that Memory Management has,
+ // although normally it will be this same one anyway.
+ //
+
+ ObReferenceObject ( FileObject );
+
+ } else {
+
+ //
+ // If this file has FO_SEQUENTIAL_ONLY clear, then remember that
+ // in the SharedCacheMap.
+ //
+
+ if (!FlagOn(FileObject->Flags, FO_SEQUENTIAL_ONLY)) {
+ ClearFlag(SharedCacheMap->Flags, ONLY_SEQUENTIAL_ONLY_SEEN);
+ }
+ }
+
+ //
+ // Make sure that no one is trying to lazy delete it in the case
+ // that the Cache Map was already there.
+ //
+
+ ClearFlag(SharedCacheMap->Flags, TRUNCATE_REQUIRED);
+
+ //
+        //  In case there has been a CcUnmapAndPurge call, we check here
+ // if we need to recreate the section and map it.
+ //
+
+ if ((SharedCacheMap->Vacbs == NULL) &&
+ !FlagOn(SharedCacheMap->Flags, BEING_CREATED)) {
+
+ //
+ // Increment the OpenCount on the CacheMap.
+ //
+
+ SharedCacheMap->OpenCount += 1;
+ MustUninitialize = TRUE;
+
+ //
+ // We still want anyone else to wait.
+ //
+
+ SetFlag(SharedCacheMap->Flags, BEING_CREATED);
+ WeSetBeingCreated = TRUE;
+
+ //
+            //  If there is a create event, then this must be the path where
+ // we were only unmapped. We will just clear it here again in case
+ // someone needs to wait again this time too.
+ //
+
+ if (SharedCacheMap->CreateEvent != NULL) {
+
+ KeInitializeEvent( SharedCacheMap->CreateEvent,
+ NotificationEvent,
+ FALSE );
+ }
+
+ //
+ // Release global resource
+ //
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ SharedListOwned = FALSE;
+
+ //
+ // We have to test this, because the section may only be unmapped.
+ //
+
+ if (SharedCacheMap->Section == NULL) {
+
+ LARGE_INTEGER LargeZero = {0,0};
+
+ //
+ // Call MM to create a section for this file, for the calculated
+ // section size. Note that we have the choice in this service to
+ // pass in a FileHandle or a FileObject pointer, but not both.
+ // Naturally we want to pass in the handle.
+ //
+
+ DebugTrace( 0, mm, "MmCreateSection:\n", 0 );
+ DebugTrace2(0, mm, " MaximumSize = %08lx, %08lx\n",
+ LocalSizes.AllocationSize.LowPart,
+ LocalSizes.AllocationSize.HighPart );
+ DebugTrace( 0, mm, " FileObject = %08lx\n", FileObject );
+
+ SharedCacheMap->Status = MmCreateSection( &SharedCacheMap->Section,
+ SECTION_MAP_READ
+ | SECTION_MAP_WRITE
+ | SECTION_QUERY,
+ NULL,
+ &LocalSizes.AllocationSize,
+ PAGE_READWRITE,
+ SEC_COMMIT,
+ NULL,
+ FileObject );
+
+ DebugTrace( 0, mm, " <Section = %08lx\n", SharedCacheMap->Section );
+
+ if (!NT_SUCCESS( SharedCacheMap->Status )){
+ DebugTrace( 0, 0, "Error from MmCreateSection = %08lx\n",
+ SharedCacheMap->Status );
+
+ SharedCacheMap->Section = NULL;
+ ExRaiseStatus( FsRtlNormalizeNtstatus( SharedCacheMap->Status,
+ STATUS_UNEXPECTED_MM_CREATE_ERR ));
+ }
+
+ ObDeleteCapturedInsertInfo(SharedCacheMap->Section);
+
+ //
+ // If this is a stream file object, then no user can map it,
+ // and we should keep the modified page writer out of it.
+ //
+
+ if (!FlagOn(((PFSRTL_COMMON_FCB_HEADER)FileObject->FsContext)->Flags2,
+ FSRTL_FLAG2_DO_MODIFIED_WRITE) &&
+ (FileObject->FsContext2 == NULL)) {
+
+ BOOLEAN Disabled;
+
+ Disabled = MmDisableModifiedWriteOfSection( FileObject->SectionObjectPointer );
+ ExAcquireFastLock( &CcMasterSpinLock, &OldIrql );
+ SetFlag(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED);
+ ExReleaseFastLock( &CcMasterSpinLock, OldIrql );
+
+ //**** ASSERT( Disabled );
+ }
+
+ //
+ // Create the Vacb array.
+ //
+
+ CcCreateVacbArray( SharedCacheMap, LocalSizes.AllocationSize );
+ }
+
+ //
+ // If the section already exists, we still have to call MM to
+ // extend, in case it is not large enough.
+ //
+
+ else {
+
+ if ( LocalSizes.AllocationSize.QuadPart > SharedCacheMap->SectionSize.QuadPart ) {
+
+ NTSTATUS Status;
+
+ DebugTrace( 0, mm, "MmExtendSection:\n", 0 );
+ DebugTrace( 0, mm, " Section = %08lx\n", SharedCacheMap->Section );
+ DebugTrace2(0, mm, " Size = %08lx, %08lx\n",
+ LocalSizes.AllocationSize.LowPart,
+ LocalSizes.AllocationSize.HighPart );
+
+ Status = MmExtendSection( SharedCacheMap->Section,
+ &LocalSizes.AllocationSize,
+ TRUE );
+
+ if (!NT_SUCCESS(Status)) {
+
+ DebugTrace( 0, 0, "Error from MmExtendSection, Status = %08lx\n",
+ Status );
+
+ ExRaiseStatus( FsRtlNormalizeNtstatus( Status,
+ STATUS_UNEXPECTED_MM_EXTEND_ERR ));
+ }
+ }
+
+ //
+ // Extend the Vacb array.
+ //
+
+ CcExtendVacbArray( SharedCacheMap, LocalSizes.AllocationSize );
+ }
+
+ //
+ // Now show that we are all done and resume any waiters.
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+ ClearFlag(SharedCacheMap->Flags, BEING_CREATED);
+ WeSetBeingCreated = FALSE;
+ if (SharedCacheMap->CreateEvent != NULL) {
+ KeSetEvent( SharedCacheMap->CreateEvent, 0, FALSE );
+ }
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ }
+
+ //
+ // Else if the section is already there, we make sure it is large
+ // enough by calling CcExtendCacheSection.
+ //
+
+ else {
+
+ //
+ // If the SharedCacheMap is currently being created we have
+ // to optionally create and wait on an event for it. Note that
+ // the only safe time to delete the event is in
+ // CcUninitializeCacheMap, because we otherwise have no way of
+ // knowing when everyone has reached the KeWaitForSingleObject.
+ //
+
+ if (FlagOn(SharedCacheMap->Flags, BEING_CREATED)) {
+ if (SharedCacheMap->CreateEvent == NULL) {
+
+ //
+                //  We compete for the local event with the WaitOnActiveCount
+ // event, and we synchronize the claiming of that event with
+ // CcVacbSpinLock.
+ //
+
+ ExAcquireSpinLockAtDpcLevel( &CcVacbSpinLock );
+
+ //
+                //  If the local event is not being used as a create event,
+ // then we can use it. (Should be quite rare that it is in use.)
+ //
+
+ if (SharedCacheMap->WaitOnActiveCount == NULL) {
+
+ SharedCacheMap->CreateEvent = &SharedCacheMap->Event;
+
+ } else {
+
+ SharedCacheMap->CreateEvent = (PKEVENT)ExAllocatePool( NonPagedPool, sizeof(KEVENT) );
+ }
+
+ ExReleaseSpinLockFromDpcLevel( &CcVacbSpinLock );
+
+ if (SharedCacheMap->CreateEvent == NULL) {
+ DebugTrace( 0, 0, "Failed to allocate CreateEvent\n", 0 );
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ SharedListOwned = FALSE;
+
+ ExRaiseStatus(STATUS_INSUFFICIENT_RESOURCES);
+ }
+
+ KeInitializeEvent( SharedCacheMap->CreateEvent,
+ NotificationEvent,
+ FALSE );
+ }
+
+ //
+ // Increment the OpenCount on the CacheMap.
+ //
+
+ SharedCacheMap->OpenCount += 1;
+ MustUninitialize = TRUE;
+
+ //
+ // Release global resource before waiting
+ //
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ SharedListOwned = FALSE;
+
+ DebugTrace( 0, 0, "Waiting on CreateEvent\n", 0 );
+
+ KeWaitForSingleObject( SharedCacheMap->CreateEvent,
+ Executive,
+ KernelMode,
+ FALSE,
+ (PLARGE_INTEGER)NULL);
+
+ //
+ // If the real creator got an error, then we must bomb
+ // out too.
+ //
+
+ if (!NT_SUCCESS(SharedCacheMap->Status)) {
+ ExRaiseStatus( FsRtlNormalizeNtstatus( SharedCacheMap->Status,
+ STATUS_UNEXPECTED_MM_CREATE_ERR ));
+ }
+ }
+ else {
+
+ PCACHE_UNINITIALIZE_EVENT CUEvent;
+
+ //
+ // Increment the OpenCount on the CacheMap.
+ //
+
+ SharedCacheMap->OpenCount += 1;
+ MustUninitialize = TRUE;
+
+ //
+ // If there is a process waiting on an uninitialize on this
+ // cache map to complete, let the thread that is waiting go,
+ // since the uninitialize is now complete.
+ //
+ CUEvent = SharedCacheMap->UninitializeEvent;
+
+ while (CUEvent != NULL) {
+ PCACHE_UNINITIALIZE_EVENT EventNext = CUEvent->Next;
+ KeSetEvent(&CUEvent->Event, 0, FALSE);
+ CUEvent = EventNext;
+ }
+
+ SharedCacheMap->UninitializeEvent = NULL;
+
+ //
+ // Release global resource
+ //
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ SharedListOwned = FALSE;
+ }
+ }
+
+ {
+ PPRIVATE_CACHE_MAP PrivateCacheMap;
+
+ //
+ // Now allocate (if local one already in use) and initialize
+ // the Private Cache Map.
+ //
+
+ PrivateCacheMap = &SharedCacheMap->PrivateCacheMap;
+
+ //
+ // See if we should allocate a PrivateCacheMap while not holding
+ // a spinlock.
+ //
+
+ if (CacheMapToFree != NULL) {
+ ExFreePool( CacheMapToFree );
+ CacheMapToFree = NULL;
+ }
+
+ if (PrivateCacheMap->NodeTypeCode != 0) {
+ CacheMapToFree = ExAllocatePool( NonPagedPool, sizeof(PRIVATE_CACHE_MAP) );
+ }
+
+ //
+ // Insert the new PrivateCacheMap in the list off the SharedCacheMap.
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+ SharedListOwned = TRUE;
+
+ //
+ // Now make sure there is still no PrivateCacheMap, and if so just get out.
+ //
+
+ if (FileObject->PrivateCacheMap == NULL) {
+
+ //
+ // Is the local one already in use?
+ //
+
+ if (PrivateCacheMap->NodeTypeCode != 0) {
+
+ //
+ // Use the one allocated above, if there is one, else go to pool now.
+ //
+
+ if (CacheMapToFree == NULL) {
+ CacheMapToFree =
+ (PPRIVATE_CACHE_MAP)ExAllocatePool( NonPagedPool,
+ sizeof(PRIVATE_CACHE_MAP) );
+ }
+ PrivateCacheMap = CacheMapToFree;
+ CacheMapToFree = NULL;
+ }
+
+ if (PrivateCacheMap == NULL) {
+
+ DebugTrace( 0, 0, "Failed to allocate PrivateCacheMap\n", 0 );
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ SharedListOwned = FALSE;
+
+ ExRaiseStatus(STATUS_INSUFFICIENT_RESOURCES);
+ }
+
+ RtlZeroMemory( PrivateCacheMap, sizeof(PRIVATE_CACHE_MAP) );
+
+ PrivateCacheMap->NodeTypeCode = CACHE_NTC_PRIVATE_CACHE_MAP;
+ PrivateCacheMap->NodeByteSize = sizeof(PRIVATE_CACHE_MAP);
+ PrivateCacheMap->FileObject = FileObject;
+ PrivateCacheMap->ReadAheadMask = PAGE_SIZE - 1;
+
+ //
+ // Initialize the spin lock.
+ //
+
+ KeInitializeSpinLock( &PrivateCacheMap->ReadAheadSpinLock );
+
+ InsertTailList( &SharedCacheMap->PrivateList, &PrivateCacheMap->PrivateLinks );
+
+ FileObject->PrivateCacheMap = PrivateCacheMap;
+ }
+ }
+
+ MustUninitialize = FALSE;
+ try_exit: NOTHING;
+ }
+ finally {
+
+ //
+ // See if we got an error and must uninitialize the SharedCacheMap
+ //
+
+ if (MustUninitialize) {
+
+ if (!SharedListOwned) {
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+ }
+ if (WeSetBeingCreated) {
+ if (SharedCacheMap->CreateEvent != NULL) {
+ KeSetEvent( SharedCacheMap->CreateEvent, 0, FALSE );
+ }
+ ClearFlag(SharedCacheMap->Flags, BEING_CREATED);
+ }
+
+ //
+ // Now release our open count.
+ //
+
+ SharedCacheMap->OpenCount -= 1;
+
+ if ((SharedCacheMap->OpenCount == 0) &&
+ !FlagOn(SharedCacheMap->Flags, WRITE_QUEUED) &&
+ (SharedCacheMap->DirtyPages == 0)) {
+
+ //
+ // On PinAccess it is safe and necessary to eliminate
+ // the structure immediately.
+ //
+
+ if (PinAccess) {
+
+ CcDeleteSharedCacheMap( SharedCacheMap, OldIrql, FALSE );
+
+ //
+ // If it is not PinAccess, we must lazy delete, because
+ // we could get into a deadlock trying to acquire the
+ // stream exclusive when we dereference the file object.
+ //
+
+ } else {
+
+ //
+ // Move it to the dirty list so the lazy write scan will
+ // see it.
+ //
+
+ RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
+ InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks,
+ &SharedCacheMap->SharedCacheMapLinks );
+
+ //
+ // Make sure the Lazy Writer will wake up, because we
+ // want him to delete this SharedCacheMap.
+ //
+
+ LazyWriter.OtherWork = TRUE;
+ if (!LazyWriter.ScanActive) {
+ CcScheduleLazyWriteScan();
+ }
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ }
+
+ } else {
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ }
+
+ SharedListOwned = FALSE;
+
+ //
+ // If we did not create this SharedCacheMap, then there is a
+ // possibility that it is in the dirty list. Once we are sure
+ // we have the spinlock, just make sure it is in the clean list
+ // if there are no dirty bytes and the open count is nonzero.
+ // (The latter test is almost guaranteed, of course, but we check
+ // it to be safe.)
+ //
+
+ } else if (!WeCreated &&
+ (SharedCacheMap != NULL)) {
+
+ if (!SharedListOwned) {
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+ SharedListOwned = TRUE;
+ }
+
+ if ((SharedCacheMap->DirtyPages == 0) &&
+ (SharedCacheMap->OpenCount != 0)) {
+
+ RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
+ InsertTailList( &CcCleanSharedCacheMapList,
+ &SharedCacheMap->SharedCacheMapLinks );
+ }
+ }
+
+ //
+ // Release global resource
+ //
+
+ if (SharedListOwned) {
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ }
+
+ if (CacheMapToFree != NULL) {
+ ExFreePool(CacheMapToFree);
+ }
+
+ }
+
+ DebugTrace(-1, me, "CcInitializeCacheMap -> VOID\n", 0 );
+
+ return;
+}
+
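+//
+//  Editorial note: the following #if 0 block is an illustrative sketch and
+//  not part of the original source.  It shows how a file system's
+//  create/open path might set up caching for a stream, per the routine
+//  description above.  PFOO_FCB and FooCacheManagerCallbacks are
+//  hypothetical names; the callbacks structure would hold this file
+//  system's acquire/release routines for the Lazy Writer and read ahead.
+//
+
+#if 0
+
+VOID
+FooSetupCaching (
+    IN PFILE_OBJECT FileObject,
+    IN PFOO_FCB Fcb
+    )
+{
+    CC_FILE_SIZES FileSizes;
+
+    //
+    //  Snapshot the sizes from the Fcb.  ValidDataLength could be set to
+    //  MAXLONGLONG if valid data length tracking and callbacks are not
+    //  wanted.
+    //
+
+    FileSizes.AllocationSize = Fcb->AllocationSize;
+    FileSizes.FileSize = Fcb->FileSize;
+    FileSizes.ValidDataLength = Fcb->ValidDataLength;
+
+    //
+    //  PinAccess is FALSE for a data stream accessed only via the copy
+    //  and Mdl interfaces, and the Fcb is passed back as the lazy write
+    //  context.
+    //
+
+    CcInitializeCacheMap( FileObject,
+                          &FileSizes,
+                          FALSE,
+                          &FooCacheManagerCallbacks,
+                          Fcb );
+}
+
+#endif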
+
+BOOLEAN
+CcUninitializeCacheMap (
+ IN PFILE_OBJECT FileObject,
+ IN PLARGE_INTEGER TruncateSize OPTIONAL,
+ IN PCACHE_UNINITIALIZE_EVENT UninitializeEvent OPTIONAL
+ )
+
+/*++
+
+Routine Description:
+
+ This routine uninitializes the previously initialized Shared and Private
+ Cache Maps. This routine is only intended to be called by File Systems.
+ It should be called when the File System receives a cleanup call on the
+ File Object.
+
+ A File System which supports data caching must always call this routine
+ whenever it closes a file, whether the caller opened the file with
+ NO_INTERMEDIATE_BUFFERING as FALSE or not. This is because the final
+ cleanup of a file related to truncation or deletion of the file, can
+ only occur on the last close, whether the last closer cached the file
+    or not.  When CcUninitializeCacheMap is called on a file object for which
+ CcInitializeCacheMap was never called, the call has a benign effect
+ iff no one has truncated or deleted the file; otherwise the necessary
+ cleanup relating to the truncate or close is performed.
+
+    In summary, CcUninitializeCacheMap does the following:
+
+ If the caller had Write or Delete access, the cache is flushed.
+ (This could change with lazy writing.)
+
+ If a Cache Map was initialized on this File Object, it is
+        uninitialized (unmap any views, delete section, and delete
+ Cache Map structures).
+
+ On the last Cleanup, if the file has been deleted, the
+ Section is forced closed. If the file has been truncated, then
+ the truncated pages are purged from the cache.
+
+Arguments:
+
+ FileObject - File Object which was previously supplied to
+ CcInitializeCacheMap.
+
+ TruncateSize - If specified, the file was truncated to the specified
+ size, and the cache should be purged accordingly.
+
+ UninitializeEvent - If specified, then the provided event
+ will be set to the signalled state when the actual flush is
+ completed. This is only of interest to file systems that
+ require that they be notified when a cache flush operation
+ has completed. Due to network protocol restrictions, it
+ is critical that network file systems know exactly when
+ a cache flush operation completes, by specifying this
+ event, they can be notified when the cache section is
+ finally purged if the section is "lazy-deleted".
+
+Return Value:
+
+ FALSE if Section was not closed.
+ TRUE if Section was closed.
+
+--*/
+
+{
+ KIRQL OldIrql;
+ PSHARED_CACHE_MAP SharedCacheMap;
+ ULONG ActivePage;
+ ULONG PageIsDirty;
+ PVACB ActiveVacb = NULL;
+ BOOLEAN SectionClosed = FALSE;
+ BOOLEAN SharedListAcquired = FALSE;
+ PPRIVATE_CACHE_MAP PrivateCacheMap;
+
+ DebugTrace(+1, me, "CcUninitializeCacheMap:\n", 0 );
+ DebugTrace( 0, me, " FileObject = %08lx\n", FileObject );
+ DebugTrace( 0, me, " &TruncateSize = %08lx\n", TruncateSize );
+
+ //
+ // Insure release of resources
+ //
+
+ try {
+
+ //
+ // Serialize Creation/Deletion of all Shared CacheMaps
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+ SharedListAcquired = TRUE;
+
+ //
+ // Get pointer to SharedCacheMap via File Object.
+ //
+
+ SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap;
+ PrivateCacheMap = FileObject->PrivateCacheMap;
+
+ //
+ // Decrement Open Count on SharedCacheMap, if we did a cached open.
+ // Also unmap PrivateCacheMap if it is mapped and deallocate it.
+ //
+
+ if (PrivateCacheMap != NULL) {
+
+ SharedCacheMap->OpenCount -= 1;
+
+ //
+ // Remove PrivateCacheMap from list in SharedCacheMap.
+ //
+
+ RemoveEntryList( &PrivateCacheMap->PrivateLinks );
+
+ //
+ // Free local or allocated PrivateCacheMap
+ //
+
+ if (PrivateCacheMap == &SharedCacheMap->PrivateCacheMap) {
+ PrivateCacheMap->NodeTypeCode = 0;
+ PrivateCacheMap = NULL;
+ }
+
+ FileObject->PrivateCacheMap = (PPRIVATE_CACHE_MAP)NULL;
+ }
+
+ //
+ // Now if we have a SharedCacheMap whose Open Count went to 0, we
+ // have some additional cleanup.
+ //
+
+ if (SharedCacheMap != NULL) {
+
+ //
+ // If a Truncate Size was specified, then remember that we want to
+ // truncate the FileSize and purge the unneeded pages when OpenCount
+ // goes to 0.
+ //
+
+ if (ARGUMENT_PRESENT(TruncateSize)) {
+
+ if ( (TruncateSize->QuadPart == 0) && (SharedCacheMap->FileSize.QuadPart != 0) ) {
+ SetFlag(SharedCacheMap->Flags, TRUNCATE_REQUIRED);
+ }
+
+ //
+ // If this is the last guy, I can drop the file size down
+ // now.
+ //
+
+ if (IsListEmpty(&SharedCacheMap->PrivateList)) {
+ SharedCacheMap->FileSize = *TruncateSize;
+ }
+ }
+
+ //
+ // If other file objects are still using this SharedCacheMap,
+ // then we are done now.
+ //
+
+ if (SharedCacheMap->OpenCount != 0) {
+
+ DebugTrace(-1, me, "SharedCacheMap OpenCount != 0\n", 0);
+
+ //
+ // If the caller specified an event to be set when
+ // the cache uninitialize is completed, set the event
+ // now, because the uninitialize is complete for this file.
+ // (Note, we make him wait if he is the last guy.)
+ //
+
+ if (ARGUMENT_PRESENT(UninitializeEvent)) {
+
+ if (!IsListEmpty(&SharedCacheMap->PrivateList)) {
+ KeSetEvent(&UninitializeEvent->Event, 0, FALSE);
+ } else {
+
+ UninitializeEvent->Next = SharedCacheMap->UninitializeEvent;
+ SharedCacheMap->UninitializeEvent = UninitializeEvent;
+ }
+ }
+
+ try_return( SectionClosed = FALSE );
+ }
+
+ //
+        //  Queue the "uninitialize complete" event on the shared cache map
+        //  so that CcDeleteSharedCacheMap will signal it when the delete
+        //  is complete.
+ //
+
+ if (ARGUMENT_PRESENT(UninitializeEvent)) {
+ UninitializeEvent->Next = SharedCacheMap->UninitializeEvent;
+ SharedCacheMap->UninitializeEvent = UninitializeEvent;
+ }
+
+ //
+ // We are in the process of deleting this cache map. If the
+ // Lazy Writer is active or the Bcb list is not empty or the Lazy
+ // Writer will hit this SharedCacheMap because we are purging
+ // the file to 0, then get out and let the Lazy Writer clean
+ // up.
+ //
+
+ if ((!FlagOn(SharedCacheMap->Flags, PIN_ACCESS) &&
+ !ARGUMENT_PRESENT(UninitializeEvent))
+
+ ||
+
+ FlagOn(SharedCacheMap->Flags, WRITE_QUEUED)
+
+ ||
+
+ (SharedCacheMap->DirtyPages != 0)) {
+
+ //
+ // Move it to the dirty list so the lazy write scan will
+ // see it.
+ //
+
+ if (!FlagOn(SharedCacheMap->Flags, WRITE_QUEUED)) {
+ RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
+ InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks,
+ &SharedCacheMap->SharedCacheMapLinks );
+ }
+
+ //
+ // Make sure the Lazy Writer will wake up, because we
+ // want him to delete this SharedCacheMap.
+ //
+
+ LazyWriter.OtherWork = TRUE;
+ if (!LazyWriter.ScanActive) {
+ CcScheduleLazyWriteScan();
+ }
+
+ //
+ // Get the active Vacb if we are going to lazy delete, to
+ // free it for someone who can use it.
+ //
+
+ GetActiveVacbAtDpcLevel( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty );
+
+ DebugTrace(-1, me, "SharedCacheMap has Bcbs and not purging to 0\n", 0);
+
+ try_return( SectionClosed = FALSE );
+ }
+
+ //
+ // Now we can delete the SharedCacheMap. If there are any Bcbs,
+ // then we must be truncating to 0, and they will also be deleted.
+ // On return the Shared Cache Map List Spinlock will be released.
+ //
+
+ CcDeleteSharedCacheMap( SharedCacheMap, OldIrql, FALSE );
+
+ SharedListAcquired = FALSE;
+
+ try_return( SectionClosed = TRUE );
+ }
+
+ //
+ // No Shared Cache Map. To make the file go away, we still need to
+ // purge the section, if one exists. (And we still need to release
+ // our global list first to avoid deadlocks.)
+ //
+
+ else {
+ if (ARGUMENT_PRESENT(TruncateSize) &&
+ ( TruncateSize->QuadPart == 0 ) &&
+ (*(PCHAR *)FileObject->SectionObjectPointer != NULL)) {
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ SharedListAcquired = FALSE;
+
+ DebugTrace( 0, mm, "MmPurgeSection:\n", 0 );
+ DebugTrace( 0, mm, " SectionObjectPointer = %08lx\n",
+ FileObject->SectionObjectPointer );
+ DebugTrace2(0, mm, " Offset = %08lx\n",
+ TruncateSize->LowPart,
+ TruncateSize->HighPart );
+
+ //
+ // 0 Length means to purge from the TruncateSize on.
+ //
+
+ CcPurgeCacheSection( FileObject->SectionObjectPointer,
+ TruncateSize,
+ 0,
+ FALSE );
+ }
+
+ //
+ // If the caller specified an event to be set when
+ // the cache uninitialize is completed, set the event
+ // now, because the uninitialize is complete for this file.
+ //
+
+ if (ARGUMENT_PRESENT(UninitializeEvent)) {
+ KeSetEvent(&UninitializeEvent->Event, 0, FALSE);
+ }
+
+ }
+
+ try_exit: NOTHING;
+ }
+ finally {
+
+ //
+ // Release global resources
+ //
+
+ if (SharedListAcquired) {
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ }
+
+ //
+ // Free the active vacb, if we found one.
+ //
+
+ if (ActiveVacb != NULL) {
+
+ CcFreeActiveVacb( ActiveVacb->SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty );
+ }
+
+ //
+ // Free PrivateCacheMap now that we no longer have the spinlock.
+ //
+
+ if (PrivateCacheMap != NULL) {
+ ExFreePool( PrivateCacheMap );
+ }
+ }
+
+    DebugTrace(-1, me, "CcUninitializeCacheMap -> %02lx\n", SectionClosed );
+
+ return SectionClosed;
+
+}
+
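+//
+//  Editorial note: the following #if 0 block is an illustrative sketch and
+//  not part of the original source.  It shows a cleanup path calling
+//  CcUninitializeCacheMap as the routine description above requires,
+//  whether or not the file object was ever cached.  PFOO_FCB, FooCleanup
+//  and TruncateOnClose are hypothetical names.
+//
+
+#if 0
+
+VOID
+FooCleanup (
+    IN PFILE_OBJECT FileObject,
+    IN PFOO_FCB Fcb,
+    IN BOOLEAN TruncateOnClose
+    )
+{
+    LARGE_INTEGER TruncateSize;
+    PLARGE_INTEGER TruncateSizePointer = NULL;
+
+    if (TruncateOnClose) {
+
+        //
+        //  The file was truncated, so ask the Cache Manager to purge the
+        //  data beyond the new file size.
+        //
+
+        TruncateSize = Fcb->FileSize;
+        TruncateSizePointer = &TruncateSize;
+    }
+
+    //
+    //  Passing NULL for the uninitialize event means this caller does not
+    //  need to know when a lazy-deleted section is finally purged.  The
+    //  return value says whether the section was closed here.
+    //
+
+    (VOID)CcUninitializeCacheMap( FileObject,
+                                  TruncateSizePointer,
+                                  NULL );
+}
+
+#endif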
+
+//
+// Internal support routine.
+//
+
+VOID
+FASTCALL
+CcDeleteSharedCacheMap (
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN KIRQL ListIrql,
+ IN ULONG ReleaseFile
+ )
+
+/*++
+
+Routine Description:
+
+ The specified SharedCacheMap is removed from the global list of
+ SharedCacheMap's and deleted with all of its related structures.
+ Other objects which were referenced in CcInitializeCacheMap are
+ dereferenced here.
+
+ NOTE: The CcMasterSpinLock must already be acquired
+ on entry. It is released on return.
+
+Arguments:
+
+ SharedCacheMap - Pointer to Cache Map to delete
+
+    ListIrql - the IRQL to restore to when releasing the shared cache map list spinlock
+
+ ReleaseFile - Supplied as nonzero if file was acquired exclusive and
+ should be released.
+
+Return Value:
+
+ None.
+
+--*/
+
+{
+ LIST_ENTRY LocalList;
+ PFILE_OBJECT FileObject;
+ PVACB ActiveVacb;
+ ULONG ActivePage;
+ ULONG PageIsDirty;
+ KIRQL OldIrql;
+ PMBCB Mbcb;
+
+ DebugTrace(+1, me, "CcDeleteSharedCacheMap:\n", 0 );
+ DebugTrace( 0, me, " SharedCacheMap = %08lx\n", SharedCacheMap );
+
+ //
+ // Remove it from the global list and clear the pointer to it via
+ // the File Object.
+ //
+
+ RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
+
+ //
+ // Zero pointer to SharedCacheMap. Once we have cleared the pointer,
+ // we can/must release the global list to avoid deadlocks.
+ //
+
+ FileObject = SharedCacheMap->FileObject;
+
+ FileObject->SectionObjectPointer->SharedCacheMap = (PSHARED_CACHE_MAP)NULL;
+ SetFlag( SharedCacheMap->Flags, WRITE_QUEUED );
+
+ //
+ // The OpenCount is 0, but we still need to flush out any dangling
+ // cache read or writes.
+ //
+
+ if ((SharedCacheMap->VacbActiveCount != 0) || (SharedCacheMap->NeedToZero != NULL)) {
+
+ //
+ // We will put it in a local list and set a flag
+    //  to keep the Lazy Writer away from it, so that we can rip it out
+ // below if someone manages to sneak in and set something dirty, etc.
+ // If the file system does not synchronize cleanup calls with an
+ // exclusive on the stream, then this case is possible.
+ //
+
+ InitializeListHead( &LocalList );
+ InsertTailList( &LocalList, &SharedCacheMap->SharedCacheMapLinks );
+
+ //
+ // If there is an active Vacb, then nuke it now (before waiting!).
+ //
+
+ GetActiveVacbAtDpcLevel( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty );
+
+ ExReleaseSpinLock( &CcMasterSpinLock, ListIrql );
+
+ CcFreeActiveVacb( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty );
+
+ while (SharedCacheMap->VacbActiveCount != 0) {
+ CcWaitOnActiveCount( SharedCacheMap );
+ }
+
+ //
+ // Now in case we hit the rare path where someone moved the
+ // SharedCacheMap again, do a remove again now. It may be
+ // from our local list or it may be from the dirty list,
+ // but who cares? The important thing is to remove it in
+ // the case it was the dirty list, since we will delete it
+ // below.
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &ListIrql );
+ RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
+ }
+
+ //
+ // If there are Bcbs, then empty the list, asserting that none of them
+ // can be pinned now if we have gotten this far!
+ //
+
+ while (!IsListEmpty( &SharedCacheMap->BcbList )) {
+
+ PBCB Bcb;
+
+ Bcb = (PBCB)CONTAINING_RECORD( SharedCacheMap->BcbList.Flink,
+ BCB,
+ BcbLinks );
+
+ RemoveEntryList( &Bcb->BcbLinks );
+
+ //
+ // Skip over the pendaflex entries
+ //
+
+ if (Bcb->NodeTypeCode == CACHE_NTC_BCB) {
+
+ ASSERT( Bcb->PinCount == 0 );
+
+ //
+ // If the Bcb is dirty, we have to synchronize with the Lazy Writer
+ // and reduce the total number of dirty.
+ //
+
+ if (Bcb->Dirty) {
+
+ CcTotalDirtyPages -= Bcb->ByteLength >> PAGE_SHIFT;
+ }
+
+ //
+ // There is a small window where the data could still be mapped
+ // if (for example) the Lazy Writer collides with a CcCopyWrite
+ // in the foreground, and then someone calls CcUninitializeCacheMap
+ // while the Lazy Writer is active. This is because the Lazy
+ // Writer biases the pin count. Deal with that here.
+ //
+
+ if (Bcb->BaseAddress != NULL) {
+ CcFreeVirtualAddress( Bcb->Vacb );
+ }
+
+ //
+ // Debug routines used to remove Bcbs from the global list
+ //
+
+#if LIST_DBG
+
+ {
+ KIRQL OldIrql;
+
+ ExAcquireSpinLock( &CcBcbSpinLock, &OldIrql );
+
+ if (Bcb->CcBcbLinks.Flink != NULL) {
+
+ RemoveEntryList( &Bcb->CcBcbLinks );
+ CcBcbCount -= 1;
+ }
+
+ ExReleaseSpinLock( &CcBcbSpinLock, OldIrql );
+ }
+
+#endif
+
+ CcDeallocateBcb( Bcb );
+ }
+ }
+ ExReleaseSpinLock( &CcMasterSpinLock, ListIrql );
+
+ //
+ // Call local routine to unmap, and purge if necessary.
+ //
+
+ CcUnmapAndPurge( SharedCacheMap );
+
+ //
+ // Now release the file now that the purge is done.
+ //
+
+ if (ReleaseFile) {
+ FsRtlReleaseFile( SharedCacheMap->FileObject );
+ }
+
+ //
+ // Dereference our pointer to the Section and FileObject
+ // (We have to test the Section pointer since CcInitializeCacheMap
+ // calls this routine for error recovery. Release our global
+    //  resource before dereferencing the FileObject to avoid deadlocks.)
+ //
+
+ if (SharedCacheMap->Section != NULL) {
+ ObDereferenceObject( SharedCacheMap->Section );
+ }
+ ObDereferenceObject( FileObject );
+
+ //
+ // If there is an Mbcb, deduct any dirty pages and deallocate.
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+ Mbcb = SharedCacheMap->Mbcb;
+ if (Mbcb != NULL) {
+
+ if (Mbcb->DirtyPages != 0) {
+
+ CcTotalDirtyPages -= Mbcb->DirtyPages;
+ }
+
+ CcDeallocateBcb( (PBCB)Mbcb );
+ }
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+
+ //
+ // If there was an uninitialize event specified for this shared cache
+ // map, then set it to the signalled state, indicating that we are
+ // removing the section and deleting the shared cache map.
+ //
+
+ if (SharedCacheMap->UninitializeEvent != NULL) {
+ PCACHE_UNINITIALIZE_EVENT CUEvent = SharedCacheMap->UninitializeEvent;
+
+ while (CUEvent != NULL) {
+ PCACHE_UNINITIALIZE_EVENT EventNext = CUEvent->Next;
+
+ KeSetEvent(&CUEvent->Event, 0, FALSE);
+
+ CUEvent = EventNext;
+ }
+ }
+
+ //
+ // Now delete the Vacb vector.
+ //
+
+ if ((SharedCacheMap->Vacbs != &SharedCacheMap->InitialVacbs[0])
+
+ &&
+
+ (SharedCacheMap->Vacbs != NULL)) {
+
+ ExFreePool( SharedCacheMap->Vacbs );
+ }
+
+ //
+ // If an event had to be allocated for this SharedCacheMap,
+ // deallocate it.
+ //
+
+ if ((SharedCacheMap->CreateEvent != NULL) && (SharedCacheMap->CreateEvent != &SharedCacheMap->Event)) {
+ ExFreePool( SharedCacheMap->CreateEvent );
+ }
+
+ if ((SharedCacheMap->WaitOnActiveCount != NULL) && (SharedCacheMap->WaitOnActiveCount != &SharedCacheMap->Event)) {
+ ExFreePool( SharedCacheMap->WaitOnActiveCount );
+ }
+
+ //
+    //  Deallocate the storage for the SharedCacheMap.
+ //
+
+ ExFreePool( SharedCacheMap );
+
+ DebugTrace(-1, me, "CcDeleteSharedCacheMap -> VOID\n", 0 );
+
+ return;
+
+}
+
+
+VOID
+CcSetFileSizes (
+ IN PFILE_OBJECT FileObject,
+ IN PCC_FILE_SIZES FileSizes
+ )
+
+/*++
+
+Routine Description:
+
+ This routine must be called whenever a file has been extended to reflect
+ this extension in the cache maps and underlying section. Calling this
+ routine has a benign effect if the current size of the section is
+ already greater than or equal to the new AllocationSize.
+
+ This routine must also be called whenever the FileSize for a file changes
+ to reflect these changes in the Cache Manager.
+
+ This routine seems rather large, but in the normal case it only acquires
+ a spinlock, updates some fields, and exits. Less often it will either
+ extend the section, or truncate/purge the file, but it would be unexpected
+ to do both. On the other hand, the idea of this routine is that it does
+ "everything" required when AllocationSize or FileSize change.
+
+Arguments:
+
+ FileObject - A file object for which CcInitializeCacheMap has been
+ previously called.
+
+ FileSizes - A pointer to AllocationSize, FileSize and ValidDataLength
+ for the file. AllocationSize is ignored if it is not larger
+ than the current section size (i.e., it is ignored unless it
+ has grown). ValidDataLength is not used.
+
+
+Return Value:
+
+ None
+
+--*/
+
+{
+ LARGE_INTEGER NewSectionSize;
+ LARGE_INTEGER NewFileSize;
+ IO_STATUS_BLOCK IoStatus;
+ PSHARED_CACHE_MAP SharedCacheMap;
+ NTSTATUS Status;
+ KIRQL OldIrql;
+ PVACB ActiveVacb;
+ ULONG ActivePage;
+ ULONG PageIsDirty;
+
+ DebugTrace(+1, me, "CcSetFileSizes:\n", 0 );
+ DebugTrace( 0, me, " FileObject = %08lx\n", FileObject );
+ DebugTrace( 0, me, " FileSizes = %08lx\n", FileSizes );
+
+ //
+ // Make a local copy of the new file size and section size.
+ //
+
+ NewFileSize = FileSizes->FileSize;
+ NewSectionSize = FileSizes->AllocationSize;
+
+ //
+ // Serialize Creation/Deletion of all Shared CacheMaps
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+
+ //
+ // Get pointer to SharedCacheMap via File Object.
+ //
+
+ SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap;
+
+ //
+ // If the file is not cached, just get out.
+ //
+
+ if ((SharedCacheMap == NULL) || (SharedCacheMap->Section == NULL)) {
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+
+ //
+        //  Let's try to purge the file in case this is a truncate.  In the
+ // vast majority of cases when there is no shared cache map, there
+ // is no data section either, so this call will eventually be
+ // no-oped in Mm.
+ //
+
+ //
+ // First flush the first page we are keeping, if it has data, before
+ // we throw it away.
+ //
+
+ if (NewFileSize.LowPart & (PAGE_SIZE - 1)) {
+ MmFlushSection( FileObject->SectionObjectPointer, &NewFileSize, 1, &IoStatus, FALSE );
+ }
+
+ CcPurgeCacheSection( FileObject->SectionObjectPointer,
+ &NewFileSize,
+ 0,
+ FALSE );
+
+ DebugTrace(-1, me, "CcSetFileSizes -> VOID\n", 0 );
+
+ return;
+ }
+
+ //
+    //  Make this call a noop if the file is not mapped, or the section is already big enough.
+ //
+
+ if ( NewSectionSize.QuadPart > SharedCacheMap->SectionSize.QuadPart ) {
+
+ //
+ // Increment open count to make sure the SharedCacheMap stays around,
+ // then release the spinlock so that we can call Mm.
+ //
+
+ SharedCacheMap->OpenCount += 1;
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+
+ //
+ // Round new section size to pages.
+ //
+
+ NewSectionSize.QuadPart = NewSectionSize.QuadPart + (LONGLONG)(DEFAULT_EXTEND_MODULO - 1);
+ NewSectionSize.LowPart &= ~(DEFAULT_EXTEND_MODULO - 1);
+
+ //
+ // Use try-finally to make sure we get the open count decremented.
+ //
+
+ try {
+
+ //
+ // Call MM to extend the section.
+ //
+
+ DebugTrace( 0, mm, "MmExtendSection:\n", 0 );
+ DebugTrace( 0, mm, " Section = %08lx\n", SharedCacheMap->Section );
+ DebugTrace2(0, mm, " Size = %08lx, %08lx\n",
+ NewSectionSize.LowPart, NewSectionSize.HighPart );
+
+ Status = MmExtendSection( SharedCacheMap->Section, &NewSectionSize, TRUE );
+
+ if (!NT_SUCCESS(Status)) {
+
+ DebugTrace( 0, 0, "Error from MmExtendSection, Status = %08lx\n",
+ Status );
+
+ ExRaiseStatus( FsRtlNormalizeNtstatus( Status,
+ STATUS_UNEXPECTED_MM_EXTEND_ERR ));
+ }
+
+ //
+ // Extend the Vacb array.
+ //
+
+ CcExtendVacbArray( SharedCacheMap, NewSectionSize );
+
+ } finally {
+
+ //
+ // Serialize again to decrement the open count.
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+
+ SharedCacheMap->OpenCount -= 1;
+
+ if ((SharedCacheMap->OpenCount == 0) &&
+ !FlagOn(SharedCacheMap->Flags, WRITE_QUEUED) &&
+ (SharedCacheMap->DirtyPages == 0)) {
+
+ //
+ // Move to the dirty list.
+ //
+
+ RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
+ InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks,
+ &SharedCacheMap->SharedCacheMapLinks );
+
+ //
+ // Make sure the Lazy Writer will wake up, because we
+ // want him to delete this SharedCacheMap.
+ //
+
+ LazyWriter.OtherWork = TRUE;
+ if (!LazyWriter.ScanActive) {
+ CcScheduleLazyWriteScan();
+ }
+ }
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ }
+
+ //
+ // It is now very unlikely that we have any more work to do, but just
+ // in case we reacquire the spinlock and check again if we are cached.
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+
+ //
+ // Get pointer to SharedCacheMap via File Object.
+ //
+
+ SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap;
+
+ //
+ // If the file is not cached, just get out.
+ //
+
+ if (SharedCacheMap == NULL) {
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+
+ DebugTrace(-1, me, "CcSetFileSizes -> VOID\n", 0 );
+
+ return;
+ }
+ }
+
+ //
+ // If we are shrinking either of these two sizes, then we must free the
+ // active page, since it may be locked.
+ //
+
+ SharedCacheMap->OpenCount += 1;
+
+ try {
+
+ if ( ( NewFileSize.QuadPart < SharedCacheMap->ValidDataGoal.QuadPart ) ||
+ ( NewFileSize.QuadPart < SharedCacheMap->FileSize.QuadPart )) {
+
+ GetActiveVacbAtDpcLevel( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty );
+
+ if ((ActiveVacb != NULL) || (SharedCacheMap->NeedToZero != NULL)) {
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+
+ CcFreeActiveVacb( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty );
+
+ //
+ // Serialize again to reduce ValidDataLength. It cannot change
+ // because the caller must have the file exclusive.
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+ }
+ }
+
+ //
+ // If the section did not grow, see if the file system supports ValidDataLength,
+ // then update the valid data length in the file system.
+ //
+
+ if ( SharedCacheMap->ValidDataLength.QuadPart != MAXLONGLONG ) {
+
+ if ( NewFileSize.QuadPart < SharedCacheMap->ValidDataLength.QuadPart ) {
+ SharedCacheMap->ValidDataLength = NewFileSize;
+ }
+
+ //
+ // When truncating Valid Data Goal, remember that it must always
+ // stay rounded to the top of the page, to protect writes of user-mapped
+ // files. ** no longer rounding **
+ //
+
+ if ( NewFileSize.QuadPart < SharedCacheMap->ValidDataGoal.QuadPart ) {
+
+ SharedCacheMap->ValidDataGoal = NewFileSize;
+ }
+ }
+
+ //
+ // On truncate, be nice guys and actually purge away user data from
+ // the cache. However, the PinAccess check is important to avoid deadlocks
+ // in Ntfs.
+ //
+ // It is also important to check the Vacb Active count. The caller
+ // must have the file exclusive, therefore, no one else can be actively
+ // doing anything in the file. Normally the Active count will be zero
+ // (like in a normal call from Set File Info), and we can go ahead and truncate.
+ // However, if the active count is nonzero, chances are this very thread has
+ // something pinned or mapped, and we will deadlock if we try to purge and
+ // wait for the count to go zero. A rare case of this which deadlocked DaveC
+ // on Christmas Day of 1992, is where Ntfs was trying to convert an attribute
+ // from resident to nonresident - which is a good example of a case where the
+ // purge was not needed.
+ //
+
+ if ( (NewFileSize.QuadPart < SharedCacheMap->FileSize.QuadPart ) &&
+ !FlagOn(SharedCacheMap->Flags, PIN_ACCESS) &&
+ (SharedCacheMap->VacbActiveCount == 0)) {
+
+ //
+ // If we are actually truncating to zero (a size which has particular
+ // meaning to the Lazy Writer scan!), then we must reset the Mbcb if
+ // there is one, so that we do not keep dirty pages around forever.
+ //
+
+ if ((NewFileSize.QuadPart == 0) && (SharedCacheMap->Mbcb != NULL)) {
+
+ PMBCB Mbcb = SharedCacheMap->Mbcb;
+
+ CcTotalDirtyPages -= Mbcb->DirtyPages;
+ SharedCacheMap->DirtyPages -= Mbcb->DirtyPages;
+ Mbcb->DirtyPages = 0;
+ Mbcb->FirstDirtyPage = MAXULONG;
+ Mbcb->LastDirtyPage = 0;
+ Mbcb->ResumeWritePage = 0;
+ Mbcb->PagesToWrite = 0;
+ RtlZeroMemory( Mbcb->Bitmap.Buffer, Mbcb->Bitmap.SizeOfBitMap / 8 );
+ }
+
+ //
+ // Increment open count to make sure the SharedCacheMap stays around,
+ // then release the spinlock so that we can call Mm.
+ //
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+
+ CcPurgeAndClearCacheSection( SharedCacheMap, &NewFileSize );
+
+ //
+ // Serialize again to decrement the open count.
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+ }
+
+ } finally {
+
+ //
+ // We should only be raising without owning the spinlock.
+ //
+
+ if (AbnormalTermination()) {
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+ }
+
+ SharedCacheMap->OpenCount -= 1;
+
+ SharedCacheMap->FileSize = NewFileSize;
+
+ if ((SharedCacheMap->OpenCount == 0) &&
+ !FlagOn(SharedCacheMap->Flags, WRITE_QUEUED) &&
+ (SharedCacheMap->DirtyPages == 0)) {
+
+ //
+ // Move to the dirty list.
+ //
+
+ RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
+ InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks,
+ &SharedCacheMap->SharedCacheMapLinks );
+
+ //
+ // Make sure the Lazy Writer will wake up, because we
+ // want him to delete this SharedCacheMap.
+ //
+
+ LazyWriter.OtherWork = TRUE;
+ if (!LazyWriter.ScanActive) {
+ CcScheduleLazyWriteScan();
+ }
+ }
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ }
+
+ DebugTrace(-1, me, "CcSetFileSizes -> VOID\n", 0 );
+
+ return;
+}
+
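+//
+//  Editorial note: the following #if 0 block is an illustrative sketch and
+//  not part of the original source.  It shows a file system extending a
+//  file and then informing the Cache Manager, as the routine description
+//  above requires.  PFOO_FCB and FooExtendFile are hypothetical names, and
+//  the Fcb is assumed to be held exclusive so the sizes cannot change
+//  underneath us.
+//
+
+#if 0
+
+VOID
+FooExtendFile (
+    IN PFILE_OBJECT FileObject,
+    IN PFOO_FCB Fcb,
+    IN LARGE_INTEGER NewFileSize
+    )
+{
+    CC_FILE_SIZES FileSizes;
+
+    //
+    //  Update the Fcb first so the sizes the Cache Manager sees agree
+    //  with the file system's own bookkeeping.
+    //
+
+    Fcb->FileSize = NewFileSize;
+
+    if (NewFileSize.QuadPart > Fcb->AllocationSize.QuadPart) {
+        Fcb->AllocationSize = NewFileSize;
+    }
+
+    FileSizes.AllocationSize = Fcb->AllocationSize;
+    FileSizes.FileSize = Fcb->FileSize;
+    FileSizes.ValidDataLength = Fcb->ValidDataLength;
+
+    //
+    //  Benign if the section is already large enough; otherwise the
+    //  section and Vacb array are extended.  This must also be called
+    //  when FileSize shrinks so the cache can be truncated and purged.
+    //
+
+    CcSetFileSizes( FileObject, &FileSizes );
+}
+
+#endif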
+
+VOID
+CcPurgeAndClearCacheSection (
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN PLARGE_INTEGER FileOffset
+ )
+
+/*++
+
+Routine Description:
+
+    This routine calls CcPurgeCacheSection after zeroing the end of any
+    partial page at the start of the range.  If the file is not cached,
+    it flushes this page before the purge.
+
+Arguments:
+
+    SharedCacheMap - A pointer to the Shared Cache Map for the file
+        being purged.
+
+ FileOffset - Offset from which file should be purged - rounded down
+ to page boundary. If NULL, purge the entire file.
+
+Return Value:
+
+    None
+
+--*/
+
+{
+ ULONG TempLength, Length;
+ LARGE_INTEGER LocalFileOffset;
+ IO_STATUS_BLOCK IoStatus;
+ PVOID TempVa;
+ PVACB Vacb;
+
+ //
+ // If a range was specified, then we have to see if we need to
+ // save any user data before purging.
+ //
+
+ if ((FileOffset->LowPart & (PAGE_SIZE - 1)) != 0) {
+
+ //
+ // Switch to LocalFileOffset. We do it this way because we
+ // still pass it on as an optional parameter.
+ //
+
+ LocalFileOffset = *FileOffset;
+ FileOffset = &LocalFileOffset;
+
+ //
+ // If the file is cached, then we can actually zero the data to
+ // be purged in memory, and not purge those pages. This is a huge
+ // savings, because sometimes the flushes in the other case cause
+ // us to kill lots of stack, time and I/O doing CcZeroData in especially
+ // large user-mapped files.
+ //
+
+ if ((SharedCacheMap->Section != NULL) &&
+ (SharedCacheMap->Vacbs != NULL)) {
+
+ //
+ // First zero the first page we are keeping, if it has data, and
+ // adjust FileOffset and Length to allow it to stay.
+ //
+
+ TempLength = PAGE_SIZE - (FileOffset->LowPart & (PAGE_SIZE - 1));
+
+ TempVa = CcGetVirtualAddress( SharedCacheMap, *FileOffset, &Vacb, &Length );
+
+ try {
+
+ //
+ // Do not map and zero the page if we are not reducing our notion
+ // of Valid Data, because that does two bad things. First CcSetDirtyInMask
+ // will arbitrarily smash up ValidDataGoal (causing a potential invalid
+ // CcSetValidData call). Secondly, if the Lazy Writer writes the last
+ // page ahead of another flush through MM, then the file system will
+ // never see a write from MM, and will not include the last page in
+ // ValidDataLength on disk.
+ //
+
+ RtlZeroMemory( TempVa, TempLength );
+
+ if (FileOffset->QuadPart <= SharedCacheMap->ValidDataGoal.QuadPart) {
+
+ //
+ // Make sure the Lazy Writer writes it.
+ //
+
+ CcSetDirtyInMask( SharedCacheMap, FileOffset, TempLength );
+
+ //
+ // Otherwise, we are mapped, so make sure at least that Mm
+ // knows the page is dirty since we zeroed it.
+ //
+
+ } else {
+
+ MmSetAddressRangeModified( TempVa, 1 );
+ }
+
+ FileOffset->QuadPart += (LONGLONG)TempLength;
+
+ //
+ // If we get any kind of error, like failing to read the page from
+ // the network, just charge on. Note that we only read it in order
+ // to zero it and avoid the flush below, so if we cannot read it
+ // there is really no stale data problem.
+ //
+
+ } except(EXCEPTION_EXECUTE_HANDLER) {
+
+ NOTHING;
+ }
+
+ CcFreeVirtualAddress( Vacb );
+
+ } else {
+
+ //
+ // First flush the first page we are keeping, if it has data, before
+ // we throw it away.
+ //
+
+ MmFlushSection( SharedCacheMap->FileObject->SectionObjectPointer, FileOffset, 1, &IoStatus, FALSE );
+ }
+ }
+
+ CcPurgeCacheSection( SharedCacheMap->FileObject->SectionObjectPointer,
+ FileOffset,
+ 0,
+ FALSE );
+}
+
+
+BOOLEAN
+CcPurgeCacheSection (
+ IN PSECTION_OBJECT_POINTERS SectionObjectPointer,
+ IN PLARGE_INTEGER FileOffset,
+ IN ULONG Length,
+ IN BOOLEAN UninitializeCacheMaps
+ )
+
+/*++
+
+Routine Description:
+
+ This routine may be called to force a purge of the cache section,
+ even if it is cached. Note, if a user has the file mapped, then the purge
+ will *not* take effect, and this must be considered part of normal application
+ interaction. The purpose of purge is to throw away potentially nonzero
+ data, so that it will be read in again and presumably zeroed. This is
+ not really a security issue, but rather an effort to not confuse the
+ application when it sees nonzero data. We cannot help the fact that
+ a user-mapped view forces us to hang on to stale data.
+
+ This routine is intended to be called whenever previously written
+ data is being truncated from the file, and the file is not being
+ deleted.
+
+ The file must be acquired exclusive in order to call this routine.
+
+Arguments:
+
+ SectionObjectPointer - A pointer to the Section Object Pointers
+ structure in the nonpaged Fcb.
+
+ FileOffset - Offset from which file should be purged - rounded down
+ to page boundary. If NULL, purge the entire file.
+
+ Length - Defines the length of the byte range to purge, starting at
+ FileOffset. This parameter is ignored if FileOffset is
+ specified as NULL. If FileOffset is specified and Length
+ is 0, then purge from FileOffset to the end of the file.
+
+ UninitializeCacheMaps - If TRUE, we should uninitialize all the private
+ cache maps before purging the data.
+
+ReturnValue:
+
+ FALSE - if the section was not successfully purged
+ TRUE - if the section was successfully purged
+
+--*/
+
+{
+ KIRQL OldIrql;
+ PSHARED_CACHE_MAP SharedCacheMap;
+ PPRIVATE_CACHE_MAP PrivateCacheMap;
+ ULONG ActivePage;
+ ULONG PageIsDirty;
+ BOOLEAN PurgeWorked = TRUE;
+ PVACB Vacb = NULL;
+
+ DebugTrace(+1, me, "CcPurgeCacheSection:\n", 0 );
+ DebugTrace( 0, mm, " SectionObjectPointer = %08lx\n", SectionObjectPointer );
+ DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n",
+ ARGUMENT_PRESENT(FileOffset) ? FileOffset->LowPart
+ : 0,
+ ARGUMENT_PRESENT(FileOffset) ? FileOffset->HighPart
+ : 0 );
+ DebugTrace( 0, me, " Length = %08lx\n", Length );
+
+
+ //
+ // If you want us to uninitialize cache maps, the RtlZeroMemory paths
+ // below depend on actually having to purge something after zeroing.
+ //
+
+ ASSERT(!UninitializeCacheMaps || (Length == 0) || (Length >= PAGE_SIZE * 2));
+
+ //
+ // Serialize Creation/Deletion of all Shared CacheMaps
+ //
+
+ ExAcquireFastLock( &CcMasterSpinLock, &OldIrql );
+
+ //
+ // Get pointer to SharedCacheMap via File Object.
+ //
+
+ SharedCacheMap = SectionObjectPointer->SharedCacheMap;
+
+ //
+ // Increment open count to make sure the SharedCacheMap stays around,
+ // then release the spinlock so that we can call Mm.
+ //
+
+ if (SharedCacheMap != NULL) {
+
+ SharedCacheMap->OpenCount += 1;
+
+ //
+ // If there is an active Vacb, then nuke it now (before waiting!).
+ //
+
+ GetActiveVacbAtDpcLevel( SharedCacheMap, Vacb, ActivePage, PageIsDirty );
+ }
+
+ ExReleaseFastLock( &CcMasterSpinLock, OldIrql );
+
+ if (Vacb != NULL) {
+
+ CcFreeActiveVacb( SharedCacheMap, Vacb, ActivePage, PageIsDirty );
+ }
+
+ //
+ // Use try-finally to insure cleanup of the Open Count and Vacb on the
+ // way out.
+ //
+
+ try {
+
+        //
+        // If we have a SharedCacheMap, first force anyone still caching
+        // the file to tear down (if the caller asked), then unmap our
+        // own views of the range before the purge.
+        //
+
+ if (SharedCacheMap != NULL) {
+
+ //
+ // Now loop to make sure that no one is currently caching the file.
+ //
+
+ if (UninitializeCacheMaps) {
+
+ while (!IsListEmpty( &SharedCacheMap->PrivateList )) {
+
+ PrivateCacheMap = CONTAINING_RECORD( SharedCacheMap->PrivateList.Flink,
+ PRIVATE_CACHE_MAP,
+ PrivateLinks );
+
+ CcUninitializeCacheMap( PrivateCacheMap->FileObject, NULL, NULL );
+ }
+ }
+
+ //
+ // Now, let's unmap and purge here.
+ //
+ // We still need to wait for any dangling cache read or writes.
+ //
+ // In fact we have to loop and wait because the lazy writer can
+        // sneak in and do a CcGetVirtualAddressIfMapped, and we are not
+ // synchronized.
+ //
+
+ while ((SharedCacheMap->Vacbs != NULL) &&
+ !CcUnmapVacbArray( SharedCacheMap, FileOffset, Length )) {
+
+ CcWaitOnActiveCount( SharedCacheMap );
+ }
+ }
+
+ //
+ // Purge failures are extremely rare if there are no user mapped sections.
+ // However, it is possible that we will get one from our own mapping, if
+ // the file is being lazy deleted from a previous open. For that case
+ // we wait here until the purge succeeds, so that we are not left with
+ // old user file data. Although Length is actually invariant in this loop,
+ // we do need to keep checking that we are allowed to truncate in case a
+ // user maps the file during a delay.
+ //
+
+ while (!(PurgeWorked = MmPurgeSection(SectionObjectPointer,
+ FileOffset,
+ Length,
+ (BOOLEAN)((SharedCacheMap !=NULL) &&
+ ARGUMENT_PRESENT(FileOffset)))) &&
+ (Length == 0) &&
+ MmCanFileBeTruncated(SectionObjectPointer, FileOffset)) {
+
+ (VOID)KeDelayExecutionThread( KernelMode, FALSE, &CcCollisionDelay );
+ }
+
+ } finally {
+
+ //
+ // Reduce the open count on the SharedCacheMap if there was one.
+ //
+
+ if (SharedCacheMap != NULL) {
+
+ //
+ // Serialize again to decrement the open count.
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+
+ SharedCacheMap->OpenCount -= 1;
+
+ if ((SharedCacheMap->OpenCount == 0) &&
+ !FlagOn(SharedCacheMap->Flags, WRITE_QUEUED) &&
+ (SharedCacheMap->DirtyPages == 0)) {
+
+ //
+ // Move to the dirty list.
+ //
+
+ RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
+ InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks,
+ &SharedCacheMap->SharedCacheMapLinks );
+
+ //
+ // Make sure the Lazy Writer will wake up, because we
+ // want him to delete this SharedCacheMap.
+ //
+
+ LazyWriter.OtherWork = TRUE;
+ if (!LazyWriter.ScanActive) {
+ CcScheduleLazyWriteScan();
+ }
+ }
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ }
+ }
+
+ DebugTrace(-1, me, "CcPurgeCacheSection -> %02lx\n", PurgeWorked );
+
+ return PurgeWorked;
+}
+
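+
+//
+//  A minimal sketch, not part of the original source, of how a file system
+//  might use CcPurgeCacheSection from its truncate path, as described in the
+//  routine header above.  The routine name is an assumption, and the caller
+//  is presumed to hold the file exclusive as required.
+//
+
+VOID
+FsdExamplePurgeOnTruncate (
+    IN PSECTION_OBJECT_POINTERS SectionObjectPointer,
+    IN PLARGE_INTEGER NewFileSize
+    )
+{
+    //
+    //  Purge from the new file size to the end of the section (a Length of
+    //  0 means "to the end"), leaving any private cache maps alone.
+    //
+
+    if (!CcPurgeCacheSection( SectionObjectPointer,
+                              NewFileSize,
+                              0,
+                              FALSE )) {
+
+        //
+        //  A user-mapped view can legitimately keep the purge from taking
+        //  effect; per the description above this is considered part of
+        //  normal application interaction, not an error.
+        //
+
+        NOTHING;
+    }
+}
+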
+
+//
+// Internal support routine.
+//
+
+VOID
+CcUnmapAndPurge(
+ IN PSHARED_CACHE_MAP SharedCacheMap
+ )
+
+/*++
+
+Routine Description:
+
+ This routine may be called to unmap and purge a section, causing Memory
+ Management to throw the pages out and reset his notion of file size.
+
+Arguments:
+
+ SharedCacheMap - Pointer to SharedCacheMap of section to purge.
+
+Return Value:
+
+ None.
+
+--*/
+
+{
+ PFILE_OBJECT FileObject;
+ KIRQL OldIrql;
+
+ FileObject = SharedCacheMap->FileObject;
+
+ //
+ // Unmap all Vacbs
+ //
+
+ if (SharedCacheMap->Vacbs != NULL) {
+ (VOID)CcUnmapVacbArray( SharedCacheMap, NULL, 0 );
+ }
+
+ //
+ // Now that the file is unmapped, we can purge the truncated
+ // pages from memory, if TRUNCATE_REQUIRED. Note that if all
+ // of the section is being purged (FileSize == 0), the purge
+ // and subsequent delete of the SharedCacheMap should drop
+ // all references on the section and file object clearing the
+ // way for the Close Call and actual file delete to occur
+ // immediately.
+ //
+
+ if (FlagOn(SharedCacheMap->Flags, TRUNCATE_REQUIRED)) {
+
+ DebugTrace( 0, mm, "MmPurgeSection:\n", 0 );
+ DebugTrace( 0, mm, " SectionObjectPointer = %08lx\n",
+ FileObject->SectionObjectPointer );
+ DebugTrace2(0, mm, " Offset = %08lx\n",
+ SharedCacheMap->FileSize.LowPart,
+ SharedCacheMap->FileSize.HighPart );
+
+ //
+        // 0 Length means to purge from FileSize to the end of the section.
+ //
+
+ CcPurgeCacheSection( FileObject->SectionObjectPointer,
+ &SharedCacheMap->FileSize,
+ 0,
+ FALSE );
+ }
+}
+
+
+VOID
+CcSetDirtyPageThreshold (
+ IN PFILE_OBJECT FileObject,
+ IN ULONG DirtyPageThreshold
+ )
+
+/*++
+
+Routine Description:
+
+ This routine may be called to set a dirty page threshold for this
+ stream. The write throttling will kick in whenever the file system
+ attempts to exceed the dirty page threshold for this file.
+
+Arguments:
+
+ FileObject - Supplies file object for the stream
+
+ DirtyPageThreshold - Supplies the dirty page threshold for this stream,
+ or 0 for no threshold.
+
+Return Value:
+
+ None
+
+--*/
+
+{
+ KIRQL OldIrql;
+ PSHARED_CACHE_MAP SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap;
+
+ if (SharedCacheMap != NULL) {
+
+ SharedCacheMap->DirtyPageThreshold = DirtyPageThreshold;
+
+ ExAcquireFastLock( &CcMasterSpinLock, &OldIrql );
+ SetFlag(((PFSRTL_COMMON_FCB_HEADER)(FileObject->FsContext))->Flags,
+ FSRTL_FLAG_LIMIT_MODIFIED_PAGES);
+ ExReleaseFastLock( &CcMasterSpinLock, OldIrql );
+ }
+}
+
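+
+//
+//  A minimal sketch, not part of the original source: a file system capping
+//  the number of dirty pages one stream may accumulate, so a single file
+//  cannot absorb the system-wide dirty page allowance.  The routine name and
+//  the one megabyte figure are illustrative assumptions.
+//
+
+VOID
+FsdExampleLimitStreamDirtyPages (
+    IN PFILE_OBJECT FileObject
+    )
+{
+    //
+    //  Throttle writers to this stream once it holds about one megabyte
+    //  of dirty data; a threshold of 0 would remove the per-stream limit.
+    //
+
+    CcSetDirtyPageThreshold( FileObject, (1024 * 1024) / PAGE_SIZE );
+}
+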
+
+VOID
+CcZeroEndOfLastPage (
+ IN PFILE_OBJECT FileObject
+ )
+
+/*++
+
+Routine Description:
+
+ This routine is only called by Mm before mapping a user view to
+ a section. If there is an uninitialized page at the end of the
+ file, we zero it by freeing that page.
+
+Arguments:
+
+ FileObject - File object for section to be mapped
+
+Return Value:
+
+ None
+--*/
+
+{
+ PSHARED_CACHE_MAP SharedCacheMap;
+ ULONG ActivePage;
+ ULONG PageIsDirty;
+ KIRQL OldIrql;
+ PVOID NeedToZero = NULL;
+ PVACB ActiveVacb = NULL;
+
+ //
+ // See if we have an active Vacb, that we need to free.
+ //
+
+ FsRtlAcquireFileExclusive( FileObject );
+ ExAcquireFastLock( &CcMasterSpinLock, &OldIrql );
+ SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap;
+
+ if (SharedCacheMap != NULL) {
+
+ //
+ // See if there is an active vacb.
+ //
+
+ if ((SharedCacheMap->ActiveVacb != NULL) || ((NeedToZero = SharedCacheMap->NeedToZero) != NULL)) {
+
+ SharedCacheMap->OpenCount += 1;
+ GetActiveVacbAtDpcLevel( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty );
+ }
+ }
+
+ ExReleaseFastLock( &CcMasterSpinLock, OldIrql );
+
+ //
+    // Remember in the FsRtl header that there is a user section.
+ // If this is an advanced header then also acquire the mutex to access
+ // this field.
+ //
+
+ if (FlagOn( ((PFSRTL_COMMON_FCB_HEADER)FileObject->FsContext)->Flags,
+ FSRTL_FLAG_ADVANCED_HEADER )) {
+
+ ExAcquireFastMutex( ((PFSRTL_ADVANCED_FCB_HEADER)FileObject->FsContext)->FastMutex );
+
+ SetFlag( ((PFSRTL_COMMON_FCB_HEADER)FileObject->FsContext)->Flags,
+ FSRTL_FLAG_USER_MAPPED_FILE );
+
+ ExReleaseFastMutex( ((PFSRTL_ADVANCED_FCB_HEADER)FileObject->FsContext)->FastMutex );
+
+ } else {
+
+ SetFlag( ((PFSRTL_COMMON_FCB_HEADER)FileObject->FsContext)->Flags,
+ FSRTL_FLAG_USER_MAPPED_FILE );
+ }
+
+ FsRtlReleaseFile( FileObject );
+
+ //
+ // If the file is cached and we have a Vacb to free, we need to
+ // use the lazy writer callback to synchronize so no one will be
+ // extending valid data.
+ //
+
+ if ((ActiveVacb != NULL) || (NeedToZero != NULL)) {
+
+ CcFreeActiveVacb( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty );
+
+ //
+ // Serialize again to decrement the open count.
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+
+ SharedCacheMap->OpenCount -= 1;
+
+ if ((SharedCacheMap->OpenCount == 0) &&
+ !FlagOn(SharedCacheMap->Flags, WRITE_QUEUED) &&
+ (SharedCacheMap->DirtyPages == 0)) {
+
+ //
+ // Move to the dirty list.
+ //
+
+ RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
+ InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks,
+ &SharedCacheMap->SharedCacheMapLinks );
+
+ //
+ // Make sure the Lazy Writer will wake up, because we
+ // want him to delete this SharedCacheMap.
+ //
+
+ LazyWriter.OtherWork = TRUE;
+ if (!LazyWriter.ScanActive) {
+ CcScheduleLazyWriteScan();
+ }
+ }
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ }
+}
+
+
+BOOLEAN
+CcZeroData (
+ IN PFILE_OBJECT FileObject,
+ IN PLARGE_INTEGER StartOffset,
+ IN PLARGE_INTEGER EndOffset,
+ IN BOOLEAN Wait
+ )
+
+/*++
+
+Routine Description:
+
+ This routine attempts to zero the specified file data and deliver the
+ correct I/O status.
+
+ If the caller does not want to block (such as for disk I/O), then
+ Wait should be supplied as FALSE. If Wait was supplied as FALSE and
+ it is currently impossible to zero all of the requested data without
+ blocking, then this routine will return FALSE. However, if the
+ required space is immediately accessible in the cache and no blocking is
+ required, this routine zeros the data and returns TRUE.
+
+ If the caller supplies Wait as TRUE, then this routine is guaranteed
+ to zero the data and return TRUE. If the correct space is immediately
+ accessible in the cache, then no blocking will occur. Otherwise,
+ the necessary work will be initiated to read and/or free cache data,
+ and the caller will be blocked until the data can be received.
+
+ File system Fsd's should typically supply Wait = TRUE if they are
+    processing a synchronous I/O request, or Wait = FALSE if they are
+ processing an asynchronous request.
+
+ File system threads should supply Wait = TRUE.
+
+ IMPORTANT NOTE: File systems which call this routine must be prepared
+ to handle a special form of a write call where the Mdl is already
+ supplied. Namely, if Irp->MdlAddress is supplied, the file system
+ must check the low order bit of Irp->MdlAddress->ByteOffset. If it
+ is set, that means that the Irp was generated in this routine and
+ the file system must do two things:
+
+ Decrement Irp->MdlAddress->ByteOffset and Irp->UserBuffer
+
+ Clear Irp->MdlAddress immediately prior to completing the
+ request, as this routine expects to reuse the Mdl and
+ ultimately deallocate the Mdl itself.
+
+Arguments:
+
+ FileObject - pointer to the FileObject for which a range of bytes
+ is to be zeroed. This FileObject may either be for
+ a cached file or a noncached file. If the file is
+ not cached, then WriteThrough must be TRUE and
+ StartOffset and EndOffset must be on sector boundaries.
+
+ StartOffset - Start offset in file to be zeroed.
+
+ EndOffset - End offset in file to be zeroed.
+
+ Wait - FALSE if caller may not block, TRUE otherwise (see description
+ above)
+
+Return Value:
+
+ FALSE - if Wait was supplied as FALSE and the data was not zeroed.
+
+ TRUE - if the data has been zeroed.
+
+Raises:
+
+ STATUS_INSUFFICIENT_RESOURCES - If a pool allocation failure occurs.
+ This can only occur if Wait was specified as TRUE. (If Wait is
+ specified as FALSE, and an allocation failure occurs, this
+ routine simply returns FALSE.)
+
+--*/
+
+{
+ PSHARED_CACHE_MAP SharedCacheMap;
+ PVOID CacheBuffer;
+ LARGE_INTEGER FOffset;
+ LARGE_INTEGER ToGo;
+ ULONG ZeroBytes, ZeroTransfer;
+ ULONG i;
+ BOOLEAN WriteThrough;
+ ULONG SavedState = 0;
+ ULONG MaxZerosInCache = MAX_ZEROS_IN_CACHE;
+
+ PBCB Bcb = NULL;
+ PCHAR Zeros = NULL;
+ PMDL ZeroMdl = NULL;
+ ULONG MaxBytesMappedInMdl = 0;
+ BOOLEAN Result = TRUE;
+
+ DebugTrace(+1, me, "CcZeroData\n", 0 );
+
+ WriteThrough = (BOOLEAN)(((FileObject->Flags & FO_WRITE_THROUGH) != 0) ||
+ (FileObject->PrivateCacheMap == NULL));
+
+ //
+    // If the FileObject is WriteThrough (or not cached) and the caller
+    // did not specify Wait, then we need to just get out.
+ //
+
+ if (WriteThrough && !Wait) {
+
+ DebugTrace(-1, me, "CcZeroData->FALSE (WriteThrough && !Wait)\n", 0 );
+
+ return FALSE;
+ }
+
+ SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap;
+
+ FOffset = *StartOffset;
+
+ //
+ // Calculate how much to zero this time.
+ //
+
+ ToGo.QuadPart = EndOffset->QuadPart - FOffset.QuadPart;
+
+ //
+ // We will only do zeroing in the cache if the caller is using a
+ // cached file object, and did not specify WriteThrough. We are
+ // willing to zero some data in the cache if our total is not too
+    // much, or there are sufficient available pages.
+ //
+
+ if (((ToGo.QuadPart <= 0x2000) ||
+ (MmAvailablePages >= ((MAX_ZEROS_IN_CACHE / PAGE_SIZE) * 4))) && !WriteThrough) {
+
+ try {
+
+ while (MaxZerosInCache != 0) {
+
+ ULONG ReceivedLength;
+ LARGE_INTEGER BeyondLastByte;
+
+ if ( ToGo.QuadPart > (LONGLONG)MaxZerosInCache ) {
+
+ //
+ // If Wait == FALSE, then there is no point in getting started,
+ // because we would have to start all over again zeroing with
+ // Wait == TRUE, since we would fall out of this loop and
+ // start synchronously writing pages to disk.
+ //
+
+ if (!Wait) {
+
+ DebugTrace(-1, me, "CcZeroData -> FALSE\n", 0 );
+
+ try_return( Result = FALSE );
+ }
+ }
+ else {
+ MaxZerosInCache = ToGo.LowPart;
+ }
+
+ //
+ // Call local routine to Map or Access the file data, then zero the data,
+ // then call another local routine to free the data. If we cannot map
+ // the data because of a Wait condition, return FALSE.
+ //
+ // Note that this call may result in an exception, however, if it
+ // does no Bcb is returned and this routine has absolutely no
+ // cleanup to perform. Therefore, we do not have a try-finally
+ // and we allow the possibility that we will simply be unwound
+ // without notice.
+ //
+
+ if (!CcPinFileData( FileObject,
+ &FOffset,
+ MaxZerosInCache,
+ FALSE,
+ TRUE,
+ Wait,
+ &Bcb,
+ &CacheBuffer,
+ &BeyondLastByte )) {
+
+ DebugTrace(-1, me, "CcZeroData -> FALSE\n", 0 );
+
+ try_return( Result = FALSE );
+ }
+
+ //
+ // Calculate how much data is described by Bcb starting at our desired
+ // file offset. If it is more than we need, we will zero the whole thing
+ // anyway.
+ //
+
+ ReceivedLength = (ULONG)(BeyondLastByte.QuadPart - FOffset.QuadPart );
+
+ //
+ // Now attempt to allocate an Mdl to describe the mapped data.
+ //
+
+ ZeroMdl = IoAllocateMdl( CacheBuffer,
+ ReceivedLength,
+ FALSE,
+ FALSE,
+ NULL );
+
+ if (ZeroMdl == NULL) {
+
+ ExRaiseStatus( STATUS_INSUFFICIENT_RESOURCES );
+ }
+
+ //
+ // It is necessary to probe and lock the pages, or else
+ // the pages may not still be in memory when we do the
+ // MmSetAddressRangeModified for the dirty Bcb.
+ //
+
+ MmDisablePageFaultClustering(&SavedState);
+ MmProbeAndLockPages( ZeroMdl, KernelMode, IoReadAccess );
+ MmEnablePageFaultClustering(SavedState);
+ SavedState = 0;
+
+ //
+ // Assume we did not get all the data we wanted, and set FOffset
+ // to the end of the returned data, and advance buffer pointer.
+ //
+
+ FOffset = BeyondLastByte;
+
+ //
+ // Figure out how many bytes we are allowed to zero in the cache.
+ // Note it is possible we have zeroed a little more than our maximum,
+ // because we hit an existing Bcb that extended beyond the range.
+ //
+
+ if (MaxZerosInCache <= ReceivedLength) {
+ MaxZerosInCache = 0;
+ }
+ else {
+ MaxZerosInCache -= ReceivedLength;
+ }
+
+ //
+ // Now set the Bcb dirty. We have to explicitly set the address
+ // range modified here, because that work otherwise gets deferred
+ // to the Lazy Writer.
+ //
+
+ MmSetAddressRangeModified( CacheBuffer, ReceivedLength );
+ CcSetDirtyPinnedData( Bcb, NULL );
+
+ //
+ // Unmap the data now
+ //
+
+ CcUnpinFileData( Bcb, FALSE, UNPIN );
+ Bcb = NULL;
+
+ //
+ // Unlock and free the Mdl (we only loop back if we crossed
+                // a 256KB boundary).
+ //
+
+ MmUnlockPages( ZeroMdl );
+ IoFreeMdl( ZeroMdl );
+ ZeroMdl = NULL;
+ }
+
+ try_exit: NOTHING;
+ } finally {
+
+ if (SavedState != 0) {
+ MmEnablePageFaultClustering(SavedState);
+ }
+
+ //
+ // Clean up only necessary in abnormal termination.
+ //
+
+ if (Bcb != NULL) {
+
+ CcUnpinFileData( Bcb, FALSE, UNPIN );
+ }
+
+ //
+ // Since the last thing in the above loop which can
+ // fail is the MmProbeAndLockPages, we only need to
+ // free the Mdl here.
+ //
+
+ if (ZeroMdl != NULL) {
+
+ IoFreeMdl( ZeroMdl );
+ }
+ }
+
+ //
+ // If hit a wait condition above, return it now.
+ //
+
+ if (!Result) {
+ return FALSE;
+ }
+
+ //
+    // If we finished, get out now.
+ //
+
+ if ( FOffset.QuadPart >= EndOffset->QuadPart ) {
+ return TRUE;
+ }
+ }
+
+ //
+ // We either get here because we decided above not to zero anything in
+ // the cache directly, or else we zeroed up to our maximum and still
+ // have some left to zero direct to the file on disk. In either case,
+ // we will now zero from FOffset to *EndOffset, and then flush this
+ // range in case the file is cached/mapped, and there are modified
+ // changes in memory.
+ //
+
+ //
+ // try-finally to guarantee cleanup.
+ //
+
+ try {
+ PULONG Page;
+ ULONG SavedByteCount;
+ LARGE_INTEGER SizeLeft;
+
+ //
+ // Round FOffset and EndOffset up to sector boundaries, since
+ // we will be doing disk I/O, and calculate size left.
+ //
+
+ i = IoGetRelatedDeviceObject(FileObject)->SectorSize - 1;
+ FOffset.QuadPart += (LONGLONG)i;
+ FOffset.LowPart &= ~i;
+ SizeLeft.QuadPart = EndOffset->QuadPart + (LONGLONG)i;
+ SizeLeft.LowPart &= ~i;
+ SizeLeft.QuadPart -= FOffset.QuadPart;
+
+ if (SizeLeft.QuadPart == 0) {
+ return TRUE;
+ }
+
+ //
+ // Allocate a page to hold the zeros we will write, and
+ // zero it.
+ //
+
+ ZeroBytes = MmNumberOfColors * PAGE_SIZE;
+
+ if (SizeLeft.QuadPart < (LONGLONG)ZeroBytes) {
+ ZeroBytes = SizeLeft.LowPart;
+ }
+
+ Zeros = (PCHAR)ExAllocatePool( NonPagedPoolCacheAligned, ZeroBytes );
+
+ if (Zeros != NULL) {
+
+ //
+ // Allocate and initialize an Mdl to describe the zeros
+ // we need to transfer. Allocate to cover the maximum
+ // size required, and we will use and reuse it in the
+ // loop below, initialized correctly.
+ //
+
+ ZeroTransfer = MAX_ZERO_TRANSFER;
+
+ if (ZeroBytes < MmNumberOfColors * PAGE_SIZE) {
+ ZeroTransfer = ZeroBytes;
+ }
+
+ ZeroMdl = IoAllocateMdl( Zeros, ZeroTransfer, FALSE, FALSE, NULL );
+
+ if (ZeroMdl == NULL) {
+ ExRaiseStatus( STATUS_INSUFFICIENT_RESOURCES );
+ }
+
+ //
+ // Now we will temporarily lock the allocated pages
+ // only, and then replicate the page frame numbers through
+ // the entire Mdl to keep writing the same pages of zeros.
+ //
+
+ SavedByteCount = ZeroMdl->ByteCount;
+ ZeroMdl->ByteCount = ZeroBytes;
+ MmBuildMdlForNonPagedPool( ZeroMdl );
+
+ ZeroMdl->MdlFlags &= ~MDL_SOURCE_IS_NONPAGED_POOL;
+ ZeroMdl->MdlFlags |= MDL_PAGES_LOCKED;
+ ZeroMdl->MappedSystemVa = NULL;
+ ZeroMdl->ByteCount = SavedByteCount;
+ Page = (PULONG)(ZeroMdl + 1);
+ for (i = MmNumberOfColors;
+ i < (COMPUTE_PAGES_SPANNED( 0, SavedByteCount ));
+ i++) {
+
+ *(Page + i) = *(Page + i - MmNumberOfColors);
+ }
+
+ //
+            // We failed to allocate the space we wanted, so we will fall
+            // back to half of a page of nonpaged pool.
+ //
+
+ } else {
+
+ ZeroBytes = PAGE_SIZE / 2;
+ Zeros = (PCHAR)ExAllocatePool( NonPagedPoolCacheAligned, ZeroBytes );
+
+ //
+ // If we cannot get even that much, then let's write a sector at a time.
+ //
+
+ if (Zeros == NULL) {
+ ZeroBytes = IoGetRelatedDeviceObject(FileObject)->SectorSize;
+ Zeros = (PCHAR)ExAllocatePool( NonPagedPoolCacheAligned, ZeroBytes );
+ }
+
+ //
+ // Allocate and initialize an Mdl to describe the zeros
+ // we need to transfer. Allocate to cover the maximum
+ // size required, and we will use and reuse it in the
+ // loop below, initialized correctly.
+ //
+
+ ZeroTransfer = ZeroBytes;
+ ZeroMdl = IoAllocateMdl( Zeros, ZeroBytes, FALSE, FALSE, NULL );
+
+ if ((Zeros == NULL) || (ZeroMdl == NULL)) {
+ ExRaiseStatus( STATUS_INSUFFICIENT_RESOURCES );
+ }
+
+ //
+ // Now we will lock the allocated pages
+ //
+
+ MmBuildMdlForNonPagedPool( ZeroMdl );
+ }
+
+#ifdef MIPS
+#ifdef MIPS_PREFILL
+ RtlFillMemory( Zeros, ZeroBytes, 0xDD );
+ KeSweepDcache( TRUE );
+#endif
+#endif
+
+ //
+ // Zero the buffer now.
+ //
+
+ RtlZeroMemory( Zeros, ZeroBytes );
+
+ //
+ // Map the full Mdl even if we will only use a part of it. This
+        // allows the unmapping operation to be deterministic.
+ //
+
+ (VOID)MmGetSystemAddressForMdl(ZeroMdl);
+ MaxBytesMappedInMdl = ZeroMdl->ByteCount;
+
+ //
+ // Now loop to write buffers full of zeros through to the file
+        // until we have written the entire range.
+ //
+
+ while ( SizeLeft.QuadPart != 0 ) {
+
+ IO_STATUS_BLOCK IoStatus;
+ NTSTATUS Status;
+ KEVENT Event;
+
+ //
+ // See if we really need to write that many zeros, and
+ // trim the size back if not.
+ //
+
+ if ( (LONGLONG)ZeroTransfer > SizeLeft.QuadPart ) {
+
+ ZeroTransfer = SizeLeft.LowPart;
+ }
+
+ //
+ // (Re)initialize the kernel event to FALSE.
+ //
+
+ KeInitializeEvent( &Event, NotificationEvent, FALSE );
+
+ //
+ // Initiate and wait for the synchronous transfer.
+ //
+
+ ZeroMdl->ByteCount = ZeroTransfer;
+
+ Status = IoSynchronousPageWrite( FileObject,
+ ZeroMdl,
+ &FOffset,
+ &Event,
+ &IoStatus );
+
+ //
+ // If pending is returned (which is a successful status),
+ // we must wait for the request to complete.
+ //
+
+ if (Status == STATUS_PENDING) {
+ KeWaitForSingleObject( &Event,
+ Executive,
+ KernelMode,
+ FALSE,
+ (PLARGE_INTEGER)NULL);
+ }
+
+
+ //
+ // If we got an error back in Status, then the Iosb
+ // was not written, so we will just copy the status
+ // there, then test the final status after that.
+ //
+
+ if (!NT_SUCCESS(Status)) {
+ ExRaiseStatus( Status );
+ }
+
+ if (!NT_SUCCESS(IoStatus.Status)) {
+ ExRaiseStatus( IoStatus.Status );
+ }
+
+ //
+ // If we succeeded, then update where we are at by how much
+ // we wrote, and loop back to see if there is more.
+ //
+
+ FOffset.QuadPart = FOffset.QuadPart + (LONGLONG)ZeroTransfer;
+ SizeLeft.QuadPart = SizeLeft.QuadPart - (LONGLONG)ZeroTransfer;
+ }
+ }
+ finally{
+
+ //
+ // Clean up anything from zeroing pages on a noncached
+ // write.
+ //
+
+ if (ZeroMdl != NULL) {
+
+ if ((MaxBytesMappedInMdl != 0) &&
+ !FlagOn(ZeroMdl->MdlFlags, MDL_SOURCE_IS_NONPAGED_POOL)) {
+ ZeroMdl->ByteCount = MaxBytesMappedInMdl;
+ MmUnmapLockedPages (ZeroMdl->MappedSystemVa, ZeroMdl);
+ }
+
+ IoFreeMdl( ZeroMdl );
+ }
+
+ if (Zeros != NULL) {
+ ExFreePool( Zeros );
+ }
+
+ DebugTrace(-1, me, "CcZeroData -> TRUE\n", 0 );
+ }
+
+ return TRUE;
+}
+
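+
+//
+//  A minimal sketch, not part of the original source, of the typical caller
+//  of CcZeroData: a write path zeroing the gap between ValidDataLength and
+//  the start of a new write, honoring the Wait semantics described above.
+//  The routine name is an assumption.
+//
+
+BOOLEAN
+FsdExampleZeroToWriteOffset (
+    IN PFILE_OBJECT FileObject,
+    IN PLARGE_INTEGER ValidDataLength,
+    IN PLARGE_INTEGER NewWriteOffset,
+    IN BOOLEAN Wait
+    )
+{
+    //
+    //  Nothing to zero if the write begins at or below valid data.
+    //
+
+    if (NewWriteOffset->QuadPart <= ValidDataLength->QuadPart) {
+
+        return TRUE;
+    }
+
+    //
+    //  CcZeroData returns FALSE only when Wait is FALSE and the zeroing
+    //  could not be done without blocking; the caller would then post the
+    //  request and try again with Wait == TRUE.
+    //
+
+    return CcZeroData( FileObject, ValidDataLength, NewWriteOffset, Wait );
+}
+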
+
+PFILE_OBJECT
+CcGetFileObjectFromSectionPtrs (
+ IN PSECTION_OBJECT_POINTERS SectionObjectPointer
+ )
+
+/*++
+
+This routine may be used to retrieve a pointer to the FileObject that the
+Cache Manager is using for a given file from the Section Object Pointers
+in the nonpaged File System structure Fcb. The use of this function is
+intended for exceptional use unrelated to the processing of user requests,
+when the File System would otherwise not have a FileObject at its disposal.
+An example is for mount verification.
+
+Note that the File System is responsible for insuring that the File
+Object does not go away while in use. It is impossible for the Cache
+Manager to guarantee this.
+
+Arguments:
+
+ SectionObjectPointer - A pointer to the Section Object Pointers
+ structure in the nonpaged Fcb.
+
+Return Value:
+
+ Pointer to the File Object, or NULL if the file is not cached or no
+ longer cached
+
+--*/
+
+{
+ KIRQL OldIrql;
+ PFILE_OBJECT FileObject = NULL;
+
+ //
+ // Serialize with Creation/Deletion of all Shared CacheMaps
+ //
+
+ ExAcquireFastLock( &CcMasterSpinLock, &OldIrql );
+
+ if (SectionObjectPointer->SharedCacheMap != NULL) {
+
+ FileObject = ((PSHARED_CACHE_MAP)SectionObjectPointer->SharedCacheMap)->FileObject;
+ }
+
+ ExReleaseFastLock( &CcMasterSpinLock, OldIrql );
+
+ return FileObject;
+}
+
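+
+//
+//  A minimal sketch, not part of the original source, of the mount
+//  verification style use described above: borrowing the Cache Manager's
+//  file object when the file system has none of its own at hand.  The
+//  routine name is an assumption, and the caller must guarantee the file
+//  object cannot go away while it is being used.
+//
+
+PDEVICE_OBJECT
+FsdExampleGetCachedStreamDevice (
+    IN PSECTION_OBJECT_POINTERS SectionObjectPointer
+    )
+{
+    PFILE_OBJECT FileObject;
+
+    FileObject = CcGetFileObjectFromSectionPtrs( SectionObjectPointer );
+
+    if (FileObject == NULL) {
+
+        //
+        //  The stream is not (or is no longer) cached.
+        //
+
+        return NULL;
+    }
+
+    return IoGetRelatedDeviceObject( FileObject );
+}
+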
+
+PFILE_OBJECT
+CcGetFileObjectFromBcb (
+ IN PVOID Bcb
+ )
+
+/*++
+
+This routine may be used to retrieve a pointer to the FileObject that the
+Cache Manager is using for a given file from a Bcb of that file.
+
+Note that the File System is responsible for insuring that the File
+Object does not go away while in use. It is impossible for the Cache
+Manager to guarantee this.
+
+Arguments:
+
+ Bcb - A pointer to the pinned Bcb.
+
+Return Value:
+
+ Pointer to the File Object, or NULL if the file is not cached or no
+ longer cached
+
+--*/
+
+{
+ return ((PBCB)Bcb)->SharedCacheMap->FileObject;
+}
diff --git a/private/ntos/cache/lazyrite.c b/private/ntos/cache/lazyrite.c
new file mode 100644
index 000000000..d61b0864d
--- /dev/null
+++ b/private/ntos/cache/lazyrite.c
@@ -0,0 +1,732 @@
+/*++
+
+Copyright (c) 1990 Microsoft Corporation
+
+Module Name:
+
+ lazyrite.c
+
+Abstract:
+
+ This module implements the lazy writer for the Cache subsystem.
+
+Author:
+
+ Tom Miller [TomM] 22-July-1990
+
+Revision History:
+
+--*/
+
+#include "cc.h"
+
+//
+// The Bug check file id for this module
+//
+
+#define BugCheckFileId (CACHE_BUG_CHECK_LAZYRITE)
+
+//
+// Define our debug constant
+//
+
+#define me 0x00000020
+
+//
+// Local support routines
+//
+
+PWORK_QUEUE_ENTRY
+CcReadWorkQueue (
+ );
+
+VOID
+CcLazyWriteScan (
+ );
+
+
+VOID
+CcScheduleLazyWriteScan (
+ )
+
+/*++
+
+Routine Description:
+
+ This routine may be called to schedule the next lazy writer scan,
+ during which lazy write and lazy close activity is posted to other
+ worker threads. Callers should acquire the lazy writer spin lock
+ to see if the scan is currently active, and then call this routine
+ still holding the spin lock if not. One special call is used at
+ the end of the lazy write scan to propagate lazy write active once
+ we go active. This call is "the" scan thread, and it can therefore
+ safely schedule the next scan without taking out the spin lock.
+
+Arguments:
+
+ None
+
+Return Value:
+
+ None.
+
+--*/
+
+{
+ //
+ // It is important to set the active flag TRUE first for the propagate
+ // case, because it is conceivable that once the timer is set, another
+ // thread could actually run and make the scan go idle before we then
+ // jam the flag TRUE.
+ //
+ // When going from idle to active, we delay a little longer to let the
+ // app finish saving its file.
+ //
+
+ if (LazyWriter.ScanActive) {
+
+ KeSetTimer( &LazyWriter.ScanTimer, CcIdleDelay, &LazyWriter.ScanDpc );
+
+ } else {
+
+ LazyWriter.ScanActive = TRUE;
+ KeSetTimer( &LazyWriter.ScanTimer, CcFirstDelay, &LazyWriter.ScanDpc );
+ }
+}
+
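+
+//
+//  A minimal sketch, not part of the original source, consolidating the
+//  calling protocol described above and used throughout this module: take
+//  the master spinlock, note that there is other work (such as deleting an
+//  idle SharedCacheMap), and schedule a scan only if one is not already
+//  active.  The routine name is an assumption.
+//
+
+VOID
+CcExampleRequestLazyWriterWork (
+    )
+{
+    KIRQL OldIrql;
+
+    ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+
+    LazyWriter.OtherWork = TRUE;
+
+    if (!LazyWriter.ScanActive) {
+
+        CcScheduleLazyWriteScan();
+    }
+
+    ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+}
+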
+
+VOID
+CcScanDpc (
+ IN PKDPC Dpc,
+ IN PVOID DeferredContext,
+ IN PVOID SystemArgument1,
+ IN PVOID SystemArgument2
+ )
+
+/*++
+
+Routine Description:
+
+ This is the Dpc routine which runs when the scan timer goes off. It
+ simply posts an element for an Ex Worker thread to do the scan.
+
+Arguments:
+
+ (All are ignored)
+
+Return Value:
+
+ None.
+
+--*/
+
+{
+ PWORK_QUEUE_ENTRY WorkQueueEntry;
+
+ UNREFERENCED_PARAMETER(Dpc);
+ UNREFERENCED_PARAMETER(DeferredContext);
+ UNREFERENCED_PARAMETER(SystemArgument1);
+ UNREFERENCED_PARAMETER(SystemArgument2);
+
+ WorkQueueEntry = CcAllocateWorkQueueEntry();
+
+ //
+ // If we failed to allocate a WorkQueueEntry, things must
+ // be in pretty bad shape. However, all we have to do is
+ // say we are not active, and wait for another event to
+ // wake things up again.
+ //
+
+ if (WorkQueueEntry == NULL) {
+
+ LazyWriter.ScanActive = FALSE;
+
+ } else {
+
+ //
+ // Otherwise post a work queue entry to do the scan.
+ //
+
+ WorkQueueEntry->Function = (UCHAR)LazyWriteScan;
+
+ CcPostWorkQueue( WorkQueueEntry, &CcRegularWorkQueue );
+ }
+}
+
+
+VOID
+CcLazyWriteScan (
+ )
+
+/*++
+
+Routine Description:
+
+ This routine implements the Lazy Writer scan for dirty data to flush
+ or any other work to do (lazy close). This routine is scheduled by
+ calling CcScheduleLazyWriteScan.
+
+Arguments:
+
+ None.
+
+Return Value:
+
+ None.
+
+--*/
+
+{
+ ULONG PagesToWrite, ForegroundRate, EstimatedDirtyNextInterval;
+ PSHARED_CACHE_MAP SharedCacheMap, FirstVisited;
+ KIRQL OldIrql;
+ ULONG LoopsWithLockHeld = 0;
+ BOOLEAN AlreadyMoved = FALSE;
+
+ //
+ // Top of Lazy Writer scan.
+ //
+
+ try {
+
+ //
+ // If there is no work to do, then we will go inactive, and return.
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+
+ if ((CcTotalDirtyPages == 0) && !LazyWriter.OtherWork) {
+
+ LazyWriter.ScanActive = FALSE;
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ return;
+ }
+
+ //
+ // Acquire the Lazy Writer spinlock, calculate the next sweep time
+ // stamp, then update all relevant fields for the next time around.
+ // Also we can clear the OtherWork flag.
+ //
+
+ LazyWriter.OtherWork = FALSE;
+
+ //
+ // Assume we will write our usual fraction of dirty pages. Do not do the
+        // divide if there are not enough dirty pages, or else we will never write
+ // the last few pages.
+ //
+
+ PagesToWrite = CcTotalDirtyPages;
+ if (PagesToWrite > LAZY_WRITER_MAX_AGE_TARGET) {
+ PagesToWrite /= LAZY_WRITER_MAX_AGE_TARGET;
+ }
+
+ //
+ // Estimate the rate of dirty pages being produced in the foreground.
+        // This is the total number of dirty pages now, plus the number of dirty
+        // pages we scheduled to write last time, minus the number of dirty
+        // pages we had at the last scan.  Throw out any cases which would not
+        // produce a positive rate.
+ //
+
+ ForegroundRate = 0;
+
+ if ((CcTotalDirtyPages + CcPagesWrittenLastTime) > CcDirtyPagesLastScan) {
+ ForegroundRate = (CcTotalDirtyPages + CcPagesWrittenLastTime) -
+ CcDirtyPagesLastScan;
+ }
+
+ //
+ // If we estimate that we will exceed our dirty page target by the end
+ // of this interval, then we must write more. Try to arrive on target.
+ //
+
+ EstimatedDirtyNextInterval = CcTotalDirtyPages - PagesToWrite + ForegroundRate;
+
+ if (EstimatedDirtyNextInterval > CcDirtyPageTarget) {
+ PagesToWrite += EstimatedDirtyNextInterval - CcDirtyPageTarget;
+ }
+
+ //
+ // Now save away the number of dirty pages and the number of pages we
+ // just calculated to write.
+ //
+
+ CcDirtyPagesLastScan = CcTotalDirtyPages;
+ CcPagesYetToWrite = CcPagesWrittenLastTime = PagesToWrite;
+
+ //
+ // Loop to flush enough Shared Cache Maps to write the number of pages
+ // we just calculated.
+ //
+
+ SharedCacheMap = CONTAINING_RECORD( CcLazyWriterCursor.SharedCacheMapLinks.Flink,
+ SHARED_CACHE_MAP,
+ SharedCacheMapLinks );
+
+ DebugTrace( 0, me, "Start of Lazy Writer Scan\n", 0 );
+
+ //
+ // Normally we would just like to visit every Cache Map once on each scan,
+ // so the scan will terminate normally when we return to FirstVisited. But
+ // in the off chance that FirstVisited gets deleted, we are guaranteed to stop
+ // when we get back to our own listhead.
+ //
+
+ FirstVisited = NULL;
+ while ((SharedCacheMap != FirstVisited) &&
+ (&SharedCacheMap->SharedCacheMapLinks != &CcLazyWriterCursor.SharedCacheMapLinks)) {
+
+ if (FirstVisited == NULL) {
+ FirstVisited = SharedCacheMap;
+ }
+
+ //
+ // Skip the SharedCacheMap if a write behind request is
+ // already queued, write behind has been disabled, or
+ // if there is no work to do (either dirty data to be written
+ // or a delete is required).
+ //
+ // Note that for streams where modified writing is disabled, we
+ // need to take out Bcbs exclusive, which serializes with foreground
+ // activity. Therefore we use a special counter in the SharedCacheMap
+ // to only service these once every n intervals.
+ //
+ // Skip temporary files unless we currently could not write 196KB
+ //
+
+ if (!FlagOn(SharedCacheMap->Flags, WRITE_QUEUED | IS_CURSOR)
+
+ &&
+
+ (((PagesToWrite != 0) && (SharedCacheMap->DirtyPages != 0) &&
+ (((++SharedCacheMap->LazyWritePassCount & 0xF) == 0) ||
+ !FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED) ||
+ (CcCapturedSystemSize == MmSmallSystem) ||
+ (SharedCacheMap->DirtyPages >= (4 * (MAX_WRITE_BEHIND / PAGE_SIZE)))) &&
+ (!FlagOn(SharedCacheMap->FileObject->Flags, FO_TEMPORARY_FILE) ||
+ !CcCanIWrite(SharedCacheMap->FileObject, 0x30000, FALSE, MAXUCHAR)))
+
+ ||
+
+ (SharedCacheMap->OpenCount == 0))) {
+
+ PWORK_QUEUE_ENTRY WorkQueueEntry;
+
+ //
+ // If this is a metadata stream with at least 4 times
+ // the maximum write behind I/O size, then let's tell
+ // this guy to write 1/8 of his dirty data on this pass
+ // so it doesn't build up.
+ //
+ // Else assume we can write everything (PagesToWrite only affects
+ // metadata streams - otherwise writing is controlled by the Mbcb).
+ //
+
+ SharedCacheMap->PagesToWrite = SharedCacheMap->DirtyPages;
+
+ if (FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED) &&
+ (SharedCacheMap->PagesToWrite >= (4 * (MAX_WRITE_BEHIND / PAGE_SIZE))) &&
+ (CcCapturedSystemSize != MmSmallSystem)) {
+
+ SharedCacheMap->PagesToWrite /= 8;
+ }
+
+ //
+ // See if he exhausts the number of pages to write. (We
+ // keep going in case there are any closes to do.)
+ //
+
+ if ((SharedCacheMap->PagesToWrite >= PagesToWrite) && !AlreadyMoved) {
+
+ //
+ // If we met our write quota on a given SharedCacheMap, then make sure
+ // we start at him on the next scan, unless it is a metadata stream.
+ //
+
+ RemoveEntryList( &CcLazyWriterCursor.SharedCacheMapLinks );
+
+ //
+ // For Metadata streams, set up to resume on the next stream on the
+ // next scan.
+ //
+
+ if (FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) {
+ InsertHeadList( &SharedCacheMap->SharedCacheMapLinks, &CcLazyWriterCursor.SharedCacheMapLinks );
+
+ //
+ // For other streams, set up to resume on the same stream on the
+ // next scan.
+ //
+
+ } else {
+ InsertTailList( &SharedCacheMap->SharedCacheMapLinks, &CcLazyWriterCursor.SharedCacheMapLinks );
+ }
+
+ PagesToWrite = 0;
+ AlreadyMoved = TRUE;
+
+ } else {
+
+ PagesToWrite -= SharedCacheMap->PagesToWrite;
+ }
+
+ //
+ // Otherwise show we are actively writing, and keep it in the dirty
+ // list.
+ //
+
+ SetFlag(SharedCacheMap->Flags, WRITE_QUEUED);
+ SharedCacheMap->DirtyPages += 1;
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+
+ //
+ // Queue the request to do the work to a worker thread.
+ //
+
+ WorkQueueEntry = CcAllocateWorkQueueEntry();
+
+ //
+ // If we failed to allocate a WorkQueueEntry, things must
+ // be in pretty bad shape. However, all we have to do is
+ // break out of our current loop, and try to go back and
+ // delay a while. Even if the current guy should have gone
+ // away when we clear WRITE_QUEUED, we will find him again
+ // in the LW scan.
+ //
+
+ if (WorkQueueEntry == NULL) {
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+ ClearFlag(SharedCacheMap->Flags, WRITE_QUEUED);
+ SharedCacheMap->DirtyPages -= 1;
+ break;
+ }
+
+ WorkQueueEntry->Function = (UCHAR)WriteBehind;
+ WorkQueueEntry->Parameters.Write.SharedCacheMap = SharedCacheMap;
+
+ //
+ // Post it to the regular work queue.
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+ SharedCacheMap->DirtyPages -= 1;
+ CcPostWorkQueue( WorkQueueEntry, &CcRegularWorkQueue );
+
+ LoopsWithLockHeld = 0;
+
+ //
+            // Make sure we occasionally drop the lock.  Set WRITE_QUEUED
+ // to keep the guy from going away.
+ //
+
+ } else if ((++LoopsWithLockHeld >= 20) &&
+ !FlagOn(SharedCacheMap->Flags, WRITE_QUEUED | IS_CURSOR)) {
+
+ SetFlag(SharedCacheMap->Flags, WRITE_QUEUED);
+ SharedCacheMap->DirtyPages += 1;
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ LoopsWithLockHeld = 0;
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+ ClearFlag(SharedCacheMap->Flags, WRITE_QUEUED);
+ SharedCacheMap->DirtyPages -= 1;
+ }
+
+ //
+ // Now loop back.
+ //
+
+ SharedCacheMap =
+ CONTAINING_RECORD( SharedCacheMap->SharedCacheMapLinks.Flink,
+ SHARED_CACHE_MAP,
+ SharedCacheMapLinks );
+ }
+
+ DebugTrace( 0, me, "End of Lazy Writer Scan\n", 0 );
+
+ //
+ // Now we can release the global list and loop back, per chance to sleep.
+ //
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+
+ //
+ // Now go ahead and schedule the next scan.
+ //
+
+ CcScheduleLazyWriteScan();
+
+ //
+ // Basically, the Lazy Writer thread should never get an exception,
+ // so we put a try-except around it that bug checks one way or the other.
+ // Better we bug check here than worry about what happens if we let one
+ // get by.
+ //
+
+ } except( CcExceptionFilter( GetExceptionCode() )) {
+
+ CcBugCheck( GetExceptionCode(), 0, 0 );
+ }
+}
+
+
+//
+// Internal support routine
+//
+
+LONG
+CcExceptionFilter (
+ IN NTSTATUS ExceptionCode
+ )
+
+/*++
+
+Routine Description:
+
+ This is the standard exception filter for worker threads which simply
+ calls an FsRtl routine to see if an expected status is being raised.
+ If so, the exception is handled, else we bug check.
+
+Arguments:
+
+ ExceptionCode - the exception code which was raised.
+
+Return Value:
+
+ EXCEPTION_EXECUTE_HANDLER if expected, else a Bug Check occurs.
+
+--*/
+
+{
+ DebugTrace(0, 0, "CcExceptionFilter %08lx\n", ExceptionCode);
+// DbgBreakPoint();
+
+ if (FsRtlIsNtstatusExpected( ExceptionCode )) {
+
+ return EXCEPTION_EXECUTE_HANDLER;
+
+ } else {
+
+ return EXCEPTION_CONTINUE_SEARCH;
+ }
+}
+
+
+
+//
+// Internal support routine
+//
+
+VOID
+FASTCALL
+CcPostWorkQueue (
+ IN PWORK_QUEUE_ENTRY WorkQueueEntry,
+ IN PLIST_ENTRY WorkQueue
+ )
+
+/*++
+
+Routine Description:
+
+ This routine queues a WorkQueueEntry, which has been allocated and
+ initialized by the caller, to the WorkQueue for FIFO processing by
+ the work threads.
+
+Arguments:
+
+    WorkQueueEntry - supplies a pointer to the entry to queue
+
+    WorkQueue - supplies the work queue (express or regular) to which the
+                entry is to be appended
+
+Return Value:
+
+ None
+
+--*/
+
+{
+ KIRQL OldIrql;
+ PLIST_ENTRY WorkerThreadEntry = NULL;
+
+ ASSERT(FIELD_OFFSET(WORK_QUEUE_ITEM, List) == 0);
+
+ DebugTrace(+1, me, "CcPostWorkQueue:\n", 0 );
+ DebugTrace( 0, me, " WorkQueueEntry = %08lx\n", WorkQueueEntry );
+
+ //
+ // Queue the entry to the respective work queue.
+ //
+
+ ExAcquireFastLock( &CcWorkQueueSpinlock, &OldIrql );
+ InsertTailList( WorkQueue, &WorkQueueEntry->WorkQueueLinks );
+
+ //
+ // Now, if we have any more idle threads we can use, then activate
+ // one.
+ //
+
+ if (!IsListEmpty(&CcIdleWorkerThreadList)) {
+ WorkerThreadEntry = RemoveHeadList( &CcIdleWorkerThreadList );
+ }
+ ExReleaseFastLock( &CcWorkQueueSpinlock, OldIrql );
+
+ if (WorkerThreadEntry != NULL) {
+
+ //
+        // I had to peek in the sources to verify that this routine
+ // is a noop if the Flink is not NULL. Sheeeeit!
+ //
+
+ ((PWORK_QUEUE_ITEM)WorkerThreadEntry)->List.Flink = NULL;
+ ExQueueWorkItem( (PWORK_QUEUE_ITEM)WorkerThreadEntry, CriticalWorkQueue );
+ }
+
+ //
+ // And return to our caller
+ //
+
+ DebugTrace(-1, me, "CcPostWorkQueue -> VOID\n", 0 );
+
+ return;
+}
+
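+
+//
+//  A minimal sketch, not part of the original source, of the posting pattern
+//  used by this module: allocate a preinitialized work queue entry, fill in
+//  the function code and parameters, and post it for FIFO processing by
+//  CcWorkerThread.  The routine name is an assumption, and real callers
+//  (such as the read ahead scheduler, which lives elsewhere) take additional
+//  references and set additional flags not shown here.
+//
+
+VOID
+CcExamplePostReadAhead (
+    IN PFILE_OBJECT FileObject
+    )
+{
+    PWORK_QUEUE_ENTRY WorkQueueEntry;
+
+    WorkQueueEntry = CcAllocateWorkQueueEntry();
+
+    //
+    //  Callers in this module treat allocation failure as "try again
+    //  later" rather than as an error.
+    //
+
+    if (WorkQueueEntry == NULL) {
+
+        return;
+    }
+
+    WorkQueueEntry->Function = (UCHAR)ReadAhead;
+    WorkQueueEntry->Parameters.Read.FileObject = FileObject;
+
+    //
+    //  Post to the express queue so the request is serviced ahead of
+    //  regular write behind work.
+    //
+
+    CcPostWorkQueue( WorkQueueEntry, &CcExpressWorkQueue );
+}
+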
+
+//
+// Internal support routine
+//
+
+VOID
+CcWorkerThread (
+ PVOID ExWorkQueueItem
+ )
+
+/*++
+
+Routine Description:
+
+    This is the worker thread routine for processing cache manager work queue
+ entries.
+
+Arguments:
+
+ ExWorkQueueItem - The work item used for this thread
+
+Return Value:
+
+ None
+
+--*/
+
+{
+ KIRQL OldIrql;
+ PWORK_QUEUE_ENTRY WorkQueueEntry;
+ BOOLEAN RescanOk = FALSE;
+
+ ASSERT(FIELD_OFFSET(WORK_QUEUE_ENTRY, WorkQueueLinks) == 0);
+
+ while (TRUE) {
+
+ ExAcquireFastLock( &CcWorkQueueSpinlock, &OldIrql );
+
+ //
+ // First see if there is something in the express queue.
+ //
+
+ if (!IsListEmpty(&CcExpressWorkQueue)) {
+ WorkQueueEntry = (PWORK_QUEUE_ENTRY)RemoveHeadList( &CcExpressWorkQueue );
+
+ //
+ // If there was nothing there, then try the regular queue.
+ //
+
+ } else if (!IsListEmpty(&CcRegularWorkQueue)) {
+ WorkQueueEntry = (PWORK_QUEUE_ENTRY)RemoveHeadList( &CcRegularWorkQueue );
+
+ //
+ // Else we can break and go idle.
+ //
+
+ } else {
+ break;
+ }
+
+ ExReleaseFastLock( &CcWorkQueueSpinlock, OldIrql );
+
+ //
+ // Process the entry within a try-except clause, so that any errors
+ // will cause us to continue after the called routine has unwound.
+ //
+
+ try {
+
+ switch (WorkQueueEntry->Function) {
+
+ //
+ // A read ahead or write behind request has been nooped (but
+ // left in the queue to keep the semaphore count right).
+ //
+
+ case Noop:
+ break;
+
+ //
+ // Perform read ahead
+ //
+
+ case ReadAhead:
+
+ DebugTrace( 0, me, "CcWorkerThread Read Ahead FileObject = %08lx\n",
+ WorkQueueEntry->Parameters.Read.FileObject );
+
+ CcPerformReadAhead( WorkQueueEntry->Parameters.Read.FileObject );
+
+ break;
+
+ //
+ // Perform write behind
+ //
+
+ case WriteBehind:
+
+ DebugTrace( 0, me, "CcWorkerThread WriteBehind SharedCacheMap = %08lx\n",
+ WorkQueueEntry->Parameters.Write.SharedCacheMap );
+
+ RescanOk = (BOOLEAN)NT_SUCCESS(CcWriteBehind( WorkQueueEntry->Parameters.Write.SharedCacheMap ));
+ break;
+
+ //
+ // Perform Lazy Write Scan
+ //
+
+ case LazyWriteScan:
+
+ DebugTrace( 0, me, "CcWorkerThread Lazy Write Scan\n", 0 );
+
+ CcLazyWriteScan();
+ break;
+ }
+
+ }
+ except( CcExceptionFilter( GetExceptionCode() )) {
+
+ NOTHING;
+ }
+
+ CcFreeWorkQueueEntry( WorkQueueEntry );
+ }
+
+ //
+ // No more work. Requeue our worker thread entry and get out.
+ //
+
+ InsertTailList( &CcIdleWorkerThreadList,
+ &((PWORK_QUEUE_ITEM)ExWorkQueueItem)->List );
+
+ ExReleaseFastLock( &CcWorkQueueSpinlock, OldIrql );
+
+ if (!IsListEmpty(&CcDeferredWrites) && (CcTotalDirtyPages >= 20) && RescanOk) {
+ CcLazyWriteScan();
+ }
+
+ return;
+}
diff --git a/private/ntos/cache/logsup.c b/private/ntos/cache/logsup.c
new file mode 100644
index 000000000..22739c051
--- /dev/null
+++ b/private/ntos/cache/logsup.c
@@ -0,0 +1,548 @@
+/*++
+
+Copyright (c) 1990 Microsoft Corporation
+
+Module Name:
+
+ logsup.c
+
+Abstract:
+
+ This module implements the special cache manager support for logging
+ file systems.
+
+Author:
+
+ Tom Miller [TomM] 30-Jul-1991
+
+Revision History:
+
+--*/
+
+#include "cc.h"
+
+//
+// Define our debug constant
+//
+
+#define me 0x0000040
+
+
+VOID
+CcSetAdditionalCacheAttributes (
+ IN PFILE_OBJECT FileObject,
+ IN BOOLEAN DisableReadAhead,
+ IN BOOLEAN DisableWriteBehind
+ )
+
+/*++
+
+Routine Description:
+
+ This routine supports the setting of disable read ahead or disable write
+ behind flags to control Cache Manager operation. This routine may be
+ called any time after calling CcInitializeCacheMap. Initially both
+ read ahead and write behind are enabled. Note that the state of both
+ of these flags must be specified on each call to this routine.
+
+Arguments:
+
+ FileObject - File object for which the respective flags are to be set.
+
+ DisableReadAhead - FALSE to enable read ahead, TRUE to disable it.
+
+ DisableWriteBehind - FALSE to enable write behind, TRUE to disable it.
+
+Return Value:
+
+ None.
+
+--*/
+
+{
+ PSHARED_CACHE_MAP SharedCacheMap;
+ KIRQL OldIrql;
+
+ //
+ // Get pointer to SharedCacheMap.
+ //
+
+ SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap;
+
+ //
+ // Now set the flags and return.
+ //
+
+ ExAcquireFastLock( &CcMasterSpinLock, &OldIrql );
+ if (DisableReadAhead) {
+ SetFlag(SharedCacheMap->Flags, DISABLE_READ_AHEAD);
+ } else {
+ ClearFlag(SharedCacheMap->Flags, DISABLE_READ_AHEAD);
+ }
+ if (DisableWriteBehind) {
+ SetFlag(SharedCacheMap->Flags, DISABLE_WRITE_BEHIND | MODIFIED_WRITE_DISABLED);
+ } else {
+ ClearFlag(SharedCacheMap->Flags, DISABLE_WRITE_BEHIND);
+ }
+ ExReleaseFastLock( &CcMasterSpinLock, OldIrql );
+}
+
+
+VOID
+CcSetLogHandleForFile (
+ IN PFILE_OBJECT FileObject,
+ IN PVOID LogHandle,
+ IN PFLUSH_TO_LSN FlushToLsnRoutine
+ )
+
+/*++
+
+Routine Description:
+
+ This routine may be called to instruct the Cache Manager to store the
+ specified log handle with the shared cache map for a file, to support
+ subsequent calls to the other routines in this module which effectively
+ perform an associative search for files by log handle.
+
+Arguments:
+
+ FileObject - File for which the log handle should be stored.
+
+ LogHandle - Log Handle to store.
+
+ FlushToLsnRoutine - A routine to call before flushing buffers for this
+ file, to insure a log file is flushed to the most
+ recent Lsn for any Bcb being flushed.
+
+Return Value:
+
+ None.
+
+--*/
+
+{
+ PSHARED_CACHE_MAP SharedCacheMap;
+
+ //
+ // Get pointer to SharedCacheMap.
+ //
+
+ SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap;
+
+ //
+ // Now set the log file handle and flush routine
+ //
+
+ SharedCacheMap->LogHandle = LogHandle;
+ SharedCacheMap->FlushToLsnRoutine = FlushToLsnRoutine;
+}
+
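+
+//
+//  A minimal sketch, not part of the original source, of how a logging file
+//  system might wire a metadata stream to its log once caching has been
+//  initialized.  The routine name is an assumption; the flush routine and
+//  log handle are whatever the file system's log package provides, and the
+//  PFLUSH_TO_LSN prototype is the one declared in ntos\inc\cache.h.
+//
+
+VOID
+FsdExampleStartLoggedCaching (
+    IN PFILE_OBJECT FileObject,
+    IN PVOID LogHandle,
+    IN PFLUSH_TO_LSN FlushToLsnRoutine
+    )
+{
+    //
+    //  Metadata streams are written through the log, so disable read ahead
+    //  and write behind; the latter also marks the stream modified-write-
+    //  disabled, and the Lazy Writer will then take Bcbs exclusive.
+    //
+
+    CcSetAdditionalCacheAttributes( FileObject, TRUE, TRUE );
+
+    //
+    //  Record the log handle, so the routines in this module can find the
+    //  stream by association, and the routine the Cache Manager calls before
+    //  flushing a Bcb, to force the log out to that Bcb's newest Lsn first.
+    //
+
+    CcSetLogHandleForFile( FileObject, LogHandle, FlushToLsnRoutine );
+}
+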
+
+LARGE_INTEGER
+CcGetDirtyPages (
+ IN PVOID LogHandle,
+ IN PDIRTY_PAGE_ROUTINE DirtyPageRoutine,
+ IN PVOID Context1,
+ IN PVOID Context2
+ )
+
+/*++
+
+Routine Description:
+
+ This routine may be called to return all of the dirty pages in all files
+ for a given log handle. Each page is returned by an individual call to
+ the Dirty Page Routine. The Dirty Page Routine is defined by a prototype
+ in ntos\inc\cache.h.
+
+Arguments:
+
+ LogHandle - Log Handle which must match the log handle previously stored
+ for all files which are to be returned.
+
+ DirtyPageRoutine -- The routine to call as each dirty page for this log
+ handle is found.
+
+ Context1 - First context parameter to be passed to the Dirty Page Routine.
+
+    Context2 - Second context parameter to be passed to the Dirty Page Routine.
+
+Return Value:
+
+ LARGE_INTEGER - Oldest Lsn found of all the dirty pages, or 0 if no dirty pages
+
+--*/
+
+{
+ PSHARED_CACHE_MAP SharedCacheMap;
+ PBCB Bcb, BcbToUnpin;
+ KIRQL OldIrql;
+ NTSTATUS ExceptionStatus;
+ LARGE_INTEGER SavedFileOffset, SavedOldestLsn, SavedNewestLsn;
+ ULONG SavedByteLength;
+ ULONG LoopsWithLockHeld = 0;
+ LARGE_INTEGER OldestLsn = {0,0};
+
+ //
+ // Synchronize with changes to the SharedCacheMap list.
+ //
+
+ ExAcquireFastLock( &CcMasterSpinLock, &OldIrql );
+
+ SharedCacheMap = CONTAINING_RECORD( CcDirtySharedCacheMapList.SharedCacheMapLinks.Flink,
+ SHARED_CACHE_MAP,
+ SharedCacheMapLinks );
+
+ BcbToUnpin = NULL;
+ while (&SharedCacheMap->SharedCacheMapLinks != &CcDirtySharedCacheMapList.SharedCacheMapLinks) {
+
+ //
+ // Skip over cursors, SharedCacheMaps for other LogHandles, and ones with
+ // no dirty pages
+ //
+
+ if (!FlagOn(SharedCacheMap->Flags, IS_CURSOR) && (SharedCacheMap->LogHandle == LogHandle) &&
+ (SharedCacheMap->DirtyPages != 0)) {
+
+ //
+ // This SharedCacheMap should stick around for a while in the dirty list.
+ //
+
+ SharedCacheMap->OpenCount += 1;
+ SharedCacheMap->DirtyPages += 1;
+
+ //
+ // Set our initial resume point and point to first Bcb in List.
+ //
+
+ Bcb = CONTAINING_RECORD( SharedCacheMap->BcbList.Flink, BCB, BcbLinks );
+
+ //
+ // Scan to the end of the Bcb list.
+ //
+
+ while (&Bcb->BcbLinks != &SharedCacheMap->BcbList) {
+
+ //
+ // If the Bcb is dirty, then capture the inputs for the
+ // callback routine so we can call without holding a spinlock.
+ //
+
+ LoopsWithLockHeld += 1;
+ if ((Bcb->NodeTypeCode == CACHE_NTC_BCB) && Bcb->Dirty) {
+
+ SavedFileOffset = Bcb->FileOffset;
+ SavedByteLength = Bcb->ByteLength;
+ SavedOldestLsn = Bcb->OldestLsn;
+ SavedNewestLsn = Bcb->NewestLsn;
+
+ //
+ // Increment PinCount so the Bcb sticks around
+ //
+
+ Bcb->PinCount += 1;
+
+ ExReleaseFastLock( &CcMasterSpinLock, OldIrql );
+
+ //
+ // Any Bcb to unpin from a previous loop?
+ //
+
+ if (BcbToUnpin != NULL) {
+ CcUnpinFileData( BcbToUnpin, TRUE, UNPIN );
+ BcbToUnpin = NULL;
+ }
+
+ //
+ // Call the file system
+ //
+
+ (*DirtyPageRoutine)( SharedCacheMap->FileObject,
+ &SavedFileOffset,
+ SavedByteLength,
+ &SavedOldestLsn,
+ &SavedNewestLsn,
+ Context1,
+ Context2 );
+
+ //
+ // Possibly update OldestLsn
+ //
+
+ if ((SavedOldestLsn.QuadPart != 0) &&
+ ((OldestLsn.QuadPart == 0) || (SavedOldestLsn.QuadPart < OldestLsn.QuadPart ))) {
+ OldestLsn = SavedOldestLsn;
+ }
+
+ //
+                    // Now reacquire the spinlock and scan from the resume
+                    // point to the next Bcb to return in the descending list.
+ //
+
+ ExAcquireFastLock( &CcMasterSpinLock, &OldIrql );
+
+ //
+ // Normally the Bcb can stay around a while, but if not,
+ // we will just remember it for the next time we do not
+ // have the spin lock. We cannot unpin it now, because
+ // we would lose our place in the list.
+ //
+
+ if (Bcb->Dirty || (Bcb->PinCount > 1)) {
+ Bcb->PinCount -= 1;
+ } else {
+ BcbToUnpin = Bcb;
+ }
+
+                    //
+                    // Normally the Bcb is not going away now, but if it is,
+                    // we will free it by calling the normal unpin routine the
+                    // next time we drop the spin lock.
+                    //
+
+ LoopsWithLockHeld = 0;
+ }
+
+ Bcb = CONTAINING_RECORD( Bcb->BcbLinks.Flink, BCB, BcbLinks );
+ }
+
+ //
+ // We need to unpin any Bcb we are holding before moving on to
+ // the next SharedCacheMap, or else CcDeleteSharedCacheMap will
+ // also delete this Bcb.
+ //
+
+ if (BcbToUnpin != NULL) {
+
+ ExReleaseFastLock( &CcMasterSpinLock, OldIrql );
+
+ CcUnpinFileData( BcbToUnpin, TRUE, UNPIN );
+ BcbToUnpin = NULL;
+
+ ExAcquireFastLock( &CcMasterSpinLock, &OldIrql );
+ }
+
+ //
+ // Now release the SharedCacheMap, leaving it in the dirty list.
+ //
+
+ SharedCacheMap->OpenCount -= 1;
+ SharedCacheMap->DirtyPages -= 1;
+ }
+
+ //
+        // Make sure we occasionally drop the lock.  Set WRITE_QUEUED
+        // to keep the guy from going away, and increment DirtyPages to
+        // keep it in this list.
+ //
+
+ if ((++LoopsWithLockHeld >= 20) &&
+ !FlagOn(SharedCacheMap->Flags, WRITE_QUEUED | IS_CURSOR)) {
+
+ SetFlag(SharedCacheMap->Flags, WRITE_QUEUED);
+ SharedCacheMap->DirtyPages += 1;
+ ExReleaseFastLock( &CcMasterSpinLock, OldIrql );
+ LoopsWithLockHeld = 0;
+ ExAcquireFastLock( &CcMasterSpinLock, &OldIrql );
+ ClearFlag(SharedCacheMap->Flags, WRITE_QUEUED);
+ SharedCacheMap->DirtyPages -= 1;
+ }
+
+ //
+ // Now loop back for the next cache map.
+ //
+
+ SharedCacheMap =
+ CONTAINING_RECORD( SharedCacheMap->SharedCacheMapLinks.Flink,
+ SHARED_CACHE_MAP,
+ SharedCacheMapLinks );
+ }
+
+ ExReleaseFastLock( &CcMasterSpinLock, OldIrql );
+
+ return OldestLsn;
+}
+
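+
+//
+//  A minimal sketch, not part of the original source, of a dirty page
+//  routine matching the way CcGetDirtyPages invokes it above, together with
+//  a caller.  A real checkpoint would record each returned range in its
+//  restart area; this sketch only accumulates totals.  All names here are
+//  illustrative assumptions, and the exact PDIRTY_PAGE_ROUTINE prototype is
+//  the one referenced above in ntos\inc\cache.h.
+//
+
+typedef struct _FSD_EXAMPLE_DIRTY_PAGE_CONTEXT {
+    ULONG Ranges;
+    LONGLONG DirtyBytes;
+} FSD_EXAMPLE_DIRTY_PAGE_CONTEXT, *PFSD_EXAMPLE_DIRTY_PAGE_CONTEXT;
+
+VOID
+FsdExampleDirtyPageRoutine (
+    IN PFILE_OBJECT FileObject,
+    IN PLARGE_INTEGER FileOffset,
+    IN ULONG Length,
+    IN PLARGE_INTEGER OldestLsn,
+    IN PLARGE_INTEGER NewestLsn,
+    IN PVOID Context1,
+    IN PVOID Context2
+    )
+{
+    PFSD_EXAMPLE_DIRTY_PAGE_CONTEXT Context = (PFSD_EXAMPLE_DIRTY_PAGE_CONTEXT)Context1;
+
+    UNREFERENCED_PARAMETER(FileObject);
+    UNREFERENCED_PARAMETER(FileOffset);
+    UNREFERENCED_PARAMETER(OldestLsn);
+    UNREFERENCED_PARAMETER(NewestLsn);
+    UNREFERENCED_PARAMETER(Context2);
+
+    Context->Ranges += 1;
+    Context->DirtyBytes += (LONGLONG)Length;
+}
+
+LARGE_INTEGER
+FsdExampleCheckpointScan (
+    IN PVOID LogHandle
+    )
+{
+    FSD_EXAMPLE_DIRTY_PAGE_CONTEXT Context = {0, 0};
+
+    //
+    //  The return value is the oldest Lsn of any dirty page tied to this
+    //  log handle (or 0 if none), which bounds how far the log tail can
+    //  safely be advanced.
+    //
+
+    return CcGetDirtyPages( LogHandle,
+                            &FsdExampleDirtyPageRoutine,
+                            &Context,
+                            NULL );
+}
+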
+
+BOOLEAN
+CcIsThereDirtyData (
+ IN PVPB Vpb
+ )
+
+/*++
+
+Routine Description:
+
+ This routine returns TRUE if the specified Vcb has any unwritten dirty
+ data in the cache.
+
+Arguments:
+
+ Vpb - specifies Vpb to check for
+
+Return Value:
+
+ FALSE - if the Vpb has no dirty data
+ TRUE - if the Vpb has dirty data
+
+--*/
+
+{
+ PSHARED_CACHE_MAP SharedCacheMap;
+ KIRQL OldIrql;
+ ULONG LoopsWithLockHeld = 0;
+
+ //
+ // Synchronize with changes to the SharedCacheMap list.
+ //
+
+ ExAcquireFastLock( &CcMasterSpinLock, &OldIrql );
+
+ SharedCacheMap = CONTAINING_RECORD( CcDirtySharedCacheMapList.SharedCacheMapLinks.Flink,
+ SHARED_CACHE_MAP,
+ SharedCacheMapLinks );
+
+ while (&SharedCacheMap->SharedCacheMapLinks != &CcDirtySharedCacheMapList.SharedCacheMapLinks) {
+
+ //
+ // Look at this one if the Vpb matches and if there is dirty data.
+ // For what it's worth, don't worry about dirty data in temporary files,
+ // as that should not concern the caller if it wants to dismount.
+ //
+
+ if (!FlagOn(SharedCacheMap->Flags, IS_CURSOR) &&
+ (SharedCacheMap->FileObject->Vpb == Vpb) &&
+ (SharedCacheMap->DirtyPages != 0) &&
+ !FlagOn(SharedCacheMap->FileObject->Flags, FO_TEMPORARY_FILE)) {
+
+ ExReleaseFastLock( &CcMasterSpinLock, OldIrql );
+ return TRUE;
+ }
+
+ //
+        // Make sure we occasionally drop the lock.  Set WRITE_QUEUED
+        // to keep the guy from going away, and increment DirtyPages to
+        // keep it in this list.
+ //
+
+ if ((++LoopsWithLockHeld >= 20) &&
+ !FlagOn(SharedCacheMap->Flags, WRITE_QUEUED | IS_CURSOR)) {
+
+ SetFlag(SharedCacheMap->Flags, WRITE_QUEUED);
+ SharedCacheMap->DirtyPages += 1;
+ ExReleaseFastLock( &CcMasterSpinLock, OldIrql );
+ LoopsWithLockHeld = 0;
+ ExAcquireFastLock( &CcMasterSpinLock, &OldIrql );
+ ClearFlag(SharedCacheMap->Flags, WRITE_QUEUED);
+ SharedCacheMap->DirtyPages -= 1;
+ }
+
+ //
+ // Now loop back for the next cache map.
+ //
+
+ SharedCacheMap =
+ CONTAINING_RECORD( SharedCacheMap->SharedCacheMapLinks.Flink,
+ SHARED_CACHE_MAP,
+ SharedCacheMapLinks );
+ }
+
+ ExReleaseFastLock( &CcMasterSpinLock, OldIrql );
+
+ return FALSE;
+}
+
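+
+//
+//  A minimal sketch, not part of the original source, of a lock/dismount
+//  style check using CcIsThereDirtyData.  The routine name and the status
+//  code chosen for the failure case are illustrative assumptions.
+//
+
+NTSTATUS
+FsdExampleCheckCanLockVolume (
+    IN PVPB Vpb
+    )
+{
+    //
+    //  Refuse to lock the volume while the cache still holds unwritten
+    //  data for it; the caller can flush the volume and retry.
+    //
+
+    if (CcIsThereDirtyData( Vpb )) {
+
+        return STATUS_ACCESS_DENIED;
+    }
+
+    return STATUS_SUCCESS;
+}
+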
+LARGE_INTEGER
+CcGetLsnForFileObject(
+ IN PFILE_OBJECT FileObject,
+ OUT PLARGE_INTEGER OldestLsn OPTIONAL
+ )
+
+/*++
+
+Routine Description:
+
+ This routine returns the oldest and newest LSNs for a file object.
+
+Arguments:
+
+    FileObject - File for which the oldest and newest Lsns are desired.
+
+ OldestLsn - pointer to location to store oldest LSN for file object.
+
+Return Value:
+
+ The newest LSN for the file object.
+
+--*/
+
+{
+ PBCB Bcb;
+ KIRQL OldIrql;
+ LARGE_INTEGER Oldest, Newest;
+ PSHARED_CACHE_MAP SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap;
+
+ //
+ // initialize lsn variables
+ //
+
+ Oldest.LowPart = 0;
+ Oldest.HighPart = 0;
+ Newest.LowPart = 0;
+ Newest.HighPart = 0;
+
+ if(SharedCacheMap == NULL) {
+ return Oldest;
+ }
+
+ ExAcquireFastLock(&CcMasterSpinLock, &OldIrql);
+
+ //
+ // Now point to first Bcb in List, and loop through it.
+ //
+
+ Bcb = CONTAINING_RECORD( SharedCacheMap->BcbList.Flink, BCB, BcbLinks );
+
+ while (&Bcb->BcbLinks != &SharedCacheMap->BcbList) {
+
+ //
+ // If the Bcb is dirty then capture the oldest and newest lsn
+ //
+
+
+ if ((Bcb->NodeTypeCode == CACHE_NTC_BCB) && Bcb->Dirty) {
+
+ LARGE_INTEGER BcbLsn, BcbNewest;
+
+ BcbLsn = Bcb->OldestLsn;
+ BcbNewest = Bcb->NewestLsn;
+
+ if ((BcbLsn.QuadPart != 0) &&
+ ((Oldest.QuadPart == 0) ||
+ (BcbLsn.QuadPart < Oldest.QuadPart))) {
+
+ Oldest = BcbLsn;
+ }
+
+ if ((BcbLsn.QuadPart != 0) && (BcbNewest.QuadPart > Newest.QuadPart)) {
+
+ Newest = BcbNewest;
+ }
+ }
+
+
+ Bcb = CONTAINING_RECORD( Bcb->BcbLinks.Flink, BCB, BcbLinks );
+ }
+
+    //
+    // Now release the spin lock for this Bcb list and return the oldest
+    // Lsn, if the caller asked for it, along with the newest.
+    //
+
+ ExReleaseFastLock( &CcMasterSpinLock, OldIrql );
+
+ if (ARGUMENT_PRESENT(OldestLsn)) {
+
+ *OldestLsn = Oldest;
+ }
+
+ return Newest;
+}
diff --git a/private/ntos/cache/mdlsup.c b/private/ntos/cache/mdlsup.c
new file mode 100644
index 000000000..0435e7283
--- /dev/null
+++ b/private/ntos/cache/mdlsup.c
@@ -0,0 +1,999 @@
+/*++
+
+Copyright (c) 1990 Microsoft Corporation
+
+Module Name:
+
+ mdlsup.c
+
+Abstract:
+
+ This module implements the Mdl support routines for the Cache subsystem.
+
+Author:
+
+ Tom Miller [TomM] 4-May-1990
+
+Revision History:
+
+--*/
+
+#include "cc.h"
+
+//
+// Debug Trace Level
+//
+
+#define me (0x00000010)
+
+VOID
+CcMdlRead (
+ IN PFILE_OBJECT FileObject,
+ IN PLARGE_INTEGER FileOffset,
+ IN ULONG Length,
+ OUT PMDL *MdlChain,
+ OUT PIO_STATUS_BLOCK IoStatus
+ )
+
+/*++
+
+Routine Description:
+
+ This routine attempts to lock the specified file data in the cache
+ and return a description of it in an Mdl along with the correct
+ I/O status. It is *not* safe to call this routine from Dpc level.
+
+ This routine is synchronous, and raises on errors.
+
+ As each call returns, the pages described by the Mdl are
+ locked in memory, but not mapped in system space. If the caller
+ needs the pages mapped in system space, then it must map them.
+
+ Note that each call is a "single shot" which should be followed by
+ a call to CcMdlReadComplete. To resume an Mdl-based transfer, the
+ caller must form one or more subsequent calls to CcMdlRead with
+ appropriately adjusted parameters.
+
+Arguments:
+
+ FileObject - Pointer to the file object for a file which was
+ opened with NO_INTERMEDIATE_BUFFERING clear, i.e., for
+ which CcInitializeCacheMap was called by the file system.
+
+ FileOffset - Byte offset in file for desired data.
+
+ Length - Length of desired data in bytes.
+
+ MdlChain - On output it returns a pointer to an Mdl chain describing
+               the desired data.  Note that if this routine raises, any
+               Mdls allocated so far are unlocked and freed before the
+               exception propagates.
+
+ IoStatus - Pointer to standard I/O status block to receive the status
+ for the transfer. (STATUS_SUCCESS guaranteed for cache
+ hits, otherwise the actual I/O status is returned.) The
+ I/O Information Field indicates how many bytes have been
+ successfully locked down in the Mdl Chain.
+
+Return Value:
+
+ None
+
+Raises:
+
+ STATUS_INSUFFICIENT_RESOURCES - If a pool allocation failure occurs.
+
+--*/
+
+{
+ PSHARED_CACHE_MAP SharedCacheMap;
+ PPRIVATE_CACHE_MAP PrivateCacheMap;
+ PVOID CacheBuffer;
+ LARGE_INTEGER FOffset;
+ PMDL Mdl;
+ PMDL MdlTemp;
+ ULONG SavedState = 0;
+ ULONG OriginalLength = Length;
+ ULONG Information = 0;
+ PVACB Vacb = NULL;
+ ULONG SavedMissCounter = 0;
+
+ KIRQL OldIrql;
+ ULONG ActivePage;
+ ULONG PageIsDirty;
+ PVACB ActiveVacb = NULL;
+
+ DebugTrace(+1, me, "CcMdlRead\n", 0 );
+ DebugTrace( 0, me, " FileObject = %08lx\n", FileObject );
+ DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart,
+ FileOffset->HighPart );
+ DebugTrace( 0, me, " Length = %08lx\n", Length );
+
+ //
+ // Get pointer to SharedCacheMap.
+ //
+
+ SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap;
+ PrivateCacheMap = FileObject->PrivateCacheMap;
+
+ //
+ // See if we have an active Vacb, that we need to free.
+ //
+
+ GetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, PageIsDirty );
+
+ //
+ // If there is an end of a page to be zeroed, then free that page now,
+    // so we do not hand the caller uninitialized data...
+ //
+
+ if ((ActiveVacb != NULL) || (SharedCacheMap->NeedToZero != NULL)) {
+
+ CcFreeActiveVacb( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty );
+ }
+
+ //
+ // If read ahead is enabled, then do the read ahead here so it
+ // overlaps with the copy (otherwise we will do it below).
+ // Note that we are assuming that we will not get ahead of our
+ // current transfer - if read ahead is working it should either
+ // already be in memory or else underway.
+ //
+
+ if (PrivateCacheMap->ReadAheadEnabled && (PrivateCacheMap->ReadAheadLength[1] == 0)) {
+ CcScheduleReadAhead( FileObject, FileOffset, Length );
+ }
+
+ //
+ // Increment performance counters
+ //
+
+ CcMdlReadWait += 1;
+
+ //
+ // This is not an exact solution, but when IoPageRead gets a miss,
+ // it cannot tell whether it was CcCopyRead or CcMdlRead, but since
+ // the miss should occur very soon, by loading the pointer here
+ // probably the right counter will get incremented, and in any case,
+    // we hope the errors average out!
+ //
+
+ CcMissCounter = &CcMdlReadWaitMiss;
+
+ FOffset = *FileOffset;
+
+ //
+ // Check for read past file size, the caller must filter this case out.
+ //
+
+ ASSERT( ( FOffset.QuadPart + (LONGLONG)Length ) <= SharedCacheMap->FileSize.QuadPart );
+
+ //
+ // Put try-finally around the loop to deal with any exceptions
+ //
+
+ try {
+
+ //
+ // Not all of the transfer will come back at once, so we have to loop
+ // until the entire transfer is complete.
+ //
+
+ while (Length != 0) {
+
+ ULONG ReceivedLength;
+ LARGE_INTEGER BeyondLastByte;
+
+ //
+ // Map the data and read it in (if necessary) with the
+ // MmProbeAndLockPages call below.
+ //
+
+ CacheBuffer = CcGetVirtualAddress( SharedCacheMap,
+ FOffset,
+ &Vacb,
+ &ReceivedLength );
+
+ if (ReceivedLength > Length) {
+ ReceivedLength = Length;
+ }
+
+ BeyondLastByte.QuadPart = FOffset.QuadPart + (LONGLONG)ReceivedLength;
+
+ //
+ // Now attempt to allocate an Mdl to describe the mapped data.
+ //
+
+ DebugTrace( 0, mm, "IoAllocateMdl:\n", 0 );
+ DebugTrace( 0, mm, " BaseAddress = %08lx\n", CacheBuffer );
+ DebugTrace( 0, mm, " Length = %08lx\n", ReceivedLength );
+
+ Mdl = IoAllocateMdl( CacheBuffer,
+ ReceivedLength,
+ FALSE,
+ FALSE,
+ NULL );
+
+ DebugTrace( 0, mm, " <Mdl = %08lx\n", Mdl );
+
+ if (Mdl == NULL) {
+ DebugTrace( 0, 0, "Failed to allocate Mdl\n", 0 );
+
+ ExRaiseStatus( STATUS_INSUFFICIENT_RESOURCES );
+ }
+
+ DebugTrace( 0, mm, "MmProbeAndLockPages:\n", 0 );
+ DebugTrace( 0, mm, " Mdl = %08lx\n", Mdl );
+
+ //
+ // Note that this probe should never fail, because we can
+            // trust the address returned from CcGetVirtualAddress.  Therefore,
+ // if we succeed in allocating the Mdl above, we should
+ // manage to elude any expected exceptions through the end
+ // of this loop.
+ //
+
+ //
+ // Set to see if the miss counter changes in order to
+ // detect when we should turn on read ahead.
+ //
+
+ SavedMissCounter += CcMdlReadWaitMiss;
+
+ MmDisablePageFaultClustering(&SavedState);
+ MmProbeAndLockPages( Mdl, KernelMode, IoReadAccess );
+ MmEnablePageFaultClustering(SavedState);
+ SavedState = 0;
+
+ SavedMissCounter -= CcMdlReadWaitMiss;
+
+ //
+ // Unmap the data now, now that the pages are locked down.
+ //
+
+ CcFreeVirtualAddress( Vacb );
+ Vacb = NULL;
+
+ //
+ // Now link the Mdl into the caller's chain
+ //
+
+ if ( *MdlChain == NULL ) {
+ *MdlChain = Mdl;
+ } else {
+ MdlTemp = CONTAINING_RECORD( *MdlChain, MDL, Next );
+ while (MdlTemp->Next != NULL) {
+ MdlTemp = MdlTemp->Next;
+ }
+ MdlTemp->Next = Mdl;
+ }
+
+ //
+ // Assume we did not get all the data we wanted, and set FOffset
+ // to the end of the returned data.
+ //
+
+ FOffset = BeyondLastByte;
+
+ //
+ // Update number of bytes transferred.
+ //
+
+ Information += ReceivedLength;
+
+ //
+ // Calculate length left to transfer.
+ //
+
+ Length -= ReceivedLength;
+ }
+ }
+ finally {
+
+ CcMissCounter = &CcThrowAway;
+
+ if (AbnormalTermination()) {
+
+ if (SavedState != 0) {
+ MmEnablePageFaultClustering(SavedState);
+ }
+
+ //
+ // We may have failed to allocate an Mdl while still having
+ // data mapped.
+ //
+
+ if (Vacb != NULL) {
+ CcFreeVirtualAddress( Vacb );
+ }
+
+ //
+ // Otherwise loop to deallocate the Mdls
+ //
+
+ while (*MdlChain != NULL) {
+ MdlTemp = (*MdlChain)->Next;
+
+ DebugTrace( 0, mm, "MmUnlockPages/IoFreeMdl:\n", 0 );
+ DebugTrace( 0, mm, " Mdl = %08lx\n", *MdlChain );
+
+ MmUnlockPages( *MdlChain );
+ IoFreeMdl( *MdlChain );
+
+ *MdlChain = MdlTemp;
+ }
+
+ DebugTrace(-1, me, "CcMdlRead -> Unwinding\n", 0 );
+
+ }
+ else {
+
+ //
+ // Now enable read ahead if it looks like we got any misses, and do
+ // the first one.
+ //
+
+ if (!PrivateCacheMap->ReadAheadEnabled && (SavedMissCounter != 0)) {
+
+ PrivateCacheMap->ReadAheadEnabled = TRUE;
+ CcScheduleReadAhead( FileObject, FileOffset, OriginalLength );
+ }
+
+ //
+ // Now that we have described our desired read ahead, let's
+ // shift the read history down.
+ //
+
+ PrivateCacheMap->FileOffset1 = PrivateCacheMap->FileOffset2;
+ PrivateCacheMap->BeyondLastByte1 = PrivateCacheMap->BeyondLastByte2;
+ PrivateCacheMap->FileOffset2 = *FileOffset;
+ PrivateCacheMap->BeyondLastByte2.QuadPart =
+ FileOffset->QuadPart + (LONGLONG)OriginalLength;
+
+ IoStatus->Status = STATUS_SUCCESS;
+ IoStatus->Information = Information;
+ }
+ }
+
+
+ DebugTrace( 0, me, " <MdlChain = %08lx\n", *MdlChain );
+ DebugTrace2(0, me, " <IoStatus = %08lx, %08lx\n", IoStatus->Status,
+ IoStatus->Information );
+ DebugTrace(-1, me, "CcMdlRead -> VOID\n", 0 );
+
+ return;
+}
+
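+//
+// Illustrative sketch only (not part of this module): a minimal example of
+// the Mdl read calling sequence described above, assuming the caller has
+// already verified that the request does not extend beyond FileSize and
+// that CcInitializeCacheMap has been called for the file.  The helper name
+// SampleMdlReadPath is hypothetical.
+//
+#if 0
+VOID
+SampleMdlReadPath (
+    IN PFILE_OBJECT FileObject,
+    IN PLARGE_INTEGER FileOffset,
+    IN ULONG Length
+    )
+{
+    PMDL MdlChain = NULL;
+    IO_STATUS_BLOCK IoStatus;
+
+    //
+    // Lock the requested range of the cache into an Mdl chain.  This call
+    // raises on failure, so a real caller would wrap it in whatever
+    // try-except handling its own error model requires.
+    //
+
+    CcMdlRead( FileObject, FileOffset, Length, &MdlChain, &IoStatus );
+
+    //
+    // ... transfer the IoStatus.Information bytes described by MdlChain
+    // (for example, hand the Mdls to a transport) ...
+    //
+
+    //
+    // Each successful CcMdlRead must be completed exactly once.
+    //
+
+    CcMdlReadComplete( FileObject, MdlChain );
+}
+#endif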
+
+//
+// First we have the old routine which checks for an entry in the FastIo vector.
+// This routine becomes obsolete for every component that compiles with the new
+// definition of FsRtlMdlReadComplete in fsrtl.h.
+//
+
+VOID
+CcMdlReadComplete (
+ IN PFILE_OBJECT FileObject,
+ IN PMDL MdlChain
+ )
+
+{
+ PDEVICE_OBJECT DeviceObject;
+ PFAST_IO_DISPATCH FastIoDispatch;
+
+ DeviceObject = IoGetRelatedDeviceObject( FileObject );
+ FastIoDispatch = DeviceObject->DriverObject->FastIoDispatch;
+
+ if ((FastIoDispatch != NULL) &&
+ (FastIoDispatch->SizeOfFastIoDispatch > FIELD_OFFSET(FAST_IO_DISPATCH, MdlWriteComplete)) &&
+ (FastIoDispatch->MdlReadComplete != NULL)) {
+
+ FastIoDispatch->MdlReadComplete( FileObject, MdlChain, DeviceObject );
+
+ } else {
+ CcMdlReadComplete2( FileObject, MdlChain );
+ }
+}
+
+VOID
+CcMdlReadComplete2 (
+ IN PFILE_OBJECT FileObject,
+ IN PMDL MdlChain
+ )
+
+/*++
+
+Routine Description:
+
+ This routine must be called at IPL0 after a call to CcMdlRead. The
+ caller must simply supply the address of the MdlChain returned in
+ CcMdlRead.
+
+ This call does the following:
+
+ Deletes the MdlChain
+
+Arguments:
+
+ FileObject - Pointer to the file object for a file which was
+ opened with NO_INTERMEDIATE_BUFFERING clear, i.e., for
+ which CcInitializeCacheMap was called by the file system.
+
+ MdlChain - same as returned from corresponding call to CcMdlRead.
+
+Return Value:
+
+ None.
+--*/
+
+{
+ PMDL MdlNext;
+
+ DebugTrace(+1, me, "CcMdlReadComplete\n", 0 );
+ DebugTrace( 0, me, " FileObject = %08lx\n", FileObject );
+ DebugTrace( 0, me, " MdlChain = %08lx\n", MdlChain );
+
+ //
+ // Deallocate the Mdls
+ //
+
+ while (MdlChain != NULL) {
+
+ MdlNext = MdlChain->Next;
+
+ DebugTrace( 0, mm, "MmUnlockPages/IoFreeMdl:\n", 0 );
+ DebugTrace( 0, mm, " Mdl = %08lx\n", MdlChain );
+
+ MmUnlockPages( MdlChain );
+
+ IoFreeMdl( MdlChain );
+
+ MdlChain = MdlNext;
+ }
+
+ DebugTrace(-1, me, "CcMdlReadComplete -> VOID\n", 0 );
+}
+
+
+VOID
+CcPrepareMdlWrite (
+ IN PFILE_OBJECT FileObject,
+ IN PLARGE_INTEGER FileOffset,
+ IN ULONG Length,
+ OUT PMDL *MdlChain,
+ OUT PIO_STATUS_BLOCK IoStatus
+ )
+
+/*++
+
+Routine Description:
+
+ This routine attempts to lock the specified file data in the cache
+ and return a description of it in an Mdl along with the correct
+ I/O status. Pages to be completely overwritten may be satisfied
+    with empty pages.  It is *not* safe to call this routine from Dpc level.
+
+ This call is synchronous and raises on error.
+
+ When this call returns, the caller may immediately begin
+ to transfer data into the buffers via the Mdl.
+
+    When the call returns, the pages described by the Mdl are
+ locked in memory, but not mapped in system space. If the caller
+ needs the pages mapped in system space, then it must map them.
+ On the subsequent call to CcMdlWriteComplete the pages will be
+ unmapped if they were mapped, and in any case unlocked and the Mdl
+ deallocated.
+
+Arguments:
+
+ FileObject - Pointer to the file object for a file which was
+ opened with NO_INTERMEDIATE_BUFFERING clear, i.e., for
+ which CcInitializeCacheMap was called by the file system.
+
+ FileOffset - Byte offset in file for desired data.
+
+ Length - Length of desired data in bytes.
+
+ MdlChain - On output it returns a pointer to an Mdl chain describing
+               the desired data.  Note that if this routine raises, any
+               Mdls allocated so far are unlocked and freed before the
+               exception propagates.
+
+ IoStatus - Pointer to standard I/O status block to receive the status
+ for the in-transfer of the data. (STATUS_SUCCESS guaranteed
+ for cache hits, otherwise the actual I/O status is returned.)
+ The I/O Information Field indicates how many bytes have been
+ successfully locked down in the Mdl Chain.
+
+Return Value:
+
+ None
+
+--*/
+
+{
+ PSHARED_CACHE_MAP SharedCacheMap;
+ PVOID CacheBuffer;
+ LARGE_INTEGER FOffset;
+ PVACB Vacb;
+ PMDL Mdl;
+ PMDL MdlTemp;
+ LARGE_INTEGER Temp;
+ ULONG SavedState = 0;
+ ULONG ZeroFlags = 0;
+ ULONG Information = 0;
+
+ KIRQL OldIrql;
+ ULONG ActivePage;
+ ULONG PageIsDirty;
+ PVACB ActiveVacb = NULL;
+
+ DebugTrace(+1, me, "CcPrepareMdlWrite\n", 0 );
+ DebugTrace( 0, me, " FileObject = %08lx\n", FileObject );
+ DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart,
+ FileOffset->HighPart );
+ DebugTrace( 0, me, " Length = %08lx\n", Length );
+
+ //
+ // Get pointer to SharedCacheMap.
+ //
+
+ SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap;
+
+ //
+ // See if we have an active Vacb, that we need to free.
+ //
+
+ GetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, PageIsDirty );
+
+ //
+ // If there is an end of a page to be zeroed, then free that page now,
+ // so it does not cause our data to get zeroed. If there is an active
+ // page, free it so we have the correct ValidDataGoal.
+ //
+
+ if ((ActiveVacb != NULL) || (SharedCacheMap->NeedToZero != NULL)) {
+
+ CcFreeActiveVacb( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty );
+ }
+
+ FOffset = *FileOffset;
+
+ //
+ // Put try-finally around the loop to deal with exceptions
+ //
+
+ try {
+
+ //
+ // Not all of the transfer will come back at once, so we have to loop
+ // until the entire transfer is complete.
+ //
+
+ while (Length != 0) {
+
+ ULONG ReceivedLength;
+ LARGE_INTEGER BeyondLastByte;
+
+ //
+ // Calculate how much we could potentially access at this
+ // FileOffset, then cut it down if it is more than we need.
+ //
+
+ ReceivedLength = VACB_MAPPING_GRANULARITY -
+ (FOffset.LowPart & (VACB_MAPPING_GRANULARITY - 1));
+
+ if (ReceivedLength > Length) {
+ ReceivedLength = Length;
+ }
+
+ BeyondLastByte.QuadPart = FOffset.QuadPart + (LONGLONG)ReceivedLength;
+
+ //
+ // At this point we can calculate the ZeroFlags.
+ //
+
+ //
+ // We can always zero middle pages, if any.
+ //
+
+ ZeroFlags = ZERO_MIDDLE_PAGES;
+
+ //
+ // See if we are completely overwriting the first or last page.
+ //
+
+ if (((FOffset.LowPart & (PAGE_SIZE - 1)) == 0) &&
+ (ReceivedLength >= PAGE_SIZE)) {
+ ZeroFlags |= ZERO_FIRST_PAGE;
+ }
+
+ if ((BeyondLastByte.LowPart & (PAGE_SIZE - 1)) == 0) {
+ ZeroFlags |= ZERO_LAST_PAGE;
+ }
+
+ //
+ // See if the entire transfer is beyond valid data length,
+ // or at least starting from the second page.
+ //
+
+ Temp = FOffset;
+ Temp.LowPart &= ~(PAGE_SIZE -1);
+ Temp.QuadPart = SharedCacheMap->ValidDataGoal.QuadPart - Temp.QuadPart;
+
+ if (Temp.QuadPart <= 0) {
+ ZeroFlags |= ZERO_FIRST_PAGE | ZERO_MIDDLE_PAGES | ZERO_LAST_PAGE;
+ } else if ((Temp.HighPart == 0) && (Temp.LowPart <= PAGE_SIZE)) {
+ ZeroFlags |= ZERO_MIDDLE_PAGES | ZERO_LAST_PAGE;
+ }
+
+ (VOID)CcMapAndRead( SharedCacheMap,
+ &FOffset,
+ ReceivedLength,
+ ZeroFlags,
+ TRUE,
+ &Vacb,
+ &CacheBuffer );
+
+ //
+ // Now attempt to allocate an Mdl to describe the mapped data.
+ //
+
+ DebugTrace( 0, mm, "IoAllocateMdl:\n", 0 );
+ DebugTrace( 0, mm, " BaseAddress = %08lx\n", CacheBuffer );
+ DebugTrace( 0, mm, " Length = %08lx\n", ReceivedLength );
+
+ Mdl = IoAllocateMdl( CacheBuffer,
+ ReceivedLength,
+ FALSE,
+ FALSE,
+ NULL );
+
+ DebugTrace( 0, mm, " <Mdl = %08lx\n", Mdl );
+
+ if (Mdl == NULL) {
+ DebugTrace( 0, 0, "Failed to allocate Mdl\n", 0 );
+
+ ExRaiseStatus( STATUS_INSUFFICIENT_RESOURCES );
+ }
+
+ DebugTrace( 0, mm, "MmProbeAndLockPages:\n", 0 );
+ DebugTrace( 0, mm, " Mdl = %08lx\n", Mdl );
+
+ //
+ // Note that this probe should never fail, because we can
+            // trust the address returned from CcMapAndRead.  Therefore,
+ // if we succeed in allocating the Mdl above, we should
+ // manage to elude any expected exceptions through the end
+ // of this loop.
+ //
+
+ MmDisablePageFaultClustering(&SavedState);
+ MmProbeAndLockPages( Mdl, KernelMode, IoWriteAccess );
+ MmEnablePageFaultClustering(SavedState);
+ SavedState = 0;
+
+ //
+ // Now that some data (maybe zeros) is locked in memory and
+ // set dirty, it is safe, and necessary for us to advance
+ // valid data goal, so that we will not subsequently ask
+ // for a zero page. Note if we are extending valid data,
+ // our caller has the file exclusive.
+ //
+
+ if (BeyondLastByte.QuadPart > SharedCacheMap->ValidDataGoal.QuadPart) {
+ SharedCacheMap->ValidDataGoal = BeyondLastByte;
+ }
+
+ //
+ // Unmap the data now, now that the pages are locked down.
+ //
+
+ CcFreeVirtualAddress( Vacb );
+ Vacb = NULL;
+
+ //
+ // Now link the Mdl into the caller's chain
+ //
+
+ if ( *MdlChain == NULL ) {
+ *MdlChain = Mdl;
+ } else {
+ MdlTemp = CONTAINING_RECORD( *MdlChain, MDL, Next );
+ while (MdlTemp->Next != NULL) {
+ MdlTemp = MdlTemp->Next;
+ }
+ MdlTemp->Next = Mdl;
+ }
+
+ //
+ // Assume we did not get all the data we wanted, and set FOffset
+ // to the end of the returned data.
+ //
+
+ FOffset = BeyondLastByte;
+
+ //
+ // Update number of bytes transferred.
+ //
+
+ Information += ReceivedLength;
+
+ //
+ // Calculate length left to transfer.
+ //
+
+ Length -= ReceivedLength;
+ }
+ }
+ finally {
+
+ if (AbnormalTermination()) {
+
+ if (SavedState != 0) {
+ MmEnablePageFaultClustering(SavedState);
+ }
+
+ if (Vacb != NULL) {
+ CcFreeVirtualAddress( Vacb );
+ }
+
+ //
+ // Otherwise loop to deallocate the Mdls
+ //
+
+ FOffset = *FileOffset;
+ while (*MdlChain != NULL) {
+ MdlTemp = (*MdlChain)->Next;
+
+ DebugTrace( 0, mm, "MmUnlockPages/IoFreeMdl:\n", 0 );
+ DebugTrace( 0, mm, " Mdl = %08lx\n", *MdlChain );
+
+ MmUnlockPages( *MdlChain );
+
+ //
+ // Extract the File Offset for this part of the transfer, and
+ // tell the lazy writer to write these pages, since we have
+ // marked them dirty. Ignore the only exception (allocation
+ // error), and console ourselves for having tried.
+ //
+
+ //
+ // try-except does not work on MS compiler. We can accept
+ // leaving a few good pages dirty...
+ //
+ // try {
+ // CcSetDirtyInMask( SharedCacheMap, &FOffset, (*MdlChain)->ByteCount );
+ // } except( CcCopyReadExceptionFilter( GetExceptionInformation(), &ExceptionCode ) ) {
+ // NOTHING;
+ // }
+
+ FOffset.QuadPart = FOffset.QuadPart + (LONGLONG)((*MdlChain)->ByteCount);
+
+ IoFreeMdl( *MdlChain );
+
+ *MdlChain = MdlTemp;
+ }
+
+ DebugTrace(-1, me, "CcPrepareMdlWrite -> Unwinding\n", 0 );
+ }
+ else {
+
+ IoStatus->Status = STATUS_SUCCESS;
+ IoStatus->Information = Information;
+
+ //
+ // Make sure the SharedCacheMap does not go away while
+            // the Mdl write is in progress.  We decrement it in CcMdlWriteComplete2.
+ //
+
+ ExAcquireFastLock( &CcMasterSpinLock, &OldIrql );
+ SharedCacheMap->OpenCount += 1;
+ ExReleaseFastLock( &CcMasterSpinLock, OldIrql );
+ }
+ }
+
+ DebugTrace( 0, me, " <MdlChain = %08lx\n", *MdlChain );
+ DebugTrace(-1, me, "CcPrepareMdlWrite -> VOID\n", 0 );
+
+ return;
+}
+
+
+//
+// First we have the old routine which checks for an entry in the FastIo vector.
+// This routine becomes obsolete for every component that compiles with the new
+// definition of FsRtlMdlWriteComplete in fsrtl.h.
+//
+
+VOID
+CcMdlWriteComplete (
+ IN PFILE_OBJECT FileObject,
+ IN PLARGE_INTEGER FileOffset,
+ IN PMDL MdlChain
+ )
+
+{
+ PDEVICE_OBJECT DeviceObject;
+ PFAST_IO_DISPATCH FastIoDispatch;
+
+ DeviceObject = IoGetRelatedDeviceObject( FileObject );
+ FastIoDispatch = DeviceObject->DriverObject->FastIoDispatch;
+
+ if ((FastIoDispatch != NULL) &&
+ (FastIoDispatch->SizeOfFastIoDispatch > FIELD_OFFSET(FAST_IO_DISPATCH, MdlWriteComplete)) &&
+ (FastIoDispatch->MdlWriteComplete != NULL)) {
+
+ FastIoDispatch->MdlWriteComplete( FileObject, FileOffset, MdlChain, DeviceObject );
+
+ } else {
+ CcMdlWriteComplete2( FileObject, FileOffset, MdlChain );
+ }
+}
+
+VOID
+CcMdlWriteComplete2 (
+ IN PFILE_OBJECT FileObject,
+ IN PLARGE_INTEGER FileOffset,
+ IN PMDL MdlChain
+ )
+
+/*++
+
+Routine Description:
+
+ This routine must be called at IPL0 after a call to CcPrepareMdlWrite.
+    The caller supplies the original FileOffset and the MdlChain returned
+    from the corresponding call to CcPrepareMdlWrite.
+
+ This call does the following:
+
+        Makes sure the data described by the MdlChain eventually gets written.
+        If the file is not write through, the data will not be written
+        immediately.  If the file is write through, the data is flushed
+        synchronously before this routine returns.
+
+ Unmaps the pages (if mapped), unlocks them and deletes the MdlChain
+
+Arguments:
+
+ FileObject - Pointer to the file object for a file which was
+ opened with NO_INTERMEDIATE_BUFFERING clear, i.e., for
+ which CcInitializeCacheMap was called by the file system.
+
+    FileOffset - Original file offset passed to CcPrepareMdlWrite.
+
+ MdlChain - same as returned from corresponding call to CcPrepareMdlWrite.
+
+Return Value:
+
+ None
+
+--*/
+
+{
+ PMDL MdlNext;
+ PSHARED_CACHE_MAP SharedCacheMap;
+ LARGE_INTEGER FOffset;
+ IO_STATUS_BLOCK IoStatus;
+ KIRQL OldIrql;
+ NTSTATUS StatusToRaise = STATUS_SUCCESS;
+
+ DebugTrace(+1, me, "CcMdlWriteComplete\n", 0 );
+ DebugTrace( 0, me, " FileObject = %08lx\n", FileObject );
+ DebugTrace( 0, me, " MdlChain = %08lx\n", MdlChain );
+
+ SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap;
+
+ //
+ // Deallocate the Mdls
+ //
+
+ FOffset.QuadPart = *(LONGLONG UNALIGNED *)FileOffset;
+ while (MdlChain != NULL) {
+
+ MdlNext = MdlChain->Next;
+
+ DebugTrace( 0, mm, "MmUnlockPages/IoFreeMdl:\n", 0 );
+ DebugTrace( 0, mm, " Mdl = %08lx\n", MdlChain );
+
+ //
+ // Now clear the dirty bits in the Pte and set them in the
+ // Pfn.
+ //
+
+ MmUnlockPages( MdlChain );
+
+ //
+ // Extract the File Offset for this part of the transfer.
+ //
+
+ if (FlagOn(FileObject->Flags, FO_WRITE_THROUGH)) {
+
+ MmFlushSection ( FileObject->SectionObjectPointer,
+ &FOffset,
+ MdlChain->ByteCount,
+ &IoStatus,
+ TRUE );
+
+ //
+ // If we got an I/O error, remember it.
+ //
+
+ if (!NT_SUCCESS(IoStatus.Status)) {
+ StatusToRaise = IoStatus.Status;
+ }
+
+ } else {
+
+ NTSTATUS ExceptionCode;
+
+ //
+ // Ignore the only exception (allocation error), and console
+ // ourselves for having tried.
+ //
+
+ try {
+ CcSetDirtyInMask( SharedCacheMap, &FOffset, MdlChain->ByteCount );
+ } except( CcCopyReadExceptionFilter( GetExceptionInformation(), &ExceptionCode ) ) {
+ StatusToRaise = STATUS_INSUFFICIENT_RESOURCES;
+ }
+ }
+
+ FOffset.QuadPart = FOffset.QuadPart + (LONGLONG)(MdlChain->ByteCount);
+
+ IoFreeMdl( MdlChain );
+
+ MdlChain = MdlNext;
+ }
+
+ //
+ // Now release our open count.
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+
+ SharedCacheMap->OpenCount -= 1;
+
+ if ((SharedCacheMap->OpenCount == 0) &&
+ !FlagOn(SharedCacheMap->Flags, WRITE_QUEUED) &&
+ (SharedCacheMap->DirtyPages == 0)) {
+
+ //
+ // Move to the dirty list.
+ //
+
+ RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
+ InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks,
+ &SharedCacheMap->SharedCacheMapLinks );
+
+ //
+ // Make sure the Lazy Writer will wake up, because we
+ // want him to delete this SharedCacheMap.
+ //
+
+ LazyWriter.OtherWork = TRUE;
+ if (!LazyWriter.ScanActive) {
+ CcScheduleLazyWriteScan();
+ }
+ }
+
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+
+ //
+ // If we got an I/O error, raise it now.
+ //
+
+ if (!NT_SUCCESS(StatusToRaise)) {
+ FsRtlNormalizeNtstatus( StatusToRaise,
+ STATUS_UNEXPECTED_IO_ERROR );
+ }
+
+    DebugTrace(-1, me, "CcMdlWriteComplete -> VOID\n", 0 );
+
+ return;
+}
+
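+//
+// Illustrative sketch only (not part of this module): a minimal example of
+// the Mdl write calling sequence, assuming the caller holds whatever file
+// system synchronization is required and has already extended the file if
+// necessary.  The helper name SampleMdlWritePath and the source buffer are
+// hypothetical; the pages come back locked but unmapped, so each Mdl is
+// mapped here before being copied into.
+//
+#if 0
+VOID
+SampleMdlWritePath (
+    IN PFILE_OBJECT FileObject,
+    IN PLARGE_INTEGER FileOffset,
+    IN ULONG Length,
+    IN PVOID SourceBuffer
+    )
+{
+    PMDL MdlChain = NULL;
+    PMDL Mdl;
+    PVOID SystemAddress;
+    IO_STATUS_BLOCK IoStatus;
+
+    //
+    // Lock (and possibly zero) the target pages.  Raises on failure.
+    //
+
+    CcPrepareMdlWrite( FileObject, FileOffset, Length, &MdlChain, &IoStatus );
+
+    //
+    // Copy the caller's data into each Mdl in the chain.
+    //
+
+    for (Mdl = MdlChain; Mdl != NULL; Mdl = Mdl->Next) {
+
+        SystemAddress = MmGetSystemAddressForMdl( Mdl );
+        RtlCopyMemory( SystemAddress, SourceBuffer, Mdl->ByteCount );
+        SourceBuffer = (PCHAR)SourceBuffer + Mdl->ByteCount;
+    }
+
+    //
+    // Mark the data dirty (or flush it for write through files) and free
+    // the Mdl chain.
+    //
+
+    CcMdlWriteComplete( FileObject, FileOffset, MdlChain );
+}
+#endif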
+
+
diff --git a/private/ntos/cache/mp/makefile b/private/ntos/cache/mp/makefile
new file mode 100644
index 000000000..6ee4f43fa
--- /dev/null
+++ b/private/ntos/cache/mp/makefile
@@ -0,0 +1,6 @@
+#
+# DO NOT EDIT THIS FILE!!! Edit .\sources. if you want to add a new source
+# file to this component. This file merely indirects to the real make file
+# that is shared by all the components of NT OS/2
+#
+!INCLUDE $(NTMAKEENV)\makefile.def
diff --git a/private/ntos/cache/mp/sources b/private/ntos/cache/mp/sources
new file mode 100644
index 000000000..dbeb18d62
--- /dev/null
+++ b/private/ntos/cache/mp/sources
@@ -0,0 +1,29 @@
+!IF 0
+
+Copyright (c) 1989 Microsoft Corporation
+
+Module Name:
+
+ sources.
+
+Abstract:
+
+ This file specifies the target component being built and the list of
+ sources files needed to build that component. Also specifies optional
+ compiler switches and libraries that are unique for the component being
+ built.
+
+
+Author:
+
+ Steve Wood (stevewo) 12-Apr-1990
+
+NOTE: Commented description of this file is in \nt\bak\bin\sources.tpl
+
+!ENDIF
+
+NT_UP=0
+
+TARGETPATH=..\..\mpobj
+
+!include ..\sources.inc
diff --git a/private/ntos/cache/pinsup.c b/private/ntos/cache/pinsup.c
new file mode 100644
index 000000000..142806b1a
--- /dev/null
+++ b/private/ntos/cache/pinsup.c
@@ -0,0 +1,1274 @@
+/*++
+
+Copyright (c) 1990 Microsoft Corporation
+
+Module Name:
+
+ pinsup.c
+
+Abstract:
+
+ This module implements the pointer-based Pin support routines for the
+ Cache subsystem.
+
+Author:
+
+ Tom Miller [TomM] 4-June-1990
+
+Revision History:
+
+--*/
+
+#include "cc.h"
+
+//
+// Define our debug constant
+//
+
+#define me 0x00000008
+
+#if LIST_DBG
+
+#define SetCallersAddress(BCB) { \
+ RtlGetCallersAddress( &(BCB)->CallerAddress, \
+ &(BCB)->CallersCallerAddress ); \
+}
+
+#endif
+
+//
+// Internal routines
+//
+
+POBCB
+CcAllocateObcb (
+ IN PLARGE_INTEGER FileOffset,
+ IN ULONG Length,
+ IN PBCB FirstBcb
+ );
+
+
+BOOLEAN
+CcMapData (
+ IN PFILE_OBJECT FileObject,
+ IN PLARGE_INTEGER FileOffset,
+ IN ULONG Length,
+ IN BOOLEAN Wait,
+ OUT PVOID *Bcb,
+ OUT PVOID *Buffer
+ )
+
+/*++
+
+Routine Description:
+
+ This routine attempts to map the specified file data in the cache.
+ A pointer is returned to the desired data in the cache.
+
+ If the caller does not want to block on this call, then
+ Wait should be supplied as FALSE. If Wait was supplied as FALSE and
+ it is currently impossible to supply the requested data without
+ blocking, then this routine will return FALSE. However, if the
+ data is immediately accessible in the cache and no blocking is
+ required, this routine returns TRUE with a pointer to the data.
+
+ Note that a call to this routine with Wait supplied as TRUE is
+    considerably faster than a call with Wait supplied as FALSE, because
+ in the Wait TRUE case we only have to make sure the data is mapped
+ in order to return.
+
+ It is illegal to modify data that is only mapped, and can in fact lead
+ to serious problems. It is impossible to check for this in all cases,
+ however CcSetDirtyPinnedData may implement some Assertions to check for
+ this. If the caller wishes to modify data that it has only mapped, then
+ it must *first* call CcPinMappedData.
+
+ In any case, the caller MUST subsequently call CcUnpinData.
+ Naturally if CcPinRead or CcPreparePinWrite were called multiple
+ times for the same data, CcUnpinData must be called the same number
+ of times.
+
+ The returned Buffer pointer is valid until the data is unpinned, at
+ which point it is invalid to use the pointer further. This buffer pointer
+ will remain valid if CcPinMappedData is called.
+
+ Note that under some circumstances (like Wait supplied as FALSE or more
+ than a page is requested), this routine may actually pin the data, however
+ it is not necessary, and in fact not correct, for the caller to be concerned
+ about this.
+
+Arguments:
+
+ FileObject - Pointer to the file object for a file which was
+ opened with NO_INTERMEDIATE_BUFFERING clear, i.e., for
+ which CcInitializeCacheMap was called by the file system.
+
+ FileOffset - Byte offset in file for desired data.
+
+ Length - Length of desired data in bytes.
+
+ Wait - FALSE if caller may not block, TRUE otherwise (see description
+ above)
+
+ Bcb - On the first call this returns a pointer to a Bcb
+ parameter which must be supplied as input on all subsequent
+ calls, for this buffer
+
+ Buffer - Returns pointer to desired data, valid until the buffer is
+ unpinned or freed. This pointer will remain valid if CcPinMappedData
+ is called.
+
+Return Value:
+
+ FALSE - if Wait was supplied as FALSE and the data was not delivered
+
+ TRUE - if the data is being delivered
+
+--*/
+
+{
+ PSHARED_CACHE_MAP SharedCacheMap;
+ LARGE_INTEGER BeyondLastByte;
+ ULONG ReceivedLength;
+ ULONG SavedState;
+ volatile UCHAR ch;
+ ULONG PageCount = COMPUTE_PAGES_SPANNED(((PVOID)FileOffset->LowPart), Length);
+ PETHREAD Thread = PsGetCurrentThread();
+
+ DebugTrace(+1, me, "CcMapData\n", 0 );
+
+ MmSavePageFaultReadAhead( Thread, &SavedState );
+
+ //
+ // Increment performance counters
+ //
+
+ if (Wait) {
+
+ CcMapDataWait += 1;
+
+ //
+ // Initialize the indirect pointer to our miss counter.
+ //
+
+ CcMissCounter = &CcMapDataWaitMiss;
+
+ } else {
+ CcMapDataNoWait += 1;
+ }
+
+ //
+ // Get pointer to SharedCacheMap.
+ //
+
+ SharedCacheMap = *(PSHARED_CACHE_MAP *)((PCHAR)FileObject->SectionObjectPointer
+ + sizeof(PVOID));
+
+ //
+ // Call local routine to Map or Access the file data. If we cannot map
+ // the data because of a Wait condition, return FALSE.
+ //
+
+ if (Wait) {
+
+ *Buffer = CcGetVirtualAddress( SharedCacheMap,
+ *FileOffset,
+ (PVACB *)Bcb,
+ &ReceivedLength );
+
+ ASSERT( ReceivedLength >= Length );
+
+ } else if (!CcPinFileData( FileObject,
+ FileOffset,
+ Length,
+ TRUE,
+ FALSE,
+ Wait,
+ (PBCB *)Bcb,
+ Buffer,
+ &BeyondLastByte )) {
+
+ DebugTrace(-1, me, "CcMapData -> FALSE\n", 0 );
+
+ CcMapDataNoWaitMiss += 1;
+
+ return FALSE;
+
+ } else {
+
+ ASSERT( (BeyondLastByte.QuadPart - FileOffset->QuadPart) >= Length );
+
+#if LIST_DBG
+ {
+ KIRQL OldIrql;
+ PBCB BcbTemp = (PBCB)*Bcb;
+
+ ExAcquireSpinLock( &CcBcbSpinLock, &OldIrql );
+
+ if (BcbTemp->CcBcbLinks.Flink == NULL) {
+
+ InsertTailList( &CcBcbList, &BcbTemp->CcBcbLinks );
+ CcBcbCount += 1;
+ ExReleaseSpinLock( &CcBcbSpinLock, OldIrql );
+ SetCallersAddress( BcbTemp );
+
+ } else {
+ ExReleaseSpinLock( &CcBcbSpinLock, OldIrql );
+ }
+
+ }
+#endif
+
+ }
+
+ //
+    // Now touch each page so we take (and count) any cache misses here.
+ //
+
+ try {
+
+ //
+ // Loop to touch each page
+ //
+
+ BeyondLastByte.LowPart = 0;
+
+ while (PageCount != 0) {
+
+ MmSetPageFaultReadAhead( Thread, PageCount - 1 );
+
+ ch = *((volatile UCHAR *)(*Buffer) + BeyondLastByte.LowPart);
+
+ BeyondLastByte.LowPart += PAGE_SIZE;
+ PageCount -= 1;
+ }
+
+ } finally {
+
+ MmResetPageFaultReadAhead( Thread, SavedState );
+
+ if (AbnormalTermination() && (*Bcb != NULL)) {
+ CcUnpinFileData( (PBCB)*Bcb, TRUE, UNPIN );
+ *Bcb = NULL;
+ }
+ }
+
+ CcMissCounter = &CcThrowAway;
+
+ //
+ // Increment the pointer as a reminder that it is read only.
+ //
+
+ *(PCHAR *)Bcb += 1;
+
+ DebugTrace(-1, me, "CcMapData -> TRUE\n", 0 );
+
+ return TRUE;
+}
+
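+//
+// Illustrative sketch only (not part of this module): how a caller might
+// combine CcMapData with CcPinMappedData and CcSetDirtyPinnedData when it
+// usually only reads a structure but occasionally must update it.  The
+// routine name SampleReadMaybeModify and the Modify flag are hypothetical.
+//
+#if 0
+BOOLEAN
+SampleReadMaybeModify (
+    IN PFILE_OBJECT FileObject,
+    IN PLARGE_INTEGER FileOffset,
+    IN ULONG Length,
+    IN BOOLEAN Modify
+    )
+{
+    PVOID Bcb;
+    PVOID Buffer;
+
+    //
+    // Map the data.  With Wait == TRUE this routine only fails by raising.
+    //
+
+    if (!CcMapData( FileObject, FileOffset, Length, TRUE, &Bcb, &Buffer )) {
+        return FALSE;
+    }
+
+    //
+    // ... read the structure at Buffer ...
+    //
+
+    if (Modify) {
+
+        //
+        // Data that is only mapped may not be modified; pin it first.
+        //
+
+        if (!CcPinMappedData( FileObject, FileOffset, Length, TRUE, &Bcb )) {
+            CcUnpinData( Bcb );
+            return FALSE;
+        }
+
+        //
+        // ... modify the structure at Buffer (the pointer stays valid) ...
+        //
+
+        CcSetDirtyPinnedData( Bcb, NULL );
+    }
+
+    //
+    // Exactly one unpin per successful map.
+    //
+
+    CcUnpinData( Bcb );
+    return TRUE;
+}
+#endif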
+
+BOOLEAN
+CcPinMappedData (
+ IN PFILE_OBJECT FileObject,
+ IN PLARGE_INTEGER FileOffset,
+ IN ULONG Length,
+ IN BOOLEAN Wait,
+ IN OUT PVOID *Bcb
+ )
+
+/*++
+
+Routine Description:
+
+ This routine attempts to pin data that was previously only mapped.
+ If the routine determines that in fact it was necessary to actually
+ pin the data when CcMapData was called, then this routine does not
+ have to do anything.
+
+ If the caller does not want to block on this call, then
+ Wait should be supplied as FALSE. If Wait was supplied as FALSE and
+ it is currently impossible to supply the requested data without
+ blocking, then this routine will return FALSE. However, if the
+ data is immediately accessible in the cache and no blocking is
+ required, this routine returns TRUE with a pointer to the data.
+
+ If the data is not returned in the first call, the caller
+ may request the data later with Wait = TRUE. It is not required
+ that the caller request the data later.
+
+ If the caller subsequently modifies the data, it should call
+ CcSetDirtyPinnedData.
+
+ In any case, the caller MUST subsequently call CcUnpinData.
+ Naturally if CcPinRead or CcPreparePinWrite were called multiple
+ times for the same data, CcUnpinData must be called the same number
+ of times.
+
+ Note there are no performance counters in this routine, as the misses
+ will almost always occur on the map above, and there will seldom be a
+ miss on this conversion.
+
+Arguments:
+
+ FileObject - Pointer to the file object for a file which was
+ opened with NO_INTERMEDIATE_BUFFERING clear, i.e., for
+ which CcInitializeCacheMap was called by the file system.
+
+ FileOffset - Byte offset in file for desired data.
+
+ Length - Length of desired data in bytes.
+
+ Wait - FALSE if caller may not block, TRUE otherwise (see description
+ above)
+
+ Bcb - On the first call this returns a pointer to a Bcb
+ parameter which must be supplied as input on all subsequent
+ calls, for this buffer
+
+Return Value:
+
+ FALSE - if Wait was supplied as FALSE and the data was not delivered
+
+ TRUE - if the data is being delivered
+
+--*/
+
+{
+ PVOID Buffer;
+ LARGE_INTEGER BeyondLastByte;
+ PSHARED_CACHE_MAP SharedCacheMap;
+ LARGE_INTEGER LocalFileOffset = *FileOffset;
+ POBCB MyBcb = NULL;
+ PBCB *CurrentBcbPtr = (PBCB *)&MyBcb;
+ BOOLEAN Result = FALSE;
+
+ DebugTrace(+1, me, "CcPinMappedData\n", 0 );
+
+ //
+ // If the Bcb is no longer ReadOnly, then just return.
+ //
+
+ if ((*(PULONG)Bcb & 1) == 0) {
+ return TRUE;
+ }
+
+ //
+ // Remove the Read Only flag
+ //
+
+ *(PCHAR *)Bcb -= 1;
+
+ //
+ // Get pointer to SharedCacheMap.
+ //
+
+ SharedCacheMap = *(PSHARED_CACHE_MAP *)((PCHAR)FileObject->SectionObjectPointer
+ + sizeof(PVOID));
+
+ //
+ // We only count the calls to this routine, since they are almost guaranteed
+ // to be hits.
+ //
+
+ CcPinMappedDataCount += 1;
+
+ //
+ // Guarantee we will put the flag back if required.
+ //
+
+ try {
+
+ if (((PBCB)*Bcb)->NodeTypeCode != CACHE_NTC_BCB) {
+
+ //
+            // Form loop to handle the occasional overlapped Bcb case.
+ //
+
+ do {
+
+ //
+ // If we have already been through the loop, then adjust
+ // our file offset and length from the last time.
+ //
+
+ if (MyBcb != NULL) {
+
+ //
+ // If this is the second time through the loop, then it is time
+ // to handle the overlap case and allocate an OBCB.
+ //
+
+ if (CurrentBcbPtr == (PBCB *)&MyBcb) {
+
+ MyBcb = CcAllocateObcb( FileOffset, Length, (PBCB)MyBcb );
+
+ //
+ // Set CurrentBcbPtr to point at the first entry in
+ // the vector (which is already filled in), before
+ // advancing it below.
+ //
+
+ CurrentBcbPtr = &MyBcb->Bcbs[0];
+ }
+
+ Length -= (ULONG)(BeyondLastByte.QuadPart - LocalFileOffset.QuadPart);
+ LocalFileOffset.QuadPart = BeyondLastByte.QuadPart;
+ CurrentBcbPtr += 1;
+ }
+
+ //
+ // Call local routine to Map or Access the file data. If we cannot map
+ // the data because of a Wait condition, return FALSE.
+ //
+
+ if (!CcPinFileData( FileObject,
+ &LocalFileOffset,
+ Length,
+ (BOOLEAN)!FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED),
+ FALSE,
+ Wait,
+ CurrentBcbPtr,
+ &Buffer,
+ &BeyondLastByte )) {
+
+ try_return( Result = FALSE );
+ }
+
+ //
+ // Continue looping if we did not get everything.
+ //
+
+ } while((BeyondLastByte.QuadPart - LocalFileOffset.QuadPart) < Length);
+
+ //
+ // Free the Vacb before going on.
+ //
+
+ CcFreeVirtualAddress( (PVACB)*Bcb );
+
+ *Bcb = MyBcb;
+
+ //
+ // Debug routines used to insert and remove Bcbs from the global list
+ //
+
+#if LIST_DBG
+ {
+ KIRQL OldIrql;
+ PBCB BcbTemp = (PBCB)*Bcb;
+
+ ExAcquireSpinLock( &CcBcbSpinLock, &OldIrql );
+
+ if (BcbTemp->CcBcbLinks.Flink == NULL) {
+
+ InsertTailList( &CcBcbList, &BcbTemp->CcBcbLinks );
+ CcBcbCount += 1;
+ ExReleaseSpinLock( &CcBcbSpinLock, OldIrql );
+ SetCallersAddress( BcbTemp );
+
+ } else {
+ ExReleaseSpinLock( &CcBcbSpinLock, OldIrql );
+ }
+
+ }
+#endif
+ }
+
+ //
+ // If he really has a Bcb, all we have to do is acquire it shared since he is
+        // If the caller really has a Bcb, all we have to do is acquire it
+        // shared, since it is no longer ReadOnly.
+
+ else {
+
+ if (!ExAcquireSharedStarveExclusive( &((PBCB)*Bcb)->Resource, Wait )) {
+
+ try_return( Result = FALSE );
+ }
+ }
+
+ Result = TRUE;
+
+ try_exit: NOTHING;
+ }
+ finally {
+
+ if (!Result) {
+
+ //
+ // Put the Read Only flag back
+ //
+
+ *(PCHAR *)Bcb += 1;
+
+ //
+ // We may have gotten partway through
+ //
+
+ if (MyBcb != NULL) {
+ CcUnpinData( MyBcb );
+ }
+ }
+
+ DebugTrace(-1, me, "CcPinMappedData -> %02lx\n", Result );
+ }
+ return Result;
+}
+
+
+BOOLEAN
+CcPinRead (
+ IN PFILE_OBJECT FileObject,
+ IN PLARGE_INTEGER FileOffset,
+ IN ULONG Length,
+ IN BOOLEAN Wait,
+ OUT PVOID *Bcb,
+ OUT PVOID *Buffer
+ )
+
+/*++
+
+Routine Description:
+
+ This routine attempts to pin the specified file data in the cache.
+ A pointer is returned to the desired data in the cache. This routine
+ is intended for File System support and is not intended to be called
+ from Dpc level.
+
+ If the caller does not want to block on this call, then
+ Wait should be supplied as FALSE. If Wait was supplied as FALSE and
+ it is currently impossible to supply the requested data without
+ blocking, then this routine will return FALSE. However, if the
+ data is immediately accessible in the cache and no blocking is
+ required, this routine returns TRUE with a pointer to the data.
+
+ If the data is not returned in the first call, the caller
+ may request the data later with Wait = TRUE. It is not required
+ that the caller request the data later.
+
+ If the caller subsequently modifies the data, it should call
+ CcSetDirtyPinnedData.
+
+ In any case, the caller MUST subsequently call CcUnpinData.
+ Naturally if CcPinRead or CcPreparePinWrite were called multiple
+ times for the same data, CcUnpinData must be called the same number
+ of times.
+
+ The returned Buffer pointer is valid until the data is unpinned, at
+ which point it is invalid to use the pointer further.
+
+Arguments:
+
+ FileObject - Pointer to the file object for a file which was
+ opened with NO_INTERMEDIATE_BUFFERING clear, i.e., for
+ which CcInitializeCacheMap was called by the file system.
+
+ FileOffset - Byte offset in file for desired data.
+
+ Length - Length of desired data in bytes.
+
+ Wait - Supplies TRUE if it is ok to block the caller's thread
+ Supplies 3 if it is ok to block the caller's thread and the Bcb should
+ be exclusive
+ Supplies FALSE if it is not ok to block the caller's thread
+
+ Bcb - On the first call this returns a pointer to a Bcb
+ parameter which must be supplied as input on all subsequent
+ calls, for this buffer
+
+ Buffer - Returns pointer to desired data, valid until the buffer is
+ unpinned or freed.
+
+Return Value:
+
+ FALSE - if Wait was supplied as FALSE and the data was not delivered
+
+ TRUE - if the data is being delivered
+
+--*/
+
+{
+ PSHARED_CACHE_MAP SharedCacheMap;
+ PVOID LocalBuffer;
+ LARGE_INTEGER BeyondLastByte;
+ LARGE_INTEGER LocalFileOffset = *FileOffset;
+ POBCB MyBcb = NULL;
+ PBCB *CurrentBcbPtr = (PBCB *)&MyBcb;
+ BOOLEAN Result = FALSE;
+
+ DebugTrace(+1, me, "CcPinRead\n", 0 );
+
+ //
+ // Increment performance counters
+ //
+
+ if (Wait) {
+
+ CcPinReadWait += 1;
+
+ //
+ // Initialize the indirect pointer to our miss counter.
+ //
+
+ CcMissCounter = &CcPinReadWaitMiss;
+
+ } else {
+ CcPinReadNoWait += 1;
+ }
+
+ //
+ // Get pointer to SharedCacheMap.
+ //
+
+ SharedCacheMap = *(PSHARED_CACHE_MAP *)((PCHAR)FileObject->SectionObjectPointer
+ + sizeof(PVOID));
+
+ try {
+
+ //
+        // Form loop to handle the occasional overlapped Bcb case.
+ //
+
+ do {
+
+ //
+ // If we have already been through the loop, then adjust
+ // our file offset and length from the last time.
+ //
+
+ if (MyBcb != NULL) {
+
+ //
+ // If this is the second time through the loop, then it is time
+ // to handle the overlap case and allocate an OBCB.
+ //
+
+ if (CurrentBcbPtr == (PBCB *)&MyBcb) {
+
+ MyBcb = CcAllocateObcb( FileOffset, Length, (PBCB)MyBcb );
+
+ //
+ // Set CurrentBcbPtr to point at the first entry in
+ // the vector (which is already filled in), before
+ // advancing it below.
+ //
+
+ CurrentBcbPtr = &MyBcb->Bcbs[0];
+
+ //
+ // Also on second time through, return starting Buffer
+ //
+
+ *Buffer = LocalBuffer;
+ }
+
+ Length -= (ULONG)(BeyondLastByte.QuadPart - LocalFileOffset.QuadPart);
+ LocalFileOffset.QuadPart = BeyondLastByte.QuadPart;
+ CurrentBcbPtr += 1;
+ }
+
+ //
+ // Call local routine to Map or Access the file data. If we cannot map
+ // the data because of a Wait condition, return FALSE.
+ //
+
+ if (!CcPinFileData( FileObject,
+ &LocalFileOffset,
+ Length,
+ (BOOLEAN)!FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED),
+ FALSE,
+ Wait,
+ CurrentBcbPtr,
+ &LocalBuffer,
+ &BeyondLastByte )) {
+
+ CcPinReadNoWaitMiss += 1;
+
+ try_return( Result = FALSE );
+ }
+
+ //
+ // Continue looping if we did not get everything.
+ //
+
+ } while((BeyondLastByte.QuadPart - LocalFileOffset.QuadPart) < Length);
+
+ *Bcb = MyBcb;
+
+ //
+ // Debug routines used to insert and remove Bcbs from the global list
+ //
+
+#if LIST_DBG
+
+ {
+ KIRQL OldIrql;
+ PBCB BcbTemp = (PBCB)*Bcb;
+
+ ExAcquireSpinLock( &CcBcbSpinLock, &OldIrql );
+
+ if (BcbTemp->CcBcbLinks.Flink == NULL) {
+
+ InsertTailList( &CcBcbList, &BcbTemp->CcBcbLinks );
+ CcBcbCount += 1;
+ ExReleaseSpinLock( &CcBcbSpinLock, OldIrql );
+ SetCallersAddress( BcbTemp );
+
+ } else {
+ ExReleaseSpinLock( &CcBcbSpinLock, OldIrql );
+ }
+
+ }
+
+#endif
+
+ //
+ // In the normal (nonoverlapping) case we return the
+ // correct buffer address here.
+ //
+
+ if (CurrentBcbPtr == (PBCB *)&MyBcb) {
+ *Buffer = LocalBuffer;
+ }
+
+ Result = TRUE;
+
+ try_exit: NOTHING;
+ }
+ finally {
+
+ CcMissCounter = &CcThrowAway;
+
+ if (!Result) {
+
+ //
+ // We may have gotten partway through
+ //
+
+ if (MyBcb != NULL) {
+ CcUnpinData( MyBcb );
+ }
+ }
+
+ DebugTrace(-1, me, "CcPinRead -> %02lx\n", Result );
+ }
+
+ return Result;
+}
+
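+//
+// Illustrative sketch only (not part of this module): the Wait == FALSE
+// pattern for CcPinRead, as it might appear on a path that cannot block.
+// The helper name SampleTryReadRecord is hypothetical.
+//
+#if 0
+BOOLEAN
+SampleTryReadRecord (
+    IN PFILE_OBJECT FileObject,
+    IN PLARGE_INTEGER FileOffset,
+    IN ULONG Length
+    )
+{
+    PVOID Bcb;
+    PVOID Buffer;
+
+    //
+    // Ask for the data without blocking.  FALSE simply means the request
+    // should be posted and retried later with Wait == TRUE; nothing is
+    // left pinned on this path.
+    //
+
+    if (!CcPinRead( FileObject, FileOffset, Length, FALSE, &Bcb, &Buffer )) {
+        return FALSE;
+    }
+
+    //
+    // ... consume the data at Buffer; a caller that modified it would
+    // call CcSetDirtyPinnedData( Bcb, NULL ) before unpinning ...
+    //
+
+    CcUnpinData( Bcb );
+    return TRUE;
+}
+#endif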
+
+BOOLEAN
+CcPreparePinWrite (
+ IN PFILE_OBJECT FileObject,
+ IN PLARGE_INTEGER FileOffset,
+ IN ULONG Length,
+ IN BOOLEAN Zero,
+ IN BOOLEAN Wait,
+ OUT PVOID *Bcb,
+ OUT PVOID *Buffer
+ )
+
+/*++
+
+Routine Description:
+
+ This routine attempts to lock the specified file data in the cache
+ and return a pointer to it along with the correct
+ I/O status. Pages to be completely overwritten may be satisfied
+    with empty pages.
+
+ If not all of the pages can be prepared, and Wait was supplied as
+ FALSE, then this routine will return FALSE, and its outputs will
+ be meaningless. The caller may request the data later with
+ Wait = TRUE. However, it is not required that the caller request
+ the data later.
+
+ If Wait is supplied as TRUE, and all of the pages can be prepared
+ without blocking, this call will return TRUE immediately. Otherwise,
+ this call will block until all of the pages can be prepared, and
+ then return TRUE.
+
+ When this call returns with TRUE, the caller may immediately begin
+ to transfer data into the buffers via the Buffer pointer. The
+ buffer will already be marked dirty.
+
+ The caller MUST subsequently call CcUnpinData.
+ Naturally if CcPinRead or CcPreparePinWrite were called multiple
+ times for the same data, CcUnpinData must be called the same number
+ of times.
+
+ The returned Buffer pointer is valid until the data is unpinned, at
+ which point it is invalid to use the pointer further.
+
+Arguments:
+
+ FileObject - Pointer to the file object for a file which was
+ opened with NO_INTERMEDIATE_BUFFERING clear, i.e., for
+ which CcInitializeCacheMap was called by the file system.
+
+ FileOffset - Byte offset in file for desired data.
+
+ Length - Length of desired data in bytes.
+
+ Zero - If supplied as TRUE, the buffer will be zeroed on return.
+
+ Wait - FALSE if caller may not block, TRUE otherwise (see description
+ above)
+
+ Bcb - This returns a pointer to a Bcb parameter which must be
+ supplied as input to CcPinWriteComplete.
+
+ Buffer - Returns pointer to desired data, valid until the buffer is
+ unpinned or freed.
+
+Return Value:
+
+ FALSE - if Wait was supplied as FALSE and the pages were not delivered
+
+ TRUE - if the pages are being delivered
+
+--*/
+
+{
+ PSHARED_CACHE_MAP SharedCacheMap;
+ PVOID LocalBuffer;
+ LARGE_INTEGER BeyondLastByte;
+ LARGE_INTEGER LocalFileOffset = *FileOffset;
+ POBCB MyBcb = NULL;
+ PBCB *CurrentBcbPtr = (PBCB *)&MyBcb;
+ ULONG OriginalLength = Length;
+ BOOLEAN Result = FALSE;
+
+ DebugTrace(+1, me, "CcPreparePinWrite\n", 0 );
+
+ //
+ // Get pointer to SharedCacheMap.
+ //
+
+ SharedCacheMap = *(PSHARED_CACHE_MAP *)((PCHAR)FileObject->SectionObjectPointer
+ + sizeof(PVOID));
+
+ try {
+
+ //
+        // Form loop to handle the occasional overlapped Bcb case.
+ //
+
+ do {
+
+ //
+ // If we have already been through the loop, then adjust
+ // our file offset and length from the last time.
+ //
+
+ if (MyBcb != NULL) {
+
+ //
+ // If this is the second time through the loop, then it is time
+ // to handle the overlap case and allocate an OBCB.
+ //
+
+ if (CurrentBcbPtr == (PBCB *)&MyBcb) {
+
+ MyBcb = CcAllocateObcb( FileOffset, Length, (PBCB)MyBcb );
+
+ //
+ // Set CurrentBcbPtr to point at the first entry in
+ // the vector (which is already filled in), before
+ // advancing it below.
+ //
+
+ CurrentBcbPtr = &MyBcb->Bcbs[0];
+
+ //
+ // Also on second time through, return starting Buffer
+ //
+
+ *Buffer = LocalBuffer;
+ }
+
+ Length -= (ULONG)(BeyondLastByte.QuadPart - LocalFileOffset.QuadPart);
+ LocalFileOffset.QuadPart = BeyondLastByte.QuadPart;
+ CurrentBcbPtr += 1;
+ }
+
+ //
+ // Call local routine to Map or Access the file data. If we cannot map
+ // the data because of a Wait condition, return FALSE.
+ //
+
+ if (!CcPinFileData( FileObject,
+ &LocalFileOffset,
+ Length,
+ FALSE,
+ TRUE,
+ Wait,
+ CurrentBcbPtr,
+ &LocalBuffer,
+ &BeyondLastByte )) {
+
+ try_return( Result = FALSE );
+ }
+
+ //
+ // Continue looping if we did not get everything.
+ //
+
+ } while((BeyondLastByte.QuadPart - LocalFileOffset.QuadPart) < Length);
+
+ *Bcb = MyBcb;
+
+ //
+ // Debug routines used to insert and remove Bcbs from the global list
+ //
+
+#if LIST_DBG
+
+ {
+ KIRQL OldIrql;
+ PBCB BcbTemp = (PBCB)*Bcb;
+
+ ExAcquireSpinLock( &CcBcbSpinLock, &OldIrql );
+
+ if (BcbTemp->CcBcbLinks.Flink == NULL) {
+
+ InsertTailList( &CcBcbList, &BcbTemp->CcBcbLinks );
+ CcBcbCount += 1;
+ ExReleaseSpinLock( &CcBcbSpinLock, OldIrql );
+ SetCallersAddress( BcbTemp );
+
+ } else {
+ ExReleaseSpinLock( &CcBcbSpinLock, OldIrql );
+ }
+
+ }
+
+#endif
+
+ //
+ // In the normal (nonoverlapping) case we return the
+ // correct buffer address here.
+ //
+
+ if (CurrentBcbPtr == (PBCB *)&MyBcb) {
+ *Buffer = LocalBuffer;
+ }
+
+ if (Zero) {
+ RtlZeroMemory( *Buffer, OriginalLength );
+ }
+
+ CcSetDirtyPinnedData( MyBcb, NULL );
+
+ Result = TRUE;
+
+ try_exit: NOTHING;
+ }
+ finally {
+
+ CcMissCounter = &CcThrowAway;
+
+ if (!Result) {
+
+ //
+ // We may have gotten partway through
+ //
+
+ if (MyBcb != NULL) {
+ CcUnpinData( MyBcb );
+ }
+ }
+
+ DebugTrace(-1, me, "CcPreparePinWrite -> %02lx\n", Result );
+ }
+
+ return Result;
+}
+
+
+VOID
+CcUnpinData (
+ IN PVOID Bcb
+ )
+
+/*++
+
+Routine Description:
+
+ This routine must be called at IPL0, some time after calling CcPinRead
+ or CcPreparePinWrite. It performs any cleanup that is necessary.
+
+Arguments:
+
+ Bcb - Bcb parameter returned from the last call to CcPinRead.
+
+Return Value:
+
+ None.
+
+--*/
+
+{
+ DebugTrace(+1, me, "CcUnpinData:\n", 0 );
+ DebugTrace( 0, me, " >Bcb = %08lx\n", Bcb );
+
+ //
+ // Test for ReadOnly and unpin accordingly.
+ //
+
+ if (((ULONG)Bcb & 1) != 0) {
+
+ //
+ // Remove the Read Only flag
+ //
+
+ (PCHAR)Bcb -= 1;
+
+ CcUnpinFileData( (PBCB)Bcb, TRUE, UNPIN );
+
+ } else {
+
+ //
+ // Handle the overlapped Bcb case.
+ //
+
+ if (((POBCB)Bcb)->NodeTypeCode == CACHE_NTC_OBCB) {
+
+ PBCB *BcbPtrPtr = &((POBCB)Bcb)->Bcbs[0];
+
+ //
+ // Loop to free all Bcbs with recursive calls
+ // (rather than dealing with RO for this uncommon case).
+ //
+
+ while (*BcbPtrPtr != NULL) {
+ CcUnpinData(*(BcbPtrPtr++));
+ }
+
+ //
+ // Then free the pool for the Obcb
+ //
+
+ ExFreePool( Bcb );
+
+ //
+ // Otherwise, it is a normal Bcb
+ //
+
+ } else {
+ CcUnpinFileData( (PBCB)Bcb, FALSE, UNPIN );
+ }
+ }
+
+ DebugTrace(-1, me, "CcUnPinData -> VOID\n", 0 );
+}
+
+
+VOID
+CcSetBcbOwnerPointer (
+ IN PVOID Bcb,
+ IN PVOID OwnerPointer
+ )
+
+/*++
+
+Routine Description:
+
+ This routine may be called to set the resource owner for the Bcb resource,
+ for cases where another thread will do the unpin *and* the current thread
+ may exit.
+
+Arguments:
+
+ Bcb - Bcb parameter returned from the last call to CcPinRead.
+
+ OwnerPointer - A valid resource owner pointer, which means a pointer to
+ an allocated system address, with the low-order two bits
+ set. The address may not be deallocated until after the
+ unpin call.
+
+Return Value:
+
+ None.
+
+--*/
+
+{
+ ASSERT(((ULONG)Bcb & 1) == 0);
+
+ //
+ // Handle the overlapped Bcb case.
+ //
+
+ if (((POBCB)Bcb)->NodeTypeCode == CACHE_NTC_OBCB) {
+
+ PBCB *BcbPtrPtr = &((POBCB)Bcb)->Bcbs[0];
+
+ //
+ // Loop to set owner for all Bcbs.
+ //
+
+ while (*BcbPtrPtr != NULL) {
+ ExSetResourceOwnerPointer( &(*BcbPtrPtr)->Resource, OwnerPointer );
+ BcbPtrPtr++;
+ }
+
+ //
+ // Otherwise, it is a normal Bcb
+ //
+
+ } else {
+
+ //
+ // Handle normal case.
+ //
+
+ ExSetResourceOwnerPointer( &((PBCB)Bcb)->Resource, OwnerPointer );
+ }
+}
+
+
+VOID
+CcUnpinDataForThread (
+ IN PVOID Bcb,
+ IN ERESOURCE_THREAD ResourceThreadId
+ )
+
+/*++
+
+Routine Description:
+
+ This routine must be called at IPL0, some time after calling CcPinRead
+ or CcPreparePinWrite. It performs any cleanup that is necessary,
+ releasing the Bcb resource for the given thread.
+
+Arguments:
+
+    Bcb - Bcb parameter returned from the last call to CcPinRead.
+
+    ResourceThreadId - Resource owner id (see CcSetBcbOwnerPointer) on whose
+        behalf the Bcb resource is to be released.
+
+Return Value:
+
+ None.
+
+--*/
+
+{
+ DebugTrace(+1, me, "CcUnpinDataForThread:\n", 0 );
+ DebugTrace( 0, me, " >Bcb = %08lx\n", Bcb );
+    DebugTrace( 0, me, "    >ResourceThreadId = %08lx\n", ResourceThreadId );
+
+ //
+ // Test for ReadOnly and unpin accordingly.
+ //
+
+ if (((ULONG)Bcb & 1) != 0) {
+
+ //
+ // Remove the Read Only flag
+ //
+
+ (PCHAR)Bcb -= 1;
+
+ CcUnpinFileData( (PBCB)Bcb, TRUE, UNPIN );
+
+ } else {
+
+ //
+ // Handle the overlapped Bcb case.
+ //
+
+ if (((POBCB)Bcb)->NodeTypeCode == CACHE_NTC_OBCB) {
+
+ PBCB *BcbPtrPtr = &((POBCB)Bcb)->Bcbs[0];
+
+ //
+ // Loop to free all Bcbs with recursive calls
+ // (rather than dealing with RO for this uncommon case).
+ //
+
+ while (*BcbPtrPtr != NULL) {
+ CcUnpinDataForThread( *(BcbPtrPtr++), ResourceThreadId );
+ }
+
+ //
+ // Then free the pool for the Obcb
+ //
+
+ ExFreePool( Bcb );
+
+ //
+ // Otherwise, it is a normal Bcb
+ //
+
+ } else {
+
+ //
+ // If not readonly, we can release the resource for the thread first,
+ // and then call CcUnpinFileData. Release resource first in case
+ // Bcb gets deallocated.
+ //
+
+ ExReleaseResourceForThread( &((PBCB)Bcb)->Resource, ResourceThreadId );
+ CcUnpinFileData( (PBCB)Bcb, TRUE, UNPIN );
+ }
+ }
+ DebugTrace(-1, me, "CcUnpinDataForThread -> VOID\n", 0 );
+}
+
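+//
+// Illustrative sketch only (not part of this module): the hand-off pattern
+// that CcSetBcbOwnerPointer and CcUnpinDataForThread support, where the
+// pinning thread may exit before the data is unpinned.  The context
+// structure and routine names are hypothetical; the owner id is formed
+// from an allocated system address with the low-order two bits set, as
+// required by CcSetBcbOwnerPointer.
+//
+#if 0
+typedef struct _SAMPLE_DEFERRED_UNPIN {
+    PVOID Bcb;
+    ERESOURCE_THREAD OwnerId;
+} SAMPLE_DEFERRED_UNPIN, *PSAMPLE_DEFERRED_UNPIN;
+
+VOID
+SampleHandOffPinnedData (
+    IN PVOID Bcb,
+    IN PSAMPLE_DEFERRED_UNPIN Context
+    )
+{
+    //
+    // Transfer ownership of the Bcb resource to an owner id that is not
+    // a real thread, so the pinning thread is free to exit.
+    //
+
+    Context->Bcb = Bcb;
+    Context->OwnerId = (ERESOURCE_THREAD)((ULONG)Context | 3);
+
+    CcSetBcbOwnerPointer( Bcb, (PVOID)Context->OwnerId );
+}
+
+VOID
+SampleDeferredUnpin (
+    IN PSAMPLE_DEFERRED_UNPIN Context
+    )
+{
+    //
+    // Release the Bcb resource on behalf of the original owner id and
+    // unpin the data.
+    //
+
+    CcUnpinDataForThread( Context->Bcb, Context->OwnerId );
+}
+#endif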
+
+POBCB
+CcAllocateObcb (
+ IN PLARGE_INTEGER FileOffset,
+ IN ULONG Length,
+ IN PBCB FirstBcb
+ )
+
+/*++
+
+Routine Description:
+
+ This routine is called by the various pinning routines to allocate and
+ initialize an overlap Bcb.
+
+Arguments:
+
+ FileOffset - Starting file offset for the Obcb (An Obcb starts with a
+ public structure, which someone could use)
+
+ Length - Length of the range covered by the Obcb
+
+ FirstBcb - First Bcb already created, which only covers the start of
+ the desired range (low order bit may be set to indicate ReadOnly)
+
+Return Value:
+
+ Pointer to the allocated Obcb
+
+--*/
+
+{
+ ULONG LengthToAllocate;
+ POBCB Obcb;
+
+ //
+ // Allocate according to the worst case, assuming that we
+ // will need as many additional Bcbs as there are pages
+ // remaining. (One Bcb pointer is already in OBCB.) Also
+ // throw in one more pointer to guarantee users of the OBCB
+ // can always terminate on NULL.
+ //
+
+ LengthToAllocate = sizeof(OBCB) +
+ (((Length - ((PBCB)((ULONG)FirstBcb & ~1))->ByteLength +
+ (2 * PAGE_SIZE) - 1) / PAGE_SIZE) * sizeof(PBCB));
+
+ Obcb = FsRtlAllocatePool( NonPagedPool, LengthToAllocate );
+ RtlZeroMemory( Obcb, LengthToAllocate );
+ Obcb->NodeTypeCode = CACHE_NTC_OBCB;
+ Obcb->NodeByteSize = (USHORT)LengthToAllocate;
+ Obcb->ByteLength = Length;
+ Obcb->FileOffset = *FileOffset;
+ Obcb->Bcbs[0] = FirstBcb;
+
+ return Obcb;
+}
diff --git a/private/ntos/cache/sources.inc b/private/ntos/cache/sources.inc
new file mode 100644
index 000000000..8e6e120b6
--- /dev/null
+++ b/private/ntos/cache/sources.inc
@@ -0,0 +1,53 @@
+!IF 0
+
+Copyright (c) 1989 Microsoft Corporation
+
+Module Name:
+
+ sources.
+
+Abstract:
+
+ This file specifies the target component being built and the list of
+ sources files needed to build that component. Also specifies optional
+ compiler switches and libraries that are unique for the component being
+ built.
+
+
+Author:
+
+ Steve Wood (stevewo) 12-Apr-1990
+
+NOTE: Commented description of this file is in \nt\bak\bin\sources.tpl
+
+!ENDIF
+
+MAJORCOMP=ntos
+MINORCOMP=cache
+
+TARGETNAME=cache
+TARGETTYPE=LIBRARY
+
+INCLUDES=..;..\..\inc
+MIPS_OPTIONS=-nodwalign
+GPSIZE=32
+
+MSC_WARNING_LEVEL=/W3 /WX
+
+C_DEFINES=$(C_DEFINES) -D_NTSYSTEM_
+
+SOURCES=..\cachedat.c \
+ ..\cachesub.c \
+ ..\copysup.c \
+ ..\fssup.c \
+ ..\lazyrite.c \
+ ..\logsup.c \
+ ..\mdlsup.c \
+ ..\pinsup.c \
+ ..\vacbsup.c
+
+PRECOMPILED_INCLUDE=..\cc.h
+PRECOMPILED_PCH=cc.pch
+PRECOMPILED_OBJ=cc.obj
+
+SOURCES_USED=..\sources.inc
diff --git a/private/ntos/cache/up/makefile b/private/ntos/cache/up/makefile
new file mode 100644
index 000000000..6ee4f43fa
--- /dev/null
+++ b/private/ntos/cache/up/makefile
@@ -0,0 +1,6 @@
+#
+# DO NOT EDIT THIS FILE!!! Edit .\sources. if you want to add a new source
+# file to this component. This file merely indirects to the real make file
+# that is shared by all the components of NT OS/2
+#
+!INCLUDE $(NTMAKEENV)\makefile.def
diff --git a/private/ntos/cache/up/sources b/private/ntos/cache/up/sources
new file mode 100644
index 000000000..6dca9c583
--- /dev/null
+++ b/private/ntos/cache/up/sources
@@ -0,0 +1,27 @@
+!IF 0
+
+Copyright (c) 1989 Microsoft Corporation
+
+Module Name:
+
+ sources.
+
+Abstract:
+
+ This file specifies the target component being built and the list of
+ sources files needed to build that component. Also specifies optional
+ compiler switches and libraries that are unique for the component being
+ built.
+
+
+Author:
+
+ Steve Wood (stevewo) 12-Apr-1990
+
+NOTE: Commented description of this file is in \nt\bak\bin\sources.tpl
+
+!ENDIF
+
+TARGETPATH=..\..\obj
+
+!include ..\sources.inc
diff --git a/private/ntos/cache/vacbsup.c b/private/ntos/cache/vacbsup.c
new file mode 100644
index 000000000..d1e0e09f9
--- /dev/null
+++ b/private/ntos/cache/vacbsup.c
@@ -0,0 +1,1421 @@
+/*++
+
+Copyright (c) 1990 Microsoft Corporation
+
+Module Name:
+
+ vacbsup.c
+
+Abstract:
+
+ This module implements the support routines for the Virtual Address
+ Control Block support for the Cache Manager. These routines are used
+ to manage a large number of relatively small address windows to map
+ file data for all forms of cache access.
+
+Author:
+
+ Tom Miller [TomM] 8-Feb-1992
+
+Revision History:
+
+--*/
+
+#include "cc.h"
+
+//
+// Define our debug constant
+//
+
+#define me 0x000000040
+
+//
+// Define a few macros for manipulating the Vacb array.
+//
+
+#define GetVacb(SCM,OFF) ( \
+ ((OFF).HighPart != 0) ? \
+ (SCM)->Vacbs[(ULONG)((ULONGLONG)((OFF).QuadPart) >> VACB_OFFSET_SHIFT)] : \
+ (SCM)->Vacbs[(OFF).LowPart >> VACB_OFFSET_SHIFT] \
+)
+
+#define SetVacb(SCM,OFF,VACB) { \
+ ASSERT((OFF).HighPart < VACB_MAPPING_GRANULARITY); \
+ if ((OFF).HighPart != 0) { \
+ (SCM)->Vacbs[(ULONG)((ULONGLONG)((OFF).QuadPart) >> VACB_OFFSET_SHIFT)] = (VACB); \
+ } else {(SCM)->Vacbs[(OFF).LowPart >> VACB_OFFSET_SHIFT] = (VACB);} \
+}
+
+#define SizeOfVacbArray(LSZ) ( \
+ ((LSZ).HighPart != 0) ? \
+ ((ULONG)((ULONGLONG)((LSZ).QuadPart) >> VACB_OFFSET_SHIFT) * sizeof(PVACB)) : \
+ (LSZ).LowPart > (PREALLOCATED_VACBS * VACB_MAPPING_GRANULARITY) ? \
+ (((LSZ).LowPart >> VACB_OFFSET_SHIFT) * sizeof(PVACB)) : \
+ (PREALLOCATED_VACBS * sizeof(PVACB)) \
+)
+
+#define CheckedDec(N) { \
+ ASSERT((N) != 0); \
+ (N) -= 1; \
+}
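+
+//
+// Illustrative sketch (not part of this module) of the index arithmetic the
+// macros above perform: a file offset selects a slot in the Vacbs pointer
+// array by shifting out the view granularity. The 256KB view size and the
+// shift value of 18 are assumptions for the example only.
+//
+//     #define EXAMPLE_VACB_OFFSET_SHIFT 18        // assumed 256KB views
+//
+//     ULONG
+//     ExampleVacbIndex (
+//         IN LARGE_INTEGER FileOffset
+//         )
+//     {
+//         return (ULONG)((ULONGLONG)FileOffset.QuadPart >> EXAMPLE_VACB_OFFSET_SHIFT);
+//     }
+//
+// For instance, offsets 0 through 0x3FFFF all select slot 0 of the array,
+// and offset 0x40000 selects slot 1.
+//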
+
+//
+// Internal Support Routines.
+//
+
+VOID
+CcUnmapVacb (
+ IN PVACB Vacb,
+ IN PSHARED_CACHE_MAP SharedCacheMap
+ );
+
+PVACB
+CcGetVacbMiss (
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN LARGE_INTEGER FileOffset,
+ IN OUT PKIRQL OldIrql
+ );
+
+#ifdef ALLOC_PRAGMA
+#pragma alloc_text(INIT, CcInitializeVacbs)
+#endif
+
+
+VOID
+CcInitializeVacbs(
+)
+
+/*++
+
+Routine Description:
+
+ This routine must be called during Cache Manager initialization to
+ initialize the Virtual Address Control Block structures.
+
+Arguments:
+
+ None.
+
+Return Value:
+
+ None.
+
+--*/
+
+{
+ ULONG VacbBytes;
+
+ CcNumberVacbs = (MmSizeOfSystemCacheInPages >> (VACB_OFFSET_SHIFT - PAGE_SHIFT)) - 2;
+ VacbBytes = CcNumberVacbs * sizeof(VACB);
+
+ KeInitializeSpinLock( &CcVacbSpinLock );
+ CcNextVictimVacb =
+ CcVacbs = (PVACB)FsRtlAllocatePool( NonPagedPool, VacbBytes );
+ CcBeyondVacbs = (PVACB)((PCHAR)CcVacbs + VacbBytes);
+ RtlZeroMemory( CcVacbs, VacbBytes );
+}
+
+
+PVOID
+CcGetVirtualAddressIfMapped (
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN LONGLONG FileOffset,
+ OUT PVACB *Vacb,
+ OUT PULONG ReceivedLength
+ )
+
+/*++
+
+Routine Description:
+
+ This routine returns a virtual address for the specified FileOffset,
+ iff it is mapped. Otherwise, it informs the caller that the specified
+ virtual address was not mapped. In the latter case, it still returns
+ a ReceivedLength, which may be used to advance to the next view boundary.
+
+Arguments:
+
+ SharedCacheMap - Supplies a pointer to the Shared Cache Map for the file.
+
+ FileOffset - Supplies the desired FileOffset within the file.
+
+ Vacb - Returns a Vacb pointer which must be supplied later to free
+ this virtual address, or NULL if not mapped.
+
+ ReceivedLength - Returns the number of bytes to the next view boundary,
+ whether the desired file offset is mapped or not.
+
+Return Value:
+
+ The virtual address at which the desired data is mapped, or NULL if it
+ is not mapped.
+
+--*/
+
+{
+ KIRQL OldIrql;
+ ULONG VacbOffset = (ULONG)FileOffset & (VACB_MAPPING_GRANULARITY - 1);
+ PVOID Value = NULL;
+
+ ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL);
+
+ //
+ // Generate ReceivedLength return right away.
+ //
+
+ *ReceivedLength = VACB_MAPPING_GRANULARITY - VacbOffset;
+
+ //
+ // Acquire the Vacb lock to see if the desired offset is already mapped.
+ //
+
+ ExAcquireFastLock( &CcVacbSpinLock, &OldIrql );
+
+ ASSERT( FileOffset <= SharedCacheMap->SectionSize.QuadPart );
+
+ if ((*Vacb = GetVacb( SharedCacheMap, *(PLARGE_INTEGER)&FileOffset )) != NULL) {
+
+ if ((*Vacb)->Overlay.ActiveCount == 0) {
+ SharedCacheMap->VacbActiveCount += 1;
+ }
+
+ (*Vacb)->Overlay.ActiveCount += 1;
+
+
+ Value = (PVOID)((PCHAR)(*Vacb)->BaseAddress + VacbOffset);
+ }
+
+ ExReleaseFastLock( &CcVacbSpinLock, OldIrql );
+ return Value;
+}
+
+
+PVOID
+CcGetVirtualAddress (
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN LARGE_INTEGER FileOffset,
+ OUT PVACB *Vacb,
+ OUT PULONG ReceivedLength
+ )
+
+/*++
+
+Routine Description:
+
+ This is the main routine for Vacb management. It may be called to acquire
+ a virtual address for a given file offset. If the desired file offset is
+ already mapped, this routine does very little work before returning with
+ the desired virtual address and Vacb pointer (which must be supplied to
+ free the mapping).
+
+ If the desired virtual address is not currently mapped, then this routine
+ claims a Vacb from the tail of the Vacb LRU to reuse its mapping. This Vacb
+ is then unmapped if necessary (normally not required), and mapped to the
+ desired address.
+
+Arguments:
+
+ SharedCacheMap - Supplies a pointer to the Shared Cache Map for the file.
+
+ FileOffset - Supplies the desired FileOffset within the file.
+
+ Vacb - Returns a Vacb pointer which must be supplied later to free
+ this virtual address.
+
+ ReceivedLength - Returns the number of bytes which are contiguously
+ mapped starting at the virtual address returned.
+
+Return Value:
+
+ The virtual address at which the desired data is mapped.
+
+--*/
+
+{
+ KIRQL OldIrql;
+ PVACB TempVacb;
+ ULONG VacbOffset = FileOffset.LowPart & (VACB_MAPPING_GRANULARITY - 1);
+
+ ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL);
+
+ //
+ // Acquire the Vacb lock to see if the desired offset is already mapped.
+ //
+
+ ExAcquireSpinLock( &CcVacbSpinLock, &OldIrql );
+
+ ASSERT( FileOffset.QuadPart <= SharedCacheMap->SectionSize.QuadPart );
+
+ if ((TempVacb = GetVacb( SharedCacheMap, FileOffset )) == NULL) {
+
+ TempVacb = CcGetVacbMiss( SharedCacheMap, FileOffset, &OldIrql );
+
+ } else {
+
+ if (TempVacb->Overlay.ActiveCount == 0) {
+ SharedCacheMap->VacbActiveCount += 1;
+ }
+
+ TempVacb->Overlay.ActiveCount += 1;
+ }
+
+ ExReleaseSpinLock( &CcVacbSpinLock, OldIrql );
+
+ //
+ // Now form all outputs.
+ //
+
+ *Vacb = TempVacb;
+ *ReceivedLength = VACB_MAPPING_GRANULARITY - VacbOffset;
+
+ ASSERT(KeGetCurrentIrql() < DISPATCH_LEVEL);
+
+ return (PVOID)((PCHAR)TempVacb->BaseAddress + VacbOffset);
+}
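+
+//
+// Illustrative sketch (not part of this module) of the intended calling
+// pattern for CcGetVirtualAddress/CcFreeVirtualAddress: map each view in
+// turn, consume at most ReceivedLength bytes, free the view, and advance.
+// The copy loop below is an assumption for the example; real callers such
+// as the copy and MDL interfaces add their own exception handling.
+//
+//     VOID
+//     ExampleReadThroughViews (
+//         IN PSHARED_CACHE_MAP SharedCacheMap,
+//         IN LARGE_INTEGER FileOffset,
+//         IN ULONG Length,
+//         OUT PVOID Buffer
+//         )
+//     {
+//         PVACB Vacb;
+//         PVOID CacheAddress;
+//         ULONG ReceivedLength;
+//
+//         while (Length != 0) {
+//
+//             CacheAddress = CcGetVirtualAddress( SharedCacheMap,
+//                                                 FileOffset,
+//                                                 &Vacb,
+//                                                 &ReceivedLength );
+//
+//             if (ReceivedLength > Length) {
+//                 ReceivedLength = Length;
+//             }
+//
+//             RtlCopyMemory( Buffer, CacheAddress, ReceivedLength );
+//
+//             CcFreeVirtualAddress( Vacb );
+//
+//             Buffer = (PCHAR)Buffer + ReceivedLength;
+//             FileOffset.QuadPart += ReceivedLength;
+//             Length -= ReceivedLength;
+//         }
+//     }
+//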
+
+
+PVACB
+CcGetVacbMiss (
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN LARGE_INTEGER FileOffset,
+ IN OUT PKIRQL OldIrql
+ )
+
+/*++
+
+Routine Description:
+
+ This routine handles the miss case for CcGetVirtualAddress, i.e., the
+ desired file offset is not currently mapped. It claims a Vacb by scanning
+ from the next victim for an inactive view, unmaps that view if necessary
+ (normally not required), and maps it to the desired file offset.
+
+ Since the Vacb lock must be dropped to do the unmap and map, this routine
+ rechecks before returning whether someone else mapped the same view in the
+ meantime, and if so uses that Vacb instead.
+
+Arguments:
+
+ SharedCacheMap - Supplies a pointer to the Shared Cache Map for the file.
+
+ FileOffset - Supplies the desired FileOffset within the file.
+
+ OldIrql - Supplies a pointer to the caller's OldIrql variable. The Vacb
+ spin lock is held on entry and on return, but may be dropped and
+ reacquired within this routine.
+
+Return Value:
+
+ The Vacb.
+
+--*/
+
+{
+ PSHARED_CACHE_MAP OldSharedCacheMap;
+ PVACB Vacb, TempVacb;
+ LARGE_INTEGER MappedLength;
+ LARGE_INTEGER NormalOffset;
+ NTSTATUS Status;
+ ULONG ActivePage;
+ ULONG PageIsDirty;
+ PVACB ActiveVacb = NULL;
+ BOOLEAN MasterAcquired = FALSE;
+ ULONG VacbOffset = FileOffset.LowPart & (VACB_MAPPING_GRANULARITY - 1);
+
+ NormalOffset = FileOffset;
+ NormalOffset.LowPart -= VacbOffset;
+
+ //
+ // For Sequential only files, we periodically unmap unused views
+ // behind us as we go, to keep from hogging memory.
+ //
+
+ if (FlagOn(SharedCacheMap->Flags, ONLY_SEQUENTIAL_ONLY_SEEN) &&
+ ((NormalOffset.LowPart & (SEQUENTIAL_ONLY_MAP_LIMIT - 1)) == 0) &&
+ (NormalOffset.QuadPart >= (SEQUENTIAL_ONLY_MAP_LIMIT * 2))) {
+
+ //
+ // Use MappedLength as a scratch variable to form the offset
+ // to start unmapping. We are not synchronized with these past
+ // views, so it is possible that CcUnmapVacbArray will kick out
+ // early when it sees an active view. That is why we go back
+ // twice the distance, and effectively try to unmap everything
+ // twice. The second time should normally do it. If the file
+ // is truly sequential only, then the only collision expected
+ // might be the previous view if we are being called from readahead,
+ // or there is a small chance that we can collide with the
+ // Lazy Writer during the small window where he briefly maps
+ // the file to push out the dirty bits.
+ //
+
+ ExReleaseSpinLock( &CcVacbSpinLock, *OldIrql );
+ MappedLength.QuadPart = NormalOffset.QuadPart - (SEQUENTIAL_ONLY_MAP_LIMIT * 2);
+ CcUnmapVacbArray( SharedCacheMap, &MappedLength, (SEQUENTIAL_ONLY_MAP_LIMIT * 2) );
+ ExAcquireSpinLock( &CcVacbSpinLock, OldIrql );
+ }
+
+ //
+ // Scan from the next victim for a free Vacb
+ //
+
+ Vacb = CcNextVictimVacb;
+
+ while (TRUE) {
+
+ //
+ // Handle the wrap case
+ //
+
+ if (Vacb == CcBeyondVacbs) {
+ Vacb = CcVacbs;
+ }
+
+ //
+ // If this guy is not active, break out and use him. Also, if
+ // it is an Active Vacb, nuke it now, because the reader may be idle and we
+ // want to clean up.
+ //
+
+ OldSharedCacheMap = Vacb->SharedCacheMap;
+ if ((Vacb->Overlay.ActiveCount == 0) ||
+ ((ActiveVacb == NULL) &&
+ (OldSharedCacheMap != NULL) &&
+ (OldSharedCacheMap->ActiveVacb == Vacb))) {
+
+ //
+ // The normal case is that the Vacb is no longer mapped
+ // and we can just get out and use it.
+ //
+
+ if (Vacb->BaseAddress == NULL) {
+ break;
+ }
+
+ //
+ // Else the Vacb is mapped. If we haven't done so
+ // already, we have to bias the open count so the
+ // SharedCacheMap (and its section reference) do not
+ // get away before we complete the unmap. Unfortunately
+ // we have to free the Vacb lock first to obey our locking
+ // order.
+ //
+
+ if (!MasterAcquired) {
+
+ ExReleaseSpinLock( &CcVacbSpinLock, *OldIrql );
+ ExAcquireSpinLock( &CcMasterSpinLock, OldIrql );
+ ExAcquireSpinLockAtDpcLevel( &CcVacbSpinLock );
+ MasterAcquired = TRUE;
+
+ //
+ // Reset the next victim on this rare path to allow our guy
+ // to scan the entire list again. Since we terminate the scan
+ // when we see we have incremented into this guy, we cannot
+ // leave it on the first Vacb! In this case we will terminate
+ // at CcBeyondVacbs. Third time should be the charm on this fix!
+ //
+
+ CcNextVictimVacb = Vacb;
+ if (CcNextVictimVacb == CcVacbs) {
+ CcNextVictimVacb = CcBeyondVacbs;
+ }
+ }
+
+ //
+ // If this Vacb went active while we had the spin lock
+ // dropped, then we have to start a new scan! At least
+ // now we have both locks so that this cannot happen again.
+ //
+
+ if (Vacb->Overlay.ActiveCount != 0) {
+
+ //
+ // Most likely we are here to free an Active Vacb from copy
+ // read. Rather than repeat all the tests from above, we will
+ // just try to get the active Vacb if we haven't already got
+ // one.
+ //
+
+ if ((ActiveVacb == NULL) && (Vacb->SharedCacheMap != NULL)) {
+
+ //
+ // Get the active Vacb.
+ //
+
+ GetActiveVacbAtDpcLevel( Vacb->SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty );
+ }
+
+ //
+ // Otherwise we will break out and use this Vacb. If it
+ // is still mapped we can now safely increment the open
+ // count.
+ //
+
+ } else {
+
+ if (Vacb->BaseAddress != NULL) {
+
+ //
+ // Note that if the SharedCacheMap is currently
+ // being deleted, we need to skip over
+ // it, otherwise we will become the second
+ // deleter. CcDeleteSharedCacheMap clears the
+ // pointer in the SectionObjectPointer.
+ //
+
+ if (Vacb->SharedCacheMap->FileObject->SectionObjectPointer->SharedCacheMap ==
+ Vacb->SharedCacheMap) {
+
+ Vacb->SharedCacheMap->OpenCount += 1;
+ break;
+ }
+
+ } else {
+
+ break;
+ }
+ }
+ }
+
+ //
+ // Advance to the next guy and see if we have scanned
+ // the entire list.
+ //
+
+ Vacb += 1;
+
+ if (Vacb == CcNextVictimVacb) {
+
+ //
+ // Release the spinlock(s) acquired above.
+ //
+
+ if (MasterAcquired) {
+
+ ExReleaseSpinLockFromDpcLevel( &CcVacbSpinLock );
+ ExReleaseSpinLock( &CcMasterSpinLock, *OldIrql );
+
+ } else {
+
+ ExReleaseSpinLock( &CcVacbSpinLock, *OldIrql );
+ }
+
+ //
+ // If we found an active vacb, then free it and go back and
+ // try again. Else it's time to bail.
+ //
+
+ if (ActiveVacb != NULL) {
+ CcFreeActiveVacb( ActiveVacb->SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty );
+ ActiveVacb = NULL;
+
+ //
+ // Reacquire spinlocks to loop back
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, OldIrql );
+ ExAcquireSpinLockAtDpcLevel( &CcVacbSpinLock );
+ MasterAcquired = TRUE;
+
+ } else {
+ ExRaiseStatus( STATUS_INSUFFICIENT_RESOURCES );
+ }
+ }
+ }
+
+ CcNextVictimVacb = Vacb + 1;
+
+ //
+ // Unlink it from the other SharedCacheMap, so the other
+ // guy will not try to use it when we free the spin lock.
+ //
+
+ if (Vacb->SharedCacheMap != NULL) {
+
+ OldSharedCacheMap = Vacb->SharedCacheMap;
+ SetVacb( OldSharedCacheMap, Vacb->Overlay.FileOffset, NULL );
+ Vacb->SharedCacheMap = NULL;
+ }
+
+ //
+ // Mark it in use so no one else will muck with it after
+ // we release the spin lock.
+ //
+
+ Vacb->Overlay.ActiveCount = 1;
+ SharedCacheMap->VacbActiveCount += 1;
+
+ //
+ // Release the spinlock(s) acquired above.
+ //
+
+ if (MasterAcquired) {
+
+ ExReleaseSpinLockFromDpcLevel( &CcVacbSpinLock );
+ ExReleaseSpinLock( &CcMasterSpinLock, *OldIrql );
+
+ } else {
+
+ ExReleaseSpinLock( &CcVacbSpinLock, *OldIrql );
+ }
+
+ //
+ // If the Vacb is already mapped, then unmap it.
+ //
+
+ if (Vacb->BaseAddress != NULL) {
+
+ CcUnmapVacb( Vacb, OldSharedCacheMap );
+
+ //
+ // Now we can decrement the open count as we normally
+ // do, possibly deleting the guy.
+ //
+
+ ExAcquireSpinLock( &CcMasterSpinLock, OldIrql );
+
+ //
+ // Now release our open count.
+ //
+
+ OldSharedCacheMap->OpenCount -= 1;
+
+ if ((OldSharedCacheMap->OpenCount == 0) &&
+ !FlagOn(OldSharedCacheMap->Flags, WRITE_QUEUED) &&
+ (OldSharedCacheMap->DirtyPages == 0)) {
+
+ //
+ // Move to the dirty list.
+ //
+
+ RemoveEntryList( &OldSharedCacheMap->SharedCacheMapLinks );
+ InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks,
+ &OldSharedCacheMap->SharedCacheMapLinks );
+
+ //
+ // Make sure the Lazy Writer will wake up, because we
+ // want him to delete this SharedCacheMap.
+ //
+
+ LazyWriter.OtherWork = TRUE;
+ if (!LazyWriter.ScanActive) {
+ CcScheduleLazyWriteScan();
+ }
+ }
+
+ ExReleaseSpinLock( &CcMasterSpinLock, *OldIrql );
+ }
+
+ //
+ // Use try-finally to return this guy to the list if we get an
+ // exception.
+ //
+
+ try {
+
+ //
+ // Assume we are mapping to the end of the section, but
+ // reduce to our normal mapping granularity if the section
+ // is too large.
+ //
+
+ MappedLength.QuadPart = SharedCacheMap->SectionSize.QuadPart - NormalOffset.QuadPart;
+
+ if ((MappedLength.HighPart != 0) ||
+ (MappedLength.LowPart > VACB_MAPPING_GRANULARITY)) {
+
+ MappedLength.LowPart = VACB_MAPPING_GRANULARITY;
+ }
+
+ //
+ // Now map this one in the system cache.
+ //
+
+ DebugTrace( 0, mm, "MmMapViewInSystemCache:\n", 0 );
+ DebugTrace( 0, mm, " Section = %08lx\n", SharedCacheMap->Section );
+ DebugTrace2(0, mm, " Offset = %08lx, %08lx\n",
+ NormalOffset.LowPart,
+ NormalOffset.HighPart );
+ DebugTrace( 0, mm, " ViewSize = %08lx\n", MappedLength.LowPart );
+
+ Status =
+ MmMapViewInSystemCache( SharedCacheMap->Section,
+ &Vacb->BaseAddress,
+ &NormalOffset,
+ &MappedLength.LowPart );
+
+ DebugTrace( 0, mm, " <BaseAddress = %08lx\n", Vacb->BaseAddress );
+ DebugTrace( 0, mm, " <ViewSize = %08lx\n", MappedLength.LowPart );
+
+ if (!NT_SUCCESS( Status )) {
+
+ DebugTrace( 0, 0, "Error from Map, Status = %08lx\n", Status );
+
+ ExRaiseStatus( FsRtlNormalizeNtstatus( Status,
+ STATUS_UNEXPECTED_MM_MAP_ERROR ));
+ }
+
+ } finally {
+
+ //
+ // Take this opportunity to free the active vacb.
+ //
+
+ if (ActiveVacb != NULL) {
+
+ CcFreeActiveVacb( ActiveVacb->SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty );
+ }
+
+ //
+ // On abnormal termination, get this guy back in the list.
+ //
+
+ if (AbnormalTermination()) {
+
+ ExAcquireSpinLock( &CcVacbSpinLock, OldIrql );
+
+ //
+ // This is like the unlucky case below. Just back out the stuff
+ // we did and put the guy at the tail of the list. Basically
+ // only the Map should fail, and we clear BaseAddress accordingly.
+ //
+
+ Vacb->BaseAddress = NULL;
+
+ CheckedDec(Vacb->Overlay.ActiveCount);
+ CheckedDec(SharedCacheMap->VacbActiveCount);
+
+ //
+ // If there is someone waiting for this count to go to zero,
+ // wake them here.
+ //
+
+ if (SharedCacheMap->WaitOnActiveCount != NULL) {
+ KeSetEvent( SharedCacheMap->WaitOnActiveCount, 0, FALSE );
+ }
+
+ ExReleaseSpinLock( &CcVacbSpinLock, *OldIrql );
+ }
+ }
+
+ //
+ // Finish filling in the Vacb, and store its address in the array in
+ // the Shared Cache Map. (We have to rewrite the ActiveCount
+ // since it is overlaid.) To do this we must reacquire the
+ // spin lock one more time. Note we have to check for the unusual
+ // case that someone beat us to mapping this view, since we had to
+ // drop the spin lock.
+ //
+
+ ExAcquireSpinLock( &CcVacbSpinLock, OldIrql );
+
+ if ((TempVacb = GetVacb( SharedCacheMap, NormalOffset )) == NULL) {
+
+ Vacb->SharedCacheMap = SharedCacheMap;
+ Vacb->Overlay.FileOffset = NormalOffset;
+ Vacb->Overlay.ActiveCount = 1;
+
+ SetVacb( SharedCacheMap, NormalOffset, Vacb );
+
+ //
+ // This is the unlucky case where we collided with someone else
+ // trying to map the same view. He can get in because we dropped
+ // the spin lock above. Rather than allocating events and making
+ // someone wait, considering this case is fairly unlikely, we just
+ // dump this one at the tail of the list and use the one from the
+ // guy who beat us.
+ //
+
+ } else {
+
+ //
+ // Now we have to increment all of the counts for the one that
+ // was already there, then ditch the one we had.
+ //
+
+ if (TempVacb->Overlay.ActiveCount == 0) {
+ SharedCacheMap->VacbActiveCount += 1;
+ }
+
+ TempVacb->Overlay.ActiveCount += 1;
+
+ //
+ // Now unmap the one we mapped and proceed with the other Vacb.
+ // On this path we have to release the spinlock to do the unmap,
+ // and then reacquire the spinlock before cleaning up.
+ //
+
+ ExReleaseSpinLock( &CcVacbSpinLock, *OldIrql );
+
+ CcUnmapVacb( Vacb, SharedCacheMap );
+
+ ExAcquireSpinLock( &CcVacbSpinLock, OldIrql );
+ CheckedDec(Vacb->Overlay.ActiveCount);
+ CheckedDec(SharedCacheMap->VacbActiveCount);
+ Vacb->SharedCacheMap = NULL;
+
+ Vacb = TempVacb;
+ }
+
+ return Vacb;
+}
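+
+//
+// Illustrative sketch (not part of this module) of the victim selection idea
+// used above, with the lock juggling and the active-Vacb special cases
+// stripped away: scan round-robin from CcNextVictimVacb, wrapping at
+// CcBeyondVacbs, and take the first view with no active references. The
+// names mirror the globals in this module; the simplified loop (and its
+// NULL return instead of raising) is an assumption for the example.
+//
+//     PVACB
+//     ExampleFindVictim (
+//         VOID
+//         )
+//     {
+//         PVACB Vacb = CcNextVictimVacb;
+//
+//         do {
+//
+//             if (Vacb == CcBeyondVacbs) {
+//                 Vacb = CcVacbs;
+//             }
+//
+//             if (Vacb->Overlay.ActiveCount == 0) {
+//                 CcNextVictimVacb = Vacb + 1;
+//                 return Vacb;
+//             }
+//
+//             Vacb += 1;
+//
+//         } while (Vacb != CcNextVictimVacb);
+//
+//         return NULL;    // every view is active
+//     }
+//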
+
+
+VOID
+FASTCALL
+CcFreeVirtualAddress (
+ IN PVACB Vacb
+ )
+
+/*++
+
+Routine Description:
+
+ This routine must be called once for each call to CcGetVirtualAddress,
+ to free that virtual address.
+
+Arguments:
+
+ Vacb - Supplies the Vacb which was returned from CcGetVirtualAddress.
+
+Return Value:
+
+ None.
+
+--*/
+
+{
+ KIRQL OldIrql;
+ PSHARED_CACHE_MAP SharedCacheMap = Vacb->SharedCacheMap;
+
+ ExAcquireSpinLock( &CcVacbSpinLock, &OldIrql );
+
+ CheckedDec(Vacb->Overlay.ActiveCount);
+
+ //
+ // If the count goes to zero, then we want to decrement the active view
+ // count in the SharedCacheMap and wake anyone waiting on it.
+ //
+
+ if (Vacb->Overlay.ActiveCount == 0) {
+
+ //
+ // If the SharedCacheMap address is not NULL, then this one is
+ // in use by a shared cache map, and we have to decrement his
+ // count and see if anyone is waiting.
+ //
+
+ if (SharedCacheMap != NULL) {
+
+ CheckedDec(SharedCacheMap->VacbActiveCount);
+
+ //
+ // If there is someone waiting for this count to go to zero,
+ // wake them here.
+ //
+
+ if (SharedCacheMap->WaitOnActiveCount != NULL) {
+ KeSetEvent( SharedCacheMap->WaitOnActiveCount, 0, FALSE );
+ }
+ }
+ }
+
+ ExReleaseSpinLock( &CcVacbSpinLock, OldIrql );
+}
+
+
+VOID
+CcWaitOnActiveCount (
+ IN PSHARED_CACHE_MAP SharedCacheMap
+ )
+
+/*++
+
+Routine Description:
+
+ This routine may be called to wait for outstanding mappings for
+ a given SharedCacheMap to go inactive. It is intended to be called
+ from CcUninitializeCacheMap, which is called by the file systems
+ during cleanup processing. In that case this routine only has to
+ wait if the user closed a handle without waiting for all I/Os on the
+ handle to complete.
+
+ This routine returns each time the active count is decremented. The
+ caller must recheck his wait conditions on return, either waiting for
+ the ActiveCount to go to 0, or for specific views to go inactive
+ (CcPurgeCacheSection case).
+
+Arguments:
+
+ SharedCacheMap - Supplies the Shared Cache Map on whose VacbActiveCount
+ we wish to wait.
+
+Return Value:
+
+ None.
+
+--*/
+
+{
+ KIRQL OldIrql;
+ PKEVENT Event;
+
+ //
+ // In the unusual case that we get a cleanup while I/O is still going
+ // on, we can wait here. The caller must test the count for nonzero
+ // before calling this routine.
+ //
+ // Since we are being called from cleanup, we cannot afford to
+ // fail here.
+ //
+
+ ExAcquireSpinLock( &CcVacbSpinLock, &OldIrql );
+
+ //
+ // It is possible that the count went to zero before we acquired the
+ // spinlock, so we must handle two cases here.
+ //
+
+ if (SharedCacheMap->VacbActiveCount != 0) {
+
+ if ((Event = SharedCacheMap->WaitOnActiveCount) == NULL) {
+
+ //
+ // If the local event is not being used as a create event,
+ // then we can use it.
+ //
+
+ if (SharedCacheMap->CreateEvent == NULL) {
+
+ Event = &SharedCacheMap->Event;
+
+ } else {
+
+ Event = (PKEVENT)ExAllocatePool( NonPagedPoolMustSucceed,
+ sizeof(KEVENT) );
+ }
+ }
+
+ KeInitializeEvent( Event,
+ NotificationEvent,
+ FALSE );
+
+ SharedCacheMap->WaitOnActiveCount = Event;
+
+ ExReleaseSpinLock( &CcVacbSpinLock, OldIrql );
+
+ KeWaitForSingleObject( Event,
+ Executive,
+ KernelMode,
+ FALSE,
+ (PLARGE_INTEGER)NULL);
+ } else {
+
+ ExReleaseSpinLock( &CcVacbSpinLock, OldIrql );
+ }
+}
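+
+//
+// Illustrative sketch (not part of this module) of how a caller is expected
+// to use CcWaitOnActiveCount: because the routine returns on every decrement
+// of the active count, the caller loops and rechecks its own wait condition.
+// The helper name below is an assumption; the real callers are in the
+// cleanup and purge paths.
+//
+//     VOID
+//     ExampleWaitForAllViewsInactive (
+//         IN PSHARED_CACHE_MAP SharedCacheMap
+//         )
+//     {
+//         while (SharedCacheMap->VacbActiveCount != 0) {
+//
+//             CcWaitOnActiveCount( SharedCacheMap );
+//         }
+//     }
+//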
+
+
+//
+// Internal Support Routine.
+//
+
+VOID
+CcUnmapVacb (
+ IN PVACB Vacb,
+ IN PSHARED_CACHE_MAP SharedCacheMap
+ )
+
+/*++
+
+Routine Description:
+
+ This routine may be called to unmap a previously mapped Vacb, and
+ clear its BaseAddress field.
+
+Arguments:
+
+ Vacb - Supplies the Vacb which was returned from CcGetVirtualAddress.
+
+Return Value:
+
+ None.
+
+--*/
+
+{
+ //
+ // Make sure it is mapped.
+ //
+
+ ASSERT(SharedCacheMap != NULL);
+ ASSERT(Vacb->BaseAddress != NULL);
+
+ //
+ // Call MM to unmap it.
+ //
+
+ DebugTrace( 0, mm, "MmUnmapViewInSystemCache:\n", 0 );
+ DebugTrace( 0, mm, " BaseAddress = %08lx\n", Vacb->BaseAddress );
+
+ MmUnmapViewInSystemCache( Vacb->BaseAddress,
+ SharedCacheMap->Section,
+ FlagOn(SharedCacheMap->Flags, ONLY_SEQUENTIAL_ONLY_SEEN) );
+
+ Vacb->BaseAddress = NULL;
+}
+
+
+VOID
+FASTCALL
+CcCreateVacbArray (
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN LARGE_INTEGER NewSectionSize
+ )
+
+/*++
+
+Routine Description:
+
+ This routine must be called when a SharedCacheMap is created to create
+ and initialize the initial Vacb array.
+
+Arguments:
+
+ SharedCacheMap - Supplies the shared cache map for which the array is
+ to be created.
+
+ NewSectionSize - Supplies the current size of the section which must be
+ covered by the Vacb array.
+
+Return Value:
+
+ None.
+
+--*/
+
+{
+ PVACB *NewAddresses;
+ ULONG NewSize, SizeToAllocate;
+ PLIST_ENTRY BcbListHead;
+
+ NewSize = SizeToAllocate = SizeOfVacbArray(NewSectionSize);
+
+ //
+ // The following limit is greater than the MM limit
+ // (i.e., MM actually only supports even smaller sections).
+ // This limit is required here in order to get the correct
+ // answer from SizeOfVacbArray.
+ //
+
+ if (NewSectionSize.HighPart & 0xFFFFC000) {
+ ExRaiseStatus(STATUS_SECTION_TOO_BIG);
+ }
+
+ //
+ // See if we can use the array inside the shared cache map.
+ //
+
+ if (NewSize == (PREALLOCATED_VACBS * sizeof(PVACB))) {
+
+ NewAddresses = &SharedCacheMap->InitialVacbs[0];
+
+ //
+ // Else allocate the array.
+ //
+
+ } else {
+
+ //
+ // For large metadata streams, double the size to allocate
+ // an array of Bcb listheads. Every two Vacb pointers also
+ // get their own Bcb listhead, thus requiring double the size.
+ //
+
+ ASSERT(SIZE_PER_BCB_LIST == (VACB_MAPPING_GRANULARITY * 2));
+
+ //
+ // Does this stream get a Bcb Listhead array?
+ //
+
+ if (FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED) &&
+ (NewSectionSize.QuadPart > BEGIN_BCB_LIST_ARRAY)) {
+
+ SizeToAllocate *= 2;
+ }
+
+ NewAddresses = ExAllocatePool( NonPagedPool, SizeToAllocate );
+ if (NewAddresses == NULL) {
+ SharedCacheMap->Status = STATUS_INSUFFICIENT_RESOURCES;
+ ExRaiseStatus( STATUS_INSUFFICIENT_RESOURCES );
+ }
+ }
+
+ RtlZeroMemory( NewAddresses, NewSize );
+
+ //
+ // Loop to insert the Bcb listheads (if any) in the *descending* order
+ // Bcb list.
+ //
+
+ if (SizeToAllocate != NewSize) {
+
+ for (BcbListHead = (PLIST_ENTRY)((PCHAR)NewAddresses + NewSize);
+ BcbListHead < (PLIST_ENTRY)((PCHAR)NewAddresses + SizeToAllocate);
+ BcbListHead++) {
+
+ InsertHeadList( &SharedCacheMap->BcbList, BcbListHead );
+ }
+ }
+
+ SharedCacheMap->Vacbs = NewAddresses;
+ SharedCacheMap->SectionSize = NewSectionSize;
+}
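+
+//
+// Illustrative sketch (not part of this module) of the sizing rule above,
+// using assumed example values (a 1MB section and 256KB views): the Vacb
+// pointer array gets one slot per view, and a large metadata stream doubles
+// the allocation so the second half holds one Bcb listhead per two views
+// (SIZE_PER_BCB_LIST). The local names below are for the example only.
+//
+//     ULONG VacbPointers = (ULONG)(SectionSize.QuadPart >> VACB_OFFSET_SHIFT); // 4
+//     ULONG NewSize = VacbPointers * sizeof(PVACB);
+//     ULONG SizeToAllocate = MetadataStream ? (NewSize * 2) : NewSize;
+//     ULONG BcbListHeads = MetadataStream ? (VacbPointers / 2) : 0;            // 2
+//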
+
+
+VOID
+CcExtendVacbArray (
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN LARGE_INTEGER NewSectionSize
+ )
+
+/*++
+
+Routine Description:
+
+ This routine must be called any time the section for a shared cache
+ map is extended, in order to extend the Vacb array (if necessary).
+
+Arguments:
+
+ SharedCacheMap - Supplies the shared cache map for which the array is
+ to be created.
+
+ NewSectionSize - Supplies the new size of the section which must be
+ covered by the Vacb array.
+
+Return Value:
+
+ None.
+
+--*/
+
+{
+ KIRQL OldIrql;
+ PVACB *OldAddresses;
+ PVACB *NewAddresses;
+ ULONG OldSize;
+ ULONG NewSize, SizeToAllocate;
+ ULONG GrowingBcbListHeads = FALSE;
+
+ //
+ // The following limit is greater than the MM limit
+ // (i.e., MM actually only supports even smaller sections).
+ // This limit is required here in order to get the correct
+ // answer from SizeOfVacbArray.
+ //
+
+ if (NewSectionSize.HighPart & 0xFFFFC000) {
+ ExRaiseStatus(STATUS_SECTION_TOO_BIG);
+ }
+
+ //
+ // See if we will be growing the Bcb ListHeads, and take out the
+ // master lock if so.
+ //
+
+ if (FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED) &&
+ (NewSectionSize.QuadPart > BEGIN_BCB_LIST_ARRAY)) {
+
+ GrowingBcbListHeads = TRUE;
+ ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
+ ExAcquireSpinLockAtDpcLevel( &CcVacbSpinLock );
+
+ } else {
+
+ //
+ // Acquire the spin lock to serialize with anyone who might like
+ // to "steal" one of the mappings we are going to move.
+ //
+
+ ExAcquireSpinLock( &CcVacbSpinLock, &OldIrql );
+ }
+
+ //
+ // It's all a noop if the new size is not larger...
+ //
+
+ if (NewSectionSize.QuadPart > SharedCacheMap->SectionSize.QuadPart) {
+
+ NewSize = SizeToAllocate = SizeOfVacbArray(NewSectionSize);
+ OldSize = SizeOfVacbArray(SharedCacheMap->SectionSize);
+
+ //
+ // Only do something if the size is growing.
+ //
+
+ if (NewSize > OldSize) {
+
+ //
+ // Does this stream get a Bcb Listhead array?
+ //
+
+ if (GrowingBcbListHeads) {
+ SizeToAllocate *= 2;
+ }
+
+ NewAddresses = ExAllocatePool( NonPagedPool, SizeToAllocate );
+
+ if (NewAddresses == NULL) {
+ if (GrowingBcbListHeads) {
+ ExReleaseSpinLockFromDpcLevel( &CcVacbSpinLock );
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ } else {
+ ExReleaseSpinLock( &CcVacbSpinLock, OldIrql );
+ }
+ ExRaiseStatus(STATUS_INSUFFICIENT_RESOURCES);
+ }
+
+ OldAddresses = SharedCacheMap->Vacbs;
+ if (OldAddresses != NULL) {
+ RtlCopyMemory( NewAddresses, OldAddresses, OldSize );
+ } else {
+ OldSize = 0;
+ }
+
+ RtlZeroMemory( (PCHAR)NewAddresses + OldSize, NewSize - OldSize );
+
+ //
+ // See if we have to initialize Bcb Listheads.
+ //
+
+ if (SizeToAllocate != NewSize) {
+
+ LARGE_INTEGER Offset;
+ PLIST_ENTRY BcbListHeadNew, TempEntry;
+
+ Offset.QuadPart = 0;
+ BcbListHeadNew = (PLIST_ENTRY)((PCHAR)NewAddresses + NewSize);
+
+ //
+ // Handle case where the old array had Bcb Listheads.
+ //
+
+ if ((SharedCacheMap->SectionSize.QuadPart > BEGIN_BCB_LIST_ARRAY) &&
+ (OldAddresses != NULL)) {
+
+ PLIST_ENTRY BcbListHeadOld;
+
+ BcbListHeadOld = (PLIST_ENTRY)((PCHAR)OldAddresses + OldSize);
+
+ //
+ // Loop to remove each old listhead and insert the new one
+ // in its place.
+ //
+
+ do {
+ TempEntry = BcbListHeadOld->Flink;
+ RemoveEntryList( BcbListHeadOld );
+ InsertTailList( TempEntry, BcbListHeadNew );
+ Offset.QuadPart += SIZE_PER_BCB_LIST;
+ BcbListHeadOld += 1;
+ BcbListHeadNew += 1;
+ } while (Offset.QuadPart < SharedCacheMap->SectionSize.QuadPart);
+
+ //
+ // Otherwise, handle the case where we are adding Bcb
+ // Listheads.
+ //
+
+ } else {
+
+ TempEntry = SharedCacheMap->BcbList.Blink;
+
+ //
+ // Loop through any/all Bcbs to insert the new listheads.
+ //
+
+ while (TempEntry != &SharedCacheMap->BcbList) {
+
+ //
+ // Sit on this Bcb until we have inserted all listheads
+ // that go before it.
+ //
+
+ while (Offset.QuadPart <= ((PBCB)CONTAINING_RECORD(TempEntry, BCB, BcbLinks))->FileOffset.QuadPart) {
+
+ InsertHeadList(TempEntry, BcbListHeadNew);
+ Offset.QuadPart += SIZE_PER_BCB_LIST;
+ BcbListHeadNew += 1;
+ }
+ TempEntry = TempEntry->Blink;
+ }
+ }
+
+ //
+ // Now insert the rest of the new listhead entries that were
+ // not finished in either loop above.
+ //
+
+ while (Offset.QuadPart < NewSectionSize.QuadPart) {
+
+ InsertHeadList(&SharedCacheMap->BcbList, BcbListHeadNew);
+ Offset.QuadPart += SIZE_PER_BCB_LIST;
+ BcbListHeadNew += 1;
+ }
+ }
+
+ SharedCacheMap->Vacbs = NewAddresses;
+
+ if ((OldAddresses != &SharedCacheMap->InitialVacbs[0]) &&
+ (OldAddresses != NULL)) {
+ ExFreePool( OldAddresses );
+ }
+ }
+
+ SharedCacheMap->SectionSize = NewSectionSize;
+ }
+
+ if (GrowingBcbListHeads) {
+ ExReleaseSpinLockFromDpcLevel( &CcVacbSpinLock );
+ ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
+ } else {
+ ExReleaseSpinLock( &CcVacbSpinLock, OldIrql );
+ }
+}
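+
+//
+// Illustrative sketch (not part of this module) of the basic grow step
+// performed above, with the locking and the Bcb listhead rethreading
+// omitted: allocate the larger array, copy the slots that already exist,
+// zero the tail, publish the new array, and free the old one unless it is
+// the preallocated array inside the shared cache map. The helper name is
+// an assumption for the example.
+//
+//     VOID
+//     ExampleGrowVacbArray (
+//         IN PSHARED_CACHE_MAP SharedCacheMap,
+//         IN ULONG OldSize,
+//         IN ULONG NewSize
+//         )
+//     {
+//         PVACB *OldAddresses = SharedCacheMap->Vacbs;
+//         PVACB *NewAddresses = ExAllocatePool( NonPagedPool, NewSize );
+//
+//         if (NewAddresses == NULL) {
+//             ExRaiseStatus( STATUS_INSUFFICIENT_RESOURCES );
+//         }
+//
+//         RtlCopyMemory( NewAddresses, OldAddresses, OldSize );
+//         RtlZeroMemory( (PCHAR)NewAddresses + OldSize, NewSize - OldSize );
+//
+//         SharedCacheMap->Vacbs = NewAddresses;
+//
+//         if (OldAddresses != &SharedCacheMap->InitialVacbs[0]) {
+//             ExFreePool( OldAddresses );
+//         }
+//     }
+//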
+
+
+BOOLEAN
+FASTCALL
+CcUnmapVacbArray (
+ IN PSHARED_CACHE_MAP SharedCacheMap,
+ IN PLARGE_INTEGER FileOffset OPTIONAL,
+ IN ULONG Length
+ )
+
+/*++
+
+Routine Description:
+
+ This routine must be called to do any unmapping and associated
+ cleanup for a shared cache map, either over a caller-specified range
+ or, just before the shared cache map is deleted, over the entire section.
+
+Arguments:
+
+ SharedCacheMap - Supplies a pointer to the shared cache map
+ which is about to be deleted.
+
+ FileOffset - If supplied, only unmap the specified offset and length
+
+ Length - Completes the range to unmap when FileOffset is specified; a
+ Length of 0 means unmap to the end of the section.
+
+Return Value:
+
+ FALSE -- if the unmap was not done due to an active Vacb
+ TRUE -- if the unmap was done
+
+--*/
+
+{
+ PVACB Vacb;
+ KIRQL OldIrql;
+ LARGE_INTEGER StartingFileOffset = {0,0};
+ LARGE_INTEGER EndingFileOffset = SharedCacheMap->SectionSize;
+
+ //
+ // We could be just cleaning up for error recovery.
+ //
+
+ if (SharedCacheMap->Vacbs == NULL) {
+ return TRUE;
+ }
+
+ //
+ // See if a range was specified.
+ //
+
+ if (ARGUMENT_PRESENT(FileOffset)) {
+ StartingFileOffset = *FileOffset;
+ if (Length != 0) {
+ EndingFileOffset.QuadPart = FileOffset->QuadPart + Length;
+ }
+ }
+
+ //
+ // Acquire the spin lock to serialize with anyone who might try to map
+ // or reuse the Vacbs we are about to scan.
+ //
+
+ ExAcquireSpinLock( &CcVacbSpinLock, &OldIrql );
+
+ while (StartingFileOffset.QuadPart < EndingFileOffset.QuadPart) {
+
+ //
+ // Note that the caller with an explicit range may be off the
+ // end of the section (example CcPurgeCacheSection for cache
+ // coherency). That is the reason for the first part of the
+ // test below.
+ //
+ // Check the next cell once without the spin lock, it probably will
+ // not change, but we will handle it if it does.
+ //
+
+ if ((StartingFileOffset.QuadPart < SharedCacheMap->SectionSize.QuadPart) &&
+ ((Vacb = GetVacb( SharedCacheMap, StartingFileOffset )) != NULL)) {
+
+ //
+ // Return here if we are unlucky and see an active
+ // Vacb. It could be Purge calling, and the Lazy Writer
+ // may have done a CcGetVirtualAddressIfMapped!
+ //
+
+ if (Vacb->Overlay.ActiveCount != 0) {
+
+ ExReleaseSpinLock( &CcVacbSpinLock, OldIrql );
+ return FALSE;
+ }
+
+ //
+ // Unlink it from the other SharedCacheMap, so the other
+ // guy will not try to use it when we free the spin lock.
+ //
+
+ SetVacb( SharedCacheMap, StartingFileOffset, NULL );
+ Vacb->SharedCacheMap = NULL;
+
+ //
+ // Increment the open count so that no one else will
+ // try to unmap or reuse until we are done.
+ //
+
+ Vacb->Overlay.ActiveCount += 1;
+
+ //
+ // Release the spin lock.
+ //
+
+ ExReleaseSpinLock( &CcVacbSpinLock, OldIrql );
+
+ //
+ // Unmap and free it if we really got it above.
+ //
+
+ CcUnmapVacb( Vacb, SharedCacheMap );
+
+ //
+ // Reacquire the spin lock so that we can decrement the count.
+ //
+
+ ExAcquireSpinLock( &CcVacbSpinLock, &OldIrql );
+ Vacb->Overlay.ActiveCount -= 1;
+ }
+
+ StartingFileOffset.QuadPart = StartingFileOffset.QuadPart + VACB_MAPPING_GRANULARITY;
+ }
+
+ ExReleaseSpinLock( &CcVacbSpinLock, OldIrql );
+
+ return TRUE;
+}
+
+