/*++ Copyright (c) 1990 Microsoft Corporation Module Name: cachesub.c Abstract: This module implements the common subroutines for the Cache subsystem. Author: Tom Miller [TomM] 4-May-1990 Revision History: --*/ #include "cc.h" extern POBJECT_TYPE IoFileObjectType; // // The Bug check file id for this module // #define BugCheckFileId (CACHE_BUG_CHECK_CACHESUB) // // Define our debug constant // #define me 0x00000002 // // Define those errors which should be retried // #define RetryError(STS) (((STS) == STATUS_VERIFY_REQUIRED) || ((STS) == STATUS_FILE_LOCK_CONFLICT)) ULONG CcMaxDirtyWrite = 0x10000; // // Local support routines // BOOLEAN CcFindBcb ( IN PSHARED_CACHE_MAP SharedCacheMap, IN PLARGE_INTEGER FileOffset, IN OUT PLARGE_INTEGER BeyondLastByte, OUT PBCB *Bcb ); PBCB CcAllocateInitializeBcb ( IN OUT PSHARED_CACHE_MAP SharedCacheMap OPTIONAL, IN OUT PBCB AfterBcb, IN PLARGE_INTEGER FileOffset, IN PLARGE_INTEGER Length ); NTSTATUS CcSetValidData( IN PFILE_OBJECT FileObject, IN PLARGE_INTEGER ValidDataLength ); BOOLEAN CcAcquireByteRangeForWrite ( IN PSHARED_CACHE_MAP SharedCacheMap, IN PLARGE_INTEGER TargetOffset OPTIONAL, IN ULONG TargetLength, OUT PLARGE_INTEGER FileOffset, OUT PULONG Length, OUT PBCB *FirstBcb ); VOID CcReleaseByteRangeFromWrite ( IN PSHARED_CACHE_MAP SharedCacheMap, IN PLARGE_INTEGER FileOffset, IN ULONG Length, IN PBCB FirstBcb, IN BOOLEAN VerifyRequired ); // // Internal support routine // BOOLEAN CcPinFileData ( IN PFILE_OBJECT FileObject, IN PLARGE_INTEGER FileOffset, IN ULONG Length, IN BOOLEAN ReadOnly, IN BOOLEAN WriteOnly, IN BOOLEAN Wait, OUT PBCB *Bcb, OUT PVOID *BaseAddress, OUT PLARGE_INTEGER BeyondLastByte ) /*++ Routine Description: This routine locks the specified range of file data into memory. Note that the data desired by the caller (or the first part of it) may be in one of three states: No Bcb exists which describes the data A Bcb exists describing the data, but it is not mapped (BcbOut->BaseAddress == NULL) A Bcb exists describing the data, and it is mapped Given the above three states, and given that the caller may call with either Wait == FALSE or Wait == TRUE, this routine has basically six cases. What has to be done, and the order in which things must be done varies quite a bit with each of these six cases. The most straight-forward implementation of this routine, with the least amount of branching, is achieved by determining which of the six cases applies, and dispatching fairly directly to that case. The handling of the cases is summarized in the following table: Wait == TRUE Wait == FALSE ------------ ------------- no Bcb Case 1: Case 2: CcAllocateInitializeBcb CcMapAndRead (exit if FALSE) Acquire Bcb Exclusive CcAllocateInitializeBcb Release BcbList SpinLock Acquire Bcb Shared if not ReadOnly CcMapAndRead w/ Wait Release BcbList SpinLock Convert/Release Bcb Resource Bcb not Case 3: Case 4: mapped Increment PinCount Acquire Bcb Exclusive (exit if FALSE) Release BcbList SpinLock CcMapAndRead (exit if FALSE) Acquire Bcb Excl. w/ Wait Increment PinCount if still not mapped Convert/Release Bcb Resource CcMapAndRead w/ Wait Release BcbList SpinLock Convert/Release Bcb Resource Bcb mapped Case 5: Case 6: Increment PinCount if not ReadOnly Release BcbList SpinLock Acquire Bcb shared (exit if FALSE) if not ReadOnly Increment PinCount Acquire Bcb Shared Release BcbList SpinLock It is important to note that most changes to this routine will affect multiple cases from above. 
Arguments:

    FileObject - Pointer to File Object for file

    FileOffset - Offset in file at which map should begin

    Length - Length of desired map in bytes

    ReadOnly - Supplies TRUE if caller will only read the mapped data
               (i.e., TRUE for CcCopyRead, CcMapData and CcMdlRead and
               FALSE for everyone else)

    WriteOnly - The specified range of bytes will only be written.

    Wait - Supplies TRUE if it is ok to block the caller's thread
           Supplies 3 if it is ok to block the caller's thread and the Bcb
           should be exclusive
           Supplies FALSE if it is not ok to block the caller's thread

    Bcb - Returns a pointer to the Bcb representing the pinned data.

    BaseAddress - Returns base address of desired data

    BeyondLastByte - Returns the File Offset of the first byte beyond the
                     last accessible byte.

Return Value:

    FALSE - if Wait was supplied as FALSE, and it was impossible to lock all
            of the data without blocking

    TRUE - if the desired data is being returned

Raises:

    STATUS_INSUFFICIENT_RESOURCES - If a pool allocation failure occurs.
        This can only occur if Wait was specified as TRUE.  (If Wait is
        specified as FALSE, and an allocation failure occurs, this routine
        simply returns FALSE.)

--*/

{
    PSHARED_CACHE_MAP SharedCacheMap;
    LARGE_INTEGER TrialBound;
    KIRQL OldIrql;
    PBCB BcbOut = NULL;
    ULONG ZeroFlags = 0;
    BOOLEAN SpinLockAcquired = FALSE;
    BOOLEAN UnmapBcb = FALSE;
    BOOLEAN Result = FALSE;

    ULONG ActivePage;
    ULONG PageIsDirty;
    PVACB ActiveVacb = NULL;

    DebugTrace(+1, me, "CcPinFileData:\n", 0 );
    DebugTrace( 0, me, " FileObject = %08lx\n", FileObject );
    DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart,
                                                       FileOffset->HighPart );
    DebugTrace( 0, me, " Length = %08lx\n", Length );
    DebugTrace( 0, me, " Wait = %02lx\n", Wait );

    //
    //  Get pointer to SharedCacheMap via File Object.
    //

    SharedCacheMap = *(PSHARED_CACHE_MAP *)((PCHAR)FileObject->SectionObjectPointer
                                            + sizeof(PVOID));

    //
    //  See if we have an active Vacb, that we need to free.
    //

    GetActiveVacb( SharedCacheMap, OldIrql, ActiveVacb, ActivePage, PageIsDirty );

    //
    //  If there is an end of a page to be zeroed, then free that page now,
    //  so it does not cause our data to get zeroed.  If there is an active
    //  page, free it so we have the correct ValidDataGoal.
    //

    if ((ActiveVacb != NULL) || (SharedCacheMap->NeedToZero != NULL)) {

        CcFreeActiveVacb( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty );
    }

    //
    //  Make sure the calling file system is not asking to map beyond the
    //  end of the section, for example, that it did not forget to do
    //  CcExtendCacheSection.
    //

    ASSERT( ( FileOffset->QuadPart + (LONGLONG)Length ) <=
            SharedCacheMap->SectionSize.QuadPart );

    //
    //  Initially clear output
    //

    *Bcb = NULL;
    *BaseAddress = NULL;

    //
    //  Acquire Bcb List Exclusive to look for Bcb
    //

    ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );
    SpinLockAcquired = TRUE;

    //
    //  Use try to guarantee cleanup on the way out.
    //

    try {

        BOOLEAN Found;
        LARGE_INTEGER FOffset;
        LARGE_INTEGER TLength;
        PVOID BAddress;
        PVACB Vacb;

        //
        //  Search for Bcb describing the largest matching "prefix" byte range,
        //  or where to insert it.
        //

        TrialBound.QuadPart = FileOffset->QuadPart + (LONGLONG)Length;
        Found = CcFindBcb( SharedCacheMap, FileOffset, &TrialBound, &BcbOut );

        //
        //  Cases 1 and 2 - Bcb was not found.
        //
        //  First calculate data to pin down.
        //

        if (!Found) {

            //
            //  Not found, calculate data to pin down.
            //
            //  Round local copy of FileOffset down to page boundary, and
            //  round copies of size and minimum size up.  Also make sure that
            //  we keep the length from crossing the end of the SharedCacheMap.
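            //
            //  Illustrative only (a 4KB PAGE_SIZE is assumed): pinning
            //  FileOffset 0x3A80 for Length 0x200 gives TrialBound 0x3C80;
            //  TLength starts at 0x200, grows by the 0xA80 offset within the
            //  page, is rounded up to 0x1000, and FOffset is rounded down to
            //  0x3000, so exactly one page is mapped and read below.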
// FOffset = *FileOffset; TLength.QuadPart = TrialBound.QuadPart - FOffset.QuadPart; TLength.LowPart += FOffset.LowPart & (PAGE_SIZE - 1); // // At this point we can calculate the ReadOnly flag for // the purposes of whether to use the Bcb resource, and // we can calculate the ZeroFlags. // if ((!ReadOnly && !FlagOn(SharedCacheMap->Flags, PIN_ACCESS)) || WriteOnly) { // // We can always zero middle pages, if any. // ZeroFlags = ZERO_MIDDLE_PAGES; if (((FOffset.LowPart & (PAGE_SIZE - 1)) == 0) && (Length >= PAGE_SIZE)) { ZeroFlags |= ZERO_FIRST_PAGE; } if ((TLength.LowPart & (PAGE_SIZE - 1)) == 0) { ZeroFlags |= ZERO_LAST_PAGE; } } // // We treat Bcbs as ReadOnly (do not acquire resource) if they // are in sections for which we have not disabled modified writing. // if (!FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) { ReadOnly = TRUE; } TLength.LowPart = ROUND_TO_PAGES( TLength.LowPart ); FOffset.LowPart &= ~(PAGE_SIZE - 1); // // Even if we are readonly, we can still zero pages entirely // beyond valid data length. // if (FOffset.QuadPart >= SharedCacheMap->ValidDataGoal.QuadPart) { ZeroFlags |= ZERO_FIRST_PAGE | ZERO_MIDDLE_PAGES | ZERO_LAST_PAGE; } else if ((FOffset.QuadPart + (LONGLONG)PAGE_SIZE) >= SharedCacheMap->ValidDataGoal.QuadPart) { ZeroFlags |= ZERO_MIDDLE_PAGES | ZERO_LAST_PAGE; } // // We will get into trouble if we try to read more than we // can map by one Vacb. So make sure that our lengths stay // within a Vacb. // if (TLength.LowPart > VACB_MAPPING_GRANULARITY) { TLength.LowPart = VACB_MAPPING_GRANULARITY; } if ((FOffset.LowPart & ~(VACB_MAPPING_GRANULARITY - 1)) != ((FOffset.LowPart + TLength.LowPart - 1) & ~(VACB_MAPPING_GRANULARITY - 1))) { TLength.LowPart = VACB_MAPPING_GRANULARITY - (FOffset.LowPart & (VACB_MAPPING_GRANULARITY - 1)); } // // Case 1 - Bcb was not found and Wait is TRUE. // // Note that it is important to minimize the time that the Bcb // List spin lock is held, as well as guarantee we do not take // any faults while holding this lock. // // If we can (and perhaps will) wait, then it is important to // allocate the Bcb acquire it exclusive and free the Bcb List. // We then procede to read in the data, and anyone else finding // our Bcb will have to wait shared to insure that the data is // in. // if (Wait) { BcbOut = CcAllocateInitializeBcb ( SharedCacheMap, BcbOut, &FOffset, &TLength ); if (BcbOut == NULL) { DebugTrace( 0, 0, "Bcb allocation failure\n", 0 ); ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); SpinLockAcquired = FALSE; ExRaiseStatus( STATUS_INSUFFICIENT_RESOURCES ); } // // Now just acquire the newly-allocated Bcb shared, and // release the spin lock. // if (!ReadOnly) { if (Wait == 3) { (VOID)ExAcquireResourceExclusive( &BcbOut->Resource, TRUE ); } else { (VOID)ExAcquireSharedStarveExclusive( &BcbOut->Resource, TRUE ); } } ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); SpinLockAcquired = FALSE; // // Now read in the data. // // We set UnmapBcb to be TRUE for the duration of this call, // so that if we get an exception, we will call CcUnpinFileData // and probably delete the Bcb. // UnmapBcb = TRUE; (VOID)CcMapAndRead( SharedCacheMap, &FOffset, TLength.LowPart, ZeroFlags, TRUE, &Vacb, &BAddress ); UnmapBcb = FALSE; // // Now we have to reacquire the Bcb List spinlock to load // up the mapping if we are the first one, else we collided // with someone else who loaded the mapping first, and we // will just free our mapping. It is guaranteed that the // data will be mapped to the same place. 
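            //
            //  Both the winner and any loser of this race map the same file
            //  range, so whoever stores BaseAddress first supplies the
            //  mapping that everyone uses; a loser simply frees its own
            //  virtual address below.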
// ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); if (BcbOut->BaseAddress == NULL) { BcbOut->BaseAddress = BAddress; BcbOut->Vacb = Vacb; } else { CcFreeVirtualAddress( Vacb ); } ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); // // Calculate Base Address of the data we want. // *BaseAddress = (PCHAR)BcbOut->BaseAddress + (ULONG)( FileOffset->QuadPart - BcbOut->FileOffset.QuadPart ); // // Success! // try_return( Result = TRUE ); } // // Case 2 - Bcb was not found and Wait is FALSE // // If we cannot wait, then we go immediately see if the data is // there (CcMapAndRead), and then only set up the Bcb and release // the spin lock if the data is there. Note here we call // CcMapAndRead while holding the spin lock, because we know we // will not fault and not block before returning. // else { // // Now try to allocate and initialize the Bcb. If we // fail to allocate one, then return FALSE, since we know that // Wait = FALSE. The caller may get lucky if he calls // us back with Wait = TRUE. // BcbOut = CcAllocateInitializeBcb ( SharedCacheMap, BcbOut, &FOffset, &TLength ); if (BcbOut == NULL) { try_return( Result = FALSE ); } // // If we are not ReadOnly, we must acquire the newly-allocated // resource shared, and then we can free the spin lock. // if (!ReadOnly) { ExAcquireSharedStarveExclusive( &BcbOut->Resource, TRUE ); } ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); SpinLockAcquired = FALSE; // // Note that since this call has Wait = FALSE, it cannot // get an exception (see procedure header). // UnmapBcb = TRUE; if (!CcMapAndRead( SharedCacheMap, &FOffset, TLength.LowPart, ZeroFlags, FALSE, &Vacb, &BAddress )) { try_return( Result = FALSE ); } UnmapBcb = FALSE; // // Now we have to reacquire the Bcb List spinlock to load // up the mapping if we are the first one, else we collided // with someone else who loaded the mapping first, and we // will just free our mapping. It is guaranteed that the // data will be mapped to the same place. // ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); if (BcbOut->BaseAddress == NULL) { BcbOut->BaseAddress = BAddress; BcbOut->Vacb = Vacb; } else { CcFreeVirtualAddress( Vacb ); } ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); // // Calculate Base Address of the data we want. // *BaseAddress = (PCHAR)BcbOut->BaseAddress + (ULONG)( FileOffset->QuadPart - BcbOut->FileOffset.QuadPart ); // // Success! // try_return( Result = TRUE ); } } else { // // We treat Bcbs as ReadOnly (do not acquire resource) if they // are in sections for which we have not disabled modified writing. // if (!FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) { ReadOnly = TRUE; } } // // Cases 3 and 4 - Bcb is there but not mapped // if (BcbOut->BaseAddress == NULL) { // // It is too complicated to attempt to calculate any ZeroFlags in this // case, because we have to not only do the tests above, but also // compare to the byte range in the Bcb since we will be passing // those parameters to CcMapAndRead. Also, the probability of hitting // some window where zeroing is of any advantage is quite small. // // // Set up to just reread the Bcb exactly as the data in it is // described. // FOffset = BcbOut->FileOffset; TLength.QuadPart = (LONGLONG)BcbOut->ByteLength; // // Case 3 - Bcb is there but not mapped and Wait is TRUE // // Increment the PinCount, and then release the BcbList // SpinLock so that we can wait to acquire the Bcb exclusive. // Once we have the Bcb exclusive, map and read it in if no // one beats us to it. 
Someone may have beat us to it since // we had to release the SpinLock above. // if (Wait) { BcbOut->PinCount += 1; // // Now we have to release the BcbList SpinLock in order to // acquire the Bcb shared. // ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); SpinLockAcquired = FALSE; if (!ReadOnly) { if (Wait == 3) { (VOID)ExAcquireResourceExclusive( &BcbOut->Resource, TRUE ); } else { (VOID)ExAcquireSharedStarveExclusive( &BcbOut->Resource, TRUE ); } } // // Now procede to map and read the data in. // // Now read in the data. // // We set UnmapBcb to be TRUE for the duration of this call, // so that if we get an exception, we will call CcUnpinFileData // and probably delete the Bcb. // UnmapBcb = TRUE; (VOID)CcMapAndRead( SharedCacheMap, &FOffset, TLength.LowPart, ZeroFlags, TRUE, &Vacb, &BAddress ); UnmapBcb = FALSE; // // Now we have to reacquire the Bcb List spinlock to load // up the mapping if we are the first one, else we collided // with someone else who loaded the mapping first, and we // will just free our mapping. It is guaranteed that the // data will be mapped to the same place. // ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); if (BcbOut->BaseAddress == NULL) { BcbOut->BaseAddress = BAddress; BcbOut->Vacb = Vacb; } else { CcFreeVirtualAddress( Vacb ); } ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); // // // Calculate Base Address of the data we want. // *BaseAddress = (PCHAR)BcbOut->BaseAddress + (ULONG)( FileOffset->QuadPart - BcbOut->FileOffset.QuadPart ); // // Success! // try_return( Result = TRUE ); } // // Case 4 - Bcb is there but not mapped, and Wait is FALSE // // Since we cannot wait, we go immediately see if the data is // there (CcMapAndRead), and then only set up the Bcb and release // the spin lock if the data is there. Note here we call // CcMapAndRead while holding the spin lock, because we know we // will not fault and not block before returning. // else { if (!ReadOnly && !ExAcquireSharedStarveExclusive( &BcbOut->Resource, FALSE )) { try_return( Result = FALSE ); } BcbOut->PinCount += 1; ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); SpinLockAcquired = FALSE; // // Note that since this call has Wait = FALSE, it cannot // get an exception (see procedure header). // UnmapBcb = TRUE; if (!CcMapAndRead( SharedCacheMap, &BcbOut->FileOffset, BcbOut->ByteLength, ZeroFlags, FALSE, &Vacb, &BAddress )) { try_return( Result = FALSE ); } UnmapBcb = FALSE; // // Now we have to reacquire the Bcb List spinlock to load // up the mapping if we are the first one, else we collided // with someone else who loaded the mapping first, and we // will just free our mapping. It is guaranteed that the // data will be mapped to the same place. // ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); if (BcbOut->BaseAddress == NULL) { BcbOut->BaseAddress = BAddress; BcbOut->Vacb = Vacb; } else { CcFreeVirtualAddress( Vacb ); } ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); // // Calculate Base Address of the data we want. // *BaseAddress = (PCHAR)BcbOut->BaseAddress + (ULONG)( FileOffset->QuadPart - BcbOut->FileOffset.QuadPart ); // // Success! // try_return( Result = TRUE ); } } // // Cases 5 and 6 - Bcb is there and it is mapped // else { // // Case 5 - Bcb is there and mapped, and Wait is TRUE // // We can just increment the PinCount, release the SpinLock // and then acquire the Bcb Shared if we are not ReadOnly. 
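        //
        //  (A Wait value of 3 is the caller's request to hold the Bcb
        //  resource exclusive; any other nonzero Wait acquires it shared,
        //  starving exclusive waiters, and ReadOnly callers skip the
        //  resource entirely.)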
// if (Wait) { BcbOut->PinCount += 1; ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); SpinLockAcquired = FALSE; // // Acquire Bcb Resource shared to insure that it is in memory. // if (!ReadOnly) { if (Wait == 3) { (VOID)ExAcquireResourceExclusive( &BcbOut->Resource, TRUE ); } else { (VOID)ExAcquireSharedStarveExclusive( &BcbOut->Resource, TRUE ); } } } // // Case 6 - Bcb is there and mapped, and Wait is FALSE // // If we are not ReadOnly, we have to first see if we can // acquire the Bcb shared before incrmenting the PinCount, // since we will have to return FALSE if we cannot acquire the // resource. // else { // // Acquire Bcb Resource shared to insure that it is in memory. // if (!ReadOnly && !ExAcquireSharedStarveExclusive( &BcbOut->Resource, FALSE )) { try_return( Result = FALSE ); } BcbOut->PinCount += 1; ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); SpinLockAcquired = FALSE; } // // Calculate Base Address of the data we want. // *BaseAddress = (PCHAR)BcbOut->BaseAddress + (ULONG)( FileOffset->QuadPart - BcbOut->FileOffset.QuadPart ); // // Success! // try_return( Result = TRUE ); } try_exit: NOTHING; } finally { // // Release the spinlock if it is acquired. // if (SpinLockAcquired) { ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); } // // An abnormal termination can occur on an allocation failure, // or on a failure to map and read the buffer. The latter // operation is performed with UnmapBcb = TRUE, so that we // know to make the unmap call. // if (UnmapBcb) { CcUnpinFileData( BcbOut, ReadOnly, UNPIN ); BcbOut = NULL; } if (Result) { *Bcb = BcbOut; if (BcbOut != NULL) { *BeyondLastByte = BcbOut->BeyondLastByte; } else { *BeyondLastByte = *FileOffset; } } DebugTrace( 0, me, " %02lx\n", Result ); } return Result; } // // Internal Support Routine // VOID FASTCALL CcUnpinFileData ( IN OUT PBCB Bcb, IN BOOLEAN ReadOnly, IN UNMAP_ACTIONS UnmapAction ) /*++ Routine Description: This routine umaps and unlocks the specified buffer, which was previously locked and mapped by calling CcPinFileData. Arguments: Bcb - Pointer previously returned from CcPinFileData. As may be seen above, this pointer may be either a Bcb or a Vacb. ReadOnly - must specify same value as when data was mapped UnmapAction - UNPIN or SET_CLEAN Return Value: None --*/ { KIRQL OldIrql; PSHARED_CACHE_MAP SharedCacheMap; DebugTrace(+1, me, "CcUnpinFileData >Bcb = %08lx\n", Bcb ); // // Note, since we have to allocate so many Vacbs, we do not use // a node type code. However, the Vacb starts with a BaseAddress, // so we assume that the low byte of the Bcb node type code has // some bits set, which a page-aligned Base Address cannot. // ASSERT( (CACHE_NTC_BCB & 0xFF) != 0 ); if (Bcb->NodeTypeCode != CACHE_NTC_BCB) { ASSERT(((PVACB)Bcb)->SharedCacheMap->NodeTypeCode == CACHE_NTC_SHARED_CACHE_MAP); CcFreeVirtualAddress( (PVACB)Bcb ); DebugTrace(-1, me, "CcUnpinFileData -> VOID (simple release)\n", 0 ); return; } SharedCacheMap = Bcb->SharedCacheMap; // // We treat Bcbs as ReadOnly (do not acquire resource) if they // are in sections for which we have not disabled modified writing. // if (!FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) { ReadOnly = TRUE; } // // Synchronize // ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); switch (UnmapAction) { case UNPIN: ASSERT( Bcb->PinCount > 0 ); Bcb->PinCount -= 1; break; case SET_CLEAN: if (Bcb->Dirty) { ULONG Pages = Bcb->ByteLength >> PAGE_SHIFT; // // Reverse the rest of the actions taken when the Bcb was set dirty. 
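            //
            //  Illustrative only (a 4KB page size is assumed): setting a
            //  0x3000-byte Bcb clean removes three pages from
            //  SharedCacheMap->DirtyPages and CcTotalDirtyPages below, and
            //  CcPagesYetToWrite is reduced by the same amount, though never
            //  below zero.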
// Bcb->Dirty = FALSE; SharedCacheMap->DirtyPages -= Pages; CcTotalDirtyPages -= Pages; // // Normally we need to reduce CcPagesYetToWrite appropriately. // if (CcPagesYetToWrite > Pages) { CcPagesYetToWrite -= Pages; } else { CcPagesYetToWrite = 0; } // // Remove SharedCacheMap from dirty list if nothing more dirty, // and someone still has the cache map opened. // if ((SharedCacheMap->DirtyPages == 0) && (SharedCacheMap->OpenCount != 0)) { RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks ); InsertTailList( &CcCleanSharedCacheMapList, &SharedCacheMap->SharedCacheMapLinks ); } } break; default: CcBugCheck( UnmapAction, 0, 0 ); } // // If we brought it to 0, then we have to kill it. // if (Bcb->PinCount == 0) { // // If the Bcb is Dirty, we only release the resource and unmap now. // if (Bcb->Dirty) { if (Bcb->BaseAddress != NULL) { // // Capture CcFreeVirtualAddress parameters to locals so that we can // reset Bcb->BaseAddress and release the spinlock before // unmapping. // PVOID BaseAddress = Bcb->BaseAddress; ULONG ByteLength = Bcb->ByteLength; PVACB Vacb = Bcb->Vacb; Bcb->BaseAddress = NULL; Bcb->Vacb = NULL; if (!ReadOnly) { ExReleaseResource( &Bcb->Resource ); } ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); CcFreeVirtualAddress( Vacb ); } else { if (!ReadOnly) { ExReleaseResource( &Bcb->Resource ); } ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); } } // // Otherwise, we also delete the Bcb. // else { RemoveEntryList( &Bcb->BcbLinks ); if (Bcb->BaseAddress != NULL) { CcFreeVirtualAddress( Bcb->Vacb ); } // // Debug routines used to remove Bcbs from the global list // #if LIST_DBG ExAcquireSpinLockAtDpcLevel( &CcBcbSpinLock ); if (Bcb->CcBcbLinks.Flink != NULL) { RemoveEntryList( &Bcb->CcBcbLinks ); CcBcbCount -= 1; } ExReleaseSpinLockFromDpcLevel( &CcBcbSpinLock ); #endif #if DBG if (!ReadOnly) { ExReleaseResource( &Bcb->Resource ); } // // ASSERT that the resource is unowned. // ASSERT( Bcb->Resource.ActiveCount == 0 ); #endif CcDeallocateBcb( Bcb ); ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); } } // // Else we just have to release our Shared access, if we are not // readonly. We don't need to do this above, since we deallocate // the entire Bcb there. // else { if (!ReadOnly) { ExReleaseResource( &Bcb->Resource ); } ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); } DebugTrace(-1, me, "CcUnpinFileData -> VOID\n", 0 ); return; } VOID CcSetReadAheadGranularity ( IN PFILE_OBJECT FileObject, IN ULONG Granularity ) /*++ Routine Description: This routine may be called to set the read ahead granularity used by the Cache Manager. The default is PAGE_SIZE. The number is decremented and stored as a mask. Arguments: FileObject - File Object for which granularity shall be set Granularity - new granularity, which must be an even power of 2 and >= PAGE_SIZE Return Value: None --*/ { ((PPRIVATE_CACHE_MAP)FileObject->PrivateCacheMap)->ReadAheadMask = Granularity - 1; } VOID CcScheduleReadAhead ( IN PFILE_OBJECT FileObject, IN PLARGE_INTEGER FileOffset, IN ULONG Length ) /*++ Routine Description: This routine is called by Copy Read and Mdl Read file system routines to perform common Read Ahead processing. The input parameters describe the current read which has just been completed, or perhaps only started in the case of Mdl Reads. Based on these parameters, an assessment is made on how much data should be read ahead, and whether that data has already been read ahead. 
The processing is divided into two parts: CALCULATE READ AHEAD REQUIREMENTS (CcScheduleReadAhead) PERFORM READ AHEAD (CcPerformReadAhead) File systems should always call CcReadAhead, which will conditionally call CcScheduleReadAhead (if the read is large enough). If such a call determines that there is read ahead work to do, and no read ahead is currently active, then it will set ReadAheadActive and schedule read ahead to be peformed by the Lazy Writer, who will call CcPeformReadAhead. Arguments: FileObject - supplies pointer to FileObject on which readahead should be considered. FileOffset - supplies the FileOffset at which the last read just occurred. Length - supplies the length of the last read. Return Value: None --*/ { LARGE_INTEGER NewOffset; LARGE_INTEGER NewBeyond; LARGE_INTEGER FileOffset1, FileOffset2; KIRQL OldIrql; PSHARED_CACHE_MAP SharedCacheMap; PPRIVATE_CACHE_MAP PrivateCacheMap; PWORK_QUEUE_ENTRY WorkQueueEntry; ULONG ReadAheadSize; BOOLEAN Changed = FALSE; DebugTrace(+1, me, "CcScheduleReadAhead:\n", 0 ); DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart, FileOffset->HighPart ); DebugTrace( 0, me, " Length = %08lx\n", Length ); SharedCacheMap = *(PSHARED_CACHE_MAP *)((PCHAR)FileObject->SectionObjectPointer + sizeof(PVOID)); PrivateCacheMap = FileObject->PrivateCacheMap; if ((PrivateCacheMap == NULL) || (SharedCacheMap == NULL) || FlagOn(SharedCacheMap->Flags, DISABLE_READ_AHEAD)) { DebugTrace(-1, me, "CcScheduleReadAhead -> VOID (Nooped)\n", 0 ); return; } // // Round boundaries of transfer up to some greater granularity, so that // sequential reads will be recognized even if a few bytes are skipped // between records. // NewOffset = *FileOffset; NewBeyond.QuadPart = FileOffset->QuadPart + (LONGLONG)Length; // // Find the next read ahead boundary beyond the current read. // ReadAheadSize = (Length + PrivateCacheMap->ReadAheadMask) & ~PrivateCacheMap->ReadAheadMask; FileOffset2.QuadPart = NewBeyond.QuadPart + (LONGLONG)ReadAheadSize; FileOffset2.LowPart &= ~PrivateCacheMap->ReadAheadMask; // // CALCULATE READ AHEAD REQUIREMENTS // // // Take out the ReadAhead spinlock to synchronize our read ahead decision. // ExAcquireSpinLock( &PrivateCacheMap->ReadAheadSpinLock, &OldIrql ); // // Read Ahead Case 0. // // Sequential-only hint in the file object. For this case we will // try and always keep two read ahead granularities read ahead from // and including the end of the current transfer. This case has the // lowest overhead, and the code is completely immune to how the // caller skips around. Sequential files use ReadAheadOffset[1] in // the PrivateCacheMap as their "high water mark". // if (FlagOn(FileObject->Flags, FO_SEQUENTIAL_ONLY)) { // // If the next boundary is greater than or equal to the high-water mark, // then read ahead. // if (FileOffset2.QuadPart >= PrivateCacheMap->ReadAheadOffset[1].QuadPart) { // // On the first read if we are using a large read ahead granularity, // and the read did not get it all, we will just get the rest of the // first data we want. // if ((FileOffset->QuadPart == 0) && (PrivateCacheMap->ReadAheadMask > (PAGE_SIZE - 1)) && ((Length + PAGE_SIZE - 1) <= PrivateCacheMap->ReadAheadMask)) { FileOffset1.QuadPart = (LONGLONG)( ROUND_TO_PAGES(Length) ); PrivateCacheMap->ReadAheadLength[0] = ReadAheadSize - FileOffset1.LowPart; FileOffset2.QuadPart = (LONGLONG)ReadAheadSize; // // Calculate the next read ahead boundary. 
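            //
            //  For a sense of scale (values assumed): with a 64KB read ahead
            //  granularity (ReadAheadMask 0xFFFF), a 0x2400-byte read ending
            //  at offset 0x12400 gives ReadAheadSize 0x10000 and a next
            //  boundary (FileOffset2) of 0x20000 at the top of this routine;
            //  this sequential case tries to keep two such granularities
            //  read ahead of the current transfer.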
// } else { FileOffset1.QuadPart = PrivateCacheMap->ReadAheadOffset[1].QuadPart + (LONGLONG)ReadAheadSize; // // If the end of the current read is actually beyond where we would // normally do our read ahead, then we have fallen behind, and we must // advance to that spot. // if (FileOffset2.QuadPart > FileOffset1.QuadPart) { FileOffset1 = FileOffset2; } PrivateCacheMap->ReadAheadLength[0] = ReadAheadSize; FileOffset2.QuadPart = FileOffset1.QuadPart + (LONGLONG)ReadAheadSize; } // // Now issue the next two read aheads. // PrivateCacheMap->ReadAheadOffset[0] = FileOffset1; PrivateCacheMap->ReadAheadOffset[1] = FileOffset2; PrivateCacheMap->ReadAheadLength[1] = ReadAheadSize; Changed = TRUE; } // // Read Ahead Case 1. // // If this is the third of three sequential reads, then we will see if // we can read ahead. Note that if the first read to a file is to // offset 0, it passes this test. // } else if ((NewOffset.HighPart == PrivateCacheMap->BeyondLastByte2.HighPart) && ((NewOffset.LowPart & ~NOISE_BITS) == (PrivateCacheMap->BeyondLastByte2.LowPart & ~NOISE_BITS)) && (PrivateCacheMap->FileOffset2.HighPart == PrivateCacheMap->BeyondLastByte1.HighPart) && ((PrivateCacheMap->FileOffset2.LowPart & ~NOISE_BITS) == (PrivateCacheMap->BeyondLastByte1.LowPart & ~NOISE_BITS))) { // // On the first read if we are using a large read ahead granularity, // and the read did not get it all, we will just get the rest of the // first data we want. // if ((FileOffset->QuadPart == 0) && (PrivateCacheMap->ReadAheadMask > (PAGE_SIZE - 1)) && ((Length + PAGE_SIZE - 1) <= PrivateCacheMap->ReadAheadMask)) { FileOffset2.QuadPart = (LONGLONG)( ROUND_TO_PAGES(Length) ); } // // Round read offset to next read ahead boundary. // else { FileOffset2.QuadPart = NewBeyond.QuadPart + (LONGLONG)ReadAheadSize; FileOffset2.LowPart &= ~PrivateCacheMap->ReadAheadMask; } // // Set read ahead length to be the same as for the most recent read, // up to our max. // if (FileOffset2.QuadPart != PrivateCacheMap->ReadAheadOffset[1].QuadPart) { ASSERT( FileOffset2.HighPart >= 0 ); Changed = TRUE; PrivateCacheMap->ReadAheadOffset[1] = FileOffset2; PrivateCacheMap->ReadAheadLength[1] = ReadAheadSize; } } // // Read Ahead Case 2. // // If this is the third read following a particular stride, then we // will see if we can read ahead. One example of an application that // might do this is a spreadsheet. Note that this code even works // for negative strides. // else if ( ( NewOffset.QuadPart - PrivateCacheMap->FileOffset2.QuadPart ) == ( PrivateCacheMap->FileOffset2.QuadPart - PrivateCacheMap->FileOffset1.QuadPart )) { // // According to the current stride, the next offset will be: // // NewOffset + (NewOffset - FileOffset2) // // which is the same as: // // (NewOffset * 2) - FileOffset2 // FileOffset2.QuadPart = ( NewOffset.QuadPart << 1 ) - PrivateCacheMap->FileOffset2.QuadPart; // // If our stride is going backwards through the file, we // have to detect the case where the next step would wrap. // if (FileOffset2.HighPart >= 0) { // // The read ahead length must be extended by the same amount that // we will round the PrivateCacheMap->ReadAheadOffset down. // Length += FileOffset2.LowPart & (PAGE_SIZE - 1); // // Now round the PrivateCacheMap->ReadAheadOffset down. // FileOffset2.LowPart &= ~(PAGE_SIZE - 1); PrivateCacheMap->ReadAheadOffset[1] = FileOffset2; // // Round to page boundary. // PrivateCacheMap->ReadAheadLength[1] = ROUND_TO_PAGES(Length); Changed = TRUE; } } // // Get out if the ReadAhead requirements did not change. 
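    //
    //  (Changed is set only when one of the three cases above posted new
    //  ReadAheadOffset/ReadAheadLength values; ReadAheadActive means a read
    //  ahead work item is already queued or running, and CcPerformReadAhead
    //  clears it when that pass completes.)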
// if (!Changed || PrivateCacheMap->ReadAheadActive) { DebugTrace( 0, me, "Read ahead already in progress or no change\n", 0 ); ExReleaseSpinLock( &PrivateCacheMap->ReadAheadSpinLock, OldIrql ); return; } // // Otherwise, we will proceed and try to schedule the read ahead // ourselves. // PrivateCacheMap->ReadAheadActive = TRUE; // // Release spin lock on way out // ExReleaseSpinLock( &PrivateCacheMap->ReadAheadSpinLock, OldIrql ); // // Queue the read ahead request to the Lazy Writer's work queue. // DebugTrace( 0, me, "Queueing read ahead to worker thread\n", 0 ); WorkQueueEntry = CcAllocateWorkQueueEntry(); // // If we failed to allocate a work queue entry, then, we will // quietly bag it. Read ahead is only an optimization, and // no one ever requires that it occur. // if (WorkQueueEntry != NULL) { // // We must reference this file object so that it cannot go away // until we finish Read Ahead processing in the Worker Thread. // ObReferenceObject ( FileObject ); // // Increment open count to make sure the SharedCacheMap stays around. // ExAcquireFastLock( &CcMasterSpinLock, &OldIrql ); SharedCacheMap->OpenCount += 1; ExReleaseFastLock( &CcMasterSpinLock, OldIrql ); WorkQueueEntry->Function = (UCHAR)ReadAhead; WorkQueueEntry->Parameters.Read.FileObject = FileObject; CcPostWorkQueue( WorkQueueEntry, &CcExpressWorkQueue ); } // // If we failed to allocate a Work Queue Entry, or all of the pages // are resident we must set the active flag false. // else { ExAcquireFastLock( &PrivateCacheMap->ReadAheadSpinLock, &OldIrql ); PrivateCacheMap->ReadAheadActive = FALSE; ExReleaseFastLock( &PrivateCacheMap->ReadAheadSpinLock, OldIrql ); } DebugTrace(-1, me, "CcScheduleReadAhead -> VOID\n", 0 ); return; } VOID FASTCALL CcPerformReadAhead ( IN PFILE_OBJECT FileObject ) /*++ Routine Description: This routine is called by the Lazy Writer to perform read ahead which has been scheduled for this file by CcScheduleReadAhead. Arguments: FileObject - supplies pointer to FileObject on which readahead should be considered. Return Value: None --*/ { KIRQL OldIrql; PSHARED_CACHE_MAP SharedCacheMap; PPRIVATE_CACHE_MAP PrivateCacheMap; ULONG i; LARGE_INTEGER ReadAheadOffset[2]; ULONG ReadAheadLength[2]; PCACHE_MANAGER_CALLBACKS Callbacks; PVOID Context; ULONG SavedState; BOOLEAN Done; BOOLEAN HitEof = FALSE; BOOLEAN ReadAheadPerformed = FALSE; BOOLEAN FaultOccurred = FALSE; PETHREAD Thread = PsGetCurrentThread(); PVACB Vacb = NULL; DebugTrace(+1, me, "CcPerformReadAhead:\n", 0 ); DebugTrace( 0, me, " FileObject = %08lx\n", FileObject ); MmSavePageFaultReadAhead( Thread, &SavedState ); try { // // Since we have the open count biased, we can safely access the // SharedCacheMap. // SharedCacheMap = FileObject->SectionObjectPointer->SharedCacheMap; Callbacks = SharedCacheMap->Callbacks; Context = SharedCacheMap->LazyWriteContext; // // After the first time, keep looping as long as there are new // read ahead requirements. (We will skip out below.) // while (TRUE) { // // Get SharedCacheMap and PrivateCacheMap. If either are now NULL, get // out. // ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); PrivateCacheMap = FileObject->PrivateCacheMap; // // Now capture the information that we need, so that we can drop the // SharedList Resource. This information is advisory only anyway, and // the caller must guarantee that the FileObject is referenced. 
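        //
        //  (The requirements are captured into locals and zeroed while the
        //  spin locks are held, so that anything posted while this pass is
        //  running is picked up by the next iteration of the outer loop
        //  rather than being lost.)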
// if (PrivateCacheMap != NULL) { ExAcquireSpinLockAtDpcLevel( &PrivateCacheMap->ReadAheadSpinLock ); // // We are done when the lengths are 0 // Done = ((PrivateCacheMap->ReadAheadLength[0] | PrivateCacheMap->ReadAheadLength[1]) == 0); ReadAheadOffset[0] = PrivateCacheMap->ReadAheadOffset[0]; ReadAheadOffset[1] = PrivateCacheMap->ReadAheadOffset[1]; ReadAheadLength[0] = PrivateCacheMap->ReadAheadLength[0]; ReadAheadLength[1] = PrivateCacheMap->ReadAheadLength[1]; PrivateCacheMap->ReadAheadLength[0] = 0; PrivateCacheMap->ReadAheadLength[1] = 0; ExReleaseSpinLockFromDpcLevel( &PrivateCacheMap->ReadAheadSpinLock ); } ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); // // Acquire the file shared. // (*Callbacks->AcquireForReadAhead)( Context, TRUE ); if ((PrivateCacheMap == NULL) || Done) { try_return( NOTHING ); } // // PERFORM READ AHEAD // // // Now loop until everything is read in. The Read ahead is accomplished // by touching the pages with an appropriate ReadAhead parameter in MM. // i = 0; do { LARGE_INTEGER Offset, SavedOffset; ULONG Length, SavedLength; Offset = ReadAheadOffset[i]; Length = ReadAheadLength[i]; SavedOffset = Offset; SavedLength = Length; if ((Length != 0) && ( Offset.QuadPart <= SharedCacheMap->FileSize.QuadPart )) { ReadAheadPerformed = TRUE; // // Keep length within file and MAX_READ_AHEAD // if ( ( Offset.QuadPart + (LONGLONG)Length ) >= SharedCacheMap->FileSize.QuadPart ) { Length = (ULONG)( SharedCacheMap->FileSize.QuadPart - Offset.QuadPart ); HitEof = TRUE; } if (Length > MAX_READ_AHEAD) { Length = MAX_READ_AHEAD; } // // Now loop to read all of the desired data in. This loop // is more or less like the same loop to read data in // CcCopyRead, except that we do not copy anything, just // unmap as soon as it is in. // while (Length != 0) { ULONG ReceivedLength; PVOID CacheBuffer; ULONG PagesToGo; // // Call local routine to Map or Access the file data. // If we cannot map the data because of a Wait condition, // return FALSE. // // Since this routine is intended to be called from // the finally handler from file system read modules, // it is imperative that it not raise any exceptions. // Therefore, if any expected exception is raised, we // will simply get out. // CacheBuffer = CcGetVirtualAddress( SharedCacheMap, Offset, &Vacb, &ReceivedLength ); // // If we got more than we need, make sure to only transfer // the right amount. // if (ReceivedLength > Length) { ReceivedLength = Length; } // // Now loop to touch all of the pages, calling MM to insure // that if we fault, we take in exactly the number of pages // we need. // PagesToGo = COMPUTE_PAGES_SPANNED( CacheBuffer, ReceivedLength ); CcMissCounter = &CcReadAheadIos; while (PagesToGo) { MmSetPageFaultReadAhead( Thread, (PagesToGo - 1) ); FaultOccurred = (BOOLEAN)!MmCheckCachedPageState(CacheBuffer, FALSE); CacheBuffer = (PCHAR)CacheBuffer + PAGE_SIZE; PagesToGo -= 1; } CcMissCounter = &CcThrowAway; // // Calculate how much data we have left to go. // Length -= ReceivedLength; // // Assume we did not get all the data we wanted, and set // Offset to the end of the returned data. // Offset.QuadPart = Offset.QuadPart + (LONGLONG)ReceivedLength; // // It was only a page, so we can just leave this loop // After freeing the address. 
// CcFreeVirtualAddress( Vacb ); Vacb = NULL; } } i += 1; } while (i <= 1); // // Release the file // (*Callbacks->ReleaseFromReadAhead)( Context ); } try_exit: NOTHING; } finally { MmResetPageFaultReadAhead(Thread, SavedState); CcMissCounter = &CcThrowAway; // // If we got an error faulting a single page in, release the Vacb // here. It is important to free any mapping before dropping the // resource to prevent purge problems. // if (Vacb != NULL) { CcFreeVirtualAddress( Vacb ); } // // Release the file // (*Callbacks->ReleaseFromReadAhead)( Context ); // // To show we are done, we must make sure the PrivateCacheMap is // still there. // ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); PrivateCacheMap = FileObject->PrivateCacheMap; // // Show readahead is going inactive. // if (PrivateCacheMap != NULL) { ExAcquireSpinLockAtDpcLevel( &PrivateCacheMap->ReadAheadSpinLock ); PrivateCacheMap->ReadAheadActive = FALSE; // // If he said sequential only and we smashed into Eof, then // let's reset the highwater mark in case he wants to read the // file sequentially again. // if (HitEof && FlagOn(FileObject->Flags, FO_SEQUENTIAL_ONLY)) { PrivateCacheMap->ReadAheadOffset[1].LowPart = PrivateCacheMap->ReadAheadOffset[1].HighPart = 0; } // // If no faults occurred, turn read ahead off. // if (ReadAheadPerformed && !FaultOccurred) { PrivateCacheMap->ReadAheadEnabled = FALSE; } ExReleaseSpinLockFromDpcLevel( &PrivateCacheMap->ReadAheadSpinLock ); } // // Free SharedCacheMap list // ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); ObDereferenceObject( FileObject ); // // Serialize again to decrement the open count. // ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); SharedCacheMap->OpenCount -= 1; if ((SharedCacheMap->OpenCount == 0) && !FlagOn(SharedCacheMap->Flags, WRITE_QUEUED) && (SharedCacheMap->DirtyPages == 0)) { // // Move to the dirty list. // RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks ); InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks, &SharedCacheMap->SharedCacheMapLinks ); // // Make sure the Lazy Writer will wake up, because we // want him to delete this SharedCacheMap. // LazyWriter.OtherWork = TRUE; if (!LazyWriter.ScanActive) { CcScheduleLazyWriteScan(); } } ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); } DebugTrace(-1, me, "CcPerformReadAhead -> VOID\n", 0 ); return; } VOID CcSetDirtyInMask ( IN PSHARED_CACHE_MAP SharedCacheMap, IN PLARGE_INTEGER FileOffset, IN ULONG Length ) /*++ Routine Description: This routine may be called to set a range of pages dirty in a user data file, by just setting the corresponding bits in the mask bcb. Arguments: SharedCacheMap - SharedCacheMap where the pages are to be set dirty. FileOffset - FileOffset of first page to set dirty Length - Used in conjunction with FileOffset to determine how many pages to set dirty. Return Value: None --*/ { KIRQL OldIrql; PULONG MaskPtr; ULONG Mask; PMBCB Mbcb; ULONG FirstPage; ULONG LastPage; LARGE_INTEGER BeyondLastByte; // // Here is the maximum size file supported by this implementation. // ASSERT((FileOffset->HighPart & ~(PAGE_SIZE - 1)) == 0); // // Initialize our locals. // FirstPage = (ULONG)((FileOffset->LowPart >> PAGE_SHIFT) | (FileOffset->HighPart << (32 - PAGE_SHIFT))); LastPage = FirstPage + ((ULONG)((FileOffset->LowPart & (PAGE_SIZE - 1)) + Length - 1) >> PAGE_SHIFT); BeyondLastByte.LowPart = (LastPage + 1) << PAGE_SHIFT; BeyondLastByte.HighPart = (LONG)(LastPage >> (32 - PAGE_SHIFT)); // // We have to acquire the shared cache map list, because we // may be changing lists. 
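    //
    //  Illustrative only (a 4KB page size is assumed): FileOffset 0x1F000
    //  with Length 0x2200 yields FirstPage 0x1F and LastPage 0x21 above, so
    //  three bits (pages 0x1F through 0x21) will be set in the Mbcb bitmap
    //  below.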
// ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); // // If there is no Mbcb, or it is not big enough, we will have to allocate one. // Mbcb = SharedCacheMap->Mbcb; if ((Mbcb == NULL) || (LastPage >= (Mbcb->Bitmap.SizeOfBitMap - 1))) { PMBCB NewMbcb; ULONG RoundedBcbSize = ((sizeof(BCB) + 7) & ~7); ULONG SizeInBytes = ((LastPage + 1 + 1 + 7) / 8) + sizeof(MBCB); // // If the size needed is not larger than a Bcb, then get one from the // Bcb zone. // if (SizeInBytes <= RoundedBcbSize) { NewMbcb = (PMBCB)CcAllocateInitializeBcb( NULL, NULL, NULL, NULL ); if (NewMbcb != NULL) { NewMbcb->Bitmap.SizeOfBitMap = (RoundedBcbSize - sizeof(MBCB)) * 8; } else { ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); ExRaiseStatus( STATUS_INSUFFICIENT_RESOURCES ); } // // Otherwise, we will allocate one from the pool. We throw in a fudge // factor of 1 below to account for any bits that may shift off the end, // plus 4 to insure a long word of 0's at the end for scanning, and then // round up to a quad word boundary that we will get anyway. // } else { ULONG SizeToAllocate = (ULONG)(((SharedCacheMap->SectionSize.LowPart >> (PAGE_SHIFT + 3)) | (SharedCacheMap->SectionSize.HighPart << (32 - (PAGE_SHIFT + 3)))) + sizeof(MBCB) + 1 + 7) & ~7; NewMbcb = ExAllocatePool( NonPagedPool, SizeToAllocate ); if (NewMbcb != NULL) { RtlZeroMemory( NewMbcb, SizeToAllocate ); NewMbcb->Bitmap.SizeOfBitMap = (SizeToAllocate - sizeof(MBCB)) * 8; } else { ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); ExRaiseStatus( STATUS_INSUFFICIENT_RESOURCES ); } } // // Set in the node type, "empty" FirstDirtyPage state, and the address // of the bitmap. // NewMbcb->NodeTypeCode = CACHE_NTC_MBCB; NewMbcb->FirstDirtyPage = MAXULONG; NewMbcb->Bitmap.Buffer = (PULONG)(NewMbcb + 1); // // If there already was an Mbcb, we need to copy the relevant data from // it and deallocate it. // if (Mbcb != NULL) { NewMbcb->DirtyPages = Mbcb->DirtyPages; NewMbcb->FirstDirtyPage = Mbcb->FirstDirtyPage; NewMbcb->LastDirtyPage = Mbcb->LastDirtyPage; NewMbcb->ResumeWritePage = Mbcb->ResumeWritePage; RtlCopyMemory( NewMbcb + 1, Mbcb + 1, Mbcb->Bitmap.SizeOfBitMap / 8 ); CcDeallocateBcb( (PBCB)Mbcb ); } // // Finally, set to use our new Mbcb. // SharedCacheMap->Mbcb = Mbcb = NewMbcb; } // // If this is the first dirty page for this cache map, there is some work // to do. // if (SharedCacheMap->DirtyPages == 0) { // // If the lazy write scan is not active, then start it. // if (!LazyWriter.ScanActive) { CcScheduleLazyWriteScan(); } // // Move to the dirty list. // RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks ); InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks, &SharedCacheMap->SharedCacheMapLinks ); Mbcb->ResumeWritePage = FirstPage; } // // Now update the first and last dirty page indices and the bitmap. // if (FirstPage < Mbcb->FirstDirtyPage) { Mbcb->FirstDirtyPage = FirstPage; } if (LastPage > Mbcb->LastDirtyPage) { Mbcb->LastDirtyPage = LastPage; } MaskPtr = &Mbcb->Bitmap.Buffer[FirstPage / 32]; Mask = 1 << (FirstPage % 32); // // Loop to set all of the bits and adjust the DirtyPage totals. // for ( ; FirstPage <= LastPage; FirstPage++) { if ((*MaskPtr & Mask) == 0) { CcTotalDirtyPages += 1; SharedCacheMap->DirtyPages += 1; Mbcb->DirtyPages += 1; *MaskPtr |= Mask; } Mask <<= 1; if (Mask == 0) { MaskPtr += 1; Mask = 1; } } // // See if we need to advance our goal for ValidDataLength. 
// BeyondLastByte.QuadPart = FileOffset->QuadPart + (LONGLONG)Length; if ( BeyondLastByte.QuadPart > SharedCacheMap->ValidDataGoal.QuadPart ) { SharedCacheMap->ValidDataGoal = BeyondLastByte; } ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); } VOID CcSetDirtyPinnedData ( IN PVOID BcbVoid, IN PLARGE_INTEGER Lsn OPTIONAL ) /*++ Routine Description: This routine may be called to set a Bcb (returned by CcPinFileData) dirty, and a candidate for the Lazy Writer. All Bcbs should be set dirty by calling this routine, even if they are to be flushed another way. Arguments: Bcb - Supplies a pointer to a pinned (by CcPinFileData) Bcb, to be set dirty. Lsn - Lsn to be remembered with page. Return Value: None --*/ { PBCB Bcbs[2]; PBCB *BcbPtrPtr; KIRQL OldIrql; PSHARED_CACHE_MAP SharedCacheMap; DebugTrace(+1, me, "CcSetDirtyPinnedData: Bcb = %08lx\n", BcbVoid ); // // Assume this is a normal Bcb, and set up for loop below. // Bcbs[0] = (PBCB)BcbVoid; Bcbs[1] = NULL; BcbPtrPtr = &Bcbs[0]; // // If it is an overlap Bcb, then point into the Bcb vector // for the loop. // if (Bcbs[0]->NodeTypeCode == CACHE_NTC_OBCB) { BcbPtrPtr = &((POBCB)Bcbs[0])->Bcbs[0]; } // // Loop to set all Bcbs dirty // while (*BcbPtrPtr != NULL) { Bcbs[0] = *(BcbPtrPtr++); // // Should be no ReadOnly Bcbs // ASSERT(((ULONG)Bcbs[0] & 1) != 1); SharedCacheMap = Bcbs[0]->SharedCacheMap; // // We have to acquire the shared cache map list, because we // may be changing lists. // ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); if (!Bcbs[0]->Dirty) { ULONG Pages = Bcbs[0]->ByteLength >> PAGE_SHIFT; // // Set dirty to keep the Bcb from going away until // it is set Undirty, and assign the next modification time stamp. // Bcbs[0]->Dirty = TRUE; // // Initialize the OldestLsn field. // if (ARGUMENT_PRESENT(Lsn)) { Bcbs[0]->OldestLsn = *Lsn; Bcbs[0]->NewestLsn = *Lsn; } // // Move it to the dirty list if these are the first dirty pages, // and this is not disabled for write behind. // // Increase the count of dirty bytes in the shared cache map. // if ((SharedCacheMap->DirtyPages == 0) && !FlagOn(SharedCacheMap->Flags, DISABLE_WRITE_BEHIND)) { // // If the lazy write scan is not active, then start it. // if (!LazyWriter.ScanActive) { CcScheduleLazyWriteScan(); } RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks ); InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks, &SharedCacheMap->SharedCacheMapLinks ); } SharedCacheMap->DirtyPages += Pages; CcTotalDirtyPages += Pages; } // // If this Lsn happens to be older/newer than the ones we have stored, then // change it. // if (ARGUMENT_PRESENT(Lsn)) { if ((Bcbs[0]->OldestLsn.QuadPart == 0) || (Lsn->QuadPart < Bcbs[0]->OldestLsn.QuadPart)) { Bcbs[0]->OldestLsn = *Lsn; } if (Lsn->QuadPart > Bcbs[0]->NewestLsn.QuadPart) { Bcbs[0]->NewestLsn = *Lsn; } } // // See if we need to advance our goal for ValidDataLength. // if ( Bcbs[0]->BeyondLastByte.QuadPart > SharedCacheMap->ValidDataGoal.QuadPart ) { SharedCacheMap->ValidDataGoal = Bcbs[0]->BeyondLastByte; } ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); } DebugTrace(-1, me, "CcSetDirtyPinnedData -> VOID\n", 0 ); } NTSTATUS CcSetValidData( IN PFILE_OBJECT FileObject, IN PLARGE_INTEGER ValidDataLength ) /*++ Routine Description: This routine is used to call the File System to update ValidDataLength for a file. Arguments: FileObject - A pointer to a referenced file object describing which file the read should be performed from. ValidDataLength - Pointer to new ValidDataLength. Return Value: Status of operation. 
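    Note: the update is issued below as a paging-I/O IRP_MJ_SET_INFORMATION
    request with FileInformationClass FileEndOfFileInformation and
    AdvanceOnly set to TRUE, so the file system only ever moves
    ValidDataLength forward.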
--*/ { PIO_STACK_LOCATION IrpSp; PDEVICE_OBJECT DeviceObject; NTSTATUS Status; FILE_END_OF_FILE_INFORMATION Buffer; IO_STATUS_BLOCK IoStatus; KEVENT Event; PIRP Irp; DebugTrace(+1, me, "CcSetValidData:\n", 0 ); DebugTrace( 0, me, " FileObject = %08lx\n", FileObject ); DebugTrace2(0, me, " ValidDataLength = %08lx, %08lx\n", ValidDataLength->LowPart, ValidDataLength->HighPart ); // // Copy ValidDataLength to our buffer. // Buffer.EndOfFile = *ValidDataLength; // // Initialize the event. // KeInitializeEvent( &Event, NotificationEvent, FALSE ); // // Begin by getting a pointer to the device object that the file resides // on. // DeviceObject = IoGetRelatedDeviceObject( FileObject ); // // Allocate an I/O Request Packet (IRP) for this in-page operation. // Irp = IoAllocateIrp( DeviceObject->StackSize, FALSE ); if (Irp == NULL) { DebugTrace(-1, me, "CcSetValidData-> STATUS_INSUFFICIENT_RESOURCES\n", 0 ); return STATUS_INSUFFICIENT_RESOURCES; } // // Get a pointer to the first stack location in the packet. This location // will be used to pass the function codes and parameters to the first // driver. // IrpSp = IoGetNextIrpStackLocation( Irp ); // // Fill in the IRP according to this request, setting the flags to // just cause IO to set the event and deallocate the Irp. // Irp->Flags = IRP_PAGING_IO | IRP_SYNCHRONOUS_PAGING_IO; Irp->RequestorMode = KernelMode; Irp->UserIosb = &IoStatus; Irp->UserEvent = &Event; Irp->Tail.Overlay.OriginalFileObject = FileObject; Irp->Tail.Overlay.Thread = PsGetCurrentThread(); Irp->AssociatedIrp.SystemBuffer = &Buffer; // // Fill in the normal read parameters. // IrpSp->MajorFunction = IRP_MJ_SET_INFORMATION; IrpSp->FileObject = FileObject; IrpSp->DeviceObject = DeviceObject; IrpSp->Parameters.SetFile.Length = sizeof(FILE_END_OF_FILE_INFORMATION); IrpSp->Parameters.SetFile.FileInformationClass = FileEndOfFileInformation; IrpSp->Parameters.SetFile.FileObject = NULL; IrpSp->Parameters.SetFile.AdvanceOnly = TRUE; // // Queue the packet to the appropriate driver based on whether or not there // is a VPB associated with the device. This routine should not raise. // Status = IoCallDriver( DeviceObject, Irp ); // // If pending is returned (which is a successful status), // we must wait for the request to complete. // if (Status == STATUS_PENDING) { KeWaitForSingleObject( &Event, Executive, KernelMode, FALSE, (PLARGE_INTEGER)NULL); } // // If we got an error back in Status, then the Iosb // was not written, so we will just copy the status // there, then test the final status after that. // if (!NT_SUCCESS(Status)) { IoStatus.Status = Status; } DebugTrace(-1, me, "CcSetValidData-> %08lx\n", IoStatus.Status ); return IoStatus.Status; } // // Internal Support Routine // BOOLEAN CcAcquireByteRangeForWrite ( IN PSHARED_CACHE_MAP SharedCacheMap, IN PLARGE_INTEGER TargetOffset OPTIONAL, IN ULONG TargetLength, OUT PLARGE_INTEGER FileOffset, OUT PULONG Length, OUT PBCB *FirstBcb ) /*++ Routine Description: This routine is called by the Lazy Writer to try to find a contiguous range of bytes from the specified SharedCacheMap that are dirty and should be flushed. After flushing, these bytes should be released by calling CcReleaseByteRangeFromWrite. Arguments: SharedCacheMap - for the file for which the dirty byte range is sought TargetOffset - If specified, then only the specified range is to be flushed. TargetLength - If target offset specified, this completes the range. In any case, this field is zero for the Lazy Writer, and nonzero for explicit flush calls. 
FileOffset - Returns the offset for the beginning of the dirty byte range to flush Length - Returns the length of bytes in the range. FirstBcb - Returns the first Bcb in the list for the range, to be used when calling CcReleaseByteRangeFromWrite, or NULL if dirty pages were found in the mask Bcb. Return Value: FALSE - if no dirty byte range could be found to match the necessary criteria. TRUE - if a dirty byte range is being returned. --*/ { KIRQL OldIrql; PMBCB Mbcb; PBCB Bcb; LARGE_INTEGER LsnToFlushTo = {0, 0}; DebugTrace(+1, me, "CcAcquireByteRangeForWrite:\n", 0); DebugTrace( 0, me, " SharedCacheMap = %08lx\n", SharedCacheMap); // // Initially clear outputs. // FileOffset->QuadPart = 0; *Length = 0; // // We must acquire the CcMasterSpinLock. // ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); // // See if there is a simple Mask Bcb, and if there is anything dirty in // it. If so we will simply handle that case here by processing the bitmap. // Mbcb = SharedCacheMap->Mbcb; if ((Mbcb != NULL) && (Mbcb->DirtyPages != 0) && ((Mbcb->PagesToWrite != 0) || (TargetLength != 0))) { PULONG EndPtr; PULONG MaskPtr; ULONG Mask; ULONG FirstDirtyPage; ULONG OriginalFirstDirtyPage; // // If a target range was specified (outside call to CcFlush for a range), // then calculate FirstPage and EndPtr based on these inputs. // if (ARGUMENT_PRESENT(TargetOffset)) { FirstDirtyPage = (ULONG)(TargetOffset->QuadPart >> PAGE_SHIFT); EndPtr = &Mbcb->Bitmap.Buffer[(ULONG)((TargetOffset->QuadPart + TargetLength - 1) >> PAGE_SHIFT) / 32]; // // We do not grow the bitmap with the file, only as we set dirty // pages, so it is possible that the caller is off the end. If // If even the first page is off the end, we will catch it below. // if (EndPtr > &Mbcb->Bitmap.Buffer[Mbcb->LastDirtyPage / 32]) { EndPtr = &Mbcb->Bitmap.Buffer[Mbcb->LastDirtyPage / 32]; } // // Otherwise, for the Lazy Writer pick up where we left off. // } else { // // If a length was specified, then it is an explicit flush, and // we want to start with the first dirty page. // FirstDirtyPage = Mbcb->FirstDirtyPage; // // Otherwise, it is the Lazy Writer, so pick up at the resume // point so long as that is beyond the FirstDirtyPage. // if ((TargetLength == 0) && (Mbcb->ResumeWritePage >= FirstDirtyPage)) { FirstDirtyPage = Mbcb->ResumeWritePage; } EndPtr = &Mbcb->Bitmap.Buffer[Mbcb->LastDirtyPage / 32]; } // // Form a few other inputs for our dirty page scan. // MaskPtr = &Mbcb->Bitmap.Buffer[FirstDirtyPage / 32]; Mask = (ULONG)(-1 << (FirstDirtyPage % 32)); OriginalFirstDirtyPage = FirstDirtyPage; // // Because of the possibility of getting stuck on a "hot spot" which gets // modified over and over, we want to be very careful to resume exactly // at the recorded resume point. If there is nothing there, then we // fall into the loop below to scan for nozero long words in the bitmap, // starting at the next longword. // if ((MaskPtr > EndPtr) || (*MaskPtr & Mask) == 0) { MaskPtr += 1; Mask = (ULONG)-1; FirstDirtyPage = (FirstDirtyPage + 32) & ~31; // // If we go beyond the end, then we must wrap back to the first // dirty page. We will just go back to the start of the first // longword. // if (MaskPtr > EndPtr) { // // If this is an explicit flush, get out when we hit the end // of the range. 
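            //
            //  (TargetLength is nonzero only for an explicit flush of a
            //  range; for the Lazy Writer it is zero, so instead of getting
            //  out we wrap back to the first dirty longword of the bitmap
            //  and keep scanning.)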
// if (TargetLength != 0) { goto Scan_Bcbs; } MaskPtr = &Mbcb->Bitmap.Buffer[Mbcb->FirstDirtyPage / 32]; FirstDirtyPage = Mbcb->FirstDirtyPage & ~31; OriginalFirstDirtyPage = Mbcb->FirstDirtyPage; // // We can also backup the last dirty page hint to our // resume point. // ASSERT(Mbcb->ResumeWritePage >= Mbcb->FirstDirtyPage); Mbcb->LastDirtyPage = Mbcb->ResumeWritePage - 1; } // // To scan the bitmap faster, we scan for entire long words which are // nonzero. // while (*MaskPtr == 0) { MaskPtr += 1; FirstDirtyPage += 32; // // If we go beyond the end, then we must wrap back to the first // dirty page. We will just go back to the start of the first // longword. // if (MaskPtr > EndPtr) { // // If this is an explicit flush, get out when we hit the end // of the range. // if (TargetLength != 0) { goto Scan_Bcbs; } MaskPtr = &Mbcb->Bitmap.Buffer[Mbcb->FirstDirtyPage / 32]; FirstDirtyPage = Mbcb->FirstDirtyPage & ~31; OriginalFirstDirtyPage = Mbcb->FirstDirtyPage; // // We can also backup the last dirty page hint to our // resume point. // ASSERT(Mbcb->ResumeWritePage >= Mbcb->FirstDirtyPage); Mbcb->LastDirtyPage = Mbcb->ResumeWritePage - 1; } } } // // Calculate the first set bit in the mask that we hit on. // Mask = ~Mask + 1; // // Now loop to find the first set bit. // while ((*MaskPtr & Mask) == 0) { Mask <<= 1; FirstDirtyPage += 1; } // // If a TargetOffset was specified, then make sure we do not start // beyond the specified range. // if (ARGUMENT_PRESENT(TargetOffset) && (FirstDirtyPage >= ((TargetOffset->QuadPart + TargetLength + PAGE_SIZE - 1) >> PAGE_SHIFT))) { goto Scan_Bcbs; } // // Now loop to count the set bits at that point, clearing them as we // go because we plan to write the corresponding pages. Stop as soon // as we find a clean page, or we reach our maximum write size. Of // course we want to ignore long word boundaries and keep trying to // extend the write. We do not check for wrapping around the end of // the bitmap here, because we guarantee some zero bits at the end // in CcSetDirtyInMask. // while (((*MaskPtr & Mask) != 0) && (*Length < (MAX_WRITE_BEHIND / PAGE_SIZE)) && (!ARGUMENT_PRESENT(TargetOffset) || ((FirstDirtyPage + *Length) < (ULONG)((TargetOffset->QuadPart + TargetLength + PAGE_SIZE - 1) >> PAGE_SHIFT)))) { ASSERT(MaskPtr <= (&Mbcb->Bitmap.Buffer[Mbcb->LastDirtyPage / 32])); *MaskPtr -= Mask; *Length += 1; Mask <<= 1; if (Mask == 0) { MaskPtr += 1; Mask = 1; if (MaskPtr > EndPtr) { break; } } } // // Now reduce the count of pages we were supposed to write this time, // possibly clearing this count. // if (*Length < Mbcb->PagesToWrite) { Mbcb->PagesToWrite -= *Length; } else { Mbcb->PagesToWrite = 0; } // // Reduce the dirty page counts by the number of pages we just cleared. // ASSERT(Mbcb->DirtyPages >= *Length); CcTotalDirtyPages -= *Length; SharedCacheMap->DirtyPages -= *Length; Mbcb->DirtyPages -= *Length; // // Normally we need to reduce CcPagesYetToWrite appropriately. // if (CcPagesYetToWrite > *Length) { CcPagesYetToWrite -= *Length; } else { CcPagesYetToWrite = 0; } // // If we took out the last dirty page, then move the SharedCacheMap // back to the clean list. // if (SharedCacheMap->DirtyPages == 0) { RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks ); InsertTailList( &CcCleanSharedCacheMapList, &SharedCacheMap->SharedCacheMapLinks ); } // // If the number of dirty pages for the Mcb went to zero, we can reset // our hint fields now. 
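        //
        //  (FirstDirtyPage is reset to MAXULONG and LastDirtyPage to zero
        //  below so that the next CcSetDirtyInMask call re-establishes both
        //  hints from scratch; otherwise the first dirty page hint is
        //  advanced only if our scan started at it, and the Lazy Writer
        //  resume point moves past the run just taken.)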
        //

        if (Mbcb->DirtyPages == 0) {

            Mbcb->FirstDirtyPage = MAXULONG;
            Mbcb->LastDirtyPage = 0;
            Mbcb->ResumeWritePage = 0;

        //
        //  Otherwise we have to update the hint fields.
        //

        } else {

            //
            //  Advance the first dirty page hint if we can.
            //

            if (Mbcb->FirstDirtyPage == OriginalFirstDirtyPage) {

                Mbcb->FirstDirtyPage = FirstDirtyPage + *Length;
            }

            //
            //  Set to resume the next scan at the next bit for
            //  the Lazy Writer.
            //

            if (TargetLength == 0) {

                Mbcb->ResumeWritePage = FirstDirtyPage + *Length;
            }
        }

        //
        //  We can save a callback by letting our caller know when
        //  we have no more pages to write.
        //

        if (IsListEmpty(&SharedCacheMap->BcbList)) {
            SharedCacheMap->PagesToWrite = Mbcb->PagesToWrite;
        }

        ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );

        //
        //  Now form all of our outputs.  We calculated *Length as a page count,
        //  but our caller wants it in bytes.
        //

        *Length <<= PAGE_SHIFT;
        FileOffset->QuadPart = (LONGLONG)FirstDirtyPage << PAGE_SHIFT;
        *FirstBcb = NULL;

        DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart,
                    FileOffset->HighPart );
        DebugTrace( 0, me, " TRUE\n", 0 );

        return TRUE;
    }

    //
    //  We get here if there is no Mbcb or no dirty pages in it.  Note that we
    //  wouldn't even be here if there were no dirty pages in this SharedCacheMap.
    //

    //
    //  Now point to last Bcb in List, and loop until we hit one of the
    //  breaks below or the beginning of the list.
    //

Scan_Bcbs:

    //
    //  Use while TRUE to handle case where the current target range wraps
    //  (escape is at the bottom).
    //

    while (TRUE) {

        Bcb = CONTAINING_RECORD( SharedCacheMap->BcbList.Blink, BCB, BcbLinks );

        //
        //  If this is a large file, and we are to resume from a nonzero FileOffset,
        //  call CcFindBcb to get a quicker start.
        //

        if ((SharedCacheMap->SectionSize.QuadPart > BEGIN_BCB_LIST_ARRAY) &&
            !ARGUMENT_PRESENT(TargetOffset) &&
            (SharedCacheMap->BeyondLastFlush != 0)) {

            LARGE_INTEGER TempQ;

            TempQ.QuadPart = SharedCacheMap->BeyondLastFlush + PAGE_SIZE;

            //
            //  Position ourselves.  If we did not find a Bcb for the BeyondLastFlush
            //  page, then a lower FileOffset was returned, so we want to move forward
            //  one.
            //

            if (!CcFindBcb( SharedCacheMap,
                            (PLARGE_INTEGER)&SharedCacheMap->BeyondLastFlush,
                            &TempQ,
                            &Bcb )) {

                Bcb = CONTAINING_RECORD( Bcb->BcbLinks.Blink, BCB, BcbLinks );
            }
        }

        while (&Bcb->BcbLinks != &SharedCacheMap->BcbList) {

            //
            //  Skip over this item if it is a listhead.
            //

            if (Bcb->NodeTypeCode != CACHE_NTC_BCB) {

                Bcb = CONTAINING_RECORD( Bcb->BcbLinks.Blink, BCB, BcbLinks );
                continue;
            }

            //
            //  If we are doing a specified range, then get out if we hit a
            //  higher Bcb.
            //

            if (ARGUMENT_PRESENT(TargetOffset) &&
                ((TargetOffset->QuadPart + TargetLength) <= Bcb->FileOffset.QuadPart)) {

                break;
            }

            //
            //  If we have not started a run, then see if this Bcb is a candidate
            //  to start one.
            //

            if (*Length == 0) {

                //
                //  See if the Bcb is dirty, and is in our specified range, if
                //  there is one.
                //

                if (!Bcb->Dirty ||
                    (ARGUMENT_PRESENT(TargetOffset) &&
                     (TargetOffset->QuadPart >= Bcb->BeyondLastByte.QuadPart)) ||
                    (!ARGUMENT_PRESENT(TargetOffset) &&
                     (Bcb->FileOffset.QuadPart < SharedCacheMap->BeyondLastFlush))) {

                    Bcb = CONTAINING_RECORD( Bcb->BcbLinks.Blink, BCB, BcbLinks );
                    continue;
                }
            }

            //
            //  Else, if we have started a run, then if this guy cannot be
            //  appended to the run, then break.  Note that we ignore the
            //  Bcb's modification time stamp here to simplify the test.
            //
            //  If the Bcb is currently pinned, then there is no sense in causing
            //  contention, so we will skip over this guy as well.
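            //
            //  (Concretely, to be appended the Bcb must still be dirty, must start
            //  exactly at FileOffset + *Length, must not push the run past
            //  MAX_WRITE_BEHIND, and must not be pinned by anyone.)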
// else { if (!Bcb->Dirty || ( Bcb->FileOffset.QuadPart != ( FileOffset->QuadPart + (LONGLONG)*Length)) || (*Length + Bcb->ByteLength > MAX_WRITE_BEHIND) || (Bcb->PinCount != 0)) { break; } } // // Increment PinCount to prevent Bcb from going away once the // SpinLock is released, or we set it clean for the case where // modified write is allowed. // Bcb->PinCount += 1; // // Release the SpinLock before waiting on the resource. // ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); if (FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED) && !FlagOn(SharedCacheMap->Flags, DISABLE_WRITE_BEHIND)) { // // Now acquire the Bcb exclusive, so that we know that nobody // has it pinned and thus no one can be modifying the described // buffer. To acquire the first Bcb in a run, we can afford // to wait, because we are not holding any resources. However // if we already have a Bcb, then we better not wait, because // someone could have this Bcb pinned, and then wait for the // Bcb we already have exclusive. // // For streams for which we have not disabled modified page // writing, we do not need to acquire this resource, and the // foreground processing will not be acquiring the Bcb either. // if (!ExAcquireResourceExclusive( &Bcb->Resource, (BOOLEAN)(*Length == 0) )) { DebugTrace( 0, me, "Could not acquire 2nd Bcb\n", 0 ); // // Release the Bcb count we took out above. We say // ReadOnly = TRUE since we do not own the resource, // and SetClean = FALSE because we just want to decement // the count. // CcUnpinFileData( Bcb, TRUE, UNPIN ); // // When we leave the loop, we have to have the spin lock // ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); break; } ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); // // If someone has the file open WriteThrough, then the Bcb may no // longer be dirty. If so, call CcUnpinFileData to decrement the // PinCount we incremented and free the resource. // if (!Bcb->Dirty) { // // Release the spinlock so that we can call CcUnpinFileData // ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); CcUnpinFileData( Bcb, FALSE, UNPIN ); ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); // // Now if we already have some data we can just break to return // it, otherwise we have to restart the scan, since our Bcb // may have gone away. // if (*Length != 0) { break; } else { Bcb = CONTAINING_RECORD( SharedCacheMap->BcbList.Blink, BCB, BcbLinks ); continue; } } // // If we are not in the disable modified write mode (normal user data) // then we must set the buffer clean before doing the write, since we // are unsynchronized with anyone producing dirty data. That way if we, // for example, are writing data out while it is actively being changed, // at least the changer will mark the buffer dirty afterwards and cause // us to write it again later. // } else { CcUnpinFileData( Bcb, TRUE, SET_CLEAN ); ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); } DebugTrace( 0, me, "Adding Bcb = %08lx to run\n", Bcb ); // // Update all of our return values. Note that FirstBcb refers to the // FirstBcb in terms of how the Bcb list is ordered. Since the Bcb list // is ordered by descending file offsets, FirstBcb will actually return // the Bcb with the highest FileOffset. // if (*Length == 0) { *FileOffset = Bcb->FileOffset; } *FirstBcb = Bcb; *Length += Bcb->ByteLength; // // If there is a log file flush callback for this stream, then we must // remember the largest Lsn we are about to flush. 
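            //
            //  (This is the write-ahead rule for log-based streams: the log must be
            //  flushed at least to the newest Lsn describing these buffers before
            //  the buffers themselves go to disk.  We only record the Lsn here; the
            //  actual FlushToLsnRoutine call is made after the spin lock is dropped.)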
            //

            if ((SharedCacheMap->FlushToLsnRoutine != NULL) &&
                (Bcb->NewestLsn.QuadPart > LsnToFlushTo.QuadPart)) {

                LsnToFlushTo = Bcb->NewestLsn;
            }

            Bcb = CONTAINING_RECORD( Bcb->BcbLinks.Blink, BCB, BcbLinks );
        }

        //
        //  If we found something, update our last flush range and reduce
        //  PagesToWrite.
        //

        if (*Length != 0) {

            //
            //  If this is the Lazy Writer, then update BeyondLastFlush and
            //  the PagesToWrite target.
            //

            if (!ARGUMENT_PRESENT(TargetOffset)) {

                SharedCacheMap->BeyondLastFlush = FileOffset->QuadPart + *Length;

                if (SharedCacheMap->PagesToWrite > (*Length >> PAGE_SHIFT)) {
                    SharedCacheMap->PagesToWrite -= (*Length >> PAGE_SHIFT);
                } else {
                    SharedCacheMap->PagesToWrite = 0;
                }
            }

            break;

        //
        //  Else, if we scanned the entire file, get out - nothing to write now.
        //

        } else if ((SharedCacheMap->BeyondLastFlush == 0) || ARGUMENT_PRESENT(TargetOffset)) {

            break;
        }

        //
        //  Otherwise, we may have not found anything because there is nothing
        //  beyond the last flush.  In that case it is time to wrap back to 0
        //  and keep scanning.
        //

        SharedCacheMap->BeyondLastFlush = 0;
    }

    //
    //  Now release the spinlock while we go off and do the I/O
    //

    ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );

    //
    //  If we need to flush to some Lsn, this is the time to do it now
    //  that we have found the largest Lsn and freed the spin lock.
    //

    if (LsnToFlushTo.QuadPart != 0) {

        try {

            (*SharedCacheMap->FlushToLsnRoutine) ( SharedCacheMap->LogHandle,
                                                   LsnToFlushTo );

        } except( CcExceptionFilter( GetExceptionCode() )) {

            //
            //  If there was an error, it will be raised.  We cannot
            //  write anything until we successfully flush the log
            //  file, so we will release everything here and just
            //  return with 0 bytes.
            //

            LARGE_INTEGER LastOffset;
            PBCB NextBcb;

            //
            //  Now loop to free up all of the Bcbs.  Set the time
            //  stamps to 0, so that we are guaranteed to try to
            //  flush them again on the next sweep.
            //

            do {

                NextBcb = CONTAINING_RECORD( (*FirstBcb)->BcbLinks.Flink, BCB, BcbLinks );

                //
                //  Skip over any listheads.
                //

                if ((*FirstBcb)->NodeTypeCode == CACHE_NTC_BCB) {

                    LastOffset = (*FirstBcb)->FileOffset;

                    CcUnpinFileData( *FirstBcb, FALSE, UNPIN );
                }

                *FirstBcb = NextBcb;

            } while (FileOffset->QuadPart != LastOffset.QuadPart);

            //
            //  Show we did not acquire anything.
            //

            *Length = 0;
        }
    }

    //
    //  If we got anything, return TRUE.
    //

    DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart,
                FileOffset->HighPart );
    DebugTrace( 0, me, " %02lx\n", *Length != 0 );

    return ((BOOLEAN)(*Length != 0));
}

//
//  Internal Support Routine
//

VOID
CcReleaseByteRangeFromWrite (
    IN PSHARED_CACHE_MAP SharedCacheMap,
    IN PLARGE_INTEGER FileOffset,
    IN ULONG Length,
    IN PBCB FirstBcb,
    IN BOOLEAN VerifyRequired
    )

/*++

Routine Description:

    This routine is called by the Lazy Writer to free a range of bytes and
    clear all dirty bits, for a byte range returned by
    CcAcquireByteRangeForWrite.

Arguments:

    SharedCacheMap - As supplied to CcAcquireByteRangeForWrite

    FileOffset - As returned from CcAcquireByteRangeForWrite

    Length - As returned from CcAcquireByteRangeForWrite

    FirstBcb - As returned from CcAcquireByteRangeForWrite

    VerifyRequired - supplied as TRUE if a verify required error was received.
                     In this case we must mark/leave the data dirty so that
                     we will try to write it again.

Return Value:

    None

--*/

{
    LARGE_INTEGER LastOffset;
    PBCB NextBcb;

    DebugTrace(+1, me, "CcReleaseByteRangeFromWrite:\n", 0);
    DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart,
                FileOffset->HighPart );

    //
    //  If it is a mask Mbcb we are getting, then we only have to check
    //  for VerifyRequired.
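    //
    //  (FirstBcb == NULL is how CcAcquireByteRangeForWrite indicates that the
    //  range came from the dirty-page mask rather than from pinned Bcbs, so there
    //  are no pin counts or resources to release; we only have to set the mask
    //  dirty again if the write must be retried.)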
// if (FirstBcb == NULL) { ASSERT(Length != 0); if (VerifyRequired) { CcSetDirtyInMask( SharedCacheMap, FileOffset, Length ); } DebugTrace(-1, me, "CcReleaseByteRangeFromWrite -> VOID\n", 0); return; } // // Now loop to free up all of the Bcbs. If modified writing is disabled // for each Bcb, then we are to set it clean here, since we are synchronized // with callers who set the data dirty. Otherwise we only have the Bcb pinned // so it will not go away, and we only unpin it here. // do { NextBcb = CONTAINING_RECORD( FirstBcb->BcbLinks.Flink, BCB, BcbLinks ); // // Skip over any listheads. // if (FirstBcb->NodeTypeCode == CACHE_NTC_BCB) { LastOffset = FirstBcb->FileOffset; // // If this is file system metadata (we disabled modified writing), // then this is the time to mark the buffer clean, so long as we // did not get verify required. // if (FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) { CcUnpinFileData( FirstBcb, BooleanFlagOn(SharedCacheMap->Flags, DISABLE_WRITE_BEHIND), SET_CLEAN ); } // // If we got verify required, we have to mark the buffer dirty again // so we will try again later. Note we have to make this call again // to make sure the right thing happens with time stamps. // if (VerifyRequired) { CcSetDirtyPinnedData( FirstBcb, NULL ); } // // Finally remove a pin count left over from CcAcquireByteRangeForWrite. // CcUnpinFileData( FirstBcb, TRUE, UNPIN ); } FirstBcb = NextBcb; } while (FileOffset->QuadPart != LastOffset.QuadPart); DebugTrace(-1, me, "CcReleaseByteRangeFromWrite -> VOID\n", 0); } // // Internal Support Routine // NTSTATUS FASTCALL CcWriteBehind ( IN PSHARED_CACHE_MAP SharedCacheMap ) /*++ Routine Description: This routine may be called with Wait = FALSE to see if write behind is required, or with Wait = TRUE to perform write behind as required. The code is very similar to the the code that the Lazy Writer performs for each SharedCacheMap. The main difference is in the call to CcAcquireByteRangeForWrite. Write Behind does not care about time stamps (passing ULONG to accept all time stamps), but it will never dump the first (highest byte offset) buffer in the list if the last byte of that buffer is not yet written. The Lazy Writer does exactly the opposite, in the sense that it is totally time-driven, and will even dump a partially modified buffer if it sits around long enough. Arguments: SharedCacheMap - Pointer to SharedCacheMap to be written Return Value: FALSE - if write behind is required, but the caller supplied Wait = FALSE TRUE - if write behind is complete or not required --*/ { IO_STATUS_BLOCK IoStatus; KIRQL OldIrql; ULONG ActivePage; ULONG PageIsDirty; PMBCB Mbcb; NTSTATUS Status; ULONG FileExclusive = FALSE; PVACB ActiveVacb = NULL; DebugTrace(+1, me, "CcWriteBehind\n", 0 ); DebugTrace( 0, me, " SharedCacheMap = %08lx\n", SharedCacheMap ); // // First we have to acquire the file for LazyWrite, to avoid // deadlocking with writers to the file. We do this via the // CallBack procedure specified to CcInitializeCacheMap. // (*SharedCacheMap->Callbacks->AcquireForLazyWrite) ( SharedCacheMap->LazyWriteContext, TRUE ); // // See if there is a previous active page to clean up, but only // do so now if it is the last dirty page or no users have the // file open. We will free it below after dropping the spinlock. 
    //

    ExAcquireFastLock( &CcMasterSpinLock, &OldIrql );

    if ((SharedCacheMap->DirtyPages <= 1) || (SharedCacheMap->OpenCount == 0)) {

        GetActiveVacbAtDpcLevel( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty );
    }

    //
    //  Increment open count so that our caller's views stay available
    //  for CcGetVacbMiss.  We could be tying up all of the views, and
    //  still need to write file sizes.
    //

    SharedCacheMap->OpenCount += 1;

    //
    //  If there is a mask Bcb, then we need to establish a target for
    //  it to flush.
    //

    if ((Mbcb = SharedCacheMap->Mbcb) != 0) {

        //
        //  Set a target of pages to write, assuming that any Active
        //  Vacb will increase the number.
        //

        Mbcb->PagesToWrite = Mbcb->DirtyPages + ((ActiveVacb != NULL) ? 1 : 0);

        if (Mbcb->PagesToWrite > CcPagesYetToWrite) {

            Mbcb->PagesToWrite = CcPagesYetToWrite;
        }
    }

    ExReleaseFastLock( &CcMasterSpinLock, OldIrql );

    //
    //  Now free the active Vacb, if we found one.
    //

    if (ActiveVacb != NULL) {

        CcFreeActiveVacb( SharedCacheMap, ActiveVacb, ActivePage, PageIsDirty );
    }

    //
    //  Now perform the lazy writing for this file via a special call
    //  to CcFlushCache.  He recognizes us by the &CcNoDelay input to
    //  FileOffset, which signifies a Lazy Write, but is subsequently
    //  ignored.
    //

    CcFlushCache( SharedCacheMap->FileObject->SectionObjectPointer,
                  &CcNoDelay,
                  1,
                  &IoStatus );

    //
    //  No need for the Lazy Write resource now.
    //

    (*SharedCacheMap->Callbacks->ReleaseFromLazyWrite)
                                    ( SharedCacheMap->LazyWriteContext );

    //
    //  Check if we need to put up a popup.
    //

    if (!NT_SUCCESS(IoStatus.Status) && !RetryError(IoStatus.Status)) {

        //
        //  We lost writebehind data.  Try to get the filename.  If we can't,
        //  then just raise the error returned by the failing write.
        //

        POBJECT_NAME_INFORMATION FileNameInfo;
        NTSTATUS QueryStatus;
        ULONG whocares;

        FileNameInfo = ExAllocatePool(PagedPool,1024);

        if ( FileNameInfo ) {

            QueryStatus = ObQueryNameString( SharedCacheMap->FileObject,
                                             FileNameInfo,
                                             1024,
                                             &whocares );

            if ( !NT_SUCCESS(QueryStatus) ) {
                ExFreePool(FileNameInfo);
                FileNameInfo = NULL;
            }
        }

        if ( FileNameInfo ) {

            IoRaiseInformationalHardError( STATUS_LOST_WRITEBEHIND_DATA,
                                           &FileNameInfo->Name,
                                           NULL );

            ExFreePool(FileNameInfo);

        } else {

            if ( SharedCacheMap->FileObject->FileName.Length &&
                 SharedCacheMap->FileObject->FileName.MaximumLength &&
                 SharedCacheMap->FileObject->FileName.Buffer ) {

                IoRaiseInformationalHardError( STATUS_LOST_WRITEBEHIND_DATA,
                                               &SharedCacheMap->FileObject->FileName,
                                               NULL );
            }
        }

    //
    //  See if there are any deferred writes we can post.
    //

    } else if (!IsListEmpty(&CcDeferredWrites)) {

        CcPostDeferredWrites();
    }

    //
    //  Now acquire CcMasterSpinLock again to
    //  see if we need to call CcUninitialize before returning.
    //

    ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );

    //
    //  If the current ValidDataGoal is greater than (or equal to) ValidDataLength,
    //  then we must see if we have advanced beyond the current ValidDataLength.
    //
    //  If we have NEVER written anything out from this shared cache map, then
    //  there is no need to check anything associated with valid data length
    //  here.  We will come by here again when, and if, anybody actually
    //  modifies the file and we lazy write some data.
// Status = STATUS_SUCCESS; if (FlagOn(SharedCacheMap->Flags, LAZY_WRITE_OCCURRED) && (SharedCacheMap->ValidDataGoal.QuadPart >= SharedCacheMap->ValidDataLength.QuadPart) && (SharedCacheMap->ValidDataLength.QuadPart != MAXLONGLONG) && (SharedCacheMap->FileSize.QuadPart != 0)) { LARGE_INTEGER NewValidDataLength = {0,0}; // // If the Bcb List is completely empty, then we must have written // everything, and then new ValidDataLength is equal to ValidDataGoal. // if (SharedCacheMap->DirtyPages == 0) { NewValidDataLength = SharedCacheMap->ValidDataGoal; } // // Else we will look at the last Bcb in the descending-order Bcb // list, and see if it describes data beyond ValidDataGoal. // // (This test is logically too conservative. For example, the last Bcb // may not even be dirty (in which case we should look at its // predecessor), or we may have earlier written valid data to this // byte range (which also means if we knew this we could look at // the predessor). This simply means that the Lazy Writer may not // successfully get ValidDataLength updated in a file being randomly // accessed until the level of file access dies down, or at the latest // until the file is closed. However, security will never be // compromised.) // else { PBCB LastBcb; PMBCB Mbcb = SharedCacheMap->Mbcb; if ((Mbcb != NULL) && (Mbcb->DirtyPages != 0)) { NewValidDataLength.QuadPart = (LONGLONG)Mbcb->FirstDirtyPage << PAGE_SHIFT; } LastBcb = CONTAINING_RECORD( SharedCacheMap->BcbList.Flink, BCB, BcbLinks ); while (&LastBcb->BcbLinks != &SharedCacheMap->BcbList) { if ((LastBcb->NodeTypeCode == CACHE_NTC_BCB) && LastBcb->Dirty) { break; } LastBcb = CONTAINING_RECORD( LastBcb->BcbLinks.Flink, BCB, BcbLinks ); } // // Check the Base of the last entry. // if ((&LastBcb->BcbLinks != &SharedCacheMap->BcbList) && (LastBcb->FileOffset.QuadPart < NewValidDataLength.QuadPart )) { NewValidDataLength = LastBcb->FileOffset; } } // // If New ValidDataLength has been written, then we have to // call the file system back to update it. We must temporarily // drop our global list while we do this, which is safe to do since // we have not cleared WRITE_QUEUED. // // Note we keep calling any time we wrote the last page of the file, // to solve the "famous" AFS Server problem. The file system will // truncate our valid data call to whatever is currently valid. But // then if he writes a little more, we do not want to stop calling // back. // if ( NewValidDataLength.QuadPart >= SharedCacheMap->ValidDataLength.QuadPart ) { ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); // // Call file system to set new valid data. We have no // one to tell if this doesn't work. // Status = CcSetValidData( SharedCacheMap->FileObject, &NewValidDataLength ); ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); if (NT_SUCCESS(Status)) { SharedCacheMap->ValidDataLength = NewValidDataLength; #ifdef TOMM } else if ((Status != STATUS_INSUFFICIENT_RESOURCES) && !RetryError(Status)) { DbgPrint("Unexpected status from CcSetValidData: %08lx, FileObject: %08lx\n", Status, SharedCacheMap->FileObject); DbgBreakPoint(); #endif TOMM } } } // // Show we are done. // SharedCacheMap->OpenCount -= 1; // // Make an approximate guess about whether we will call CcDeleteSharedCacheMap or not // to truncate the file. If we fail to acquire here, then we will not delete below, // and just catch it on a subsequent pass. 
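    //
    //  (It is only a guess because we must drop the spin lock to acquire the
    //  file exclusive, and OpenCount can change while the lock is dropped; the
    //  real decision to delete is made again below with the lock held.)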
// if (FlagOn(SharedCacheMap->Flags, TRUNCATE_REQUIRED) && (SharedCacheMap->OpenCount == 0)) { ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); FsRtlAcquireFileExclusive( SharedCacheMap->FileObject ); FileExclusive = TRUE; ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); } // // Otherwise see if we are to delete this SharedCacheMap. Note // we go ahead and release the Resource first, because with // OpenCount == 0 and an empty Bcb list, no one will be trying // to access this SharedCacheMap but us. Also, by releasing first // we avoid a deadlock with the file system when the FileObject is // dereferenced. Note that CcDeleteSharedCacheMap requires that // the CcMasterSpinLock already be acquired, and it // releases it. We have to clear the indirect pointer in this // case, because no one else will do it. // // Also do not delete the SharedCacheMap if we got an error on // the ValidDataLength callback. If we get a resource allocation // failure or a retryable error (due to log file full?), we have // no one to tell, so we must just loop back and try again. Of // course all I/O errors are just too bad. // if ((SharedCacheMap->OpenCount == 0) && ((SharedCacheMap->DirtyPages == 0) || ((SharedCacheMap->FileSize.QuadPart == 0) && !FlagOn(SharedCacheMap->Flags, PIN_ACCESS))) && (FileExclusive || !FlagOn(SharedCacheMap->Flags, TRUNCATE_REQUIRED)) && (NT_SUCCESS(Status) || ((Status != STATUS_INSUFFICIENT_RESOURCES) && !RetryError(Status)))) { CcDeleteSharedCacheMap( SharedCacheMap, OldIrql, FileExclusive ); } // // In the normal case, we just release the resource on the way out. // else { // // Now release the file if we have it. // if (FileExclusive) { ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); FsRtlReleaseFile( SharedCacheMap->FileObject ); ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); } ClearFlag(SharedCacheMap->Flags, WRITE_QUEUED); ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); } DebugTrace(-1, me, "CcWriteBehind->VOID\n", 0 ); return IoStatus.Status; } VOID CcFlushCache ( IN PSECTION_OBJECT_POINTERS SectionObjectPointer, IN PLARGE_INTEGER FileOffset OPTIONAL, IN ULONG Length, OUT PIO_STATUS_BLOCK IoStatus OPTIONAL ) /*++ Routine Description: This routine may be called to flush dirty data from the cache to the cached file on disk. Any byte range within the file may be flushed, or the entire file may be flushed by omitting the FileOffset parameter. This routine does not take a Wait parameter; the caller should assume that it will always block. Arguments: SectionObjectPointer - A pointer to the Section Object Pointers structure in the nonpaged Fcb. FileOffset - If this parameter is supplied (not NULL), then only the byte range specified by FileOffset and Length are flushed. If &CcNoDelay is specified, then this signifies the call from the Lazy Writer, and the lazy write scan should resume as normal from the last spot where it left off in the file. Length - Defines the length of the byte range to flush, starting at FileOffset. This parameter is ignored if FileOffset is specified as NULL. IoStatus - The I/O status resulting from the flush operation. Return Value: None. 
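    As an illustration only (Fcb is a hypothetical file system structure here,
    and its field names are not defined by this module), an explicit flush of a
    byte range might look like:

        CcFlushCache( &Fcb->SectionObjectPointers, &FlushOffset, FlushLength, &Iosb );

    while passing NULL for FileOffset flushes the entire file.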
--*/ { LARGE_INTEGER NextFileOffset, TargetOffset; ULONG NextLength; PBCB FirstBcb; KIRQL OldIrql; PSHARED_CACHE_MAP SharedCacheMap; IO_STATUS_BLOCK TrashStatus; PVOID TempVa; ULONG RemainingLength, TempLength; NTSTATUS PopupStatus; BOOLEAN HotSpot; ULONG BytesWritten = 0; BOOLEAN PopupRequired = FALSE; BOOLEAN VerifyRequired = FALSE; BOOLEAN IsLazyWriter = FALSE; BOOLEAN FreeActiveVacb = FALSE; PVACB ActiveVacb = NULL; NTSTATUS Status = STATUS_SUCCESS; DebugTrace(+1, me, "CcFlushCache:\n", 0 ); DebugTrace( 0, mm, " SectionObjectPointer = %08lx\n", SectionObjectPointer ); DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", ARGUMENT_PRESENT(FileOffset) ? FileOffset->LowPart : 0, ARGUMENT_PRESENT(FileOffset) ? FileOffset->HighPart : 0 ); DebugTrace( 0, me, " Length = %08lx\n", Length ); // // If IoStatus passed a Null pointer, set up to through status away. // if (!ARGUMENT_PRESENT(IoStatus)) { IoStatus = &TrashStatus; } IoStatus->Status = STATUS_SUCCESS; IoStatus->Information = 0; // // See if this is the Lazy Writer. Since he wants to use this common // routine, which is also a public routine callable by file systems, // the Lazy Writer shows his call by specifying CcNoDelay as the file offset! // // Also, in case we do not write anything because we see only HotSpot(s), // initialize the Status to indicate a retryable error, so CcWorkerThread // knows we did not make any progress. Of course any actual flush will // overwrite this code. // if (FileOffset == &CcNoDelay) { IoStatus->Status = STATUS_VERIFY_REQUIRED; IsLazyWriter = TRUE; FileOffset = NULL; } // // If there is nothing to do, return here. // if (ARGUMENT_PRESENT(FileOffset) && (Length == 0)) { DebugTrace(-1, me, "CcFlushCache -> VOID\n", 0 ); return; } // // See if the file is cached. // ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql ); SharedCacheMap = SectionObjectPointer->SharedCacheMap; if (SharedCacheMap != NULL) { // // Increment the open count to keep it from going away. // SharedCacheMap->OpenCount += 1; if ((SharedCacheMap->NeedToZero != NULL) || (SharedCacheMap->ActiveVacb != NULL)) { ULONG FirstPage = 0; ULONG LastPage = MAXULONG; if (ARGUMENT_PRESENT(FileOffset)) { FirstPage = (ULONG)(FileOffset->QuadPart >> PAGE_SHIFT); LastPage = (ULONG)((FileOffset->QuadPart + Length - 1) >> PAGE_SHIFT); } // // Make sure we do not flush the active page without zeroing any // uninitialized data. Also, it is very important to free the active // page if it is the one to be flushed, so that we get the dirty // bit out to the Pfn. // if (((((LONGLONG)LastPage + 1) << PAGE_SHIFT) > SharedCacheMap->ValidDataGoal.QuadPart) || ((SharedCacheMap->NeedToZero != NULL) && (FirstPage <= SharedCacheMap->NeedToZeroPage) && (LastPage >= SharedCacheMap->NeedToZeroPage)) || ((SharedCacheMap->ActiveVacb != NULL) && (FirstPage <= SharedCacheMap->ActivePage) && (LastPage >= SharedCacheMap->ActivePage))) { GetActiveVacbAtDpcLevel( SharedCacheMap, ActiveVacb, RemainingLength, TempLength ); FreeActiveVacb = TRUE; } } } ExReleaseSpinLock( &CcMasterSpinLock, OldIrql ); if (FreeActiveVacb) { CcFreeActiveVacb( SharedCacheMap, ActiveVacb, RemainingLength, TempLength ); } // // Scan for dirty pages if there is a shared cache map. // if (SharedCacheMap != NULL) { // // If FileOffset was not specified then set to flush entire region // and set valid data length to the goal so that we will not get // any more call backs. 
// if (!IsLazyWriter && !ARGUMENT_PRESENT(FileOffset)) { SharedCacheMap->ValidDataLength = SharedCacheMap->ValidDataGoal; } // // If this is an explicit flush, initialize our offset to scan for. // if (ARGUMENT_PRESENT(FileOffset)) { TargetOffset = *FileOffset; } // // Assume we want to pass the explicit flush flag in Length. // But overwrite it if a length really was specified. On // subsequent loops, NextLength will have some nonzero value. // NextLength = 1; if (Length != 0) { NextLength = Length; } // // Loop as long as we find buffers to flush for this // SharedCacheMap, and we are not trying to delete the guy. // while (((SharedCacheMap->PagesToWrite != 0) || !IsLazyWriter) && ((SharedCacheMap->FileSize.QuadPart != 0) || FlagOn(SharedCacheMap->Flags, PIN_ACCESS)) && !VerifyRequired && CcAcquireByteRangeForWrite ( SharedCacheMap, IsLazyWriter ? NULL : (ARGUMENT_PRESENT(FileOffset) ? &TargetOffset : NULL), IsLazyWriter ? 0: NextLength, &NextFileOffset, &NextLength, &FirstBcb )) { // // Assume this range is not a hot spot. // HotSpot = FALSE; // // We defer calling Mm to set address range modified until here, to take // overhead out of the main line path, and to reduce the number of TBIS // on a multiprocessor. // RemainingLength = NextLength; do { // // See if the next file offset is mapped. (If not, the dirty bit // was propagated on the unmap.) // if ((TempVa = CcGetVirtualAddressIfMapped( SharedCacheMap, NextFileOffset.QuadPart + NextLength - RemainingLength, &ActiveVacb, &TempLength)) != NULL) { // // Reduce TempLength to RemainingLength if necessary, and // call MM. // if (TempLength > RemainingLength) { TempLength = RemainingLength; } // // Clear the Dirty bit (if set) in the PTE and set the // Pfn modified. Assume if the Pte was dirty, that this may // be a hot spot. Do not do hot spots for metadata, and unless // they are within ValidDataLength as reported to the file system // via CcSetValidData. // HotSpot = (BOOLEAN)((MmSetAddressRangeModified(TempVa, TempLength) || HotSpot) && ((NextFileOffset.QuadPart + NextLength) < (SharedCacheMap->ValidDataLength.QuadPart)) && ((SharedCacheMap->LazyWritePassCount & 0xF) != 0) && IsLazyWriter) && !FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED); CcFreeVirtualAddress( ActiveVacb ); } else { // // Reduce TempLength to RemainingLength if necessary. // if (TempLength > RemainingLength) { TempLength = RemainingLength; } } // // Reduce RemainingLength by what we processed. // RemainingLength -= TempLength; // // Loop until done. // } while (RemainingLength != 0); CcLazyWriteHotSpots += HotSpot; // // Now flush, now flush if we do not think it is a hot spot. 
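            //
            //  (A hot spot is a range that is being redirtied as fast as we can
            //  clean it.  Skipping it on this pass avoids writing the same page
            //  over and over; CcReleaseByteRangeFromWrite below sets the pages
            //  dirty again, so the data is picked up on a later pass.)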
// if (!HotSpot) { MmFlushSection( SharedCacheMap->FileObject->SectionObjectPointer, &NextFileOffset, NextLength, IoStatus, !IsLazyWriter ); if (NT_SUCCESS(IoStatus->Status)) { ExAcquireFastLock( &CcMasterSpinLock, &OldIrql ); SetFlag(SharedCacheMap->Flags, LAZY_WRITE_OCCURRED); ExReleaseFastLock( &CcMasterSpinLock, OldIrql ); // // Increment performance counters // if (IsLazyWriter) { CcLazyWriteIos += 1; CcLazyWritePages += (NextLength + PAGE_SIZE - 1) >> PAGE_SHIFT; } } else { LARGE_INTEGER Offset = NextFileOffset; ULONG RetryLength = NextLength; DebugTrace2( 0, 0, "I/O Error on Cache Flush: %08lx, %08lx\n", IoStatus->Status, IoStatus->Information ); if (RetryError(IoStatus->Status)) { VerifyRequired = TRUE; // // Loop to write each page individually, starting with one // more try on the page that got the error, in case that page // or any page beyond it can be successfully written // individually. Note that Offset and RetryLength are // guaranteed to be in integral pages, but the Information // field from the failed request is not. // // We ignore errors now, and give it one last shot, before // setting the pages clean (see below). // } else { do { DebugTrace2( 0, 0, "Trying page at offset %08lx, %08lx\n", Offset.LowPart, Offset.HighPart ); MmFlushSection ( SharedCacheMap->FileObject->SectionObjectPointer, &Offset, PAGE_SIZE, IoStatus, !IsLazyWriter ); DebugTrace2( 0, 0, "I/O status = %08lx, %08lx\n", IoStatus->Status, IoStatus->Information ); if (NT_SUCCESS(IoStatus->Status)) { ExAcquireFastLock( &CcMasterSpinLock, &OldIrql ); SetFlag(SharedCacheMap->Flags, LAZY_WRITE_OCCURRED); ExReleaseFastLock( &CcMasterSpinLock, OldIrql ); } if ((!NT_SUCCESS(IoStatus->Status)) && !RetryError(IoStatus->Status)) { PopupRequired = TRUE; PopupStatus = IoStatus->Status; } VerifyRequired = VerifyRequired || RetryError(IoStatus->Status); Offset.QuadPart = Offset.QuadPart + (LONGLONG)PAGE_SIZE; RetryLength -= PAGE_SIZE; } while(RetryLength > 0); } } } // // Now release the Bcb resources and set them clean. Note we do not check // here for errors, and just returned in the I/O status. Errors on writes // are rare to begin with. Nonetheless, our strategy is to rely on // one or more of the following (depending on the file system) to prevent // errors from getting to us. // // - Retries and/or other forms of error recovery in the disk driver // - Mirroring driver // - Hot fixing in the noncached path of the file system // // In the unexpected case that a write error does get through, we // *currently* just set the Bcbs clean anyway, rather than let // Bcbs and pages accumulate which cannot be written. Note we did // a popup above to at least notify the guy. // // Set the pages dirty again if we either saw a HotSpot or got // verify required. // CcReleaseByteRangeFromWrite ( SharedCacheMap, &NextFileOffset, NextLength, FirstBcb, (BOOLEAN)(HotSpot || VerifyRequired) ); // // See if there is any deferred writes we should post. // BytesWritten += NextLength; if ((BytesWritten >= 0x40000) && !IsListEmpty(&CcDeferredWrites)) { CcPostDeferredWrites(); BytesWritten = 0; } // // Now for explicit flushes, we should advance our range. // if (ARGUMENT_PRESENT(FileOffset)) { NextFileOffset.QuadPart += NextLength; // // Done yet? 
                //

                if ((FileOffset->QuadPart + Length) <= NextFileOffset.QuadPart) {

                    break;
                }

                //
                //  Calculate new target range
                //

                NextLength = (ULONG)((FileOffset->QuadPart + Length) - NextFileOffset.QuadPart);
                TargetOffset = NextFileOffset;
            }
        }
    }

    //
    //  If there is a user-mapped file, then we perform the "service" of
    //  flushing even data not written via the file system.  To do this
    //  we simply reissue the original flush, sigh.
    //

    if ((SharedCacheMap == NULL) ||
        FlagOn(((PFSRTL_COMMON_FCB_HEADER)(SharedCacheMap->FileObject->FsContext))->Flags,
               FSRTL_FLAG_USER_MAPPED_FILE) && !IsLazyWriter) {

        //
        //  Call MM to flush the section through our view.
        //

        DebugTrace( 0, mm, "MmFlushSection:\n", 0 );
        DebugTrace( 0, mm, " SectionObjectPointer = %08lx\n", SectionObjectPointer );
        DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n",
                    ARGUMENT_PRESENT(FileOffset) ? FileOffset->LowPart : 0,
                    ARGUMENT_PRESENT(FileOffset) ? FileOffset->HighPart : 0 );
        DebugTrace( 0, mm, " RegionSize = %08lx\n", Length );

        try {

            Status = MmFlushSection( SectionObjectPointer,
                                     FileOffset,
                                     Length,
                                     IoStatus,
                                     TRUE );

        } except( CcExceptionFilter( IoStatus->Status = GetExceptionCode() )) {

            KdPrint(("CACHE MANAGER: MmFlushSection raised %08lx\n", IoStatus->Status));
        }

        DebugTrace2(0, mm, " Status = %08lx, Information = %08lx\n",
                    Status,
                    IoStatus->Information );
    }

    //
    //  Now we can get rid of the open count, and clean up as required.
    //

    if (SharedCacheMap != NULL) {

        //
        //  Serialize again to decrement the open count.
        //

        ExAcquireSpinLock( &CcMasterSpinLock, &OldIrql );

        SharedCacheMap->OpenCount -= 1;

        if ((SharedCacheMap->OpenCount == 0) &&
            !FlagOn(SharedCacheMap->Flags, WRITE_QUEUED) &&
            (SharedCacheMap->DirtyPages == 0)) {

            //
            //  Move to the dirty list.
            //

            RemoveEntryList( &SharedCacheMap->SharedCacheMapLinks );
            InsertTailList( &CcDirtySharedCacheMapList.SharedCacheMapLinks,
                            &SharedCacheMap->SharedCacheMapLinks );

            //
            //  Make sure the Lazy Writer will wake up, because we
            //  want him to delete this SharedCacheMap.
            //

            LazyWriter.OtherWork = TRUE;
            if (!LazyWriter.ScanActive) {
                CcScheduleLazyWriteScan();
            }
        }

        ExReleaseSpinLock( &CcMasterSpinLock, OldIrql );
    }

    //
    //  Make sure to return the first error to our caller.  In the
    //  case of the Lazy Writer, a popup will be issued.
    //

    if (PopupRequired) {
        IoStatus->Status = PopupStatus;
    }

    //
    //  Let the Lazy Writer know if we did anything, so he can tell
    //  whether any progress was made on this pass.
    //

    DebugTrace(-1, me, "CcFlushCache -> VOID\n", 0 );

    return;
}

VOID
CcRepinBcb (
    IN PVOID Bcb
    )

/*++

Routine Description:

    This routine may be called by a file system to pin a Bcb an additional
    time in order to reserve it for Write Through or error recovery.
    Typically the file system would do this the first time that it sets a
    pinned buffer dirty while processing a WriteThrough request, or any time
    that it determines that a buffer will be required for WriteThrough.

    The call to this routine must be followed by a call to
    CcUnpinRepinnedBcb.  CcUnpinRepinnedBcb should normally be called during
    request completion after all other resources have been released.
    CcUnpinRepinnedBcb synchronously writes the buffer (for WriteThrough
    requests) and performs the matching unpin for this call.

Arguments:

    Bcb - Supplies a pointer to a previously pinned Bcb

Return Value:

    None.

--*/

{
    KIRQL OldIrql;

    ExAcquireFastLock( &CcMasterSpinLock, &OldIrql );

    ((PBCB)Bcb)->PinCount += 1;

    ExReleaseFastLock( &CcMasterSpinLock, OldIrql );
}

VOID
CcUnpinRepinnedBcb (
    IN PVOID Bcb,
    IN BOOLEAN WriteThrough,
    OUT PIO_STATUS_BLOCK IoStatus
    )

/*++

Routine Description:

    This routine may be called to write a previously pinned buffer
    through to the file.
    It must have been preceded by a call to CcRepinBcb.

    As this routine must acquire the Bcb resource exclusive, the caller
    must be extremely careful to avoid deadlocks.  Ideally the caller owns
    no resources at all when it calls this routine, or else the caller
    should guarantee that it has nothing else pinned in this same file.
    (The latter rule is the one used to avoid deadlocks in calls from
    CcCopyWrite and CcMdlWrite.)

Arguments:

    Bcb - Pointer to a Bcb which was previously specified in a call to
          CcRepinBcb.

    WriteThrough - TRUE if the Bcb should be written through.

    IoStatus - Returns the I/O status for the operation.

Return Value:

    None.

--*/

{
    PSHARED_CACHE_MAP SharedCacheMap = ((PBCB)Bcb)->SharedCacheMap;

    DebugTrace(+1, me, "CcUnpinRepinnedBcb\n", 0 );
    DebugTrace( 0, me, " Bcb = %08lx\n", Bcb );
    DebugTrace( 0, me, " WriteThrough = %02lx\n", WriteThrough );

    //
    //  Set status to success for non write through case.
    //

    IoStatus->Status = STATUS_SUCCESS;

    if (WriteThrough) {

        //
        //  Acquire Bcb exclusive to eliminate possible modifiers of the buffer,
        //  since we are about to write its buffer.
        //

        if (FlagOn(SharedCacheMap->Flags, MODIFIED_WRITE_DISABLED)) {

            ExAcquireResourceExclusive( &((PBCB)Bcb)->Resource, TRUE );
        }

        //
        //  Now, there is a chance that the LazyWriter has already written
        //  it, since the resource was free.  We will only write it if it
        //  is still dirty.
        //

        if (((PBCB)Bcb)->Dirty) {

            //
            //  First we make sure that the dirty bit in the PFN database is set.
            //

            ASSERT( ((PBCB)Bcb)->BaseAddress != NULL );
            MmSetAddressRangeModified( ((PBCB)Bcb)->BaseAddress,
                                       ((PBCB)Bcb)->ByteLength );

            //
            //  Now release the Bcb resource and set it clean.  Note we do not check
            //  here for errors, and just return the I/O status.  Errors on writes
            //  are rare to begin with.  Nonetheless, our strategy is to rely on
            //  one or more of the following (depending on the file system) to prevent
            //  errors from getting to us.
            //
            //      - Retries and/or other forms of error recovery in the disk driver
            //      - Mirroring driver
            //      - Hot fixing in the noncached path of the file system
            //
            //  In the unexpected case that a write error does get through, we
            //  report it to our caller, but go ahead and set the Bcb clean.  There
            //  seems to be no point in letting Bcbs (and pages in physical memory)
            //  accumulate which can never go away because we get an unrecoverable I/O
            //  error.
            //

            //
            //  We specify TRUE here for ReadOnly so that we will keep the
            //  resource during the flush.
            //

            CcUnpinFileData( (PBCB)Bcb, TRUE, SET_CLEAN );

            //
            //  Write it out.
            //

            MmFlushSection( ((PBCB)Bcb)->SharedCacheMap->FileObject->SectionObjectPointer,
                            &((PBCB)Bcb)->FileOffset,
                            ((PBCB)Bcb)->ByteLength,
                            IoStatus,
                            TRUE );

            //
            //  If we got verify required, we have to mark the buffer dirty again
            //  so we will try again later.
            //

            if (RetryError(IoStatus->Status)) {
                CcSetDirtyPinnedData( (PBCB)Bcb, NULL );
            }

            //
            //  Now remove the final pin count now that we have set it clean.
            //

            CcUnpinFileData( (PBCB)Bcb, FALSE, UNPIN );

            //
            //  See if there are any deferred writes we can post.
            //

            if (!IsListEmpty(&CcDeferredWrites)) {
                CcPostDeferredWrites();
            }

        } else {

            //
            //  Lazy Writer got there first, just free the resource and unpin.
            //

            CcUnpinFileData( (PBCB)Bcb, FALSE, UNPIN );
        }

        DebugTrace2(0, me, " Status = %08lx, Information = %08lx\n",
                    IoStatus->Status,
                    IoStatus->Information );
    }

    //
    //  Non-WriteThrough case
    //

    else {

        CcUnpinFileData( (PBCB)Bcb, TRUE, UNPIN );

        //
        //  Set status to success for non write through case.
// IoStatus->Status = STATUS_SUCCESS; } DebugTrace(-1, me, "CcUnpinRepinnedBcb -> VOID\n", 0 ); } // // Internal Support Routine // BOOLEAN CcFindBcb ( IN PSHARED_CACHE_MAP SharedCacheMap, IN PLARGE_INTEGER FileOffset, IN OUT PLARGE_INTEGER BeyondLastByte, OUT PBCB *Bcb ) /*++ Routine Description: This routine is called to find a Bcb describing the specified byte range of a file. It returns TRUE if it could at least find a Bcb which describes the beginning of the specified byte range, or else FALSE if the first part of the byte range is not present. In the latter case, the requested byte range (TrialLength) is truncated if there is currently a Bcb which describes bytes beyond the beginning of the byte range. The caller may see if the entire byte range is being returned by examining the Bcb, and the caller (or caller's caller) may then make subsequent calls if the data is not all returned. The BcbList SpinLock must be currently acquired. Arguments: SharedCacheMap - Supplies a pointer to the SharedCacheMap for the file in which the byte range is desired. FileOffset - Supplies the file offset for the beginning of the desired byte range. BeyondLastByte - Supplies the file offset of the ending of the desired byte range + 1. Note that this offset will be truncated on return if the Bcb was not found, but bytes beyond the beginning of the Bcb are contained in another Bcb. Bcb - returns a Bcb describing the beginning of the byte range if also returning TRUE, or else the point in the Bcb list to insert after. Return Value: FALSE - if no Bcb describes the beginning of the desired byte range TRUE - if a Bcb is being returned describing at least an initial part of the byte range. --*/ { PLIST_ENTRY BcbList; PBCB Bcbt; BOOLEAN Found = FALSE; DebugTrace(+1, me, "CcFindBcb:\n", 0 ); DebugTrace( 0, me, " SharedCacheMap = %08lx\n", SharedCacheMap ); DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart, FileOffset->HighPart ); DebugTrace2(0, me, " TrialLength = %08lx, %08lx\n", TrialLength->LowPart, TrialLength->HighPart ); // // We want to terminate scans by testing the NodeTypeCode field from the // BcbLinks, so we want to see the SharedCacheMap signature from the same // offset. // ASSERT(FIELD_OFFSET(SHARED_CACHE_MAP, BcbList) == FIELD_OFFSET(BCB, BcbLinks)); // // Similarly, when we hit one of the BcbListHeads in the array, small negative // offsets are all structure pointers, so we are counting on the Bcb signature // to have some non-Ulong address bits set. // ASSERT((CACHE_NTC_BCB & 3) != 0); // // Get address of Bcb listhead that is *after* the Bcb we are looking for, // for backwards scan. // BcbList = &SharedCacheMap->BcbList; if ((FileOffset->QuadPart + SIZE_PER_BCB_LIST) < SharedCacheMap->SectionSize.QuadPart) { BcbList = GetBcbListHead( SharedCacheMap, FileOffset->QuadPart + SIZE_PER_BCB_LIST ); } // // Search for an entry that overlaps the specified range, or until we hit // a listhead. // Bcbt = CONTAINING_RECORD(BcbList->Flink, BCB, BcbLinks); // // First see if we really have to do Large arithmetic or not, and // then use either a 32-bit loop or a 64-bit loop to search for // the Bcb. // if (FileOffset->HighPart == 0) { // // 32-bit - loop until we get back to a listhead. // while (Bcbt->NodeTypeCode == CACHE_NTC_BCB) { // // Since the Bcb list is in descending order, we first check // if we are completely beyond the current entry, and if so // get out. 
// if (FileOffset->LowPart >= Bcbt->BeyondLastByte.LowPart) { break; } // // Next check if the first byte we are looking for is // contained in the current Bcb. If so, we either have // a partial hit and must truncate to the exact amount // we have found, or we may have a complete hit. In // either case we break with Found == TRUE. // if (FileOffset->LowPart >= Bcbt->FileOffset.LowPart) { Found = TRUE; break; } // // Now we know we must loop back and keep looking, but we // still must check for the case where the tail end of the // bytes we are looking for are described by the current // Bcb. If so we must truncate what we are looking for, // because this routine is only supposed to return bytes // from the start of the desired range. // if (BeyondLastByte->LowPart >= Bcbt->FileOffset.LowPart) { BeyondLastByte->LowPart = Bcbt->FileOffset.LowPart; } // // Advance to next entry in list (which is possibly back to // the listhead) and loop back. // Bcbt = CONTAINING_RECORD( Bcbt->BcbLinks.Flink, BCB, BcbLinks ); } } else { // // 64-bit - Loop until we get back to a listhead. // while (Bcbt->NodeTypeCode == CACHE_NTC_BCB) { // // Since the Bcb list is in descending order, we first check // if we are completely beyond the current entry, and if so // get out. // if (FileOffset->QuadPart >= Bcbt->BeyondLastByte.QuadPart) { break; } // // Next check if the first byte we are looking for is // contained in the current Bcb. If so, we either have // a partial hit and must truncate to the exact amount // we have found, or we may have a complete hit. In // either case we break with Found == TRUE. // if (FileOffset->QuadPart >= Bcbt->FileOffset.QuadPart) { Found = TRUE; break; } // // Now we know we must loop back and keep looking, but we // still must check for the case where the tail end of the // bytes we are looking for are described by the current // Bcb. If so we must truncate what we are looking for, // because this routine is only supposed to return bytes // from the start of the desired range. // if (BeyondLastByte->QuadPart >= Bcbt->FileOffset.QuadPart) { BeyondLastByte->QuadPart = Bcbt->FileOffset.QuadPart; } // // Advance to next entry in list (which is possibly back to // the listhead) and loop back. // Bcbt = CONTAINING_RECORD( Bcbt->BcbLinks.Flink, BCB, BcbLinks ); } } *Bcb = Bcbt; DebugTrace2(0, me, " LowPart, TrialLength->HighPart ); DebugTrace( 0, me, " %02lx\n", Found ); return Found; } // // Internal Support Routine // PBCB CcAllocateInitializeBcb ( IN OUT PSHARED_CACHE_MAP SharedCacheMap OPTIONAL, IN OUT PBCB AfterBcb, IN PLARGE_INTEGER FileOffset, IN PLARGE_INTEGER TrialLength ) /*++ Routine Description: This routine allocates and initializes a Bcb to describe the specified byte range, and inserts it into the Bcb List of the specified Shared Cache Map. The Bcb List spin lock must currently be acquired. CcMasterSpinLock must be acquired on entry. Arguments: SharedCacheMap - Supplies the SharedCacheMap for the new Bcb. AfterBcb - Supplies where in the descending-order BcbList the new Bcb should be inserted: either the ListHead (masquerading as a Bcb) or a Bcb. FileOffset - Supplies File Offset for the desired data. TrialLength - Supplies length of desired data. 
Return Value: Address of the allocated and initialized Bcb --*/ { PBCB Bcb; CSHORT NodeIsInZone; ULONG RoundedBcbSize = (sizeof(BCB) + 7) & ~7; // // Loop until we have a new Work Queue Entry // while (TRUE) { PVOID Segment; ULONG SegmentSize; Bcb = ExAllocateFromZone( &LazyWriter.BcbZone ); if (Bcb != NULL) { NodeIsInZone = 1; break; } // // Allocation failure - on large systems, extend zone // if ( MmQuerySystemSize() == MmLargeSystem ) { SegmentSize = sizeof(ZONE_SEGMENT_HEADER) + RoundedBcbSize * 32; if ((Segment = ExAllocatePool( NonPagedPool, SegmentSize)) == NULL) { return NULL; } if (!NT_SUCCESS(ExExtendZone( &LazyWriter.BcbZone, Segment, SegmentSize ))) { CcBugCheck( 0, 0, 0 ); } } else { if ((Bcb = ExAllocatePool( NonPagedPool, sizeof(BCB))) == NULL) { return NULL; } NodeIsInZone = 0; break; } } // // Initialize the newly allocated Bcb. First zero it, then fill in // nonzero fields. // RtlZeroMemory( Bcb, RoundedBcbSize ); Bcb->NodeIsInZone = NodeIsInZone; // // For Mbcb's, SharedCacheMap is NULL, and the rest of this initialization // is not desired. // if (SharedCacheMap != NULL) { Bcb->NodeTypeCode = CACHE_NTC_BCB; Bcb->FileOffset = *FileOffset; Bcb->ByteLength = TrialLength->LowPart; Bcb->BeyondLastByte.QuadPart = FileOffset->QuadPart + TrialLength->QuadPart; Bcb->PinCount += 1; ExInitializeResource( &Bcb->Resource ); Bcb->SharedCacheMap = SharedCacheMap; // // Now insert the Bcb in the Bcb List // InsertTailList( &AfterBcb->BcbLinks, &Bcb->BcbLinks ); // // If this resource was no write behind, let Ex know that the // resource will never be acquired exclusive. Also disable // boost (I know this is useless, but KenR said I had to do it). // if (SharedCacheMap && FlagOn(SharedCacheMap->Flags, DISABLE_WRITE_BEHIND)) { #if DBG SetFlag(Bcb->Resource.Flag, ResourceNeverExclusive); #endif ExDisableResourceBoost( &Bcb->Resource ); } } return Bcb; } // // Internal support routine // VOID FASTCALL CcDeallocateBcb ( IN PBCB Bcb ) /*++ Routine Description: This routine deallocates a Bcb to the BcbZone. It must already be removed from the BcbList. CcMasterSpinLock must be acquired on entry. Arguments: Bcb - the Bcb to deallocate Return Value: None --*/ { // // Deallocate Resource structures // if (Bcb->NodeTypeCode == CACHE_NTC_BCB) { ExDeleteResource( &Bcb->Resource ); } if ( Bcb->NodeIsInZone ) { // // Synchronize access to the BcbZone // ExFreeToZone( &LazyWriter.BcbZone, Bcb ); } else { ExFreePool(Bcb); } return; } // // Internal Support Routine // BOOLEAN CcMapAndRead( IN PSHARED_CACHE_MAP SharedCacheMap, IN PLARGE_INTEGER FileOffset, IN ULONG Length, IN ULONG ZeroFlags, IN BOOLEAN Wait, OUT PVACB *Vacb, OUT PVOID *BaseAddress ) /*++ Routine Description: This routine may be called to insure that the specified data is mapped, read into memory and locked. If TRUE is returned, then the correct I/O status for the transfer is also returned, along with a system-space address for the data. Arguments: SharedCacheMap - Supplies the address of the SharedCacheMap for the data. FileOffset - Supplies the file offset of the desired data. Length - Supplies the total amount of data desired. ZeroFlags - Defines which pages may be zeroed if not resident. Wait - Supplies FALSE if the caller is not willing to block for the data, or TRUE if the caller is willing to block. Vacb - Returns the address of the Vacb which is mapping the enclosing virtual address range. BaseAddress - Returns the system base address at which the data may be accessed. 
Return Value: FALSE - if the caller supplied Wait = FALSE and the data could not be returned without blocking. TRUE - if the data is being returned. Note: this routine may raise an exception due to a map or read failure, however, this can only happen if Wait was specified as TRUE, since mapping and reading will not be performed if the caller cannot wait. --*/ { ULONG ReceivedLength; ULONG ZeroCase; ULONG SavedState; BOOLEAN Result = FALSE; PETHREAD Thread = PsGetCurrentThread(); DebugTrace(+1, me, "CcMapAndRead:\n", 0 ); DebugTrace( 0, me, " SharedCacheMap = %08lx\n", SharedCacheMap ); DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart, FileOffset->HighPart ); DebugTrace( 0, me, " Length = %08lx\n", Length ); *BaseAddress = NULL; *Vacb = NULL; *BaseAddress = CcGetVirtualAddress( SharedCacheMap, *FileOffset, Vacb, &ReceivedLength ); ASSERT( ReceivedLength >= Length ); MmSavePageFaultReadAhead( Thread, &SavedState ); // // try around everything for cleanup. // try { PVOID CacheBuffer; ULONG PagesToGo; // // If we got more than we need, make sure to only use // the right amount. // if (ReceivedLength > Length) { ReceivedLength = Length; } // // Now loop to touch all of the pages, calling MM to insure // that if we fault, we take in exactly the number of pages // we need. // CacheBuffer = *BaseAddress; PagesToGo = COMPUTE_PAGES_SPANNED( CacheBuffer, ReceivedLength ); // // Loop to touch or zero the pages. // ZeroCase = ZERO_FIRST_PAGE; while (PagesToGo) { // // If we cannot zero this page, or Mm failed to return // a zeroed page, then just fault it in. // MmSetPageFaultReadAhead( Thread, (PagesToGo - 1) ); if (!FlagOn(ZeroFlags, ZeroCase) || !MmCheckCachedPageState(CacheBuffer, TRUE)) { // // If we get here, it is almost certainly due to the fact // that we can not take a zero page. MmCheckCachedPageState // will so rarely return FALSE, that we will not worry // about it. We will only check if the page is there if // Wait is FALSE, so that we can do the right thing. // if (!MmCheckCachedPageState(CacheBuffer, FALSE) && !Wait) { try_return( Result = FALSE ); } } CacheBuffer = (PCHAR)CacheBuffer + PAGE_SIZE; PagesToGo -= 1; if (PagesToGo == 1) { ZeroCase = ZERO_LAST_PAGE; } else { ZeroCase = ZERO_MIDDLE_PAGES; } } try_return( Result = TRUE ); try_exit: NOTHING; } // // Cleanup on the way out. // finally { MmResetPageFaultReadAhead(Thread, SavedState); // // If not successful, cleanup on the way out. Most of the errors // can only occur as the result of an abnormal termination after // successfully checking and locking the pages. // if (Result == FALSE) { CcFreeVirtualAddress( *Vacb ); *Vacb = NULL; *BaseAddress = NULL; } } DebugTrace( 0, me, " %02lx\n", Result ); return Result; } // // Internal Support Routine // VOID CcFreeActiveVacb ( IN PSHARED_CACHE_MAP SharedCacheMap, IN PVACB ActiveVacb OPTIONAL, IN ULONG ActivePage, IN ULONG PageIsDirty ) /*++ Routine Description: This routine may be called to zero the end of a locked page or free the ActiveVacb for a Shared Cache Map, if there is one. Note that some callers are not synchronized with foreground activity, and may therefore not have an ActiveVacb. Examples of unsynchronized callers are CcZeroEndOfLastPage (which is called by MM) and any flushing done by CcWriteBehind. Arguments: SharedCacheMap - SharedCacheMap to examine for page to be zeroed. 
ActiveVacb - Vacb to free ActivePage - Page that was used PageIsDirty - ACTIVE_PAGE_IS_DIRTY if the active page is dirty Return Value: None --*/ { LARGE_INTEGER ActiveOffset; PVOID ActiveAddress; ULONG BytesLeftInPage; KIRQL OldIrql; // // If the page was locked, then unlock it. // if (SharedCacheMap->NeedToZero != NULL) { // // Zero the rest of the page under spinlock control, // and then clear the address field. This field makes // zero->nonzero transitions only when the file is exclusive, // but it can make nonzero->zero transitions any time the // spinlock is not held. // ExAcquireFastLock( &SharedCacheMap->ActiveVacbSpinLock, &OldIrql ); // // The address could already be gone. // ActiveAddress = SharedCacheMap->NeedToZero; if (ActiveAddress != NULL) { BytesLeftInPage = PAGE_SIZE - ((((ULONG)ActiveAddress - 1) & (PAGE_SIZE - 1)) + 1); RtlZeroBytes( ActiveAddress, BytesLeftInPage ); SharedCacheMap->NeedToZero = NULL; } ExReleaseFastLock( &SharedCacheMap->ActiveVacbSpinLock, OldIrql ); // // Now call MM to unlock the address. Note we will never store the // address at the start of the page, but we can sometimes store // the start of the next page when we have exactly filled the page. // if (ActiveAddress != NULL) { MmUnlockCachedPage( (PVOID)((PCHAR)ActiveAddress - 1) ); } } // // See if caller actually has an ActiveVacb // if (ActiveVacb != NULL) { // // See if the page is dirty // if (PageIsDirty) { ActiveOffset.QuadPart = (LONGLONG)ActivePage << PAGE_SHIFT; ActiveAddress = (PVOID)((PCHAR)ActiveVacb->BaseAddress + (ActiveOffset.LowPart & (VACB_MAPPING_GRANULARITY - 1))); // // Tell the Lazy Writer to write the page. // CcSetDirtyInMask( SharedCacheMap, &ActiveOffset, PAGE_SIZE ); // // Now we need to clear the flag and decrement some counts if there is // no other active Vacb which snuck in. // ExAcquireFastLock( &CcMasterSpinLock, &OldIrql ); ExAcquireSpinLockAtDpcLevel( &SharedCacheMap->ActiveVacbSpinLock ); if ((SharedCacheMap->ActiveVacb == NULL) && FlagOn(SharedCacheMap->Flags, ACTIVE_PAGE_IS_DIRTY)) { ClearFlag(SharedCacheMap->Flags, ACTIVE_PAGE_IS_DIRTY); SharedCacheMap->DirtyPages -= 1; CcTotalDirtyPages -= 1; } ExReleaseSpinLockFromDpcLevel( &SharedCacheMap->ActiveVacbSpinLock ); ExReleaseFastLock( &CcMasterSpinLock, OldIrql ); } // // Now free the Vacb. // CcFreeVirtualAddress( ActiveVacb ); } } // // Internal Support Routine // VOID CcMapAndCopy( IN PSHARED_CACHE_MAP SharedCacheMap, IN PVOID UserBuffer, IN PLARGE_INTEGER FileOffset, IN ULONG Length, IN ULONG ZeroFlags, IN BOOLEAN WriteThrough ) /*++ Routine Description: This routine may be called to copy the specified user data to the cache via a special Mm routine which copies the data to uninitialized pages and returns. Arguments: SharedCacheMap - Supplies the address of the SharedCacheMap for the data. UserBuffer - unsafe buffer supplying the user's data to be written FileOffset - Supplies the file offset to be modified Length - Supplies the total amount of data ZeroFlags - Defines which pages may be zeroed if not resident. 
WriteThrough - Supplies whether the data is to be written through or not Return Value: None --*/ { ULONG ReceivedLength; ULONG ZeroCase; PVOID CacheBuffer; PVOID SavedMappedBuffer; ULONG SavedMappedLength; ULONG ActivePage; KIRQL OldIrql; LARGE_INTEGER PFileOffset; IO_STATUS_BLOCK IoStatus; NTSTATUS Status; ULONG SavedState; BOOLEAN MorePages; ULONG SavedTotalLength = Length; LARGE_INTEGER LocalOffset = *FileOffset; ULONG PageOffset = FileOffset->LowPart & (PAGE_SIZE - 1); PVACB Vacb = NULL; PETHREAD Thread = PsGetCurrentThread(); // // Initialize SavePage to TRUE to skip the finally clause on zero-length // writes. // BOOLEAN SavePage = TRUE; DebugTrace(+1, me, "CcMapAndCopy:\n", 0 ); DebugTrace( 0, me, " SharedCacheMap = %08lx\n", SharedCacheMap ); DebugTrace2(0, me, " FileOffset = %08lx, %08lx\n", FileOffset->LowPart, FileOffset->HighPart ); DebugTrace( 0, me, " Length = %08lx\n", Length ); MmSavePageFaultReadAhead( Thread, &SavedState ); // // try around everything for cleanup. // try { while (Length != 0) { CacheBuffer = CcGetVirtualAddress( SharedCacheMap, LocalOffset, &Vacb, &ReceivedLength ); // // If we got more than we need, make sure to only use // the right amount. // if (ReceivedLength > Length) { ReceivedLength = Length; } SavedMappedBuffer = CacheBuffer; SavedMappedLength = ReceivedLength; Length -= ReceivedLength; // // Now loop to touch all of the pages, calling MM to insure // that if we fault, we take in exactly the number of pages // we need. // CacheBuffer = (PVOID)((PCHAR)CacheBuffer - PageOffset); ReceivedLength += PageOffset; // // Loop to touch or zero the pages. // ZeroCase = ZERO_FIRST_PAGE; // // Set up offset to page for use below. // PFileOffset = LocalOffset; PFileOffset.LowPart -= PageOffset; while (TRUE) { // // Calculate whether we wish to save an active page // or not. // SavePage = ((Length == 0) && (ReceivedLength < PAGE_SIZE) && (SavedTotalLength <= (PAGE_SIZE / 2)) && !WriteThrough && (SharedCacheMap->FileObject->SectionObjectPointer->ImageSectionObject == NULL) && (SharedCacheMap->Mbcb != NULL) && ((ULONG)((ULONGLONG)PFileOffset.QuadPart >> PAGE_SHIFT) < (SharedCacheMap->Mbcb->Bitmap.SizeOfBitMap - 1))); MorePages = (ReceivedLength > PAGE_SIZE); // // Copy the data to the user buffer. // try { // // It is possible that there is a locked page // hanging around, and so we need to nuke it here. // if (SharedCacheMap->NeedToZero != NULL) { CcFreeActiveVacb( SharedCacheMap, NULL, 0, 0 ); } Status = STATUS_SUCCESS; if (FlagOn(ZeroFlags, ZeroCase)) { Status = MmCopyToCachedPage( CacheBuffer, UserBuffer, PageOffset, MorePages ? (PAGE_SIZE - PageOffset) : (ReceivedLength - PageOffset), SavePage ); if (!NT_SUCCESS(Status)) { ExRaiseStatus( FsRtlNormalizeNtstatus( Status, STATUS_INVALID_USER_BUFFER )); } // // Otherwise, we have to actually copy the data ourselves. // } else { MmSetPageFaultReadAhead( Thread, (MorePages && FlagOn(ZeroFlags, ZERO_LAST_PAGE)) ? 1 : 0); RtlCopyBytes( (PVOID)((PCHAR)CacheBuffer + PageOffset), UserBuffer, MorePages ? (PAGE_SIZE - PageOffset) : (ReceivedLength - PageOffset) ); MmResetPageFaultReadAhead( Thread, SavedState ); } } except( CcCopyReadExceptionFilter( GetExceptionInformation(), &Status ) ) { // // If we got an access violation, then the user buffer went // away. Otherwise we must have gotten an I/O error trying // to bring the data in. 
                    //

                    if (Status == STATUS_ACCESS_VIOLATION) {
                        ExRaiseStatus( STATUS_INVALID_USER_BUFFER );
                    } else {
                        ExRaiseStatus( FsRtlNormalizeNtstatus( Status,
                                                               STATUS_UNEXPECTED_IO_ERROR ));
                    }
                }

                //
                //  Now get out quickly if it is a small write and we want
                //  to save the page.
                //

                if (SavePage) {

                    ActivePage = (ULONG)( (ULONGLONG)Vacb->Overlay.FileOffset.QuadPart >> PAGE_SHIFT ) +
                                 (((PCHAR)CacheBuffer - (PCHAR)Vacb->BaseAddress) >> PAGE_SHIFT);

                    PFileOffset.LowPart += ReceivedLength;

                    //
                    //  If the cache page was left locked, then remember the
                    //  address we still have to zero from.  (CcFreeActiveVacb
                    //  will zero the rest of the page and unlock it later.)
                    //

                    if (Status == STATUS_CACHE_PAGE_LOCKED) {

                        ExAcquireFastLock( &SharedCacheMap->ActiveVacbSpinLock, &OldIrql );

                        ASSERT(SharedCacheMap->NeedToZero == NULL);

                        SharedCacheMap->NeedToZero = (PVOID)((PCHAR)CacheBuffer +
                                                             (PFileOffset.LowPart & (PAGE_SIZE - 1)));
                        SharedCacheMap->NeedToZeroPage = ActivePage;

                        ExReleaseFastLock( &SharedCacheMap->ActiveVacbSpinLock, OldIrql );
                    }

                    SetActiveVacb( SharedCacheMap,
                                   OldIrql,
                                   Vacb,
                                   ActivePage,
                                   ACTIVE_PAGE_IS_DIRTY );

                    try_return( NOTHING );
                }

                //
                //  If it looks like we may save a page and exit on the next loop,
                //  then we must make sure to mark the current page dirty.  Note
                //  that Cc[Fast]CopyWrite will finish the last part of any page
                //  before allowing us to free the Active Vacb above, therefore
                //  this case only occurs for a small random write.
                //

                if ((SavedTotalLength <= (PAGE_SIZE / 2)) && !WriteThrough) {
                    CcSetDirtyInMask( SharedCacheMap, &PFileOffset, ReceivedLength );
                }

                UserBuffer = (PVOID)((PCHAR)UserBuffer + (PAGE_SIZE - PageOffset));
                PageOffset = 0;

                //
                //  If there is more than a page to go (including what we just
                //  copied), then adjust our buffer pointer and counts, and
                //  determine if we are to the last page yet.
                //

                if (MorePages) {

                    CacheBuffer = (PCHAR)CacheBuffer + PAGE_SIZE;
                    ReceivedLength -= PAGE_SIZE;

                    //
                    //  Update our offset to the page.  Note that 32-bit
                    //  add is ok since we cannot cross a Vacb boundary
                    //  and we reinitialize this offset before entering
                    //  this loop again.
                    //

                    PFileOffset.LowPart += PAGE_SIZE;

                    if (ReceivedLength > PAGE_SIZE) {
                        ZeroCase = ZERO_MIDDLE_PAGES;
                    } else {
                        ZeroCase = ZERO_LAST_PAGE;
                    }

                } else {

                    break;
                }
            }

            //
            //  If there is still more to write (i.e., we are going to step
            //  onto the next Vacb) AND we just dirtied more than 64K, then
            //  do a vicarious MmFlushSection here.  This prevents us from
            //  creating unlimited dirty pages while holding the file
            //  resource exclusive.  We also do not need to set the pages
            //  dirty in the mask in this case.
            //

            if (Length > CcMaxDirtyWrite) {

                MmSetAddressRangeModified( SavedMappedBuffer, SavedMappedLength );
                MmFlushSection( SharedCacheMap->FileObject->SectionObjectPointer,
                                &LocalOffset,
                                SavedMappedLength,
                                &IoStatus,
                                TRUE );

                if (!NT_SUCCESS(IoStatus.Status)) {

                    ExRaiseStatus( FsRtlNormalizeNtstatus( IoStatus.Status,
                                                           STATUS_UNEXPECTED_IO_ERROR ));
                }

            //
            //  For write through files, call Mm to propagate the dirty bits
            //  here while we have the view mapped, so we know the flush will
            //  work below.  Again - do not set dirty in the mask.
            //

            } else if (WriteThrough) {

                MmSetAddressRangeModified( SavedMappedBuffer, SavedMappedLength );

            //
            //  For the normal case, just set the pages dirty for the Lazy Writer
            //  now.
            //

            } else {

                CcSetDirtyInMask( SharedCacheMap, &LocalOffset, SavedMappedLength );
            }

            CcFreeVirtualAddress( Vacb );
            Vacb = NULL;

            //
            //  If we have to loop back to get at least a page, it will be ok to
            //  zero the first page.  If we are not getting at least a page, we
            //  must make sure we clear the ZeroFlags if we cannot zero the last
            //  page.
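            //  (Whatever remains fits entirely within that last page, so if we
            //  may not zero it, we may not zero anything.)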
            //

            if (Length >= PAGE_SIZE) {

                ZeroFlags |= ZERO_FIRST_PAGE;

            } else if ((ZeroFlags & ZERO_LAST_PAGE) == 0) {

                ZeroFlags = 0;
            }

            //
            //  Note that if ReceivedLength (and therefore SavedMappedLength)
            //  was truncated to the transfer size, then the new LocalOffset
            //  computed below is not correct.  This is not an issue, since in
            //  that case Length == 0 and we would never get here.
            //

            LocalOffset.QuadPart = LocalOffset.QuadPart + (LONGLONG)SavedMappedLength;
        }

    try_exit: NOTHING;
    }

    //
    //  Cleanup on the way out.
    //

    finally {

        MmResetPageFaultReadAhead( Thread, SavedState );

        //
        //  We have no work to do if we have squirreled away the Vacb.
        //

        if (!SavePage || AbnormalTermination()) {

            //
            //  Make sure we do not leave anything mapped or dirty in the PTE
            //  on the way out.
            //

            if (Vacb != NULL) {

                CcFreeVirtualAddress( Vacb );
            }

            //
            //  Either flush the whole range because of write through, or
            //  mark it dirty for the lazy writer.
            //

            if (WriteThrough) {

                MmFlushSection ( SharedCacheMap->FileObject->SectionObjectPointer,
                                 FileOffset,
                                 SavedTotalLength,
                                 &IoStatus,
                                 TRUE );

                if (!NT_SUCCESS(IoStatus.Status)) {

                    ExRaiseStatus( FsRtlNormalizeNtstatus( IoStatus.Status,
                                                           STATUS_UNEXPECTED_IO_ERROR ));
                }

                //
                //  Advance ValidDataGoal
                //

                LocalOffset.QuadPart = FileOffset->QuadPart + (LONGLONG)SavedTotalLength;

                if (LocalOffset.QuadPart > SharedCacheMap->ValidDataGoal.QuadPart) {
                    SharedCacheMap->ValidDataGoal = LocalOffset;
                }
            }
        }
    }

    DebugTrace(-1, me, "CcMapAndCopy -> VOID\n", 0 );

    return;
}

#ifdef CCDBG
VOID
CcDump (
    IN PVOID Ptr
    )
{
    PVOID Junk = Ptr;
}
#endif
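
//
//  The block below is not part of the cache manager; it is a stand-alone,
//  compiled-out sketch illustrating the per-page arithmetic used by the copy
//  loop in CcMapAndCopy above: the transfer is backed up to a page boundary,
//  the first partial page is copied starting at PageOffset, and subsequent
//  pages are copied from offset zero.  All names in this sketch are
//  illustrative only, and the ZeroCase/SavePage handling of the real loop is
//  intentionally omitted.
//

#if 0

#include <stdio.h>

#define EXAMPLE_PAGE_SIZE 0x1000

void
CcExamplePageWalk (
    unsigned long FileOffset,
    unsigned long Length
    )
{
    //  Offset of the start of the transfer within its first page.

    unsigned long PageOffset = FileOffset & (EXAMPLE_PAGE_SIZE - 1);

    //  Back the length up to cover the whole first page, as the copy loop
    //  above does with ReceivedLength += PageOffset.

    unsigned long ReceivedLength = Length + PageOffset;
    unsigned long PageVa = FileOffset - PageOffset;

    while (1) {

        int MorePages = (ReceivedLength > EXAMPLE_PAGE_SIZE);

        //  Bytes copied into this page - the same expression the real loop
        //  passes as the copy size for MmCopyToCachedPage/RtlCopyBytes.

        unsigned long BytesThisPage = MorePages ?
                                        (EXAMPLE_PAGE_SIZE - PageOffset) :
                                        (ReceivedLength - PageOffset);

        printf( "page at %08lx: copy %lx bytes starting at page offset %lx\n",
                PageVa, BytesThisPage, PageOffset );

        if (!MorePages) {
            break;
        }

        //  Subsequent pages are copied from offset zero.

        PageOffset = 0;
        ReceivedLength -= EXAMPLE_PAGE_SIZE;
        PageVa += EXAMPLE_PAGE_SIZE;
    }
}

#endif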