summaryrefslogtreecommitdiffstats
path: root/private/ntos/tdi/tcpip/tcp/tcprcv.c
diff options
context:
space:
mode:
Diffstat (limited to '')
-rw-r--r--private/ntos/tdi/tcpip/tcp/tcprcv.c3397
1 files changed, 3397 insertions, 0 deletions
diff --git a/private/ntos/tdi/tcpip/tcp/tcprcv.c b/private/ntos/tdi/tcpip/tcp/tcprcv.c
new file mode 100644
index 000000000..46698a2b0
--- /dev/null
+++ b/private/ntos/tdi/tcpip/tcp/tcprcv.c
@@ -0,0 +1,3397 @@
+/********************************************************************/
+/** Microsoft LAN Manager **/
+/** Copyright(c) Microsoft Corp., 1990-1993 **/
+/********************************************************************/
+/* :ts=4 */
+
+//** TCPRCV.C - TCP receive protocol code.
+//
+// This file contains the code for handling incoming TCP packets.
+//
+
+#include "oscfg.h"
+#include "ndis.h"
+#include "cxport.h"
+#include "ip.h"
+#include "tdi.h"
+#ifdef VXD
+#include "tdivxd.h"
+#include "tdistat.h"
+#endif
+#ifdef NT
+#include "tdint.h"
+#include "tdistat.h"
+#endif
+#include "queue.h"
+#include "addr.h"
+#include "tcp.h"
+#include "tcb.h"
+#include "tcpconn.h"
+#include "tcpsend.h"
+#include "tcprcv.h"
+#include "tcpdeliv.h"
+#include "tlcommon.h"
+#include "info.h"
+#include "tcpcfg.h"
+#include "secfltr.h"
+
+uint RequestCompleteFlags; // Completion state bits; modified under RequestCompleteLock (see TCPRcvComplete, CompleteConnReq).
+
+Queue ConnRequestCompleteQ; // Connect requests queued by CompleteConnReq and completed in TCPRcvComplete.
+Queue SendCompleteQ; // Send requests awaiting completion; drained in TCPRcvComplete.
+
+Queue TCBDelayQ; // TCBs with deferred work; filled by DelayAction, drained by ProcessTCBDelayQ.
+
+#ifdef SYN_ATTACK
+DEFINE_LOCK_STRUCTURE(SynAttLock) // Held while the SYN-attack counters are updated (see SynAttChk).
+#endif
+DEFINE_LOCK_STRUCTURE(RequestCompleteLock) // Protects RequestCompleteFlags and the two completion queues.
+DEFINE_LOCK_STRUCTURE(TCBDelayLock) // Protects TCBDelayQ and TCBDelayRtnCount.
+
+ulong TCBDelayRtnCount; // Number of callers currently inside ProcessTCBDelayQ.
+ulong TCBDelayRtnLimit; // ProcessTCBDelayQ returns early once TCBDelayRtnCount exceeds this.
+#define TCB_DELAY_RTN_LIMIT 4 // presumably the initial value for TCBDelayRtnLimit - TODO confirm in the init code.
+
+EXTERNAL_LOCK(TCBTableLock)
+EXTERNAL_LOCK(AddrObjTableLock)
+EXTERNAL_LOCK(ConnTableLock)
+
+extern IPInfo LocalNetInfo; // IP entry points; ipi_getlocalmtu is called from AdjustRcvWin.
+
+#define PERSIST_TIMEOUT MS_TO_TICKS(500)
+
+
+void ResetSendNext(TCB *SeqTCB, SeqNum NewSeq);
+
+#if FAST_RETRANSMIT
+extern uint MaxDupAcks;
+void ResetAndFastSend(TCB *SeqTCB, SeqNum NewSeq);
+#endif
+
+
+#ifdef NT
+ // Called from FindListenConn to attach a cancel routine to the accept IRP.
+NTSTATUS
+TCPPrepareIrpForCancel(
+ PTCP_CONTEXT TcpContext,
+ PIRP Irp,
+ PDRIVER_CANCEL CancelRoutine
+ );
+ // Installed by FindListenConn as the completion routine of an accepted connect request.
+extern void
+TCPRequestComplete(
+ void *Context,
+ unsigned int Status,
+ unsigned int UnUsed
+ );
+ // Cancel routine passed to TCPPrepareIrpForCancel by FindListenConn.
+VOID
+TCPCancelRequest(
+ PDEVICE_OBJECT Device,
+ PIRP Irp
+ );
+
+//
+// All of the init code can be discarded.
+//
+#ifdef ALLOC_PRAGMA
+
+int InitTCPRcv(void);
+void UnInitTCPRcv(void);
+
+#pragma alloc_text(INIT, InitTCPRcv)
+#pragma alloc_text(INIT, UnInitTCPRcv)
+
+#endif // ALLOC_PRAGMA
+
+#ifdef RASAUTODIAL
+extern BOOLEAN fAcdLoadedG;
+#endif
+
+#endif // NT
+
+//* AdjustRcvWin - Adjust the receive window on a TCB.
+//
+//  A utility routine that adjusts the receive window to an even multiple of
+//  the segment size, where the segment size is the smaller of the local MSS
+//  (the MTU IP reports for tcb_saddr, less the TCP header) and the remote
+//  MSS. We round up to the next closest multiple, or leave the window alone
+//  if it's already an even multiple. If fewer than four segments fit in the
+//  default window we grow it to four segments when that fits in 0xffff,
+//  otherwise to the largest whole multiple that fits, or to 0xffff when only
+//  one segment fits. Nothing is changed if WINDOW_SET is set in tcb_flags,
+//  if IP can't give us an MTU, or if the resulting segment size is zero.
+//  We assume we have exclusive access to the input TCB.
+//
+//  Input:  WinTCB - TCB to be adjusted.
+//
+//  Returns: Nothing.
+//
+void
+AdjustRcvWin(TCB *WinTCB)
+{
+    ushort LocalMSS;
+    uchar FoundMSS;
+    ulong SegmentsInWindow;
+
+    CTEAssert(WinTCB->tcb_defaultwin != 0);
+    CTEAssert(WinTCB->tcb_rcvwin != 0);
+    CTEAssert(WinTCB->tcb_remmss != 0);
+
+    if (WinTCB->tcb_flags & WINDOW_SET)
+        return;
+
+    // First, get the local MSS by calling IP.
+    FoundMSS = (*LocalNetInfo.ipi_getlocalmtu)(WinTCB->tcb_saddr, &LocalMSS);
+
+    // If we didn't find it, error out.
+    if (!FoundMSS) {
+        CTEAssert(FALSE);
+        return;
+    }
+
+    LocalMSS -= sizeof(TCPHeader);
+    LocalMSS = MIN(LocalMSS, WinTCB->tcb_remmss);
+
+    // LocalMSS is used as a divisor below. The asserts above vanish in
+    // free builds, so refuse to divide by zero and leave the window as is.
+    if (LocalMSS == 0) {
+        CTEAssert(FALSE);
+        return;
+    }
+
+    SegmentsInWindow = WinTCB->tcb_defaultwin / (ulong)LocalMSS;
+
+    // Make sure we have at least 4 segments in window, if that wouldn't make
+    // the window too big.
+    if (SegmentsInWindow < 4) {
+
+        // We have fewer than four segments in the window. Round up to 4
+        // if we can do so without exceeding the maximum window size;
+        // otherwise use the maximum multiple that we can fit in 64K. The
+        // exception is if we can only fit one integral multiple in the
+        // window - in that case we'll use a window of 0xffff.
+        if (LocalMSS <= (0xffff/4)) {
+            WinTCB->tcb_defaultwin = (uint)(4 * LocalMSS);
+        } else {
+            ulong SegmentsInMaxWindow;
+
+            // Figure out the maximum number of segments we could possibly
+            // fit in a window. If this is > 1, use that as the basis for
+            // our window size. Otherwise use a maximum size window.
+            SegmentsInMaxWindow = 0xffff/(ulong)LocalMSS;
+            if (SegmentsInMaxWindow != 1)
+                WinTCB->tcb_defaultwin = SegmentsInMaxWindow * (ulong)LocalMSS;
+            else
+                WinTCB->tcb_defaultwin = 0xffff;
+        }
+
+        WinTCB->tcb_rcvwin = WinTCB->tcb_defaultwin;
+
+    } else if ((SegmentsInWindow * (ulong)LocalMSS) != WinTCB->tcb_defaultwin) {
+        // It's not already an even multiple, so bump the default and
+        // current windows to the next multiple.
+        ulong NewWindow;
+
+        NewWindow = (SegmentsInWindow + 1) * (ulong)LocalMSS;
+
+        // Don't let the new window be > 64K.
+        if (NewWindow <= 0xffff) {
+            WinTCB->tcb_defaultwin = (uint)NewWindow;
+            WinTCB->tcb_rcvwin = (uint)NewWindow;
+        }
+    }
+
+}
+
+//* CompleteRcvs - Complete rcvs on a TCB.
+//
+// Called when we need to complete rcvs on a TCB. We'll pull things from
+// the TCB's rcv queue, as long as there are rcvs that have the PUSH bit
+// set. The TCB lock is taken here and dropped around each completion callback; RCV_CMPLTING keeps a second pass from starting meanwhile.
+//
+// Input: CmpltTCB - TCB to complete on.
+//
+// Returns: Nothing.
+//
+void
+CompleteRcvs(TCB *CmpltTCB)
+{
+ CTELockHandle TCBHandle;
+ TCPRcvReq *CurrReq, *NextReq, *IndReq; // IndReq: first request completed in a pass, kept for IndicatePendingData.
+
+ CTEStructAssert(CmpltTCB, tcb);
+ CTEAssert(CmpltTCB->tcb_refcnt != 0);
+
+ CTEGetLock(&CmpltTCB->tcb_lock, &TCBHandle);
+ // Only proceed if the TCB isn't closing, nobody else is completing, and there is a posted receive.
+ if (!CLOSING(CmpltTCB) && !(CmpltTCB->tcb_flags & RCV_CMPLTING)
+ && (CmpltTCB->tcb_rcvhead != NULL)) {
+
+ CmpltTCB->tcb_flags |= RCV_CMPLTING;
+
+ for (;;) {
+ // Each pass restarts from the head of the receive queue.
+ CurrReq = CmpltTCB->tcb_rcvhead;
+ IndReq = NULL;
+ do {
+ CTEStructAssert(CurrReq, trr);
+
+ if (CurrReq->trr_flags & TRR_PUSHED) {
+ // Need to complete this one. If this is the current rcv
+ // advance the current rcv to the next one in the list.
+ // Then set the list head to the next one in the list.
+
+ CTEAssert(CurrReq->trr_amt != 0 ||
+ !DATA_RCV_STATE(CmpltTCB->tcb_state));
+
+ NextReq = CurrReq->trr_next;
+ if (CmpltTCB->tcb_currcv == CurrReq)
+ CmpltTCB->tcb_currcv = NextReq;
+
+ CmpltTCB->tcb_rcvhead = NextReq;
+
+ if (NextReq == NULL) {
+ // We've just removed the last buffer. Set the
+ // rcvhandler to PendData, in case something
+ // comes in during the callback.
+ CTEAssert(CmpltTCB->tcb_rcvhndlr != IndicateData);
+ CmpltTCB->tcb_rcvhndlr = PendData;
+ }
+
+ CTEFreeLock(&CmpltTCB->tcb_lock, TCBHandle); // The completion routine is called without the TCB lock.
+ if (CurrReq->trr_uflags != NULL)
+ *(CurrReq->trr_uflags) =
+ TDI_RECEIVE_NORMAL | TDI_RECEIVE_ENTIRE_MESSAGE;
+
+ (*CurrReq->trr_rtn)(CurrReq->trr_context, TDI_SUCCESS,
+ CurrReq->trr_amt);
+ if (IndReq != NULL) // Keep only the first completed request; free the rest.
+ FreeRcvReq(CurrReq);
+ else
+ IndReq = CurrReq;
+ CTEGetLock(&CmpltTCB->tcb_lock, &TCBHandle);
+ CurrReq = CmpltTCB->tcb_rcvhead; // Re-read the head; the queue may have changed while unlocked.
+
+ } else
+ // This one isn't to be completed, so bail out.
+ break;
+ } while (CurrReq != NULL);
+
+ // Now see if we've completed all of the requests. If we have, we
+ // may need to deal with pending data and/or reset the rcv. handler.
+ if (CurrReq == NULL) {
+ // We've completed everything that can be, so stop the push
+ // timer. We don't stop it if CurrReq isn't NULL because we
+ // want to make sure later data is eventually pushed.
+ STOP_TCB_TIMER(CmpltTCB->tcb_pushtimer);
+
+ CTEAssert(IndReq != NULL);
+ // No more recv. requests.
+ if (CmpltTCB->tcb_pendhead == NULL) {
+ FreeRcvReq(IndReq);
+ // No pending data. Set the rcv. handler to either PendData
+ // or IndicateData.
+ if (!(CmpltTCB->tcb_flags & (DISC_PENDING | GC_PENDING))) {
+ if (CmpltTCB->tcb_rcvind != NULL &&
+ CmpltTCB->tcb_indicated == 0)
+ CmpltTCB->tcb_rcvhndlr = IndicateData;
+ else
+ CmpltTCB->tcb_rcvhndlr = PendData;
+ } else {
+ goto Complete_Notify; // A disconnect or graceful close was waiting for the data to drain.
+ }
+
+ } else {
+ // We have pending data to deal with.
+ if (CmpltTCB->tcb_rcvind != NULL &&
+ CmpltTCB->tcb_indicated == 0) {
+ // There's a rcv. indicate handler on this TCB. Call
+ // the indicate handler with the pending data.
+#ifdef VXD
+ CTEFreeLock(&CmpltTCB->tcb_lock, TCBHandle);
+ IndicatePendingData(CmpltTCB, IndReq);
+ SendACK(CmpltTCB);
+ CTEGetLock(&CmpltTCB->tcb_lock, &TCBHandle);
+#else
+ IndicatePendingData(CmpltTCB, IndReq, TCBHandle); // presumably releases the TCB lock (it is reacquired below) - confirm.
+ SendACK(CmpltTCB);
+ CTEGetLock(&CmpltTCB->tcb_lock, &TCBHandle);
+#endif
+ // See if a buffer has been posted. If so, we'll need
+ // to check and see if it needs to be completed.
+ if (CmpltTCB->tcb_rcvhead != NULL)
+ continue;
+ else {
+ // If the pending head is now NULL, we've used up
+ // all the data.
+ if (CmpltTCB->tcb_pendhead == NULL &&
+ (CmpltTCB->tcb_flags &
+ (DISC_PENDING | GC_PENDING)))
+ goto Complete_Notify;
+ }
+
+ } else {
+ // No indicate handler, so nothing to do. The rcv.
+ // handler should already be set to PendData.
+ FreeRcvReq(IndReq);
+ CTEAssert(CmpltTCB->tcb_rcvhndlr == PendData);
+ }
+ }
+ } else {
+ if (IndReq != NULL) // Stopped at a request that isn't pushed yet; it stays queued.
+ FreeRcvReq(IndReq);
+ CTEAssert(CmpltTCB->tcb_rcvhndlr == BufferData);
+ }
+
+ break;
+ }
+ CmpltTCB->tcb_flags &= ~RCV_CMPLTING;
+ }
+ CTEFreeLock(&CmpltTCB->tcb_lock, TCBHandle);
+ return;
+
+Complete_Notify:
+ // Something is pending. Figure out what it is, and do
+ // it. We arrive here with the TCB lock held and RCV_CMPLTING still set.
+ if (CmpltTCB->tcb_flags & GC_PENDING) {
+ CmpltTCB->tcb_flags &= ~RCV_CMPLTING;
+ // Bump the refcnt, because GracefulClose will
+ // deref the TCB and we're not really done with
+ // it yet.
+ CmpltTCB->tcb_refcnt++;
+ GracefulClose(CmpltTCB,
+ CmpltTCB->tcb_flags & TW_PENDING, TRUE,
+ TCBHandle); // presumably releases the TCB lock; nothing frees it afterwards here - confirm.
+
+ } else
+ if (CmpltTCB->tcb_flags & DISC_PENDING) {
+ CmpltTCB->tcb_flags &= ~DISC_PENDING;
+ CTEFreeLock(&CmpltTCB->tcb_lock, TCBHandle);
+ NotifyOfDisc(CmpltTCB, NULL, TDI_GRACEFUL_DISC);
+ // RCV_CMPLTING stays set across the notification and is cleared only afterwards.
+ CTEGetLock(&CmpltTCB->tcb_lock, &TCBHandle);
+ CmpltTCB->tcb_flags &= ~RCV_CMPLTING;
+ CTEFreeLock(&CmpltTCB->tcb_lock, TCBHandle);
+ } else {
+ CTEAssert(FALSE);
+ CTEFreeLock(&CmpltTCB->tcb_lock, TCBHandle);
+ }
+
+ return;
+
+}
+
+//* ProcessTCBDelayQ - Process TCBs on the delayed Q.
+//
+// Called at various times to process TCBs on the delayed Q. Each TCB is dequeued and its deferred receive-completion, output and ACK work is carried out.
+//
+// Entry: Nothing.
+//
+// Returns: Nothing.
+//
+void
+ProcessTCBDelayQ(void)
+{
+ CTELockHandle QHandle;
+ TCB *DelayTCB;
+ CTELockHandle TCBHandle;
+
+ CTEGetLock(&TCBDelayLock, &QHandle);
+
+ // Check for recursion. We do not stop recursion completely, only
+ // limit it. This is done to allow multiple threads to process the
+ // TCBDelayQ simultaneously.
+
+ TCBDelayRtnCount++;
+ if (TCBDelayRtnCount > TCBDelayRtnLimit) {
+ TCBDelayRtnCount--;
+ CTEFreeLock(&TCBDelayLock, QHandle);
+ return;
+ }
+
+ while (!EMPTYQ(&TCBDelayQ)) {
+
+ DEQUEUE(&TCBDelayQ, DelayTCB, TCB, tcb_delayq);
+ CTEStructAssert(DelayTCB, tcb);
+ CTEAssert(DelayTCB->tcb_refcnt != 0);
+ CTEAssert(DelayTCB->tcb_flags & IN_DELAY_Q);
+ CTEFreeLock(&TCBDelayLock, QHandle); // The queue lock is not held while the TCB is worked on.
+
+ CTEGetLock(&DelayTCB->tcb_lock, &TCBHandle);
+ // Loop until no delayed-action flags remain; each action drops the TCB lock, so new flags can appear.
+ while (!CLOSING(DelayTCB) && (DelayTCB->tcb_flags & DELAYED_FLAGS)) {
+
+ if (DelayTCB->tcb_flags & NEED_RCV_CMPLT) {
+ DelayTCB->tcb_flags &= ~NEED_RCV_CMPLT;
+ CTEFreeLock(&DelayTCB->tcb_lock, TCBHandle);
+ CompleteRcvs(DelayTCB);
+ CTEGetLock(&DelayTCB->tcb_lock, &TCBHandle);
+ }
+
+ if (DelayTCB->tcb_flags & NEED_OUTPUT) {
+ DelayTCB->tcb_flags &= ~NEED_OUTPUT;
+ DelayTCB->tcb_refcnt++; // presumably consumed by TCPSend, which is not shown here - confirm.
+#ifdef VXD
+ CTEFreeLock(&DelayTCB->tcb_lock, TCBHandle);
+ TCPSend(DelayTCB);
+#else
+ TCPSend(DelayTCB, TCBHandle); // presumably releases the TCB lock (it is reacquired below) - confirm.
+#endif
+ CTEGetLock(&DelayTCB->tcb_lock, &TCBHandle);
+ }
+
+ if (DelayTCB->tcb_flags & NEED_ACK) {
+ DelayTCB->tcb_flags &= ~NEED_ACK;
+ CTEFreeLock(&DelayTCB->tcb_lock, TCBHandle);
+ SendACK(DelayTCB);
+ CTEGetLock(&DelayTCB->tcb_lock, &TCBHandle);
+ }
+
+ }
+ // Done with this TCB: clear the queued marker and drop the reference DelayAction took.
+ DelayTCB->tcb_flags &= ~IN_DELAY_Q;
+ DerefTCB(DelayTCB, TCBHandle);
+ CTEGetLock(&TCBDelayLock, &QHandle);
+
+ }
+
+ TCBDelayRtnCount--;
+ CTEFreeLock(&TCBDelayLock, QHandle);
+
+}
+
+//* DelayAction - Put a TCB on the queue for a delayed action.
+//
+//  Records a deferred action in the TCB's flags and makes sure the TCB is
+//  on the TCBDelayQ, so the action is carried out the next time that queue
+//  is processed (at rcv. complete or some other time). The lock on the TCB
+//  must be held when this is called.
+//
+//  Input:  DelayTCB    - TCB which we're going to sched.
+//          Action      - Action flag we're scheduling.
+//
+//  Returns: Nothing.
+//
+void
+DelayAction(TCB *DelayTCB, uint Action)
+{
+    CTELockHandle QueueLockHandle;
+
+    CTEGetLockAtDPC(&TCBDelayLock, &QueueLockHandle);
+
+    // Note the requested action on the TCB.
+    DelayTCB->tcb_flags |= Action;
+
+    // Queue the TCB only once, however many actions are pending on it.
+    if ((DelayTCB->tcb_flags & IN_DELAY_Q) == 0) {
+        DelayTCB->tcb_flags |= IN_DELAY_Q;
+        // The queue holds a reference; ProcessTCBDelayQ drops it.
+        DelayTCB->tcb_refcnt++;
+        ENQUEUE(&TCBDelayQ, &DelayTCB->tcb_delayq);
+    }
+
+    CTEFreeLockFromDPC(&TCBDelayLock, QueueLockHandle);
+
+}
+
+//* TCPRcvComplete - Handle a receive complete.
+//
+// Called by the lower layers when we're done receiving. We look to see if
+// we have any pending requests to complete. If we do, we complete them. Then
+// we look to see if we have any TCBs pending for output. If we do, we
+// get them going.
+//
+// Input: Nothing.
+//
+// Returns: Nothing.
+//
+void
+TCPRcvComplete(void)
+{
+ CTELockHandle CompleteHandle;
+ TCPReq *Req;
+ // Unlocked peek at the flags; the real decisions below are made under RequestCompleteLock.
+ if (RequestCompleteFlags & ANY_REQUEST_COMPLETE) {
+ CTEGetLock(&RequestCompleteLock, &CompleteHandle);
+ if (!(RequestCompleteFlags & IN_RCV_COMPLETE)) { // Only one caller at a time drains the queues.
+ RequestCompleteFlags |= IN_RCV_COMPLETE;
+ do {
+ if (RequestCompleteFlags & CONN_REQUEST_COMPLETE) {
+ if (!EMPTYQ(&ConnRequestCompleteQ)) {
+ DEQUEUE(&ConnRequestCompleteQ, Req, TCPReq, tr_q);
+ CTEStructAssert(Req, tr);
+ CTEStructAssert(*(TCPConnReq **)&Req, tcr);
+ // The completion routine is called with the lock released.
+ CTEFreeLock(&RequestCompleteLock, CompleteHandle);
+ (*Req->tr_rtn)(Req->tr_context, Req->tr_status, 0);
+ FreeConnReq((TCPConnReq *)Req);
+ CTEGetLock(&RequestCompleteLock, &CompleteHandle);
+
+ } else
+ RequestCompleteFlags &= ~CONN_REQUEST_COMPLETE; // Queue is empty; clear its flag.
+ }
+
+ if (RequestCompleteFlags & SEND_REQUEST_COMPLETE) {
+ if (!EMPTYQ(&SendCompleteQ)) {
+ TCPSendReq *SendReq;
+
+ DEQUEUE(&SendCompleteQ, Req, TCPReq, tr_q);
+ CTEStructAssert(Req, tr);
+ SendReq = (TCPSendReq *)Req;
+ CTEStructAssert(SendReq, tsr);
+ // Report the byte count only for a successful send; otherwise 0.
+ CTEFreeLock(&RequestCompleteLock, CompleteHandle);
+ (*Req->tr_rtn)(Req->tr_context, Req->tr_status,
+ Req->tr_status == TDI_SUCCESS ? SendReq->tsr_size
+ : 0);
+ FreeSendReq((TCPSendReq *)Req);
+ CTEGetLock(&RequestCompleteLock, &CompleteHandle);
+
+ } else
+ RequestCompleteFlags &= ~SEND_REQUEST_COMPLETE; // Queue is empty; clear its flag.
+ }
+
+ } while (RequestCompleteFlags & ANY_REQUEST_COMPLETE);
+
+ RequestCompleteFlags &= ~IN_RCV_COMPLETE;
+ }
+ CTEFreeLock(&RequestCompleteLock, CompleteHandle);
+ }
+ // Always give the delayed-action queue a chance to run.
+ ProcessTCBDelayQ();
+
+}
+
+//* CompleteConnReq - Complete a connection request on a TCB.
+//
+//  A utility function to complete a connection request on a TCB. The
+//  connreq is detached from the TCB, filled in with the connection
+//  information and the completion status, and put on the
+//  ConnRequestCompleteQ, where it will be picked off later during
+//  RcvCmplt processing. We assume the TCB lock is held when we're called.
+//
+//  Input:  CmpltTCB    - TCB from which to complete.
+//          OptInfo     - IP OptInfo for completion.
+//          Status      - Status to complete with.
+//
+//  Returns: Nothing.
+//
+void
+CompleteConnReq(TCB *CmpltTCB, IPOptInfo *OptInfo, TDI_STATUS Status)
+{
+    TCPConnReq *PendingReq;
+    CTELockHandle CompleteQHandle;
+
+    CTEStructAssert(CmpltTCB, tcb);
+
+    PendingReq = CmpltTCB->tcb_connreq;
+    if (PendingReq == NULL) {
+        // Nothing to complete - callers are expected to have a connreq.
+        DEBUGCHK;
+        return;
+    }
+
+    // There's a connreq on this TCB. Take it off the TCB and fill in the
+    // connection information before returning it.
+    CmpltTCB->tcb_connreq = NULL;
+    UpdateConnInfo(PendingReq->tcr_conninfo, OptInfo, CmpltTCB->tcb_daddr,
+        CmpltTCB->tcb_dport);
+    PendingReq->tcr_req.tr_status = Status;
+
+    // Hand it to the completion queue and flag that there's work to do.
+    CTEGetLockAtDPC(&RequestCompleteLock, &CompleteQHandle);
+    RequestCompleteFlags |= CONN_REQUEST_COMPLETE;
+    ENQUEUE(&ConnRequestCompleteQ, &PendingReq->tcr_req.tr_q);
+    CTEFreeLockFromDPC(&RequestCompleteLock, CompleteQHandle);
+
+}
+
+
+#ifdef SYN_ATTACK
+//* SynAttChk - Check the SYN attack thresholds.
+//
+//  Checks whether certain thresholds relevant to containing a SYN attack
+//  are being crossed, and adjusts MaxConnectResponseRexmitCountTmp to
+//  match. Called from FindListenConn when a connection has been found to
+//  handle the SYN request. The counters are updated under SynAttLock.
+//
+//  Input:  ListenAO    - AddrObj the SYN was accepted on.
+//
+//  Returns: Nothing.
+//
+void
+SynAttChk ( AddrObj *ListenAO )
+{
+    BOOLEAN LoweredRexmitCnt = FALSE;
+    CTELockHandle AttackLockHandle;
+
+    CTEGetLockAtDPC(&SynAttLock, &AttackLockHandle);
+
+    //
+    // We are putting a connection in the syn_rcvd state, so count it.
+    // If the half-open thresholds have been reached while we're still
+    // using the normal retry count, drop to the lower retry count.
+    //
+    ++TCPHalfOpen;
+    if (TCPHalfOpen >= TCPMaxHalfOpen &&
+        MaxConnectResponseRexmitCountTmp == MAX_CONNECT_RESPONSE_REXMIT_CNT &&
+        TCPHalfOpenRetried >= TCPMaxHalfOpenRetried) {
+
+        MaxConnectResponseRexmitCountTmp = ADAPTED_MAX_CONNECT_RESPONSE_REXMIT_CNT;
+        LoweredRexmitCnt = TRUE;
+    }
+
+    //
+    // If the connection limit for this port was reached earlier, clear
+    // that state and check if the lower watermark is getting hit now.
+    //
+    if (ListenAO->ConnLimitReached) {
+        ListenAO->ConnLimitReached = FALSE;
+
+        if (!LoweredRexmitCnt &&
+            MaxConnectResponseRexmitCountTmp == ADAPTED_MAX_CONNECT_RESPONSE_REXMIT_CNT) {
+
+            CTEAssert(TCPPortsExhausted > 0);
+
+            //
+            // The fact that FindListenConn found a connection on the port
+            // indicates that we had a connection available. This port
+            // was therefore not exhausted of connections. Set state
+            // appropriately. If the port has no more connections now,
+            // it will get added to the Exhausted count next time a syn
+            // for the port comes along.
+            //
+            --TCPPortsExhausted;
+            if (TCPPortsExhausted <= TCPMaxPortsExhaustedLW) {
+                MaxConnectResponseRexmitCountTmp =
+                    MAX_CONNECT_RESPONSE_REXMIT_CNT;
+            }
+        }
+    }
+
+    CTEFreeLockFromDPC(&SynAttLock, AttackLockHandle);
+    return;
+}
+#endif
+
+
+//* FindListenConn - Find (or fabricate) a listening connection.
+//
+// Called by our Receive handler to decide what to do about an incoming
+// SYN. We walk down the list of connections associated with the destination
+// address, and if we find any in the listening state that can be used for
+// the incoming request we'll take them, possibly returning a listen in the
+// process. If we don't find any appropriate listening connections, we'll
+// call the Connect Event handler if one is registered. If all else fails,
+// we'll return NULL and the SYN will be RST.
+//
+// The caller must hold the AddrObjTableLock before calling this routine,
+// and that lock must have been taken at DPC level. This routine will free
+// that lock back to DPC level.
+//
+// Input: ListenAO - Pointer to AddrObj for local address.
+// Src - Source IP address of SYN.
+// SrcPort - Source port of SYN.
+// OptInfo - IP options info from SYN.
+//
+// Returns: Pointer to found TCB (referenced, in TCB_SYN_RCVD state), or NULL if we can't find one.
+//
+TCB *
+FindListenConn(AddrObj *ListenAO, IPAddr Src, ushort SrcPort, IPOptInfo *OptInfo)
+{
+ CTELockHandle Handle; // Lock handle on AO, TCB.
+ TCB *CurrentTCB = NULL;
+ TCPConn *CurrentConn = NULL;
+ TCPConnReq *ConnReq = NULL;
+ CTELockHandle ConnHandle;
+ Queue *Temp;
+ uint FoundConn = FALSE;
+
+ CTEStructAssert(ListenAO, ao);
+ // Lock order used here: ConnTableLock, then the AddrObj lock, then (below) a TCB lock.
+ CTEGetLockAtDPC(&ConnTableLock, &ConnHandle);
+ CTEGetLockAtDPC(&ListenAO->ao_lock, &Handle);
+
+#ifdef NT
+ CTEFreeLockFromDPC(&AddrObjTableLock, DISPATCH_LEVEL);
+#endif
+
+
+ // We have the lock on the AddrObj. Walk down its list, looking
+ // for connections in the listening state.
+
+ if (AO_VALID(ListenAO)) {
+ if (ListenAO->ao_listencnt != 0) {
+ CTELockHandle TCBHandle;
+
+ Temp = QHEAD(&ListenAO->ao_listenq);
+ while (Temp != QEND(&ListenAO->ao_listenq)) {
+
+ CurrentConn = QSTRUCT(TCPConn, Temp, tc_q);
+ CTEStructAssert(CurrentConn, tc);
+
+ // If this TCB is in the listening state, with no delete
+ // pending, it's a candidate. Look at the pending listen
+ // info. to see if we should take it.
+ if ((CurrentTCB = CurrentConn->tc_tcb) != NULL) {
+
+ CTEStructAssert(CurrentTCB, tcb);
+ CTEAssert(CurrentTCB->tcb_state == TCB_LISTEN);
+
+ CTEGetLockAtDPC(&CurrentTCB->tcb_lock, &TCBHandle);
+
+ if (CurrentTCB->tcb_state == TCB_LISTEN &&
+ !PENDING_ACTION(CurrentTCB)) {
+
+ // Need to see if we can take it.
+ // See if the addresses specified in the ConnReq
+ // match. A null address or a zero port in the TCB acts as a wildcard.
+ if ((IP_ADDR_EQUAL(CurrentTCB->tcb_daddr,
+ NULL_IP_ADDR) ||
+ IP_ADDR_EQUAL(CurrentTCB->tcb_daddr,
+ Src)) &&
+ (CurrentTCB->tcb_dport == 0 ||
+ CurrentTCB->tcb_dport == SrcPort)) {
+ FoundConn = TRUE;
+ break; // We leave the loop still holding this TCB's lock.
+ }
+
+ // Otherwise, this didn't match, so we'll check the
+ // next one.
+ }
+ CTEFreeLockFromDPC(&CurrentTCB->tcb_lock, TCBHandle);
+ }
+
+ Temp = QNEXT(Temp);;
+ }
+
+ // See why we've exited the loop.
+ if (FoundConn) {
+ CTEStructAssert(CurrentTCB, tcb);
+
+ // We exited because we found a TCB. If it's pre-accepted,
+ // we're done.
+ CurrentTCB->tcb_refcnt++; // This reference is returned to the caller along with the TCB.
+
+ CTEAssert(CurrentTCB->tcb_connreq != NULL);
+
+ ConnReq = CurrentTCB->tcb_connreq;
+ // If QUERY_ACCEPT isn't set, turn on the CONN_ACCEPTED bit.
+ if (!(ConnReq->tcr_flags & TDI_QUERY_ACCEPT))
+ CurrentTCB->tcb_flags |= CONN_ACCEPTED;
+
+ CurrentTCB->tcb_state = TCB_SYN_RCVD;
+
+ ListenAO->ao_listencnt--;
+
+ // Since he's no longer listening, remove him from the listen
+ // queue and put him on the active queue.
+ REMOVEQ(&CurrentConn->tc_q);
+ ENQUEUE(&ListenAO->ao_activeq, &CurrentConn->tc_q);
+#ifdef SYN_ATTACK
+ if (SynAttackProtect) {
+ SynAttChk(ListenAO);
+ }
+#endif
+ // Release the locks in the reverse of the order they were taken.
+ CTEFreeLockFromDPC(&CurrentTCB->tcb_lock, TCBHandle);
+ CTEFreeLockFromDPC(&ListenAO->ao_lock, Handle);
+ CTEFreeLockFromDPC(&ConnTableLock, ConnHandle);
+ return CurrentTCB;
+ } else {
+ // Since we have a listening count, this should never happen
+ // if that count was non-zero initially.
+ CTEAssert(FALSE);
+ }
+ }
+
+ // We didn't find a matching TCB. If there's a connect indication
+ // handler, call it now to find a connection to accept on.
+
+ CTEAssert(FoundConn == FALSE);
+
+ if (ListenAO->ao_connect != NULL) {
+ uchar TAddress[TCP_TA_SIZE];
+ PVOID ConnContext;
+ PConnectEvent Event;
+ PVOID EventContext;
+ TDI_STATUS Status;
+ TCB *AcceptTCB;
+ TCPConnReq *ConnReq; // NOTE(review): shadows the function-level ConnReq.
+#ifdef NT
+ ConnectEventInfo *EventInfo;
+#else
+ ConnectEventInfo EventInfo;
+#endif
+
+
+ // He has a connect handler. Put the transport address together,
+ // and call him. We also need to get the necessary resources
+ // first.
+ AcceptTCB = AllocTCB();
+ ConnReq = GetConnReq();
+
+ if (AcceptTCB != NULL && ConnReq != NULL) {
+ Event = ListenAO->ao_connect;
+ EventContext = ListenAO->ao_conncontext;
+
+ BuildTDIAddress(TAddress, Src, SrcPort);
+ REF_AO(ListenAO); // Reference taken before the locks are dropped for the indication; released on every exit below.
+
+ AcceptTCB->tcb_state = TCB_LISTEN;
+ AcceptTCB->tcb_connreq = ConnReq;
+ AcceptTCB->tcb_flags |= CONN_ACCEPTED;
+
+ CTEFreeLockFromDPC(&ListenAO->ao_lock, Handle);
+ CTEFreeLockFromDPC(&ConnTableLock, ConnHandle);
+
+ IF_TCPDBG(TCP_DEBUG_CONNECT) {
+ TCPTRACE(("indicating connect request\n"));
+ }
+ // The client's connect handler is called with no locks held.
+ Status = (*Event)(EventContext, TCP_TA_SIZE,
+ (PTRANSPORT_ADDRESS)TAddress, 0, NULL,
+ OptInfo->ioi_optlength, OptInfo->ioi_options,
+ &ConnContext, &EventInfo);
+
+ if (Status == TDI_MORE_PROCESSING) {
+#ifdef NT
+ PIO_STACK_LOCATION IrpSp;
+ PTDI_REQUEST_KERNEL_ACCEPT AcceptRequest;
+
+ IrpSp = IoGetCurrentIrpStackLocation(EventInfo);
+
+ Status = TCPPrepareIrpForCancel(
+ (PTCP_CONTEXT) IrpSp->FileObject->FsContext,
+ EventInfo,
+ TCPCancelRequest
+ );
+
+ if (!NT_SUCCESS(Status)) {
+ Status = TDI_NOT_ACCEPTED;
+ EventInfo = NULL;
+ goto AcceptIrpCancelled; // No locks are held at this point, matching the cleanup code at the label.
+ }
+
+#endif // NT
+
+ // He accepted it. Find the connection on the AddrObj.
+ CTEGetLockAtDPC(&ConnTableLock, &ConnHandle);
+ CTEGetLockAtDPC(&ListenAO->ao_lock, &Handle);
+#ifdef NT
+ {
+
+ IF_TCPDBG(TCP_DEBUG_CONNECT) {
+ TCPTRACE((
+ "connect indication accepted, queueing request\n"
+ ));
+ }
+
+ AcceptRequest = (PTDI_REQUEST_KERNEL_ACCEPT)
+ &(IrpSp->Parameters);
+ ConnReq->tcr_conninfo =
+ AcceptRequest->ReturnConnectionInformation;
+ ConnReq->tcr_req.tr_rtn = TCPRequestComplete;
+ ConnReq->tcr_req.tr_context = EventInfo;
+
+ }
+#else // NT
+ ConnReq->tcr_req.tr_rtn = EventInfo.cei_rtn;
+ ConnReq->tcr_req.tr_context = EventInfo.cei_context;
+ ConnReq->tcr_conninfo = EventInfo.cei_conninfo;
+#endif // NT
+ Temp = QHEAD(&ListenAO->ao_idleq);;
+ CurrentTCB = NULL;
+ Status = TDI_INVALID_CONNECTION; // Reported if no idle connection matches ConnContext.
+
+ while (Temp != QEND(&ListenAO->ao_idleq)) {
+
+ CurrentConn = QSTRUCT(TCPConn, Temp, tc_q);
+
+ CTEStructAssert(CurrentConn, tc);
+ if ((CurrentConn->tc_context == ConnContext) &&
+ !(CurrentConn->tc_flags & CONN_INVALID)) {
+
+ // We think we have a match. The connection
+ // shouldn't have a TCB associated with it. If it
+ // does, it's an error. InitTCBFromConn will
+ // handle all this.
+
+ AcceptTCB->tcb_refcnt = 1;
+#ifdef NT
+ Status = InitTCBFromConn(CurrentConn, AcceptTCB,
+ AcceptRequest->RequestConnectionInformation,
+ TRUE);
+#else // NT
+ Status = InitTCBFromConn(CurrentConn, AcceptTCB,
+ EventInfo.cei_acceptinfo,
+ TRUE);
+#endif // NT
+
+ if (Status == TDI_SUCCESS) {
+ FoundConn = TRUE;
+ AcceptTCB->tcb_state = TCB_SYN_RCVD;
+ AcceptTCB->tcb_conn = CurrentConn;
+ CurrentConn->tc_tcb = AcceptTCB;
+ CurrentConn->tc_refcnt++;
+
+ // Move him from the idle q to the active
+ // queue.
+ REMOVEQ(&CurrentConn->tc_q);
+ ENQUEUE(&ListenAO->ao_activeq, &CurrentConn->tc_q);
+ }
+
+ // In any case, we're done now.
+ break;
+
+ }
+ Temp = QNEXT(Temp);
+ }
+
+ if (!FoundConn) {
+ // Didn't find a match, or had an error. Status
+ // code is set.
+ // Complete the ConnReq and free the resources.
+ CompleteConnReq(AcceptTCB, OptInfo, Status);
+ FreeTCB(AcceptTCB);
+ AcceptTCB = NULL;
+ }
+#ifdef SYN_ATTACK
+ else {
+ if (SynAttackProtect) {
+ SynAttChk(ListenAO);
+ }
+ }
+#endif
+
+ LOCKED_DELAY_DEREF_AO(ListenAO);
+ CTEFreeLockFromDPC(&ListenAO->ao_lock, Handle);
+ CTEFreeLockFromDPC(&ConnTableLock, ConnHandle);
+
+ return AcceptTCB;
+ }
+#ifdef SYN_ATTACK
+ // NOTE(review): the AddrObj lock was released before the indication, so ConnLimitReached is updated here without it - confirm this is intended.
+ if (SynAttackProtect) {
+ CTELockHandle Handle;
+
+ //
+ // If we need to Trigger to a lower retry count
+ //
+
+ if (!ListenAO->ConnLimitReached) {
+ ListenAO->ConnLimitReached = TRUE;
+ CTEGetLockAtDPC(&SynAttLock, &Handle);
+ if ((++TCPPortsExhausted >= TCPMaxPortsExhausted) &&
+ (MaxConnectResponseRexmitCountTmp == MAX_CONNECT_RESPONSE_REXMIT_CNT)) {
+
+ MaxConnectResponseRexmitCountTmp = ADAPTED_MAX_CONNECT_RESPONSE_REXMIT_CNT;
+ }
+ CTEFreeLockFromDPC(&SynAttLock, Handle);
+ }
+ }
+#endif
+
+#ifdef NT
+
+AcceptIrpCancelled:
+
+#endif // NT
+ // The event handler didn't take it. Dereference it, free
+ // the resources, and return NULL.
+ FreeConnReq(ConnReq);
+ FreeTCB(AcceptTCB);
+ DELAY_DEREF_AO(ListenAO);
+ return NULL;
+
+ } else {
+ // We couldn't get a needed resource. Free any that we
+ // did get, and fall through to the 'return NULL' code.
+ if (ConnReq != NULL)
+ FreeConnReq(ConnReq);
+ if (AcceptTCB != NULL)
+ FreeTCB(AcceptTCB);
+ }
+
+ }
+#ifdef SYN_ATTACK
+ else {
+ if (SynAttackProtect) {
+ CTELockHandle Handle;
+
+ //
+ // If we need to Trigger to a lower retry count
+ //
+
+ if (!ListenAO->ConnLimitReached) {
+ ListenAO->ConnLimitReached = TRUE;
+ CTEGetLockAtDPC(&SynAttLock, &Handle);
+ if ((++TCPPortsExhausted >= TCPMaxPortsExhausted) &&
+ (MaxConnectResponseRexmitCountTmp == MAX_CONNECT_RESPONSE_REXMIT_CNT)) {
+
+ MaxConnectResponseRexmitCountTmp = ADAPTED_MAX_CONNECT_RESPONSE_REXMIT_CNT;
+ }
+ CTEFreeLockFromDPC(&SynAttLock, Handle);
+ }
+ }
+ }
+#endif
+
+ // No event handler, or no resource. Free the locks, and return NULL.
+ CTEFreeLockFromDPC(&ListenAO->ao_lock, Handle);
+ CTEFreeLockFromDPC(&ConnTableLock, ConnHandle);
+ return NULL;
+ }
+
+ // If we get here, the address object wasn't valid.
+ CTEFreeLockFromDPC(&ListenAO->ao_lock, Handle);
+ CTEFreeLockFromDPC(&ConnTableLock, ConnHandle);
+ return NULL;
+}
+
+
+//* FindMSS - Find the MSS option in a segment.
+//
+//  Called when a SYN is received to find the MSS option in a segment. We
+//  walk the option bytes that follow the fixed TCP header. If there is no
+//  MSS option, the option has the wrong length or a value of 0, or the
+//  option list is malformed, we assume the worst and return the default
+//  (MAX_REMOTE_MSS, 536 per the original comment).
+//
+//  The options come straight off the wire, so every read is checked
+//  against the number of option bytes remaining: a truncated option never
+//  causes a read past the end of the header.
+//
+//  Input:  TCPH    - TCP header to be searched.
+//
+//  Returns: MSS to be used.
+//
+ushort
+FindMSS(TCPHeader UNALIGNED *TCPH)
+{
+    uint HdrSize;
+    uint OptSize;
+    uint OptLen;
+    uchar *OptPtr;
+
+    // A header no bigger than the fixed part has no options. Checking
+    // this first also keeps the subtraction below from wrapping.
+    HdrSize = TCP_HDR_SIZE(TCPH);
+    if (HdrSize <= sizeof(TCPHeader))
+        return MAX_REMOTE_MSS;
+
+    OptSize = HdrSize - sizeof(TCPHeader);
+    OptPtr = (uchar *)(TCPH + 1);
+
+    while (OptSize) {
+
+        if (*OptPtr == TCP_OPT_EOL)
+            break;
+
+        if (*OptPtr == TCP_OPT_NOP) {
+            // Single byte option, no length field.
+            OptPtr++;
+            OptSize--;
+            continue;
+        }
+
+        // Every other option carries a length byte. Make sure it's
+        // actually there before we look at it.
+        if (OptSize < 2)
+            break;              // Truncated option, use default.
+
+        OptLen = OptPtr[1];
+
+        if (*OptPtr == TCP_OPT_MSS) {
+            ushort TempMss;
+
+            if (OptLen != MSS_OPT_SIZE || OptSize < MSS_OPT_SIZE)
+                break;          // Bad or truncated option, use default.
+
+            TempMss = *(ushort UNALIGNED *)(OptPtr + 2);
+            if (TempMss == 0)
+                break;          // MSS size of 0, use default.
+
+            return net_short(TempMss);
+        }
+
+        // Unknown option. The length counts the kind and length bytes
+        // themselves, so anything under 2 is malformed, as is a length
+        // that runs past the end of the options.
+        if (OptLen < 2 || OptLen > OptSize)
+            break;              // Bad option length, bail out.
+
+        OptSize -= OptLen;
+        OptPtr += OptLen;
+    }
+
+    return MAX_REMOTE_MSS;
+
+}
+
+//* ACKAndDrop - Acknowledge a segment, and drop it.
+//
+//  Called from within the receive code when we need to drop a segment
+//  that's outside the receive window. Unless the segment carried RST we
+//  send an ACK for it; in TIME_WAIT we also restart the retransmit timer
+//  with MAX_REXMIT_TO. The TCB lock is released while the ACK is sent,
+//  and the TCB is dereferenced before we return.
+//
+//  Input:  RI          - Receive info for incoming segment.
+//          RcvTCB      - TCB for incoming segment.
+//
+//  Returns: Nothing.
+//
+void
+ACKAndDrop(TCPRcvInfo *RI, TCB *RcvTCB)
+{
+    CTELockHandle TCBHandle;
+
+    // Pick the lock handle value for this build flavor.
+#ifndef VXD
+    TCBHandle = DISPATCH_LEVEL;
+#elif defined(DEBUG)
+    TCBHandle = DEFAULT_SIMIRQL;
+#endif
+
+    // Never answer a RST with an ACK; just drop it.
+    if ((RI->tri_flags & TCP_FLAG_RST) == 0) {
+
+        if (RcvTCB->tcb_state == TCB_TIME_WAIT)
+            START_TCB_TIMER(RcvTCB->tcb_rexmittimer, MAX_REXMIT_TO);
+
+        // Send the ACK without holding the TCB lock, then take the lock
+        // back for the dereference below.
+        CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TCBHandle);
+        SendACK(RcvTCB);
+        CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TCBHandle);
+    }
+
+    DerefTCB(RcvTCB, TCBHandle);
+
+}
+
+//* ACKData - Acknowledge data.
+//
+// Called from the receive handler to acknowledge data. We're given the
+// TCB and the new value of senduna. We walk down the send q. pulling
+// off sends and putting them on the complete q until we hit the end
+// or we acknowledge the specified number of bytes of data.
+//
+// NOTE: We manipulate the send refcnt and acked flag without taking a lock.
+// This is OK in the VxD version where locks don't mean anything anyway, but
+// in the port to NT we'll need to add locking. The lock will have to be
+// taken in the transmit complete routine. We can't use a lock in the TCB,
+// since the TCB could go away before the transmit complete happens, and a lock
+// in the TSR would be overkill, so it's probably best to use a global lock
+// for this. If that causes too much contention, we could use a set of locks
+// and pass a pointer to the appropriate lock back as part of the transmit
+// confirm context. This lock pointer would also need to be stored in the
+// TCB.
+//
+// Input: ACKTcb - TCB from which to pull data.
+// SendUNA - New value of send una.
+//
+// Returns: Nothing.
+//
+void
+ACKData(TCB *ACKTcb, SeqNum SendUNA)
+{
+    Queue *End, *Current;           // End and current elements.
+    Queue *TempQ, *EndQ;
+    Queue *LastCmplt;               // Last one we completed.
+    TCPSendReq *CurrentTSR;         // Current send req we're
+                                    // looking at.
+    PNDIS_BUFFER CurrentBuffer;     // Current NDIS_BUFFER.
+    uint BufLength;
+    int Amount, OrigAmount;
+    long Result;
+    CTELockHandle Handle;
+    uint Temp;
+
+    CTEStructAssert(ACKTcb, tcb);
+
+    CheckTCBSends(ACKTcb);
+
+    // Number of sequence numbers newly acknowledged by this ACK.
+    Amount = SendUNA - ACKTcb->tcb_senduna;
+    CTEAssert(Amount > 0);
+
+    // Do a quick check to see if this acks everything that we have. If it does,
+    // handle it right away. We can only do this in the ESTABLISHED state,
+    // because we blindly update sendnext, and that can only work if we
+    // haven't sent a FIN.
+    if ((Amount == (int) ACKTcb->tcb_unacked) && ACKTcb->tcb_state == TCB_ESTAB) {
+
+        // Everything is acked.
+        CTEAssert(!EMPTYQ(&ACKTcb->tcb_sendq));
+
+        // Remember the first request, then detach the whole list from the
+        // TCB by reinitializing the queue head.
+        TempQ = ACKTcb->tcb_sendq.q_next;
+
+        INITQ(&ACKTcb->tcb_sendq);
+
+        ACKTcb->tcb_sendnext = SendUNA;
+        ACKTcb->tcb_senduna = SendUNA;
+
+        CTEAssert(ACKTcb->tcb_sendnext == ACKTcb->tcb_sendmax);
+        ACKTcb->tcb_cursend = NULL;
+        ACKTcb->tcb_sendbuf = NULL;
+        ACKTcb->tcb_sendofs = 0;
+        ACKTcb->tcb_sendsize = 0;
+        ACKTcb->tcb_unacked = 0;
+
+        // Now walk down the list of send requests. If the reference count
+        // has gone to 0, put it on the send complete queue.
+        CTEGetLock(&RequestCompleteLock, &Handle);
+        EndQ = &ACKTcb->tcb_sendq;
+        do {
+            CurrentTSR = STRUCT_OF(TCPSendReq, QSTRUCT(TCPReq, TempQ, tr_q),
+                tsr_req);
+
+            CTEStructAssert(CurrentTSR, tsr);
+
+            // Fetch the successor first; the ENQUEUE below reuses tr_q.
+            TempQ = CurrentTSR->tsr_req.tr_q.q_next;
+
+            CurrentTSR->tsr_req.tr_status = TDI_SUCCESS;
+            Result = CTEInterlockedDecrementLong(&CurrentTSR->tsr_refcnt);
+
+            CTEAssert(Result >= 0);
+
+
+            if (Result <= 0) {
+                // No more references are outstanding, the send can be
+                // completed.
+
+                // If we've sent directly from this send, NULL out the next
+                // pointer for the last buffer in the chain.
+                if (CurrentTSR->tsr_lastbuf != NULL) {
+                    NDIS_BUFFER_LINKAGE(CurrentTSR->tsr_lastbuf) = NULL;
+                    CurrentTSR->tsr_lastbuf = NULL;
+                }
+                // Accumulate per-connection statistics; the byte count is
+                // kept as a low/high pair with a manual carry.
+                ACKTcb->tcb_totaltime += (TCPTime - CurrentTSR->tsr_time);
+                Temp = ACKTcb->tcb_bcountlow;
+                ACKTcb->tcb_bcountlow += CurrentTSR->tsr_size;
+                ACKTcb->tcb_bcounthi += (Temp > ACKTcb->tcb_bcountlow ? 1 : 0);
+
+                ENQUEUE(&SendCompleteQ, &CurrentTSR->tsr_req.tr_q);
+            }
+
+        } while (TempQ != EndQ);
+
+        RequestCompleteFlags |= SEND_REQUEST_COMPLETE;
+        CTEFreeLock(&RequestCompleteLock, Handle);
+
+        CheckTCBSends(ACKTcb);
+        return;
+    }
+
+    OrigAmount = Amount;
+    End = QEND(&ACKTcb->tcb_sendq);
+    Current = QHEAD(&ACKTcb->tcb_sendq);
+
+    LastCmplt = NULL;
+
+    // Scan the send queue, consuming Amount bytes. Fully acked requests are
+    // only noted here (via LastCmplt); they're spliced out further down.
+    while (Amount > 0 && Current != End) {
+        CurrentTSR = STRUCT_OF(TCPSendReq, QSTRUCT(TCPReq, Current, tr_q),
+            tsr_req);
+        CTEStructAssert(CurrentTSR, tsr);
+
+
+        if (Amount >= (int) CurrentTSR->tsr_unasize) {
+            // This is completely acked. Just advance to the next one.
+            Amount -= CurrentTSR->tsr_unasize;
+
+            LastCmplt = Current;
+
+            Current = QNEXT(Current);
+            continue;
+        }
+
+        // This one is only partially acked. Update his offset and NDIS buffer
+        // pointer, and break out. We know that Amount is < the unacked size
+        // in this buffer, so we can walk the NDIS buffer chain without fear
+        // of falling off the end.
+        CurrentBuffer = CurrentTSR->tsr_buffer;
+        CTEAssert(CurrentBuffer != NULL);
+        CTEAssert(Amount < (int) CurrentTSR->tsr_unasize);
+        CurrentTSR->tsr_unasize -= Amount;
+
+        // Unacked bytes remaining in the first buffer.
+        BufLength = NdisBufferLength(CurrentBuffer) - CurrentTSR->tsr_offset;
+
+        if (Amount >= (int) BufLength) {
+            do {
+                Amount -= BufLength;
+                CurrentBuffer = NDIS_BUFFER_LINKAGE(CurrentBuffer);
+                CTEAssert(CurrentBuffer != NULL);
+                BufLength = NdisBufferLength(CurrentBuffer);
+            } while (Amount >= (int) BufLength);
+
+            CurrentTSR->tsr_offset = Amount;
+            CurrentTSR->tsr_buffer = CurrentBuffer;
+
+        } else
+            CurrentTSR->tsr_offset += Amount;
+
+        Amount = 0;
+
+        break;
+    }
+
+#ifdef DEBUG
+    // We should always be able to remove at least Amount bytes, except in
+    // the case where a FIN has been sent. In that case we should be off
+    // by exactly one. In the debug builds we'll check this.
+    if (Amount != 0 && (!(ACKTcb->tcb_flags & FIN_SENT) || Amount != 1))
+        DEBUGCHK;
+#endif
+
+    // If the ACK is beyond sendnext, pull the send position forward to it.
+    if (SEQ_GT(SendUNA, ACKTcb->tcb_sendnext)) {
+
+        if (Current != End) {
+            // Need to reevaluate CurrentTSR, in case we bailed out of the
+            // above loop after updating Current but before updating
+            // CurrentTSR.
+            CurrentTSR = STRUCT_OF(TCPSendReq, QSTRUCT(TCPReq, Current, tr_q),
+                tsr_req);
+            CTEStructAssert(CurrentTSR, tsr);
+            ACKTcb->tcb_cursend = CurrentTSR;
+            ACKTcb->tcb_sendbuf = CurrentTSR->tsr_buffer;
+            ACKTcb->tcb_sendofs = CurrentTSR->tsr_offset;
+            ACKTcb->tcb_sendsize = CurrentTSR->tsr_unasize;
+        } else {
+            ACKTcb->tcb_cursend = NULL;
+            ACKTcb->tcb_sendbuf = NULL;
+            ACKTcb->tcb_sendofs = 0;
+            ACKTcb->tcb_sendsize = 0;
+        }
+
+        ACKTcb->tcb_sendnext = SendUNA;
+    }
+
+    // Now update tcb_unacked with the amount we tried to ack minus the
+    // amount we didn't ack (Amount should be 0 or 1 here).
+    CTEAssert(Amount == 0 || Amount == 1);
+
+    ACKTcb->tcb_unacked -= OrigAmount - Amount;
+    CTEAssert(*(int *)&ACKTcb->tcb_unacked >= 0);
+
+    ACKTcb->tcb_senduna = SendUNA;
+
+    // If we've acked any here, LastCmplt will be non-null, and Current will
+    // point to the send that should be at the start of the queue. Splice
+    // out the completed ones and put them on the end of the send completed
+    // queue, and update the TCB send q.
+    if (LastCmplt != NULL) {
+        Queue *FirstCmplt;
+        TCPSendReq *FirstTSR, *EndTSR;
+
+        CTEAssert(!EMPTYQ(&ACKTcb->tcb_sendq));
+
+        FirstCmplt = QHEAD(&ACKTcb->tcb_sendq);
+
+        // If we've acked everything, just reinit the queue.
+        if (Current == End) {
+            INITQ(&ACKTcb->tcb_sendq);
+        } else {
+            // There's still something on the queue. Just update it.
+            ACKTcb->tcb_sendq.q_next = Current;
+            Current->q_prev = &ACKTcb->tcb_sendq;
+        }
+
+        CheckTCBSends(ACKTcb);
+
+        // Now walk down the lists of things acked. If the refcnt on the send
+        // is 0, go ahead and put him on the send complete Q. Otherwise set
+        // the ACKed bit in the send, and he'll be completed when the count
+        // goes to 0 in the transmit confirm.
+        //
+        // Note that we haven't done any locking here. This will probably
+        // need to change in the port to NT.
+
+        // Set FirstTSR to the first TSR we'll complete, and EndTSR to be
+        // the first TSR that isn't completed.
+
+        FirstTSR = STRUCT_OF(TCPSendReq, QSTRUCT(TCPReq, FirstCmplt, tr_q),
+            tsr_req);
+        EndTSR = STRUCT_OF(TCPSendReq, QSTRUCT(TCPReq, Current, tr_q),
+            tsr_req);
+
+        CTEStructAssert(FirstTSR, tsr);
+        CTEAssert(FirstTSR != EndTSR);
+
+        // Now walk the list of ACKed TSRs. If we can complete one, put him
+        // on the complete queue.
+        CTEGetLockAtDPC(&RequestCompleteLock, &Handle);
+        while (FirstTSR != EndTSR) {
+
+
+            // Fetch the successor first; the ENQUEUE below reuses tr_q.
+            TempQ = QNEXT(&FirstTSR->tsr_req.tr_q);
+
+            CTEStructAssert(FirstTSR, tsr);
+            FirstTSR->tsr_req.tr_status = TDI_SUCCESS;
+
+            // The tsr_lastbuf->Next field is zapped to 0 when the tsr_refcnt
+            // goes to 0, so we don't need to do it here.
+
+            // Decrement the reference put on the send buffer when it was
+            // initialized indicating the send has been acknowledged.
+            Result = CTEInterlockedDecrementLong(&(FirstTSR->tsr_refcnt));
+
+            CTEAssert(Result >= 0);
+            if (Result <= 0) {
+                // No more references are outstanding, the send can be
+                // completed.
+
+                // If we've sent directly from this send, NULL out the next
+                // pointer for the last buffer in the chain.
+                if (FirstTSR->tsr_lastbuf != NULL) {
+                    NDIS_BUFFER_LINKAGE(FirstTSR->tsr_lastbuf) = NULL;
+                    FirstTSR->tsr_lastbuf = NULL;
+                }
+
+                // Charge the statistics to the request being completed
+                // (FirstTSR). CurrentTSR is left over from the scan loop
+                // above and refers to a different request here.
+                ACKTcb->tcb_totaltime += (TCPTime - FirstTSR->tsr_time);
+                Temp = ACKTcb->tcb_bcountlow;
+                ACKTcb->tcb_bcountlow += FirstTSR->tsr_size;
+                ACKTcb->tcb_bcounthi += (Temp > ACKTcb->tcb_bcountlow ? 1 : 0);
+                ENQUEUE(&SendCompleteQ, &FirstTSR->tsr_req.tr_q);
+            }
+
+            FirstTSR = STRUCT_OF(TCPSendReq, QSTRUCT(TCPReq, TempQ, tr_q),
+                tsr_req);
+        }
+        RequestCompleteFlags |= SEND_REQUEST_COMPLETE;
+        CTEFreeLockFromDPC(&RequestCompleteLock, Handle);
+    }
+
+}
+
+//* TrimRcvBuf - Trim the front edge of a receive buffer.
+//
+// Skips over the first Count bytes of a receive buffer chain. Each buffer
+// touched has its data pointer advanced and its size reduced in place;
+// buffers that are used up completely are stepped over, but they are not
+// unlinked or freed here. A Count of 0 leaves the chain untouched. If Count
+// covers everything in the chain we return the last buffer in the chain,
+// with a size of 0. It's the caller's responsibility to catch this.
+//
+// Input: RcvBuf - Buffer to be trimmed.
+// Count - Amount to be trimmed.
+//
+// Returns: A pointer to the buffer holding the new start of the data. This
+// is never NULL when RcvBuf is non-NULL.
+//
+IPRcvBuf *
+TrimRcvBuf(IPRcvBuf *RcvBuf, uint Count)
+{
+    CTEAssert(RcvBuf != NULL);
+
+    while (Count != 0) {
+        uint Chunk;     // Bytes taken from the current buffer.
+
+        CTEAssert(RcvBuf != NULL);
+
+        Chunk = MIN(Count, RcvBuf->ipr_size);
+        RcvBuf->ipr_buffer += Chunk;
+        RcvBuf->ipr_size -= Chunk;
+        Count -= Chunk;
+
+        // Data left in this buffer means Count has been used up, so the
+        // loop condition ends the walk on the next test.
+        if (RcvBuf->ipr_size != 0)
+            continue;
+
+        // This buffer is empty. Stop on the last buffer rather than
+        // walking off the end of the chain.
+        if (RcvBuf->ipr_next == NULL)
+            break;
+
+        RcvBuf = RcvBuf->ipr_next;
+    }
+
+    return RcvBuf;
+
+}
+
+//* FreeRBChain - Free an RB chain.
+//
+// Walks a chain of receive buffers and releases every element marked as
+// owned by TCP (IPR_OWNER_TCP). Elements with any other owner are skipped
+// and left untouched.
+//
+// Input: RBChain - RBChain to be freed. May be NULL.
+//
+// Returns: Nothing.
+//
+void
+FreeRBChain(IPRcvBuf *RBChain)
+{
+    IPRcvBuf *Next;
+
+    for (; RBChain != NULL; RBChain = Next) {
+        // Pick up the link before the element can be released.
+        Next = RBChain->ipr_next;
+
+        if (RBChain->ipr_owner == IPR_OWNER_TCP)
+            CTEFreeMem(RBChain);
+    }
+
+}
+
+IPRcvBuf DummyBuf; // Placeholder that PullFromRAQ returns (with a size of 0) when only a FIN remains; callers are expected not to look at its contents.
+
+//* PullFromRAQ - Pull segments from the reassembly queue.
+//
+// Called when we've received frames out of order, and have some segments
+// on the reassembly queue. We walk down the reassembly list from the head,
+// freeing every entry that is completely covered by the TCB's current
+// receive-next sequence number. When we reach an entry that extends beyond
+// receive-next we fill in RcvInfo from it, drop the part in front of
+// receive-next, unlink the entry from the queue and hand its data back.
+// If we reach an entry that starts beyond receive-next there is a gap, and
+// nothing is returned.
+//
+// Whenever the queue becomes empty the TCB's slow count is decremented, and
+// the slow-path flag is cleared once that count reaches 0.
+//
+// Input: RcvTCB - TCB to pull from.
+// RcvInfo - Pointer to TCPRcvInfo structure for current seg.
+// Filled in (seq, flags, urgent) when a buffer is returned.
+// Size - Pointer to size for current segment. Set to the
+// number of data bytes returned. When NULL is returned
+// it is only written (as 0) in DEBUG builds.
+//
+// Returns: The buffer chain for the pulled segment, a pointer to DummyBuf
+// (with *Size == 0) when only a FIN is left to deliver, or NULL
+// if nothing could be pulled.
+//
+IPRcvBuf *
+PullFromRAQ(TCB *RcvTCB, TCPRcvInfo *RcvInfo, uint *Size)
+{
+    TCPRAHdr *Hdr;          // Reassembly header being examined.
+    TCPRAHdr *Following;    // Header after Hdr, saved before Hdr is freed.
+    SeqNum WantSeq;         // Next sequence number we want.
+    SeqNum HdrEndSeq;       // Seq. number immediately after Hdr.
+    IPRcvBuf *Result;
+    int Skip;               // Bytes of Hdr in front of WantSeq.
+
+    CTEStructAssert(RcvTCB, tcb);
+
+    WantSeq = RcvTCB->tcb_rcvnext;
+
+    for (Hdr = RcvTCB->tcb_raq; Hdr != NULL; Hdr = Following) {
+        CTEStructAssert(Hdr, trh);
+        CTEAssert(!(Hdr->trh_flags & TCP_FLAG_SYN));
+
+        // An entry carrying a FIN must be the last one on the queue.
+        CTEAssert(!(Hdr->trh_flags & TCP_FLAG_FIN) ||
+            Hdr->trh_next == NULL);
+
+        // This entry starts too far down: there's a gap, so give up.
+        if (SEQ_LT(WantSeq, Hdr->trh_start))
+            break;
+
+        Following = Hdr->trh_next;
+
+        // A FIN occupies one sequence number after the data.
+        HdrEndSeq = Hdr->trh_start + Hdr->trh_size;
+        if (Hdr->trh_flags & TCP_FLAG_FIN)
+            HdrEndSeq++;
+
+        if (SEQ_GTE(WantSeq, HdrEndSeq)) {
+            // The entry is overlapped completely. Free it and move on.
+            FreeRBChain(Hdr->trh_buffer);
+            CTEFreeMem(Hdr);
+            RcvTCB->tcb_raq = Following;
+
+            if (Following == NULL) {
+                // We've just cleaned off the RAQ. We can go back on the
+                // fast path now.
+                if (--(RcvTCB->tcb_slowcount) == 0) {
+                    RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
+                    CheckTCBRcv(RcvTCB);
+                }
+            }
+            continue;
+        }
+
+        // This entry reaches past WantSeq, so it's the one we deliver.
+        Skip = WantSeq - Hdr->trh_start;
+        RcvInfo->tri_seq = WantSeq;
+        RcvInfo->tri_flags = Hdr->trh_flags;
+        RcvInfo->tri_urgent = Hdr->trh_urg;
+
+        if (Skip == (int) Hdr->trh_size) {
+            // All of the data is overlapped but the sequence space isn't,
+            // so there must be a FIN here. FreePartialRB would return NULL,
+            // which is our failure signal, so hand back a placeholder
+            // buffer with a size of 0 instead.
+            FreeRBChain(Hdr->trh_buffer);
+            CTEAssert(Hdr->trh_flags & TCP_FLAG_FIN);
+            Result = &DummyBuf;
+            *Size = 0;
+        } else {
+            // Presumably FreePartialRB drops the first Skip bytes and
+            // returns the remainder of the chain - confirm against its
+            // definition.
+            Result = FreePartialRB(Hdr->trh_buffer, Skip);
+            *Size = Hdr->trh_size - Skip;
+        }
+
+        RcvTCB->tcb_raq = Following;
+        if (Following == NULL) {
+            // We've just cleaned off the RAQ. We can go back on the
+            // fast path now.
+            if (--(RcvTCB->tcb_slowcount) == 0) {
+                RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
+                CheckTCBRcv(RcvTCB);
+            }
+        }
+
+        CTEFreeMem(Hdr);
+        return Result;
+    }
+
+#ifdef DEBUG
+    *Size = 0;
+#endif
+    return NULL;
+
+}
+
+//* CreateTRH - Create a TCP reassembly header.
+//
+// Allocates a new reassembly header together with a single TCP-owned
+// receive buffer big enough for Size bytes, copies Size bytes from RcvBuf
+// into it (the copy is skipped when Size is 0), fills in the header from
+// RcvInfo and links the header in directly after PrevTRH. Nothing is
+// linked in if either allocation fails. The caller must hold the lock on
+// the TCB when this is called.
+//
+// Input: PrevTRH - Pointer to TRH to insert after.
+// RcvBuf - Pointer to IP RcvBuf chain.
+// RcvInfo - Pointer to RcvInfo for this TRH.
+// Size - Size in bytes of data. May be 0 only when RcvInfo
+// carries a FIN.
+//
+// Returns: TRUE if we created it, FALSE otherwise.
+//
+uint
+CreateTRH(TCPRAHdr *PrevTRH, IPRcvBuf *RcvBuf, TCPRcvInfo *RcvInfo, int Size)
+{
+    TCPRAHdr *Hdr;
+    IPRcvBuf *Copy;
+
+    CTEAssert((Size > 0) || (RcvInfo->tri_flags & TCP_FLAG_FIN));
+
+    Hdr = CTEAllocMem(sizeof(TCPRAHdr));
+    if (Hdr == NULL)
+        return FALSE;
+
+    // The data area lives directly behind the IPRcvBuf in one allocation.
+    Copy = CTEAllocMem(sizeof(IPRcvBuf) + Size);
+    if (Copy == NULL) {
+        CTEFreeMem(Hdr);
+        return FALSE;
+    }
+
+    // Set up the private copy of the segment data.
+    Copy->ipr_next = NULL;
+    Copy->ipr_owner = IPR_OWNER_TCP;
+    Copy->ipr_buffer = (uchar *)(Copy + 1);
+    Copy->ipr_size = (uint)Size;
+    if (Size != 0)
+        CopyRcvToBuffer(Copy->ipr_buffer, RcvBuf, Size, 0);
+
+    // Describe the segment in the header.
+#ifdef DEBUG
+    Hdr->trh_sig = trh_signature;
+#endif
+    Hdr->trh_start = RcvInfo->tri_seq;
+    Hdr->trh_size = Size;
+    Hdr->trh_flags = RcvInfo->tri_flags;
+    Hdr->trh_urg = RcvInfo->tri_urgent;
+    Hdr->trh_buffer = Copy;
+    Hdr->trh_end = Copy;
+
+    // Splice it in after PrevTRH.
+    Hdr->trh_next = PrevTRH->trh_next;
+    PrevTRH->trh_next = Hdr;
+
+    return TRUE;
+
+}
+
+//* PutOnRAQ - Put a segment on the reassembly queue.
+//
+// Called during segment reception to put a segment on the reassembly
+// queue. We try to use as few reassembly headers as possible, so if this
+// segment has some overlap with an existing entry in the queue we'll just
+// update the existing entry. If there is no overlap we'll create a new
+// reassembly header. Combining URGENT data with non-URGENT data is tricky.
+// If we get a segment that has urgent data that overlaps the front of a
+// reassembly header we'll always mark the whole chunk as urgent - the value
+// of the urgent pointer will mark the end of urgent data, so this is OK. If it
+// only overlaps at the end, however, we won't combine, since we would have to
+// mark previously non-urgent data as urgent. We'll trim the
+// front of the incoming segment and create a new reassembly header. Also,
+// if we have non-urgent data that overlaps at the front of a reassembly
+// header containing urgent data we can't combine these two, since again we
+// would mark non-urgent data as urgent.
+// Our search will stop if we find an entry with a FIN.
+// We assume that the TCB lock is held by the caller.
+//
+// Entry: RcvTCB - TCB on which to reassemble.
+// RcvInfo - Pointer to RcvInfo for new segment.
+// RcvBuf - IP RcvBuf chain for this segment.
+// Size - Size in bytes of data in this segment.
+//
+// Returns: Nothing.
+//
+void
+PutOnRAQ(TCB *RcvTCB, TCPRcvInfo *RcvInfo, IPRcvBuf *RcvBuf, uint Size)
+{
+    TCPRAHdr *PrevTRH, *CurrentTRH;     // Prev. and current TRH
+                                        // pointers.
+    SeqNum NextSeq;                     // Seq. number of first byte
+                                        // after segment being
+                                        // reassembled.
+    SeqNum NextTRHSeq;                  // Seq. number of first byte
+                                        // after current TRH.
+    uint Created;
+
+    CTEStructAssert(RcvTCB, tcb);
+    CTEAssert(RcvTCB->tcb_rcvnext != RcvInfo->tri_seq);
+    CTEAssert(!(RcvInfo->tri_flags & TCP_FLAG_SYN));
+    NextSeq = RcvInfo->tri_seq + Size +
+        ((RcvInfo->tri_flags & TCP_FLAG_FIN) ? 1 : 0);
+
+    // Start with a pseudo-TRH whose trh_next field is the TCB's queue head
+    // pointer, so linking after PrevTRH also covers insertion at the front.
+    PrevTRH = STRUCT_OF(TCPRAHdr, &RcvTCB->tcb_raq, trh_next);
+    CurrentTRH = PrevTRH->trh_next;
+
+    // Walk down the reassembly queue, looking for the correct place to
+    // insert this, until we hit the end.
+    while (CurrentTRH != NULL) {
+        CTEStructAssert(CurrentTRH, trh);
+
+        CTEAssert(!(CurrentTRH->trh_flags & TCP_FLAG_SYN));
+        NextTRHSeq = CurrentTRH->trh_start + CurrentTRH->trh_size +
+            ((CurrentTRH->trh_flags & TCP_FLAG_FIN) ? 1 : 0);
+
+        // First, see if it starts beyond the end of the current TRH.
+        if (SEQ_LTE(RcvInfo->tri_seq, NextTRHSeq)) {
+            // We know the incoming segment doesn't start beyond the end
+            // of this TRH, so we'll either create a new TRH in front of
+            // this one or we'll merge the new segment onto this TRH.
+            // If the end of the current segment is in front of the start
+            // of the current TRH, we'll need to create a new TRH. Otherwise
+            // we'll merge these two.
+            if (SEQ_LT(NextSeq, CurrentTRH->trh_start))
+                break;
+            else {
+                // There's some overlap. If there's actually data in the
+                // incoming segment we'll merge it.
+                if (Size != 0) {
+                    int FrontOverlap, BackOverlap;
+                    IPRcvBuf *NewRB;
+
+                    // We need to merge. If there's a FIN on the incoming
+                    // segment that would fall inside this current TRH, we
+                    // have a protocol violation from the remote peer. In this
+                    // case just return, discarding the incoming segment.
+                    if ((RcvInfo->tri_flags & TCP_FLAG_FIN) &&
+                        SEQ_LTE(NextSeq, NextTRHSeq))
+                        return;
+
+                    // We have some overlap. Figure out how much.
+                    FrontOverlap = CurrentTRH->trh_start - RcvInfo->tri_seq;
+                    if (FrontOverlap > 0) {
+                        // Have overlap in front. Allocate an IPRcvBuf to
+                        // to hold it, and copy it, unless we would have to
+                        // combine non-urgent with urgent.
+                        if (!(RcvInfo->tri_flags & TCP_FLAG_URG) &&
+                            (CurrentTRH->trh_flags & TCP_FLAG_URG)) {
+                            // Keep the non-urgent front part in a TRH of
+                            // its own, linked in ahead of the current one.
+                            if (CreateTRH(PrevTRH, RcvBuf, RcvInfo,
+                                CurrentTRH->trh_start - RcvInfo->tri_seq)) {
+                                PrevTRH = PrevTRH->trh_next;
+                                CurrentTRH = PrevTRH->trh_next;
+                            }
+                            FrontOverlap = 0;
+
+                        } else {
+                            NewRB = CTEAllocMem(sizeof(IPRcvBuf) + FrontOverlap);
+                            if (NewRB == NULL)
+                                return;     // Couldn't get the buffer.
+
+                            // Prepend the new front data to this TRH.
+                            NewRB->ipr_owner = IPR_OWNER_TCP;
+                            NewRB->ipr_size = FrontOverlap;
+                            NewRB->ipr_buffer = (uchar *)(NewRB + 1);
+                            CopyRcvToBuffer(NewRB->ipr_buffer, RcvBuf,
+                                FrontOverlap, 0);
+                            CurrentTRH->trh_size += FrontOverlap;
+                            NewRB->ipr_next = CurrentTRH->trh_buffer;
+                            CurrentTRH->trh_buffer = NewRB;
+                            CurrentTRH->trh_start = RcvInfo->tri_seq;
+                        }
+                    }
+
+                    // We've updated the starting sequence number of this TRH
+                    // if we needed to. Now look for back overlap. There can't
+                    // be any back overlap if the current TRH has a FIN. Also
+                    // we'll need to check for urgent data if there is back
+                    // overlap.
+                    if (!(CurrentTRH->trh_flags & TCP_FLAG_FIN)) {
+                        BackOverlap = RcvInfo->tri_seq + Size - NextTRHSeq;
+                        if ((BackOverlap > 0) &&
+                            (RcvInfo->tri_flags & TCP_FLAG_URG) &&
+                            !(CurrentTRH->trh_flags & TCP_FLAG_URG) &&
+                            (FrontOverlap <= 0)) {
+                            int AmountToTrim;
+                            // The incoming segment has urgent data and overlaps
+                            // on the back but not the front, and the current
+                            // TRH has no urgent data. We can't combine into
+                            // this TRH, so trim the front of the incoming
+                            // segment to NextTRHSeq and move to the next
+                            // TRH.
+                            AmountToTrim = NextTRHSeq - RcvInfo->tri_seq;
+                            CTEAssert(AmountToTrim >= 0);
+                            CTEAssert(AmountToTrim < (int) Size);
+                            RcvBuf = FreePartialRB(RcvBuf, (uint)AmountToTrim);
+                            RcvInfo->tri_seq += AmountToTrim;
+                            RcvInfo->tri_urgent -= AmountToTrim;
+                            // The chain is now shorter, so the byte count
+                            // has to shrink with it. Otherwise later copies
+                            // (back overlap, or CreateTRH below) would read
+                            // past the end of the trimmed chain. NextSeq is
+                            // unaffected, since tri_seq + Size is unchanged.
+                            Size -= (uint)AmountToTrim;
+                            PrevTRH = CurrentTRH;
+                            CurrentTRH = PrevTRH->trh_next;
+                            continue;
+                        }
+
+                    } else
+                        BackOverlap = 0;
+
+                    // Now if we have back overlap, copy it.
+                    if (BackOverlap > 0) {
+                        // We have back overlap. Get a buffer to copy it into.
+                        // If we can't get one, we won't just return, because
+                        // we may have updated the front and may need to
+                        // update the urgent info.
+                        NewRB = CTEAllocMem(sizeof(IPRcvBuf) + BackOverlap);
+                        if (NewRB != NULL) {
+                            // Got the buffer. Append it to this TRH's chain.
+                            NewRB->ipr_owner = IPR_OWNER_TCP;
+                            NewRB->ipr_size = BackOverlap;
+                            NewRB->ipr_buffer = (uchar *)(NewRB + 1);
+                            CopyRcvToBuffer(NewRB->ipr_buffer, RcvBuf,
+                                BackOverlap, NextTRHSeq - RcvInfo->tri_seq);
+                            CurrentTRH->trh_size += BackOverlap;
+                            NewRB->ipr_next = CurrentTRH->trh_end->ipr_next;
+                            CurrentTRH->trh_end->ipr_next = NewRB;
+                            CurrentTRH->trh_end = NewRB;
+                        }
+                    }
+
+                    // Everything should be consistent now. If there's an
+                    // urgent data pointer in the incoming segment, update the
+                    // one in the TRH now.
+                    if (RcvInfo->tri_flags & TCP_FLAG_URG) {
+                        SeqNum UrgSeq;
+                        // Have an urgent pointer. If the current TRH already
+                        // has an urgent pointer, see which is bigger. Otherwise
+                        // just use this one.
+                        UrgSeq = RcvInfo->tri_seq + RcvInfo->tri_urgent;
+                        if (CurrentTRH->trh_flags & TCP_FLAG_URG) {
+                            SeqNum TRHUrgSeq;
+
+                            TRHUrgSeq = CurrentTRH->trh_start +
+                                CurrentTRH->trh_urg;
+                            if (SEQ_LT(UrgSeq, TRHUrgSeq))
+                                UrgSeq = TRHUrgSeq;
+                        } else
+                            CurrentTRH->trh_flags |= TCP_FLAG_URG;
+
+                        // trh_urg is kept relative to trh_start.
+                        CurrentTRH->trh_urg = UrgSeq - CurrentTRH->trh_start;
+                    }
+
+                } else {
+                    // We have a 0 length segment. The only interesting thing
+                    // here is if there's a FIN on the segment. If there is,
+                    // and the seq. # of the incoming segment is exactly after
+                    // the current TRH, OR matches the FIN in the current TRH,
+                    // we note it.
+                    if (RcvInfo->tri_flags & TCP_FLAG_FIN) {
+                        if (!(CurrentTRH->trh_flags & TCP_FLAG_FIN)) {
+                            if (SEQ_EQ(NextTRHSeq, RcvInfo->tri_seq))
+                                CurrentTRH->trh_flags |= TCP_FLAG_FIN;
+                            else
+                                DEBUGCHK;
+                        }
+                        else {
+                            if ( !(SEQ_EQ((NextTRHSeq-1), RcvInfo->tri_seq)) ) {
+                                DEBUGCHK;
+                            }
+                        }
+                    }
+                }
+                return;
+            }
+        } else {
+            // Look at the next TRH, unless the current TRH has a FIN. If he
+            // has a FIN, we won't save any data beyond that anyway.
+            if (CurrentTRH->trh_flags & TCP_FLAG_FIN)
+                return;
+
+            PrevTRH = CurrentTRH;
+            CurrentTRH = PrevTRH->trh_next;
+        }
+    }
+
+    // When we get here, we need to create a new TRH. If we create one and
+    // there was previously nothing on the reassembly queue, we'll have to
+    // move off the fast receive path.
+
+    // Sample the queue head before CreateTRH links anything in, so we can
+    // tell whether the queue was empty.
+    CurrentTRH = RcvTCB->tcb_raq;
+    Created = CreateTRH(PrevTRH, RcvBuf, RcvInfo, (int)Size);
+
+    if (Created && CurrentTRH == NULL) {
+        RcvTCB->tcb_slowcount++;
+        RcvTCB->tcb_fastchk |= TCP_FLAG_SLOW;
+        CheckTCBRcv(RcvTCB);
+    }
+
+
+}
+
+
+//* TCPRcv - Receive a TCP segment.
+//
+// This is the routine called by IP when we need to receive a TCP segment.
+// In general, we follow the RFC 793 event processing section pretty closely,
+// but there is a 'fast path' where we make some quick checks on the incoming
+// segment, and if it matches we deliver it immediately.
+//
+// Entry: IPContext - IPContext identifying physical i/f that
+// received the data.
+// Dest - IPAddr of destination.
+// Src - IPAddr of source.
+// LocalAddr - Local address of network which caused this to be
+// received.
+// SrcAddr - Address of local interface which received the packet
+// IPH - IP Header.
+// IPHLength - Bytes in IPH.
+// RcvBuf - Pointer to receive buffer chain containing data.
+// Size - Size in bytes of data received.
+// IsBCast - Boolean indicator of whether or not this came in as
+// a bcast.
+// Protocol - Protocol this came in on - should be TCP.
+// OptInfo - Pointer to info structure for received options.
+//
+// Returns: Status of reception. Anything other than IP_SUCCESS will cause
+// IP to send a 'port unreachable' message.
+//
+IP_STATUS
+TCPRcv(void *IPContext, IPAddr Dest, IPAddr Src, IPAddr LocalAddr,
+ IPAddr SrcAddr, IPHeader UNALIGNED *IPH, uint IPHLength, IPRcvBuf *RcvBuf,
+ uint Size, uchar IsBCast, uchar Protocol, IPOptInfo *OptInfo)
+{
+ TCPHeader UNALIGNED *TCPH; // The TCP header.
+ TCB *RcvTCB; // TCB on which to receive the packet.
+ CTELockHandle TableHandle, TCBHandle;
+ TCPRcvInfo RcvInfo; // Local swapped copy of rcv info.
+ uint DataOffset; // Offset from start of header to data.
+ uint Actions;
+ uint BytesTaken;
+ uint NewSize;
+
+ CheckRBList(RcvBuf, Size);
+
+ TStats.ts_insegs++;
+
+ // Checksum it, to make sure it's valid.
+ TCPH = (TCPHeader *)RcvBuf->ipr_buffer;
+
+ if (!IsBCast) {
+
+ if (Size >= sizeof(TCPHeader) && XsumRcvBuf(PHXSUM(Src, Dest, PROTOCOL_TCP,
+ Size), RcvBuf) == 0xffff) {
+
+ // The packet is valid. Get the info we need and byte swap it,
+ // and then try to find a matching TCB.
+
+ RcvInfo.tri_seq = net_long(TCPH->tcp_seq);
+ RcvInfo.tri_ack = net_long(TCPH->tcp_ack);
+ RcvInfo.tri_window = (uint)net_short(TCPH->tcp_window);
+ RcvInfo.tri_urgent = (uint)net_short(TCPH->tcp_urgent);
+ RcvInfo.tri_flags = (uint)TCPH->tcp_flags;
+ DataOffset = TCP_HDR_SIZE(TCPH);
+
+ if (DataOffset <= Size) {
+
+ Size -= DataOffset;
+ CTEAssert(DataOffset <= RcvBuf->ipr_size);
+ RcvBuf->ipr_size -= DataOffset;
+ RcvBuf->ipr_buffer += DataOffset;
+
+ CTEGetLockAtDPC(&TCBTableLock, &TableHandle);
+ RcvTCB = FindTCB(Dest, Src, TCPH->tcp_src, TCPH->tcp_dest);
+ if (RcvTCB != NULL) {
+ // Found one. Get the lock on it, and continue.
+ CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TCBHandle);
+ CTEFreeLockFromDPC(&TCBTableLock, TCBHandle);
+ } else {
+ uchar DType;
+
+ // Didn't find a matching TCB. If this segment carries a SYN,
+ // find a matching address object and see it it has a listen
+ // indication. If it does, call it. Otherwise send a RST
+ // back to the sender.
+ CTEFreeLockFromDPC(&TCBTableLock, TableHandle);
+
+
+ // Make sure that the source address isn't a broadcast
+ // before proceeding.
+ if ((*LocalNetInfo.ipi_invalidsrc)(Src))
+ return IP_SUCCESS;
+
+ // If it doesn't have a SYN (and only a SYN), we'll send a
+ // reset.
+ if ((RcvInfo.tri_flags & (TCP_FLAG_SYN | TCP_FLAG_ACK | TCP_FLAG_RST)) ==
+ TCP_FLAG_SYN) {
+ AddrObj *AO;
+
+ //
+ // This segment had a SYN.
+ //
+ //
+#ifdef NT
+ CTEGetLockAtDPC(&AddrObjTableLock, &TableHandle);
+#endif
+
+#ifdef SECFLTR
+ // See if we are filtering the
+ // destination interface/port.
+ //
+ if ( (!SecurityFilteringEnabled ||
+ IsPermittedSecurityFilter(
+ LocalAddr,
+ IPContext,
+ PROTOCOL_TCP,
+ (ulong) net_short(TCPH->tcp_dest)
+ ))
+ )
+ {
+#else // SECFLTR
+ if ( 1 ) {
+#endif // SECFLTR
+
+ //
+ // Find a matching address object, and then try and find a
+ // listening connection on that AO.
+ //
+ AO = GetBestAddrObj(Dest, TCPH->tcp_dest, PROTOCOL_TCP);
+ if (AO != NULL) {
+
+ // Found an AO. Try and find a listening connection.
+ // FindListenConn will free the lock on the AddrObjTable.
+ RcvTCB = FindListenConn(AO, Src, TCPH->tcp_src, OptInfo);
+
+ if (RcvTCB != NULL) {
+ uint Inserted;
+
+ CTEStructAssert(RcvTCB, tcb);
+ CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
+
+ // We found a listening connection. Initialize it
+ // now, and if it is actually to be accepted we'll
+ // send a SYN-ACK also.
+
+ CTEAssert(RcvTCB->tcb_state == TCB_SYN_RCVD);
+
+ RcvTCB->tcb_daddr = Src;
+ RcvTCB->tcb_saddr = Dest;
+ RcvTCB->tcb_dport = TCPH->tcp_src;
+ RcvTCB->tcb_sport = TCPH->tcp_dest;
+ RcvTCB->tcb_rcvnext = ++RcvInfo.tri_seq;
+ RcvTCB->tcb_sendwin = RcvInfo.tri_window;
+ RcvTCB->tcb_remmss = FindMSS(TCPH);
+ TStats.ts_passiveopens++;
+ RcvTCB->tcb_fastchk |= TCP_FLAG_IN_RCV;
+ CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
+
+ Inserted = InsertTCB(RcvTCB);
+
+ // Get the lock on it, and see if it's been
+ // accepted.
+ CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
+ if (!Inserted) {
+ // Couldn't insert it!.
+ CompleteConnReq(RcvTCB, OptInfo,
+ TDI_CONNECTION_ABORTED);
+ RcvTCB->tcb_refcnt--;
+#ifdef NT
+ TryToCloseTCB(RcvTCB, TCB_CLOSE_ABORTED, DISPATCH_LEVEL);
+#else
+ TryToCloseTCB(RcvTCB, TCB_CLOSE_ABORTED, TableHandle);
+#endif
+ return IP_SUCCESS;
+ }
+
+
+ RcvTCB->tcb_fastchk &= ~TCP_FLAG_IN_RCV;
+ if (RcvTCB->tcb_flags & SEND_AFTER_RCV) {
+ RcvTCB->tcb_flags &= ~SEND_AFTER_RCV;
+ DelayAction(RcvTCB, NEED_OUTPUT);
+ }
+
+ // We'll need to update the options, in any case.
+ if (OptInfo->ioi_options != NULL) {
+ if (!(RcvTCB->tcb_flags & CLIENT_OPTIONS)) {
+ (*LocalNetInfo.ipi_updateopts)(OptInfo,
+ &RcvTCB->tcb_opt, Src, NULL_IP_ADDR);
+ }
+ }
+
+ if (RcvTCB->tcb_flags & CONN_ACCEPTED) {
+
+ // The connection was accepted. Finish the
+ // initialization, and send the SYN ack.
+
+#ifdef NT
+ AcceptConn(RcvTCB, DISPATCH_LEVEL);
+#else
+ AcceptConn(RcvTCB, TableHandle);
+#endif
+
+ return IP_SUCCESS;
+ } else {
+
+ // We don't know what to do about the
+ // connection yet. Return the pending listen,
+ // dereference the connection, and return.
+
+ CompleteConnReq(RcvTCB, OptInfo, TDI_SUCCESS);
+
+#ifdef NT
+ DerefTCB(RcvTCB, DISPATCH_LEVEL);
+#else
+ DerefTCB(RcvTCB, TableHandle);
+#endif
+
+ return IP_SUCCESS;
+ }
+
+ }
+ // No listening connection. AddrObjTableLock was
+ // released by FindListenConn. Fall through to send
+ // RST code.
+
+ } else {
+ // No address object. Free the lock, and fall through
+ // to the send RST code.
+ CTEFreeLockFromDPC(&AddrObjTableLock, TableHandle);
+ }
+ }
+ else {
+ // Operation not permitted. Free the lock, and fall through
+ // to the send RST code.
+ CTEFreeLockFromDPC(&AddrObjTableLock, TableHandle);
+ }
+
+ }
+
+ // Toss out any segments containing RST.
+ if (RcvInfo.tri_flags & TCP_FLAG_RST)
+ return IP_SUCCESS;
+
+ // Not a SYN, no AddrObj available, or port filtered.
+ // Send a RST back.
+ SendRSTFromHeader(TCPH, Size, Src, Dest, OptInfo);
+
+ return IP_SUCCESS;
+ }
+
+ // Do the fast path check. We can hit the fast path if the incoming
+ // sequence number matches our receive next and the masked flags
+ // match our 'predicted' flags.
+ CheckTCBRcv(RcvTCB);
+ RcvTCB->tcb_alive = TCPTime;
+
+ if (RcvTCB->tcb_rcvnext == RcvInfo.tri_seq &&
+ (RcvInfo.tri_flags & TCP_FLAGS_ALL) == RcvTCB->tcb_fastchk){
+
+ Actions = 0;
+ RcvTCB->tcb_refcnt++;
+
+ // The fast path. We know all we have to do here is ack sends and
+ // deliver data. First try and ack data.
+
+
+ if (SEQ_LT(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
+ SEQ_LTE(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
+
+ uint CWin;
+ uint MSS;
+
+ // The ack acknowledes something. Pull the
+ // appropriate amount off the send q.
+ ACKData(RcvTCB, RcvInfo.tri_ack);
+
+ // If this acknowledges something we were running a RTT on,
+ // update that stuff now.
+ if (RcvTCB->tcb_rtt != 0 && SEQ_GT(RcvInfo.tri_ack,
+ RcvTCB->tcb_rttseq)) {
+ short RTT;
+
+ RTT = (short)(TCPTime - RcvTCB->tcb_rtt);
+ RcvTCB->tcb_rtt = 0;
+ RTT -= (RcvTCB->tcb_smrtt >> 3);
+ RcvTCB->tcb_smrtt += RTT;
+ RTT = (RTT >= 0 ? RTT : -RTT);
+ RTT -= (RcvTCB->tcb_delta >> 3);
+ RcvTCB->tcb_delta += RTT;
+ RcvTCB->tcb_rexmit = MIN(MAX(REXMIT_TO(RcvTCB),
+ MIN_RETRAN_TICKS), MAX_REXMIT_TO);
+ }
+
+ // Update the congestion window now.
+ CWin = RcvTCB->tcb_cwin;
+ MSS = RcvTCB->tcb_mss;
+ if (CWin < RcvTCB->tcb_maxwin) {
+ if (CWin < RcvTCB->tcb_ssthresh)
+ CWin += MSS;
+ else
+ CWin += (MSS * MSS)/CWin;
+
+ RcvTCB->tcb_cwin = CWin;
+ }
+
+ CTEAssert(*(int *)&RcvTCB->tcb_cwin > 0);
+
+ // We've acknowledged something, so reset the rexmit count.
+ // If there's still stuff outstanding, restart the rexmit
+ // timer.
+ RcvTCB->tcb_rexmitcnt = 0;
+ if (SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendmax))
+ STOP_TCB_TIMER(RcvTCB->tcb_rexmittimer);
+ else
+ START_TCB_TIMER(RcvTCB->tcb_rexmittimer, RcvTCB->tcb_rexmit);
+
+ // Since we've acknowledged data, we need to update the window.
+ RcvTCB->tcb_sendwin = RcvInfo.tri_window;
+ RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin, RcvInfo.tri_window);
+ RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
+ RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
+ // We've updated the window, remember to send some more.
+ Actions = (RcvTCB->tcb_unacked ? NEED_OUTPUT : 0);
+
+#if FAST_RETRANSMIT
+ {
+ // If the receiver has already sent dup acks, but we are not
+ // sending because the SendWin is less than a segment, then
+ // to avoid time outs on the previous send (receiver is waiting for
+ // retransmitted data but we are not sending the segment..) prematurely
+ // timeout (set rexmittimer to 1 tick)
+ //
+
+ int SendWin;
+ uint AmtOutstanding,AmtUnsent;
+
+ AmtOutstanding = (uint)(RcvTCB->tcb_sendnext -
+ RcvTCB->tcb_senduna);
+ AmtUnsent = RcvTCB->tcb_unacked - AmtOutstanding;
+
+ SendWin = (int)(MIN(RcvTCB->tcb_sendwin, RcvTCB->tcb_cwin) -
+ AmtOutstanding);
+
+
+ if ((Size == 0) &&
+ (SendWin < RcvTCB->tcb_mss) && (RcvTCB->tcb_dup > 0)) {
+ STOP_TCB_TIMER(RcvTCB->tcb_rexmittimer);
+ START_TCB_TIMER(RcvTCB->tcb_rexmittimer, 1);
+ }
+ }
+
+ RcvTCB->tcb_dup = 0;
+#endif
+
+ } else {
+ // It doesn't ack anything. If it's an ack for something
+ // larger than we've sent then ACKAndDrop it, otherwise
+ // ignore it.
+ if (SEQ_GT(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
+ ACKAndDrop(&RcvInfo, RcvTCB);
+ return IP_SUCCESS;
+ } else
+
+ //SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendmax
+ // If the ack matches our existing UNA, we need to see if
+ // we can update the window.
+ // Or check if fast retransmit is needed
+
+#if FAST_RETRANSMIT
+ // If it is a pure duplicate ack, check if it is
+ // time to retransmit immediately
+
+ if ( (Size == 0) && SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
+ (RcvTCB->tcb_sendwin == RcvInfo.tri_window) ) {
+
+ RcvTCB->tcb_dup++;
+
+ if ((RcvTCB->tcb_dup == MaxDupAcks) ) {
+
+ //Okay. Time to retransmit the segment the receiver is asking for
+
+ STOP_TCB_TIMER(RcvTCB->tcb_rexmittimer);
+
+ RcvTCB->tcb_rtt = 0;
+
+ if (!(RcvTCB->tcb_flags & FLOW_CNTLD)) {
+
+ // Don't let the slow start threshold go below 2
+ // segments
+
+ RcvTCB->tcb_ssthresh =
+ MAX(
+ MIN(RcvTCB->tcb_cwin,RcvTCB->tcb_sendwin) / 2,
+ (uint) RcvTCB->tcb_mss * 2 );
+ RcvTCB->tcb_cwin = RcvTCB->tcb_mss;
+ }
+
+ // Recall the segment in question and send it out
+ // Note that tcb_lock will be dereferenced by the caller
+
+ ResetAndFastSend (RcvTCB, RcvTCB->tcb_senduna);
+
+ return IP_SUCCESS;
+
+
+ } else if ((RcvTCB->tcb_dup > MaxDupAcks) ) {
+
+ int SendWin;
+ uint AmtOutstanding,AmtUnsent;
+
+ if (SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
+ (SEQ_LT(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) ||
+ (SEQ_EQ(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) &&
+ SEQ_LTE(RcvTCB->tcb_sendwl2, RcvInfo.tri_ack)))) {
+
+ RcvTCB->tcb_sendwin = RcvInfo.tri_window;
+ RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin,
+ RcvInfo.tri_window);
+ RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
+ RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
+
+ // Since we've updated the window, remember to send
+ // some more.
+
+ Actions = (RcvTCB->tcb_unacked ? NEED_OUTPUT : 0);
+ }
+
+ // Update the cwin to reflect the fact that the dup ack
+ // indicates the previous frame was received by the
+ // receiver
+
+
+ RcvTCB->tcb_cwin += RcvTCB->tcb_mss;
+
+ if ((RcvTCB->tcb_cwin+RcvTCB->tcb_mss) < RcvTCB->tcb_sendwin ) {
+ AmtOutstanding = (uint)(RcvTCB->tcb_sendnext -
+ RcvTCB->tcb_senduna);
+ AmtUnsent = RcvTCB->tcb_unacked - AmtOutstanding;
+
+ SendWin = (int)(MIN(RcvTCB->tcb_sendwin, RcvTCB->tcb_cwin) -
+ AmtOutstanding);
+
+ if (SendWin < RcvTCB->tcb_mss) {
+ RcvTCB->tcb_force=1;
+ }
+ }
+
+ Actions = (RcvTCB->tcb_unacked ? NEED_OUTPUT : 0);
+
+ } else if ((RcvTCB->tcb_dup < MaxDupAcks)) {
+
+ int SendWin;
+ uint AmtOutstanding,AmtUnsent;
+
+ if (SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
+ (SEQ_LT(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) ||
+ (SEQ_EQ(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) &&
+ SEQ_LTE(RcvTCB->tcb_sendwl2, RcvInfo.tri_ack)))) {
+
+ RcvTCB->tcb_sendwin = RcvInfo.tri_window;
+ RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin,
+ RcvInfo.tri_window);
+ RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
+ RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
+
+ // Since we've updated the window, remember to send
+ // some more.
+ }
+
+ // Check if we need to set tcb_force.
+
+ if ((RcvTCB->tcb_cwin+RcvTCB->tcb_mss) < RcvTCB->tcb_sendwin ) {
+
+ AmtOutstanding = (uint)(RcvTCB->tcb_sendnext -
+ RcvTCB->tcb_senduna);
+ AmtUnsent = RcvTCB->tcb_unacked - AmtOutstanding;
+
+ SendWin = (int)(MIN(RcvTCB->tcb_sendwin, RcvTCB->tcb_cwin) -
+ AmtOutstanding);
+ if (SendWin < RcvTCB->tcb_mss){
+ RcvTCB->tcb_force=1;
+ }
+ }
+
+ Actions = (RcvTCB->tcb_unacked ? NEED_OUTPUT : 0);
+
+
+ } // End of all MaxDupAck cases
+
+ } else { // not a pure duplicate ack (size == 0 )
+
+ // Size != 0 or the receiver is advertising a new window.
+ // update the window and check if
+ // anything needs to be sent
+
+ RcvTCB->tcb_dup = 0;
+
+ if (SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
+ (SEQ_LT(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) ||
+ (SEQ_EQ(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) &&
+ SEQ_LTE(RcvTCB->tcb_sendwl2, RcvInfo.tri_ack)))) {
+ RcvTCB->tcb_sendwin = RcvInfo.tri_window;
+ RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin,
+ RcvInfo.tri_window);
+ RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
+ RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
+ // Since we've updated the window, remember to send
+ // some more.
+ Actions = (RcvTCB->tcb_unacked ? NEED_OUTPUT : 0);
+ }
+
+ } // for SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendmax) case
+
+#else //FAST_RETRANSMIT
+
+ if (SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
+ (SEQ_LT(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) ||
+ (SEQ_EQ(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) &&
+ SEQ_LTE(RcvTCB->tcb_sendwl2, RcvInfo.tri_ack)))) {
+ RcvTCB->tcb_sendwin = RcvInfo.tri_window;
+ RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin,
+ RcvInfo.tri_window);
+ RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
+ RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
+ // Since we've updated the window, remember to send
+ // some more.
+ Actions = (RcvTCB->tcb_unacked ? NEED_OUTPUT : 0);
+ }
+#endif //FAST_RETRANSMIT
+
+ }
+
+
+ NewSize = MIN((int) Size, RcvTCB->tcb_rcvwin);
+ if (NewSize != 0) {
+ RcvTCB->tcb_fastchk |= TCP_FLAG_IN_RCV;
+#ifdef VXD
+ CTEFreeLock(&RcvTCB->tcb_lock, TableHandle);
+ BytesTaken = (*RcvTCB->tcb_rcvhndlr)(RcvTCB, RcvInfo.tri_flags,
+ RcvBuf, NewSize);
+ CTEGetLock(&RcvTCB->tcb_lock, &TableHandle);
+#else
+ BytesTaken = (*RcvTCB->tcb_rcvhndlr)(RcvTCB, RcvInfo.tri_flags,
+ RcvBuf, NewSize);
+#endif
+ RcvTCB->tcb_rcvnext += BytesTaken;
+ RcvTCB->tcb_rcvwin -= BytesTaken;
+ CheckTCBRcv(RcvTCB);
+
+ RcvTCB->tcb_fastchk &= ~TCP_FLAG_IN_RCV;
+
+ Actions |= (RcvTCB->tcb_flags & SEND_AFTER_RCV ?
+ NEED_OUTPUT : 0);
+
+ RcvTCB->tcb_flags &= ~SEND_AFTER_RCV;
+ if ((RcvTCB->tcb_flags & ACK_DELAYED) || (BytesTaken != NewSize))
+ Actions |= NEED_ACK;
+ else {
+ RcvTCB->tcb_flags |= ACK_DELAYED;
+ START_TCB_TIMER(RcvTCB->tcb_delacktimer, DEL_ACK_TICKS);
+ }
+ } else {
+ // The new size is 0. If the original size was not 0, we must
+ // have a 0 rcv. win and hence need to send an ACK to this
+ // probe.
+ Actions |= (Size ? NEED_ACK : 0);
+ }
+
+ if (Actions)
+ DelayAction(RcvTCB, Actions);
+
+#ifndef VXD
+ TableHandle = DISPATCH_LEVEL;
+#endif
+ DerefTCB(RcvTCB, TableHandle);
+
+ return IP_SUCCESS;
+ }
+
+#ifndef VXD
+ TableHandle = DISPATCH_LEVEL;
+#endif
+ // Make sure we can handle this frame. We can't handle it if we're
+ // in SYN_RCVD and the accept is still pending, or we're in a
+ // non-established state and already in the receive handler.
+ if ((RcvTCB->tcb_state == TCB_SYN_RCVD &&
+ !(RcvTCB->tcb_flags & CONN_ACCEPTED)) ||
+ (RcvTCB->tcb_state != TCB_ESTAB && (RcvTCB->tcb_fastchk &
+ TCP_FLAG_IN_RCV))) {
+ CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
+ return IP_SUCCESS;
+ }
+
+ // If it's closed, it's a temporary zombie TCB. Reset the sender.
+ if (RcvTCB->tcb_state == TCB_CLOSED || CLOSING(RcvTCB) ||
+ ((RcvTCB->tcb_flags & (GC_PENDING | TW_PENDING)) == GC_PENDING)) {
+ CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
+ SendRSTFromHeader(TCPH, Size, Src, Dest, OptInfo);
+ return IP_SUCCESS;
+ }
+
+ // At this point, we have a connection, and it's locked. Following
+ // the 'Segment Arrives' section of 793, the next thing to check is
+ // if this connection is in SynSent state.
+
+ if (RcvTCB->tcb_state == TCB_SYN_SENT) {
+
+ CTEAssert(RcvTCB->tcb_flags & ACTIVE_OPEN);
+
+ // Check the ACK bit. Since we don't send data with our SYNs, the
+ // check we make is for the ack to exactly match our SND.NXT.
+ if (RcvInfo.tri_flags & TCP_FLAG_ACK) {
+ // ACK is set.
+ if (!SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendnext)) {
+ // Bad ACK value.
+ CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
+ // Send a RST back at him.
+ SendRSTFromHeader(TCPH, Size, Src, Dest, OptInfo);
+ return IP_SUCCESS;
+ }
+ }
+
+ if (RcvInfo.tri_flags & TCP_FLAG_RST) {
+ // There's an acceptable RST. We'll persist here, sending
+ // another SYN in PERSIST_TIMEOUT ms, until we fail from too
+ // many retries.
+ if (RcvTCB->tcb_rexmitcnt == MaxConnectRexmitCount) {
+ // We've had a positive refusal, and one more rexmit
+ // would time us out, so close the connection now.
+ CompleteConnReq(RcvTCB, OptInfo, TDI_CONN_REFUSED);
+
+ TryToCloseTCB(RcvTCB, TCB_CLOSE_REFUSED, TableHandle);
+ } else {
+ START_TCB_TIMER(RcvTCB->tcb_rexmittimer, PERSIST_TIMEOUT);
+ CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
+ }
+ return IP_SUCCESS;
+ }
+
+ // See if we have a SYN. If we do, we're going to change state
+ // somehow (either to ESTABLISHED or SYN_RCVD).
+ if (RcvInfo.tri_flags & TCP_FLAG_SYN) {
+ RcvTCB->tcb_refcnt++;
+
+ // We have a SYN. Go ahead and record the sequence number and
+ // window info.
+ RcvTCB->tcb_rcvnext = ++RcvInfo.tri_seq;
+
+ if (RcvInfo.tri_flags & TCP_FLAG_URG) {
+ // Urgent data. Update the pointer.
+ if (RcvInfo.tri_urgent != 0)
+ RcvInfo.tri_urgent--;
+ else
+ RcvInfo.tri_flags &= ~TCP_FLAG_URG;
+ }
+
+ RcvTCB->tcb_remmss = FindMSS(TCPH);
+
+ // If there are options, update them now. We already have an
+ // RCE open, so if we have new options we'll have to close
+ // it and open a new one.
+ if (OptInfo->ioi_options != NULL) {
+ if (!(RcvTCB->tcb_flags & CLIENT_OPTIONS)) {
+ (*LocalNetInfo.ipi_updateopts)(OptInfo,
+ &RcvTCB->tcb_opt, Src, NULL_IP_ADDR);
+ (*LocalNetInfo.ipi_closerce)(RcvTCB->tcb_rce);
+ InitRCE(RcvTCB);
+ }
+ } else{
+ RcvTCB->tcb_mss = MIN(RcvTCB->tcb_mss, RcvTCB->tcb_remmss);
+
+ CTEAssert(RcvTCB->tcb_mss > 0);
+
+ }
+
+ RcvTCB->tcb_rexmitcnt = 0;
+ STOP_TCB_TIMER(RcvTCB->tcb_rexmittimer);
+
+ AdjustRcvWin(RcvTCB);
+
+ if (RcvInfo.tri_flags & TCP_FLAG_ACK) {
+ // Our SYN has been acked. Update SND.UNA and stop the
+ // retrans timer.
+ RcvTCB->tcb_senduna = RcvInfo.tri_ack;
+ RcvTCB->tcb_sendwin = RcvInfo.tri_window;
+ RcvTCB->tcb_maxwin = RcvInfo.tri_window;
+ RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
+ RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
+ GoToEstab(RcvTCB);
+
+#ifdef RASAUTODIAL
+ //
+ // Set a bit that informs TCBTimeout to notify
+ // the automatic connection driver of this new
+ // connection. Only set this flag if we
+ // have bound successfully with the automatic
+ // connection driver.
+ //
+ if (fAcdLoadedG)
+ RcvTCB->tcb_flags |= ACD_CONN_NOTIF;
+#endif // RASAUTODIAL
+
+ // Remove whatever command exists on this connection.
+ CompleteConnReq(RcvTCB, OptInfo, TDI_SUCCESS);
+
+ CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
+ SendACK(RcvTCB);
+
+ // Now handle other data and controls. To do this we need
+ // to reacquire the lock, and make sure we haven't started
+ // closing it.
+ CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
+ if (!CLOSING(RcvTCB)) {
+ // We haven't started closing it. Turn off the
+ // SYN flag and continue processing.
+ RcvInfo.tri_flags &= ~TCP_FLAG_SYN;
+ if ((RcvInfo.tri_flags & TCP_FLAGS_ALL) != TCP_FLAG_ACK ||
+ Size != 0)
+ goto NotSYNSent;
+ }
+ DerefTCB(RcvTCB, TableHandle);
+ return IP_SUCCESS;
+ } else {
+ // A SYN, but not an ACK. Go to SYN_RCVD.
+ RcvTCB->tcb_state = TCB_SYN_RCVD;
+ RcvTCB->tcb_sendnext = RcvTCB->tcb_senduna;
+ SendSYN(RcvTCB, TableHandle);
+
+ CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
+ DerefTCB(RcvTCB, TableHandle);
+ return IP_SUCCESS;
+ }
+
+ } else {
+ // No SYN, just toss the frame.
+ CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
+ return IP_SUCCESS;
+ }
+
+ }
+
+ RcvTCB->tcb_refcnt++;
+
+NotSYNSent:
+ // Not in the SYN-SENT state. Check the sequence number. If my window
+ // is 0, I'll truncate all incoming frames but look at some of the
+ // control fields. Otherwise I'll try and make this segment fit into
+ // the window.
+ if (RcvTCB->tcb_rcvwin != 0) {
+ int StateSize; // Size, including state info.
+ SeqNum LastValidSeq; // Sequence number of last valid
+ // byte at RWE.
+
+ // We are offering a window. If this segment starts in front of my
+ // receive window, clip off the front part.
+#if 1 // Bug #63900
+ //Check for the sanity of received sequence.
+ //This is to fix the 1 bit error(MSB) case in the rcv seq.
+ // Also, check the incoming size.
+
+
+ if ((SEQ_LT(RcvInfo.tri_seq, RcvTCB->tcb_rcvnext)) &&
+ ((int)Size >= 0) &&
+ (RcvTCB->tcb_rcvnext - RcvInfo.tri_seq ) > 0) {
+
+#else
+
+ if (SEQ_LT(RcvInfo.tri_seq, RcvTCB->tcb_rcvnext)) {
+#endif
+
+ int AmountToClip, FinByte;
+
+ if (RcvInfo.tri_flags & TCP_FLAG_SYN) {
+ // Had a SYN. Clip it off and update the sequence number.
+ RcvInfo.tri_flags &= ~TCP_FLAG_SYN;
+ RcvInfo.tri_seq++;
+ RcvInfo.tri_urgent--;
+ }
+
+ // Advance the receive buffer to point at the new data.
+ AmountToClip = RcvTCB->tcb_rcvnext - RcvInfo.tri_seq;
+ CTEAssert(AmountToClip >= 0);
+
+ // If there's a FIN on this segment, we'll need to account for
+ // it.
+ FinByte = ((RcvInfo.tri_flags & TCP_FLAG_FIN) ? 1: 0);
+
+ if (AmountToClip >= (((int) Size) + FinByte)) {
+ // Falls entirely before the window. We have more special
+ // case code here - if the ack. number acks something,
+ // we'll go ahead and take it, faking the sequence number
+ // to be rcvnext. This prevents problems on full duplex
+ // connections, where data has been received but not acked,
+ // and retransmission timers reset the seq. number to
+ // below our rcvnext.
+ if ((RcvInfo.tri_flags & TCP_FLAG_ACK) &&
+ SEQ_LT(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
+ SEQ_LTE(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
+ // This contains valid ACK info. Fudge the information
+ // to get through the rest of this.
+ Size = 0;
+ AmountToClip = 0;
+ RcvInfo.tri_seq = RcvTCB->tcb_rcvnext;
+ RcvInfo.tri_flags &= ~(TCP_FLAG_SYN | TCP_FLAG_FIN |
+ TCP_FLAG_RST | TCP_FLAG_URG);
+#ifdef DEBUG
+ FinByte = 1; // Fake out assert below.
+#endif
+ } else {
+ ACKAndDrop(&RcvInfo, RcvTCB);
+ return IP_SUCCESS;
+ }
+ }
+
+ // Trim what we have to. If we can't trim enough, the frame
+ // is too short. This shouldn't happen, but if it does we'll
+ // drop the frame.
+ Size -= AmountToClip;
+ RcvInfo.tri_seq += AmountToClip;
+ RcvInfo.tri_urgent -= AmountToClip;
+ RcvBuf = TrimRcvBuf(RcvBuf, AmountToClip);
+ CTEAssert(RcvBuf != NULL);
+ CTEAssert(RcvBuf->ipr_size != 0 ||
+ (Size == 0 && FinByte));
+
+ if (*(int *)&RcvInfo.tri_urgent < 0) {
+ RcvInfo.tri_urgent = 0;
+ RcvInfo.tri_flags &= ~TCP_FLAG_URG;
+ }
+
+ }
+
+ // We've made sure the front is OK. Now make sure part of it doesn't
+ // fall outside of the right edge of the window. If it does,
+ // we'll truncate the frame (removing the FIN, if any). If we
+ // truncate the whole frame we'll ACKAndDrop it.
+ StateSize = Size + ((RcvInfo.tri_flags & TCP_FLAG_SYN) ? 1: 0) +
+ ((RcvInfo.tri_flags & TCP_FLAG_FIN) ? 1: 0);
+
+ if (StateSize)
+ StateSize--;
+
+ // Now the incoming sequence number (RcvInfo.tri_seq) + StateSize
+ // is the last sequence number in the segment. If this is greater
+ // than the last valid byte in the window, we have some overlap
+ // to chop off.
+
+ CTEAssert(StateSize >= 0);
+ LastValidSeq = RcvTCB->tcb_rcvnext + RcvTCB->tcb_rcvwin - 1;
+ if (SEQ_GT(RcvInfo.tri_seq + StateSize, LastValidSeq)) {
+ int AmountToChop;
+
+ // At least some part of the frame is outside of our window.
+ // See if it starts outside our window.
+
+ if (SEQ_GT(RcvInfo.tri_seq, LastValidSeq)) {
+ // Falls entirely outside the window. We have special
+ // case code to deal with a pure ack that falls exactly at
+ // our right window edge. Otherwise we ack and drop it.
+ if (!SEQ_EQ(RcvInfo.tri_seq, LastValidSeq+1) || Size != 0
+ || (RcvInfo.tri_flags & (TCP_FLAG_SYN | TCP_FLAG_FIN))) {
+ ACKAndDrop(&RcvInfo, RcvTCB);
+ return IP_SUCCESS;
+ }
+ } else {
+
+ // At least some part of it is in the window. If there's a
+ // FIN, chop that off and see if that moves us inside.
+ if (RcvInfo.tri_flags & TCP_FLAG_FIN) {
+ RcvInfo.tri_flags &= ~TCP_FLAG_FIN;
+ StateSize--;
+ }
+
+ // Now figure out how much to chop off.
+ AmountToChop = (RcvInfo.tri_seq + StateSize) - LastValidSeq;
+ CTEAssert(AmountToChop >= 0);
+ Size -= AmountToChop;
+
+ }
+ }
+ } else {
+ if (!SEQ_EQ(RcvTCB->tcb_rcvnext, RcvInfo.tri_seq)) {
+
+ // If there's a RST on this segment, and he's only off by 1,
+ // take it anyway. This can happen if the remote peer is
+ // probing and sends with the seq. # after the probe.
+ if (!(RcvInfo.tri_flags & TCP_FLAG_RST) ||
+ !(SEQ_EQ(RcvTCB->tcb_rcvnext, (RcvInfo.tri_seq - 1)))) {
+ ACKAndDrop(&RcvInfo, RcvTCB);
+ return IP_SUCCESS;
+ } else
+ RcvInfo.tri_seq = RcvTCB->tcb_rcvnext;
+ }
+
+ // He's in sequence, but we have a window of 0. Truncate the
+ // size, and clear any sequence consuming bits.
+ if (Size != 0 ||
+ (RcvInfo.tri_flags & (TCP_FLAG_SYN | TCP_FLAG_FIN))) {
+ RcvInfo.tri_flags &= ~(TCP_FLAG_SYN | TCP_FLAG_FIN);
+ Size = 0;
+ if (!(RcvInfo.tri_flags & TCP_FLAG_RST))
+ DelayAction(RcvTCB, NEED_ACK);
+ }
+ }
+
+ // At this point, the segment is in our window and does not overlap
+ // on either end. If it's the next sequence number we expect, we can
+ // handle the data now. Otherwise we'll queue it for later. In either
+ // case we'll handle RST and ACK information right now.
+ CTEAssert((*(int *)&Size) >= 0);
+
+ // Now, following 793, we check the RST bit.
+ if (RcvInfo.tri_flags & TCP_FLAG_RST) {
+ uchar Reason;
+ // We can't go back into the LISTEN state from SYN-RCVD here,
+ // because we may have notified the client via a listen completing
+ // or a connect indication. So, if we came from an active open we'll
+ // give back a 'connection refused' notice. For all other cases
+ // we'll just destroy the connection.
+
+ if (RcvTCB->tcb_state == TCB_SYN_RCVD) {
+ if (RcvTCB->tcb_flags & ACTIVE_OPEN)
+ Reason = TCB_CLOSE_REFUSED;
+ else
+ Reason = TCB_CLOSE_RST;
+ } else
+ Reason = TCB_CLOSE_RST;
+
+ TryToCloseTCB(RcvTCB, Reason, TableHandle);
+ CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
+
+ if (RcvTCB->tcb_state != TCB_TIME_WAIT) {
+ CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
+ RemoveTCBFromConn(RcvTCB);
+ NotifyOfDisc(RcvTCB, OptInfo, TDI_CONNECTION_RESET);
+ CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
+ }
+
+ DerefTCB(RcvTCB, TableHandle);
+ return IP_SUCCESS;
+ }
+
+ // Next check the SYN bit.
+ if (RcvInfo.tri_flags & TCP_FLAG_SYN) {
+ // Again, we can't quietly go back into the LISTEN state here, even
+ // if we came from a passive open.
+ TryToCloseTCB(RcvTCB, TCB_CLOSE_ABORTED, TableHandle);
+ SendRSTFromHeader(TCPH, Size, Src, Dest, OptInfo);
+
+ CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
+
+ if (RcvTCB->tcb_state != TCB_TIME_WAIT) {
+ CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
+ RemoveTCBFromConn(RcvTCB);
+ NotifyOfDisc(RcvTCB, OptInfo, TDI_CONNECTION_RESET);
+ CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
+ }
+
+ DerefTCB(RcvTCB, TableHandle);
+ return IP_SUCCESS;
+ }
+
+ // Check the ACK field. If it's not on drop the segment.
+ if (RcvInfo.tri_flags & TCP_FLAG_ACK) {
+ uint UpdateWindow;
+
+ // If we're in SYN-RCVD, go to ESTABLISHED.
+ if (RcvTCB->tcb_state == TCB_SYN_RCVD) {
+ if (SEQ_LT(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
+ SEQ_LTE(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
+ // The ack is valid.
+
+#ifdef SYN_ATTACK
+ if (SynAttackProtect) {
+ CTELockHandle Handle;
+
+ //
+ // We will be reiniting the tcprexmitcnt to 0. If we are
+ // configured for syn-attack protection and the rexmit cnt
+ // is >1, decrement the count of connections that are
+ // in the half-open-retried state. Check whether we are
+ // below a low-watermark. If we are, increase the rexmit
+ // count back to configured values
+ //
+ CTEGetLockAtDPC(&SynAttLock, &Handle);
+ if (RcvTCB->tcb_rexmitcnt >= ADAPTED_MAX_CONNECT_RESPONSE_REXMIT_CNT) {
+ BOOLEAN Trigger;
+ Trigger = (TCPHalfOpen < TCPMaxHalfOpen) ||
+ (--TCPHalfOpenRetried <= TCPMaxHalfOpenRetriedLW);
+ if (Trigger && (MaxConnectResponseRexmitCountTmp == ADAPTED_MAX_CONNECT_RESPONSE_REXMIT_CNT))
+ {
+ MaxConnectResponseRexmitCountTmp = MAX_CONNECT_RESPONSE_REXMIT_CNT;
+ }
+
+ }
+ //
+ // Decrement the # of conn. in half open state
+ //
+ TCPHalfOpen--;
+ CTEFreeLockFromDPC(&SynAttLock, Handle);
+ }
+#endif
+ RcvTCB->tcb_rexmitcnt = 0;
+ STOP_TCB_TIMER(RcvTCB->tcb_rexmittimer);
+ RcvTCB->tcb_senduna++;
+ RcvTCB->tcb_sendwin = RcvInfo.tri_window;
+ RcvTCB->tcb_maxwin = RcvInfo.tri_window;
+ RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
+ RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
+ GoToEstab(RcvTCB);
+
+ // Now complete whatever we can here.
+ CompleteConnReq(RcvTCB, OptInfo, TDI_SUCCESS);
+ } else {
+ DerefTCB(RcvTCB, TableHandle);
+ SendRSTFromHeader(TCPH, Size, Src, Dest, OptInfo);
+ return IP_SUCCESS;
+ }
+ } else {
+
+ // We're not in SYN-RCVD. See if this acknowledges anything.
+ if (SEQ_LT(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
+ SEQ_LTE(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
+ uint CWin;
+
+ // The ack acknowledges something. Pull the
+ // appropriate amount off the send q.
+ ACKData(RcvTCB, RcvInfo.tri_ack);
+
+ // If this acknowledges something we were running a RTT on,
+ // update that stuff now.
+ if (RcvTCB->tcb_rtt != 0 && SEQ_GT(RcvInfo.tri_ack,
+ RcvTCB->tcb_rttseq)) {
+ short RTT;
+
+ RTT = (short)(TCPTime - RcvTCB->tcb_rtt);
+ RcvTCB->tcb_rtt = 0;
+ RTT -= (RcvTCB->tcb_smrtt >> 3);
+ RcvTCB->tcb_smrtt += RTT;
+ RTT = (RTT >= 0 ? RTT : -RTT);
+ RTT -= (RcvTCB->tcb_delta >> 3);
+ RcvTCB->tcb_delta += RTT;
+ RcvTCB->tcb_rexmit = MIN(MAX(REXMIT_TO(RcvTCB),
+ MIN_RETRAN_TICKS), MAX_REXMIT_TO);
+ }
+
+ // If we're probing for a PMTU black hole we've found one, so turn off
+ // the detection. The size is already down, so leave it there.
+ if (RcvTCB->tcb_flags & PMTU_BH_PROBE) {
+ RcvTCB->tcb_flags &= ~PMTU_BH_PROBE;
+ RcvTCB->tcb_bhprobecnt = 0;
+ if (--(RcvTCB->tcb_slowcount) == 0) {
+ RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
+ CheckTCBRcv(RcvTCB);
+ }
+ }
+
+ // Update the congestion window now.
+ CWin = RcvTCB->tcb_cwin;
+ if (CWin < RcvTCB->tcb_maxwin) {
+ if (CWin < RcvTCB->tcb_ssthresh)
+ CWin += RcvTCB->tcb_mss;
+ else
+ CWin += (RcvTCB->tcb_mss * RcvTCB->tcb_mss)/CWin;
+
+ RcvTCB->tcb_cwin = MIN(CWin, RcvTCB->tcb_maxwin);
+ }
+
+ CTEAssert(*(int *)&RcvTCB->tcb_cwin > 0);
+
+ // We've acknowledged something, so reset the rexmit count.
+ // If there's still stuff outstanding, restart the rexmit
+ // timer.
+ RcvTCB->tcb_rexmitcnt = 0;
+ if (!SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendmax))
+ START_TCB_TIMER(RcvTCB->tcb_rexmittimer,
+ RcvTCB->tcb_rexmit);
+ else
+ STOP_TCB_TIMER(RcvTCB->tcb_rexmittimer);
+
+ // If we've sent a FIN, and this acknowledges it, we
+ // need to complete the client's close request and
+ // possibly transition our state.
+
+ if (RcvTCB->tcb_flags & FIN_SENT) {
+ // We have sent a FIN. See if it's been acknowledged.
+ // Once we've sent a FIN, tcb_sendmax
+ // can't advance, so our FIN must have seq. number
+ // tcb_sendmax - 1. Thus our FIN is acknowledged
+ // if the incoming ack is equal to tcb_sendmax.
+ if (SEQ_EQ(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
+ // He's acked our FIN. Turn off the flags,
+ // and complete the request. We'll leave the
+ // FIN_OUTSTANDING flag alone, to force early
+ // outs in the send code.
+ RcvTCB->tcb_flags &= ~(FIN_NEEDED | FIN_SENT);
+
+ CTEAssert(RcvTCB->tcb_unacked == 0);
+ CTEAssert(RcvTCB->tcb_sendnext ==
+ RcvTCB->tcb_sendmax);
+
+ // Now figure out what we need to do. In FIN_WAIT1
+ // or FIN_WAIT2, just complete the disconnect req.
+ // and continue. Otherwise, it's a bit trickier,
+ // since we can't complete the connreq until we
+ // remove the TCB from its connection.
+ switch (RcvTCB->tcb_state) {
+
+ case TCB_FIN_WAIT1:
+ RcvTCB->tcb_state = TCB_FIN_WAIT2;
+ CompleteConnReq(RcvTCB, OptInfo,
+ TDI_SUCCESS);
+
+ // Start a timer in case we never get
+ // out of FIN_WAIT2. Set the retransmit
+ // count high to force a timeout the
+ // first time the timer fires.
+ RcvTCB->tcb_rexmitcnt = MaxDataRexmitCount;
+ START_TCB_TIMER(RcvTCB->tcb_rexmittimer,
+ FinWait2TO);
+
+ // Fall through to FIN-WAIT-2 processing.
+ case TCB_FIN_WAIT2:
+ break;
+ case TCB_CLOSING:
+ GracefulClose(RcvTCB, TRUE, FALSE,
+ TableHandle);
+ return IP_SUCCESS;
+ break;
+ case TCB_LAST_ACK:
+ GracefulClose(RcvTCB, FALSE, FALSE,
+ TableHandle);
+ return IP_SUCCESS;
+ break;
+ default:
+ DEBUGCHK;
+ break;
+ }
+ }
+
+ }
+ UpdateWindow = TRUE;
+ } else {
+ // It doesn't ack anything. If it's an ack for something
+ // larger than we've sent then ACKAndDrop it, otherwise
+ // ignore it. If we're in FIN_WAIT2, we'll restart the timer.
+ // We don't make this check above because we know no
+ // data can be acked when we're in FIN_WAIT2.
+
+ if (RcvTCB->tcb_state == TCB_FIN_WAIT2)
+ START_TCB_TIMER(RcvTCB->tcb_rexmittimer, FinWait2TO);
+
+ if (SEQ_GT(RcvInfo.tri_ack, RcvTCB->tcb_sendmax)) {
+ ACKAndDrop(&RcvInfo, RcvTCB);
+ return IP_SUCCESS;
+ } else {
+ // Now update the window if we can.
+ if (SEQ_EQ(RcvTCB->tcb_senduna, RcvInfo.tri_ack) &&
+ (SEQ_LT(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) ||
+ (SEQ_EQ(RcvTCB->tcb_sendwl1, RcvInfo.tri_seq) &&
+ SEQ_LTE(RcvTCB->tcb_sendwl2, RcvInfo.tri_ack)))) {
+ UpdateWindow = TRUE;
+ } else
+ UpdateWindow = FALSE;
+ }
+ }
+
+ if (UpdateWindow) {
+ RcvTCB->tcb_sendwin = RcvInfo.tri_window;
+ RcvTCB->tcb_maxwin = MAX(RcvTCB->tcb_maxwin,
+ RcvInfo.tri_window);
+ RcvTCB->tcb_sendwl1 = RcvInfo.tri_seq;
+ RcvTCB->tcb_sendwl2 = RcvInfo.tri_ack;
+ if (RcvInfo.tri_window == 0) {
+ // We've got a zero window.
+ if (!EMPTYQ(&RcvTCB->tcb_sendq)) {
+ RcvTCB->tcb_flags &= ~NEED_OUTPUT;
+ RcvTCB->tcb_rexmitcnt = 0;
+ START_TCB_TIMER(RcvTCB->tcb_rexmittimer,
+ RcvTCB->tcb_rexmit);
+ if (!(RcvTCB->tcb_flags & FLOW_CNTLD)) {
+ RcvTCB->tcb_flags |= FLOW_CNTLD;
+ RcvTCB->tcb_slowcount++;
+ RcvTCB->tcb_fastchk |= TCP_FLAG_SLOW;
+ CheckTCBRcv(RcvTCB);
+ }
+ }
+ } else {
+ if (RcvTCB->tcb_flags & FLOW_CNTLD) {
+ RcvTCB->tcb_rexmitcnt = 0;
+ RcvTCB->tcb_rexmit = MIN(MAX(REXMIT_TO(RcvTCB),
+ MIN_RETRAN_TICKS), MAX_REXMIT_TO);
+ if (TCB_TIMER_RUNNING(RcvTCB->tcb_rexmittimer)) {
+ START_TCB_TIMER(RcvTCB->tcb_rexmittimer,
+ RcvTCB->tcb_rexmit);
+ }
+ RcvTCB->tcb_flags &= ~(FLOW_CNTLD | FORCE_OUTPUT);
+ // Reset send next to the left edge of the window,
+ // because it might be at senduna+1 if we've been
+ // probing.
+ ResetSendNext(RcvTCB, RcvTCB->tcb_senduna);
+ if (--(RcvTCB->tcb_slowcount) == 0) {
+ RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
+ CheckTCBRcv(RcvTCB);
+ }
+ }
+
+ // Since we've updated the window, see if we can send
+ // some more.
+ if (RcvTCB->tcb_unacked != 0 ||
+ (RcvTCB->tcb_flags & FIN_NEEDED))
+ DelayAction(RcvTCB, NEED_OUTPUT);
+
+ }
+ }
+
+ }
+
+ // We've handled all the acknowledgment stuff. If the size
+ // is greater than 0 or important bits are set process it further,
+ // otherwise it's a pure ack and we're done with it.
+ if (Size > 0 || (RcvInfo.tri_flags & TCP_FLAG_FIN)) {
+
+ // If we're not in a state where we can process incoming data
+ // or FINs, there's no point in going further. Just send an
+ // ack and drop this segment.
+ if (!DATA_RCV_STATE(RcvTCB->tcb_state) ||
+ (RcvTCB->tcb_flags & GC_PENDING)) {
+ ACKAndDrop(&RcvInfo, RcvTCB);
+ return IP_SUCCESS;
+ }
+
+ // If it's in sequence process it now, otherwise reassemble it.
+ if (SEQ_EQ(RcvInfo.tri_seq, RcvTCB->tcb_rcvnext)) {
+
+ // If we're already in the recv. handler, this is a
+ // duplicate. We'll just toss it.
+ if (RcvTCB->tcb_fastchk & TCP_FLAG_IN_RCV) {
+ DerefTCB(RcvTCB, TableHandle);
+ return IP_SUCCESS;
+ }
+
+ RcvTCB->tcb_fastchk |= TCP_FLAG_IN_RCV;
+
+ // Now loop, pulling things from the reassembly queue, until
+ // the queue is empty, or we can't take all of the data,
+ // or we hit a FIN.
+
+ do {
+
+ // Handle urgent data, if any.
+ if (RcvInfo.tri_flags & TCP_FLAG_URG) {
+ HandleUrgent(RcvTCB, &RcvInfo, RcvBuf, &Size);
+
+ // Since we may have freed the lock, we need to recheck
+ // and see if we're closing here.
+ if (CLOSING(RcvTCB))
+ break;
+
+ }
+
+
+ // OK, the data is in sequence, we've updated the
+ // reassembly queue and handled any urgent data. If we
+ // have any data go ahead and process it now.
+ if (Size > 0) {
+
+#ifdef VXD
+ CTEFreeLock(&RcvTCB->tcb_lock, TableHandle);
+ BytesTaken = (*RcvTCB->tcb_rcvhndlr)(RcvTCB,
+ RcvInfo.tri_flags, RcvBuf, Size);
+ CTEGetLock(&RcvTCB->tcb_lock, &TableHandle);
+#else
+ BytesTaken = (*RcvTCB->tcb_rcvhndlr)(RcvTCB,
+ RcvInfo.tri_flags, RcvBuf, Size);
+#endif
+ RcvTCB->tcb_rcvnext += BytesTaken;
+ RcvTCB->tcb_rcvwin -= BytesTaken;
+
+ CheckTCBRcv(RcvTCB);
+ if (RcvTCB->tcb_flags & ACK_DELAYED)
+ DelayAction(RcvTCB, NEED_ACK);
+ else {
+ RcvTCB->tcb_flags |= ACK_DELAYED;
+ START_TCB_TIMER(RcvTCB->tcb_delacktimer,
+ DEL_ACK_TICKS);
+ }
+
+ if (BytesTaken != Size) {
+ // We didn't take everything we could. No
+ // use in further processing, just bail
+ // out.
+ DelayAction(RcvTCB, NEED_ACK);
+ break;
+ }
+
+ // If we're closing now, we're done, so get out.
+ if (CLOSING(RcvTCB))
+ break;
+ }
+
+ // See if we need to advance over some urgent data.
+ if (RcvTCB->tcb_flags & URG_VALID) {
+ uint AdvanceNeeded;
+
+ // We only need to advance if we're not doing
+ // urgent inline. Urgent inline also has some
+ // implications for when we can clear the URG_VALID
+ // flag. If we're not doing urgent inline, we can
+ // clear it when rcvnext advances beyond urgent end.
+ // If we are doing inline, we clear it when rcvnext
+ // advances one receive window beyond urgend.
+ if (!(RcvTCB->tcb_flags & URG_INLINE)) {
+ if (RcvTCB->tcb_rcvnext == RcvTCB->tcb_urgstart)
+ RcvTCB->tcb_rcvnext = RcvTCB->tcb_urgend +
+ 1;
+ else
+ CTEAssert(SEQ_LT(RcvTCB->tcb_rcvnext,
+ RcvTCB->tcb_urgstart) ||
+ SEQ_GT(RcvTCB->tcb_rcvnext,
+ RcvTCB->tcb_urgend));
+ AdvanceNeeded = 0;
+ } else
+ AdvanceNeeded = RcvTCB->tcb_defaultwin;
+
+ // See if we can clear the URG_VALID flag.
+ if (SEQ_GT(RcvTCB->tcb_rcvnext - AdvanceNeeded,
+ RcvTCB->tcb_urgend)) {
+ RcvTCB->tcb_flags &= ~URG_VALID;
+ if (--(RcvTCB->tcb_slowcount) == 0) {
+ RcvTCB->tcb_fastchk &= ~TCP_FLAG_SLOW;
+ CheckTCBRcv(RcvTCB);
+ }
+ }
+
+ }
+
+ // We've handled the data. If the FIN bit is set, we
+ // have more processing.
+ if (RcvInfo.tri_flags & TCP_FLAG_FIN) {
+ uint Notify = FALSE;
+
+ RcvTCB->tcb_rcvnext++;
+ DelayAction(RcvTCB, NEED_ACK);
+
+ PushData(RcvTCB);
+
+ switch (RcvTCB->tcb_state) {
+
+ case TCB_SYN_RCVD:
+ // I don't think we can get here - we
+ // should have discarded the frame if it
+ // had no ACK, or gone to established if
+ // it did.
+ DEBUGCHK;
+ case TCB_ESTAB:
+ RcvTCB->tcb_state = TCB_CLOSE_WAIT;
+ // We left established, we're off the
+ // fast path.
+ RcvTCB->tcb_slowcount++;
+ RcvTCB->tcb_fastchk |= TCP_FLAG_SLOW;
+ CheckTCBRcv(RcvTCB);
+ Notify = TRUE;
+ break;
+ case TCB_FIN_WAIT1:
+ RcvTCB->tcb_state = TCB_CLOSING;
+ Notify = TRUE;
+ break;
+ case TCB_FIN_WAIT2:
+ // Stop the FIN_WAIT2 timer.
+ STOP_TCB_TIMER(RcvTCB->tcb_rexmittimer);
+ RcvTCB->tcb_refcnt++;
+ GracefulClose(RcvTCB, TRUE, TRUE,
+ TableHandle);
+ CTEGetLockAtDPC(&RcvTCB->tcb_lock,
+ &TableHandle);
+ break;
+ default:
+ DEBUGCHK;
+ break;
+ }
+
+ if (Notify) {
+ CTEFreeLockFromDPC(&RcvTCB->tcb_lock,
+ TableHandle);
+ NotifyOfDisc(RcvTCB, OptInfo, TDI_GRACEFUL_DISC);
+ CTEGetLockAtDPC(&RcvTCB->tcb_lock,
+ &TableHandle);
+ }
+
+ break; // Exit out of WHILE loop.
+ }
+
+ // If the reassembly queue isn't empty, get what we
+ // can now.
+ RcvBuf = PullFromRAQ(RcvTCB, &RcvInfo, &Size);
+
+ CheckRBList(RcvBuf, Size);
+
+ } while (RcvBuf != NULL);
+
+ RcvTCB->tcb_fastchk &= ~TCP_FLAG_IN_RCV;
+ if (RcvTCB->tcb_flags & SEND_AFTER_RCV) {
+ RcvTCB->tcb_flags &= ~SEND_AFTER_RCV;
+ DelayAction(RcvTCB, NEED_OUTPUT);
+ }
+
+ DerefTCB(RcvTCB, TableHandle);
+ return IP_SUCCESS;
+
+ } else {
+
+ // It's not in sequence. Since it needs further processing,
+ // put it on the reassembly queue.
+ if (DATA_RCV_STATE(RcvTCB->tcb_state) &&
+ !(RcvTCB->tcb_flags & GC_PENDING)) {
+ PutOnRAQ(RcvTCB, &RcvInfo, RcvBuf, Size);
+ CTEFreeLockFromDPC(&RcvTCB->tcb_lock, TableHandle);
+ SendACK(RcvTCB);
+ CTEGetLockAtDPC(&RcvTCB->tcb_lock, &TableHandle);
+ DerefTCB(RcvTCB, TableHandle);
+ } else
+ ACKAndDrop(&RcvInfo, RcvTCB);
+
+ return IP_SUCCESS;
+ }
+ }
+
+ } else {
+ // No ACK. Just drop the segment and return.
+ DerefTCB(RcvTCB, TableHandle);
+ return IP_SUCCESS;
+ }
+
+ DerefTCB(RcvTCB, TableHandle);
+ } else // DataOffset <= Size
+ TStats.ts_inerrs++;
+ } else {
+ // Bump bad xsum counter.
+ TStats.ts_inerrs++;
+
+ }
+
+ } else // IsBCast
+ TStats.ts_inerrs++;
+
+
+ return IP_SUCCESS;
+
+}
+
+#pragma BEGIN_INIT
+
+//* InitTCPRcv - Set up the global state used by the TCP receive path.
+//
+//  Runs once at init time. Initializes the RequestCompleteLock,
+//  TCBDelayLock and TCPRcvReqFreeLock locks, initializes the
+//  ConnRequestCompleteQ, SendCompleteQ and TCBDelayQ queues, clears
+//  RequestCompleteFlags and TCBDelayRtnCount, picks the platform's
+//  TCBDelayRtnLimit, and resets DummyBuf to a zero-length buffer owned
+//  by IP. On NT the receive request free list head is initialized too.
+//
+//  Input:   Nothing.
+//
+//  Returns: TRUE, unconditionally.
+//
+int
+InitTCPRcv(void)
+{
+#if defined(NT)
+    uint ProcessorCount;
+
+    ExInitializeSListHead(&TCPRcvReqFree);
+#endif
+
+    // Locks first, in the same order as before.
+    CTEInitLock(&RequestCompleteLock);
+    CTEInitLock(&TCBDelayLock);
+    CTEInitLock(&TCPRcvReqFreeLock);
+
+    // Then the work queues.
+    INITQ(&ConnRequestCompleteQ);
+    INITQ(&SendCompleteQ);
+    INITQ(&TCBDelayQ);
+
+    // Nothing is pending and no delay routine is running yet.
+    RequestCompleteFlags = 0;
+    TCBDelayRtnCount = 0;
+
+#if defined(VXD)
+    TCBDelayRtnLimit = 1;
+#endif
+#if defined(NT)
+    // The limit is the value read through KeNumberProcessors, capped at
+    // TCB_DELAY_RTN_LIMIT.
+    ProcessorCount = (uint) (** (PCHAR *) &KeNumberProcessors);
+    TCBDelayRtnLimit = (ProcessorCount > TCB_DELAY_RTN_LIMIT) ?
+                            TCB_DELAY_RTN_LIMIT : ProcessorCount;
+#endif
+
+    // DummyBuf: no chain, no data, length zero, owned by IP.
+    DummyBuf.ipr_next = 0;
+    DummyBuf.ipr_buffer = NULL;
+    DummyBuf.ipr_size = 0;
+    DummyBuf.ipr_owner = IPR_OWNER_IP;
+
+    return TRUE;
+}
+
+//* UnInitTCPRcv - Uninitialize our receive side.
+//
+// Called if initialization fails to uninitialize our receive side.
+// The body is currently empty, so this routine is a no-op: none of the
+// state set up by InitTCPRcv is torn down here.
+//
+// Input: Nothing.
+//
+// Returns: Nothing.
+//
+void
+UnInitTCPRcv(void)
+{
+
+}
+
+
+#pragma END_INIT
+
+