kolibrios/kernel/trunk/network/tcp_input.inc
hidnplayr c20f1efa82 TCP: Ack every other received full MSS segment, bugfixes.
git-svn-id: svn://kolibrios.org@7974 a494cfbc-eb01-0410-851d-a64ba20cac60
2020-05-23 15:20:41 +00:00

1928 lines
60 KiB
PHP

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; ;;
;; Copyright (C) KolibriOS team 2004-2020. All rights reserved. ;;
;; Distributed under terms of the GNU General Public License ;;
;; ;;
;; Part of the TCP/IP network stack for KolibriOS ;;
;; ;;
;; Written by hidnplayr@kolibrios.org ;;
;; ;;
;; Based on the algorithms used in 4.4BSD ;;
;; ;;
;; GNU GENERAL PUBLIC LICENSE ;;
;; Version 2, June 1991 ;;
;; ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
$Revision$
TCP_BIT_NEEDOUTPUT = 1 shl 0
TCP_BIT_TIMESTAMP = 1 shl 1
TCP_BIT_DROPSOCKET = 1 shl 2
TCP_BIT_FIN_IS_ACKED = 1 shl 3
;-----------------------------------------------------------------;
; ;
; TCP_input: Add a segment to the incoming TCP queue. ;
; ;
; IN: [esp] = ptr to buffer ;
; ebx = ptr to device struct ;
; ecx = TCP segment size ;
; edx = ptr to IPv4 header ;
; esi = ptr to TCP segment ;
; edi = interface number*4 ;
; ;
; OUT: / ;
; ;
;-----------------------------------------------------------------;
align 4
tcp_input:

; Build a TCP_queue_entry image on the stack, directly below the buffer
; pointer that the caller left at [esp]. The push order has to mirror the
; field order of the TCP_queue_entry struct.
        push    [timer_ticks]                   ; arrival time, in 1/100 seconds
        push    ebx                             ; device struct
        push    ecx                             ; TCP segment size
        push    esi                             ; TCP segment
        push    edx                             ; IPv4 header
        mov     esi, esp                        ; esi -> entry image just built

; edi (interface number*4) is still needed for the statistics counters,
; so it is saved on the stack while the queue macro runs.
        push    edi
        add_to_queue TCP_queue, TCP_QUEUE_SIZE, sizeof.TCP_queue_entry, .fail
        pop     edi

; Segment was queued: discard the complete on-stack image.
; NOTE(review): sizeof.TCP_queue_entry presumably also covers the caller's
; buffer pointer (last field of the entry), so it is removed here as well
; and the queue now owns the buffer - confirm against the struct definition.
        add     esp, sizeof.TCP_queue_entry

        inc     [TCP_segments_rx + edi]         ; per-interface "received" counter

; Signal TCP_input_event so that queued segments get processed.
        mov     eax, [TCP_input_event]
        mov     ebx, [eax + EVENT.id]
        xor     edx, edx
        xor     esi, esi
        call    raise_event

        ret

; Queue is full: count the segment as missed and release its buffer.
  .fail:
        pop     edi
        DEBUGF DEBUG_NETWORK_VERBOSE, "TCP incoming queue is full, discarding packet!\n"

        call    net_ptr_to_num4
        inc     [TCP_segments_missed + edi]     ; per-interface "missed" counter

; Drop everything pushed above but leave the last dword of the entry
; (the caller's buffer pointer) on top of the stack.
; NOTE(review): net_buff_free presumably takes that pointer as its stack
; argument and removes it on return - confirm against its definition.
        add     esp, sizeof.TCP_queue_entry - 4
        call    net_buff_free
        ret
;-----------------------------------------------------------------;
; ;
; TCP_process_input: Process segments from the incoming TCP queue.;
; ;
; IN: / ;
; OUT: / ;
; ;
;-----------------------------------------------------------------;
align 4
proc tcp_process_input
locals
dataoffset dd ?
timestamp dd ?
temp_bits db ?
device dd ?
endl
xor esi, esi
mov ecx, MANUAL_DESTROY
call create_event
mov [TCP_input_event], eax
.wait:
mov eax, [TCP_input_event]
mov ebx, [eax + EVENT.id]
call wait_event
.loop:
get_from_queue TCP_queue, TCP_QUEUE_SIZE, sizeof.TCP_queue_entry, .wait
push [esi + TCP_queue_entry.timestamp]
pop [timestamp]
push [esi + TCP_queue_entry.buffer_ptr]
mov ebx, [esi + TCP_queue_entry.device_ptr]
mov [device], ebx
mov ecx, [esi + TCP_queue_entry.segment_size]
mov edi, [esi + TCP_queue_entry.ip_ptr] ; ptr to ipv4 header
mov esi, [esi + TCP_queue_entry.segment_ptr] ; change esi last
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: size=%u time=%d\n", ecx, [timer_ticks]
mov edx, esi
; Verify the checksum (if not already done by hw)
test [ebx + NET_DEVICE.hwacc], NET_HWACC_TCP_IPv4_IN
jnz .checksum_ok
push ecx esi
pushw [esi + TCP_header.Checksum]
mov [esi + TCP_header.Checksum], 0
tcp_checksum (edi+IPv4_header.SourceAddress), (edi+IPv4_header.DestinationAddress)
pop cx ; previous checksum
cmp cx, dx
pop edx ecx
jne .drop_no_socket
.checksum_ok:
; Verify the data offset
movzx eax, [edx + TCP_header.DataOffset]
and al, 0xf0 ; Calculate TCP segment header size (throwing away unused reserved bits in TCP header)
shr al, 2
cmp al, sizeof.TCP_header ; Now see if it's at least the size of a standard TCP header
jb .drop_no_socket ; If not, drop the packet
mov [dataoffset], eax
sub ecx, eax ; substract TCP header size from total segment size
jb .drop_no_socket ; If total segment size is less then the advertised header size, drop packet
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: %u bytes of data\n", ecx
;-------------------------------------------
; Convert Big-endian values to little endian
ntohd [edx + TCP_header.SequenceNumber]
ntohd [edx + TCP_header.AckNumber]
ntohw [edx + TCP_header.Window]
ntohw [edx + TCP_header.UrgentPointer]
;-----------------------------------------------------------------------------------
;
; Find the socket pointer
;
;-----------------------------------------------------------------------------------
; IP Packet TCP Destination Port = local Port
; (IP Packet SenderAddress = Remote IP) OR (Remote IP = 0)
; (IP Packet TCP Source Port = remote Port) OR (remote Port = 0)
.findpcb:
pusha
mov ecx, socket_mutex
call mutex_lock
popa
mov ebx, net_sockets
mov si, [edx + TCP_header.DestinationPort]
.socket_loop:
mov ebx, [ebx + SOCKET.NextPtr]
or ebx, ebx
jz .no_socket ;respond_seg_reset
cmp [ebx + SOCKET.Domain], AF_INET4
jne .socket_loop
cmp [ebx + SOCKET.Protocol], IP_PROTO_TCP
jne .socket_loop
cmp [ebx + TCP_SOCKET.LocalPort], si
jne .socket_loop
mov eax, [ebx + IP_SOCKET.RemoteIP]
cmp eax, [edi + IPv4_header.SourceAddress]
je @f
test eax, eax
jnz .socket_loop
@@:
mov ax, [ebx + TCP_SOCKET.RemotePort]
cmp [edx + TCP_header.SourcePort], ax
je .found_socket
test ax, ax
jnz .socket_loop
.found_socket: ; ebx now contains the socketpointer
pusha
mov ecx, socket_mutex
call mutex_unlock
popa
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: socket ptr=%x state=%u flags=%x\n", ebx, [ebx + TCP_SOCKET.t_state], [edx + TCP_header.Flags]:2
;----------------------------
; Check if socket isnt closed
cmp [ebx + TCP_SOCKET.t_state], TCPS_CLOSED
je .drop_no_socket
;----------------
; Lock the socket
pusha
lea ecx, [ebx + SOCKET.mutex]
call mutex_lock
popa
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: socket locked\n"
;---------------------------
; disable all temporary bits
mov [temp_bits], 0
;---------------------------------------
; unscale the window into a 32 bit value
movzx eax, [edx + TCP_header.Window]
push ecx
mov cl, [ebx + TCP_SOCKET.SND_SCALE]
shl eax, cl
mov dword[edx + TCP_header.Window], eax ; word after window is checksum, we dont need checksum anymore
pop ecx
;-----------------------------------------------------------------------------------
;
; Accept incoming connections
;
;-----------------------------------------------------------------------------------
test [ebx + SOCKET.options], SO_ACCEPTCON
jz .no_accept
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: Accepting new connection\n"
; Unlock current socket
pusha
lea ecx, [ebx + SOCKET.mutex]
call mutex_unlock
popa
; Fork it
push ecx edx esi edi
call socket_fork
pop edi esi edx ecx
test eax, eax
jz .drop_no_socket
; Success! Use the new socket from now on (it is already locked)
mov ebx, eax
mov [temp_bits], TCP_BIT_DROPSOCKET
push [edi + IPv4_header.DestinationAddress]
pop [ebx + IP_SOCKET.LocalIP]
push [edx + TCP_header.DestinationPort]
pop [ebx + TCP_SOCKET.LocalPort]
mov [ebx + TCP_SOCKET.t_state], TCPS_LISTEN
.no_accept:
;-------------------------------------
; Reset idle timer and keepalive timer
mov [ebx + TCP_SOCKET.t_idle], 0
mov [ebx + TCP_SOCKET.timer_keepalive], TCP_time_keep_idle
or [ebx + TCP_SOCKET.timer_flags], timer_flag_keepalive
;-----------------------------------------------------------------------------------
;
; Process TCP options
;
;-----------------------------------------------------------------------------------
;;; FIXME: for LISTEN, options should be called after we determined route, we need it for MSS
;;; cmp [ebx + TCP_SOCKET.t_state], TCPS_LISTEN ; no options when in listen state
;;; jz .not_uni_xfer ; also no header prediction
push ecx
mov ecx, [dataoffset]
cmp ecx, sizeof.TCP_header ; Does header contain any options?
je .no_options
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: Segment has options\n"
add ecx, edx
lea esi, [edx + sizeof.TCP_header]
.opt_loop:
cmp esi, ecx ; are we scanning outside of header?
jae .no_options
lodsb
cmp al, TCP_OPT_EOL ; end of option list?
je .no_options
cmp al, TCP_OPT_NOP
je .opt_loop
cmp al, TCP_OPT_MAXSEG
je .opt_maxseg
cmp al, TCP_OPT_WINDOW
je .opt_window
cmp al, TCP_OPT_SACK_PERMIT
je .opt_sack_permit
; cmp al, TCP_OPT_SACK
; je .opt_sack
cmp al, TCP_OPT_TIMESTAMP
je .opt_timestamp
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: unknown option:%u\n", al
jmp .no_options ; If we reach here, some unknown options were received, skip them all!
.opt_maxseg:
lodsb
cmp al, 4
jne .no_options ; error occured, ignore all options!
test [edx + TCP_header.Flags], TH_SYN
jz @f
xor eax, eax
lodsw
rol ax, 8
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: Maxseg=%u\n", eax
call tcp_mss
@@:
jmp .opt_loop
.opt_window:
lodsb
cmp al, 3
jne .no_options
test [edx + TCP_header.Flags], TH_SYN
jz @f
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: Got window scale option\n"
or [ebx + TCP_SOCKET.t_flags], TF_RCVD_SCALE
lodsb
mov [ebx + TCP_SOCKET.SND_SCALE], al
;;;;; TODO
@@:
jmp .opt_loop
.opt_sack_permit:
lodsb
cmp al, 2
jne .no_options
test [edx + TCP_header.Flags], TH_SYN
jz @f
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: Selective Acknowledgement permitted\n"
or [ebx + TCP_SOCKET.t_flags], TF_SACK_PERMIT
@@:
jmp .opt_loop
.opt_timestamp:
lodsb
cmp al, 10 ; length must be 10
jne .no_options
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: Got timestamp option\n"
test [edx + TCP_header.Flags], TH_SYN
jz @f
or [ebx + TCP_SOCKET.t_flags], TF_RCVD_TSTMP
@@:
lodsd
bswap eax
mov [ebx + TCP_SOCKET.ts_val], eax
lodsd ; timestamp echo reply
mov [ebx + TCP_SOCKET.ts_ecr], eax
or [temp_bits], TCP_BIT_TIMESTAMP
; Since we have a timestamp, lets do the paws test right away!
test [edx + TCP_header.Flags], TH_RST
jnz .no_paws
mov eax, [ebx + TCP_SOCKET.ts_recent]
test eax, eax
jz .no_paws
cmp eax, [ebx + TCP_SOCKET.ts_val]
jbe .no_paws
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: PAWS: detected an old segment\n"
mov eax, [timestamp]
sub eax, [ebx + TCP_SOCKET.ts_recent_age]
pop ecx
cmp eax, TCP_PAWS_IDLE
jle .paws_drop
push ecx
mov [ebx + TCP_SOCKET.ts_recent], 0 ; timestamp was invalid, fix it.
.no_paws:
jmp .opt_loop
.paws_drop:
inc [TCPS_rcvduppack]
add [TCPS_rcvdupbyte], ecx
inc [TCPS_pawsdrop]
jmp .drop_after_ack
.no_options:
pop ecx
;-----------------------------------------------------------------------------------
;
; Header prediction
;
;-----------------------------------------------------------------------------------
; According to Van Jacobson, there are two common cases for an uni-directional data transfer.
;
; General rule: the packet has no control flags, is in-sequence,
; window width didnt change and we're not retransmitting.
;
; Second rules:
; - If the length is 0 and the ACK moved forward, we're the sender side of the transfer.
; In this case we'll free the ACK'ed data and notify higher levels that we have free space in buffer
;
; - If the length is not 0 and the ACK didn't move, we're the receiver side of the transfer.
; If the packets are in order (data queue is empty), add the data to the socket buffer and request a delayed ACK
cmp [ebx + TCP_SOCKET.t_state], TCPS_ESTABLISHED
jnz .not_uni_xfer
test [edx + TCP_header.Flags], TH_SYN + TH_FIN + TH_RST + TH_URG
jnz .not_uni_xfer
test [edx + TCP_header.Flags], TH_ACK
jz .not_uni_xfer
mov eax, [edx + TCP_header.SequenceNumber]
cmp eax, [ebx + TCP_SOCKET.RCV_NXT]
jne .not_uni_xfer
mov eax, dword[edx + TCP_header.Window]
cmp eax, [ebx + TCP_SOCKET.SND_WND]
jne .not_uni_xfer
mov eax, [ebx + TCP_SOCKET.SND_NXT]
cmp eax, [ebx + TCP_SOCKET.SND_MAX]
jne .not_uni_xfer
;---------------------------------------
; check if we are sender in the uni-xfer
; If the following 4 conditions are all true, this segment is a pure ACK.
;
; - The segment contains no data.
test ecx, ecx
jnz .not_sender
; - The congestion window is greater than or equal to the current send window.
; This test is true only if the window is fully open, that is, the connection is not in the middle of slow start or congestion avoidance.
mov eax, [ebx + TCP_SOCKET.SND_CWND]
cmp eax, [ebx + TCP_SOCKET.SND_WND]
jb .not_uni_xfer
; - The acknowledgment field in the segment is less than or equal to the maximum sequence number sent.
mov eax, [edx + TCP_header.AckNumber]
cmp eax, [ebx + TCP_SOCKET.SND_MAX]
ja .not_uni_xfer
; - The acknowledgment field in the segment is greater than the largest unacknowledged sequence number.
sub eax, [ebx + TCP_SOCKET.SND_UNA]
jbe .not_uni_xfer
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: Header prediction: we are sender\n"
;---------------------------------
; Packet is a pure ACK, process it
inc [TCPS_predack]
inc [TCPS_rcvackpack]
add [TCPS_rcvackbyte], eax
; Delete acknowledged bytes from send buffer
pusha
mov ecx, eax
lea eax, [ebx + STREAM_SOCKET.snd]
call socket_ring_free
popa
; Update RTT estimators
test [temp_bits], TCP_BIT_TIMESTAMP
jz .no_timestamp_rtt
mov eax, [timestamp]
sub eax, [ebx + TCP_SOCKET.ts_ecr]
inc eax
call tcp_xmit_timer
jmp .rtt_done
.no_timestamp_rtt:
cmp [ebx + TCP_SOCKET.t_rtt], 0
je .rtt_done
mov eax, [edx + TCP_header.AckNumber]
cmp eax, [ebx + TCP_SOCKET.t_rtseq]
jbe .rtt_done
mov eax, [ebx + TCP_SOCKET.t_rtt]
call tcp_xmit_timer
.rtt_done:
; update window pointers
mov eax, [edx + TCP_header.AckNumber]
mov [ebx + TCP_SOCKET.SND_UNA], eax
; Stop retransmit timer
and [ebx + TCP_SOCKET.timer_flags], not timer_flag_retransmission
; Unlock the socket
pusha
lea ecx, [ebx + SOCKET.mutex]
call mutex_unlock
popa
; Awaken waiting processes
mov eax, ebx
call socket_notify
; Generate more output
call tcp_output
jmp .drop_no_socket
;-------------------------------------------------
; maybe we are the receiver in the uni-xfer then..
.not_sender:
; - The amount of data in the segment is greater than 0 (data count is in ecx)
; - The acknowledgment field equals the largest unacknowledged sequence number. This means no data is acknowledged by this segment.
mov eax, [edx + TCP_header.AckNumber]
cmp eax, [ebx + TCP_SOCKET.SND_UNA]
jne .not_uni_xfer
; - The reassembly list of out-of-order segments for the connection is empty.
cmp [ebx + TCP_SOCKET.seg_next], 0
jne .not_uni_xfer
; Complete processing of received data
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: Header prediction: we are receiving %u bytes\n", ecx
mov esi, [dataoffset]
add esi, edx
lea eax, [ebx + STREAM_SOCKET.rcv]
call socket_ring_write ; Add the data to the socket buffer
add [ebx + TCP_SOCKET.RCV_NXT], ecx ; Update sequence number with number of bytes we have copied
mov eax, ebx
call socket_notify
or [ebx + TCP_SOCKET.t_flags], TF_DELACK ; Set delayed ack flag
jmp .drop
;-----------------------------------------------------------------------------------
;
; TCP segment processing, the slow way
;
;-----------------------------------------------------------------------------------
.not_uni_xfer:
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: Header prediction failed\n"
; Calculate receive window size
push edx
mov eax, SOCKET_BUFFER_SIZE
sub eax, [ebx + STREAM_SOCKET.rcv.size]
DEBUGF DEBUG_NETWORK_VERBOSE, "Space in receive buffer=%d\n", eax
mov edx, [ebx + TCP_SOCKET.RCV_ADV]
sub edx, [ebx + TCP_SOCKET.RCV_NXT]
DEBUGF DEBUG_NETWORK_VERBOSE, "Current advertised window=%d\n", edx
cmp eax, edx
jg @f
mov eax, edx
@@:
DEBUGF DEBUG_NETWORK_VERBOSE, "Receive window size=%d\n", eax
mov [ebx + TCP_SOCKET.RCV_WND], eax
pop edx
; If we are in listen or syn_sent state, go to that specific code right away
cmp [ebx + TCP_SOCKET.t_state], TCPS_LISTEN
je .state_listen
cmp [ebx + TCP_SOCKET.t_state], TCPS_SYN_SENT
je .state_syn_sent
;-----------------------------------------------------------------------------------
;
; Trim any data not in window
;
;-----------------------------------------------------------------------------------
;-------------------------------------------------
; Check for duplicate data at beginning of segment
; Calculate number of bytes we need to drop
mov eax, [ebx + TCP_SOCKET.RCV_NXT]
sub eax, [edx + TCP_header.SequenceNumber]
jle .no_duplicate
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: %u bytes duplicate data!\n", eax
; Check for duplicate SYN
test [edx + TCP_header.Flags], TH_SYN
jz .no_dup_syn
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: got duplicate syn\n"
and [edx + TCP_header.Flags], not (TH_SYN)
inc [edx + TCP_header.SequenceNumber]
cmp [edx + TCP_header.UrgentPointer], 1
jbe @f
dec [edx + TCP_header.UrgentPointer]
jmp .dup_syn
@@:
and [edx + TCP_header.Flags], not (TH_URG)
.dup_syn:
dec eax
.no_dup_syn:
;-----------------------------------
; Check for entire duplicate segment
cmp eax, ecx ; eax holds number of bytes to drop, ecx is data size
jb .no_complete_dup
jnz @f
test [edx + TCP_header.Flags], TH_FIN
jnz .no_complete_dup
@@:
; Any valid FIN must be to the left of the window.
; At this point the FIN must be out of sequence or a duplicate, drop it
and [edx + TCP_header.Flags], not TH_FIN
; send an ACK to resynchronize and drop any data.
; But keep on processing for RST or ACK
or [ebx + TCP_SOCKET.t_flags], TF_ACKNOW
mov eax, ecx
inc [TCPS_rcvduppack]
add [TCPS_rcvdupbyte], eax
jmp .dup_processed
.no_complete_dup:
inc [TCPS_rcvpartduppack]
add [TCPS_rcvpartdupbyte], eax
.dup_processed:
;-----------------------------------------------
; Remove duplicate data and update urgent offset
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: trimming duplicate data\n"
; Trim data from left side of window
add [dataoffset], eax
add [edx + TCP_header.SequenceNumber], eax
sub ecx, eax
sub [edx + TCP_header.UrgentPointer], ax
jg @f
and [edx + TCP_header.Flags], not (TH_URG)
mov [edx + TCP_header.UrgentPointer], 0
@@:
.no_duplicate:
;--------------------------------------------------
; Handle data that arrives after process terminates
cmp [ebx + SOCKET.PID], 0 ;;; TODO: use socket flags instead??
jne .not_terminated
cmp [ebx + TCP_SOCKET.t_state], TCPS_CLOSE_WAIT
jbe .not_terminated
test ecx, ecx
jz .not_terminated
mov eax, ebx
call tcp_close
inc [TCPS_rcvafterclose]
jmp .respond_seg_reset
.not_terminated:
;----------------------------------------
; Remove data beyond right edge of window
mov eax, [edx + TCP_header.SequenceNumber]
add eax, ecx
sub eax, [ebx + TCP_SOCKET.RCV_NXT]
sub eax, [ebx + TCP_SOCKET.RCV_WND] ; eax now holds the number of bytes to drop
jle .no_excess_data
DEBUGF DEBUG_NETWORK_VERBOSE, "%d bytes beyond right edge of window\n", eax
inc [TCPS_rcvpackafterwin]
cmp eax, ecx
jl .dont_drop_all
add [TCPS_rcvbyteafterwin], ecx
;----------------------------------------------------------------------------------------------------
; If a new connection request is received while in TIME_WAIT, drop the old connection and start over,
; if the sequence numbers are above the previous ones
test [edx + TCP_header.Flags], TH_SYN
jz .no_new_request
cmp [ebx + TCP_SOCKET.t_state], TCPS_TIME_WAIT
jne .no_new_request
; mov edx, [ebx + TCP_SOCKET.RCV_NXT]
; cmp edx, [edx + TCP_header.SequenceNumber]
; add edx, 64000 ; TCP_ISSINCR FIXME
mov eax, ebx
call tcp_close
jmp .findpcb ; FIXME: skip code for unscaling window, ...
.no_new_request:
; If window is closed, we can only take segments at window edge, and have to drop data and PUSH from
; incoming segments. Continue processing, but remember to ACK. Otherwise drop segment and ACK
cmp [ebx + TCP_SOCKET.RCV_WND], 0
jne .drop_after_ack
mov esi, [edx + TCP_header.SequenceNumber]
cmp esi, [ebx + TCP_SOCKET.RCV_NXT]
jne .drop_after_ack
or [ebx + TCP_SOCKET.t_flags], TF_ACKNOW
inc [TCPS_rcvwinprobe]
.dont_drop_all:
add [TCPS_rcvbyteafterwin], eax
DEBUGF DEBUG_NETWORK_VERBOSE, "Trimming %u bytes from the right of the window\n"
; remove data from the right side of window (decrease data length)
sub ecx, eax
and [edx + TCP_header.Flags], not (TH_PUSH or TH_FIN)
.no_excess_data:
;-----------------------------------------------------------------------------------
;
; Record timestamp
;
;-----------------------------------------------------------------------------------
; If last ACK falls within this segments sequence numbers, record its timestamp
test [temp_bits], TCP_BIT_TIMESTAMP
jz .no_timestamp
mov eax, [ebx + TCP_SOCKET.last_ack_sent]
sub eax, [edx + TCP_header.SequenceNumber]
jb .no_timestamp
test [edx + TCP_header.Flags], TH_SYN or TH_FIN ; SYN and FIN occupy one byte
jz @f
dec eax
@@:
sub eax, ecx
jae .no_timestamp
DEBUGF DEBUG_NETWORK_VERBOSE, "Recording timestamp\n"
mov eax, [timestamp]
mov [ebx + TCP_SOCKET.ts_recent_age], eax
mov eax, [ebx + TCP_SOCKET.ts_val]
mov [ebx + TCP_SOCKET.ts_recent], eax
.no_timestamp:
;-----------------------------------------------------------------------------------
;
; Process RST flag
;
;-----------------------------------------------------------------------------------
test [edx + TCP_header.Flags], TH_RST
jz .no_rst
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: Got an RST flag\n"
mov eax, [ebx + TCP_SOCKET.t_state]
shl eax, 2
jmp dword [eax + .rst_sw_list]
;-----------------------------------------------------------------------------------
.rst_sw_list:
dd .no_rst ; TCPS_CLOSED
dd .no_rst ; TCPS_LISTEN
dd .no_rst ; TCPS_SYN_SENT
dd .econnrefused ; TCPS_SYN_RECEIVED
dd .econnreset ; TCPS_ESTABLISHED
dd .econnreset ; TCPS_CLOSE_WAIT
dd .econnreset ; TCPS_FIN_WAIT_1
dd .rst_close ; TCPS_CLOSING
dd .rst_close ; TCPS_LAST_ACK
dd .econnreset ; TCPS_FIN_WAIT_2
dd .rst_close ; TCPS_TIME_WAIT
;-----------------------------------------------------------------------------------
.econnrefused:
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: Connection refused\n"
mov [ebx + SOCKET.errorcode], ECONNREFUSED
jmp .close
;-----------------------------------------------------------------------------------
.econnreset:
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: Connection reset\n"
mov [ebx + SOCKET.errorcode], ECONNRESET
.close:
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: Closing connection\n"
mov [ebx + TCP_SOCKET.t_state], TCPS_CLOSED
inc [TCPS_drops]
jmp .drop
;-----------------------------------------------------------------------------------
.rst_close:
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: Closing with reset\n"
jmp .unlock_and_close
;-----------------------------------------------------------------------------------
.no_rst:
;-----------------------------------------------------------------------------------
;
; Handle SYN-full and ACK-less segments
;
;-----------------------------------------------------------------------------------
; If a SYN is in the window, then this is an error so we send an RST and drop the connection
test [edx + TCP_header.Flags], TH_SYN
jz .not_syn_full
mov eax, ebx
mov ebx, ECONNRESET
call tcp_drop
jmp .drop_with_reset
.not_syn_full:
; If ACK bit is off, we drop the segment and return
test [edx + TCP_header.Flags], TH_ACK
jz .drop
;----------------------------------------------------------------------------------
;
; ACK processing for SYN_RECEIVED state
;
;----------------------------------------------------------------------------------
cmp [ebx + TCP_SOCKET.t_state], TCPS_SYN_RECEIVED
jb .ack_processed ; states: closed, listen, syn_sent
ja .no_syn_rcv ; established, fin_wait_1, fin_wait_2, close_wait, closing, last_ack, time_wait
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: state=syn_received\n"
mov eax, [edx + TCP_header.AckNumber]
cmp [ebx + TCP_SOCKET.SND_UNA], eax
ja .drop_with_reset
cmp eax, [ebx + TCP_SOCKET.SND_MAX]
ja .drop_with_reset
inc [TCPS_connects]
mov eax, ebx
call socket_is_connected
mov [ebx + TCP_SOCKET.t_state], TCPS_ESTABLISHED
; Do window scaling?
test [ebx + TCP_SOCKET.t_flags], TF_RCVD_SCALE
jz @f
test [ebx + TCP_SOCKET.t_flags], TF_REQ_SCALE
jz @f
push word[ebx + TCP_SOCKET.requested_s_scale] ; Set send and receive scale factors to the received values
pop word[ebx + TCP_SOCKET.SND_SCALE]
@@:
call tcp_reassemble
mov eax, [edx + TCP_header.SequenceNumber]
dec eax
mov [ebx + TCP_SOCKET.SND_WL1], eax
.no_syn_rcv:
;-----------------------------------------------------------------------------------
;
; ACK processing for SYN_RECEIVED state and higher
;
;-----------------------------------------------------------------------------------
;-------------------------
; Check for duplicate ACKs
mov eax, [edx + TCP_header.AckNumber]
cmp eax, [ebx + TCP_SOCKET.SND_UNA]
ja .dup_ack_complete
test ecx, ecx
jnz .reset_dupacks
mov eax, dword[edx + TCP_header.Window]
cmp eax, [ebx + TCP_SOCKET.SND_WND]
jne .reset_dupacks
inc [TCPS_rcvdupack]
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: Processing duplicate ACK\n"
; If we have outstanding data, other than a window probe, this is a completely duplicate ACK
; (window info didnt change) The ACK is the biggest we've seen and we've seen exactly our rexmt threshold of them,
; assume a packet has been dropped and retransmit it. Kludge snd_nxt & the congestion window so we send only this one packet.
test [ebx + TCP_SOCKET.timer_flags], timer_flag_retransmission
jz .reset_dupacks
mov eax, [edx + TCP_header.AckNumber]
cmp eax, [ebx + TCP_SOCKET.SND_UNA]
jne .reset_dupacks
; Increment duplicate ACK counter
; If it reaches the threshold, re-transmit the missing segment
inc [ebx + TCP_SOCKET.t_dupacks]
cmp [ebx + TCP_SOCKET.t_dupacks], TCP_re_xmit_thresh
jb .dup_ack_complete
ja .another_lost
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: Re-transmitting lost segment\n"
push [ebx + TCP_SOCKET.SND_NXT] ; >>>>
mov eax, [ebx + TCP_SOCKET.SND_WND]
cmp eax, [ebx + TCP_SOCKET.SND_CWND]
jbe @f
mov eax, [ebx + TCP_SOCKET.SND_CWND]
@@:
shr eax, 1
push edx
xor edx, edx
div [ebx + TCP_SOCKET.t_maxseg]
cmp eax, 2
ja @f
xor eax, eax
mov al, 2
@@:
mul [ebx + TCP_SOCKET.t_maxseg]
pop edx
mov [ebx + TCP_SOCKET.SND_SSTHRESH], eax
and [ebx + TCP_SOCKET.timer_flags], not timer_flag_retransmission ; turn off retransmission timer
mov [ebx + TCP_SOCKET.t_rtt], 0
mov eax, [edx + TCP_header.AckNumber]
mov [ebx + TCP_SOCKET.SND_NXT], eax
mov eax, [ebx + TCP_SOCKET.t_maxseg]
mov [ebx + TCP_SOCKET.SND_CWND], eax
; Unlock the socket
push ebx
lea ecx, [ebx + SOCKET.mutex]
call mutex_unlock
; retransmit missing segment
mov eax, [esp]
call tcp_output
; Lock the socket again
mov ecx, [esp]
add ecx, SOCKET.mutex
call mutex_lock
pop ebx
; Continue processing
xor edx, edx
mov eax, [ebx + TCP_SOCKET.t_maxseg]
mul [ebx + TCP_SOCKET.t_dupacks]
add eax, [ebx + TCP_SOCKET.SND_SSTHRESH]
mov [ebx + TCP_SOCKET.SND_CWND], eax
pop eax ; <<<<
cmp eax, [ebx + TCP_SOCKET.SND_NXT]
jb @f
mov [ebx + TCP_SOCKET.SND_NXT], eax
@@:
jmp .drop
.another_lost:
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: Increasing congestion window\n"
mov eax, [ebx + TCP_SOCKET.t_maxseg]
add [ebx + TCP_SOCKET.SND_CWND], eax
; Unlock the socket
push ebx
lea ecx, [ebx + SOCKET.mutex]
call mutex_unlock
; retransmit missing segment, again
mov eax, [esp]
call tcp_output
; Lock the socket again
mov ecx, [esp]
add ecx, SOCKET.mutex
call mutex_lock
pop ebx
; And drop the incoming segment
jmp .drop
.reset_dupacks: ; We got a new ACK, reset duplicate ACK counter
mov [ebx + TCP_SOCKET.t_dupacks], 0
jmp .ack_processed
.dup_ack_complete:
;-------------------------------------------------
; If the congestion window was inflated to account
; for the other side's cached packets, retract it
mov eax, [ebx + TCP_SOCKET.SND_SSTHRESH]
cmp eax, [ebx + TCP_SOCKET.SND_CWND]
ja @f
cmp [ebx + TCP_SOCKET.t_dupacks], TCP_re_xmit_thresh
jbe @f
mov [ebx + TCP_SOCKET.SND_CWND], eax
@@:
mov [ebx + TCP_SOCKET.t_dupacks], 0
mov eax, [edx + TCP_header.AckNumber]
cmp eax, [ebx + TCP_SOCKET.SND_MAX]
jbe @f
inc [TCPS_rcvacktoomuch]
jmp .drop_after_ack
@@:
mov edi, [edx + TCP_header.AckNumber]
sub edi, [ebx + TCP_SOCKET.SND_UNA] ; now we got the number of acked bytes in edi
inc [TCPS_rcvackpack]
add [TCPS_rcvackbyte], edi
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: acceptable ACK for %u bytes\n", edi
;-----------------------------------------------------------------------------------
;
; RTT measurements and retransmission timer
;
;-----------------------------------------------------------------------------------
; If we have a timestamp, update smoothed RTT
test [temp_bits], TCP_BIT_TIMESTAMP
jz .timestamp_not_present
mov eax, [timestamp]
sub eax, [ebx + TCP_SOCKET.ts_ecr]
inc eax
call tcp_xmit_timer
jmp .rtt_done_
; If no timestamp but transmit timer is running and timed sequence number was acked,
; update smoothed RTT. Since we now have an RTT measurement, cancel the timer backoff
; (Phil Karn's retransmit algo)
; Recompute the initial retransmit timer
.timestamp_not_present:
mov eax, [edx + TCP_header.AckNumber]
cmp eax, [ebx + TCP_SOCKET.t_rtseq]
jbe .rtt_done_
mov eax, [ebx + TCP_SOCKET.t_rtt]
test eax, eax
jz .rtt_done_
call tcp_xmit_timer
.rtt_done_:
; If all outstanding data is acked, stop retransmit timer and remember to restart (more output or persist)
; If there is more data to be acked, restart retransmit timer, using current (possible backed-off) value.
mov eax, [ebx + TCP_SOCKET.SND_MAX]
cmp eax, [edx + TCP_header.AckNumber]
jne .more_data
and [ebx + TCP_SOCKET.timer_flags], not timer_flag_retransmission
or [temp_bits], TCP_BIT_NEEDOUTPUT
jmp .no_restart
.more_data:
test [ebx + TCP_SOCKET.timer_flags], timer_flag_persist
jnz .no_restart
mov eax, [ebx + TCP_SOCKET.t_rxtcur]
mov [ebx + TCP_SOCKET.timer_retransmission], eax
or [ebx + TCP_SOCKET.timer_flags], timer_flag_retransmission
.no_restart:
;-----------------------------------------------------------------------------------
;
; Open congestion window in response to ACKs
;
;-----------------------------------------------------------------------------------
; If the window gives us less than ssthresh packets in flight, open exponentially.
; Otherwise, open linearly
mov esi, [ebx + TCP_SOCKET.SND_CWND]
mov eax, [ebx + TCP_SOCKET.t_maxseg]
cmp esi, [ebx + TCP_SOCKET.SND_SSTHRESH]
jbe @f
push edx
push eax
mul eax ; t_maxseg*t_maxseg
div esi ; t_maxseg*t_maxseg/snd_cwnd
pop edx ; t_maxseg
shr edx, 3 ; t_maxseg/8
add eax, edx ; t_maxseg*t_maxseg/snd_cwnd + t_maxseg/8
pop edx
@@:
add esi, eax
push ecx
mov cl, [ebx + TCP_SOCKET.SND_SCALE]
mov eax, TCP_max_win
shl eax, cl
pop ecx
cmp esi, eax
jbe @f
mov esi, eax
@@:
mov [ebx + TCP_SOCKET.SND_CWND], esi
;-----------------------------------------------------------------------------------
;
; Remove acknowledged data from send buffer
;
;-----------------------------------------------------------------------------------
; If the number of bytes acknowledged exceeds the number of bytes on the send buffer,
; snd_wnd is decremented by the number of bytes in the send buffer and TCP knows
; that its FIN has been ACKed. (FIN occupies 1 byte in the sequence number space)
cmp edi, [ebx + STREAM_SOCKET.snd.size]
jbe .no_fin_ack
; Drop all data in output buffer
push ecx edx ebx
mov ecx, [ebx + STREAM_SOCKET.snd.size]
sub [ebx + TCP_SOCKET.SND_WND], ecx
lea eax, [ebx + STREAM_SOCKET.snd]
call socket_ring_free
pop ebx edx ecx
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: our FIN is acked\n"
or [temp_bits], TCP_BIT_FIN_IS_ACKED
jmp .ack_complete
.no_fin_ack:
; Drop acknowledged data
push ecx edx ebx
mov ecx, edi
lea eax, [ebx + STREAM_SOCKET.snd]
call socket_ring_free
pop ebx
sub [ebx + TCP_SOCKET.SND_WND], ecx
pop edx ecx
.ack_complete:
;-----------------------------------------------------------------------------------
;
; Wake up process waiting on send buffer
;
;-----------------------------------------------------------------------------------
mov eax, ebx
call socket_notify
; Update TCPS
mov eax, [edx + TCP_header.AckNumber]
mov [ebx + TCP_SOCKET.SND_UNA], eax
cmp eax, [ebx + TCP_SOCKET.SND_NXT]
jb @f
mov [ebx + TCP_SOCKET.SND_NXT], eax
@@:
;-----------------------------------------------------------------------------------
;
; State specific ACK handling
;
;-----------------------------------------------------------------------------------
mov eax, [ebx + TCP_SOCKET.t_state]
jmp dword[.ack_sw_list+eax*4]
.ack_sw_list:
dd .ack_processed ; TCPS_CLOSED
dd .ack_processed ; TCPS_LISTEN
dd .ack_processed ; TCPS_SYN_SENT
dd .ack_processed ; TCPS_SYN_RECEIVED
dd .ack_processed ; TCPS_ESTABLISHED
dd .ack_processed ; TCPS_CLOSE_WAIT
dd .ack_fw1 ; TCPS_FIN_WAIT_1
dd .ack_c ; TCPS_CLOSING
dd .ack_la ; TCPS_LAST_ACK
dd .ack_processed ; TCPS_FIN_WAIT_2
dd .ack_tw ; TCPS_TIMED_WAIT
;-----------------------------------------------------------------------------------
.ack_fw1:
; ACK received in FIN_WAIT_1 state.
; If our FIN is now acked, enter FIN_WAIT_2; otherwise continue with normal processing.
test [temp_bits], TCP_BIT_FIN_IS_ACKED
jz .ack_processed
; If we can't receive any more data, then closing user can proceed.
; Starting the timer is contrary to the specification, but if we don't get a FIN,
; we'll hang forever.
test [ebx + SOCKET.state], SS_CANTRCVMORE
jz @f
mov eax, ebx
call socket_is_disconnected
; Arm the timed-wait timer with the maximum idle time
mov [ebx + TCP_SOCKET.timer_timed_wait], TCP_time_max_idle
or [ebx + TCP_SOCKET.timer_flags], timer_flag_wait
@@:
mov [ebx + TCP_SOCKET.t_state], TCPS_FIN_WAIT_2
jmp .ack_processed
;-----------------------------------------------------------------------------------
.ack_c:
; ACK received in CLOSING state.
; Enter the TIME_WAIT state if our FIN is acked; otherwise continue with normal processing.
test [temp_bits], TCP_BIT_FIN_IS_ACKED
jz .ack_processed
mov [ebx + TCP_SOCKET.t_state], TCPS_TIME_WAIT
; Stop all running timers, then start the timed-wait timer for 2 * MSL
mov eax, ebx
call tcp_cancel_timers
mov [ebx + TCP_SOCKET.timer_timed_wait], 2 * TCP_time_MSL
or [ebx + TCP_SOCKET.timer_flags], timer_flag_wait
mov eax, ebx
call socket_is_disconnected
jmp .ack_processed
;-----------------------------------------------------------------------------------
.ack_la:
; In LAST_ACK state, we may still be waiting for data to drain and/or to be acked.
; If our FIN is acked however, enter CLOSED state and return.
test [temp_bits], TCP_BIT_FIN_IS_ACKED
jz .ack_processed
.unlock_and_close:
; Release the socket mutex first, then hand the socket (eax) to tcp_close.
; The segment is dropped through .drop_no_socket, which does not touch the socket again.
push ebx
lea ecx, [ebx + SOCKET.mutex]
call mutex_unlock
pop eax
call tcp_close
jmp .drop_no_socket
;-----------------------------------------------------------------------------------
.ack_tw:
; In TIME_WAIT state the only thing that should arrive is a retransmission of the remote FIN.
; Acknowledge it and restart the FINACK timer
; NOTE(review): this sets timer_flag_2msl, while every other place in this file that loads
; timer_timed_wait sets timer_flag_wait - confirm which flag the timer code expects here.
mov [ebx + TCP_SOCKET.timer_timed_wait], 2*TCP_time_MSL
or [ebx + TCP_SOCKET.timer_flags], timer_flag_2msl
jmp .drop_after_ack
;-----------------------------------------------------------------------------------
;
; Initiation of Passive Open?
;
;-----------------------------------------------------------------------------------
.state_listen:
; Segment arrived for a socket in LISTEN state.
; ebx = socket, edx = TCP header, edi = IPv4 header (it is read through IPv4_header below).
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: state=listen\n"
; RST: ignore. ACK: answer with a reset. Anything without SYN: ignore.
test [edx + TCP_header.Flags], TH_RST
jnz .drop
test [edx + TCP_header.Flags], TH_ACK
jnz .drop_with_reset
test [edx + TCP_header.Flags], TH_SYN
jz .drop
inc [TCPS_accepts]
;;; TODO: check if it's a broadcast or multicast, and drop if so
;-------------------------------------------
; Processing of SYN received in LISTEN state
; Record the peer address, the peer port and the peer's initial sequence number (IRS)
push [edi + IPv4_header.SourceAddress]
pop [ebx + IP_SOCKET.RemoteIP]
push [edx + TCP_header.SourcePort]
pop [ebx + TCP_SOCKET.RemotePort]
push [edx + TCP_header.SequenceNumber]
pop [ebx + TCP_SOCKET.IRS]
; Pick our initial send sequence number from the global counter and advance the counter
mov eax, [TCP_sequence_num]
add [TCP_sequence_num], TCP_ISSINCR / 2
mov [ebx + TCP_SOCKET.ISS], eax
mov [ebx + TCP_SOCKET.SND_NXT], eax
; NOTE(review): these macros are defined elsewhere; presumably they derive the remaining
; send/receive sequence variables from ISS and IRS - confirm.
tcp_sendseqinit ebx
tcp_rcvseqinit ebx
; Move to SYN_RECEIVED, request an immediate ACK and start the keepalive timer
mov [ebx + TCP_SOCKET.t_state], TCPS_SYN_RECEIVED
or [ebx + TCP_SOCKET.t_flags], TF_ACKNOW
mov [ebx + TCP_SOCKET.timer_keepalive], TCP_time_keep_interval ;;;; macro
or [ebx + TCP_SOCKET.timer_flags], timer_flag_keepalive
; Create the send and receive ring buffers; a zero result in eax is treated as failure
lea eax, [ebx + STREAM_SOCKET.snd]
call socket_ring_create
test eax, eax
jz .drop
lea eax, [ebx + STREAM_SOCKET.rcv]
call socket_ring_create
test eax, eax
jz .drop
; The socket is fully set up now: it must no longer be destroyed on the drop paths
and [temp_bits], not TCP_BIT_DROPSOCKET
pusha
mov eax, ebx
call socket_notify
popa
jmp .trim
;-----------------------------------------------------------------------------------
;
; Completion of active open?
;
;-----------------------------------------------------------------------------------
.state_syn_sent:
; Segment arrived for a socket in SYN_SENT state (active open in progress).
; ebx = socket, edx = TCP header.
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: state=syn_sent\n"
; An ACK is only acceptable when ISS < AckNumber <= SND_MAX; otherwise reply with a reset
test [edx + TCP_header.Flags], TH_ACK
jz @f
mov eax, [edx + TCP_header.AckNumber]
cmp eax, [ebx + TCP_SOCKET.ISS]
jbe .drop_with_reset
cmp eax, [ebx + TCP_SOCKET.SND_MAX]
ja .drop_with_reset
@@:
; RST without ACK is ignored; RST with an acceptable ACK means the connection was refused
test [edx + TCP_header.Flags], TH_RST
jz @f
test [edx + TCP_header.Flags], TH_ACK
jz .drop
mov eax, ebx
; ebx carries the error code into tcp_drop, but .drop expects ebx = socket ptr
; (it unlocks [ebx + SOCKET.mutex]), so the socket pointer is preserved across the call.
; NOTE(review): .drop touches the socket after tcp_drop returned; confirm the socket is
; still valid at that point (compare .unlock_and_close, which unlocks before tcp_close).
push ebx
mov ebx, ECONNREFUSED
call tcp_drop
pop ebx
jmp .drop
@@:
;-----------------------------------------------------------------------------------
;
; Process received SYN in response to an active open
;
;-----------------------------------------------------------------------------------
; ebx = socket, edx = TCP header. Segments without SYN are ignored in this state.
test [edx + TCP_header.Flags], TH_SYN
jz .drop
test [edx + TCP_header.Flags], TH_ACK
jz @f
; The ACK was range-checked at the top of .state_syn_sent (ISS < AckNumber <= SND_MAX),
; so it acknowledges our SYN: stop the retransmission timer now. Doing this only when
; AckNumber > SND_NXT would practically never trigger and leave the SYN timer running.
and [ebx + TCP_SOCKET.timer_flags], not timer_flag_retransmission ; disable retransmission timer
; Advance SND_UNA, and SND_NXT as well if it lags behind
mov eax, [edx + TCP_header.AckNumber]
mov [ebx + TCP_SOCKET.SND_UNA], eax
cmp eax, [ebx + TCP_SOCKET.SND_NXT]
jbe @f
mov [ebx + TCP_SOCKET.SND_NXT], eax
@@:
; Record the peer's initial sequence number and request an immediate ACK
push [edx + TCP_header.SequenceNumber]
pop [ebx + TCP_SOCKET.IRS]
tcp_rcvseqinit ebx
or [ebx + TCP_SOCKET.t_flags], TF_ACKNOW
; The connection is established only if the segment carries an ACK and SND_UNA moved past ISS;
; otherwise this is a simultaneous open
mov eax, [ebx + TCP_SOCKET.SND_UNA]
cmp eax, [ebx + TCP_SOCKET.ISS]
jbe .simultaneous_open
test [edx + TCP_header.Flags], TH_ACK
jz .simultaneous_open
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: active open\n"
inc [TCPS_connects]
; set socket state to connected
push eax
mov eax, ebx
call socket_is_connected
pop eax
mov [ebx + TCP_SOCKET.t_state], TCPS_ESTABLISHED
; Do window scaling on this connection ?
; Only when scaling was both requested by us and received from the peer
mov eax, [ebx + TCP_SOCKET.t_flags]
and eax, TF_REQ_SCALE or TF_RCVD_SCALE
cmp eax, TF_REQ_SCALE or TF_RCVD_SCALE
jne .no_scaling
; NOTE(review): copies one word starting at requested_s_scale to SND_SCALE; presumably this
; covers two adjacent byte-sized scale fields (send and receive) at once - confirm.
mov ax, word[ebx + TCP_SOCKET.requested_s_scale]
mov word[ebx + TCP_SOCKET.SND_SCALE], ax
.no_scaling:
;;; TODO: reassemble packets queue
; If we didn't have time to re-transmit the SYN,
; Use its rtt as our initial srtt & rtt var.
mov eax, [ebx + TCP_SOCKET.t_rtt]
test eax, eax
je .trim
call tcp_xmit_timer
jmp .trim
;-----------------------------------------------------------------------------------
;
; Simultaneous open (We have received a SYN but no ACK)
;
;-----------------------------------------------------------------------------------
.simultaneous_open:
; A SYN arrived that did not complete our active open: switch to SYN_RECEIVED
; and fall through to the common SYN processing below.
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: simultaneous open\n"
mov [ebx + TCP_SOCKET.t_state], TCPS_SYN_RECEIVED
;-----------------------------------------------------------------------------------
;
; Common processing for receipt of SYN
;
;-----------------------------------------------------------------------------------
.trim:
; Common processing after a SYN has been accepted.
; ebx = socket, edx = TCP header, ecx = number of data bytes in the segment.
; Advance sequence number to correspond to first data byte.
; If data, trim to stay within window, dropping FIN if necessary
inc [edx + TCP_header.SequenceNumber]
; Drop any received data that doesn't fit in the receive window.
cmp ecx, [ebx + TCP_SOCKET.RCV_WND]
jbe .dont_trim
inc [TCPS_rcvpackafterwin]
sub ecx, [ebx + TCP_SOCKET.RCV_WND] ; ecx = number of bytes beyond the window
; Report the real number of trimmed bytes (this used to print an unrelated eax value)
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: received data does not fit in window, trimming %u bytes\n", ecx
add [TCPS_rcvbyteafterwin], ecx
; The FIN lies beyond the data we keep, so it is dropped as well
and [edx + TCP_header.Flags], not (TH_FIN)
mov ecx, [ebx + TCP_SOCKET.RCV_WND] ; keep exactly one window worth of data
.dont_trim:
; RCV_UP = sequence number of the first data byte, SND_WL1 = that value minus one
mov eax, [edx + TCP_header.SequenceNumber]
mov [ebx + TCP_SOCKET.RCV_UP], eax
dec eax
mov [ebx + TCP_SOCKET.SND_WL1], eax
;-----------------------------------------------------------------------------------
;
; Update window information (step 6 in RFC793)
;
;-----------------------------------------------------------------------------------
.ack_processed:
; Decide whether the send window must be updated from this segment.
; ebx = socket, edx = TCP header, ecx = number of data bytes in the segment.
; The window is taken when (unsigned compares):
;   SND_WL1 < seq, or
;   SND_WL1 = seq and SND_WL2 < ack, or
;   SND_WL1 = seq and SND_WL2 = ack and the advertised window is larger than SND_WND.
; A segment older than the last window update (SND_WL1 > seq) never updates the window.
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: ACK processed\n"
; don't look at window if no ACK
test [edx + TCP_header.Flags], TH_ACK
jz .no_window_update
; Does the segment contain new data?
mov eax, [ebx + TCP_SOCKET.SND_WL1]
cmp eax, [edx + TCP_header.SequenceNumber]
jb .update_window
ja .no_window_update ; stale segment: ignore its window
; No new data but a new ACK ?
mov eax, [ebx + TCP_SOCKET.SND_WL2]
cmp eax, [edx + TCP_header.AckNumber]
jb .update_window
ja .no_window_update ; flags still come from the cmp above: old ACK
; No new data or ACK but advertised window is larger than current window?
; NOTE(review): Window is read as a dword although the TCP window field is 16 bits wide;
; presumably earlier code stored the scaled window there - confirm.
mov eax, dword[edx + TCP_header.Window]
cmp eax, [ebx + TCP_SOCKET.SND_WND]
jbe .no_window_update
; Keep track of pure window updates
.update_window:
; Counted as pure window update: no data, same ACK as before, and a larger window
test ecx, ecx
jnz @f
mov eax, [ebx + TCP_SOCKET.SND_WL2]
cmp eax, [edx + TCP_header.AckNumber]
jne @f
mov eax, dword[edx + TCP_header.Window]
cmp eax, [ebx + TCP_SOCKET.SND_WND]
jbe @f
inc [TCPS_rcvwinupd]
@@:
; Take over the advertised window and remember the largest window ever seen
mov eax, dword[edx + TCP_header.Window]
mov [ebx + TCP_SOCKET.SND_WND], eax
cmp eax, [ebx + TCP_SOCKET.max_sndwnd]
jbe @f
mov [ebx + TCP_SOCKET.max_sndwnd], eax
@@:
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: Updating window to %u\n", eax
; Remember which segment (seq/ack) this window update came from
push [edx + TCP_header.SequenceNumber]
pop [ebx + TCP_SOCKET.SND_WL1]
push [edx + TCP_header.AckNumber]
pop [ebx + TCP_SOCKET.SND_WL2]
; A changed window may allow more data to be sent
or [temp_bits], TCP_BIT_NEEDOUTPUT
.no_window_update:
;-----------------------------------------------------------------------------------
;
; Process URG flag
;
;-----------------------------------------------------------------------------------
; Urgent data is only considered when URG is set, the urgent pointer is non-zero
; and the connection is not in TIME_WAIT state.
test [edx + TCP_header.Flags], TH_URG
jz .not_urgent
cmp [edx + TCP_header.UrgentPointer], 0
jz .not_urgent
cmp [ebx + TCP_SOCKET.t_state], TCPS_TIME_WAIT
je .not_urgent
; Ignore bogus urgent offsets
; An offset is bogus when urgent pointer + bytes already in the receive ring exceeds
; SOCKET_BUFFER_SIZE; in that case the URG flag and pointer are cleared.
movzx eax, [edx + TCP_header.UrgentPointer]
add eax, [ebx + STREAM_SOCKET.rcv.size]
cmp eax, SOCKET_BUFFER_SIZE
jbe .not_urgent
mov [edx + TCP_header.UrgentPointer], 0
and [edx + TCP_header.Flags], not (TH_URG)
jmp .do_data
.not_urgent:
; processing of received urgent pointer
; (not implemented: execution simply continues with the data processing below)
;;; TODO (1051-1093)
;-----------------------------------------------------------------------------------
;
; Process the data
;
;-----------------------------------------------------------------------------------
.do_data:
; Deliver the payload to the socket.
; ebx = socket, edx = TCP header, ecx = number of data bytes in the segment.
; Nothing is accepted once the state is TCPS_TIME_WAIT (or numerically above it).
cmp [ebx + TCP_SOCKET.t_state], TCPS_TIME_WAIT
jae .final_processing
; Continue only if the segment carries a FIN or at least one data byte
test [edx + TCP_header.Flags], TH_FIN
jnz @f
test ecx, ecx
jz .final_processing
@@:
; The segment is in order?
mov eax, [edx + TCP_header.SequenceNumber]
cmp eax, [ebx + TCP_SOCKET.RCV_NXT]
jne .out_of_order
; The reassembly queue is empty?
cmp [ebx + TCP_SOCKET.seg_next], 0
jne .out_of_order
; The connection is established?
cmp [ebx + TCP_SOCKET.t_state], TCPS_ESTABLISHED
jne .out_of_order
; Ok, lets do this.. Set delayed ACK flag and copy data into socket buffer
or [ebx + TCP_SOCKET.t_flags], TF_DELACK
pusha
; esi = edx + [dataoffset]
; NOTE(review): presumably the address of the payload (TCP header + header length) - confirm.
mov esi, [dataoffset]
add esi, edx
lea eax, [ebx + STREAM_SOCKET.rcv]
call socket_ring_write ; Add the data to the socket buffer
; NOTE(review): assumes socket_ring_write leaves ebx intact and that ecx holds the number of
; bytes actually stored when it returns - confirm.
add [ebx + TCP_SOCKET.RCV_NXT], ecx ; Update sequence number with number of bytes we have copied
popa
; Wake up the sleeping process
mov eax, ebx
call socket_notify
jmp .data_done
.out_of_order:
; Reached when the segment is not the next expected one, the reassembly queue is not empty,
; or the connection is not in ESTABLISHED state.
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP data is out of order!\nSequencenumber is %u, we expected %u.\n", \
[edx + TCP_header.SequenceNumber], [ebx + TCP_SOCKET.RCV_NXT]
; Uh-oh, some data is out of order, lets call TCP reassemble for help
call tcp_reassemble ;;; TODO!
; Generate ACK immediately, to let the other end know that a segment was received out of order,
; and to tell it what sequence number is expected. This aids the fast-retransmit algorithm.
or [ebx + TCP_SOCKET.t_flags], TF_ACKNOW
; Note: this jump skips the FIN processing below for every segment that takes this path.
jmp .final_processing ;;; HACK because of unimplemented reassembly queue!
.data_done:
;-----------------------------------------------------------------------------------
;
; Process FIN
;
;-----------------------------------------------------------------------------------
; ebx = socket, edx = TCP header. Segments without FIN go straight to final processing.
test [edx + TCP_header.Flags], TH_FIN
jz .final_processing
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: Processing FIN\n"
; In states below TCPS_TIME_WAIT this is treated as the first FIN of the connection
cmp [ebx + TCP_SOCKET.t_state], TCPS_TIME_WAIT
jae .not_first_fin
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: First FIN for this connection\n"
; No more data will arrive: tell the socket layer, ACK at once, and count the FIN
; as one sequence number
mov eax, ebx
call socket_cant_recv_more
or [ebx + TCP_SOCKET.t_flags], TF_ACKNOW
inc [ebx + TCP_SOCKET.RCV_NXT]
.not_first_fin:
; Dispatch on the connection state through a jump table with one dword entry per state.
; NOTE(review): t_state is used as an index without a range check; the table has 11 entries.
mov eax, [ebx + TCP_SOCKET.t_state]
jmp dword[.fin_sw_list+eax*4]
.fin_sw_list:
dd .final_processing ; TCPS_CLOSED
dd .final_processing ; TCPS_LISTEN
dd .final_processing ; TCPS_SYN_SENT
dd .fin_syn_est ; TCPS_SYN_RECEIVED
dd .fin_syn_est ; TCPS_ESTABLISHED
dd .final_processing ; TCPS_CLOSE_WAIT
dd .fin_wait1 ; TCPS_FIN_WAIT_1
dd .final_processing ; TCPS_CLOSING
dd .final_processing ; TCPS_LAST_ACK
dd .fin_wait2 ; TCPS_FIN_WAIT_2
dd .fin_timed ; TCPS_TIME_WAIT
;-----------------------------------------------------------------------------------
.fin_syn_est:
; In SYN_RECEIVED and ESTABLISHED state, enter the CLOSE_WAIT state
; (the peer has finished sending; our side is still open)
mov [ebx + TCP_SOCKET.t_state], TCPS_CLOSE_WAIT
jmp .final_processing
;-----------------------------------------------------------------------------------
.fin_wait1:
; From FIN_WAIT_1 state, enter CLOSING state (our FIN has not been ACKed)
; A FIN arriving after our FIN was acked is handled by .fin_wait2 instead.
mov [ebx + TCP_SOCKET.t_state], TCPS_CLOSING
jmp .final_processing
;-----------------------------------------------------------------------------------
.fin_wait2:
; From FIN_WAIT_2 state, enter TIME_WAIT state and start the timer
; (falls through to .fin_timed, which arms the 2 MSL timer)
mov [ebx + TCP_SOCKET.t_state], TCPS_TIME_WAIT
mov eax, ebx
call tcp_cancel_timers
; Reload the socket pointer instead of relying on tcp_cancel_timers preserving eax
; (same sequence as in .ack_c)
mov eax, ebx
call socket_is_disconnected
;-----------------------------------------------------------------------------------
.fin_timed:
; (re)start the 2 MSL timer
; Reached for a FIN in TIME_WAIT state, and by fall-through from .fin_wait2.
; Continues into .final_processing.
mov [ebx + TCP_SOCKET.timer_timed_wait], 2 * TCP_time_MSL
or [ebx + TCP_SOCKET.timer_flags], timer_flag_wait
;-----------------------------------------------------------------------------------
;
; Finally, drop the segment
;
;-----------------------------------------------------------------------------------
.final_processing:
; Normal end of segment processing: unlock the socket, send output if required,
; free the buffer and continue with the next queued segment.
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: Final processing\n"
; Release the socket mutex; the socket pointer ends up in eax
push ebx
lea ecx, [ebx + SOCKET.mutex]
call mutex_unlock
pop eax
; Output is needed when the window update requested it or when an immediate ACK is pending
test [temp_bits], TCP_BIT_NEEDOUTPUT
jnz .need_output
test [eax + TCP_SOCKET.t_flags], TF_ACKNOW
jz .done
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: ACK now!\n"
.need_output:
; Entered with eax = socket and the socket mutex already released
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: need output\n"
call tcp_output
.done:
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: dumping\n"
; NOTE(review): no register is loaded for net_buff_free; presumably it takes the buffer
; pointer that is still on the stack - confirm.
call net_buff_free
jmp .loop
;-----------------------------------------------------------------------------------
;
; Drop segment, reply with an RST segment when needed
;
;-----------------------------------------------------------------------------------
;-----------------------------------------------------------------------------------
.drop_after_ack:
; Drop the segment, but answer it with an ACK unless it carries RST.
; ebx = socket, edx = TCP header.
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: Drop after ACK\n"
; Release the socket mutex; afterwards eax = socket and edx = TCP header again
push edx ebx
lea ecx, [ebx + SOCKET.mutex]
call mutex_unlock
pop eax edx
test [edx + TCP_header.Flags], TH_RST
jnz .done
; Force an immediate ACK and let tcp_output send it
or [eax + TCP_SOCKET.t_flags], TF_ACKNOW
jmp .need_output
;-----------------------------------------------------------------------------------
.drop_with_reset:
; Drop the segment and answer it with a reset where that is allowed.
; ebx = socket, edx = TCP header.
; Every exit goes through .destroy_new_socket, so that a socket still marked with
; TCP_BIT_DROPSOCKET is freed on the paths that send no reset as well.
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: Drop with reset\n"
; Release the socket mutex, keeping ebx = socket and edx = TCP header
push ebx edx
lea ecx, [ebx + SOCKET.mutex]
call mutex_unlock
pop edx ebx
; Never answer a reset with a reset
test [edx + TCP_header.Flags], TH_RST
jnz .destroy_new_socket
; TODO: if its a multicast/broadcast, also drop
test [edx + TCP_header.Flags], TH_ACK
jnz .respond_ack
test [edx + TCP_header.Flags], TH_SYN
jnz .respond_syn
; Neither ACK nor SYN: drop silently
jmp .destroy_new_socket
.respond_ack:
; Segment carried an ACK: reply with a plain RST
push ebx
mov cl, TH_RST
call tcp_respond
pop ebx
jmp .destroy_new_socket
.respond_syn:
; Segment carried a SYN without ACK: reply with RST + ACK
push ebx
mov cl, TH_RST + TH_ACK
call tcp_respond
pop ebx
jmp .destroy_new_socket
;-----------------------------------------
; The connection has no associated socket
.no_socket:
; No socket matches this segment: release the global socket list mutex, then reply
; with a reset where that is allowed. edx = TCP header.
pusha
mov ecx, socket_mutex
call mutex_unlock
popa
.respond_seg_reset:
; Never answer a reset with a reset
test [edx + TCP_header.Flags], TH_RST
jnz .drop_no_socket
; TODO: if its a multicast/broadcast, also drop
test [edx + TCP_header.Flags], TH_ACK
jnz .respond_seg_ack
test [edx + TCP_header.Flags], TH_SYN
jnz .respond_seg_syn
; Neither ACK nor SYN: drop silently
jmp .drop_no_socket
.respond_seg_ack:
; Segment carried an ACK: reply with a plain RST, built from the segment itself
mov cl, TH_RST
mov ebx, [device]
call tcp_respond_segment
jmp .drop_no_socket
.respond_seg_syn:
; Segment carried a SYN without ACK: reply with RST + ACK
mov cl, TH_RST + TH_ACK
mov ebx, [device]
call tcp_respond_segment
jmp .drop_no_socket
;------------------------------------------------
; Unlock socket mutex and prepare to drop segment
.drop:
; Silent drop with the socket mutex still held. ebx = socket.
; The three labels below form one fall-through chain:
; unlock the socket -> free a half-created socket -> free the buffer.
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: Dropping segment\n"
pusha
lea ecx, [ebx + SOCKET.mutex]
call mutex_unlock
popa
;--------------------------------------------
; Destroy the newly created socket if needed
.destroy_new_socket:
; Entered with ebx = socket and its mutex already released.
; The socket is only freed while TCP_BIT_DROPSOCKET is still set in temp_bits.
test [temp_bits], TCP_BIT_DROPSOCKET
jz .drop_no_socket
mov eax, ebx
call socket_free
;------------------
; Drop the segment
.drop_no_socket:
; Does not use the socket pointer: frees the buffer and continues with the next queued segment
DEBUGF DEBUG_NETWORK_VERBOSE, "TCP_input: Drop (no socket)\n"
call net_buff_free
jmp .loop
endp