;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;;                                                                 ;;
;; Copyright (C) KolibriOS team 2004-2010. All rights reserved.    ;;
;; Distributed under terms of the GNU General Public License       ;;
;;                                                                 ;;
;;  TCP.INC                                                        ;;
;;                                                                 ;;
;;  Part of the tcp/ip network stack for KolibriOS                 ;;
;;                                                                 ;;
;;    Written by hidnplayr@kolibrios.org                           ;;
;;                                                                 ;;
;;    Based on the code of 4.4BSD                                  ;;
;;                                                                 ;;
;;          GNU GENERAL PUBLIC LICENSE                             ;;
;;             Version 2, June 1991                                ;;
;;                                                                 ;;
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

$Revision$

; Socket states (values are used as indexes into the jump/flag tables below,
; so the numbering must not change)
TCB_CLOSED              equ 0
TCB_LISTEN              equ 1
TCB_SYN_SENT            equ 2
TCB_SYN_RECEIVED        equ 3
TCB_ESTABLISHED         equ 4
TCB_CLOSE_WAIT          equ 5
TCB_FIN_WAIT_1          equ 6
TCB_CLOSING             equ 7
TCB_LAST_ACK            equ 8
TCB_FIN_WAIT_2          equ 9
TCB_TIMED_WAIT          equ 10

; Socket Flags
TF_ACKNOW               equ 1 shl 0     ; ack peer immediately
TF_DELACK               equ 1 shl 1     ; ack, but try to delay it
TF_NODELAY              equ 1 shl 2     ; don't delay packets to coalesce
TF_NOOPT                equ 1 shl 3     ; don't use tcp options
TF_SENTFIN              equ 1 shl 4     ; have sent FIN
TF_REQ_SCALE            equ 1 shl 5     ; have/will request window scaling
TF_RCVD_SCALE           equ 1 shl 6     ; other side has requested scaling
TF_REQ_TSTMP            equ 1 shl 7     ; have/will request timestamps
TF_RCVD_TSTMP           equ 1 shl 8     ; a timestamp was received in SYN
TF_SACK_PERMIT          equ 1 shl 9     ; other side said I could SACK

; Segment flags
TH_FIN                  equ 1 shl 0
TH_SYN                  equ 1 shl 1
TH_RST                  equ 1 shl 2
TH_PUSH                 equ 1 shl 3
TH_ACK                  equ 1 shl 4
TH_URG                  equ 1 shl 5

; Segment header options
TCP_OPT_EOL             equ 0           ; End of option list.
TCP_OPT_NOP             equ 1           ; No-Operation.
TCP_OPT_MAXSEG          equ 2           ; Maximum Segment Size.
TCP_OPT_WINDOW          equ 3           ; window scale
TCP_OPT_TIMESTAMP       equ 8

; Fundamental timer values, expressed in ticks of the 640 ms timer
; (value = seconds / 0.64)
TCP_time_MSL            equ 47          ; max segment lifetime (30s)
TCP_time_re_min         equ 2           ; min retransmission (1,28s)
TCP_time_re_max         equ 100         ; max retransmission (64s)
TCP_time_pers_min       equ 8           ; min persist (5,12s)
TCP_time_pers_max       equ 94          ; max persist (60,16s)
TCP_time_keep_init      equ 118         ; connection establishment (75,52s)
TCP_time_keep_idle      equ 11250       ; idle time before 1st probe (2h = 7200s / 0,64s); was 4608 (7200 * 0,64)
TCP_time_keep_interval  equ 118         ; between probes when no response (75,52s)
TCP_time_rtt_default    equ 5           ; default Round Trip Time (3,2s)

; timer constants
TCP_max_rxtshift        equ 12          ; max retransmissions waiting for ACK
TCP_max_keepcnt         equ 8           ; max keepalive probes

; On-the-wire TCP header (all multi-byte fields are in network byte order)
struct  TCP_segment
        .SourcePort             dw ?
        .DestinationPort        dw ?
        .SequenceNumber         dd ?
        .AckNumber              dd ?
        .DataOffset             db ?    ; DataOffset[0-3 bits] and Reserved[4-7]
        .Flags                  db ?    ; Reserved[0-1 bits]|URG|ACK|PSH|RST|SYN|FIN
        .Window                 dw ?
        .Checksum               dw ?
        .UrgentPointer          dw ?
        .Data:                          ; ..or options
ends

; Entry of a socket's incoming data queue
struct  tcp_in_queue_entry
        .data_ptr               dd ?
        .data_size              dd ?
        .offset                 dd ?
        .size:
ends

; Entry of a socket's outgoing data queue
struct  tcp_out_queue_entry
        .data_ptr               dd ?
        .data_size              dd ?
        .size:
ends

; Per-interface statistics. TCP_init clears the four arrays in one pass, so
; they must stay contiguous: 1 + 1 + 2 + 2 = 6 dwords per interface.
align 4
uglobal
        TCP_segments_tx         rd IP_MAX_INTERFACES
        TCP_segments_rx         rd IP_MAX_INTERFACES
        TCP_bytes_rx            rq IP_MAX_INTERFACES
        TCP_bytes_tx            rq IP_MAX_INTERFACES
        TCP_sequence_num        dd ?
endg


;-----------------------------------------------------------------
;
; TCP_init
;
;  This function resets all TCP variables
;
;  IN:  /
;  OUT: /
;
;  Clobbers: eax, ecx, edi
;
;-----------------------------------------------------------------
align 4
TCP_init:

        ; Clear the four statistics arrays in one go; they are laid out
        ; back to back and take 6 dwords per interface (rd + rd + rq + rq).
        xor     eax, eax
        mov     edi, TCP_segments_tx
        mov     ecx, (6*IP_MAX_INTERFACES)
        rep     stosd

        mov     [TCP_sequence_num], 1

        ret


;-----------------------------------------------------------------
;
; TCP_timer_160ms
;
;  Walks the socket list and counts down the delayed-ACK timer of
;  every TCP socket. When a running timer reaches zero, an ACK is
;  sent through TCP_respond.
;
;  IN:  /
;  OUT: /
;
;-----------------------------------------------------------------
align 4
TCP_timer_160ms:

        mov     eax, net_sockets
  .loop:
        mov     eax, [eax + SOCKET.NextPtr]
        or      eax, eax
        jz      .exit

        cmp     [eax + SOCKET.Type], IP_PROTO_TCP
        jne     .loop

        ; A value of 0 means the timer is stopped. Decrementing it would
        ; wrap around and fire a spurious ACK much later.
        cmp     [eax + TCP_SOCKET.timer_ack], 0
        je      .loop

        dec     [eax + TCP_SOCKET.timer_ack]
        jnz     .loop

        DEBUGF  1,"TCP ack for socket %x expired, time to piggyback!\n", eax

        push    eax
        mov     cl , TH_ACK             ; TCP_respond takes the segment flags in cl
        call    TCP_respond
        pop     eax

        jmp     .loop

  .exit:
        ret


;-----------------------------------------------------------------
;
; TCP_timer_640ms
;
;  Advances the global initial sequence number and counts down the
;  retransmission, keepalive, 2MSL and persist timers of every TCP
;  socket. Only the retransmission timer has an action attached so
;  far (it calls TCP_output); the others only log.
;
;  IN:  /
;  OUT: /
;
;-----------------------------------------------------------------
align 4
TCP_timer_640ms:

; Update TCP sequence number

        add     [TCP_sequence_num], 64000

; scan through all the active TCP sockets, decrementing ALL timers
; timers do not have the chance to wrap because of the keepalive timer will kill the socket when it expires

        mov     eax, net_sockets
  .loop:
        mov     eax, [eax + SOCKET.NextPtr]
  .check_only:
        or      eax, eax
        jz      .exit

        cmp     [eax + SOCKET.Type], IP_PROTO_TCP
        jne     .loop

        dec     [eax + TCP_SOCKET.timer_retransmission]
        jnz     .check_more2

        DEBUGF  1,"socket %x: Retransmission timer expired\n", eax

        push    eax                     ; TCP_output does not preserve eax
        call    TCP_output
        pop     eax

  .check_more2:
        dec     [eax + TCP_SOCKET.timer_keepalive]
        jnz     .check_more3

        DEBUGF  1,"socket %x: Keepalive expired\n", eax

        ;;; TODO: check socket state and handle accordingly

  .check_more3:
        dec     [eax + TCP_SOCKET.timer_timed_wait]
        jnz     .check_more5

        DEBUGF  1,"socket %x: 2MSL timer expired\n", eax

  .check_more5:
        dec     [eax + TCP_SOCKET.timer_persist]
        jnz     .loop

        DEBUGF  1,"socket %x: persist timer expired\n", eax

        jmp     .loop
  .exit:
        ret


;-----------------------------------------------------------------
;
; TCP_input:
;
;  Handles one received TCP segment: validates header length and
;  checksum, finds the owning socket, then runs the (still partly
;  unimplemented) 4.4BSD-style input state machine.
;
;  IN:  [esp] = ptr to buffer
;       [esp+4] = buffer size
;       ebx = ptr to device struct
;       ecx = segment size
;       edx = ptr to TCP segment
;
;       esi = ipv4 source address
;       edi = ipv4 dest address
;
;  OUT: /
;
;  Register roles inside the routine:
;       ebx = socket pointer (after the socket lookup)
;       ecx = payload length (after the header length was subtracted)
;       edx = ptr to TCP header (never advanced; all field accesses use it)
;       esi = ipv4 source address (kept alive for the LISTEN case)
;
;  NOTE(review): every exit does "call kernel_free / ret 4". Whether that
;  matches the stack layout documented above depends on kernel_free's
;  calling convention, which is not visible in this file - confirm.
;
;-----------------------------------------------------------------
align 4
TCP_input:

        DEBUGF  1,"TCP_input\n"

; Offset must be greater than or equal to the size of the standard TCP header (20) and less than or equal to the TCP length.

        movzx   eax, [edx + TCP_segment.DataOffset]
        and     eax, 0xf0
        shr     al , 2                  ; high nibble * 4 = header length in bytes

        DEBUGF  1,"data offset: %u\n", eax

        ; No socket is known or locked yet: ebx still holds the device
        ; pointer, so these failures must not go through the unlocking exit.
        cmp     eax, 20
        jl      .drop_not_locked

        cmp     eax, ecx
        jg      .drop_not_locked

;-------------------------------
; Now, re-calculate the checksum

        ; TCP_checksum takes the two addresses on the stack, the segment in
        ; esi and its length in cx. Its helpers are reported to destroy
        ; edx, ecx and esi, all of which are still needed below.
        push    eax ecx edx ebx esi
        push    edi
        push    esi
        mov     esi, edx
        call    TCP_checksum            ; removes the two pushed addresses itself (ret 8)
        pop     esi ebx edx ecx eax

        cmp     [edx + TCP_segment.Checksum], 0
        jnz     .drop_not_locked

        DEBUGF  1,"Checksum is correct\n"

;-----------------------------------------------------------------------------------------
; Check if this packet has a timestamp option (We do it here so we can process it quickly)

        cmp     eax, 20 + 12                                    ; Timestamp option is 12 bytes
        jl      .no_timestamp
        je      .is_ok

        cmp     byte [edx + TCP_segment.Data + 12], 0           ; end of option list
        jne     .no_timestamp

  .is_ok:
        test    [edx + TCP_segment.Flags], TH_SYN               ; SYN flag must not be set
        jnz     .no_timestamp

        cmp     dword [edx + TCP_segment.Data], 0x0101080a      ; Timestamp header
        jne     .no_timestamp

        DEBUGF  1,"timestamp ok\n"

        ; TODO: Parse the options
        ; TODO: Set a Bit in the TCP to tell all options are parsed

        ; Until the fast path exists, continue with normal processing.
        ; (A bare "ret" used to sit here; it returned through the buffer
        ; pointer on the stack and never freed the buffer.)

  .no_timestamp:

;-------------------------------------------
; Convert Big-endian values to little endian

        ntohld  [edx + TCP_segment.SequenceNumber]
        ntohld  [edx + TCP_segment.AckNumber]

        ntohlw  [edx + TCP_segment.Window]
        ntohlw  [edx + TCP_segment.UrgentPointer]

;------------------------------------------------------------
; Next thing to do is find the TCB (thus, the socket pointer)

; IP Packet TCP Destination Port = local Port
; (IP Packet SenderAddress = Remote IP)  OR  (Remote IP = 0)
; (IP Packet TCP Source Port = remote Port)  OR (remote Port = 0)

        mov     ebx, net_sockets

  .socket_loop:
        mov     ebx, [ebx + SOCKET.NextPtr]
        or      ebx, ebx
        jz      .no_socket              ; ebx = 0 here, there is nothing to unlock

        cmp     [ebx + SOCKET.Type], IP_PROTO_TCP
        jne     .socket_loop

        mov     ax, [edx + TCP_segment.DestinationPort]
        cmp     [ebx + TCP_SOCKET.LocalPort], ax
        jne     .socket_loop

        mov     eax, [ebx + IP_SOCKET.RemoteIP]
        cmp     eax, esi
        je      @f
        test    eax, eax                ; RemoteIP = 0 accepts any sender
        jnz     .socket_loop
       @@:

        mov     ax, [ebx + TCP_SOCKET.RemotePort]
        cmp     [edx + TCP_segment.SourcePort] , ax
        je      .found_socket
        test    ax, ax                  ; RemotePort = 0 accepts any port
        jnz     .socket_loop
  .found_socket:
        DEBUGF  1,"Socket ptr: %x\n", ebx

; ebx now contains the pointer to the socket

;----------------------------
; Check if socket isnt closed

        cmp     [ebx + TCP_SOCKET.t_state], TCB_CLOSED
        je      .drop_not_locked        ; the lock has not been taken yet

;----------------
; Lock the socket

        add     ebx, SOCKET.lock        ; TODO: figure out if we should lock now already
        call    wait_mutex
        sub     ebx, SOCKET.lock

;---------------------------------------
; unscale the window into a 32 bit value ;;;;;;

        ; NOTE(review): the window was already byte-swapped by ntohlw above,
        ; so this xchg looks like a second swap. The result in eax is not
        ; used yet, so it is left untouched - confirm when finishing this part.
        movzx   eax, [edx + TCP_segment.Window]
        xchg    al, ah

        test    [edx + TCP_segment.Flags], TH_SYN
        jnz     .no_syn

        push    ecx                     ; cl is the shift count, ecx still holds the segment size
        mov     cl , [ebx + TCP_SOCKET.SND_SCALE]
        shl     eax, cl
        pop     ecx

  .no_syn:

;-----------------------------------
; Is this socket a listening socket?

; If so, create a new socket

        test    [ebx + SOCKET.options], SO_ACCEPTCON
        jz      .no_accept_conn

                ; TODO: create a new socket

  .no_accept_conn:

;----------------------------
; Compute window scale factor

; TODO

;-------------------------------------
; Reset idle timer and keepalive timer

;;;; TODO: idle timer?

        mov     [ebx + TCP_SOCKET.timer_keepalive], TCP_time_keep_interval

;------------------------------------------------
; Split the segment size into header and payload

        ; Done exactly once, here. From this point on ecx is the payload
        ; length and edx keeps pointing at the TCP header.
        movzx   eax, [edx + TCP_segment.DataOffset]
        and     eax, 0xf0
        shr     eax, 2                  ; eax = header length in bytes
        sub     ecx, eax                ; ecx = payload length

;-----------------------------------------
; Process TCP options if not in LISTEN state

        cmp     [ebx + TCP_SOCKET.t_state], TCB_LISTEN
        je      .dont_do_options

        call    TCP_do_options          ; eax = header length, edx = segment

  .dont_do_options:

;-----------------------------------------------------------------------
; Time to do some header prediction (Original Principle by Van Jacobson)

; There are two common cases for an uni-directional data transfer.
;
; General rule: the packets has no control flags, is in-sequence,
;   window width didnt change and we're not retransmitting.
;
; Second rules:
;  -  If the length is 0 and the ACK moved forward, we're the sender side of the transfer.
;       In this case we'll free the ACK'ed data and notify higher levels that we have free space in buffer
;
;  -  If the length is not 0 and the ACK didn't move, we're the receiver side of the transfer.
;       If the packets are in order (data queue is empty), add the data to the socket buffer and request a delayed ACK

        cmp     [ebx + TCP_SOCKET.t_state], TCB_ESTABLISHED
        jnz     .not_uni_xfer

        test    [edx + TCP_segment.Flags], TH_SYN + TH_FIN + TH_RST + TH_URG
        jnz     .not_uni_xfer

        test    [edx + TCP_segment.Flags], TH_ACK
        jz      .not_uni_xfer

        mov     eax, [edx + TCP_segment.SequenceNumber]
        cmp     eax, [ebx + TCP_SOCKET.RCV_NXT]
        jne     .not_uni_xfer

        movzx   eax, [edx + TCP_segment.Window] ;;;;;
        cmp     eax, [ebx + TCP_SOCKET.SND_WND]
        jne     .not_uni_xfer

        mov     eax, [ebx + TCP_SOCKET.SND_NXT]
        cmp     eax, [ebx + TCP_SOCKET.SND_MAX]
        jne     .not_uni_xfer

;-------------------------------------------------------------------------------
; If last ACK falls within this segment's sequence number, record the timestamp.

; TODO: check if it has a timestamp

;---------------------------------------
; check if we are sender in the uni-xfer

; If the following 4 conditions are all true, this segment is a pure ACK.
;
; - The segment contains no data (ti_len is 0).

        test    ecx, ecx
        jnz     .not_sender

; - The acknowledgment field in the segment (ti_ack) is greater than the largest unacknowledged sequence number (snd_una).
;     Since this test is "greater than" and not "greater than or equal to," it is true only if some positive amount of data is acknowledged by the ACK.

        mov     eax, [edx + TCP_segment.AckNumber]
        cmp     eax, [ebx + TCP_SOCKET.SND_UNA]
        jle     .not_uni_xfer

; - The acknowledgment field in the segment (ti_ack) is less than or equal to the maximum sequence number sent (snd_max).

;        mov     eax, [edx + TCP_segment.Ack]
        cmp     eax, [ebx + TCP_SOCKET.SND_MAX]         ; eax still holds the ack number
        jg      .not_uni_xfer

; - The congestion window (snd_cwnd) is greater than or equal to the current send window (snd_wnd).
;     This test is true only if the window is fully open, that is, the connection is not in the middle of slow start or congestion avoidance.

        mov     eax, [ebx + TCP_SOCKET.SND_CWND]
        cmp     eax, [ebx + TCP_SOCKET.SND_WND]
        jl      .not_uni_xfer

        DEBUGF  1,"Header prediction: we are sender\n"

;---------------------------------
; Packet is a pure ACK, process it

; Update RTT estimators

; Delete acknowledged bytes from send buffer

; Stop retransmit timer
        ; NOTE(review): the comment says "retransmit timer" but the field
        ; written is timer_ack - confirm which one was meant.
        mov     [ebx + TCP_SOCKET.timer_ack], 0

; Awaken waiting processes
        mov     eax, ebx
        call    SOCKET_notify_owner

; Generate more output
        mov     eax, ebx                ; TCP_output takes the socket in eax..
        push    ebx                     ; ..and overwrites ebx
        call    TCP_output
        pop     ebx

        jmp     .drop

;-------------------------------------------------
; maybe we are the receiver in the uni-xfer then..

  .not_sender:
; The amount of data in the segment (ti_len) is greater than 0 (data count is in ecx)

; The acknowledgment field (ti_ack) equals the largest unacknowledged sequence number. This means no data is acknowledged by this segment.
        mov     eax, [edx + TCP_segment.AckNumber]
        cmp     eax, [ebx + TCP_SOCKET.SND_UNA]
        jne     .not_uni_xfer

; The reassembly list of out-of-order segments for the connection is empty (seg_next equals tp). ;;;;
        jnz     .not_uni_xfer

; There is room in the receive buffer for the data in the segment. ;;;;
        jnz     .not_uni_xfer

;-------------------------------------
; Complete processing of received data

        DEBUGF  1,"header prediction: we are receiver\nreceiving %u bytes of data\n", ecx

; The next expected receive sequence number (rcv_nxt) is incremented by the number of bytes of data.
        add     [ebx + TCP_SOCKET.RCV_NXT], ecx

; Add the data to the socket buffer
        mov     eax, ebx
        ;;; mov...
        call    SOCKET_input

; The delayed-ACK flag is set and the input processing is complete.
        jmp     .drop

;----------------------------------------------------
; Header prediction failed, doing it the slow way..

  .not_uni_xfer:

        DEBUGF  1,"Header prediction failed\n"

        ; The header length was already taken out of ecx before the header
        ; prediction, and edx stays on the TCP header because everything
        ; below reads header fields through it.

;------------------------------
; Calculate receive window size

        ;;;;

;-------------------------
; TCP slow input procedure

        DEBUGF  1,"TCP slow input procedure\n"

        cmp     [ebx + TCP_SOCKET.t_state], TCB_LISTEN
        je      .LISTEN

        cmp     [ebx + TCP_SOCKET.t_state], TCB_SYN_SENT
        je      .SYN_SENT

;--------------------------------------------
; Protection Against Wrapped Sequence Numbers

; First, check timestamp if present

;;;; TODO

; Then, check if at least some bytes of data are within window

;;;; TODO

        jmp     .trim_then_step6

align 4
  .LISTEN:

        DEBUGF  1,"TCP state: listen\n"

        test    [edx + TCP_segment.Flags], TH_RST
        jnz     .drop

        test    [edx + TCP_segment.Flags], TH_ACK
        jnz     .drop_with_reset

        test    [edx + TCP_segment.Flags], TH_SYN
        jz      .drop

        ; TODO: check if it's a broadcast or multicast, and drop if so

;;; 28.6

        ; create a new socket and fill in the nescessary variables

;; Exit if backlog queue is full
;        mov     ax, [ebx + TCP_SOCKET.backlog_cur]
;        cmp     ax, [ebx + TCP_SOCKET.backlog]
;        jae     .exit

; Allocate new socket
        call    SOCKET_alloc
        ;;; jz .fail

; Copy structure from current socket to new, (including lock!)
; We start at PID to reserve the socket num, and the 2 pointers at beginning of socket
        push    esi ecx                 ; keep source IP and payload length across the copy
        lea     esi, [ebx + SOCKET.PID] ; source is the listening socket, not the segment
        lea     edi, [eax + SOCKET.PID]
        mov     ecx, (TCP_SOCKET.end - SOCKET.PID + 3)/4
        rep     movsd
        pop     ecx esi

;; Push pointer to new socket to queue
;        movzx   ecx, [ebx + TCP_SOCKET.backlog_cur]
;        inc     [ebx + TCP_SOCKET.backlog_cur]
;        mov     [ebx + TCP_SOCKET.end + ecx*4], eax

        mov     [eax + IP_SOCKET.RemoteIP], esi ; IP source address
        push    ecx                             ; cx/ecx are only scratch for the next few moves
        mov     cx, [edx + TCP_segment.SourcePort]
        mov     [eax + TCP_SOCKET.RemotePort], cx
        mov     ecx, [edx + TCP_segment.SequenceNumber]
        mov     [eax + TCP_SOCKET.IRS], ecx

        mov     ecx, [eax + TCP_SOCKET.ISS]
        mov     [eax + TCP_SOCKET.SND_NXT], ecx
        pop     ecx

        ; NOTE(review): processing continues on the listening socket (ebx);
        ; the new socket in eax is neither queued nor unlocked yet.
        jmp     .trim_then_step6

align 4
  .SYN_SENT:

        DEBUGF  1,"TCP state: syn_sent\n"

        test    [edx + TCP_segment.Flags], TH_ACK
        jz      @f

        mov     eax, [edx + TCP_segment.AckNumber]
        cmp     eax, [ebx + TCP_SOCKET.ISS]
        jle     .drop_with_reset

        mov     eax, [edx + TCP_segment.AckNumber]
        cmp     eax, [ebx + TCP_SOCKET.SND_MAX]
        jg      .drop_with_reset
       @@:

        test    [edx + TCP_segment.Flags], TH_RST
        jz      @f

        test    [edx + TCP_segment.Flags], TH_ACK
        jz      .drop

        ;tp = tcp_drop(tp, ECONNREFUSED)

        jmp     .drop
       @@:

        test    [edx + TCP_segment.Flags], TH_SYN
        jz      .drop

; now, process received SYN in response to an active open

        test    [edx + TCP_segment.Flags], TH_ACK
        jz      @f

        mov     eax, [edx + TCP_segment.AckNumber]
        mov     [ebx + TCP_SOCKET.SND_UNA], eax

        mov     eax, [ebx + TCP_SOCKET.SND_UNA]
        cmp     eax, [ebx + TCP_SOCKET.SND_NXT]
        jle     @f
        mov     [ebx + TCP_SOCKET.SND_NXT], eax

        mov     [ebx + TCP_SOCKET.timer_keepalive], TCP_time_keep_interval
        mov     [ebx + TCP_SOCKET.timer_retransmission], 0

        mov     eax, [edx + TCP_segment.SequenceNumber]
        mov     [ebx + TCP_SOCKET.IRS], eax

        ; TODO: set socket state to connected

        mov     [ebx + TCP_SOCKET.t_state], TCB_ESTABLISHED

        ; TODO: check if we should scale the connection (567-572)
        ; TODO: update RTT estimators

        ; Without this jump the state was immediately overwritten with
        ; TCB_SYN_RECEIVED below.
        jmp     .trimthenstep6

       @@:

; We have received a syn but no ACK, so we are having a simultaneous open..
        mov     [ebx + TCP_SOCKET.t_state], TCB_SYN_RECEIVED

;-------------------------------------
; Common processing for receipt of SYN

  .trimthenstep6:

        inc     [edx + TCP_segment.SequenceNumber]

        cmp     cx, [ebx + TCP_SOCKET.RCV_WND]
        jle     @f

        movzx   eax, cx
        sub     ax, [ebx + TCP_SOCKET.RCV_WND]
        ; TODO: 592
        mov     cx, [ebx + TCP_SOCKET.RCV_WND]
        ; TODO...
       @@:
        ;;;;;

        ;;; jmp .step6

align 4
  .trim_then_step6:

        DEBUGF  1,"Trim, then step 6\n"

;----------------------------
; trim any data not in window

        mov     eax, [ebx + TCP_SOCKET.RCV_NXT]
        sub     eax, [edx + TCP_segment.SequenceNumber]         ; eax = bytes already received

        test    eax, eax
        jz      .no_drop

        test    [edx + TCP_segment.Flags], TH_SYN
        jz      .no_drop

        ; A duplicate SYN occupies one sequence number: strip it and
        ; account for it in the urgent pointer and in the drop count.
        and     [edx + TCP_segment.Flags], not (TH_SYN)

        inc     [edx + TCP_segment.SequenceNumber]

        cmp     [edx + TCP_segment.UrgentPointer], 1
        jl      @f

        dec     [edx + TCP_segment.UrgentPointer]

        jmp     .syn_trimmed
       @@:

        and     [edx + TCP_segment.Flags], not (TH_URG)
  .syn_trimmed:
        dec     eax                     ; must happen on both branches above

  .no_drop:

; eax holds number of bytes to drop

;----------------------------------
; Check for entire duplicate packet

        cmp     eax, ecx
        jge     .duplicate

        ;;; TODO: figure 28.30

;;        inc     [TCP_segments_rx]
;;        add     dword [TCP_bytes_rx], ecx
;;        adc     dword [TCP_bytes_rx+4], 0

;------------------------
; Check for duplicate FIN

        test    [edx + TCP_segment.Flags], TH_FIN
        jz      @f
        inc     ecx
        cmp     eax, ecx
        lea     ecx, [ecx - 1]          ; restore ecx without touching the flags of the cmp
        jne     @f

        mov     eax, ecx
        and     [edx + TCP_segment.Flags], not TH_FIN
        ;;; TODO: set ACKNOW flag

        jmp     .no_duplicate
       @@:

        ; Handle the case when a bound socket connects to itself
        ; Allow packets with a SYN and an ACKto continue with the processing

;-------------------------------------
; Generate duplicate ACK if nescessary

; This code also handles simultaneous half-open or self-connects

        test    eax, eax
        jnz     .drop_after_ack

        test    [edx + TCP_segment.Flags], TH_ACK       ; flag test, not an equality compare
        jz      .drop_after_ack

  .duplicate:

;----------------------------------------
; Update statistics for duplicate packets

        ;;; TODO

        ;;; DROP the packet ??

  .no_duplicate:

;-----------------------------------------------
; Remove duplicate data and update urgent offset

        add     [edx + TCP_segment.SequenceNumber], eax

        ;;; TODO

        sub     [edx + TCP_segment.UrgentPointer], ax
        jg      @f

        and     [edx + TCP_segment.Flags], not (TH_URG)
        mov     [edx + TCP_segment.UrgentPointer], 0
       @@:

;--------------------------------------------------
; Handle data that arrives after process terminates

        cmp     [ebx + SOCKET.PID], 0
        jge     @f

        cmp     [ebx + TCP_SOCKET.t_state], TCB_CLOSE_WAIT
        jle     @f

        test    ecx, ecx
        jz      @f

        ;;; Close the socket
        ;;; update stats

        jmp     .drop_with_reset
       @@:

;----------------------------------------
; Remove data beyond right edge of window

        mov     eax, [edx + TCP_segment.SequenceNumber]
        add     eax, ecx
        sub     eax, [ebx + TCP_SOCKET.RCV_NXT]
        sub     ax, [ebx + TCP_SOCKET.RCV_WND]

        ; eax now holds the number of bytes to drop

        jle     .no_excess_data

        ;;; TODO: update stats

        cmp     eax, ecx
        jl      .dont_drop_all

;;; TODO 700-736

  .dont_drop_all:

  .no_excess_data:

;-----------------
; Record timestamp

        ;;; TODO 737-746

;------------------
; Process RST flags

        test    [edx + TCP_segment.Flags], TH_RST
        jz      .rst_skip

        ; Dispatch on the socket state (table of dword handlers)
        mov     eax, [ebx + TCP_SOCKET.t_state]
        shl     eax, 2
        jmp     dword [eax + .rst_sw_list]

  .rst_sw_list:
        dd      .rst_skip       ;TCB_CLOSED
        dd      .rst_skip       ;TCB_LISTEN
        dd      .rst_skip       ;TCB_SYN_SENT
        dd      .econnrefused   ;TCB_SYN_RECEIVED
        dd      .econnreset     ;TCB_ESTABLISHED
        dd      .econnreset     ;TCB_CLOSE_WAIT
        dd      .econnreset     ;TCB_FIN_WAIT_1
        dd      .rst_close      ;TCB_CLOSING
        dd      .rst_close      ;TCB_LAST_ACK
        dd      .econnreset     ;TCB_FIN_WAIT_2
        dd      .rst_close      ;TCB_TIMED_WAIT

  .econnrefused:

        ;;; TODO: debug info

        jmp     .close

  .econnreset:

        ;;; TODO: debug info
  .close:

        ;;; update stats

  .rst_close:

        ;;; Close the socket
        jmp     .drop

  .rst_skip:

;--------------------------------------
; handle SYN-full and ACK-less segments

        test    [edx + TCP_segment.Flags], TH_SYN
        jz      @f

        ;;; tcp_drop ( ECONNRESET)
        jmp     .drop_with_reset
       @@:                              ; was missing: "jz @f" skipped all the way into .step6

        test    [edx + TCP_segment.Flags], TH_ACK
        jz      .drop

;----------------
; Process the ACK

        cmp     [ebx + TCP_SOCKET.t_state], TCB_SYN_RECEIVED
        jg      .ack_dup
        jl      .ack_nodup

        ;;;;;

  .ack_dup:

        ;;;;

  .ack_nodup:

        ;;;; 887

;-------------------------------------------------
; If the congestion window was inflated to account
; for the other side's cached packets, retrace it

        ;;;; 888 -  902

;------------------------------------------
; RTT measurements and retransmission timer

        ;;;;; 903 - 926

        mov     [ebx + TCP_SOCKET.timer_retransmission], 0

        mov     eax, [ebx + TCP_SOCKET.SND_MAX]
        cmp     eax, [edx + TCP_segment.AckNumber]
        je      .all_outstanding
        mov     [ebx + TCP_SOCKET.timer_retransmission], 120 ;;;; TODO: correct this value
  .all_outstanding:

;-------------------------------------------
; Open congestion window in response to ACKs

        ;;;;

;------------------------------------------
; Remove acknowledged data from send buffer

        ;;;; 943 - 956

;---------------------------------------
; Wake up process waiting on send buffer

        ;;;;;

        mov     eax, [ebx + TCP_SOCKET.t_state]
        shl     eax, 2
        jmp     dword [eax + .ACK_sw_list]

  .ACK_sw_list:
        dd      .step6          ;TCB_CLOSED
        dd      .step6          ;TCB_LISTEN
        dd      .step6          ;TCB_SYN_SENT
        dd      .step6          ;TCB_SYN_RECEIVED
        dd      .step6          ;TCB_ESTABLISHED
        dd      .step6          ;TCB_CLOSE_WAIT
        dd      ._963           ;TCB_FIN_WAIT_1
        dd      ._958           ;TCB_CLOSING
        dd      ._999           ;TCB_LAST_ACK
        dd      .step6          ;TCB_FIN_WAIT_2
        dd      ._1010          ;TCB_TIMED_WAIT

  ._963:

        jmp     .step6

  ._958:

        jmp     .step6

  ._999:

        jmp     .step6

  ._1010:

        jmp     .step6

align 4
  .step6:

        DEBUGF  1,"step 6\n"

;--------------------------
; update window information

        test    [edx + TCP_segment.Flags], TH_ACK
        jz      .no_window_update

        mov     eax, [ebx + TCP_SOCKET.SND_WL1]
        cmp     eax, [edx + TCP_segment.SequenceNumber]

        ;;;; 1021

;----------------------------------
; Keep track of pure window updates

        test    ecx, ecx
        jz      @f

        mov     eax, [ebx + TCP_SOCKET.SND_WL2]
        cmp     eax, [edx + TCP_segment.AckNumber]
        jne     @f

        ;; mov eax, tiwin
        cmp     eax, [ebx + TCP_SOCKET.SND_WND]
        jle     @f

        ;;; update stats

       @@:

        ;; mov eax, incoming window
        cmp     eax, [ebx + TCP_SOCKET.max_sndwnd]
        jle     @f
        mov     [ebx + TCP_SOCKET.max_sndwnd], eax
       @@:
        mov     [ebx + TCP_SOCKET.SND_WND], eax

        mov     eax, [edx + TCP_segment.SequenceNumber]
        mov     [ebx + TCP_SOCKET.SND_WL1], eax

        mov     eax, [edx + TCP_segment.AckNumber]
        mov     [ebx + TCP_SOCKET.SND_WL2], eax

        ;;; needoutput = 1

  .no_window_update:

;-----------------
; process URG flag

        test    [edx + TCP_segment.Flags], TH_URG
        jz      .not_urgent

        cmp     [edx + TCP_segment.UrgentPointer], 0
        jz      .not_urgent

        cmp     [ebx + TCP_SOCKET.t_state], TCB_TIMED_WAIT
        je      .not_urgent

; Ignore bogus urgent offsets

        ;;; 1040-1050

        movzx   eax, [edx + TCP_segment.UrgentPointer]
        add     eax, [ebx + SOCKET.SO_RCV.SB_CC]
        cmp     eax, SOCKET_MAXDATA
        jle     .not_urgent

        mov     [edx + TCP_segment.UrgentPointer], 0
        and     [edx + TCP_segment.Flags], not (TH_URG)
        jmp     .do_data

  .not_urgent:

;--------------------------------------
; processing of received urgent pointer

        ;;; 1051-1093

align 4
  .do_data:

        DEBUGF  1,"Do data:\n"

        ; process the data in the segment

        ; NOTE(review): jumping to FIN processing when FIN is *not* set
        ; looks inverted compared to the BSD original - confirm.
        test    [edx + TCP_segment.Flags], TH_FIN
        jz      .process_fin

        cmp     [ebx + TCP_SOCKET.t_state], TCB_FIN_WAIT_1      ;;;;; (states are values, not bit flags)
        jge     .dont_do_data

        DEBUGF  1,"Processing data in segment\n"

        ;;; NOW, process the data

        jmp     .final_processing

  .dont_do_data:

;---------------
; FIN processing

  .process_fin:

        DEBUGF  1,"Processing FIN\n"

        mov     eax, [ebx + TCP_SOCKET.t_state]
        shl     eax, 2
        jmp     dword [eax + .FIN_sw_list]

  .FIN_sw_list:
        dd      .no_fin         ;TCB_CLOSED
        dd      .no_fin         ;TCB_LISTEN
        dd      .no_fin         ;TCB_SYN_SENT
        dd      ._1131          ;TCB_SYN_RECEIVED
        dd      ._1131          ;TCB_ESTABLISHED
        dd      .no_fin         ;TCB_CLOSE_WAIT
        dd      ._1139          ;TCB_FIN_WAIT_1
        dd      .no_fin         ;TCB_CLOSING
        dd      .no_fin         ;TCB_LAST_ACK
        dd      ._1147          ;TCB_FIN_WAIT_2
        dd      ._1156          ;TCB_TIMED_WAIT

  ._1131:

  ._1139:

  ._1147:

  ._1156:

  .no_fin:

;-----------------
; Final processing

  .final_processing:

        DEBUGF  1,"Final processing\n"

        ;;; if debug enabled, output packet

        ;test    ;;;needoutput = 1
        ;jnz     .outputnow

        test    [ebx + TCP_SOCKET.t_flags], TF_ACKNOW
        jz      .ret

  .outputnow:
        mov     eax, ebx                ; TCP_output takes the socket in eax..
        push    ebx                     ; ..and overwrites ebx
        call    TCP_output
        pop     ebx

  .ret:
        mov     [ebx + SOCKET.lock], 0

        call    kernel_free
        ret     4

;------------------------------------------
; Generate an ACK, droping incoming segment

align 4
  .drop_after_ack:

        DEBUGF  1,"Drop after ACK\n"

        test    [edx + TCP_segment.Flags], TH_RST
        jnz     .drop

        or      [ebx + TCP_SOCKET.t_flags], TF_ACKNOW   ; set the flag ("and" wiped every other flag)

        mov     eax, ebx
        push    ebx
        call    TCP_output
        pop     ebx

        mov     [ebx + SOCKET.lock], 0

        call    kernel_free
        ret     4

;-------------------------------------------
; Generate an RST, dropping incoming segment
; (only for segments that belong to a locked socket in ebx)

align 4
  .drop_with_reset:

        DEBUGF  1,"Drop with reset\n"

        test    [edx + TCP_segment.Flags], TH_RST
        jnz     .drop

        ;;; if its a multicast/broadcast, also drop

        test    [edx + TCP_segment.Flags], TH_ACK
        jnz     .respond_ack

        test    [edx + TCP_segment.Flags], TH_SYN
        jnz     .respond_syn

        mov     [ebx + SOCKET.lock], 0

        call    kernel_free
        ret     4

  .respond_ack:

        ;;;;

        call    TCP_respond

        jmp     .destroy_new_socket

  .respond_syn:

        ;;;;

        call    TCP_respond

        jmp     .destroy_new_socket

;-----
; Drop (socket in ebx is locked and gets unlocked here)

align 4
  .drop:

        DEBUGF  1,"Dropping packet\n"

        ;;;; If debugging options are enabled, output the packet somwhere

  .destroy_new_socket:

        ;;;; kill the newly created socket

        mov     [ebx + SOCKET.lock], 0

        call    kernel_free
        ret     4

;---------------------------------------------------
; Exits for segments that never got a locked socket.
; ebx is the device pointer, an unlocked socket or 0
; here, so nothing may be written through it.

align 4
  .no_socket:

        DEBUGF  1,"Drop with reset\n"

        ;;;; TODO: answer with an RST; TCP_respond cannot build a reply
        ;;;; without a socket yet

  .drop_not_locked:

        DEBUGF  1,"Dropping packet\n"

        call    kernel_free
        ret     4


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;---------------------
;
; TCP_do_options
;
;  Walks the option area of a TCP header. Recognises MSS, window
;  scale and timestamp options and checks their lengths; the
;  actual parsing is still TODO.
;
;  IN:  eax = TCP header length in bytes
;       edx = ptr to TCP segment
;
;  OUT: /  (all registers are preserved)
;
;-------------------
align 4
TCP_do_options:

        DEBUGF  1,"TCP_do_options\n"

        push    eax esi edi

        sub     eax, 20                 ; eax = number of option bytes left
        jle     .no_options

        lea     esi, [edx + TCP_segment.Data]

;-------------------------------------------
; Begin the loop by checking for EOL and NOP

  .loop:

        cmp     byte [esi], TCP_OPT_EOL         ; end of option list?
        jz      .no_options

        mov     edi, 1                          ; a NOP is a single byte
        cmp     byte [esi], TCP_OPT_NOP         ; nop ?
        jz      .continue                       ; if so, skip it and continue scanning

;------------------
; We have an option

        cmp     eax, 2                          ; kind + length must both be inside the header
        jl      .no_options

        movzx   edi, byte [esi + 1]             ; get the length of this option in edi

        ; The length counts the kind and length bytes themselves. Anything
        ; below 2 would stall the loop, anything above the remaining byte
        ; count would run past the header: stop parsing in both cases.
        cmp     edi, 2
        jl      .no_options
        cmp     edi, eax
        jg      .no_options

;--------------------------------------
; Check for Maximum segment size option

        cmp     byte [esi], TCP_OPT_MAXSEG
        jne     .no_maxseg

        cmp     edi, 4                          ; option length
        jne     .continue

        test    [edx + TCP_segment.Flags], TH_SYN
        jz      .continue

        ; Now parse the option...

        jmp     .continue

  .no_maxseg:

;------------------------
; Check for Window option

        cmp     byte [esi], TCP_OPT_WINDOW
        jne     .no_window

        cmp     edi, 3                          ; option length
        jne     .continue

        test    [edx + TCP_segment.Flags], TH_SYN
        jz      .continue

        ; ...

        jmp     .continue

  .no_window:

;---------------------------
; Check for Timestamp option

        cmp     byte [esi], TCP_OPT_TIMESTAMP
        jne     .no_timestamp

        cmp     edi, 10                         ; option length
        jne     .continue

        ; ...

        jmp     .continue

  .no_timestamp:

;----------------------------------
; Future options may be placed here

;------------------------------
; Continue scanning for options

  .continue:
        add     esi, edi
        sub     eax, edi
        jg      .loop

  .no_options:
        pop     edi esi eax

        ret


;---------------------------
;
; TCP_pull_out_of_band
;
;  Placeholder: only logs, the out-of-band handling is not
;  implemented yet.
;
; IN:  eax =
;      ebx = socket ptr
;      edx = tcp packet ptr
;
; OUT: /
;
;---------------------------
align 4
TCP_pull_out_of_band:

        DEBUGF  1,"TCP_pull_out_of_band\n"

        ;;;; 1282-1305

        ret


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

;-----------------------------------------------------------------
;
; TCP_output
;
;  Decides whether a segment has to be sent for a socket and, if
;  so, starts building it. The decision logic follows 4.4BSD
;  tcp_output; the segment construction below .send is unfinished.
;
; IN:  eax = socket pointer
;;     esi = ptr to data
;;     ecx = number of data bytes
;
; OUT: /
;
; Clobbers at least eax, ebx, ecx, edx, esi, edi.
;
; Register roles in the decision part:
;       eax = socket pointer
;       ebx = offset of SND_NXT into the send buffer, later the socket pointer
;       ecx = usable window
;       dl  = flags to send
;       esi = number of data bytes to send
;
;-----------------------------------------------------------------
align 4
TCP_output:

        DEBUGF  1,"TCP_output, socket: %x\n", eax

; We'll detect the length of the data to be transmitted, and flags to be used
; If there is some data, or any critical controls to send (SYN / RST), then transmit
; Otherwise, investigate further

        mov     ebx, [eax + TCP_SOCKET.SND_MAX]
        cmp     ebx, [eax + TCP_SOCKET.SND_UNA]
        jne     .not_idle

        mov     ebx, [eax + TCP_SOCKET.t_idle]
        cmp     ebx, [eax + TCP_SOCKET.t_rxtcur]
        jle     .not_idle

; We have been idle for a while and no ACKS are expected to clock out any data we send..
; Slow start to get ack "clock" running again.

        mov     ebx, [eax + TCP_SOCKET.t_maxseg]
        mov     [eax + TCP_SOCKET.SND_CWND], ebx

  .not_idle:
  .again:
        mov     ebx, [eax + TCP_SOCKET.SND_NXT]         ; calculate offset
        sub     ebx, [eax + TCP_SOCKET.SND_UNA]         ;

        mov     ecx, [eax + TCP_SOCKET.SND_WND]         ; determine window
        cmp     ecx, [eax + TCP_SOCKET.SND_CWND]        ;
        jl      @f                                      ;
        mov     ecx, [eax + TCP_SOCKET.SND_CWND]        ;
       @@:                                              ;

        call    TCP_outflags            ; dl = flags for the current state

; If in persist timeout with window of 0, send 1 byte.
; Otherwise, if window is small but nonzero, and timer expired,
; we will send what we can and go to transmit state

        test    [eax + TCP_SOCKET.t_force], -1
        jz      .no_persist_timeout

        test    ecx, ecx
        jnz     .no_zero_window

        cmp     ebx, [eax + SOCKET.SO_SND.SB_CC]
        jge     @f

        and     dl, not (TH_FIN)          ; clear the FIN flag    ??? how can it be set before?

       @@:
        inc     ecx
        jmp     .no_persist_timeout

  .no_zero_window:

        ;;; mov [eax + TCP_SOCKET.t_timer....TCPT_PERSIST], 0
        mov     [eax + TCP_SOCKET.t_rxtshift], 0

  .no_persist_timeout:

        ;;;106

        mov     esi, [eax + SOCKET.SO_SND.SB_CC]
        cmp     esi, ecx
        jl      @f
        mov     esi, ecx
       @@:
        sub     esi, ebx                ; esi = min(buffered, window) - offset

        ; The offset is no longer needed. From here on ebx carries the
        ; socket pointer, which is what .no_window and .send address.
        mov     ebx, eax

        cmp     esi, -1
        jne     .not_minus_one

; If FIN has been set, but not ACKed, and we havent been called to retransmit,
; len (esi) will be -1
; Otherwise, window shrank after we sent into it.
; If window shrank to 0, cancel pending retransmit and pull SND_NXT back to (closed) window
; We will enter persist state below.
; If window didn't close completely, just wait for an ACK

        xor     esi, esi

        test    ecx, ecx
        jnz     @f

        ;;; mov [eax + TCP_SOCKET.t_timer..TCPT_REXMT], 0

        push    [eax + TCP_SOCKET.SND_UNA]
        pop     [eax + TCP_SOCKET.SND_NXT]
       @@:

  .not_minus_one:

        ;;; 124

        cmp     esi, [eax + TCP_SOCKET.t_maxseg]
        jle     @f

        mov     esi, [eax + TCP_SOCKET.t_maxseg]
        ;sendalot = 1

       @@:

        ;;; 128

        ; Clear FIN while data remains after this segment, i.e. when
        ; SND_NXT + len < SND_UNA + SB_CC.
        mov     edi, [eax + TCP_SOCKET.SND_NXT]
        add     edi, esi        ; len
        sub     edi, [eax + TCP_SOCKET.SND_UNA]
        sub     edi, [eax + SOCKET.SO_SND.SB_CC]
        jge     @f

        and     dl, not (TH_FIN)          ; clear the FIN flag

       @@:

;;;; 130 TODO: set window (ecx) to space in send buffer

;------------------------------
; Sender silly window avoidance

        test    esi, esi
        jz      .zero_length

        cmp     esi, [eax + TCP_SOCKET.t_maxseg]
        je      .send

        ;;; TODO: 144-145

        test    [eax + TCP_SOCKET.t_force], -1
        jnz     .send

        ;;; TODO: 149..152

  .zero_length:

;----------------------------------------
; Check if a window update should be sent

        cmp     ecx, 0  ; window
        jle     .no_window

        ;;; TODO 154-172

  .no_window:

;--------------------------
; Should a segment be sent?

        test    [ebx + TCP_SOCKET.t_flags], TF_ACKNOW
        jnz     .send

        test    dl, TH_SYN + TH_RST
        jnz     .send

        mov     eax, [ebx + TCP_SOCKET.SND_UP]
        cmp     eax, [ebx + TCP_SOCKET.SND_UNA]
        jg      .send

        test    dl, TH_FIN
        jz      .enter_persist

        ; Send the FIN if it was never sent, or if it is being retransmitted
        test    [ebx + TCP_SOCKET.t_flags], TF_SENTFIN
        jz      .send

        mov     eax, [ebx + TCP_SOCKET.SND_NXT]
        cmp     eax, [ebx + TCP_SOCKET.SND_UNA]
        je      .send

;--------------------
; Enter persist state

  .enter_persist:

        DEBUGF  1,"Entering persist state\n"

;--------------------------------------
; No reason to send a segment, just ret

        DEBUGF  1,"No reason to send a segment\n"

        ret

;-----------------------------------------------
;
; Send a segment
;
; ebx  = socket pointer
;  dl  = flags
;
; NOTE(review): this part is unfinished. Option dwords are pushed and
; never popped, .header pops values that no path pushes, and the flags
; are stored from cl although they live in dl. It is reproduced as it
; was - finish it before relying on it.
;
;-----------------------------------------------

  .send:

        DEBUGF  1,"Preparing to send a segment\n"

        xor     edi, edi                ; edi will contain the number of header option bytes

;------------------------------------
; Send options with first SYN segment

        test    dl, TH_SYN
        jz      .no_options

        mov     eax, [ebx + TCP_SOCKET.ISS]
        mov     [ebx + TCP_SOCKET.SND_NXT], eax

        test    [ebx + TCP_SOCKET.t_flags], TF_NOOPT
        jnz     .no_options

        mov     eax, TCP_OPT_MAXSEG shl 24 + 4 shl 16
        mov     ax, 1280 ;;;;;;
        bswap   eax
        push    eax

        mov     di, 4

        test    [ebx + TCP_SOCKET.t_flags], TF_REQ_SCALE
        jz      .no_syn

        test    dl, TH_ACK
        jnz     .scale_opt

        test    [ebx + TCP_SOCKET.t_flags], TF_RCVD_SCALE
        jz      .no_syn

  .scale_opt:

        mov     eax, TCP_OPT_WINDOW shl 24 + 4 shl 16 + TCP_OPT_NOP
        mov     ah, byte [ebx + TCP_SOCKET.request_r_scale]
        bswap   eax
        push    eax

        add     di, 4

  .no_syn:

;------------------------------------
; Make the timestamp option if needed

        test    [ebx + TCP_SOCKET.t_flags], TF_REQ_TSTMP
        jz      .no_timestamp

        test    dl, TH_RST
        jnz     .no_timestamp

        test    dl, TH_ACK
        jz      .timestamp

        test    [ebx + TCP_SOCKET.t_flags], TF_RCVD_TSTMP
        jz      .no_timestamp

  .timestamp:

        DEBUGF  1,"Creating a timestamp\n"

        push    dword (TCP_OPT_TIMESTAMP shl 8 + 10 + TCP_OPT_NOP shl 16 + TCP_OPT_NOP shl 24)
        pushw   0
        mov     eax, [timer_ticks]
        bswap   eax
        push    eax

        add     di, 10

  .no_timestamp:

        ;; TODO: check if we dont exceed the max segment size

  .no_options:
        add     edi, TCP_segment.Data

;-----------------------------------
; Check if we have some data to send

        ;;; mov ecx, [huppeldepup]

        test    ecx, ecx
        jz      .no_data

        ;;; 278-316

        jmp     .header

  .no_data:

        ;;; 317-338

;----------

        push    di dx ebx

        add     ecx, edi        ; total TCP segment size

        mov     eax, [ebx + IP_SOCKET.RemoteIP]
        mov     ebx, [ebx + IP_SOCKET.LocalIP]
        mov     di , IP_PROTO_TCP
        call    IPv4_create_packet

        ;;;; jz .fail

        push    edx eax
        call    [ebx + NET_DEVICE.transmit]
        ret

;----------------

;-------------------------------
; Now, create the 20-byte header

  .header:

;-----------------------
; Fill in the TCP header
        pop     esi

        push    [esi + TCP_SOCKET.SND_NXT]
        rol     word [esp], 8
        rol     dword [esp], 16
        pop     [edi + TCP_segment.SequenceNumber]

        push    [esi + TCP_SOCKET.RCV_NXT]
        rol     word [esp], 8
        rol     dword [esp], 16
        pop     [edi + TCP_segment.AckNumber]

        push    [esi + TCP_SOCKET.LocalPort]
        rol     word [esp], 8
        pop     [edi + TCP_segment.SourcePort]

        push    [esi + TCP_SOCKET.RemotePort]
        rol     word [esp], 8
        pop     [edi + TCP_segment.DestinationPort]

        mov     [edi + TCP_segment.Window], 0x0005      ; 1280 bytes
        mov     [edi + TCP_segment.UrgentPointer], 0

        mov     [edi + TCP_segment.DataOffset], 0x50

        mov     [edi + TCP_segment.Flags], cl

        mov     [edi + TCP_segment.Checksum], 0

;-----

;--------------
; Copy the data

        pop     esi
        push    edi
        add     edi, TCP_segment.Data   ;;
        sub     ecx, TCP_segment.Data   ;;;

        shr     ecx, 1
        jnc     .nb
        movsb
  .nb:
        shr     ecx, 1
        jnc     .nw
        movsw
  .nw:
        test    ecx, ecx
        jz      .nd
        rep     movsd
  .nd:
        pop     edi

;--------------------
; Create the checksum

        push    [ebx + IP_SOCKET.LocalIP]
        push    [ebx + IP_SOCKET.RemoteIP]
        call    TCP_checksum

;----------------
; Send the packet

        ;;;;;

        DEBUGF  1,"Sending TCP Packet to device %x\n", ebx
        call    [ebx + NET_DEVICE.transmit]
        ret


;-------------------------
;
; TCP_outflags
;
;  Looks up the header flags that a segment sent in the socket's
;  current state must carry (same table as tcp_outflags in 4.4BSD).
;
;  IN:  eax = socket ptr
;
;  OUT: edx = flags
;
;-------------------------
align 4
TCP_outflags:

        mov     edx, [eax + TCP_SOCKET.t_state]
        movzx   edx, byte [edx + .flaglist]     ; one byte per state, indexed by the TCB_* value

        DEBUGF  1,"TCP_outflags, socket: %x, flags: %x\n", eax, dl

        ret

  .flaglist:

        db      TH_RST + TH_ACK         ; TCB_CLOSED
        db      0                       ; TCB_LISTEN
        db      TH_SYN                  ; TCB_SYN_SENT
        db      TH_SYN + TH_ACK         ; TCB_SYN_RECEIVED
        db               TH_ACK         ; TCB_ESTABLISHED
        db               TH_ACK         ; TCB_CLOSE_WAIT
        db      TH_FIN + TH_ACK         ; TCB_FIN_WAIT_1  (closing states send FIN, not SYN)
        db      TH_FIN + TH_ACK         ; TCB_CLOSING
        db      TH_FIN + TH_ACK         ; TCB_LAST_ACK
        db               TH_ACK         ; TCB_FIN_WAIT_2
        db               TH_ACK         ; TCB_TIMED_WAIT


;-------------------------
;
; TCP_drop
;
;  Puts the socket in the CLOSED state and calls TCP_output, which
;  selects RST + ACK for that state.
;
;  IN:  eax = socket ptr
;
;  OUT: /
;
;-------------------------
align 4
TCP_drop:

        DEBUGF  1,"TCP_drop\n"

;        cmp     [eax + TCP_SOCKET.t_state], TCB_SYN_RECEIVED
;        jl      .no_syn_received

        mov     [eax + TCP_SOCKET.t_state], TCB_CLOSED

        call    TCP_output

;  .no_syn_received:

        ret


;---------------------------------------
;
; TCP_respond
;
;  The easy way to send an ACK/RST/keepalive segment
;
;  IN:  eax = socket ptr
;        -or-
;       edx = packet ptr (eax must be 0)
;       cl = flags
;
;  OUT: /
;
;  Only the socket form works: a reply needs the local and remote
;  IP address, and a bare segment pointer does not provide them.
;  Calls with eax = 0 are rejected instead of reading the addresses
;  through a null socket pointer.
;
;---------------------------------------
align 4
TCP_respond:

        DEBUGF  1,"TCP_respond\n"

        test    eax, eax
        jz      .fail                   ; nothing was pushed yet

;---------------------
; Create the IP packet

        push    ecx                     ; flags (cl)
        push    eax                     ; socket ptr
        mov     ebx, [eax + IP_SOCKET.LocalIP]
        mov     eax, [eax + IP_SOCKET.RemoteIP]
        mov     ecx, TCP_segment.Data
        mov     di , IP_PROTO_TCP
        call    IPv4_create_packet

        test    edi, edi
        jz      .error

;---------------------------
; Now fill in the TCP header

        pop     esi                     ; socket ptr
        pop     ecx                     ; cl = flags

        push    edx eax                 ; kept on the stack for the transmit call below

        call    .use_socket             ; writes the 20 header bytes at edi

;---------------------
; Fill in the checksum

  .checksum:

        push    [esi + IP_SOCKET.LocalIP]
        push    [esi + IP_SOCKET.RemoteIP]
        lea     esi, [edi - 20]         ; edi was advanced past the header by the stos writes
        mov     ecx, TCP_segment.Data   ; TCP_checksum takes the segment length in cx
        call    TCP_checksum

;--------------------
; And send the segment

        call    [ebx + NET_DEVICE.transmit]
        ret

  .error:
        add     esp, 8                  ; drop the saved flags and socket ptr
  .fail:
        DEBUGF  1,"TCP_ack failed\n"
        ret

;---------------------------------------------------
; Fill in the TCP header by using a received segment
;
; esi = ptr to received segment (fields in host order), edi = ptr to new
; header, cl = flags. Not called at the moment, see the routine header.

  .use_segment:

        mov     ax, [esi + TCP_segment.DestinationPort]
        rol     ax, 8
        stosw
        mov     ax, [esi + TCP_segment.SourcePort]
        rol     ax, 8
        stosw
        mov     eax, [esi + TCP_segment.AckNumber]
        bswap   eax
        stosd
        xor     eax, eax
        stosd
        mov     al, 0x50        ; Dataoffset: 20 bytes
        stosb
        mov     al, cl
        stosb
        mov     ax, 1280
        rol     ax, 8
        stosw                   ; window
        xor     eax, eax
        stosd                   ; checksum + urgentpointer

        ret

;-----------------------------------------------
; Fill in the TCP header by using the socket ptr
;
; esi = socket ptr, edi = ptr to new header, cl = flags

  .use_socket:

        mov     ax, [esi + TCP_SOCKET.LocalPort]
        rol     ax, 8
        stosw
        mov     ax, [esi + TCP_SOCKET.RemotePort]
        rol     ax, 8
        stosw
        mov     eax, [esi + TCP_SOCKET.SND_NXT]
        bswap   eax
        stosd
        mov     eax, [esi + TCP_SOCKET.RCV_NXT]
        bswap   eax
        stosd
        mov     al, 0x50        ; Dataoffset: 20 bytes
        stosb
        mov     al, cl
        stosb
        mov     ax, [esi + TCP_SOCKET.RCV_WND]
        rol     ax, 8
        stosw                   ; window
        xor     eax, eax
        stosd                   ; checksum + urgentpointer

        ret


;-----------------------------------------------------------------
;
; TCP_checksum
;
; This is the fast procedure to create or check a TCP header
;       - To create a new checksum, the checksum field must be set to 0 before computation
;       - To check an existing checksum, leave the checksum as is,
;          and it will be 0 after this procedure, if it was correct
;
;  IN:  push source ip
;       push dest ip
;
;       esi = packet ptr
;       ecx = TCP length (header + data); cx is added to the pseudo
;             header here and ecx is still set when checksum_1 is called
;
;  OUT: checksum is filled in in packet! (but also in dx)
;
;  The two pushed addresses are removed from the stack on return.
;
;-----------------------------------------------------------------
align 4
TCP_checksum:

;-------------
; Pseudoheader

        ; protocol type
        mov     edx, IP_PROTO_TCP               ; NO shl 8 here ! (it took me ages to figure this one out)

        ; source address
        add     dl, [esp+1+4]
        adc     dh, [esp+0+4]
        adc     dl, [esp+3+4]
        adc     dh, [esp+2+4]

        ; destination address
        adc     dl, [esp+1+8]
        adc     dh, [esp+0+8]
        adc     dl, [esp+3+8]
        adc     dh, [esp+2+8]

        ; size
        adc     dl, cl
        adc     dh, ch

;---------------------
; Real header and data

        push    esi
        call    checksum_1
        call    checksum_2
        pop     esi

        ; The result belongs in the TCP checksum field. The UDP_Packet
        ; offset used before points into the TCP sequence number.
        neg     [esi + TCP_segment.Checksum]            ; zero will stay zero so we just get the checksum
        add     [esi + TCP_segment.Checksum], dx        ; , else we will get (new checksum - old checksum) in the end, wich should be 0 :)

        ret     8                                       ; Remove the IPs from stack


;---------------------------------------------------------------------------
;
; TCP_API
;
; This function is called by system function 75
;
; IN:  subfunction number in bl
;      device number in bh
;      ecx, edx, ..
;      depends on subfunction
;
; Subfunctions:
;      0 = read the TCP_segments_tx entry of the device
;      1 = read the TCP_segments_rx entry of the device
;
; OUT: eax = requested dword,
;            or -1 for an unknown subfunction or a device number
;            that is not below IP_MAX_INTERFACES
;
; NOTE: bl is decremented when the subfunction is not 0.
;
;---------------------------------------------------------------------------
align 4
TCP_API:

        movzx   eax, bh                         ; eax = device number
        cmp     eax, IP_MAX_INTERFACES          ; the tables hold IP_MAX_INTERFACES dwords;
        jae     .error                          ; never index past them with a caller-supplied value

        test    bl, bl
        jz      .packets_tx     ; 0
        dec     bl
        jz      .packets_rx     ; 1

  .error:
        mov     eax, -1
        ret

  .packets_tx:
        mov     eax, [TCP_segments_tx + eax*4]
        ret

  .packets_rx:
        mov     eax, [TCP_segments_rx + eax*4]
        ret