From f3ab4896ed13733a22e9637395973fc1808823e1 Mon Sep 17 00:00:00 2001 From: Marco Varlese Date: Mon, 19 Feb 2018 15:23:13 +0100 Subject: SCTP: congestion control This patch addresses the requirements described in sections 7.2.1 and 7.2.2 of RFC 4960. Specifically, it implements the Slow-start and Congestion-avoidance policies. The patch also correctly implements some 'formatting' functions required - for instance - for packet tracing. Change-Id: I68eade1b30345de3acb3ac8a653a5ef76eb6d2ac Signed-off-by: Marco Varlese --- src/vnet/sctp/sctp.c | 187 +++++++++++++++++--------------------------- src/vnet/sctp/sctp.h | 141 +++++++++++++++++++++++++++------ src/vnet/sctp/sctp_input.c | 61 ++++++++------- src/vnet/sctp/sctp_output.c | 45 +++++++---- src/vnet/sctp/sctp_packet.h | 1 - 5 files changed, 251 insertions(+), 184 deletions(-) (limited to 'src') diff --git a/src/vnet/sctp/sctp.c b/src/vnet/sctp/sctp.c index 224c97d419e..b81d4d0e9ea 100644 --- a/src/vnet/sctp/sctp.c +++ b/src/vnet/sctp/sctp.c @@ -42,6 +42,8 @@ sctp_connection_bind (u32 session_index, transport_endpoint_t * tep) ip_copy (&listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.lcl_ip, &tep->ip, tep->is_ip4); + listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].PMTU = + vnet_sw_interface_get_mtu (vnet_get_main (), tep->sw_if_index, VLIB_TX); listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.is_ip4 = tep->is_ip4; listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.proto = TRANSPORT_PROTO_SCTP; @@ -178,25 +180,44 @@ format_sctp_state (u8 * s, va_list * args) u8 * format_sctp_connection_id (u8 * s, va_list * args) { - /* - sctp_connection_t *sctp_conn = va_arg (*args, sctp_connection_t *); - if (!sctp_conn) - return s; - if (sctp_conn->c_is_ip4) - { - s = format (s, "[#%d][%s] %U:%d->%U:%d", sctp_conn->c_thread_index, "T", - format_ip4_address, &sctp_conn->c_lcl_ip4, - clib_net_to_host_u16 (sctp_conn->c_lcl_port), format_ip4_address, - &sctp_conn->c_rmt_ip4, clib_net_to_host_u16 
(sctp_conn->c_rmt_port)); - } - else - { - s = format (s, "[#%d][%s] %U:%d->%U:%d", sctp_conn->c_thread_index, "T", - format_ip6_address, &sctp_conn->c_lcl_ip6, - clib_net_to_host_u16 (sctp_conn->c_lcl_port), format_ip6_address, - &sctp_conn->c_rmt_ip6, clib_net_to_host_u16 (sctp_conn->c_rmt_port)); - } - */ + sctp_connection_t *sctp_conn = va_arg (*args, sctp_connection_t *); + if (!sctp_conn) + return s; + + u8 i; + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + if (sctp_conn->sub_conn[i].connection.is_ip4) + { + s = format (s, "%U[#%d][%s] %U:%d->%U:%d", + s, + sctp_conn->sub_conn[i].connection.thread_index, + "T", + format_ip4_address, + &sctp_conn->sub_conn[i].connection.lcl_ip.ip4, + clib_net_to_host_u16 (sctp_conn->sub_conn[i]. + connection.lcl_port), + format_ip4_address, + &sctp_conn->sub_conn[i].connection.rmt_ip.ip4, + clib_net_to_host_u16 (sctp_conn->sub_conn[i]. + connection.rmt_port)); + } + else + { + s = format (s, "%U[#%d][%s] %U:%d->%U:%d", + s, + sctp_conn->sub_conn[i].connection.thread_index, + "T", + format_ip6_address, + &sctp_conn->sub_conn[i].connection.lcl_ip.ip6, + clib_net_to_host_u16 (sctp_conn->sub_conn[i]. + connection.lcl_port), + format_ip6_address, + &sctp_conn->sub_conn[i].connection.rmt_ip.ip6, + clib_net_to_host_u16 (sctp_conn->sub_conn[i]. + connection.rmt_port)); + } + } return s; } @@ -235,48 +256,11 @@ sctp_init_snd_vars (sctp_connection_t * sctp_conn) time_now = sctp_time_now (); sctp_conn->local_initial_tsn = random_u32 (&time_now); - sctp_conn->remote_initial_tsn = 0x0; - sctp_conn->last_rcvd_tsn = sctp_conn->remote_initial_tsn; + sctp_conn->last_unacked_tsn = sctp_conn->local_initial_tsn; sctp_conn->next_tsn = sctp_conn->local_initial_tsn + 1; -} - -/** - * Update max segment size we're able to process. - * - * The value is constrained by our interface's MTU and IP options. It is - * also what we advertise to our peer. 
- */ -void -sctp_update_rcv_mss (sctp_connection_t * sctp_conn) -{ - sctp_conn->smallest_PMTU = DEFAULT_A_RWND; /* TODO find our iface MTU */ - sctp_conn->a_rwnd = DEFAULT_A_RWND - sizeof (sctp_full_hdr_t); - sctp_conn->rcv_opts.a_rwnd = sctp_conn->a_rwnd; - sctp_conn->rcv_a_rwnd = sctp_conn->a_rwnd; /* This will be updated by our congestion algos */ -} - -void -sctp_init_mss (sctp_connection_t * sctp_conn) -{ - SCTP_DBG ("CONN_INDEX = %u", - sctp_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.c_index); - - u16 default_a_rwnd = 536; - sctp_update_rcv_mss (sctp_conn); - - /* TODO cache mss and consider PMTU discovery */ - sctp_conn->snd_a_rwnd = - clib_min (sctp_conn->rcv_opts.a_rwnd, sctp_conn->a_rwnd); - if (sctp_conn->snd_a_rwnd < sizeof (sctp_full_hdr_t)) - { - SCTP_ADV_DBG ("sctp_conn->snd_a_rwnd < sizeof(sctp_full_hdr_t)"); - /* Assume that at least the min default mss works */ - sctp_conn->snd_a_rwnd = default_a_rwnd; - sctp_conn->rcv_opts.a_rwnd = default_a_rwnd; - } - - ASSERT (sctp_conn->snd_a_rwnd > sizeof (sctp_full_hdr_t)); + sctp_conn->remote_initial_tsn = 0x0; + sctp_conn->last_rcvd_tsn = sctp_conn->remote_initial_tsn; } always_inline sctp_connection_t * @@ -384,6 +368,8 @@ sctp_connection_open (transport_endpoint_t * rmt) clib_spinlock_lock_if_init (&tm->half_open_lock); sctp_conn = sctp_half_open_connection_new (thread_id); + sctp_conn->sub_conn[idx].PMTU = + vnet_sw_interface_get_mtu (vnet_get_main (), rmt->sw_if_index, VLIB_TX); transport_connection_t *trans_conn = &sctp_conn->sub_conn[idx].connection; ip_copy (&trans_conn->rmt_ip, &rmt->ip, rmt->is_ip4); @@ -462,7 +448,8 @@ sctp_check_outstanding_data_chunks (sctp_connection_t * sctp_conn) sctp_conn->sub_conn[i].enqueue_state != SCTP_ERROR_ENQUEUED) { SCTP_DBG_OUTPUT - ("Connection %u has still DATA to be enqueued inboud / outboud"); + ("Connection %u has still DATA to be enqueued inboud / outboud", + sctp_conn->sub_conn[i].connection.c_index); return 1; } @@ -485,6 +472,7 @@ void 
sctp_session_close (u32 conn_index, u32 thread_index) { ASSERT (thread_index == 0); + sctp_connection_t *sctp_conn; sctp_conn = sctp_connection_get (conn_index, thread_index); if (sctp_conn != NULL) @@ -506,80 +494,40 @@ sctp_session_cleanup (u32 conn_index, u32 thread_index) } /** - * Update snd_mss to reflect the effective segment size that we can send + * Compute maximum segment size for session layer. */ -void -sctp_update_snd_mss (sctp_connection_t * sctp_conn) -{ - /* The overhead for the sctp_header_t and sctp_chunks_common_hdr_t - * (the sum equals to sctp_full_hdr_t) is already taken into account - * for the sctp_conn->a_rwnd computation. - * So let's not account it again here. - */ - sctp_conn->snd_hdr_length = - sizeof (sctp_payload_data_chunk_t) - sizeof (sctp_full_hdr_t); - sctp_conn->snd_a_rwnd = - clib_min (sctp_conn->a_rwnd, - sctp_conn->rcv_opts.a_rwnd) - sctp_conn->snd_hdr_length; - - SCTP_DBG ("sctp_conn->snd_a_rwnd = %u, sctp_conn->snd_hdr_length = %u ", - sctp_conn->snd_a_rwnd, sctp_conn->snd_hdr_length); - - ASSERT (sctp_conn->snd_a_rwnd > 0); -} - u16 sctp_session_send_mss (transport_connection_t * trans_conn) { - SCTP_DBG ("CONN_INDEX: %u", trans_conn->c_index); - sctp_connection_t *sctp_conn = sctp_get_connection_from_transport (trans_conn); - if (trans_conn == NULL) - { - SCTP_DBG ("trans_conn == NULL"); - return 0; - } - if (sctp_conn == NULL) { SCTP_DBG ("sctp_conn == NULL"); return 0; } - /* Ensure snd_mss does accurately reflect the amount of data we can push - * in a segment. This also makes sure that options are updated according to - * the current state of the connection. 
*/ - sctp_update_snd_mss (sctp_conn); - return sctp_conn->snd_a_rwnd; + update_cwnd (sctp_conn); + update_smallest_pmtu_idx (sctp_conn); + + return sctp_conn->sub_conn[sctp_conn->smallest_PMTU_idx].cwnd; } u16 sctp_snd_space (sctp_connection_t * sctp_conn) { - /* TODO: This requires a real implementation */ - if (sctp_conn == NULL) - { - SCTP_DBG ("sctp_conn == NULL"); - return 0; - } - - if (sctp_conn->state != SCTP_STATE_ESTABLISHED) - { - SCTP_DBG_STATE_MACHINE - ("Trying to send DATA while not in SCTP_STATE_ESTABLISHED"); - return 0; - } - - return sctp_conn->snd_a_rwnd; + /* Finally, let's subtract the DATA chunk headers overhead */ + return sctp_conn->sub_conn[sctp_conn->smallest_PMTU_idx].cwnd - + sizeof (sctp_payload_data_chunk_t) - sizeof (sctp_full_hdr_t); } +/** + * Compute TX window session is allowed to fill. + */ u32 sctp_session_send_space (transport_connection_t * trans_conn) { - SCTP_DBG ("CONN_INDEX: %u", trans_conn->c_index); - sctp_connection_t *sctp_conn = sctp_get_connection_from_transport (trans_conn); @@ -610,13 +558,25 @@ sctp_session_get_listener (u32 listener_index) u8 * format_sctp_session (u8 * s, va_list * args) { - return NULL; + u32 tci = va_arg (*args, u32); + u32 thread_index = va_arg (*args, u32); + u32 verbose = va_arg (*args, u32); + sctp_connection_t *tc; + + tc = sctp_connection_get (tci, thread_index); + if (tc) + s = format (s, "%U", format_sctp_connection, tc, verbose); + else + s = format (s, "empty\n"); + return s; } u8 * format_sctp_listener_session (u8 * s, va_list * args) { - return NULL; + u32 tci = va_arg (*args, u32); + sctp_connection_t *tc = sctp_listener_get (tci); + return format (s, "%U", format_sctp_connection_id, tc); } void @@ -849,7 +809,6 @@ const static transport_proto_vft_t sctp_proto = { .push_header = sctp_push_header, .send_mss = sctp_session_send_mss, .send_space = sctp_session_send_space, - .tx_fifo_offset = NULL, //sctp_session_tx_fifo_offset, .update_time = sctp_update_time, .get_connection = 
sctp_session_get_transport, .get_listener = sctp_session_get_listener, diff --git a/src/vnet/sctp/sctp.h b/src/vnet/sctp/sctp.h index af652dc06dc..0d2e4b3e821 100644 --- a/src/vnet/sctp/sctp.h +++ b/src/vnet/sctp/sctp.h @@ -77,7 +77,7 @@ typedef enum _sctp_error #define IS_B_BIT_SET(var) ((var) & (1<<1)) #define IS_U_BIT_SET(var) ((var) & (1<<2)) -#define MAX_SCTP_CONNECTIONS 32 +#define MAX_SCTP_CONNECTIONS 8 #define MAIN_SCTP_SUB_CONN_IDX 0 #if (VLIB_BUFFER_TRACE_TRAJECTORY) @@ -96,6 +96,7 @@ enum _sctp_subconn_state SCTP_SUBCONN_STATE_ALLOW_HB }; +#define SCTP_INITIAL_SSHTRESH 65535 typedef struct _sctp_sub_connection { transport_connection_t connection; /**< Common transport data. First! */ @@ -104,8 +105,10 @@ typedef struct _sctp_sub_connection u32 error_count; /**< The current error count for this destination. */ u32 error_threshold; /**< Current error threshold for this destination, i.e. what value marks the destination down if error count reaches this value. */ - u32 cwnd; /**< The current congestion window. */ - u32 ssthresh; /**< The current ssthresh value. */ + u32 cwnd; /**< Congestion control window (cwnd, in bytes), which is adjusted by + the sender based on observed network conditions. */ + u32 ssthresh; /**< Slow-start threshold (in bytes), which is used by the + sender to distinguish slow-start and congestion avoidance phases. */ u32 rtt_ts; /**< USED to hold the timestamp of when the packet has been sent */ @@ -132,12 +135,14 @@ typedef struct _sctp_sub_connection u32 last_seen; /**< The time to which this destination was last sent a packet to. This can be used to determine if a HEARTBEAT is needed. */ + u32 last_data_ts; /**< Used to hold the timestamp value of last time we sent a DATA chunk */ + u8 unacknowledged_hb; /**< Used to track how many unacknowledged heartbeats we had; If more than SCTP_PATH_MAX_RETRANS then connection is considered unreachable. 
*/ u8 is_retransmitting; /**< A flag (0 = no, 1 = yes) indicating whether the connection is retransmitting a previous packet */ - u8 enqueue_state; + u8 enqueue_state; /**< if set to 1 indicates that DATA is still being handled hence cannot shutdown this connection yet */ } sctp_sub_connection_t; @@ -203,6 +208,7 @@ typedef struct _sctp_connection TSN (normally just prior to transmit or during fragmentation). */ + u32 last_unacked_tsn; /** < Last TSN number still unacked */ u32 next_tsn_expected; /**< The next TSN number expected to be received. */ u32 last_rcvd_tsn; /**< This is the last TSN received in sequence. This value @@ -221,25 +227,14 @@ typedef struct _sctp_connection Note: This is used only when no DATA chunks are received out-of-order. When DATA chunks are out-of-order, SACKs are not delayed (see Section 6). */ - u32 a_rwnd; /** This value represents the dedicated buffer space, in number of bytes, - the sender of the INIT has reserved in association with this window. - During the life of the association, this buffer space SHOULD NOT be lessened - (i.e., dedicated buffers taken away from this association); - however, an endpoint MAY change the value of a_rwnd it sends in SACK chunks. */ - - u32 smallest_PMTU; /** The smallest PMTU discovered for all of the peer's transport addresses. */ - - u32 rcv_a_rwnd; /**< LOCAL max seg size that includes options. To be updated by congestion algos, etc. */ - u32 snd_a_rwnd; /**< REMOTE max seg size that includes options. To be updated if peer pushes back on window, etc.*/ + u8 smallest_PMTU_idx; /** The index of the sub-connection with the smallest PMTU discovered across all peer's transport addresses. 
*/ u8 overall_sending_status; /**< 0 indicates first fragment of a user message 1 indicates normal stream 2 indicates last fragment of a user message */ - sctp_options_t rcv_opts; sctp_options_t snd_opts; - u32 snd_hdr_length; /**< BASE HEADER LENGTH for the DATA chunk when sending */ u8 next_avail_sub_conn; /**< Represent the index of the next free slot in sub_conn */ } sctp_connection_t; @@ -413,7 +408,7 @@ sctp_optparam_type_to_string (u8 type) #define SCTP_RTO_INIT 3 * SHZ /* 3 seconds */ #define SCTP_RTO_MIN 1 * SHZ /* 1 second */ #define SCTP_RTO_MAX 60 * SHZ /* 60 seconds */ -#define SCTP_RTO_BURST 4 +#define SCTP_RTO_BURST 4 #define SCTP_RTO_ALPHA 1/8 #define SCTP_RTO_BETA 1/4 #define SCTP_VALID_COOKIE_LIFE 60 * SHZ /* 60 seconds */ @@ -423,6 +418,8 @@ sctp_optparam_type_to_string (u8 type) #define SCTP_HB_INTERVAL 30 * SHZ #define SCTP_HB_MAX_BURST 1 +#define SCTP_DATA_IDLE_INTERVAL 15 * SHZ /* 15 seconds; the time-interval after which the connetion is considered IDLE */ + #define SCTP_TO_TIMER_TICK SCTP_TICK*10 /* Period for converting from SCTP_TICK */ typedef struct _sctp_lookup_dispatch @@ -715,17 +712,18 @@ sctp_connection_get (u32 conn_index, u32 thread_index) always_inline u8 sctp_data_subconn_select (sctp_connection_t * sctp_conn) { - u8 i = 0; - u8 state = SCTP_SUBCONN_STATE_DOWN; u32 sub = MAIN_SCTP_SUB_CONN_IDX; - u32 data_subconn_seed = random_default_seed (); - - while (state == SCTP_SUBCONN_STATE_DOWN && i < SELECT_MAX_RETRIES) + u8 i, cwnd = sctp_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].cwnd; + for (i = 1; i < MAX_SCTP_CONNECTIONS; i++) { - u32 sub = random_u32 (&data_subconn_seed) % MAX_SCTP_CONNECTIONS; - if (sctp_conn->sub_conn[sub].state == SCTP_SUBCONN_STATE_UP) - break; - i++; + if (sctp_conn->sub_conn[i].state == SCTP_SUBCONN_STATE_DOWN) + continue; + + if (sctp_conn->sub_conn[i].cwnd > cwnd) + { + sub = i; + cwnd = sctp_conn->sub_conn[i].cwnd; + } } return sub; } @@ -813,6 +811,97 @@ vlib_buffer_push_sctp (vlib_buffer_t * b, u16 
sp_net, u16 dp_net, sctp_hdr_opts_len); } +always_inline void +update_smallest_pmtu_idx (sctp_connection_t * sctp_conn) +{ + u8 i; + u8 smallest_pmtu_index = MAIN_SCTP_SUB_CONN_IDX; + + for (i = 1; i < MAX_SCTP_CONNECTIONS; i++) + { + if (sctp_conn->sub_conn[i].state != SCTP_SUBCONN_STATE_DOWN) + { + if (sctp_conn->sub_conn[i].PMTU < + sctp_conn->sub_conn[smallest_pmtu_index].PMTU) + smallest_pmtu_index = i; + } + } + + sctp_conn->smallest_PMTU_idx = smallest_pmtu_index; +} + +/* As per RFC4960; section 7.2.1: Slow-Start */ +always_inline void +sctp_init_cwnd (sctp_connection_t * sctp_conn) +{ + u8 i; + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + /* Section 7.2.1; point (1) */ + sctp_conn->sub_conn[i].cwnd = + clib_min (4 * sctp_conn->sub_conn[i].PMTU, + clib_max (2 * sctp_conn->sub_conn[i].PMTU, 4380)); + + /* Section 7.2.1; point (3) */ + sctp_conn->sub_conn[i].ssthresh = SCTP_INITIAL_SSHTRESH; + + /* Section 7.2.2; point (1) */ + sctp_conn->sub_conn[i].partially_acked_bytes = 0; + } +} + +always_inline u8 +sctp_in_cong_recovery (sctp_connection_t * sctp_conn, u8 idx) +{ + return 0; +} + +always_inline u8 +cwnd_fully_utilized (sctp_connection_t * sctp_conn, u8 idx) +{ + return 0; +} + +/* As per RFC4960; section 7.2.1: Slow-Start */ +always_inline void +update_cwnd (sctp_connection_t * sctp_conn) +{ + u8 i; + + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + /* Section 7.2.1; point (2) */ + if (sctp_conn->sub_conn[i].is_retransmitting) + { + sctp_conn->sub_conn[i].cwnd = 1 * sctp_conn->sub_conn[i].PMTU; + continue; + } + + /* Section 7.2.2; point (4) */ + if (sctp_conn->sub_conn[i].last_data_ts > + sctp_time_now () + SCTP_DATA_IDLE_INTERVAL) + { + sctp_conn->sub_conn[i].cwnd = + clib_max (sctp_conn->sub_conn[i].cwnd / 2, + 4 * sctp_conn->sub_conn[i].PMTU); + continue; + } + + /* Section 7.2.1; point (5) */ + if (sctp_conn->sub_conn[i].cwnd <= sctp_conn->sub_conn[i].ssthresh) + { + if (!cwnd_fully_utilized (sctp_conn, i)) + continue; + + if 
(sctp_in_cong_recovery (sctp_conn, i)) + continue; + + sctp_conn->sub_conn[i].cwnd = + clib_min (sctp_conn->sub_conn[i].PMTU, 1); + } + } +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/vnet/sctp/sctp_input.c b/src/vnet/sctp/sctp_input.c index 6c804880d7e..35218d5dd62 100644 --- a/src/vnet/sctp/sctp_input.c +++ b/src/vnet/sctp/sctp_input.c @@ -537,6 +537,8 @@ sctp_handle_init_ack (sctp_header_t * sctp_hdr, sctp_timer_set (sctp_conn, idx, SCTP_TIMER_T1_COOKIE, sctp_conn->sub_conn[idx].RTO); + stream_session_accept_notify (&sctp_conn->sub_conn[idx].connection); + return SCTP_ERROR_NONE; } @@ -804,8 +806,6 @@ sctp_handle_cookie_echo (sctp_header_t * sctp_hdr, sctp_timer_set (sctp_conn, idx, SCTP_TIMER_T4_HEARTBEAT, sctp_conn->sub_conn[idx].RTO); - stream_session_accept_notify (&sctp_conn->sub_conn[idx].connection); - return SCTP_ERROR_NONE; } @@ -834,8 +834,6 @@ sctp_handle_cookie_ack (sctp_header_t * sctp_hdr, sctp_timer_set (sctp_conn, idx, SCTP_TIMER_T4_HEARTBEAT, sctp_conn->sub_conn[idx].RTO); - stream_session_accept_notify (&sctp_conn->sub_conn[idx].connection); - return SCTP_ERROR_NONE; } @@ -870,7 +868,7 @@ sctp46_rcv_phase_inline (vlib_main_t * vm, vlib_node_runtime_t * node, ip6_header_t *ip6_hdr = 0; sctp_connection_t *sctp_conn, *new_sctp_conn; u16 sctp_implied_length = 0; - u16 error0 = SCTP_ERROR_NONE, next0 = SCTP_RCV_PHASE_N_NEXT; + u16 error0 = SCTP_ERROR_NONE, next0 = sctp_next_drop (is_ip4); u8 idx; bi0 = from[0]; @@ -936,6 +934,8 @@ sctp46_rcv_phase_inline (vlib_main_t * vm, vlib_node_runtime_t * node, new_sctp_conn - tm->connections[my_thread_index]; new_sctp_conn->sub_conn[idx].c_thread_index = my_thread_index; + new_sctp_conn->sub_conn[idx].PMTU = + sctp_conn->sub_conn[idx].PMTU; new_sctp_conn->sub_conn[idx].parent = new_sctp_conn; if (sctp_half_open_connection_cleanup (sctp_conn)) @@ -951,7 +951,7 @@ sctp46_rcv_phase_inline (vlib_main_t * vm, vlib_node_runtime_t * node, new_sctp_conn, idx, b0, sctp_implied_length); - 
sctp_init_mss (new_sctp_conn); + sctp_init_cwnd (new_sctp_conn); if (session_stream_connect_notify (&new_sctp_conn->sub_conn[idx].connection, 0)) @@ -962,8 +962,8 @@ sctp46_rcv_phase_inline (vlib_main_t * vm, vlib_node_runtime_t * node, sctp_connection_cleanup (new_sctp_conn); goto drop; } + next0 = sctp_next_output (is_ip4); } - next0 = sctp_next_output (is_ip4); break; /* All UNEXPECTED scenarios (wrong chunk received per state-machine) @@ -1177,13 +1177,11 @@ sctp_handle_shutdown_complete (sctp_header_t * sctp_hdr, if (sctp_is_bundling (sctp_implied_length, &shutdown_complete->chunk_hdr)) return SCTP_ERROR_BUNDLING_VIOLATION; - sctp_timer_reset (sctp_conn, MAIN_SCTP_SUB_CONN_IDX, - SCTP_TIMER_T2_SHUTDOWN); + sctp_timer_reset (sctp_conn, idx, SCTP_TIMER_T2_SHUTDOWN); - sctp_conn->state = SCTP_STATE_CLOSED; + stream_session_disconnect_notify (&sctp_conn->sub_conn[idx].connection); - stream_session_disconnect_notify (&sctp_conn->sub_conn - [MAIN_SCTP_SUB_CONN_IDX].connection); + sctp_conn->state = SCTP_STATE_CLOSED; *next0 = sctp_next_drop (sctp_conn->sub_conn[idx].c_is_ip4); @@ -1422,6 +1420,17 @@ sctp_handle_sack (sctp_selective_ack_chunk_t * sack_chunk, sctp_conn->sub_conn[idx].last_seen = sctp_time_now (); + /* Section 7.2.2; point (2) */ + if (sctp_conn->sub_conn[idx].cwnd > sctp_conn->sub_conn[idx].ssthresh) + sctp_conn->sub_conn[idx].partially_acked_bytes = + sctp_conn->next_tsn - sack_chunk->cumulative_tsn_ack; + + /* Section 7.2.2; point (5) */ + if (sctp_conn->next_tsn - sack_chunk->cumulative_tsn_ack == 0) + sctp_conn->sub_conn[idx].partially_acked_bytes = 0; + + sctp_conn->last_unacked_tsn = sack_chunk->cumulative_tsn_ack; + sctp_calculate_rto (sctp_conn, idx); sctp_timer_update (sctp_conn, idx, SCTP_TIMER_T3_RXTX, @@ -1510,7 +1519,7 @@ sctp46_listen_process_inline (vlib_main_t * vm, ip6_header_t *ip6_hdr; sctp_connection_t *child_conn; sctp_connection_t *sctp_listener; - u16 next0 = SCTP_LISTEN_PHASE_N_NEXT, error0 = SCTP_ERROR_ENQUEUED; + u16 next0 = 
sctp_next_drop (is_ip4), error0 = SCTP_ERROR_ENQUEUED; bi0 = from[0]; to_next[0] = bi0; @@ -1560,6 +1569,8 @@ sctp46_listen_process_inline (vlib_main_t * vm, child_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_is_ip4 = is_ip4; child_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.proto = sctp_listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.proto; + child_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].PMTU = + sctp_listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].PMTU; child_conn->state = SCTP_STATE_CLOSED; if (is_ip4) @@ -1609,7 +1620,7 @@ sctp46_listen_process_inline (vlib_main_t * vm, sctp_handle_init (sctp_hdr, sctp_chunk_hdr, child_conn, b0, sctp_implied_length); - sctp_init_mss (child_conn); + sctp_init_cwnd (child_conn); if (error0 == SCTP_ERROR_NONE) { @@ -1624,8 +1635,8 @@ sctp46_listen_process_inline (vlib_main_t * vm, error0 = SCTP_ERROR_CREATE_SESSION_FAIL; goto drop; } + next0 = sctp_next_output (is_ip4); } - next0 = sctp_next_output (is_ip4); break; /* Reception of a DATA chunk whilst in the CLOSED state is called @@ -2058,9 +2069,6 @@ sctp46_input_dispatcher (vlib_main_t * vm, vlib_node_runtime_t * node, goto done; } -#if SCTP_DEBUG_STATE_MACHINE - u8 idx = sctp_pick_conn_idx_on_state (sctp_conn->state); -#endif vnet_buffer (b0)->sctp.hdr_offset = (u8 *) sctp_hdr - (u8 *) vlib_buffer_get_current (b0); @@ -2075,15 +2083,12 @@ sctp46_input_dispatcher (vlib_main_t * vm, vlib_node_runtime_t * node, next0 = tm->dispatch_table[sctp_conn->state][chunk_type].next; error0 = tm->dispatch_table[sctp_conn->state][chunk_type].error; - SCTP_DBG_STATE_MACHINE ("CONNECTION_INDEX = %u: " - "CURRENT_CONNECTION_STATE = %s," - "CHUNK_TYPE_RECEIVED = %s " - "NEXT_PHASE = %s", - sctp_conn->sub_conn - [idx].connection.c_index, - sctp_state_to_string (sctp_conn->state), - sctp_chunk_to_string (chunk_type), - phase_to_string (next0)); + SCTP_DBG_STATE_MACHINE + ("SESSION_INDEX = %u, CURRENT_CONNECTION_STATE = %s," + "CHUNK_TYPE_RECEIVED = %s " "NEXT_PHASE = %s", + 
sctp_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX]. + connection.s_index, sctp_state_to_string (sctp_conn->state), + sctp_chunk_to_string (chunk_type), phase_to_string (next0)); if (chunk_type == DATA) SCTP_ADV_DBG ("n_advance_bytes0 = %u, n_data_bytes0 = %u", @@ -2223,7 +2228,7 @@ do { \ * _(SHUTDOWN_RECEIVED, "SHUTDOWN_RECEIVED") \ * _(SHUTDOWN_ACK_SENT, "SHUTDOWN_ACK_SENT") */ - _(CLOSED, DATA, SCTP_INPUT_NEXT_LISTEN_PHASE, SCTP_ERROR_NONE); /* UNEXPECTED DATA chunk which requires special handling */ + //_(CLOSED, DATA, SCTP_INPUT_NEXT_LISTEN_PHASE, SCTP_ERROR_NONE); /* UNEXPECTED DATA chunk which requires special handling */ _(CLOSED, INIT, SCTP_INPUT_NEXT_LISTEN_PHASE, SCTP_ERROR_NONE); _(CLOSED, INIT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED INIT_ACK chunk */ _(CLOSED, SACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SACK_CHUNK_VIOLATION); /* UNEXPECTED SACK chunk */ diff --git a/src/vnet/sctp/sctp_output.c b/src/vnet/sctp/sctp_output.c index 459b33d46bc..fba06d942e7 100644 --- a/src/vnet/sctp/sctp_output.c +++ b/src/vnet/sctp/sctp_output.c @@ -463,7 +463,7 @@ sctp_prepare_init_chunk (sctp_connection_t * sctp_conn, u8 idx, vnet_sctp_set_chunk_length (&init_chunk->chunk_hdr, chunk_len); vnet_sctp_common_hdr_params_host_to_net (&init_chunk->chunk_hdr); - init_chunk->a_rwnd = clib_host_to_net_u32 (DEFAULT_A_RWND); + init_chunk->a_rwnd = clib_host_to_net_u32 (sctp_conn->sub_conn[idx].cwnd); init_chunk->initiate_tag = clib_host_to_net_u32 (random_u32 (&random_seed)); init_chunk->inboud_streams_count = clib_host_to_net_u16 (INBOUND_STREAMS_COUNT); @@ -717,7 +717,8 @@ sctp_prepare_initack_chunk (sctp_connection_t * sctp_conn, u8 idx, init_ack_chunk->initiate_tag = clib_host_to_net_u32 (random_u32 (&random_seed)); - init_ack_chunk->a_rwnd = clib_host_to_net_u32 (DEFAULT_A_RWND); + init_ack_chunk->a_rwnd = + clib_host_to_net_u32 (sctp_conn->sub_conn[idx].cwnd); init_ack_chunk->inboud_streams_count = clib_host_to_net_u16 (INBOUND_STREAMS_COUNT); 
init_ack_chunk->outbound_streams_count = @@ -1030,8 +1031,6 @@ sctp_send_shutdown_complete (sctp_connection_t * sctp_conn, u8 idx, sctp_reuse_buffer (vm, b0); sctp_prepare_shutdown_complete_chunk (sctp_conn, idx, b0); - - sctp_conn->state = SCTP_STATE_CLOSED; } /* @@ -1072,7 +1071,7 @@ sctp_send_init (sctp_connection_t * sctp_conn) * Push SCTP header and update connection variables */ static void -sctp_push_hdr_i (sctp_connection_t * sctp_conn, u8 idx, vlib_buffer_t * b, +sctp_push_hdr_i (sctp_connection_t * sctp_conn, vlib_buffer_t * b, sctp_state_t next_state) { u16 data_len = @@ -1093,6 +1092,8 @@ sctp_push_hdr_i (sctp_connection_t * sctp_conn, u8 idx, vlib_buffer_t * b, sctp_payload_data_chunk_t *data_chunk = vlib_buffer_push_uninit (b, bytes_to_add); + u8 idx = sctp_data_subconn_select (sctp_conn); + data_chunk->sctp_hdr.checksum = 0; data_chunk->sctp_hdr.src_port = sctp_conn->sub_conn[idx].connection.lcl_port; @@ -1113,8 +1114,22 @@ sctp_push_hdr_i (sctp_connection_t * sctp_conn, u8 idx, vlib_buffer_t * b, SCTP_ADV_DBG_OUTPUT ("POINTER_WITH_DATA = %p, DATA_OFFSET = %u", b->data, b->current_data); + sctp_conn->last_unacked_tsn = sctp_conn->next_tsn; sctp_conn->next_tsn += data_len; + u32 inflight = sctp_conn->next_tsn - sctp_conn->last_unacked_tsn; + /* Section 7.2.2; point (3) */ + if (sctp_conn->sub_conn[idx].partially_acked_bytes >= + sctp_conn->sub_conn[idx].cwnd + && inflight >= sctp_conn->sub_conn[idx].cwnd) + { + sctp_conn->sub_conn[idx].cwnd += sctp_conn->sub_conn[idx].PMTU; + sctp_conn->sub_conn[idx].partially_acked_bytes -= + sctp_conn->sub_conn[idx].cwnd; + } + + sctp_conn->sub_conn[idx].last_data_ts = sctp_time_now (); + vnet_buffer (b)->sctp.connection_index = sctp_conn->sub_conn[idx].connection.c_index; @@ -1127,9 +1142,7 @@ sctp_push_header (transport_connection_t * trans_conn, vlib_buffer_t * b) sctp_connection_t *sctp_conn = sctp_get_connection_from_transport (trans_conn); - u8 idx = sctp_data_subconn_select (sctp_conn); - - sctp_push_hdr_i 
(sctp_conn, idx, b, SCTP_STATE_ESTABLISHED); + sctp_push_hdr_i (sctp_conn, b, SCTP_STATE_ESTABLISHED); sctp_trajectory_add_start (b0, 3); @@ -1333,8 +1346,9 @@ sctp46_output_inline (vlib_main_t * vm, } #endif SCTP_DBG_STATE_MACHINE - ("CONN_INDEX = %u, CURR_CONN_STATE = %u (%s), " + ("SESSION_INDEX = %u, CONN_INDEX = %u, CURR_CONN_STATE = %u (%s), " "CHUNK_TYPE = %s, " "SRC_PORT = %u, DST_PORT = %u", + sctp_conn->sub_conn[idx].connection.s_index, sctp_conn->sub_conn[idx].connection.c_index, sctp_conn->state, sctp_state_to_string (sctp_conn->state), sctp_chunk_to_string (chunk_type), full_hdr->hdr.src_port, @@ -1352,6 +1366,7 @@ sctp46_output_inline (vlib_main_t * vm, error0 = SCTP_ERROR_UNKOWN_CHUNK; next0 = SCTP_OUTPUT_NEXT_DROP; goto done; + } #endif @@ -1415,12 +1430,12 @@ sctp46_output_inline (vlib_main_t * vm, b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; - SCTP_DBG_STATE_MACHINE ("CONNECTION_INDEX = %u, " - "NEW_STATE = %s, " - "CHUNK_SENT = %s", - sctp_conn->sub_conn[idx].connection.c_index, - sctp_state_to_string (sctp_conn->state), - sctp_chunk_to_string (chunk_type)); + SCTP_DBG_STATE_MACHINE + ("SESSION_INDEX = %u, CONNECTION_INDEX = %u, " "NEW_STATE = %s, " + "CHUNK_SENT = %s", sctp_conn->sub_conn[idx].connection.s_index, + sctp_conn->sub_conn[idx].connection.c_index, + sctp_state_to_string (sctp_conn->state), + sctp_chunk_to_string (chunk_type)); vnet_sctp_common_hdr_params_host_to_net (&full_hdr->common_hdr); diff --git a/src/vnet/sctp/sctp_packet.h b/src/vnet/sctp/sctp_packet.h index 9419c16241a..8109efcafc1 100644 --- a/src/vnet/sctp/sctp_packet.h +++ b/src/vnet/sctp/sctp_packet.h @@ -513,7 +513,6 @@ vnet_sctp_calculate_padding (u16 base_length) return (4 - base_length % 4); } -#define DEFAULT_A_RWND 1480 #define INBOUND_STREAMS_COUNT 1 #define OUTBOUND_STREAMS_COUNT 1 -- cgit 1.2.3-korg