From 8ad6a2dfed0a7248be9d005dd04c76da142f2238 Mon Sep 17 00:00:00 2001 From: Marco Varlese Date: Fri, 26 Jan 2018 16:50:01 +0100 Subject: Prep-work patch for congestion-control This patch addresses the missing fields in various data-structures to track valuable information to implement the congestion-control algorithms and manage sub-connection states. It adds the possibility to queue up to 2 SACK chunks when the connection is not gapping. At the same time, it pushes some variable/field renaming for better readability. Change-Id: Idcc53512983456779600a75e78e21af078e46602 Signed-off-by: Marco Varlese --- src/vnet/sctp/sctp.c | 349 ++++++++++++++++++++------------------- src/vnet/sctp/sctp.h | 129 +++++++++++---- src/vnet/sctp/sctp_debug.h | 7 + src/vnet/sctp/sctp_input.c | 238 +++++++++++++++++---------- src/vnet/sctp/sctp_output.c | 386 +++++++++++++++++++++++++++----------------- src/vnet/sctp/sctp_packet.h | 11 -- 6 files changed, 670 insertions(+), 450 deletions(-) diff --git a/src/vnet/sctp/sctp.c b/src/vnet/sctp/sctp.c index 2e37a91dbac..d0f37f4346e 100644 --- a/src/vnet/sctp/sctp.c +++ b/src/vnet/sctp/sctp.c @@ -65,13 +65,13 @@ static void sctp_connection_unbind (u32 listener_index) { sctp_main_t *tm = vnet_get_sctp_main (); - sctp_connection_t *tc; + sctp_connection_t *sctp_conn; - tc = pool_elt_at_index (tm->listener_pool, listener_index); + sctp_conn = pool_elt_at_index (tm->listener_pool, listener_index); /* Poison the entry */ if (CLIB_DEBUG > 0) - memset (tc, 0xFA, sizeof (*tc)); + memset (sctp_conn, 0xFA, sizeof (*sctp_conn)); pool_put_index (tm->listener_pool, listener_index); } @@ -127,31 +127,32 @@ sctp_alloc_custom_local_endpoint (sctp_main_t * tm, ip46_address_t * lcl_addr, * Initialize all connection timers as invalid */ void -sctp_connection_timers_init (sctp_connection_t * tc) +sctp_connection_timers_init (sctp_connection_t * sctp_conn) { int i, j; /* Set all to invalid */ for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) - for (j = 0; j < 
SCTP_N_TIMERS; j++) - { - tc->sub_conn[i].timers[j] = SCTP_TIMER_HANDLE_INVALID; - } - - tc->rto = SCTP_RTO_INIT; + { + sctp_conn->sub_conn[i].RTO = SCTP_RTO_INIT; + for (j = 0; j < SCTP_N_TIMERS; j++) + { + sctp_conn->sub_conn[i].timers[j] = SCTP_TIMER_HANDLE_INVALID; + } + } } /** * Stop all connection timers */ void -sctp_connection_timers_reset (sctp_connection_t * tc) +sctp_connection_timers_reset (sctp_connection_t * sctp_conn) { int i, j; for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) { for (j = 0; j < SCTP_N_TIMERS; j++) - sctp_timer_reset (tc, i, j); + sctp_timer_reset (sctp_conn, i, j); } } @@ -177,22 +178,22 @@ u8 * format_sctp_connection_id (u8 * s, va_list * args) { /* - sctp_connection_t *tc = va_arg (*args, sctp_connection_t *); - if (!tc) + sctp_connection_t *sctp_conn = va_arg (*args, sctp_connection_t *); + if (!sctp_conn) return s; - if (tc->c_is_ip4) + if (sctp_conn->c_is_ip4) { - s = format (s, "[#%d][%s] %U:%d->%U:%d", tc->c_thread_index, "T", - format_ip4_address, &tc->c_lcl_ip4, - clib_net_to_host_u16 (tc->c_lcl_port), format_ip4_address, - &tc->c_rmt_ip4, clib_net_to_host_u16 (tc->c_rmt_port)); + s = format (s, "[#%d][%s] %U:%d->%U:%d", sctp_conn->c_thread_index, "T", + format_ip4_address, &sctp_conn->c_lcl_ip4, + clib_net_to_host_u16 (sctp_conn->c_lcl_port), format_ip4_address, + &sctp_conn->c_rmt_ip4, clib_net_to_host_u16 (sctp_conn->c_rmt_port)); } else { - s = format (s, "[#%d][%s] %U:%d->%U:%d", tc->c_thread_index, "T", - format_ip6_address, &tc->c_lcl_ip6, - clib_net_to_host_u16 (tc->c_lcl_port), format_ip6_address, - &tc->c_rmt_ip6, clib_net_to_host_u16 (tc->c_rmt_port)); + s = format (s, "[#%d][%s] %U:%d->%U:%d", sctp_conn->c_thread_index, "T", + format_ip6_address, &sctp_conn->c_lcl_ip6, + clib_net_to_host_u16 (sctp_conn->c_lcl_port), format_ip6_address, + &sctp_conn->c_rmt_ip6, clib_net_to_host_u16 (sctp_conn->c_rmt_port)); } */ return s; @@ -201,15 +202,15 @@ format_sctp_connection_id (u8 * s, va_list * args) u8 * 
format_sctp_connection (u8 * s, va_list * args) { - sctp_connection_t *tc = va_arg (*args, sctp_connection_t *); + sctp_connection_t *sctp_conn = va_arg (*args, sctp_connection_t *); u32 verbose = va_arg (*args, u32); - if (!tc) + if (!sctp_conn) return s; - s = format (s, "%-50U", format_sctp_connection_id, tc); + s = format (s, "%-50U", format_sctp_connection_id, sctp_conn); if (verbose) { - s = format (s, "%-15U", format_sctp_state, tc->state); + s = format (s, "%-15U", format_sctp_state, sctp_conn->state); } return s; @@ -219,23 +220,23 @@ format_sctp_connection (u8 * s, va_list * args) * Initialize connection send variables. */ void -sctp_init_snd_vars (sctp_connection_t * tc) +sctp_init_snd_vars (sctp_connection_t * sctp_conn) { u32 time_now; - /* * We use the time to randomize iss and for setting up the initial * timestamp. Make sure it's updated otherwise syn and ack in the * handshake may make it look as if time has flown in the opposite * direction for us. */ + sctp_set_time_now (vlib_get_thread_index ()); time_now = sctp_time_now (); - tc->iss = random_u32 (&time_now); - tc->snd_una = tc->iss; - tc->snd_nxt = tc->iss + 1; - tc->snd_una_max = tc->snd_nxt; + sctp_conn->local_initial_tsn = random_u32 (&time_now); + sctp_conn->remote_initial_tsn = 0x0; + sctp_conn->last_rcvd_tsn = sctp_conn->remote_initial_tsn; + sctp_conn->next_tsn = sctp_conn->local_initial_tsn + 1; } /** @@ -245,72 +246,63 @@ sctp_init_snd_vars (sctp_connection_t * tc) * also what we advertise to our peer. 
*/ void -sctp_update_rcv_mss (sctp_connection_t * tc) +sctp_update_rcv_mss (sctp_connection_t * sctp_conn) { - /* TODO find our iface MTU */ - tc->a_rwnd = DEFAULT_A_RWND - sizeof (sctp_full_hdr_t); - tc->rcv_opts.a_rwnd = tc->a_rwnd; - tc->rcv_a_rwnd = tc->a_rwnd; /* This will be updated by our congestion algos */ + sctp_conn->smallest_PMTU = DEFAULT_A_RWND; /* TODO find our iface MTU */ + sctp_conn->a_rwnd = DEFAULT_A_RWND - sizeof (sctp_full_hdr_t); + sctp_conn->rcv_opts.a_rwnd = sctp_conn->a_rwnd; + sctp_conn->rcv_a_rwnd = sctp_conn->a_rwnd; /* This will be updated by our congestion algos */ } void -sctp_init_mss (sctp_connection_t * tc) +sctp_init_mss (sctp_connection_t * sctp_conn) { SCTP_DBG ("CONN_INDEX = %u", - tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.c_index); + sctp_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.c_index); u16 default_a_rwnd = 536; - sctp_update_rcv_mss (tc); + sctp_update_rcv_mss (sctp_conn); /* TODO cache mss and consider PMTU discovery */ - tc->snd_a_rwnd = clib_min (tc->rcv_opts.a_rwnd, tc->a_rwnd); + sctp_conn->snd_a_rwnd = + clib_min (sctp_conn->rcv_opts.a_rwnd, sctp_conn->a_rwnd); - if (tc->snd_a_rwnd < sizeof (sctp_full_hdr_t)) + if (sctp_conn->snd_a_rwnd < sizeof (sctp_full_hdr_t)) { - SCTP_ADV_DBG ("tc->snd_a_rwnd < sizeof(sctp_full_hdr_t)"); + SCTP_ADV_DBG ("sctp_conn->snd_a_rwnd < sizeof(sctp_full_hdr_t)"); /* Assume that at least the min default mss works */ - tc->snd_a_rwnd = default_a_rwnd; - tc->rcv_opts.a_rwnd = default_a_rwnd; + sctp_conn->snd_a_rwnd = default_a_rwnd; + sctp_conn->rcv_opts.a_rwnd = default_a_rwnd; } - ASSERT (tc->snd_a_rwnd > sizeof (sctp_full_hdr_t)); -} - -/** Initialize sctp connection variables - * - * Should be called after having received a msg from the peer, i.e., a SYN or - * a SYNACK, such that connection options have already been exchanged. 
*/ -void -sctp_connection_init_vars (sctp_connection_t * tc) -{ - sctp_init_mss (tc); - sctp_init_snd_vars (tc); + ASSERT (sctp_conn->snd_a_rwnd > sizeof (sctp_full_hdr_t)); } always_inline sctp_connection_t * sctp_sub_connection_add (u8 thread_index) { sctp_main_t *tm = vnet_get_sctp_main (); - sctp_connection_t *tc = tm->connections[thread_index]; + sctp_connection_t *sctp_conn = tm->connections[thread_index]; - tc->sub_conn[tc->next_avail_sub_conn].connection.c_index = - tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.c_index; - tc->sub_conn[tc->next_avail_sub_conn].connection.thread_index = - thread_index; - tc->sub_conn[tc->next_avail_sub_conn].parent = tc; + sctp_conn->sub_conn[sctp_conn->next_avail_sub_conn].connection.c_index = + sctp_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.c_index; + sctp_conn->sub_conn[sctp_conn->next_avail_sub_conn]. + connection.thread_index = thread_index; + sctp_conn->sub_conn[sctp_conn->next_avail_sub_conn].parent = sctp_conn; - tc->next_avail_sub_conn += 1; + sctp_conn->next_avail_sub_conn += 1; - return tc; + return sctp_conn; } void sctp_sub_connection_add_ip4 (u8 thread_index, sctp_ipv4_addr_param_t * ipv4_addr) { - sctp_connection_t *tc = sctp_sub_connection_add (thread_index); + sctp_connection_t *sctp_conn = sctp_sub_connection_add (thread_index); - clib_memcpy (&tc->sub_conn[tc->next_avail_sub_conn].connection.lcl_ip.ip4, + clib_memcpy (&sctp_conn-> + sub_conn[sctp_conn->next_avail_sub_conn].connection.lcl_ip.ip4, &ipv4_addr->address, sizeof (ipv4_addr->address)); } @@ -318,9 +310,10 @@ void sctp_sub_connection_add_ip6 (u8 thread_index, sctp_ipv6_addr_param_t * ipv6_addr) { - sctp_connection_t *tc = sctp_sub_connection_add (thread_index); + sctp_connection_t *sctp_conn = sctp_sub_connection_add (thread_index); - clib_memcpy (&tc->sub_conn[tc->next_avail_sub_conn].connection.lcl_ip.ip6, + clib_memcpy (&sctp_conn-> + sub_conn[sctp_conn->next_avail_sub_conn].connection.lcl_ip.ip6, &ipv6_addr->address, sizeof 
(ipv6_addr->address)); } @@ -328,39 +321,39 @@ sctp_connection_t * sctp_connection_new (u8 thread_index) { sctp_main_t *tm = vnet_get_sctp_main (); - sctp_connection_t *tc; + sctp_connection_t *sctp_conn; - pool_get (tm->connections[thread_index], tc); - memset (tc, 0, sizeof (*tc)); - tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].parent = tc; - tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_c_index = - tc - tm->connections[thread_index]; - tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_thread_index = thread_index; - tc->local_tag = 0; - tc->next_avail_sub_conn = 1; + pool_get (tm->connections[thread_index], sctp_conn); + memset (sctp_conn, 0, sizeof (*sctp_conn)); + sctp_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].parent = sctp_conn; + sctp_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_c_index = + sctp_conn - tm->connections[thread_index]; + sctp_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_thread_index = thread_index; + sctp_conn->local_tag = 0; + sctp_conn->next_avail_sub_conn = 1; - return tc; + return sctp_conn; } sctp_connection_t * sctp_half_open_connection_new (u8 thread_index) { sctp_main_t *tm = vnet_get_sctp_main (); - sctp_connection_t *tc = 0; + sctp_connection_t *sctp_conn = 0; ASSERT (vlib_get_thread_index () == 0); - pool_get (tm->half_open_connections, tc); - memset (tc, 0, sizeof (*tc)); - tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_c_index = - tc - tm->half_open_connections; - tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].parent = tc; - return tc; + pool_get (tm->half_open_connections, sctp_conn); + memset (sctp_conn, 0, sizeof (*sctp_conn)); + sctp_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_c_index = + sctp_conn - tm->half_open_connections; + sctp_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].parent = sctp_conn; + return sctp_conn; } static inline int sctp_connection_open (transport_endpoint_t * rmt) { sctp_main_t *tm = vnet_get_sctp_main (); - sctp_connection_t *tc; + sctp_connection_t *sctp_conn; ip46_address_t lcl_addr; u16 lcl_port; uword thread_id; @@ -389,27 +382,27 @@ sctp_connection_open 
(transport_endpoint_t * rmt) ASSERT (thread_id == 0); clib_spinlock_lock_if_init (&tm->half_open_lock); - tc = sctp_half_open_connection_new (thread_id); - - transport_connection_t *t_conn = &tc->sub_conn[idx].connection; - ip_copy (&t_conn->rmt_ip, &rmt->ip, rmt->is_ip4); - ip_copy (&t_conn->lcl_ip, &lcl_addr, rmt->is_ip4); - tc->sub_conn[idx].parent = tc; - t_conn->rmt_port = rmt->port; - t_conn->lcl_port = clib_host_to_net_u16 (lcl_port); - t_conn->is_ip4 = rmt->is_ip4; - t_conn->proto = TRANSPORT_PROTO_SCTP; - t_conn->fib_index = rmt->fib_index; - - sctp_connection_timers_init (tc); + sctp_conn = sctp_half_open_connection_new (thread_id); + + transport_connection_t *trans_conn = &sctp_conn->sub_conn[idx].connection; + ip_copy (&trans_conn->rmt_ip, &rmt->ip, rmt->is_ip4); + ip_copy (&trans_conn->lcl_ip, &lcl_addr, rmt->is_ip4); + sctp_conn->sub_conn[idx].parent = sctp_conn; + trans_conn->rmt_port = rmt->port; + trans_conn->lcl_port = clib_host_to_net_u16 (lcl_port); + trans_conn->is_ip4 = rmt->is_ip4; + trans_conn->proto = TRANSPORT_PROTO_SCTP; + trans_conn->fib_index = rmt->fib_index; + + sctp_connection_timers_init (sctp_conn); /* The other connection vars will be initialized after INIT_ACK chunk received */ - sctp_init_snd_vars (tc); + sctp_init_snd_vars (sctp_conn); - sctp_send_init (tc); + sctp_send_init (sctp_conn); clib_spinlock_unlock_if_init (&tm->half_open_lock); - return tc->sub_conn[idx].connection.c_index; + return sctp_conn->sub_conn[idx].connection.c_index; } /** @@ -418,7 +411,7 @@ sctp_connection_open (transport_endpoint_t * rmt) * No notifications. 
*/ void -sctp_connection_cleanup (sctp_connection_t * tc) +sctp_connection_cleanup (sctp_connection_t * sctp_conn) { sctp_main_t *tm = &sctp_main; u8 i; @@ -426,26 +419,26 @@ sctp_connection_cleanup (sctp_connection_t * tc) /* Cleanup local endpoint if this was an active connect */ for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) transport_endpoint_cleanup (TRANSPORT_PROTO_SCTP, - &tc->sub_conn[i].connection.lcl_ip, - tc->sub_conn[i].connection.lcl_port); + &sctp_conn->sub_conn[i].connection.lcl_ip, + sctp_conn->sub_conn[i].connection.lcl_port); /* Check if connection is not yet fully established */ - if (tc->state == SCTP_STATE_COOKIE_WAIT) + if (sctp_conn->state == SCTP_STATE_COOKIE_WAIT) { } else { int thread_index = - tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.thread_index; + sctp_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.thread_index; /* Make sure all timers are cleared */ - sctp_connection_timers_reset (tc); + sctp_connection_timers_reset (sctp_conn); /* Poison the entry */ if (CLIB_DEBUG > 0) - memset (tc, 0xFA, sizeof (*tc)); - pool_put (tm->connections[thread_index], tc); + memset (sctp_conn, 0xFA, sizeof (*sctp_conn)); + pool_put (tm->connections[thread_index], sctp_conn); } } @@ -456,20 +449,20 @@ sctp_session_open (transport_endpoint_t * tep) } u16 -sctp_check_outstanding_data_chunks (sctp_connection_t * tc) +sctp_check_outstanding_data_chunks (sctp_connection_t * sctp_conn) { return 0; /* Indicates no more data to be read/sent */ } void -sctp_connection_close (sctp_connection_t * tc) +sctp_connection_close (sctp_connection_t * sctp_conn) { SCTP_DBG ("Closing connection %u...", - tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.c_index); + sctp_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.c_index); - tc->state = SCTP_STATE_SHUTDOWN_PENDING; + sctp_conn->state = SCTP_STATE_SHUTDOWN_PENDING; - sctp_send_shutdown (tc); + sctp_send_shutdown (sctp_conn); } void @@ -477,42 +470,43 @@ sctp_session_close (u32 conn_index, u32 thread_index) { ASSERT 
(thread_index == 0); - sctp_connection_t *tc; - tc = sctp_connection_get (conn_index, thread_index); - sctp_connection_close (tc); + sctp_connection_t *sctp_conn; + sctp_conn = sctp_connection_get (conn_index, thread_index); + sctp_connection_close (sctp_conn); } void sctp_session_cleanup (u32 conn_index, u32 thread_index) { - sctp_connection_t *tc; - tc = sctp_connection_get (conn_index, thread_index); - sctp_connection_timers_reset (tc); + sctp_connection_t *sctp_conn; + sctp_conn = sctp_connection_get (conn_index, thread_index); + sctp_connection_timers_reset (sctp_conn); /* Wait for the session tx events to clear */ - tc->state = SCTP_STATE_CLOSED; + sctp_conn->state = SCTP_STATE_CLOSED; } /** * Update snd_mss to reflect the effective segment size that we can send */ void -sctp_update_snd_mss (sctp_connection_t * tc) +sctp_update_snd_mss (sctp_connection_t * sctp_conn) { /* The overhead for the sctp_header_t and sctp_chunks_common_hdr_t * (the sum equals to sctp_full_hdr_t) is already taken into account - * for the tc->a_rwnd computation. + * for the sctp_conn->a_rwnd computation. * So let's not account it again here. 
*/ - tc->snd_hdr_length = + sctp_conn->snd_hdr_length = sizeof (sctp_payload_data_chunk_t) - sizeof (sctp_full_hdr_t); - tc->snd_a_rwnd = - clib_min (tc->a_rwnd, tc->rcv_opts.a_rwnd) - tc->snd_hdr_length; + sctp_conn->snd_a_rwnd = + clib_min (sctp_conn->a_rwnd, + sctp_conn->rcv_opts.a_rwnd) - sctp_conn->snd_hdr_length; - SCTP_DBG ("tc->snd_a_rwnd = %u, tc->snd_hdr_length = %u ", - tc->snd_a_rwnd, tc->snd_hdr_length); + SCTP_DBG ("sctp_conn->snd_a_rwnd = %u, sctp_conn->snd_hdr_length = %u ", + sctp_conn->snd_a_rwnd, sctp_conn->snd_hdr_length); - ASSERT (tc->snd_a_rwnd > 0); + ASSERT (sctp_conn->snd_a_rwnd > 0); } u16 @@ -520,7 +514,8 @@ sctp_session_send_mss (transport_connection_t * trans_conn) { SCTP_DBG ("CONN_INDEX: %u", trans_conn->c_index); - sctp_connection_t *tc = sctp_get_connection_from_transport (trans_conn); + sctp_connection_t *sctp_conn = + sctp_get_connection_from_transport (trans_conn); if (trans_conn == NULL) { @@ -528,17 +523,17 @@ sctp_session_send_mss (transport_connection_t * trans_conn) return 0; } - if (tc == NULL) + if (sctp_conn == NULL) { - SCTP_DBG ("tc == NULL"); + SCTP_DBG ("sctp_conn == NULL"); return 0; } /* Ensure snd_mss does accurately reflect the amount of data we can push * in a segment. This also makes sure that options are updated according to * the current state of the connection. 
*/ - sctp_update_snd_mss (tc); + sctp_update_snd_mss (sctp_conn); - return tc->snd_a_rwnd; + return sctp_conn->snd_a_rwnd; } u16 @@ -566,42 +561,27 @@ sctp_session_send_space (transport_connection_t * trans_conn) { SCTP_DBG ("CONN_INDEX: %u", trans_conn->c_index); - sctp_connection_t *tc = sctp_get_connection_from_transport (trans_conn); - - return sctp_snd_space (tc); -} - -u32 -sctp_session_tx_fifo_offset (transport_connection_t * trans_conn) -{ - SCTP_DBG ("CONN_INDEX: %u", trans_conn->c_index); - - sctp_connection_t *tc = sctp_get_connection_from_transport (trans_conn); + sctp_connection_t *sctp_conn = + sctp_get_connection_from_transport (trans_conn); - if (tc == NULL) - { - SCTP_DBG ("tc == NULL"); - return 0; - } - - /* This still works if fast retransmit is on */ - return (tc->snd_nxt - tc->snd_una); + return sctp_snd_space (sctp_conn); } transport_connection_t * sctp_session_get_transport (u32 conn_index, u32 thread_index) { - sctp_connection_t *tc = sctp_connection_get (conn_index, thread_index); - return &tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection; + sctp_connection_t *sctp_conn = + sctp_connection_get (conn_index, thread_index); + return &sctp_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection; } transport_connection_t * sctp_session_get_listener (u32 listener_index) { sctp_main_t *tm = vnet_get_sctp_main (); - sctp_connection_t *tc; - tc = pool_elt_at_index (tm->listener_pool, listener_index); - return &tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection; + sctp_connection_t *sctp_conn; + sctp_conn = pool_elt_at_index (tm->listener_pool, listener_index); + return &sctp_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection; } u8 * @@ -617,28 +597,38 @@ format_sctp_listener_session (u8 * s, va_list * args) } void -sctp_timer_init_handler (u32 conn_index) +sctp_timer_init_handler (u32 conn_index, u32 timer_id) { - sctp_connection_t *tc; + sctp_connection_t *sctp_conn; - tc = sctp_connection_get (conn_index, vlib_get_thread_index ()); + clib_warning (""); + 
sctp_conn = sctp_connection_get (conn_index, vlib_get_thread_index ()); /* note: the connection may have already disappeared */ - if (PREDICT_FALSE (tc == 0)) + if (PREDICT_FALSE (sctp_conn == 0)) return; - ASSERT (tc->state == SCTP_STATE_COOKIE_ECHOED); + ASSERT (sctp_conn->state == SCTP_STATE_COOKIE_ECHOED); + + switch (timer_id) + { + case SCTP_TIMER_T4_HEARTBEAT: + { + clib_warning ("Heartbeat timeout"); + break; + } + } /* Start cleanup. App wasn't notified yet so use delete notify as * opposed to delete to cleanup session layer state. */ - stream_session_delete_notify (&tc-> + stream_session_delete_notify (&sctp_conn-> sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection); - tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].timers[SCTP_TIMER_T1_INIT] = - SCTP_TIMER_HANDLE_INVALID; - sctp_connection_cleanup (tc); + sctp_connection_timers_reset (sctp_conn); + + sctp_connection_cleanup (sctp_conn); } /* *INDENT OFF* */ -static timer_expiration_handler *sctp_timer_expiration_handlers[SCTP_N_TIMERS] - = { +static sctp_timer_expiration_handler + * sctp_timer_expiration_handlers[SCTP_N_TIMERS] = { sctp_timer_init_handler }; @@ -657,8 +647,11 @@ sctp_expired_timers_dispatch (u32 * expired_timers) timer_id = expired_timers[i] >> 28; /* Handle expiration */ - (*sctp_timer_expiration_handlers[timer_id]) (connection_index); + (*sctp_timer_expiration_handlers[timer_id]) (connection_index, + timer_id); } + + clib_warning (""); } void @@ -683,7 +676,7 @@ sctp_main_enable (vlib_main_t * vm) clib_error_t *error = 0; u32 num_threads; int thread; - sctp_connection_t *tc __attribute__ ((unused)); + sctp_connection_t *sctp_conn __attribute__ ((unused)); u32 preallocated_connections_per_thread; if ((error = vlib_call_init_function (vm, ip_main_init))) @@ -778,16 +771,16 @@ sctp_enable_disable (vlib_main_t * vm, u8 is_en) transport_connection_t * sctp_half_open_session_get_transport (u32 conn_index) { - sctp_connection_t *tc = sctp_half_open_connection_get (conn_index); - return 
&tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection; + sctp_connection_t *sctp_conn = sctp_half_open_connection_get (conn_index); + return &sctp_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection; } u8 * format_sctp_half_open (u8 * s, va_list * args) { u32 tci = va_arg (*args, u32); - sctp_connection_t *tc = sctp_half_open_connection_get (tci); - return format (s, "%U", format_sctp_connection_id, tc); + sctp_connection_t *sctp_conn = sctp_half_open_connection_get (tci); + return format (s, "%U", format_sctp_connection_id, sctp_conn); } /* *INDENT OFF* */ diff --git a/src/vnet/sctp/sctp.h b/src/vnet/sctp/sctp.h index 7c4df309906..3e3750ea92a 100644 --- a/src/vnet/sctp/sctp.h +++ b/src/vnet/sctp/sctp.h @@ -28,6 +28,7 @@ _(T1_COOKIE, "T1_COOKIE") \ _(T2_SHUTDOWN, "T2_SHUTDOWN") \ _(T3_RXTX, "T3_RXTX") \ + _(T4_HEARTBEAT, "T4_HB") \ _(T5_SHUTDOWN_GUARD, "T5_SHUTDOWN_GUARD") typedef enum _sctp_timers @@ -71,58 +72,125 @@ typedef struct _sctp_sub_connection { transport_connection_t connection; /**< Common transport data. First! */ void *parent; /**< Link to the parent-super connection */ - u32 timers[SCTP_N_TIMERS]; /**< Timer handles into timer wheel */ + + u32 error_count; /**< The current error count for this destination. */ + u32 error_threshold; /**< Current error threshold for this destination, + i.e. what value marks the destination down if error count reaches this value. */ + u32 cwnd; /**< The current congestion window. */ + u32 ssthresh; /**< The current ssthresh value. */ + + u32 RTO; /**< The current retransmission timeout value. */ + u32 SRTT; /**< The current smoothed round-trip time. */ + u32 RTTVAR; /**< The current RTT variation. */ + + u32 partially_acked_bytes; /**< The tracking method for increase of cwnd when in + congestion avoidance mode (see Section 7.2.2).*/ + + u8 state; /**< The current state of this destination, i.e., DOWN, UP, ALLOW-HB, NO-HEARTBEAT, etc. */ + + u16 PMTU; /**< The current known path MTU. 
*/ + + u32 timers[SCTP_N_TIMERS]; /**< A timer used by each destination. */ + + u8 RTO_pending; /**< A flag used to track if one of the DATA chunks sent to + this address is currently being used to compute an RTT. + If this flag is 0, the next DATA chunk sent to this destination + should be used to compute an RTT and this flag should be set. + Every time the RTT calculation completes (i.e., the DATA chunk is SACK'd), + clear this flag. */ + + u32 last_time; /**< The time to which this destination was last sent a packet to. + This can be used to determine if a HEARTBEAT is needed. */ } sctp_sub_connection_t; typedef struct { - u32 a_rwnd; /**< Maximum segment size advertised */ + u32 a_rwnd; /**< Maximum segment size advertised */ } sctp_options_t; +#define SetBit(A,k) ( A[(k/32)] |= (1 << (k%32)) ) +#define ClearBit(A,k) ( A[(k/32)] &= ~(1 << (k%32)) ) +#define TestBit(A,k) ( A[(k/32)] & (1 << (k%32)) ) + +#define MAX_INFLIGHT_PACKETS 128 +#define MAX_ENQUEABLE_SACKS 2 + +/* This parameter indicates to the receiver how much increment in + * milliseconds the sender wishes the receiver to add to its default + * cookie life-span. + */ +#define SUGGESTED_COOKIE_LIFE_SPAN_INCREMENT 1000 + typedef struct _sctp_connection { - sctp_sub_connection_t sub_conn[MAX_SCTP_CONNECTIONS]; /**< Common transport data. First! */ + sctp_sub_connection_t sub_conn[MAX_SCTP_CONNECTIONS]; /**< Common transport data. First! 
*/ u8 state; /**< SCTP state as per sctp_state_t */ u16 flags; /**< Chunk flag (see sctp_chunks_common_hdr_t) */ + u32 local_tag; /**< INIT_TAG generated locally */ u32 remote_tag; /**< INIT_TAG generated by the remote peer */ - u16 life_span_inc; - - /** Send sequence variables RFC4960 */ - u32 snd_una; /**< oldest unacknowledged sequence number */ - u32 snd_una_max; /**< newest unacknowledged sequence number + 1*/ - u32 snd_wl1; /**< seq number used for last snd.wnd update */ - u32 snd_wl2; /**< ack number used for last snd.wnd update */ - u32 snd_nxt; /**< next seq number to be sent */ - - /** Receive sequence variables RFC4960 */ - u32 rcv_nxt; /**< next sequence number expected */ - u32 rcv_las; /**< rcv_nxt at last ack sent/rcv_wnd update */ - u32 iss; /**< initial sent sequence */ - u32 irs; /**< initial remote sequence */ - - /* RTT and RTO */ - u32 rto; /**< Retransmission timeout */ - u32 rto_boff; /**< Index for RTO backoff */ - u32 srtt; /**< Smoothed RTT */ - u32 rttvar; /**< Smoothed mean RTT difference. Approximates variance */ - u32 rtt_ts; /**< Timestamp for tracked ACK */ - u32 rtt_seq; /**< Sequence number for tracked ACK */ - - u32 a_rwnd; /** Constrained by medium / IP / etc. */ + + u32 local_initial_tsn; /**< Initial TSN generated locally */ + u32 remote_initial_tsn; /**< Initial TSN generated by the remote-peer */ + + u32 peer_cookie_life_span_increment; + + u32 overall_err_count; /**< The overall association error count. */ + u32 overall_err_treshold; /**< The threshold for this association that if the Overall Error Count + reaches will cause this association to be torn down. */ + + u32 peer_rwnd; /**< Current calculated value of the peer's rwnd. */ + + u32 next_tsn; /**< The next TSN number to be assigned to a new DATA chunk. + This is sent in the INIT or INIT ACK chunk to the peer + and incremented each time a DATA chunk is assigned a + TSN (normally just prior to transmit or during + fragmentation). 
*/ + + u32 next_tsn_expected; /**< The next TSN number expected to be received. */ + + u32 last_rcvd_tsn; /**< This is the last TSN received in sequence. This value + is set initially by taking the peer's initial TSN, + received in the INIT or INIT ACK chunk, and + subtracting one from it. */ + + u32 out_of_order_map[MAX_INFLIGHT_PACKETS]; /**< An array of bits or bytes indicating which out-of-order + TSNs have been received (relative to the Last Rcvd TSN). + If no gaps exist, i.e., no out-of-order packets have been received, + this array will be set to all zero. */ + + u8 ack_state; /**< This flag indicates if the next received packet is set to be responded to with a SACK. + This is initialized to 0. When a packet is received it is incremented. + If this value reaches 2 or more, a SACK is sent and the value is reset to 0. + Note: This is used only when no DATA chunks are received out-of-order. + When DATA chunks are out-of-order, SACKs are not delayed (see Section 6). */ + + u32 a_rwnd; /** This value represents the dedicated buffer space, in number of bytes, + the sender of the INIT has reserved in association with this window. + During the life of the association, this buffer space SHOULD NOT be lessened + (i.e., dedicated buffers taken away from this association); + however, an endpoint MAY change the value of a_rwnd it sends in SACK chunks. */ + + u32 smallest_PMTU; /** The smallest PMTU discovered for all of the peer's transport addresses. */ + u32 rcv_a_rwnd; /**< LOCAL max seg size that includes options. To be updated by congestion algos, etc. */ u32 snd_a_rwnd; /**< REMOTE max seg size that includes options. 
To be updated if peer pushes back on window, etc.*/ + + u32 rtt_ts; + u32 rtt_seq; + sctp_options_t rcv_opts; sctp_options_t snd_opts; - u32 snd_hdr_length; /**< BASE HEADER LENGTH for the DATA chunk when sending */ + u32 snd_hdr_length; /**< BASE HEADER LENGTH for the DATA chunk when sending */ u8 next_avail_sub_conn; /**< Represent the index of the next free slot in sub_conn */ + } sctp_connection_t; -typedef void (timer_expiration_handler) (u32 index); +typedef void (sctp_timer_expiration_handler) (u32 conn_index, u32 timer_id); sctp_connection_t *sctp_connection_new (u8 thread_index); void sctp_sub_connection_add_ip4 (u8 thread_index, @@ -155,7 +223,7 @@ clib_error_t *sctp_init (vlib_main_t * vm); void sctp_connection_timers_init (sctp_connection_t * tc); void sctp_connection_timers_reset (sctp_connection_t * tc); void sctp_init_snd_vars (sctp_connection_t * tc); -void sctp_connection_init_vars (sctp_connection_t * tc); +void sctp_init_mss (sctp_connection_t * tc); void sctp_prepare_initack_chunk (sctp_connection_t * ts, vlib_buffer_t * b, ip4_address_t * ip4_addr, @@ -522,6 +590,7 @@ sctp_timer_update (sctp_connection_t * tc, u8 conn_idx, u8 timer_id, if (tc->sub_conn[conn_idx].timers[timer_id] != SCTP_TIMER_HANDLE_INVALID) tw_timer_stop_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index], sub->timers[timer_id]); + tc->sub_conn[conn_idx].timers[timer_id] = tw_timer_start_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index], sub->c_c_index, timer_id, interval); diff --git a/src/vnet/sctp/sctp_debug.h b/src/vnet/sctp/sctp_debug.h index b422d199640..b0059d5fe67 100644 --- a/src/vnet/sctp/sctp_debug.h +++ b/src/vnet/sctp/sctp_debug.h @@ -59,4 +59,11 @@ typedef enum _sctp_dbg #define SCTP_ADV_DBG_OUTPUT(_fmt, _args...) #endif +#define SCTP_CONN_TRACKING_DEBUG (0) +#if SCTP_CONN_TRACKING_DEBUG +#define SCTP_CONN_TRACKING_DBG(_fmt, _args...) clib_warning (_fmt, ##_args) +#else +#define SCTP_CONN_TRACKING_DBG(_fmt, _args...) 
+#endif + #endif /* included_sctp_debug_h__ */ diff --git a/src/vnet/sctp/sctp_input.c b/src/vnet/sctp/sctp_input.c index 4e5ea9194e8..a1bcb2b0ae3 100644 --- a/src/vnet/sctp/sctp_input.c +++ b/src/vnet/sctp/sctp_input.c @@ -122,17 +122,18 @@ phase_to_string (u8 phase) _ (PUNT_PHASE, "ip6-punt") static u8 -sctp_lookup_is_valid (transport_connection_t * t_conn, +sctp_lookup_is_valid (transport_connection_t * trans_conn, sctp_header_t * sctp_hdr) { - sctp_connection_t *sctp_conn = sctp_get_connection_from_transport (t_conn); + sctp_connection_t *sctp_conn = + sctp_get_connection_from_transport (trans_conn); if (!sctp_conn) return 1; - u8 is_valid = (t_conn->lcl_port == sctp_hdr->dst_port + u8 is_valid = (trans_conn->lcl_port == sctp_hdr->dst_port && (sctp_conn->state == SCTP_STATE_CLOSED - || t_conn->rmt_port == sctp_hdr->src_port)); + || trans_conn->rmt_port == sctp_hdr->src_port)); return is_valid; } @@ -146,7 +147,7 @@ sctp_lookup_connection (u32 fib_index, vlib_buffer_t * b, u8 thread_index, { sctp_main_t *tm = vnet_get_sctp_main (); sctp_header_t *sctp_hdr; - transport_connection_t *tconn; + transport_connection_t *trans_conn; sctp_connection_t *sctp_conn; u8 is_filtered, i; if (is_ip4) @@ -154,14 +155,14 @@ sctp_lookup_connection (u32 fib_index, vlib_buffer_t * b, u8 thread_index, ip4_header_t *ip4_hdr; ip4_hdr = vlib_buffer_get_current (b); sctp_hdr = ip4_next_header (ip4_hdr); - tconn = session_lookup_connection_wt4 (fib_index, - &ip4_hdr->dst_address, - &ip4_hdr->src_address, - sctp_hdr->dst_port, - sctp_hdr->src_port, - TRANSPORT_PROTO_SCTP, - thread_index, &is_filtered); - if (tconn == 0) /* Not primary connection */ + trans_conn = session_lookup_connection_wt4 (fib_index, + &ip4_hdr->dst_address, + &ip4_hdr->src_address, + sctp_hdr->dst_port, + sctp_hdr->src_port, + TRANSPORT_PROTO_SCTP, + thread_index, &is_filtered); + if (trans_conn == 0) /* Not primary connection */ { for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) { @@ -172,28 +173,28 @@ 
sctp_lookup_connection (u32 fib_index, vlib_buffer_t * b, u8 thread_index, connection.rmt_ip.ip4.as_u32 == ip4_hdr->src_address.as_u32)) { - tconn = + trans_conn = &tm->connections[thread_index]->sub_conn[i].connection; break; } } } - ASSERT (tconn != 0); - ASSERT (sctp_lookup_is_valid (tconn, sctp_hdr)); + ASSERT (trans_conn != 0); + ASSERT (sctp_lookup_is_valid (trans_conn, sctp_hdr)); } else { ip6_header_t *ip6_hdr; ip6_hdr = vlib_buffer_get_current (b); sctp_hdr = ip6_next_header (ip6_hdr); - tconn = session_lookup_connection_wt6 (fib_index, - &ip6_hdr->dst_address, - &ip6_hdr->src_address, - sctp_hdr->dst_port, - sctp_hdr->src_port, - TRANSPORT_PROTO_SCTP, - thread_index, &is_filtered); - if (tconn == 0) /* Not primary connection */ + trans_conn = session_lookup_connection_wt6 (fib_index, + &ip6_hdr->dst_address, + &ip6_hdr->src_address, + sctp_hdr->dst_port, + sctp_hdr->src_port, + TRANSPORT_PROTO_SCTP, + thread_index, &is_filtered); + if (trans_conn == 0) /* Not primary connection */ { for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) { @@ -210,16 +211,16 @@ sctp_lookup_connection (u32 fib_index, vlib_buffer_t * b, u8 thread_index, sub_conn[i].connection.rmt_ip.ip6.as_u64[1] == ip6_hdr->src_address.as_u64[1])) { - tconn = + trans_conn = &tm->connections[thread_index]->sub_conn[i].connection; break; } } } - ASSERT (tconn != 0); - ASSERT (sctp_lookup_is_valid (tconn, sctp_hdr)); + ASSERT (trans_conn != 0); + ASSERT (sctp_lookup_is_valid (trans_conn, sctp_hdr)); } - sctp_conn = sctp_get_connection_from_transport (tconn); + sctp_conn = sctp_get_connection_from_transport (trans_conn); return sctp_conn; } @@ -321,6 +322,13 @@ sctp_handle_init (sctp_header_t * sctp_hdr, /* Save the INITIATE_TAG of the remote peer for this connection: * it MUST be used for the VERIFICATION_TAG parameter in the SCTP HEADER */ sctp_conn->remote_tag = init_chunk->initiate_tag; + sctp_conn->remote_initial_tsn = + clib_net_to_host_u32 (init_chunk->initial_tsn); + sctp_conn->last_rcvd_tsn = 
sctp_conn->remote_initial_tsn; + sctp_conn->next_tsn_expected = sctp_conn->remote_initial_tsn + 1; + SCTP_CONN_TRACKING_DBG ("sctp_conn->remote_initial_tsn = %u", + sctp_conn->remote_initial_tsn); + sctp_conn->snd_opts.a_rwnd = clib_net_to_host_u32 (init_chunk->a_rwnd); /* @@ -365,7 +373,8 @@ sctp_handle_init (sctp_header_t * sctp_hdr, { sctp_cookie_preservative_param_t *cookie_pres = (sctp_cookie_preservative_param_t *) opt_params_hdr; - sctp_conn->life_span_inc = cookie_pres->life_span_inc; + sctp_conn->peer_cookie_life_span_increment = + cookie_pres->life_span_inc; break; } case SCTP_HOSTNAME_ADDRESS_TYPE: @@ -443,6 +452,12 @@ sctp_handle_init_ack (sctp_header_t * sctp_hdr, /* remote_tag to be placed in the VERIFICATION_TAG field of the COOKIE_ECHO chunk */ sctp_conn->remote_tag = init_ack_chunk->initiate_tag; + sctp_conn->remote_initial_tsn = + clib_net_to_host_u32 (init_ack_chunk->initial_tsn); + sctp_conn->last_rcvd_tsn = sctp_conn->remote_initial_tsn; + sctp_conn->next_tsn_expected = sctp_conn->remote_initial_tsn + 1; + SCTP_CONN_TRACKING_DBG ("sctp_conn->remote_initial_tsn = %u", + sctp_conn->remote_initial_tsn); sctp_conn->snd_opts.a_rwnd = clib_net_to_host_u32 (init_ack_chunk->a_rwnd); u16 length = vnet_sctp_get_chunk_length (sctp_chunk_hdr); @@ -527,48 +542,50 @@ sctp_handle_init_ack (sctp_header_t * sctp_hdr, /** Enqueue data for delivery to application */ always_inline int -sctp_session_enqueue_data (sctp_connection_t * tc, vlib_buffer_t * b, +sctp_session_enqueue_data (sctp_connection_t * sctp_conn, vlib_buffer_t * b, u16 data_len, u8 conn_idx) { int written, error = SCTP_ERROR_ENQUEUED; written = - session_enqueue_stream_connection (&tc->sub_conn[conn_idx].connection, b, - 0, 1 /* queue event */ , 1); + session_enqueue_stream_connection (&sctp_conn-> + sub_conn[conn_idx].connection, b, 0, + 1 /* queue event */ , + 1); - /* Update rcv_nxt */ + /* Update next_tsn_expected */ if (PREDICT_TRUE (written == data_len)) { - tc->rcv_nxt += written; + 
sctp_conn->next_tsn_expected += written; SCTP_ADV_DBG ("CONN = %u, WRITTEN [%u] == DATA_LEN [%d]", - tc->sub_conn[conn_idx].connection.c_index, + sctp_conn->sub_conn[conn_idx].connection.c_index, written, data_len); } /* If more data written than expected, account for out-of-order bytes. */ else if (written > data_len) { - tc->rcv_nxt += written; + sctp_conn->next_tsn_expected += written; SCTP_ADV_DBG ("CONN = %u, WRITTEN [%u] > DATA_LEN [%d]", - tc->sub_conn[conn_idx].connection.c_index, + sctp_conn->sub_conn[conn_idx].connection.c_index, written, data_len); } else if (written > 0) { /* We've written something but FIFO is probably full now */ - tc->rcv_nxt += written; + sctp_conn->next_tsn_expected += written; error = SCTP_ERROR_PARTIALLY_ENQUEUED; SCTP_ADV_DBG ("CONN = %u, WRITTEN [%u] > 0 (SCTP_ERROR_PARTIALLY_ENQUEUED)", - tc->sub_conn[conn_idx].connection.c_index, written); + sctp_conn->sub_conn[conn_idx].connection.c_index, written); } else { SCTP_ADV_DBG ("CONN = %u, WRITTEN == 0 (SCTP_ERROR_FIFO_FULL)", - tc->sub_conn[conn_idx].connection.c_index); + sctp_conn->sub_conn[conn_idx].connection.c_index); return SCTP_ERROR_FIFO_FULL; } @@ -576,6 +593,30 @@ sctp_session_enqueue_data (sctp_connection_t * tc, vlib_buffer_t * b, return error; } +always_inline u8 +sctp_is_sack_delayable (sctp_connection_t * sctp_conn, u8 gapping) +{ + if (gapping != 0) + { + SCTP_CONN_TRACKING_DBG + ("gapping != 0: CONN_INDEX = %u, sctp_conn->ack_state = %u", + sctp_conn->sub_conn[idx].connection.c_index, sctp_conn->ack_state); + return 1; + } + + if (sctp_conn->ack_state >= MAX_ENQUEABLE_SACKS) + { + SCTP_CONN_TRACKING_DBG + ("sctp_conn->ack_state >= MAX_ENQUEABLE_SACKS: CONN_INDEX = %u, sctp_conn->ack_state = %u", + sctp_conn->sub_conn[idx].connection.c_index, sctp_conn->ack_state); + return 1; + } + + sctp_conn->ack_state += 1; + + return 0; +} + always_inline u16 sctp_handle_data (sctp_payload_data_chunk_t * sctp_data_chunk, sctp_connection_t * sctp_conn, vlib_buffer_t * b, @@ 
-583,6 +624,7 @@ sctp_handle_data (sctp_payload_data_chunk_t * sctp_data_chunk, { u32 error = 0, n_data_bytes; u8 idx = sctp_pick_conn_idx_on_state (sctp_conn->state); + u8 gapping = 0; /* Check that the LOCALLY generated tag is being used by the REMOTE peer as the verification tag */ if (sctp_conn->local_tag != sctp_data_chunk->sctp_hdr.verification_tag) @@ -592,22 +634,36 @@ sctp_handle_data (sctp_payload_data_chunk_t * sctp_data_chunk, vnet_buffer (b)->sctp.sid = sctp_data_chunk->stream_id; vnet_buffer (b)->sctp.ssn = sctp_data_chunk->stream_seq; - vnet_buffer (b)->sctp.tsn = sctp_data_chunk->tsn; + + u32 tsn = clib_net_to_host_u32 (sctp_data_chunk->tsn); vlib_buffer_advance (b, vnet_buffer (b)->sctp.data_offset); n_data_bytes = vnet_buffer (b)->sctp.data_len; ASSERT (n_data_bytes); + if (sctp_conn->next_tsn_expected != tsn) // It means data transmission is GAPPING + { + SCTP_CONN_TRACKING_DBG + ("GAPPING: CONN_INDEX = %u, sctp_conn->next_tsn_expected = %u, tsn = %u, diff = %u", + sctp_conn->sub_conn[idx].connection.c_index, + sctp_conn->next_tsn_expected, tsn, + sctp_conn->next_tsn_expected - tsn); + + gapping = 1; + } + + sctp_conn->last_rcvd_tsn = tsn; + SCTP_ADV_DBG ("POINTER_WITH_DATA = %p", b->data); /* In order data, enqueue. Fifo figures out by itself if any out-of-order * segments can be enqueued after fifo tail offset changes. 
*/ error = sctp_session_enqueue_data (sctp_conn, b, n_data_bytes, idx); - sctp_timer_update (sctp_conn, idx, SCTP_TIMER_T3_RXTX, SCTP_RTO_INIT); - *next0 = sctp_next_output (sctp_conn->sub_conn[idx].c_is_ip4); - sctp_prepare_sack_chunk (sctp_conn, b); + + if (sctp_is_sack_delayable (sctp_conn, gapping) != 0) + sctp_prepare_sack_chunk (sctp_conn, b); return error; } @@ -741,23 +797,23 @@ sctp46_rcv_phase_inline (vlib_main_t * vm, vlib_node_runtime_t * node, sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr; - transport_connection_t *t_conn = + transport_connection_t *trans_conn = &sctp_conn->sub_conn[idx].connection; - t_conn->lcl_port = sctp_hdr->dst_port; - t_conn->rmt_port = sctp_hdr->src_port; - t_conn->is_ip4 = is_ip4; + trans_conn->lcl_port = sctp_hdr->dst_port; + trans_conn->rmt_port = sctp_hdr->src_port; + trans_conn->is_ip4 = is_ip4; if (is_ip4) { - t_conn->lcl_ip.ip4.as_u32 = ip4_hdr->dst_address.as_u32; - t_conn->rmt_ip.ip4.as_u32 = ip4_hdr->src_address.as_u32; + trans_conn->lcl_ip.ip4.as_u32 = ip4_hdr->dst_address.as_u32; + trans_conn->rmt_ip.ip4.as_u32 = ip4_hdr->src_address.as_u32; } else { - clib_memcpy (&t_conn->lcl_ip.ip6, &ip6_hdr->dst_address, + clib_memcpy (&trans_conn->lcl_ip.ip6, &ip6_hdr->dst_address, sizeof (ip6_address_t)); - clib_memcpy (&t_conn->rmt_ip.ip6, &ip6_hdr->src_address, + clib_memcpy (&trans_conn->rmt_ip.ip6, &ip6_hdr->src_address, sizeof (ip6_address_t)); } @@ -800,7 +856,8 @@ sctp46_rcv_phase_inline (vlib_main_t * vm, vlib_node_runtime_t * node, new_sctp_conn, b0, sctp_implied_length); - sctp_connection_init_vars (new_sctp_conn); + sctp_init_mss (new_sctp_conn); + //sctp_init_snd_vars (new_sctp_conn); if (session_stream_connect_notify (&new_sctp_conn->sub_conn[idx].connection, 0)) @@ -1244,12 +1301,18 @@ vlib_node_registration_t sctp6_established_phase_node; always_inline u16 sctp_handle_sack (sctp_selective_ack_chunk_t * sack_chunk, - sctp_connection_t * sctp_conn, vlib_buffer_t * b0, + sctp_connection_t * sctp_conn, u8 
idx, vlib_buffer_t * b0, u16 * next0) { - *next0 = - sctp_next_output (sctp_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX]. - connection.is_ip4); + /* Check that the LOCALLY generated tag is being used by the REMOTE peer as the verification tag */ + if (sctp_conn->local_tag != sack_chunk->sctp_hdr.verification_tag) + { + return SCTP_ERROR_INVALID_TAG; + } + + sctp_timer_update (sctp_conn, idx, SCTP_TIMER_T3_RXTX, SCTP_RTO_INIT); + + *next0 = sctp_next_output (sctp_conn->sub_conn[idx].connection.is_ip4); return SCTP_ERROR_NONE; } @@ -1404,12 +1467,13 @@ sctp46_listen_process_inline (vlib_main_t * vm, case INIT: sctp_connection_timers_init (child_conn); + sctp_init_snd_vars (child_conn); + error0 = sctp_handle_init (sctp_hdr, sctp_chunk_hdr, child_conn, b0, sctp_implied_length); - sctp_connection_init_vars (child_conn); - + sctp_init_mss (child_conn); if (error0 == SCTP_ERROR_NONE) { @@ -1536,25 +1600,25 @@ sctp46_established_phase_inline (vlib_main_t * vm, vlib_node_runtime_t * node, sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr; - transport_connection_t *t_conn = + transport_connection_t *trans_conn = &sctp_conn->sub_conn[idx].connection; - t_conn->lcl_port = sctp_hdr->dst_port; - t_conn->rmt_port = sctp_hdr->src_port; - t_conn->is_ip4 = is_ip4; + trans_conn->lcl_port = sctp_hdr->dst_port; + trans_conn->rmt_port = sctp_hdr->src_port; + trans_conn->is_ip4 = is_ip4; sctp_conn->sub_conn[idx].parent = sctp_conn; if (is_ip4) { - t_conn->lcl_ip.ip4.as_u32 = ip4_hdr->dst_address.as_u32; - t_conn->rmt_ip.ip4.as_u32 = ip4_hdr->src_address.as_u32; + trans_conn->lcl_ip.ip4.as_u32 = ip4_hdr->dst_address.as_u32; + trans_conn->rmt_ip.ip4.as_u32 = ip4_hdr->src_address.as_u32; } else { - clib_memcpy (&t_conn->lcl_ip.ip6, &ip6_hdr->dst_address, + clib_memcpy (&trans_conn->lcl_ip.ip6, &ip6_hdr->dst_address, sizeof (ip6_address_t)); - clib_memcpy (&t_conn->rmt_ip.ip6, &ip6_hdr->src_address, + clib_memcpy (&trans_conn->rmt_ip.ip6, &ip6_hdr->src_address, sizeof (ip6_address_t)); } 
@@ -1582,7 +1646,7 @@ sctp46_established_phase_inline (vlib_main_t * vm, vlib_node_runtime_t * node, case SACK: error0 = sctp_handle_sack ((sctp_selective_ack_chunk_t *) sctp_hdr, - sctp_conn, b0, &next0); + sctp_conn, idx, b0, &next0); break; case HEARTBEAT: @@ -1795,7 +1859,7 @@ sctp46_input_dispatcher (vlib_main_t * vm, vlib_node_runtime_t * node, sctp_header_t *sctp_hdr = 0; sctp_chunks_common_hdr_t *sctp_chunk_hdr = 0; sctp_connection_t *sctp_conn; - transport_connection_t *tconn; + transport_connection_t *trans_conn; ip4_header_t *ip4_hdr; ip6_header_t *ip6_hdr; u32 error0 = SCTP_ERROR_NO_LISTENER, next0 = SCTP_INPUT_NEXT_DROP; @@ -1827,14 +1891,14 @@ sctp46_input_dispatcher (vlib_main_t * vm, vlib_node_runtime_t * node, n_data_bytes0 = clib_net_to_host_u16 (ip4_hdr->length) - n_advance_bytes0; - tconn = session_lookup_connection_wt4 (fib_index0, - &ip4_hdr->dst_address, - &ip4_hdr->src_address, - sctp_hdr->dst_port, - sctp_hdr->src_port, - TRANSPORT_PROTO_SCTP, - my_thread_index, - &is_filtered); + trans_conn = session_lookup_connection_wt4 (fib_index0, + &ip4_hdr->dst_address, + &ip4_hdr->src_address, + sctp_hdr->dst_port, + sctp_hdr->src_port, + TRANSPORT_PROTO_SCTP, + my_thread_index, + &is_filtered); } else { @@ -1850,14 +1914,14 @@ sctp46_input_dispatcher (vlib_main_t * vm, vlib_node_runtime_t * node, n_advance_bytes0; n_advance_bytes0 += sizeof (ip6_hdr[0]); - tconn = session_lookup_connection_wt6 (fib_index0, - &ip6_hdr->dst_address, - &ip6_hdr->src_address, - sctp_hdr->dst_port, - sctp_hdr->src_port, - TRANSPORT_PROTO_SCTP, - my_thread_index, - &is_filtered); + trans_conn = session_lookup_connection_wt6 (fib_index0, + &ip6_hdr->dst_address, + &ip6_hdr->src_address, + sctp_hdr->dst_port, + sctp_hdr->src_port, + TRANSPORT_PROTO_SCTP, + my_thread_index, + &is_filtered); } /* Length check */ @@ -1867,7 +1931,7 @@ sctp46_input_dispatcher (vlib_main_t * vm, vlib_node_runtime_t * node, goto done; } - sctp_conn = sctp_get_connection_from_transport (tconn); + 
sctp_conn = sctp_get_connection_from_transport (trans_conn); vnet_sctp_common_hdr_params_net_to_host (sctp_chunk_hdr); u8 type = vnet_sctp_get_chunk_type (sctp_chunk_hdr); @@ -1882,7 +1946,7 @@ sctp46_input_dispatcher (vlib_main_t * vm, vlib_node_runtime_t * node, if (PREDICT_TRUE (0 != sctp_conn)) { /* Save connection index */ - vnet_buffer (b0)->sctp.connection_index = tconn->c_index; + vnet_buffer (b0)->sctp.connection_index = trans_conn->c_index; vnet_buffer (b0)->sctp.data_offset = n_advance_bytes0; vnet_buffer (b0)->sctp.data_len = n_data_bytes0; diff --git a/src/vnet/sctp/sctp_output.c b/src/vnet/sctp/sctp_output.c index 841444e20a3..7b22cc59ac4 100644 --- a/src/vnet/sctp/sctp_output.c +++ b/src/vnet/sctp/sctp_output.c @@ -224,16 +224,17 @@ ip6_sctp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, } void -sctp_push_ip_hdr (sctp_main_t * tm, sctp_sub_connection_t * tc, +sctp_push_ip_hdr (sctp_main_t * tm, sctp_sub_connection_t * sctp_sub_conn, vlib_buffer_t * b) { sctp_header_t *th = vlib_buffer_get_current (b); vlib_main_t *vm = vlib_get_main (); - if (tc->c_is_ip4) + if (sctp_sub_conn->c_is_ip4) { ip4_header_t *ih; - ih = vlib_buffer_push_ip4 (vm, b, &tc->c_lcl_ip4, - &tc->c_rmt_ip4, IP_PROTOCOL_SCTP, 1); + ih = vlib_buffer_push_ip4 (vm, b, &sctp_sub_conn->c_lcl_ip4, + &sctp_sub_conn->c_rmt_ip4, IP_PROTOCOL_SCTP, + 1); th->checksum = ip4_sctp_compute_checksum (vm, b, ih); } else @@ -241,8 +242,8 @@ sctp_push_ip_hdr (sctp_main_t * tm, sctp_sub_connection_t * tc, ip6_header_t *ih; int bogus = ~0; - ih = vlib_buffer_push_ip6 (vm, b, &tc->c_lcl_ip6, - &tc->c_rmt_ip6, IP_PROTOCOL_SCTP); + ih = vlib_buffer_push_ip6 (vm, b, &sctp_sub_conn->c_lcl_ip6, + &sctp_sub_conn->c_rmt_ip6, IP_PROTOCOL_SCTP); th->checksum = ip6_sctp_compute_checksum (vm, b, ih, &bogus); ASSERT (!bogus); } @@ -473,6 +474,11 @@ sctp_prepare_init_chunk (sctp_connection_t * sctp_conn, vlib_buffer_t * b) init_chunk->outbound_streams_count = clib_host_to_net_u16 (OUTBOUND_STREAMS_COUNT); + 
init_chunk->initial_tsn = + clib_host_to_net_u32 (sctp_conn->local_initial_tsn); + SCTP_CONN_TRACKING_DBG ("sctp_conn->local_initial_tsn = %u", + sctp_conn->local_initial_tsn); + sctp_conn->local_tag = init_chunk->initiate_tag; vnet_buffer (b)->sctp.connection_index = sub_conn->c_c_index; @@ -495,7 +501,8 @@ sctp_compute_mac () } void -sctp_prepare_cookie_ack_chunk (sctp_connection_t * tc, vlib_buffer_t * b) +sctp_prepare_cookie_ack_chunk (sctp_connection_t * sctp_conn, + vlib_buffer_t * b) { vlib_main_t *vm = vlib_get_main (); u8 idx = sctp_pick_conn_idx_on_chunk (COOKIE_ACK); @@ -515,18 +522,21 @@ sctp_prepare_cookie_ack_chunk (sctp_connection_t * tc, vlib_buffer_t * b) vlib_buffer_push_uninit (b, alloc_bytes); cookie_ack_chunk->sctp_hdr.checksum = 0; - cookie_ack_chunk->sctp_hdr.src_port = tc->sub_conn[idx].connection.lcl_port; - cookie_ack_chunk->sctp_hdr.dst_port = tc->sub_conn[idx].connection.rmt_port; - cookie_ack_chunk->sctp_hdr.verification_tag = tc->remote_tag; + cookie_ack_chunk->sctp_hdr.src_port = + sctp_conn->sub_conn[idx].connection.lcl_port; + cookie_ack_chunk->sctp_hdr.dst_port = + sctp_conn->sub_conn[idx].connection.rmt_port; + cookie_ack_chunk->sctp_hdr.verification_tag = sctp_conn->remote_tag; vnet_sctp_set_chunk_type (&cookie_ack_chunk->chunk_hdr, COOKIE_ACK); vnet_sctp_set_chunk_length (&cookie_ack_chunk->chunk_hdr, chunk_len); vnet_buffer (b)->sctp.connection_index = - tc->sub_conn[idx].connection.c_index; + sctp_conn->sub_conn[idx].connection.c_index; } void -sctp_prepare_cookie_echo_chunk (sctp_connection_t * tc, vlib_buffer_t * b, +sctp_prepare_cookie_echo_chunk (sctp_connection_t * sctp_conn, + vlib_buffer_t * b, sctp_state_cookie_param_t * sc) { vlib_main_t *vm = vlib_get_main (); @@ -545,23 +555,23 @@ sctp_prepare_cookie_echo_chunk (sctp_connection_t * tc, vlib_buffer_t * b, vlib_buffer_push_uninit (b, alloc_bytes); cookie_echo_chunk->sctp_hdr.checksum = 0; cookie_echo_chunk->sctp_hdr.src_port = - tc->sub_conn[idx].connection.lcl_port; 
+ sctp_conn->sub_conn[idx].connection.lcl_port; cookie_echo_chunk->sctp_hdr.dst_port = - tc->sub_conn[idx].connection.rmt_port; - cookie_echo_chunk->sctp_hdr.verification_tag = tc->remote_tag; + sctp_conn->sub_conn[idx].connection.rmt_port; + cookie_echo_chunk->sctp_hdr.verification_tag = sctp_conn->remote_tag; vnet_sctp_set_chunk_type (&cookie_echo_chunk->chunk_hdr, COOKIE_ECHO); vnet_sctp_set_chunk_length (&cookie_echo_chunk->chunk_hdr, chunk_len); clib_memcpy (&(cookie_echo_chunk->cookie), sc, sizeof (sctp_state_cookie_param_t)); vnet_buffer (b)->sctp.connection_index = - tc->sub_conn[idx].connection.c_index; + sctp_conn->sub_conn[idx].connection.c_index; } /** * Convert buffer to INIT-ACK */ void -sctp_prepare_initack_chunk (sctp_connection_t * tc, vlib_buffer_t * b, +sctp_prepare_initack_chunk (sctp_connection_t * sctp_conn, vlib_buffer_t * b, ip4_address_t * ip4_addr, ip6_address_t * ip6_addr) { @@ -588,7 +598,7 @@ sctp_prepare_initack_chunk (sctp_connection_t * tc, vlib_buffer_t * b, alloc_bytes += SCTP_IPV6_ADDRESS_TYPE_LENGTH; } - if (tc->sub_conn[idx].connection.is_ip4) + if (sctp_conn->sub_conn[idx].connection.is_ip4) alloc_bytes += sizeof (sctp_ipv4_addr_param_t); else alloc_bytes += sizeof (sctp_ipv6_addr_param_t); @@ -649,11 +659,11 @@ sctp_prepare_initack_chunk (sctp_connection_t * tc, vlib_buffer_t * b, pointer_offset += SCTP_IPV6_ADDRESS_TYPE_LENGTH; } - if (tc->sub_conn[idx].connection.is_ip4) + if (sctp_conn->sub_conn[idx].connection.is_ip4) { ip4_param = (sctp_ipv4_addr_param_t *) init_ack_chunk + pointer_offset; ip4_param->address.as_u32 = - tc->sub_conn[idx].connection.lcl_ip.ip4.as_u32; + sctp_conn->sub_conn[idx].connection.lcl_ip.ip4.as_u32; pointer_offset += sizeof (sctp_ipv4_addr_param_t); } @@ -661,44 +671,49 @@ sctp_prepare_initack_chunk (sctp_connection_t * tc, vlib_buffer_t * b, { ip6_param = (sctp_ipv6_addr_param_t *) init_ack_chunk + pointer_offset; ip6_param->address.as_u64[0] = - tc->sub_conn[idx].connection.lcl_ip.ip6.as_u64[0]; + 
sctp_conn->sub_conn[idx].connection.lcl_ip.ip6.as_u64[0]; ip6_param->address.as_u64[1] = - tc->sub_conn[idx].connection.lcl_ip.ip6.as_u64[1]; + sctp_conn->sub_conn[idx].connection.lcl_ip.ip6.as_u64[1]; pointer_offset += sizeof (sctp_ipv6_addr_param_t); } /* src_port & dst_port are already in network byte-order */ init_ack_chunk->sctp_hdr.checksum = 0; - init_ack_chunk->sctp_hdr.src_port = tc->sub_conn[idx].connection.lcl_port; - init_ack_chunk->sctp_hdr.dst_port = tc->sub_conn[idx].connection.rmt_port; - /* the tc->verification_tag is already in network byte-order (being a copy of the init_tag coming with the INIT chunk) */ - init_ack_chunk->sctp_hdr.verification_tag = tc->remote_tag; + init_ack_chunk->sctp_hdr.src_port = + sctp_conn->sub_conn[idx].connection.lcl_port; + init_ack_chunk->sctp_hdr.dst_port = + sctp_conn->sub_conn[idx].connection.rmt_port; + /* the sctp_conn->verification_tag is already in network byte-order (being a copy of the init_tag coming with the INIT chunk) */ + init_ack_chunk->sctp_hdr.verification_tag = sctp_conn->remote_tag; + init_ack_chunk->initial_tsn = + clib_host_to_net_u32 (sctp_conn->local_initial_tsn); + SCTP_CONN_TRACKING_DBG ("init_ack_chunk->initial_tsn = %u", + init_ack_chunk->initial_tsn); vnet_sctp_set_chunk_type (&init_ack_chunk->chunk_hdr, INIT_ACK); vnet_sctp_set_chunk_length (&init_ack_chunk->chunk_hdr, chunk_len); init_ack_chunk->initiate_tag = clib_host_to_net_u32 (random_u32 (&random_seed)); - /* As per RFC 4960, the initial_tsn may be the same value as the initiate_tag */ - init_ack_chunk->initial_tsn = init_ack_chunk->initiate_tag; + init_ack_chunk->a_rwnd = clib_host_to_net_u32 (DEFAULT_A_RWND); init_ack_chunk->inboud_streams_count = clib_host_to_net_u16 (INBOUND_STREAMS_COUNT); init_ack_chunk->outbound_streams_count = clib_host_to_net_u16 (OUTBOUND_STREAMS_COUNT); - tc->local_tag = init_ack_chunk->initiate_tag; + sctp_conn->local_tag = init_ack_chunk->initiate_tag; vnet_buffer (b)->sctp.connection_index = - 
tc->sub_conn[idx].connection.c_index; + sctp_conn->sub_conn[idx].connection.c_index; } /** * Convert buffer to SHUTDOWN */ void -sctp_prepare_shutdown_chunk (sctp_connection_t * tc, vlib_buffer_t * b) +sctp_prepare_shutdown_chunk (sctp_connection_t * sctp_conn, vlib_buffer_t * b) { vlib_main_t *vm = vlib_get_main (); u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN); @@ -718,30 +733,32 @@ sctp_prepare_shutdown_chunk (sctp_connection_t * tc, vlib_buffer_t * b) shutdown_chunk->sctp_hdr.checksum = 0; /* No need of host_to_net conversion, already in net-byte order */ - shutdown_chunk->sctp_hdr.src_port = tc->sub_conn[idx].connection.lcl_port; - shutdown_chunk->sctp_hdr.dst_port = tc->sub_conn[idx].connection.rmt_port; - shutdown_chunk->sctp_hdr.verification_tag = tc->remote_tag; + shutdown_chunk->sctp_hdr.src_port = + sctp_conn->sub_conn[idx].connection.lcl_port; + shutdown_chunk->sctp_hdr.dst_port = + sctp_conn->sub_conn[idx].connection.rmt_port; + shutdown_chunk->sctp_hdr.verification_tag = sctp_conn->remote_tag; vnet_sctp_set_chunk_type (&shutdown_chunk->chunk_hdr, SHUTDOWN); vnet_sctp_set_chunk_length (&shutdown_chunk->chunk_hdr, chunk_len); - shutdown_chunk->cumulative_tsn_ack = tc->rcv_las; + shutdown_chunk->cumulative_tsn_ack = sctp_conn->last_rcvd_tsn; vnet_buffer (b)->sctp.connection_index = - tc->sub_conn[idx].connection.c_index; + sctp_conn->sub_conn[idx].connection.c_index; } /* * Send SHUTDOWN */ void -sctp_send_shutdown (sctp_connection_t * tc) +sctp_send_shutdown (sctp_connection_t * sctp_conn) { vlib_buffer_t *b; u32 bi; sctp_main_t *tm = vnet_get_sctp_main (); vlib_main_t *vm = vlib_get_main (); - if (sctp_check_outstanding_data_chunks (tc) > 0) + if (sctp_check_outstanding_data_chunks (sctp_conn) > 0) return; if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi))) @@ -749,18 +766,20 @@ sctp_send_shutdown (sctp_connection_t * tc) b = vlib_get_buffer (vm, bi); sctp_init_buffer (vm, b); - sctp_prepare_shutdown_chunk (tc, b); + sctp_prepare_shutdown_chunk 
(sctp_conn, b); u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN); - sctp_push_ip_hdr (tm, &tc->sub_conn[idx], b); - sctp_enqueue_to_output_now (vm, b, bi, tc->sub_conn[idx].connection.is_ip4); + sctp_push_ip_hdr (tm, &sctp_conn->sub_conn[idx], b); + sctp_enqueue_to_output_now (vm, b, bi, + sctp_conn->sub_conn[idx].connection.is_ip4); } /** * Convert buffer to SHUTDOWN_ACK */ void -sctp_prepare_shutdown_ack_chunk (sctp_connection_t * tc, vlib_buffer_t * b) +sctp_prepare_shutdown_ack_chunk (sctp_connection_t * sctp_conn, + vlib_buffer_t * b) { u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN_ACK); u16 alloc_bytes = sizeof (sctp_shutdown_association_chunk_t); @@ -774,30 +793,30 @@ sctp_prepare_shutdown_ack_chunk (sctp_connection_t * tc, vlib_buffer_t * b) shutdown_ack_chunk->sctp_hdr.checksum = 0; /* No need of host_to_net conversion, already in net-byte order */ shutdown_ack_chunk->sctp_hdr.src_port = - tc->sub_conn[idx].connection.lcl_port; + sctp_conn->sub_conn[idx].connection.lcl_port; shutdown_ack_chunk->sctp_hdr.dst_port = - tc->sub_conn[idx].connection.rmt_port; - shutdown_ack_chunk->sctp_hdr.verification_tag = tc->remote_tag; + sctp_conn->sub_conn[idx].connection.rmt_port; + shutdown_ack_chunk->sctp_hdr.verification_tag = sctp_conn->remote_tag; vnet_sctp_set_chunk_type (&shutdown_ack_chunk->chunk_hdr, SHUTDOWN_ACK); vnet_sctp_set_chunk_length (&shutdown_ack_chunk->chunk_hdr, chunk_len); vnet_buffer (b)->sctp.connection_index = - tc->sub_conn[idx].connection.c_index; + sctp_conn->sub_conn[idx].connection.c_index; } /* * Send SHUTDOWN_ACK */ void -sctp_send_shutdown_ack (sctp_connection_t * tc) +sctp_send_shutdown_ack (sctp_connection_t * sctp_conn) { vlib_buffer_t *b; u32 bi; sctp_main_t *tm = vnet_get_sctp_main (); vlib_main_t *vm = vlib_get_main (); - if (sctp_check_outstanding_data_chunks (tc) > 0) + if (sctp_check_outstanding_data_chunks (sctp_conn) > 0) return; if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi))) @@ -805,22 +824,23 @@ 
sctp_send_shutdown_ack (sctp_connection_t * tc) b = vlib_get_buffer (vm, bi); sctp_init_buffer (vm, b); - sctp_prepare_shutdown_ack_chunk (tc, b); + sctp_prepare_shutdown_ack_chunk (sctp_conn, b); u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN_ACK); - sctp_push_ip_hdr (tm, &tc->sub_conn[idx], b); - sctp_enqueue_to_ip_lookup (vm, b, bi, tc->sub_conn[idx].connection.is_ip4); + sctp_push_ip_hdr (tm, &sctp_conn->sub_conn[idx], b); + sctp_enqueue_to_ip_lookup (vm, b, bi, + sctp_conn->sub_conn[idx].connection.is_ip4); /* Start the SCTP_TIMER_T2_SHUTDOWN timer */ - sctp_timer_set (tc, idx, SCTP_TIMER_T2_SHUTDOWN, SCTP_RTO_INIT); - tc->state = SCTP_STATE_SHUTDOWN_ACK_SENT; + sctp_timer_set (sctp_conn, idx, SCTP_TIMER_T2_SHUTDOWN, SCTP_RTO_INIT); + sctp_conn->state = SCTP_STATE_SHUTDOWN_ACK_SENT; } /** * Convert buffer to SACK */ void -sctp_prepare_sack_chunk (sctp_connection_t * tc, vlib_buffer_t * b) +sctp_prepare_sack_chunk (sctp_connection_t * sctp_conn, vlib_buffer_t * b) { vlib_main_t *vm = vlib_get_main (); u8 idx = sctp_pick_conn_idx_on_chunk (SACK); @@ -839,21 +859,84 @@ sctp_prepare_sack_chunk (sctp_connection_t * tc, vlib_buffer_t * b) sctp_selective_ack_chunk_t *sack = vlib_buffer_push_uninit (b, alloc_bytes); sack->sctp_hdr.checksum = 0; - sack->sctp_hdr.src_port = tc->sub_conn[idx].connection.lcl_port; - sack->sctp_hdr.dst_port = tc->sub_conn[idx].connection.rmt_port; - sack->sctp_hdr.verification_tag = tc->remote_tag; + sack->sctp_hdr.src_port = sctp_conn->sub_conn[idx].connection.lcl_port; + sack->sctp_hdr.dst_port = sctp_conn->sub_conn[idx].connection.rmt_port; + sack->sctp_hdr.verification_tag = sctp_conn->remote_tag; vnet_sctp_set_chunk_type (&sack->chunk_hdr, SACK); vnet_sctp_set_chunk_length (&sack->chunk_hdr, chunk_len); + sack->cumulative_tsn_ack = sctp_conn->next_tsn_expected; + + sctp_conn->ack_state = 0; + + vnet_buffer (b)->sctp.connection_index = + sctp_conn->sub_conn[idx].connection.c_index; +} + +/** + * Convert buffer to HEARTBEAT + */ +void 
+sctp_prepare_heartbeat_chunk (sctp_connection_t * sctp_conn, + vlib_buffer_t * b) +{ + vlib_main_t *vm = vlib_get_main (); + u8 idx = sctp_pick_conn_idx_on_chunk (HEARTBEAT); + u16 alloc_bytes = sizeof (sctp_hb_req_chunk_t); + + b = sctp_reuse_buffer (vm, b); + + /* As per RFC 4960 the chunk_length value does NOT contemplate + * the size of the first header (see sctp_header_t) and any padding + */ + u16 chunk_len = alloc_bytes - sizeof (sctp_header_t); + + alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); + + sctp_hb_req_chunk_t *hb_req = vlib_buffer_push_uninit (b, alloc_bytes); + + hb_req->sctp_hdr.checksum = 0; + /* No need of host_to_net conversion, already in net-byte order */ + hb_req->sctp_hdr.src_port = sctp_conn->sub_conn[idx].connection.lcl_port; + hb_req->sctp_hdr.dst_port = sctp_conn->sub_conn[idx].connection.rmt_port; + hb_req->sctp_hdr.verification_tag = sctp_conn->remote_tag; + hb_req->hb_info.param_hdr.type = clib_host_to_net_u16 (1); + hb_req->hb_info.param_hdr.length = + clib_host_to_net_u16 (sizeof (hb_req->hb_info.hb_info)); + + vnet_sctp_set_chunk_type (&hb_req->chunk_hdr, HEARTBEAT); + vnet_sctp_set_chunk_length (&hb_req->chunk_hdr, chunk_len); + vnet_buffer (b)->sctp.connection_index = - tc->sub_conn[idx].connection.c_index; + sctp_conn->sub_conn[idx].connection.c_index; +} + +void +sctp_send_heartbeat (sctp_connection_t * sctp_conn) +{ + vlib_buffer_t *b; + u32 bi; + sctp_main_t *tm = vnet_get_sctp_main (); + vlib_main_t *vm = vlib_get_main (); + + if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi))) + return; + + b = vlib_get_buffer (vm, bi); + sctp_init_buffer (vm, b); + sctp_prepare_heartbeat_chunk (sctp_conn, b); + + u8 idx = sctp_pick_conn_idx_on_state (SCTP_STATE_ESTABLISHED); + sctp_push_ip_hdr (tm, &sctp_conn->sub_conn[idx], b); + sctp_enqueue_to_ip_lookup (vm, b, bi, + sctp_conn->sub_conn[idx].connection.is_ip4); } /** * Convert buffer to SHUTDOWN_COMPLETE */ void -sctp_prepare_shutdown_complete_chunk 
(sctp_connection_t * tc, +sctp_prepare_shutdown_complete_chunk (sctp_connection_t * sctp_conn, vlib_buffer_t * b) { u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN_COMPLETE); @@ -868,20 +951,20 @@ sctp_prepare_shutdown_complete_chunk (sctp_connection_t * tc, shutdown_complete->sctp_hdr.checksum = 0; /* No need of host_to_net conversion, already in net-byte order */ shutdown_complete->sctp_hdr.src_port = - tc->sub_conn[idx].connection.lcl_port; + sctp_conn->sub_conn[idx].connection.lcl_port; shutdown_complete->sctp_hdr.dst_port = - tc->sub_conn[idx].connection.rmt_port; - shutdown_complete->sctp_hdr.verification_tag = tc->remote_tag; + sctp_conn->sub_conn[idx].connection.rmt_port; + shutdown_complete->sctp_hdr.verification_tag = sctp_conn->remote_tag; vnet_sctp_set_chunk_type (&shutdown_complete->chunk_hdr, SHUTDOWN_COMPLETE); vnet_sctp_set_chunk_length (&shutdown_complete->chunk_hdr, chunk_len); vnet_buffer (b)->sctp.connection_index = - tc->sub_conn[idx].connection.c_index; + sctp_conn->sub_conn[idx].connection.c_index; } void -sctp_send_shutdown_complete (sctp_connection_t * tc) +sctp_send_shutdown_complete (sctp_connection_t * sctp_conn) { vlib_buffer_t *b; u32 bi; @@ -893,13 +976,14 @@ sctp_send_shutdown_complete (sctp_connection_t * tc) b = vlib_get_buffer (vm, bi); sctp_init_buffer (vm, b); - sctp_prepare_shutdown_complete_chunk (tc, b); + sctp_prepare_shutdown_complete_chunk (sctp_conn, b); u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN_COMPLETE); - sctp_push_ip_hdr (tm, &tc->sub_conn[idx], b); - sctp_enqueue_to_ip_lookup (vm, b, bi, tc->sub_conn[idx].connection.is_ip4); + sctp_push_ip_hdr (tm, &sctp_conn->sub_conn[idx], b); + sctp_enqueue_to_ip_lookup (vm, b, bi, + sctp_conn->sub_conn[idx].connection.is_ip4); - tc->state = SCTP_STATE_CLOSED; + sctp_conn->state = SCTP_STATE_CLOSED; } @@ -907,7 +991,7 @@ sctp_send_shutdown_complete (sctp_connection_t * tc) * Send INIT */ void -sctp_send_init (sctp_connection_t * tc) +sctp_send_init (sctp_connection_t * 
sctp_conn) { vlib_buffer_t *b; u32 bi; @@ -921,20 +1005,20 @@ sctp_send_init (sctp_connection_t * tc) u8 idx = sctp_pick_conn_idx_on_chunk (INIT); sctp_init_buffer (vm, b); - sctp_prepare_init_chunk (tc, b); + sctp_prepare_init_chunk (sctp_conn, b); /* Measure RTT with this */ - tc->rtt_ts = sctp_time_now (); - tc->rtt_seq = tc->snd_nxt; - tc->rto_boff = 0; + sctp_conn->rtt_ts = sctp_time_now (); + sctp_conn->rtt_seq = sctp_conn->next_tsn; - sctp_push_ip_hdr (tm, &tc->sub_conn[idx], b); - sctp_enqueue_to_ip_lookup_now (vm, b, bi, tc->sub_conn[idx].c_is_ip4); + sctp_push_ip_hdr (tm, &sctp_conn->sub_conn[idx], b); + sctp_enqueue_to_ip_lookup_now (vm, b, bi, + sctp_conn->sub_conn[idx].c_is_ip4); /* Start the T1_INIT timer */ - sctp_timer_set (tc, idx, SCTP_TIMER_T1_INIT, SCTP_RTO_INIT); + sctp_timer_set (sctp_conn, idx, SCTP_TIMER_T1_INIT, SCTP_RTO_INIT); /* Change state to COOKIE_WAIT */ - tc->state = SCTP_STATE_COOKIE_WAIT; + sctp_conn->state = SCTP_STATE_COOKIE_WAIT; } always_inline u8 @@ -947,7 +1031,7 @@ sctp_in_cong_recovery (sctp_connection_t * sctp_conn) * Push SCTP header and update connection variables */ static void -sctp_push_hdr_i (sctp_connection_t * tc, vlib_buffer_t * b, +sctp_push_hdr_i (sctp_connection_t * sctp_conn, vlib_buffer_t * b, sctp_state_t next_state) { u8 idx = sctp_pick_conn_idx_on_chunk (DATA); @@ -971,11 +1055,13 @@ sctp_push_hdr_i (sctp_connection_t * tc, vlib_buffer_t * b, vlib_buffer_push_uninit (b, bytes_to_add); data_chunk->sctp_hdr.checksum = 0; - data_chunk->sctp_hdr.src_port = tc->sub_conn[idx].connection.lcl_port; - data_chunk->sctp_hdr.dst_port = tc->sub_conn[idx].connection.rmt_port; - data_chunk->sctp_hdr.verification_tag = tc->remote_tag; + data_chunk->sctp_hdr.src_port = + sctp_conn->sub_conn[idx].connection.lcl_port; + data_chunk->sctp_hdr.dst_port = + sctp_conn->sub_conn[idx].connection.rmt_port; + data_chunk->sctp_hdr.verification_tag = sctp_conn->remote_tag; - data_chunk->tsn = clib_host_to_net_u32 (0); + 
data_chunk->tsn = clib_host_to_net_u32 (sctp_conn->next_tsn); data_chunk->stream_id = clib_host_to_net_u16 (0); data_chunk->stream_seq = clib_host_to_net_u16 (0); @@ -985,20 +1071,23 @@ sctp_push_hdr_i (sctp_connection_t * tc, vlib_buffer_t * b, SCTP_ADV_DBG_OUTPUT ("POINTER_WITH_DATA = %p, DATA_OFFSET = %u", b->data, b->current_data); + sctp_conn->next_tsn += data_len; + vnet_buffer (b)->sctp.connection_index = - tc->sub_conn[idx].connection.c_index; + sctp_conn->sub_conn[idx].connection.c_index; } u32 -sctp_push_header (transport_connection_t * tconn, vlib_buffer_t * b) +sctp_push_header (transport_connection_t * trans_conn, vlib_buffer_t * b) { - sctp_connection_t *tc = sctp_get_connection_from_transport (tconn); - sctp_push_hdr_i (tc, b, SCTP_STATE_ESTABLISHED); + sctp_connection_t *sctp_conn = + sctp_get_connection_from_transport (trans_conn); + sctp_push_hdr_i (sctp_conn, b, SCTP_STATE_ESTABLISHED); - if (tc->rtt_ts == 0 && !sctp_in_cong_recovery (tc)) + if (sctp_conn->rtt_ts == 0 && !sctp_in_cong_recovery (sctp_conn)) { - tc->rtt_ts = sctp_time_now (); - tc->rtt_seq = tc->snd_nxt; + sctp_conn->rtt_ts = sctp_time_now (); + sctp_conn->rtt_seq = sctp_conn->next_tsn; } sctp_trajectory_add_start (b0, 3); @@ -1030,7 +1119,7 @@ sctp46_output_inline (vlib_main_t * vm, u32 bi0; vlib_buffer_t *b0; sctp_header_t *sctp_hdr = 0; - sctp_connection_t *tc0; + sctp_connection_t *sctp_conn; sctp_tx_trace_t *t0; sctp_header_t *th0 = 0; u32 error0 = SCTP_ERROR_PKTS_SENT, next0 = @@ -1048,80 +1137,82 @@ sctp46_output_inline (vlib_main_t * vm, n_left_to_next -= 1; b0 = vlib_get_buffer (vm, bi0); - tc0 = sctp_connection_get (vnet_buffer (b0)->sctp.connection_index, - my_thread_index); + sctp_conn = + sctp_connection_get (vnet_buffer (b0)->sctp.connection_index, + my_thread_index); - if (PREDICT_FALSE (tc0 == 0)) + if (PREDICT_FALSE (sctp_conn == 0)) { error0 = SCTP_ERROR_INVALID_CONNECTION; next0 = SCTP_OUTPUT_NEXT_DROP; goto done; } - u8 idx = sctp_pick_conn_idx_on_state 
(tc0->state); + u8 idx = sctp_pick_conn_idx_on_state (sctp_conn->state); th0 = vlib_buffer_get_current (b0); if (is_ip4) { - ip4_header_t *th0 = vlib_buffer_push_ip4 (vm, - b0, - &tc0->sub_conn - [idx].connection. - lcl_ip.ip4, - &tc0-> - sub_conn - [idx].connection. - rmt_ip.ip4, - IP_PROTOCOL_SCTP, 1); - - u32 checksum = ip4_sctp_compute_checksum (vm, b0, th0); - - sctp_hdr = ip4_next_header (th0); + ip4_header_t *iph4 = vlib_buffer_push_ip4 (vm, + b0, + &sctp_conn->sub_conn + [idx].connection. + lcl_ip.ip4, + &sctp_conn-> + sub_conn + [idx].connection. + rmt_ip.ip4, + IP_PROTOCOL_SCTP, 1); + + u32 checksum = ip4_sctp_compute_checksum (vm, b0, iph4); + + sctp_hdr = ip4_next_header (iph4); sctp_hdr->checksum = checksum; vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data; - th0->checksum = 0; #if SCTP_DEBUG_STATE_MACHINE - packet_length = clib_net_to_host_u16 (th0->length); + packet_length = clib_net_to_host_u16 (iph4->length); #endif } else { - ip6_header_t *ih0; - ih0 = vlib_buffer_push_ip6 (vm, - b0, - &tc0->sub_conn[idx]. - connection.lcl_ip.ip6, - &tc0->sub_conn[idx]. - connection.rmt_ip.ip6, - IP_PROTOCOL_SCTP); + ip6_header_t *iph6 = vlib_buffer_push_ip6 (vm, + b0, + &sctp_conn->sub_conn + [idx]. + connection.lcl_ip. + ip6, + &sctp_conn->sub_conn + [idx]. + connection.rmt_ip. 
+ ip6, + IP_PROTOCOL_SCTP); int bogus = ~0; - u32 checksum = ip6_sctp_compute_checksum (vm, b0, ih0, &bogus); + u32 checksum = ip6_sctp_compute_checksum (vm, b0, iph6, &bogus); ASSERT (!bogus); - sctp_hdr = ip6_next_header (ih0); + sctp_hdr = ip6_next_header (iph6); sctp_hdr->checksum = checksum; - vnet_buffer (b0)->l3_hdr_offset = (u8 *) ih0 - b0->data; + vnet_buffer (b0)->l3_hdr_offset = (u8 *) iph6 - b0->data; vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data; - th0->checksum = 0; #if SCTP_DEBUG_STATE_MACHINE - packet_length = clib_net_to_host_u16 (ih0->payload_length); + packet_length = clib_net_to_host_u16 (iph6->payload_length); #endif } u8 is_valid = - (tc0->sub_conn[idx].connection.lcl_port == + (sctp_conn->sub_conn[idx].connection.lcl_port == sctp_hdr->src_port - || tc0->sub_conn[idx].connection.lcl_port == + || sctp_conn->sub_conn[idx].connection.lcl_port == sctp_hdr->dst_port) - && (tc0->sub_conn[idx].connection.rmt_port == + && (sctp_conn->sub_conn[idx].connection.rmt_port == sctp_hdr->dst_port - || tc0->sub_conn[idx].connection.rmt_port == + || sctp_conn->sub_conn[idx].connection.rmt_port == sctp_hdr->src_port); sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr; @@ -1134,13 +1225,14 @@ sctp46_output_inline (vlib_main_t * vm, "chunk_type = %u [%s], " "connection.lcl_port = %u, sctp_hdr->src_port = %u, " "connection.rmt_port = %u, sctp_hdr->dst_port = %u", - tc0->sub_conn + sctp_conn->sub_conn [idx].connection.c_index, packet_length, chunk_type, sctp_chunk_to_string (chunk_type), - tc0->sub_conn[idx].connection.lcl_port, - sctp_hdr->src_port, - tc0->sub_conn[idx].connection.rmt_port, + sctp_conn->sub_conn[idx]. + connection.lcl_port, sctp_hdr->src_port, + sctp_conn->sub_conn[idx]. 
+ connection.rmt_port, sctp_hdr->dst_port); error0 = SCTP_ERROR_UNKOWN_CHUNK; @@ -1151,8 +1243,8 @@ sctp46_output_inline (vlib_main_t * vm, SCTP_DBG_STATE_MACHINE ("CONN_INDEX = %u, CURR_CONN_STATE = %u (%s), " "CHUNK_TYPE = %s, " "SRC_PORT = %u, DST_PORT = %u", - tc0->sub_conn[idx].connection.c_index, - tc0->state, sctp_state_to_string (tc0->state), + sctp_conn->sub_conn[idx].connection.c_index, + sctp_conn->state, sctp_state_to_string (sctp_conn->state), sctp_chunk_to_string (chunk_type), full_hdr->hdr.src_port, full_hdr->hdr.dst_port); @@ -1160,7 +1252,7 @@ sctp46_output_inline (vlib_main_t * vm, SCTP_ADV_DBG_OUTPUT ("PACKET_LENGTH = %u", packet_length); /* Let's make sure the state-machine does not send anything crazy */ - switch (tc0->state) + switch (sctp_conn->state) { case SCTP_STATE_CLOSED: { @@ -1169,7 +1261,7 @@ sctp46_output_inline (vlib_main_t * vm, SCTP_DBG_STATE_MACHINE ("Sending the wrong chunk (%s) based on state-machine status (%s)", sctp_chunk_to_string (chunk_type), - sctp_state_to_string (tc0->state)); + sctp_state_to_string (sctp_conn->state)); error0 = SCTP_ERROR_UNKOWN_CHUNK; next0 = SCTP_OUTPUT_NEXT_DROP; @@ -1185,7 +1277,7 @@ sctp46_output_inline (vlib_main_t * vm, SCTP_DBG_STATE_MACHINE ("Sending the wrong chunk (%s) based on state-machine status (%s)", sctp_chunk_to_string (chunk_type), - sctp_state_to_string (tc0->state)); + sctp_state_to_string (sctp_conn->state)); error0 = SCTP_ERROR_UNKOWN_CHUNK; next0 = SCTP_OUTPUT_NEXT_DROP; @@ -1198,29 +1290,35 @@ sctp46_output_inline (vlib_main_t * vm, SCTP_DBG_STATE_MACHINE ("Sending the wrong chunk (%s) based on state-machine status (%s)", sctp_chunk_to_string (chunk_type), - sctp_state_to_string (tc0->state)); + sctp_state_to_string (sctp_conn->state)); error0 = SCTP_ERROR_UNKOWN_CHUNK; next0 = SCTP_OUTPUT_NEXT_DROP; goto done; } /* Change state */ - tc0->state = SCTP_STATE_COOKIE_ECHOED; + sctp_conn->state = SCTP_STATE_COOKIE_ECHOED; break; default: SCTP_DBG_STATE_MACHINE ("Sending chunk (%s) 
based on state-machine status (%s)", sctp_chunk_to_string (chunk_type), - sctp_state_to_string (tc0->state)); + sctp_state_to_string (sctp_conn->state)); break; } if (chunk_type == SHUTDOWN) { /* Start the SCTP_TIMER_T2_SHUTDOWN timer */ - sctp_timer_set (tc0, idx, SCTP_TIMER_T2_SHUTDOWN, + sctp_timer_set (sctp_conn, idx, SCTP_TIMER_T2_SHUTDOWN, SCTP_RTO_INIT); - tc0->state = SCTP_STATE_SHUTDOWN_SENT; + sctp_conn->state = SCTP_STATE_SHUTDOWN_SENT; + } + + if (chunk_type == DATA) + { + sctp_timer_update (sctp_conn, idx, SCTP_TIMER_T3_RXTX, + SCTP_RTO_INIT); } vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0; @@ -1231,8 +1329,8 @@ sctp46_output_inline (vlib_main_t * vm, SCTP_DBG_STATE_MACHINE ("CONNECTION_INDEX = %u, " "NEW_STATE = %s, " "CHUNK_SENT = %s", - tc0->sub_conn[idx].connection.c_index, - sctp_state_to_string (tc0->state), + sctp_conn->sub_conn[idx].connection.c_index, + sctp_state_to_string (sctp_conn->state), sctp_chunk_to_string (chunk_type)); vnet_sctp_common_hdr_params_host_to_net (&full_hdr->common_hdr); @@ -1251,7 +1349,7 @@ sctp46_output_inline (vlib_main_t * vm, { memset (&t0->sctp_header, 0, sizeof (t0->sctp_header)); } - clib_memcpy (&t0->sctp_connection, tc0, + clib_memcpy (&t0->sctp_connection, sctp_conn, sizeof (t0->sctp_connection)); } diff --git a/src/vnet/sctp/sctp_packet.h b/src/vnet/sctp/sctp_packet.h index 4c358db6d3b..d1fe7ab71ea 100644 --- a/src/vnet/sctp/sctp_packet.h +++ b/src/vnet/sctp/sctp_packet.h @@ -503,17 +503,6 @@ vnet_sctp_calculate_padding (u16 base_length) return (4 - base_length % 4); } -always_inline u16 -vnet_sctp_calculate_payload_data_padding (sctp_payload_data_chunk_t * p) -{ - u16 payload_length = vnet_sctp_get_chunk_length (&p->chunk_hdr) - - sizeof (p->chunk_hdr) - - sizeof (p->tsn) - - sizeof (p->stream_id) - sizeof (p->stream_seq) - sizeof (p->payload_id); - - return vnet_sctp_calculate_padding (payload_length); -} - #define DEFAULT_A_RWND 1480 #define INBOUND_STREAMS_COUNT 1 #define OUTBOUND_STREAMS_COUNT 1 -- cgit 
1.2.3-korg