diff options
Diffstat (limited to 'src/plugins/sctp')
-rw-r--r-- | src/plugins/sctp/CMakeLists.txt | 38 | ||||
-rw-r--r-- | src/plugins/sctp/sctp.api | 51 | ||||
-rw-r--r-- | src/plugins/sctp/sctp.c | 1128 | ||||
-rw-r--r-- | src/plugins/sctp/sctp.h | 1019 | ||||
-rw-r--r-- | src/plugins/sctp/sctp_all_api_h.h | 16 | ||||
-rw-r--r-- | src/plugins/sctp/sctp_api.c | 158 | ||||
-rw-r--r-- | src/plugins/sctp/sctp_debug.h | 69 | ||||
-rw-r--r-- | src/plugins/sctp/sctp_error.def | 52 | ||||
-rw-r--r-- | src/plugins/sctp/sctp_format.c | 40 | ||||
-rw-r--r-- | src/plugins/sctp/sctp_input.c | 2532 | ||||
-rw-r--r-- | src/plugins/sctp/sctp_msg_enum.h | 28 | ||||
-rw-r--r-- | src/plugins/sctp/sctp_output.c | 1568 | ||||
-rw-r--r-- | src/plugins/sctp/sctp_output_node.c | 397 | ||||
-rw-r--r-- | src/plugins/sctp/sctp_packet.h | 1470 | ||||
-rw-r--r-- | src/plugins/sctp/sctp_pg.c | 30 | ||||
-rw-r--r-- | src/plugins/sctp/sctp_timer.h | 29 |
16 files changed, 8625 insertions, 0 deletions
diff --git a/src/plugins/sctp/CMakeLists.txt b/src/plugins/sctp/CMakeLists.txt new file mode 100644 index 00000000000..0c28fe70524 --- /dev/null +++ b/src/plugins/sctp/CMakeLists.txt @@ -0,0 +1,38 @@ +# Copyright (c) 2019 Cisco and/or its affiliates. +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at: +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +add_vpp_plugin(sctp + SOURCES + sctp.c + sctp_api.c + sctp_pg.c + sctp_input.c + sctp_output.c + sctp_output_node.c + sctp_format.c + + API_FILES + sctp.api + + MULTIARCH_SOURCES + sctp_output_node.c + sctp_input.c + + INSTALL_HEADERS + sctp_all_api_h.h + sctp_msg_enum.h + sctp_error.def + sctp_packet.h + sctp_timer.h + sctp.h +) diff --git a/src/plugins/sctp/sctp.api b/src/plugins/sctp/sctp.api new file mode 100644 index 00000000000..fb228617216 --- /dev/null +++ b/src/plugins/sctp/sctp.api @@ -0,0 +1,51 @@ +/* + * Copyright (c) 2018 SUSE LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +option version = "1.0.0"; + +/** \brief Configure SCTP source addresses, for active-open SCTP sessions + + SCTP src/dst ports are 16 bits + @param client_index - opaque cookie to identify the sender + @param context - sender context, to match reply w/ request + @param is_ipv6 - 1 for ipv6, 0 for ipv4 + @param vrf_id - fib table / vrf id for local adjacencies + @param src_address - src address that SCTP will use for this sub-conn + @param dst_address - dst address that SCTP will use for this sub-conn +*/ +autoreply define sctp_add_src_dst_connection { + u32 client_index; + u32 context; + u8 is_ipv6; + u32 vrf_id; + u8 src_address[16]; + u8 dst_address[16]; + }; + +autoreply define sctp_del_src_dst_connection { + u32 client_index; + u32 context; + u8 is_ipv6; + u32 vrf_id; + u8 src_address[16]; + u8 dst_address[16]; + }; + +autoreply define sctp_config { + u32 client_index; + u32 context; + u8 never_delay_sack; + u8 never_bundle; + };
\ No newline at end of file diff --git a/src/plugins/sctp/sctp.c b/src/plugins/sctp/sctp.c new file mode 100644 index 00000000000..14958e55d60 --- /dev/null +++ b/src/plugins/sctp/sctp.c @@ -0,0 +1,1128 @@ +/* + * Copyright (c) 2017 SUSE LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/plugin/plugin.h> +#include <vpp/app/version.h> + +#include <sctp/sctp.h> +#include <sctp/sctp_debug.h> + +sctp_main_t sctp_main; + +static u32 +sctp_connection_bind (u32 session_index, transport_endpoint_t * tep) +{ + sctp_main_t *tm = &sctp_main; + sctp_connection_t *listener; + void *iface_ip; + u32 mtu = 1460; + + pool_get (tm->listener_pool, listener); + clib_memset (listener, 0, sizeof (*listener)); + + listener->sub_conn[SCTP_PRIMARY_PATH_IDX].subconn_idx = + SCTP_PRIMARY_PATH_IDX; + listener->sub_conn[SCTP_PRIMARY_PATH_IDX].c_c_index = + listener - tm->listener_pool; + listener->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.lcl_port = tep->port; + + /* If we are provided a sw_if_index, bind using one of its IPs */ + if (ip_is_zero (&tep->ip, 1) && tep->sw_if_index != ENDPOINT_INVALID_INDEX) + { + if ((iface_ip = ip_interface_get_first_ip (tep->sw_if_index, + tep->is_ip4))) + ip_set (&tep->ip, iface_ip, tep->is_ip4); + } + ip_copy (&listener->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.lcl_ip, + &tep->ip, tep->is_ip4); + + if (tep->sw_if_index != ENDPOINT_INVALID_INDEX) + mtu = tep->is_ip4 ? 
vnet_sw_interface_get_mtu (vnet_get_main (), + tep->sw_if_index, + VNET_MTU_IP4) : + vnet_sw_interface_get_mtu (vnet_get_main (), tep->sw_if_index, + VNET_MTU_IP6); + + listener->sub_conn[SCTP_PRIMARY_PATH_IDX].PMTU = mtu; + listener->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.is_ip4 = tep->is_ip4; + listener->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.proto = + TRANSPORT_PROTO_SCTP; + listener->sub_conn[SCTP_PRIMARY_PATH_IDX].c_s_index = session_index; + listener->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.fib_index = + tep->fib_index; + listener->state = SCTP_STATE_CLOSED; + + sctp_connection_timers_init (listener); + + return listener->sub_conn[SCTP_PRIMARY_PATH_IDX].c_c_index; +} + +u32 +sctp_session_bind (u32 session_index, transport_endpoint_t * tep) +{ + return sctp_connection_bind (session_index, tep); +} + +static void +sctp_connection_unbind (u32 listener_index) +{ + sctp_main_t *tm = vnet_get_sctp_main (); + sctp_connection_t *sctp_conn; + + sctp_conn = pool_elt_at_index (tm->listener_pool, listener_index); + + /* Poison the entry */ + if (CLIB_DEBUG > 0) + clib_memset (sctp_conn, 0xFA, sizeof (*sctp_conn)); + + pool_put_index (tm->listener_pool, listener_index); +} + +u32 +sctp_session_unbind (u32 listener_index) +{ + sctp_connection_unbind (listener_index); + return 0; +} + +void +sctp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add) +{ + sctp_main_t *tm = &sctp_main; + if (is_ip4) + tm->punt_unknown4 = is_add; + else + tm->punt_unknown6 = is_add; +} + +static int +sctp_alloc_custom_local_endpoint (sctp_main_t * tm, ip46_address_t * lcl_addr, + u16 * lcl_port, u8 is_ip4) +{ + int index, port; + if (is_ip4) + { + index = tm->last_v4_address_rotor++; + if (tm->last_v4_address_rotor >= vec_len (tm->ip4_src_addresses)) + tm->last_v4_address_rotor = 0; + lcl_addr->ip4.as_u32 = tm->ip4_src_addresses[index].as_u32; + } + else + { + index = tm->last_v6_address_rotor++; + if (tm->last_v6_address_rotor >= vec_len (tm->ip6_src_addresses)) + 
tm->last_v6_address_rotor = 0; + clib_memcpy (&lcl_addr->ip6, &tm->ip6_src_addresses[index], + sizeof (ip6_address_t)); + } + port = transport_alloc_local_port (TRANSPORT_PROTO_SCTP, lcl_addr); + if (port < 1) + { + clib_warning ("Failed to allocate src port"); + return -1; + } + *lcl_port = port; + return 0; +} + +/** + * Initialize all connection timers as invalid + */ +void +sctp_connection_timers_init (sctp_connection_t * sctp_conn) +{ + int i, j; + + /* Set all to invalid */ + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + sctp_conn->sub_conn[i].RTO = SCTP_RTO_INIT; + + for (j = 0; j < SCTP_N_TIMERS; j++) + { + sctp_conn->sub_conn[i].timers[j] = SCTP_TIMER_HANDLE_INVALID; + } + } +} + +/** + * Stop all connection timers + */ +void +sctp_connection_timers_reset (sctp_connection_t * sctp_conn) +{ + int i, j; + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + for (j = 0; j < SCTP_N_TIMERS; j++) + sctp_timer_reset (sctp_conn, i, j); + } +} + +const char *sctp_fsm_states[] = { +#define _(sym, str) str, + foreach_sctp_fsm_state +#undef _ +}; + +u8 * +format_sctp_state (u8 * s, va_list * args) +{ + u32 state = va_arg (*args, u32); + + if (state < SCTP_N_STATES) + s = format (s, "%s", sctp_fsm_states[state]); + else + s = format (s, "UNKNOWN (%d (0x%x))", state, state); + return s; +} + +u8 * +format_sctp_connection_id (u8 * s, va_list * args) +{ + sctp_connection_t *sctp_conn = va_arg (*args, sctp_connection_t *); + if (!sctp_conn) + return s; + + u8 i; + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + if (i > 0 && sctp_conn->sub_conn[i].state == SCTP_SUBCONN_STATE_DOWN) + continue; + if (sctp_conn->sub_conn[i].connection.is_ip4) + { + s = format (s, "[#%d][%s] %U:%d->%U:%d", + sctp_conn->sub_conn[i].connection.thread_index, + "S", + format_ip4_address, + &sctp_conn->sub_conn[i].connection.lcl_ip.ip4, + clib_net_to_host_u16 (sctp_conn->sub_conn[i]. 
+ connection.lcl_port), + format_ip4_address, + &sctp_conn->sub_conn[i].connection.rmt_ip.ip4, + clib_net_to_host_u16 (sctp_conn->sub_conn[i]. + connection.rmt_port)); + } + else + { + s = format (s, "[#%d][%s] %U:%d->%U:%d", + sctp_conn->sub_conn[i].connection.thread_index, + "S", + format_ip6_address, + &sctp_conn->sub_conn[i].connection.lcl_ip.ip6, + clib_net_to_host_u16 (sctp_conn->sub_conn[i]. + connection.lcl_port), + format_ip6_address, + &sctp_conn->sub_conn[i].connection.rmt_ip.ip6, + clib_net_to_host_u16 (sctp_conn->sub_conn[i]. + connection.rmt_port)); + } + } + return s; +} + +u8 * +format_sctp_connection (u8 * s, va_list * args) +{ + sctp_connection_t *sctp_conn = va_arg (*args, sctp_connection_t *); + u32 verbose = va_arg (*args, u32); + + if (!sctp_conn) + return s; + s = format (s, "%-50U", format_sctp_connection_id, sctp_conn); + if (verbose) + { + s = format (s, "%-15U", format_sctp_state, sctp_conn->state); + if (verbose > 1) + s = format (s, "\n"); + } + + return s; +} + +/** + * Initialize connection send variables. + */ +void +sctp_init_snd_vars (sctp_connection_t * sctp_conn) +{ + u32 time_now; + /* + * We use the time to randomize iss and for setting up the initial + * timestamp. Make sure it's updated otherwise syn and ack in the + * handshake may make it look as if time has flown in the opposite + * direction for us. 
+ */ + + sctp_set_time_now (vlib_get_thread_index ()); + time_now = sctp_time_now (); + + sctp_conn->local_initial_tsn = random_u32 (&time_now); + sctp_conn->last_unacked_tsn = sctp_conn->local_initial_tsn; + sctp_conn->next_tsn = sctp_conn->local_initial_tsn + 1; + + sctp_conn->remote_initial_tsn = 0x0; + sctp_conn->last_rcvd_tsn = sctp_conn->remote_initial_tsn; +} + +/** + * Reserve the next available sub-connection slot of the connection at the + * head of the given thread's pool and seed its indices. + * + * Returns the connection untouched when no slot is free; callers detect that + * case by calling sctp_next_avail_subconn themselves. + */ +always_inline sctp_connection_t * +sctp_sub_connection_add (u8 thread_index) +{ + sctp_main_t *tm = vnet_get_sctp_main (); + sctp_connection_t *sctp_conn = tm->connections[thread_index]; + + u8 subconn_idx = sctp_next_avail_subconn (sctp_conn); + + /* ASSERT is a debug-only check, so guard the array write explicitly and + * let the caller report SCTP_ERROR_MAX_CONNECTIONS */ + if (subconn_idx >= MAX_SCTP_CONNECTIONS) + return sctp_conn; + + sctp_conn->sub_conn[subconn_idx].connection.c_index = + sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.c_index; + sctp_conn->sub_conn[subconn_idx].connection.thread_index = thread_index; + sctp_conn->sub_conn[subconn_idx].subconn_idx = subconn_idx; + + return sctp_conn; +} + +/** + * Add an IPv4 sub-connection (additional path) to the connection selected by + * the calling thread. + * + * @param vm vlib main; its thread_index selects the connection + * @param lcl_addr local IPv4 address of the new path + * @param rmt_addr remote IPv4 address of the new path + * @return SCTP_ERROR_NONE, or SCTP_ERROR_MAX_CONNECTIONS when no slot is free + */ +u8 +sctp_sub_connection_add_ip4 (vlib_main_t * vm, + ip4_address_t * lcl_addr, + ip4_address_t * rmt_addr) +{ + sctp_connection_t *sctp_conn = sctp_sub_connection_add (vm->thread_index); + + u8 subconn_idx = sctp_next_avail_subconn (sctp_conn); + + if (subconn_idx == MAX_SCTP_CONNECTIONS) + return SCTP_ERROR_MAX_CONNECTIONS; + + /* Copy the address bytes into the ip4 member. Taking the address of the + * pointer argument would store the pointer value, not the IPv4 address. */ + clib_memcpy (&sctp_conn->sub_conn[subconn_idx].connection.lcl_ip.ip4, + lcl_addr, sizeof (*lcl_addr)); + + clib_memcpy (&sctp_conn->sub_conn[subconn_idx].connection.rmt_ip.ip4, + rmt_addr, sizeof (*rmt_addr)); + + /* sctp_sub_connection_del_ip4 only considers sub-connections flagged IPv4 */ + sctp_conn->sub_conn[subconn_idx].connection.is_ip4 = 1; + + sctp_conn->forming_association_changed = 1; + + return SCTP_ERROR_NONE; +} + +u8 +sctp_sub_connection_del_ip4 (ip4_address_t * lcl_addr, + ip4_address_t * rmt_addr) +{ + sctp_main_t *sctp_main = vnet_get_sctp_main (); + + u32 thread_idx = vlib_get_thread_index (); + u8 i; + + ASSERT (thread_idx == 0); + + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + sctp_connection_t *sctp_conn = sctp_main->connections[thread_idx]; + sctp_sub_connection_t *sub_conn = + 
&sctp_main->connections[thread_idx]->sub_conn[i]; + ip46_address_t *lcl_ip = + &sctp_main->connections[thread_idx]->sub_conn[i].connection.lcl_ip; + ip46_address_t *rmt_ip = + &sctp_main->connections[thread_idx]->sub_conn[i].connection.rmt_ip; + + if (!sub_conn->connection.is_ip4) + continue; + if (lcl_ip->ip4.as_u32 == lcl_addr->as_u32 && + rmt_ip->ip4.as_u32 == rmt_addr->as_u32) + { + sub_conn->state = SCTP_SUBCONN_STATE_DOWN; + sctp_conn->forming_association_changed = 1; + break; + } + } + return SCTP_ERROR_NONE; +} + +/** + * Add an IPv6 sub-connection (additional path) to the connection selected by + * the calling thread. + * + * @param vm vlib main; its thread_index selects the connection + * @param lcl_addr local IPv6 address of the new path + * @param rmt_addr remote IPv6 address of the new path + * @return SCTP_ERROR_NONE, or SCTP_ERROR_MAX_CONNECTIONS when no slot is free + */ +u8 +sctp_sub_connection_add_ip6 (vlib_main_t * vm, + ip6_address_t * lcl_addr, + ip6_address_t * rmt_addr) +{ + sctp_connection_t *sctp_conn = sctp_sub_connection_add (vm->thread_index); + + u8 subconn_idx = sctp_next_avail_subconn (sctp_conn); + + if (subconn_idx == MAX_SCTP_CONNECTIONS) + return SCTP_ERROR_MAX_CONNECTIONS; + + /* Copy all 16 address bytes. Taking the address of the pointer argument + * would store only the pointer value, not the IPv6 address. */ + clib_memcpy (&sctp_conn->sub_conn[subconn_idx].connection.lcl_ip.ip6, + lcl_addr, sizeof (*lcl_addr)); + + clib_memcpy (&sctp_conn->sub_conn[subconn_idx].connection.rmt_ip.ip6, + rmt_addr, sizeof (*rmt_addr)); + + sctp_conn->sub_conn[subconn_idx].connection.is_ip4 = 0; + + sctp_conn->forming_association_changed = 1; + + return SCTP_ERROR_NONE; +} + +/** + * Mark the IPv6 sub-connection matching the given address pair as down. + * + * Only the first match is affected. Always returns SCTP_ERROR_NONE, also + * when nothing matched. + */ +u8 +sctp_sub_connection_del_ip6 (ip6_address_t * lcl_addr, + ip6_address_t * rmt_addr) +{ + sctp_main_t *sctp_main = vnet_get_sctp_main (); + + u32 thread_idx = vlib_get_thread_index (); + u8 i; + + ASSERT (thread_idx == 0); + + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + sctp_connection_t *sctp_conn = sctp_main->connections[thread_idx]; + sctp_sub_connection_t *sub_conn = + &sctp_main->connections[thread_idx]->sub_conn[i]; + ip46_address_t *lcl_ip = + &sctp_main->connections[thread_idx]->sub_conn[i].connection.lcl_ip; + ip46_address_t *rmt_ip = + &sctp_main->connections[thread_idx]->sub_conn[i].connection.rmt_ip; + + /* Only IPv6 sub-connections can match an IPv6 address pair */ + if (sub_conn->connection.is_ip4) + continue; + if ((lcl_ip->ip6.as_u64[0] == lcl_addr->as_u64[0] + && lcl_ip->ip6.as_u64[1] == lcl_addr->as_u64[1]) + && (rmt_ip->ip6.as_u64[0] == rmt_addr->as_u64[0] + && 
rmt_ip->ip6.as_u64[1] == rmt_addr->as_u64[1])) + { + sub_conn->state = SCTP_SUBCONN_STATE_DOWN; + sctp_conn->forming_association_changed = 1; + break; + } + } + return SCTP_ERROR_NONE; +} + +u8 +sctp_configure (sctp_user_configuration_t config) +{ + sctp_main_t *sctp_main = vnet_get_sctp_main (); + + u32 thread_idx = vlib_get_thread_index (); + + sctp_main->connections[thread_idx]->conn_config.never_delay_sack = + config.never_delay_sack; + sctp_main->connections[thread_idx]->conn_config.never_bundle = + config.never_bundle; + + return 0; +} + +sctp_connection_t * +sctp_connection_new (u8 thread_index) +{ + sctp_main_t *sctp_main = vnet_get_sctp_main (); + sctp_connection_t *sctp_conn; + + pool_get (sctp_main->connections[thread_index], sctp_conn); + clib_memset (sctp_conn, 0, sizeof (*sctp_conn)); + sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].subconn_idx = + SCTP_PRIMARY_PATH_IDX; + sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].c_c_index = + sctp_conn - sctp_main->connections[thread_index]; + sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].c_thread_index = thread_index; + sctp_conn->local_tag = 0; + + return sctp_conn; +} + +sctp_connection_t * +sctp_half_open_connection_new (u8 thread_index) +{ + sctp_main_t *tm = vnet_get_sctp_main (); + sctp_connection_t *sctp_conn = 0; + ASSERT (vlib_get_thread_index () == 0); + pool_get (tm->half_open_connections, sctp_conn); + clib_memset (sctp_conn, 0, sizeof (*sctp_conn)); + sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].c_c_index = + sctp_conn - tm->half_open_connections; + sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].subconn_idx = + SCTP_PRIMARY_PATH_IDX; + return sctp_conn; +} + +static inline int +sctp_connection_open (transport_endpoint_cfg_t * rmt) +{ + sctp_main_t *tm = vnet_get_sctp_main (); + sctp_connection_t *sctp_conn; + ip46_address_t lcl_addr; + u16 lcl_port; + uword thread_id; + u32 mtu = 1460; + int rv; + + u8 idx = SCTP_PRIMARY_PATH_IDX; + + /* + * Allocate local endpoint + */ + if ((rmt->is_ip4 && vec_len 
(tm->ip4_src_addresses)) + || (!rmt->is_ip4 && vec_len (tm->ip6_src_addresses))) + rv = sctp_alloc_custom_local_endpoint (tm, &lcl_addr, &lcl_port, + rmt->is_ip4); + else + rv = transport_alloc_local_endpoint (TRANSPORT_PROTO_SCTP, + rmt, &lcl_addr, &lcl_port); + + if (rv) + return -1; + + /* + * Create connection and send INIT CHUNK + */ + thread_id = vlib_get_thread_index (); + ASSERT (thread_id == 0); + + clib_spinlock_lock_if_init (&tm->half_open_lock); + sctp_conn = sctp_half_open_connection_new (thread_id); + if (rmt->peer.sw_if_index != ENDPOINT_INVALID_INDEX) + mtu = rmt->is_ip4 ? vnet_sw_interface_get_mtu (vnet_get_main (), + rmt->peer.sw_if_index, + VNET_MTU_IP4) : + vnet_sw_interface_get_mtu (vnet_get_main (), rmt->peer.sw_if_index, + VNET_MTU_IP6); + sctp_conn->sub_conn[idx].PMTU = mtu; + + transport_connection_t *trans_conn = &sctp_conn->sub_conn[idx].connection; + ip_copy (&trans_conn->rmt_ip, &rmt->ip, rmt->is_ip4); + ip_copy (&trans_conn->lcl_ip, &lcl_addr, rmt->is_ip4); + sctp_conn->sub_conn[idx].subconn_idx = idx; + trans_conn->rmt_port = rmt->port; + trans_conn->lcl_port = clib_host_to_net_u16 (lcl_port); + trans_conn->is_ip4 = rmt->is_ip4; + trans_conn->proto = TRANSPORT_PROTO_SCTP; + trans_conn->fib_index = rmt->fib_index; + + sctp_connection_timers_init (sctp_conn); + /* The other connection vars will be initialized after INIT_ACK chunk received */ + sctp_init_snd_vars (sctp_conn); + + sctp_send_init (sctp_conn); + + clib_spinlock_unlock_if_init (&tm->half_open_lock); + + return sctp_conn->sub_conn[idx].connection.c_index; +} + +/** + * Cleans up connection state. + * + * No notifications. 
+ */ +void +sctp_connection_cleanup (sctp_connection_t * sctp_conn) +{ + sctp_main_t *tm = &sctp_main; + u8 i; + + /* Cleanup local endpoint if this was an active connect */ + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + transport_endpoint_cleanup (TRANSPORT_PROTO_SCTP, + &sctp_conn->sub_conn[i].connection.lcl_ip, + sctp_conn->sub_conn[i].connection.lcl_port); + + int thread_index = + sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.thread_index; + + /* Make sure all timers are cleared */ + sctp_connection_timers_reset (sctp_conn); + + /* Poison the entry */ + if (CLIB_DEBUG > 0) + clib_memset (sctp_conn, 0xFA, sizeof (*sctp_conn)); + pool_put (tm->connections[thread_index], sctp_conn); +} + +int +sctp_session_open (transport_endpoint_cfg_t * tep) +{ + return sctp_connection_open (tep); +} + +u16 +sctp_check_outstanding_data_chunks (sctp_connection_t * sctp_conn) +{ + u8 i; + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + if (sctp_conn->sub_conn[i].state == SCTP_SUBCONN_STATE_DOWN) + continue; + + if (sctp_conn->sub_conn[i].is_retransmitting == 1 || + sctp_conn->sub_conn[i].enqueue_state != SCTP_ERROR_ENQUEUED) + { + SCTP_DBG_OUTPUT + ("Connection %u has still DATA to be enqueued inboud / outboud", + sctp_conn->sub_conn[i].connection.c_index); + return 1; + } + + } + return 0; /* Indicates no more data to be read/sent */ +} + +void +sctp_connection_close (sctp_connection_t * sctp_conn) +{ + SCTP_DBG ("Closing connection %u...", + sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.c_index); + + sctp_conn->state = SCTP_STATE_SHUTDOWN_PENDING; + + sctp_send_shutdown (sctp_conn); +} + +void +sctp_session_close (u32 conn_index, u32 thread_index) +{ + ASSERT (thread_index == 0); + + sctp_connection_t *sctp_conn = + sctp_connection_get (conn_index, thread_index); + if (sctp_conn != NULL) + sctp_connection_close (sctp_conn); +} + +void +sctp_session_cleanup (u32 conn_index, u32 thread_index) +{ + sctp_connection_t *sctp_conn = + sctp_connection_get (conn_index, 
thread_index); + + if (sctp_conn != NULL) + { + sctp_connection_timers_reset (sctp_conn); + /* Wait for the session tx events to clear */ + sctp_conn->state = SCTP_STATE_CLOSED; + } +} + +/** + * Compute maximum segment size for session layer. + */ +u16 +sctp_session_send_mss (transport_connection_t * trans_conn) +{ + sctp_connection_t *sctp_conn = + sctp_get_connection_from_transport (trans_conn); + + if (sctp_conn == NULL) + { + SCTP_DBG ("sctp_conn == NULL"); + return 0; + } + + update_cwnd (sctp_conn); + update_smallest_pmtu_idx (sctp_conn); + + u8 idx = sctp_data_subconn_select (sctp_conn); + return sctp_conn->sub_conn[idx].cwnd; +} + +/** + * Compute how many payload bytes may currently be sent on the connection. + * + * The usable window is min (peer_rwnd, cwnd of the selected sub-connection) + * minus the TSNs in flight, minus the DATA chunk and SCTP header overhead. + * + * @param sctp_conn connection to evaluate; must not be NULL + * @return number of bytes that may be sent, 0 when the window is closed + */ +u16 +sctp_snd_space (sctp_connection_t * sctp_conn) +{ + /* RFC 4960 Section 6.1; point (A) */ + if (sctp_conn->peer_rwnd == 0) + return 0; + + u8 idx = sctp_data_subconn_select (sctp_conn); + + u32 available_wnd = + clib_min (sctp_conn->peer_rwnd, sctp_conn->sub_conn[idx].cwnd); + int flight_size = (int) (sctp_conn->next_tsn - sctp_conn->last_unacked_tsn); + + if (available_wnd <= flight_size) + return 0; + + /* Finally, let's subtract the DATA chunk headers overhead. Return 0 rather + * than letting the unsigned subtraction wrap around when the remaining + * window cannot even hold the headers. */ + u32 usable_wnd = available_wnd - flight_size; + u32 hdr_overhead = + sizeof (sctp_payload_data_chunk_t) + sizeof (sctp_full_hdr_t); + + if (usable_wnd <= hdr_overhead) + return 0; + + /* The return type is 16 bits wide: saturate instead of truncating */ + return clib_min (usable_wnd - hdr_overhead, (u32) 0xFFFF); +} + +/** + * Compute TX window session is allowed to fill. 
+ */ +u32 +sctp_session_send_space (transport_connection_t * trans_conn) +{ + sctp_connection_t *sctp_conn = + sctp_get_connection_from_transport (trans_conn); + + /* Same guard as sctp_session_send_mss: sctp_snd_space dereferences the + * connection unconditionally */ + if (sctp_conn == NULL) + return 0; + + return sctp_snd_space (sctp_conn); +} + +transport_connection_t * +sctp_session_get_transport (u32 conn_index, u32 thread_index) +{ + sctp_connection_t *sctp_conn = + sctp_connection_get (conn_index, thread_index); + + if (PREDICT_TRUE (sctp_conn != NULL)) + return &sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].connection; + + return NULL; +} + +transport_connection_t * +sctp_session_get_listener (u32 listener_index) +{ + sctp_main_t *tm = vnet_get_sctp_main (); + sctp_connection_t *sctp_conn; + sctp_conn = pool_elt_at_index (tm->listener_pool, listener_index); + return &sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].connection; +} + +u8 * +format_sctp_session (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + u32 thread_index = va_arg (*args, u32); + u32 verbose = va_arg (*args, u32); + sctp_connection_t *tc; + + tc = sctp_connection_get (tci, thread_index); + if (tc) + s = format (s, "%U", format_sctp_connection, tc, verbose); + else + s = format (s, "empty\n"); + return s; +} + +u8 * +format_sctp_listener_session (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + sctp_connection_t *tc = sctp_listener_get (tci); + return format (s, "%U", format_sctp_connection_id, tc); +} + +void +sctp_expired_timers_cb (u32 conn_index, u32 timer_id) +{ + sctp_connection_t *sctp_conn; + + SCTP_DBG ("%s expired", sctp_timer_to_string (timer_id)); + + sctp_conn = sctp_connection_get (conn_index, vlib_get_thread_index ()); + /* note: the connection may have already disappeared */ + if (PREDICT_FALSE (sctp_conn == 0)) + return; + + /* NOTE(review): conn_index is the connection pool index passed to + * sctp_connection_get above, yet it is used here to index sub_conn[]. + * Presumably a sub-connection index was intended -- confirm, since a large + * conn_index reads past the sub_conn array. */ + if (sctp_conn->sub_conn[conn_index].unacknowledged_hb > + SCTP_PATH_MAX_RETRANS) + { + // The remote-peer is considered to be unreachable hence shutting down + u8 i, total_subs_down = 1; + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + if (sctp_conn->sub_conn[i].state == 
SCTP_SUBCONN_STATE_DOWN) + continue; + + u32 now = sctp_time_now (); + if (now > (sctp_conn->sub_conn[i].last_seen + SCTP_HB_INTERVAL)) + { + total_subs_down += 1; + sctp_conn->sub_conn[i].state = SCTP_SUBCONN_STATE_DOWN; + } + } + + if (total_subs_down == MAX_SCTP_CONNECTIONS) + { + /* Start cleanup. App wasn't notified yet so use delete notify as + * opposed to delete to cleanup session layer state. */ + session_transport_delete_notify (&sctp_conn->sub_conn + [SCTP_PRIMARY_PATH_IDX].connection); + + sctp_connection_timers_reset (sctp_conn); + + sctp_connection_cleanup (sctp_conn); + } + return; + } + + switch (timer_id) + { + case SCTP_TIMER_T1_INIT: + sctp_send_init (sctp_conn); + break; + case SCTP_TIMER_T1_COOKIE: + sctp_send_cookie_echo (sctp_conn); + break; + case SCTP_TIMER_T2_SHUTDOWN: + sctp_send_shutdown (sctp_conn); + break; + case SCTP_TIMER_T3_RXTX: + sctp_timer_reset (sctp_conn, conn_index, timer_id); + sctp_conn->flags |= SCTP_CONN_RECOVERY; + sctp_data_retransmit (sctp_conn); + break; + case SCTP_TIMER_T4_HEARTBEAT: + sctp_timer_reset (sctp_conn, conn_index, timer_id); + goto heartbeat; + } + return; + +heartbeat: + sctp_send_heartbeat (sctp_conn); +} + +static void +sctp_expired_timers_dispatch (u32 * expired_timers) +{ + int i; + u32 connection_index, timer_id; + + for (i = 0; i < vec_len (expired_timers); i++) + { + /* Get session index and timer id */ + connection_index = expired_timers[i] & 0x0FFFFFFF; + timer_id = expired_timers[i] >> 28; + + SCTP_DBG ("Expired timer ID: %u", timer_id); + + /* Handle expiration */ + sctp_expired_timers_cb (connection_index, timer_id); + } +} + +void +sctp_initialize_timer_wheels (sctp_main_t * tm) +{ + tw_timer_wheel_16t_2w_512sl_t *tw; + /* *INDENT-OFF* */ + foreach_vlib_main (({ + tw = &tm->timer_wheels[ii]; + tw_timer_wheel_init_16t_2w_512sl (tw, sctp_expired_timers_dispatch, + 100e-3 /* timer period 100ms */ , ~0); + tw->last_run_time = vlib_time_now (this_vlib_main); + })); + /* *INDENT-ON* */ +} + 
+clib_error_t * +sctp_main_enable (vlib_main_t * vm) +{ + sctp_main_t *tm = vnet_get_sctp_main (); + vlib_thread_main_t *vtm = vlib_get_thread_main (); + clib_error_t *error = 0; + u32 num_threads; + int thread; + sctp_connection_t *sctp_conn __attribute__ ((unused)); + u32 preallocated_connections_per_thread; + + if ((error = vlib_call_init_function (vm, ip_main_init))) + return error; + if ((error = vlib_call_init_function (vm, ip4_lookup_init))) + return error; + if ((error = vlib_call_init_function (vm, ip6_lookup_init))) + return error; + + /* + * Registrations + */ + + ip4_register_protocol (IP_PROTOCOL_SCTP, sctp4_input_node.index); + ip6_register_protocol (IP_PROTOCOL_SCTP, sctp6_input_node.index); + + /* + * Initialize data structures + */ + + num_threads = 1 /* main thread */ + vtm->n_threads; + vec_validate (tm->connections, num_threads - 1); + + /* + * Preallocate connections. Assume that thread 0 won't + * use preallocated threads when running multi-core + */ + if (num_threads == 1) + { + thread = 0; + preallocated_connections_per_thread = tm->preallocated_connections; + } + else + { + thread = 1; + preallocated_connections_per_thread = + tm->preallocated_connections / (num_threads - 1); + } + for (; thread < num_threads; thread++) + { + if (preallocated_connections_per_thread) + pool_init_fixed (tm->connections[thread], + preallocated_connections_per_thread); + } + + /* Initialize per worker thread tx buffers (used for control messages) */ + vec_validate (tm->tx_buffers, num_threads - 1); + + /* Initialize timer wheels */ + vec_validate (tm->timer_wheels, num_threads - 1); + sctp_initialize_timer_wheels (tm); + + /* Initialize clocks per tick for SCTP timestamp. Used to compute + * monotonically increasing timestamps. 
*/ + tm->tstamp_ticks_per_clock = vm->clib_time.seconds_per_clock + / SCTP_TSTAMP_RESOLUTION; + + if (num_threads > 1) + { + clib_spinlock_init (&tm->half_open_lock); + } + + vec_validate (tm->tx_frames[0], num_threads - 1); + vec_validate (tm->tx_frames[1], num_threads - 1); + vec_validate (tm->ip_lookup_tx_frames[0], num_threads - 1); + vec_validate (tm->ip_lookup_tx_frames[1], num_threads - 1); + + tm->bytes_per_buffer = vlib_buffer_get_default_data_size (vm); + + vec_validate (tm->time_now, num_threads - 1); + return error; +} + +clib_error_t * +sctp_transport_enable_disable (vlib_main_t * vm, u8 is_en) +{ + if (is_en) + { + if (sctp_main.is_enabled) + return 0; + + return sctp_main_enable (vm); + } + else + { + sctp_main.is_enabled = 0; + } + + return 0; +} + +transport_connection_t * +sctp_half_open_session_get_transport (u32 conn_index) +{ + sctp_connection_t *sctp_conn = sctp_half_open_connection_get (conn_index); + return &sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].connection; +} + +u8 * +format_sctp_half_open (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + sctp_connection_t *sctp_conn = sctp_half_open_connection_get (tci); + return format (s, "%U", format_sctp_connection_id, sctp_conn); +} + +void +sctp_update_time (f64 now, u8 thread_index) +{ + sctp_set_time_now (thread_index); + tw_timer_expire_timers_16t_2w_512sl (&sctp_main.timer_wheels[thread_index], + now); + sctp_flush_frames_to_output (thread_index); +} + +/* *INDENT-OFF* */ +static const transport_proto_vft_t sctp_proto = { + .enable = sctp_transport_enable_disable, + .start_listen = sctp_session_bind, + .stop_listen = sctp_session_unbind, + .connect = sctp_session_open, + .close = sctp_session_close, + .cleanup = sctp_session_cleanup, + .push_header = sctp_push_header, + .send_mss = sctp_session_send_mss, + .send_space = sctp_session_send_space, + .update_time = sctp_update_time, + .get_connection = sctp_session_get_transport, + .get_listener = sctp_session_get_listener, + 
.get_half_open = sctp_half_open_session_get_transport, + .format_connection = format_sctp_session, + .format_listener = format_sctp_listener_session, + .format_half_open = format_sctp_half_open, + .transport_options = { + .tx_type = TRANSPORT_TX_DEQUEUE, + .service_type = TRANSPORT_SERVICE_VC, + }, +}; +/* *INDENT-ON* */ + +clib_error_t * +sctp_enable_disable (vlib_main_t * vm, u8 is_en) +{ + sctp_main_t *sm = vnet_get_sctp_main (); + ip_main_t *im = &ip_main; + ip_protocol_info_t *pi; + vlib_node_t *node; + + if (!sm->is_init && is_en) + { + node = vlib_get_node_by_name (vm, (u8 *) "sctp4-established"); + sm->sctp4_established_phase_node_index = node->index; + + node = vlib_get_node_by_name (vm, (u8 *) "sctp6-established"); + sm->sctp6_established_phase_node_index = node->index; + + sm->is_init = 1; + + /* Register with IP for header parsing */ + pi = ip_get_protocol_info (im, IP_PROTOCOL_SCTP); + if (pi == 0) + return clib_error_return (0, "SCTP protocol info AWOL"); + pi->format_header = format_sctp_header; + pi->unformat_pg_edit = unformat_pg_sctp_header; + + /* Register as transport with session layer */ + transport_register_protocol (TRANSPORT_PROTO_SCTP, &sctp_proto, + FIB_PROTOCOL_IP4, sctp4_output_node.index); + transport_register_protocol (TRANSPORT_PROTO_SCTP, &sctp_proto, + FIB_PROTOCOL_IP6, sctp6_output_node.index); + } + + sctp_transport_enable_disable (vm, is_en); + return 0; +} + +static u8 * +sctp_format_buffer_opaque_helper (const vlib_buffer_t * b, u8 * s) +{ + sctp_buffer_opaque_t *o = sctp_buffer_opaque (b); + + s = format (s, + "sctp.connection_index: %d, sctp.sid: %d, sctp.ssn: %d, " + "sctp.tsn: %d, sctp.hdr_offset: %d", + o->sctp.connection_index, + (u32) (o->sctp.sid), + (u32) (o->sctp.ssn), + (u32) (o->sctp.tsn), (u32) (o->sctp.hdr_offset)); + vec_add1 (s, '\n'); + + s = format + (s, "sctp.data_offset: %d, sctp.data_len: %d, sctp.subconn_idx: %d, " + "sctp.flags: 0x%x", + (u32) (o->sctp.data_offset), + (u32) (o->sctp.data_len), + (u32) 
(o->sctp.subconn_idx), (u32) (o->sctp.flags)); + vec_add1 (s, '\n'); + return s; +} + +clib_error_t * +sctp_init (vlib_main_t * vm) +{ + sctp_main_t *sm = vnet_get_sctp_main (); + + /* Session layer, and by implication SCTP, are disabled by default */ + sm->is_enabled = 0; + sm->is_init = 0; + + /* initialize binary API */ + sctp_plugin_api_hookup (vm); + + vnet_register_format_buffer_opaque_helper + (sctp_format_buffer_opaque_helper); + return 0; +} + +VLIB_INIT_FUNCTION (sctp_init); + +static clib_error_t * +show_sctp_punt_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd_arg) +{ + sctp_main_t *tm = &sctp_main; + if (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + return clib_error_return (0, "unknown input `%U'", format_unformat_error, + input); + vlib_cli_output (vm, "IPv4 UDP punt: %s", + tm->punt_unknown4 ? "enabled" : "disabled"); + vlib_cli_output (vm, "IPv6 UDP punt: %s", + tm->punt_unknown6 ? "enabled" : "disabled"); + return 0; +} +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_tcp_punt_command, static) = +{ + .path = "show sctp punt", + .short_help = "show sctp punt", + .function = show_sctp_punt_fn, +}; +/* *INDENT-ON* */ + +static clib_error_t * +sctp_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd_arg) +{ + unformat_input_t _line_input, *line_input = &_line_input; + clib_error_t *error; + u8 is_en; + + if (!unformat_user (input, unformat_line_input, line_input)) + return clib_error_return (0, "expected enable | disable"); + + while (unformat_check_input (line_input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (line_input, "enable")) + is_en = 1; + else if (unformat (line_input, "disable")) + is_en = 0; + else + { + error = clib_error_return (0, "unknown input `%U'", + format_unformat_error, line_input); + unformat_free (line_input); + return error; + } + } + + unformat_free (line_input); + + return sctp_enable_disable (vm, is_en); +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (show_sctp_command, 
static) = +{ + .path = "sctp", + .short_help = "sctp [enable | disable]", + .function = sctp_fn, +}; + +/* *INDENT-OFF* */ +VLIB_PLUGIN_REGISTER () = +{ + .version = VPP_BUILD_VER, + .description = "Stream Control Transmission Protocol (SCTP)", + .default_disabled = 1, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/sctp/sctp.h b/src/plugins/sctp/sctp.h new file mode 100644 index 00000000000..a99b01c1c0a --- /dev/null +++ b/src/plugins/sctp/sctp.h @@ -0,0 +1,1019 @@ +/* + * Copyright (c) 2017 SUSE LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef included_vnet_sctp_h +#define included_vnet_sctp_h + +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <sctp/sctp_timer.h> +#include <sctp/sctp_packet.h> +#include <vnet/session/transport.h> +#include <vnet/session/session.h> + +/* SCTP buffer opaque definition */ +typedef struct +{ + struct + { + u32 connection_index; + u16 sid; /**< Stream ID */ + u16 ssn; /**< Stream Sequence Number */ + u32 tsn; /**< Transmission Sequence Number */ + u16 hdr_offset; /**< offset relative to ip hdr */ + u16 data_offset; /**< offset relative to ip hdr */ + u16 data_len; /**< data len */ + u8 subconn_idx; /**< index of the sub_connection being used */ + u8 flags; + } sctp; +} sctp_buffer_opaque_t; + +STATIC_ASSERT (sizeof (sctp_buffer_opaque_t) <= + STRUCT_SIZE_OF (vnet_buffer_opaque_t, unused), + "sctp_buffer_opaque_t too large for vnet_buffer_opaque_t"); + +#define sctp_buffer_opaque(b) \ + ((sctp_buffer_opaque_t *)((u8 *)((b)->opaque) + \ +STRUCT_OFFSET_OF (vnet_buffer_opaque_t, unused))) + + +/* SCTP timers */ +#define foreach_sctp_timer \ + _(T1_INIT, "T1_INIT") \ + _(T1_COOKIE, "T1_COOKIE") \ + _(T2_SHUTDOWN, "T2_SHUTDOWN") \ + _(T3_RXTX, "T3_RXTX") \ + _(T4_HEARTBEAT, "T4_HB") \ + _(T5_SHUTDOWN_GUARD, "T5_SHUTDOWN_GUARD") + +typedef enum _sctp_timers +{ +#define _(sym, str) SCTP_TIMER_##sym, + foreach_sctp_timer +#undef _ + SCTP_N_TIMERS +} sctp_timers_e; + +#define SCTP_TIMER_HANDLE_INVALID ((u32) ~0) + +always_inline char * +sctp_timer_to_string (u8 timer_id) +{ + switch (timer_id) + { + case SCTP_TIMER_T1_INIT: + return "SCTP_TIMER_T1_INIT"; + case SCTP_TIMER_T1_COOKIE: + return "SCTP_TIMER_T1_COOKIE"; + case SCTP_TIMER_T2_SHUTDOWN: + return "SCTP_TIMER_T2_SHUTDOWN"; + case SCTP_TIMER_T3_RXTX: + return "SCTP_TIMER_T3_RXTX"; + case SCTP_TIMER_T4_HEARTBEAT: + return "SCTP_TIMER_T4_HEARTBEAT"; + case SCTP_TIMER_T5_SHUTDOWN_GUARD: + return "SCTP_TIMER_T5_SHUTDOWN_GUARD"; + } + return NULL; +} + +typedef enum _sctp_error +{ +#define sctp_error(n,s) 
SCTP_ERROR_##n, +#include <sctp/sctp_error.def> +#undef sctp_error + SCTP_N_ERROR, +} sctp_error_t; + +#define NO_FLAG 0 + +#define IS_T_BIT_SET(var) ((var) & (1)) +#define IS_E_BIT_SET(var) ((var) & (1)) +#define IS_B_BIT_SET(var) ((var) & (1<<1)) +#define IS_U_BIT_SET(var) ((var) & (1<<2)) + +#define MAX_SCTP_CONNECTIONS 8 +#define SCTP_PRIMARY_PATH_IDX 0 + +#if (VLIB_BUFFER_TRACE_TRAJECTORY) +#define sctp_trajectory_add_start(b, start) \ +{ \ + (*vlib_buffer_trace_trajectory_cb) (b, start); \ +} +#else +#define sctp_trajectory_add_start(b, start) +#endif + +enum _sctp_subconn_state +{ + SCTP_SUBCONN_STATE_DOWN = 0, + SCTP_SUBCONN_STATE_UP, + SCTP_SUBCONN_STATE_ALLOW_HB, + SCTP_SUBCONN_AWAITING_SACK, + SCTP_SUBCONN_SACK_RECEIVED +}; + +#define SCTP_INITIAL_SSHTRESH 65535 +typedef struct _sctp_sub_connection +{ + transport_connection_t connection; /**< Common transport data. First! */ + + u8 subconn_idx; /**< This indicates the position of this sub-connection in the super-set container of connections pool */ + u32 error_count; /**< The current error count for this destination. */ + u32 error_threshold; /**< Current error threshold for this destination, + i.e. what value marks the destination down if error count reaches this value. */ + u32 cwnd; /**< Congestion control window (cwnd, in bytes), which is adjusted by + the sender based on observed network conditions. */ + u32 ssthresh; /**< Slow-start threshold (in bytes), which is used by the + sender to distinguish slow-start and congestion avoidance phases. */ + + u64 rtt_ts; /**< USED to hold the timestamp of when the packet has been sent */ + + u32 RTO; /**< The current retransmission timeout value. */ + u64 SRTT; /**< The current smoothed round-trip time. */ + f64 RTTVAR; /**< The current RTT variation. 
*/ + + u32 partially_acked_bytes; /**< The tracking method for increase of cwnd when in + congestion avoidance mode (see Section 7.2.2).*/ + + u8 state; /**< The current state of this destination, i.e., DOWN, UP, ALLOW-HB, NO-HEARTBEAT, etc. */ + + u16 PMTU; /**< The current known path MTU. */ + + u32 timers[SCTP_N_TIMERS]; /**< A timer used by each destination. */ + + u8 RTO_pending; /**< A flag used to track if one of the DATA chunks sent to + this address is currently being used to compute an RTT. + If this flag is 0, the next DATA chunk sent to this destination + should be used to compute an RTT and this flag should be set. + Every time the RTT calculation completes (i.e., the DATA chunk is SACK'd), + clear this flag. */ + + u64 last_seen; /**< The time to which this destination was last sent a packet to. + This can be used to determine if a HEARTBEAT is needed. */ + + u64 last_data_ts; /**< Used to hold the timestamp value of last time we sent a DATA chunk */ + + u8 unacknowledged_hb; /**< Used to track how many unacknowledged heartbeats we had; + If more than SCTP_PATH_MAX_RETRANS then connection is considered unreachable. 
*/ + + u8 is_retransmitting; /**< A flag (0 = no, 1 = yes) indicating whether the connection is retransmitting a previous packet */ + + u8 enqueue_state; /**< if set to 1 indicates that DATA is still being handled hence cannot shutdown this connection yet */ + +} sctp_sub_connection_t; + +typedef struct +{ + u32 a_rwnd; /**< Maximum segment size advertised */ + +} sctp_options_t; + +/* Useful macros to deal with the out_of_order_map (array of bit) */ +#define SET_BIT(A,k) ( A[(k/32)] |= (1 << (k%32)) ) +#define CLEAR_BIT(A,k) ( A[(k/32)] &= ~(1 << (k%32)) ) +#define TEST_BIT(A,k) ( A[(k/32)] & (1 << (k%32)) ) + +always_inline void +_bytes_swap (void *pv, size_t n) +{ + char *p = pv; + size_t lo, hi; + for (lo = 0, hi = n - 1; hi > lo; lo++, hi--) + { + char tmp = p[lo]; + p[lo] = p[hi]; + p[hi] = tmp; + } +} + +#define ENDIANESS_SWAP(x) _bytes_swap(&x, sizeof(x)); + +#define MAX_INFLIGHT_PACKETS 128 +#define MAX_ENQUEABLE_SACKS 2 + +/* This parameter indicates to the receiver how much increment in + * milliseconds the sender wishes the receiver to add to its default + * cookie life-span. + */ +#define SUGGESTED_COOKIE_LIFE_SPAN_INCREMENT 1000 + +typedef struct _sctp_user_configuration +{ + u8 never_delay_sack; + u8 never_bundle; + +} sctp_user_configuration_t; + +typedef struct _sctp_connection +{ + /** Required for pool_get_aligned */ + CLIB_CACHE_LINE_ALIGN_MARK (cacheline0); + + sctp_sub_connection_t sub_conn[MAX_SCTP_CONNECTIONS]; /**< Common transport data. First! 
*/ + sctp_user_configuration_t conn_config; /**< Allows tuning of some SCTP behaviors */ + + u8 state; /**< SCTP state as per sctp_state_t */ + u16 flags; /**< Chunk flag (see sctp_chunks_common_hdr_t) */ + + u32 local_tag; /**< INIT_TAG generated locally */ + u32 remote_tag; /**< INIT_TAG generated by the remote peer */ + + u32 local_initial_tsn; /**< Initial TSN generated locally */ + u32 remote_initial_tsn; /**< Initial TSN generated by the remote-peer */ + + u32 peer_cookie_life_span_increment; + + u32 overall_err_count; /**< The overall association error count. */ + u32 overall_err_treshold; /**< The threshold for this association that if the Overall Error Count + reaches will cause this association to be torn down. */ + + u8 init_retransmit_err; /**< Error counter for the INIT transmission phase */ + + u32 peer_rwnd; /**< Current calculated value of the peer's rwnd. */ + + u32 next_tsn; /**< The next TSN number to be assigned to a new DATA chunk. + This is sent in the INIT or INIT ACK chunk to the peer + and incremented each time a DATA chunk is assigned a + TSN (normally just prior to transmit or during + fragmentation). */ + + u32 last_unacked_tsn; /** < Last TSN number still unacked */ + u32 next_tsn_expected; /**< The next TSN number expected to be received. */ + + u32 last_rcvd_tsn; /**< This is the last TSN received in sequence. This value + is set initially by taking the peer's initial TSN, + received in the INIT or INIT ACK chunk, and + subtracting one from it. */ + + u32 out_of_order_map[MAX_INFLIGHT_PACKETS]; /**< An array of bits or bytes indicating which out-of-order + TSNs have been received (relative to the Last Rcvd TSN). + If no gaps exist, i.e., no out-of-order packets have been received, + this array will be set to all zero. */ + + u8 ack_state; /**< This flag indicates if the next received packet is set to be responded to with a SACK. + This is initialized to 0. When a packet is received it is incremented. 
+ If this value reaches 2 or more, a SACK is sent and the value is reset to 0. + Note: This is used only when no DATA chunks are received out-of-order. + When DATA chunks are out-of-order, SACKs are not delayed (see Section 6). */ + + u8 smallest_PMTU_idx; /** The index of the sub-connection with the smallest PMTU discovered across all peer's transport addresses. */ + + u8 overall_sending_status; /**< 0 indicates first fragment of a user message + 1 indicates normal stream + 2 indicates last fragment of a user message */ + + u8 forming_association_changed; /**< This is a flag indicating whether the original association has been modified during + the life-span of the association itself. For instance, a new sub-connection might have been added. */ + + sctp_state_cookie_param_t cookie_param; /**< Temporary location to save cookie information; it can be used to + when timeout expires and sending again a COOKIE is require. */ + +} sctp_connection_t; + +typedef void (sctp_timer_expiration_handler) (u32 conn_index, u32 timer_id); + +sctp_connection_t *sctp_connection_new (u8 thread_index); + +u8 +sctp_sub_connection_add_ip4 (vlib_main_t * vm, + ip4_address_t * lcl_addr, + ip4_address_t * rmt_addr); + +u8 +sctp_sub_connection_add_ip6 (vlib_main_t * vm, + ip6_address_t * lcl_addr, + ip6_address_t * rmt_addr); + +u8 +sctp_sub_connection_del_ip4 (ip4_address_t * lcl_addr, + ip4_address_t * rmt_addr); + +u8 +sctp_sub_connection_del_ip6 (ip6_address_t * lcl_addr, + ip6_address_t * rmt_addr); + +u8 sctp_configure (sctp_user_configuration_t config); + +void sctp_connection_close (sctp_connection_t * sctp_conn); +void sctp_connection_cleanup (sctp_connection_t * sctp_conn); +void sctp_connection_del (sctp_connection_t * sctp_conn); + +u32 sctp_push_header (transport_connection_t * tconn, vlib_buffer_t * b); +void sctp_send_init (sctp_connection_t * sctp_conn); +void sctp_send_cookie_echo (sctp_connection_t * sctp_conn); +void sctp_send_shutdown (sctp_connection_t * sctp_conn); 
+void sctp_send_shutdown_ack (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b); +void sctp_send_shutdown_complete (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b0); +void sctp_send_heartbeat (sctp_connection_t * sctp_conn); +void sctp_data_retransmit (sctp_connection_t * sctp_conn); +void sctp_flush_frame_to_output (vlib_main_t * vm, u8 thread_index, + u8 is_ip4); +void sctp_flush_frames_to_output (u8 thread_index); +void sctp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add); + +format_function_t format_sctp_state; + +u8 *format_sctp_connection_id (u8 * s, va_list * args); +u8 *format_sctp_connection (u8 * s, va_list * args); +u8 *format_sctp_scoreboard (u8 * s, va_list * args); +u8 *format_sctp_header (u8 * s, va_list * args); +u8 *format_sctp_tx_trace (u8 * s, va_list * args); +unformat_function_t unformat_pg_sctp_header; + +clib_error_t *sctp_init (vlib_main_t * vm); +void sctp_connection_timers_init (sctp_connection_t * sctp_conn); +void sctp_connection_timers_reset (sctp_connection_t * sctp_conn); +void sctp_init_snd_vars (sctp_connection_t * sctp_conn); +void sctp_init_mss (sctp_connection_t * sctp_conn); + +void sctp_prepare_initack_chunk (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b, ip4_address_t * ip4_addr, + u8 add_ip4, ip6_address_t * ip6_addr, + u8 add_ip6); +void sctp_prepare_initack_chunk_for_collision (sctp_connection_t * sctp_conn, + u8 idx, vlib_buffer_t * b, + ip4_address_t * ip4_addr, + ip6_address_t * ip6_addr); +void sctp_prepare_abort_for_collision (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b, + ip4_address_t * ip4_addr, + ip6_address_t * ip6_addr); +void sctp_prepare_operation_error (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b, u8 err_cause); +void sctp_prepare_cookie_echo_chunk (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b, u8 reuse_buffer); +void sctp_prepare_cookie_ack_chunk (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b); +void 
sctp_prepare_sack_chunk (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b); +void sctp_prepare_heartbeat_ack_chunk (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b); + +u16 sctp_check_outstanding_data_chunks (sctp_connection_t * sctp_conn); + +void sctp_api_reference (void); + +#define IP_PROTOCOL_SCTP 132 + +/** SSCTP FSM state definitions as per RFC4960. */ +#define foreach_sctp_fsm_state \ + _(CLOSED, "CLOSED") \ + _(COOKIE_WAIT, "COOKIE_WAIT") \ + _(COOKIE_ECHOED, "COOKIE_ECHOED") \ + _(ESTABLISHED, "ESTABLISHED") \ + _(SHUTDOWN_PENDING, "SHUTDOWN_PENDING") \ + _(SHUTDOWN_SENT, "SHUTDOWN_SENT") \ + _(SHUTDOWN_RECEIVED, "SHUTDOWN_RECEIVED") \ + _(SHUTDOWN_ACK_SENT, "SHUTDOWN_ACK_SENT") + +typedef enum _sctp_state +{ +#define _(sym, str) SCTP_STATE_##sym, + foreach_sctp_fsm_state +#undef _ + SCTP_N_STATES +} sctp_state_t; + +always_inline char * +sctp_state_to_string (u8 state) +{ + switch (state) + { + case SCTP_STATE_CLOSED: + return "SCTP_STATE_CLOSED"; + case SCTP_STATE_COOKIE_WAIT: + return "SCTP_STATE_COOKIE_WAIT"; + case SCTP_STATE_COOKIE_ECHOED: + return "SCTP_STATE_COOKIE_ECHOED"; + case SCTP_STATE_ESTABLISHED: + return "SCTP_STATE_ESTABLISHED"; + case SCTP_STATE_SHUTDOWN_PENDING: + return "SCTP_STATE_SHUTDOWN_PENDING"; + case SCTP_STATE_SHUTDOWN_SENT: + return "SCTP_STATE_SHUTDOWN_SENT"; + case SCTP_STATE_SHUTDOWN_RECEIVED: + return "SCTP_STATE_SHUTDOWN_RECEIVED"; + case SCTP_STATE_SHUTDOWN_ACK_SENT: + return "SCTP_STATE_SHUTDOWN_ACK_SENT"; + } + return NULL; +} + +always_inline char * +sctp_chunk_to_string (u8 type) +{ + switch (type) + { + case DATA: + return "DATA"; + case INIT: + return "INIT"; + case INIT_ACK: + return "INIT_ACK"; + case SACK: + return "SACK"; + case HEARTBEAT: + return "HEARTBEAT"; + case HEARTBEAT_ACK: + return "HEARTBEAT_ACK"; + case ABORT: + return "ABORT"; + case SHUTDOWN: + return "SHUTDOWN"; + case SHUTDOWN_ACK: + return "SHUTDOWN_ACK"; + case OPERATION_ERROR: + return "OPERATION_ERROR"; + case 
COOKIE_ECHO: + return "COOKIE_ECHO"; + case COOKIE_ACK: + return "COOKIE_ACK"; + case ECNE: + return "ECNE"; + case CWR: + return "CWR"; + case SHUTDOWN_COMPLETE: + return "SHUTDOWN_COMPLETE"; + } + return NULL; +} + +always_inline char * +sctp_optparam_type_to_string (u8 type) +{ + switch (type) + { + case SCTP_IPV4_ADDRESS_TYPE: + return "SCTP_IPV4_ADDRESS_TYPE"; + case SCTP_IPV6_ADDRESS_TYPE: + return "SCTP_IPV6_ADDRESS_TYPE"; + case SCTP_STATE_COOKIE_TYPE: + return "SCTP_STATE_COOKIE_TYPE"; + case SCTP_UNRECOGNIZED_TYPE: + return "SCTP_UNRECOGNIZED_TYPE"; + case SCTP_COOKIE_PRESERVATIVE_TYPE: + return "SCTP_COOKIE_PRESERVATIVE_TYPE"; + case SCTP_HOSTNAME_ADDRESS_TYPE: + return "SCTP_HOSTNAME_ADDRESS_TYPE"; + case SCTP_SUPPORTED_ADDRESS_TYPES: + return "SCTP_SUPPORTED_ADDRESS_TYPES"; + } + return NULL; +} + +#define SCTP_TICK 0.001 /**< SCTP tick period (s) */ +#define SHZ (u32) (1/SCTP_TICK) /**< SCTP tick frequency */ +#define SCTP_TSTAMP_RESOLUTION SCTP_TICK /**< Time stamp resolution */ + +/* As per RFC4960, page 83 */ +#define SCTP_RTO_INIT 3 * SHZ /* 3 seconds */ +#define SCTP_RTO_MIN 1 * SHZ /* 1 second */ +#define SCTP_RTO_MAX 60 * SHZ /* 60 seconds */ +#define SCTP_RTO_BURST 4 +#define SCTP_RTO_ALPHA 1/8 +#define SCTP_RTO_BETA 1/4 +#define SCTP_VALID_COOKIE_LIFE 60 * SHZ /* 60 seconds */ +#define SCTP_ASSOCIATION_MAX_RETRANS 10 // the overall connection +#define SCTP_PATH_MAX_RETRANS 5 // number of attempts per destination address +#define SCTP_MAX_INIT_RETRANS 8 // number of attempts +#define SCTP_HB_INTERVAL 30 * SHZ +#define SCTP_HB_MAX_BURST 1 +#define SCTP_DATA_IDLE_INTERVAL 15 * SHZ /* 15 seconds; the time-interval after which the connetion is considered IDLE */ +#define SCTP_TO_TIMER_TICK SCTP_TICK*10 /* Period for converting from SCTP_TICK */ + +#define SCTP_CONN_RECOVERY 1 << 1 +#define SCTP_FAST_RECOVERY 1 << 2 + +typedef struct _sctp_lookup_dispatch +{ + u8 next, error; +} sctp_lookup_dispatch_t; + +typedef struct _sctp_main +{ + /* 
Per-worker thread SCTP connection pools */ + sctp_connection_t **connections; + + /* Pool of listeners. */ + sctp_connection_t *listener_pool; + + /** Dispatch table by state and flags */ + sctp_lookup_dispatch_t dispatch_table[SCTP_N_STATES][64]; + + u8 log2_tstamp_clocks_per_tick; + f64 tstamp_ticks_per_clock; + u64 *time_now; + + /** per-worker tx buffer free lists */ + u32 **tx_buffers; + /** per-worker tx frames to SCTP 4/6 output nodes */ + vlib_frame_t **tx_frames[2]; + /** per-worker tx frames to ip 4/6 lookup nodes */ + vlib_frame_t **ip_lookup_tx_frames[2]; + + /* Per worker-thread timer wheel for connections timers */ + tw_timer_wheel_16t_2w_512sl_t *timer_wheels; + + /* Pool of half-open connections on which we've sent a SYN */ + sctp_connection_t *half_open_connections; + clib_spinlock_t half_open_lock; + + /* TODO: Congestion control algorithms registered */ + /* sctp_cc_algorithm_t *cc_algos; */ + + /* Flag that indicates if stack is on or off */ + u8 is_enabled; + u8 is_init; + + /** Number of preallocated connections */ + u32 preallocated_connections; + + /** Transport table (preallocation) size parameters */ + u32 local_endpoints_table_memory; + u32 local_endpoints_table_buckets; + + /** Vectors of src addresses. 
Optional unless one needs > 63K active-opens */ + ip4_address_t *ip4_src_addresses; + u32 last_v4_address_rotor; + u32 last_v6_address_rotor; + ip6_address_t *ip6_src_addresses; + + /** vlib buffer size */ + u32 bytes_per_buffer; + + u8 punt_unknown4; + u8 punt_unknown6; + + u32 sctp4_established_phase_node_index; + u32 sctp6_established_phase_node_index; + + u16 msg_id_base; +} sctp_main_t; + +extern sctp_main_t sctp_main; +extern vlib_node_registration_t sctp4_input_node; +extern vlib_node_registration_t sctp6_input_node; +extern vlib_node_registration_t sctp4_output_node; +extern vlib_node_registration_t sctp6_output_node; + +always_inline sctp_main_t * +vnet_get_sctp_main () +{ + return &sctp_main; +} + +always_inline sctp_header_t * +sctp_buffer_hdr (vlib_buffer_t * b) +{ + ASSERT ((signed) b->current_data >= (signed) -VLIB_BUFFER_PRE_DATA_SIZE); + return (sctp_header_t *) (b->data + b->current_data + + sctp_buffer_opaque (b)->sctp.hdr_offset); +} + +clib_error_t *vnet_sctp_enable_disable (vlib_main_t * vm, u8 is_en); +clib_error_t *sctp_plugin_api_hookup (vlib_main_t * vm); + +always_inline sctp_connection_t * +sctp_half_open_connection_get (u32 conn_index) +{ + sctp_connection_t *tc = 0; + clib_spinlock_lock_if_init (&sctp_main.half_open_lock); + if (!pool_is_free_index (sctp_main.half_open_connections, conn_index)) + tc = pool_elt_at_index (sctp_main.half_open_connections, conn_index); + tc->sub_conn[SCTP_PRIMARY_PATH_IDX].subconn_idx = SCTP_PRIMARY_PATH_IDX; + clib_spinlock_unlock_if_init (&sctp_main.half_open_lock); + return tc; +} + +/** + * Cleanup half-open connection + * + */ +always_inline void +sctp_half_open_connection_del (sctp_connection_t * tc) +{ + sctp_main_t *sctp_main = vnet_get_sctp_main (); + clib_spinlock_lock_if_init (&sctp_main->half_open_lock); + pool_put_index (sctp_main->half_open_connections, + tc->sub_conn[SCTP_PRIMARY_PATH_IDX].c_c_index); + if (CLIB_DEBUG) + clib_memset (tc, 0xFA, sizeof (*tc)); + clib_spinlock_unlock_if_init 
(&sctp_main->half_open_lock); +} + +always_inline u64 +sctp_set_time_now (u32 thread_index) +{ + sctp_main.time_now[thread_index] = clib_cpu_time_now () + * sctp_main.tstamp_ticks_per_clock; + return sctp_main.time_now[thread_index]; +} + +always_inline void +sctp_timer_set (sctp_connection_t * tc, u8 conn_idx, u8 timer_id, + u32 interval) +{ + ASSERT (tc->sub_conn[conn_idx].connection.thread_index == + vlib_get_thread_index ()); + ASSERT (tc->sub_conn[conn_idx].timers[timer_id] == + SCTP_TIMER_HANDLE_INVALID); + + sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx]; + sub->timers[timer_id] = + tw_timer_start_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index], + sub->c_c_index, timer_id, interval); +} + +always_inline void +sctp_timer_reset (sctp_connection_t * tc, u8 conn_idx, u8 timer_id) +{ + ASSERT (tc->sub_conn[conn_idx].c_thread_index == vlib_get_thread_index ()); + if (tc->sub_conn[conn_idx].timers[timer_id] == SCTP_TIMER_HANDLE_INVALID) + return; + + sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx]; + + tw_timer_stop_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index], + sub->timers[timer_id]); + sub->timers[timer_id] = SCTP_TIMER_HANDLE_INVALID; +} + +/** + * Try to cleanup half-open connection + * + * If called from a thread that doesn't own tc, the call won't have any + * effect. + * + * @param tc - connection to be cleaned up + * @return non-zero if cleanup failed. 
+ */ +always_inline int +sctp_half_open_connection_cleanup (sctp_connection_t * tc) +{ + /* Make sure this is the owning thread */ + if (tc->sub_conn[SCTP_PRIMARY_PATH_IDX].c_thread_index != + vlib_get_thread_index ()) + return 1; + sctp_timer_reset (tc, SCTP_PRIMARY_PATH_IDX, SCTP_TIMER_T1_INIT); + sctp_half_open_connection_del (tc); + return 0; +} + +always_inline u32 +sctp_header_bytes () +{ + return sizeof (sctp_header_t); +} + +always_inline sctp_connection_t * +sctp_get_connection_from_transport (transport_connection_t * tconn) +{ + ASSERT (tconn != NULL); + + sctp_sub_connection_t *sub = (sctp_sub_connection_t *) tconn; +#if SCTP_ADV_DEBUG + if (sub == NULL) + SCTP_ADV_DBG ("sub == NULL"); + if (sub->parent == NULL) + SCTP_ADV_DBG ("sub->parent == NULL"); +#endif + if (sub->subconn_idx > 0) + return (sctp_connection_t *) sub - + (sizeof (sctp_sub_connection_t) * (sub->subconn_idx - 1)); + + return (sctp_connection_t *) sub; +} + +always_inline u64 +sctp_time_now (void) +{ + return sctp_main.time_now[vlib_get_thread_index ()]; +} + +#define ABS(x) ((x) > 0) ? (x) : -(x); + +always_inline void +sctp_calculate_rto (sctp_connection_t * sctp_conn, u8 conn_idx) +{ + /* See RFC4960, 6.3.1. 
RTO Calculation */ + u64 RTO = 0; + f64 RTTVAR = 0; + u64 now = sctp_time_now (); + u64 prev_ts = sctp_conn->sub_conn[conn_idx].rtt_ts; + u64 R = prev_ts - now; + + if (sctp_conn->sub_conn[conn_idx].RTO == 0) // C1: Let's initialize our RTO + { + sctp_conn->sub_conn[conn_idx].RTO = SCTP_RTO_MIN; + return; + } + + if (sctp_conn->sub_conn[conn_idx].RTO == SCTP_RTO_MIN && sctp_conn->sub_conn[conn_idx].SRTT == 0) // C2: First RTT calculation + { + sctp_conn->sub_conn[conn_idx].SRTT = R; + RTTVAR = R / 2; + + if (RTTVAR == 0) + RTTVAR = 100e-3; /* 100 ms */ + + sctp_conn->sub_conn[conn_idx].RTTVAR = RTTVAR; + } + else // C3: RTT already exists; let's recalculate + { + RTTVAR = (1 - SCTP_RTO_BETA) * sctp_conn->sub_conn[conn_idx].RTTVAR + + SCTP_RTO_BETA * ABS (sctp_conn->sub_conn[conn_idx].SRTT - R); + + if (RTTVAR == 0) + RTTVAR = 100e-3; /* 100 ms */ + + sctp_conn->sub_conn[conn_idx].RTTVAR = RTTVAR; + + sctp_conn->sub_conn[conn_idx].SRTT = + (1 - SCTP_RTO_ALPHA) * sctp_conn->sub_conn[conn_idx].SRTT + + SCTP_RTO_ALPHA * R; + } + + RTO = + sctp_conn->sub_conn[conn_idx].SRTT + + 4 * sctp_conn->sub_conn[conn_idx].RTTVAR; + if (RTO < SCTP_RTO_MIN) // C6 + RTO = SCTP_RTO_MIN; + + if (RTO > SCTP_RTO_MAX) // C7 + RTO = SCTP_RTO_MAX; + + sctp_conn->sub_conn[conn_idx].RTO = RTO; +} + +always_inline void +sctp_timer_update (sctp_connection_t * tc, u8 conn_idx, u8 timer_id, + u32 interval) +{ + ASSERT (tc->sub_conn[conn_idx].connection.thread_index == + vlib_get_thread_index ()); + sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx]; + + if (tc->sub_conn[conn_idx].timers[timer_id] != SCTP_TIMER_HANDLE_INVALID) + tw_timer_stop_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index], + sub->timers[timer_id]); + + tc->sub_conn[conn_idx].timers[timer_id] = + tw_timer_start_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index], + sub->c_c_index, timer_id, interval); +} + +always_inline sctp_connection_t * +sctp_listener_get (u32 tli) +{ + return pool_elt_at_index 
(sctp_main.listener_pool, tli); +} + +#endif + +always_inline sctp_connection_t * +sctp_connection_get (u32 conn_index, u32 thread_index) +{ + if (PREDICT_FALSE + (pool_is_free_index (sctp_main.connections[thread_index], conn_index))) + return 0; + return pool_elt_at_index (sctp_main.connections[thread_index], conn_index); +} + +#define SELECT_MAX_RETRIES 8 + +always_inline u8 +sctp_data_subconn_select (sctp_connection_t * sctp_conn) +{ + u32 sub = SCTP_PRIMARY_PATH_IDX; + u8 i, cwnd = sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].cwnd; + for (i = 1; i < MAX_SCTP_CONNECTIONS; i++) + { + if (sctp_conn->sub_conn[i].state == SCTP_SUBCONN_STATE_DOWN) + continue; + + if (sctp_conn->sub_conn[i].cwnd > cwnd) + { + sub = i; + cwnd = sctp_conn->sub_conn[i].cwnd; + } + } + return sub; +} + +always_inline u8 +sctp_sub_conn_id_via_ip6h (sctp_connection_t * sctp_conn, ip6_header_t * ip6h) +{ + u8 i; + + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + if (sctp_conn->sub_conn[i].connection.lcl_ip.ip6.as_u64[0] == + ip6h->dst_address.as_u64[0] && + sctp_conn->sub_conn[i].connection.lcl_ip.ip6.as_u64[1] == + ip6h->dst_address.as_u64[1] && + sctp_conn->sub_conn[i].connection.rmt_ip.ip6.as_u64[0] == + ip6h->src_address.as_u64[0] && + sctp_conn->sub_conn[i].connection.rmt_ip.ip6.as_u64[1] == + ip6h->src_address.as_u64[1]) + return i; + } + clib_warning ("Did not find a sub-connection; defaulting to %u", + SCTP_PRIMARY_PATH_IDX); + return SCTP_PRIMARY_PATH_IDX; +} + +always_inline u8 +sctp_sub_conn_id_via_ip4h (sctp_connection_t * sctp_conn, ip4_header_t * ip4h) +{ + u8 i; + + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + if (sctp_conn->sub_conn[i].connection.lcl_ip.ip4.as_u32 == + ip4h->dst_address.as_u32 + && sctp_conn->sub_conn[i].connection.rmt_ip.ip4.as_u32 == + ip4h->src_address.as_u32) + return i; + } + clib_warning ("Did not find a sub-connection; defaulting to %u", + SCTP_PRIMARY_PATH_IDX); + return SCTP_PRIMARY_PATH_IDX; +} + +/** + * Push SCTP header to buffer + * + * 
@param vm - vlib_main + * @param b - buffer to write the header to + * @param sp_net - source port net order + * @param dp_net - destination port net order + * @param sctp_hdr_opts_len - header and options length in bytes + * + * @return - pointer to start of SCTP header + */ +always_inline void * +vlib_buffer_push_sctp_net_order (vlib_buffer_t * b, u16 sp, u16 dp, + u8 sctp_hdr_opts_len) +{ + sctp_full_hdr_t *full_hdr; + + full_hdr = vlib_buffer_push_uninit (b, sctp_hdr_opts_len); + + full_hdr->hdr.src_port = sp; + full_hdr->hdr.dst_port = dp; + full_hdr->hdr.checksum = 0; + return full_hdr; +} + +/** + * Push SCTP header to buffer + * + * @param b - buffer to write the header to + * @param sp_net - source port net order + * @param dp_net - destination port net order + * @param sctp_hdr_opts_len - header and options length in bytes + * + * @return - pointer to start of SCTP header + */ +always_inline void * +vlib_buffer_push_sctp (vlib_buffer_t * b, u16 sp_net, u16 dp_net, + u8 sctp_hdr_opts_len) +{ + return vlib_buffer_push_sctp_net_order (b, sp_net, dp_net, + sctp_hdr_opts_len); +} + +always_inline u8 +sctp_next_avail_subconn (sctp_connection_t * sctp_conn) +{ + u8 i; + + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + if (sctp_conn->sub_conn[i].state == SCTP_SUBCONN_STATE_DOWN) + return i; + } + return MAX_SCTP_CONNECTIONS; +} + +always_inline void +update_smallest_pmtu_idx (sctp_connection_t * sctp_conn) +{ + u8 i; + u8 smallest_pmtu_index = SCTP_PRIMARY_PATH_IDX; + + for (i = 1; i < MAX_SCTP_CONNECTIONS; i++) + { + if (sctp_conn->sub_conn[i].state != SCTP_SUBCONN_STATE_DOWN) + { + if (sctp_conn->sub_conn[i].PMTU < + sctp_conn->sub_conn[smallest_pmtu_index].PMTU) + smallest_pmtu_index = i; + } + } + + sctp_conn->smallest_PMTU_idx = smallest_pmtu_index; +} + +/* As per RFC4960; section 7.2.1: Slow-Start */ +always_inline void +sctp_init_cwnd (sctp_connection_t * sctp_conn) +{ + u8 i; + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + /* Section 7.2.1; point 
(1) */ + sctp_conn->sub_conn[i].cwnd = + clib_min (4 * sctp_conn->sub_conn[i].PMTU, + clib_max (2 * sctp_conn->sub_conn[i].PMTU, 4380)); + + /* Section 7.2.1; point (3) */ + sctp_conn->sub_conn[i].ssthresh = SCTP_INITIAL_SSHTRESH; + + /* Section 7.2.2; point (1) */ + sctp_conn->sub_conn[i].partially_acked_bytes = 0; + } +} + +always_inline u8 +sctp_in_cong_recovery (sctp_connection_t * sctp_conn, u8 idx) +{ + return 0; +} + +always_inline u8 +cwnd_fully_utilized (sctp_connection_t * sctp_conn, u8 idx) +{ + if (sctp_conn->sub_conn[idx].cwnd == 0) + return 1; + return 0; +} + +/* As per RFC4960; section 7.2.1: Slow-Start */ +always_inline void +update_cwnd (sctp_connection_t * sctp_conn) +{ + u8 i; + u32 inflight = sctp_conn->next_tsn - sctp_conn->last_unacked_tsn; + + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + /* Section 7.2.1; point (2) */ + if (sctp_conn->sub_conn[i].is_retransmitting) + { + sctp_conn->sub_conn[i].cwnd = 1 * sctp_conn->sub_conn[i].PMTU; + continue; + } + + /* Section 7.2.2; point (4) */ + if (sctp_conn->sub_conn[i].last_data_ts > + sctp_time_now () + SCTP_DATA_IDLE_INTERVAL) + { + sctp_conn->sub_conn[i].cwnd = + clib_max (sctp_conn->sub_conn[i].cwnd / 2, + 4 * sctp_conn->sub_conn[i].PMTU); + continue; + } + + /* Section 7.2.1; point (5) */ + if (sctp_conn->sub_conn[i].cwnd <= sctp_conn->sub_conn[i].ssthresh) + { + if (!cwnd_fully_utilized (sctp_conn, i)) + continue; + + if (sctp_in_cong_recovery (sctp_conn, i)) + continue; + + sctp_conn->sub_conn[i].cwnd = + clib_min (sctp_conn->sub_conn[i].PMTU, 1); + } + + /* Section 6.1; point (D) */ + if ((inflight + SCTP_RTO_BURST * sctp_conn->sub_conn[i].PMTU) < + sctp_conn->sub_conn[i].cwnd) + sctp_conn->sub_conn[i].cwnd = + inflight + SCTP_RTO_BURST * sctp_conn->sub_conn[i].PMTU; + } +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/sctp/sctp_all_api_h.h b/src/plugins/sctp/sctp_all_api_h.h new file mode 
100644 index 00000000000..16bba76239e --- /dev/null +++ b/src/plugins/sctp/sctp_all_api_h.h @@ -0,0 +1,16 @@ +/* + * Copyright (c) 2019 Cisco and/or its affiliates. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +/* Include the generated file, see BUILT_SOURCES in Makefile.am */ +#include <sctp/sctp.api.h> diff --git a/src/plugins/sctp/sctp_api.c b/src/plugins/sctp/sctp_api.c new file mode 100644 index 00000000000..7702d340aae --- /dev/null +++ b/src/plugins/sctp/sctp_api.c @@ -0,0 +1,158 @@ +/* + *------------------------------------------------------------------ + * sctp_api.c - sctp-layer API + * + * Copyright (c) 2018 SUSE LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ *------------------------------------------------------------------ + */ + +#include <vlib/vlib.h> +#include <vlibapi/api.h> +#include <vlibmemory/api.h> + +#include <sctp/sctp.h> + +#include <sctp/sctp_msg_enum.h> + +#define vl_typedefs /* define message structures */ +#include <sctp/sctp_all_api_h.h> +#undef vl_typedefs + +#define vl_endianfun /* define message structures */ +#include <sctp/sctp_all_api_h.h> +#undef vl_endianfun + +/* instantiate all the print functions we know about */ +#define vl_print(handle, ...) vlib_cli_output (handle, __VA_ARGS__) +#define vl_printfun +#include <sctp/sctp_all_api_h.h> +#undef vl_printfun + +#define vl_api_version(n,v) static u32 api_version=(v); +#include <sctp/sctp_all_api_h.h> +#undef vl_api_version + +#define REPLY_MSG_ID_BASE sctp_main.msg_id_base +#include <vlibapi/api_helper_macros.h> + +#define foreach_sctp_plugin_api_msg \ +_(SCTP_ADD_SRC_DST_CONNECTION, sctp_add_src_dst_connection) \ +_(SCTP_DEL_SRC_DST_CONNECTION, sctp_del_src_dst_connection) \ +_(SCTP_CONFIG, sctp_config) + +static void + vl_api_sctp_add_src_dst_connection_t_handler + (vl_api_sctp_add_src_dst_connection_t * mp) +{ + vlib_main_t *vm = vlib_get_main (); + vl_api_sctp_add_src_dst_connection_reply_t *rmp; + int rv; + + if (mp->is_ipv6) + rv = sctp_sub_connection_add_ip6 + (vm, + (ip6_address_t *) mp->src_address, (ip6_address_t *) mp->dst_address); + else + rv = sctp_sub_connection_add_ip4 + (vm, + (ip4_address_t *) mp->src_address, (ip4_address_t *) mp->dst_address); + + REPLY_MACRO (VL_API_SCTP_ADD_SRC_DST_CONNECTION_REPLY); +} + +static void + vl_api_sctp_del_src_dst_connection_t_handler + (vl_api_sctp_del_src_dst_connection_t * mp) +{ + vl_api_sctp_del_src_dst_connection_reply_t *rmp; + int rv; + + if (mp->is_ipv6) + rv = sctp_sub_connection_del_ip6 + ((ip6_address_t *) mp->src_address, (ip6_address_t *) mp->dst_address); + else + rv = sctp_sub_connection_del_ip4 + ((ip4_address_t *) mp->src_address, (ip4_address_t *) mp->dst_address); + + 
REPLY_MACRO (VL_API_SCTP_ADD_SRC_DST_CONNECTION_REPLY); +} + +static void +vl_api_sctp_config_t_handler (vl_api_sctp_config_t * mp) +{ + sctp_user_configuration_t config; + vl_api_sctp_config_reply_t *rmp; + int rv; + + config.never_delay_sack = mp->never_delay_sack; + config.never_bundle = mp->never_bundle; + rv = sctp_configure (config); + + REPLY_MACRO (VL_API_SCTP_CONFIG_REPLY); +} + +#define vl_msg_name_crc_list +#include <sctp/sctp_all_api_h.h> +#undef vl_msg_name_crc_list + +static void +setup_message_id_table (sctp_main_t * sm, api_main_t * am) +{ +#define _(id,n,crc) \ + vl_msg_api_add_msg_name_crc (am, #n "_" #crc, id + sm->msg_id_base); + foreach_vl_msg_name_crc_sctp; +#undef _ +} + +clib_error_t * +sctp_plugin_api_hookup (vlib_main_t * vm) +{ + sctp_main_t *sm = &sctp_main; + api_main_t *am = &api_main; + u8 *name; + + /* Construct the API name */ + name = format (0, "sctp_%08x%c", api_version, 0); + + /* Ask for a correctly-sized block of API message decode slots */ + sctp_main.msg_id_base = vl_msg_api_get_msg_ids + ((char *) name, VL_MSG_FIRST_AVAILABLE); + +#define _(N,n) \ + vl_msg_api_set_handlers((VL_API_##N + sm->msg_id_base), \ + #n, \ + vl_api_##n##_t_handler, \ + vl_noop_handler, \ + vl_api_##n##_t_endian, \ + vl_api_##n##_t_print, \ + sizeof(vl_api_##n##_t), 1); + foreach_sctp_plugin_api_msg; +#undef _ + + /* + * Set up the (msg_name, crc, message-id) table + */ + setup_message_id_table (sm, am); + vec_free (name); + + return 0; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/sctp/sctp_debug.h b/src/plugins/sctp/sctp_debug.h new file mode 100644 index 00000000000..b0059d5fe67 --- /dev/null +++ b/src/plugins/sctp/sctp_debug.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2017 SUSE LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_sctp_debug_h__ +#define included_sctp_debug_h__ + +#include <vlib/vlib.h> + +typedef enum _sctp_dbg +{ +#define _(sym, str) SCTP_DBG_##sym, + foreach_sctp_dbg_evt +#undef _ +} sctp_dbg_e; + +#define SCTP_DEBUG_STATE_MACHINE (0) +#if SCTP_DEBUG_STATE_MACHINE +#define SCTP_DBG_STATE_MACHINE(_fmt, _args...) clib_warning (_fmt, ##_args) +#else +#define SCTP_DBG_STATE_MACHINE(_fmt, _args...) +#endif + +#define SCTP_DEBUG (0) +#if SCTP_DEBUG +#define SCTP_DBG(_fmt, _args...) clib_warning (_fmt, ##_args) +#else +#define SCTP_DBG(_fmt, _args...) +#endif + +#define SCTP_ADV_DEBUG (0) +#if SCTP_ADV_DEBUG +#define SCTP_ADV_DBG(_fmt, _args...) clib_warning (_fmt, ##_args) +#else +#define SCTP_ADV_DBG(_fmt, _args...) +#endif + +#define SCTP_DEBUG_OUTPUT (0) +#if SCTP_DEBUG_OUTPUT +#define SCTP_DBG_OUTPUT(_fmt, _args...) clib_warning (_fmt, ##_args) +#else +#define SCTP_DBG_OUTPUT(_fmt, _args...) +#endif + +#define SCTP_ADV_DEBUG_OUTPUT (0) +#if SCTP_ADV_DEBUG_OUTPUT +#define SCTP_ADV_DBG_OUTPUT(_fmt, _args...) clib_warning (_fmt, ##_args) +#else +#define SCTP_ADV_DBG_OUTPUT(_fmt, _args...) +#endif + +#define SCTP_CONN_TRACKING_DEBUG (0) +#if SCTP_CONN_TRACKING_DEBUG +#define SCTP_CONN_TRACKING_DBG(_fmt, _args...) clib_warning (_fmt, ##_args) +#else +#define SCTP_CONN_TRACKING_DBG(_fmt, _args...) 
+#endif + +#endif /* included_sctp_debug_h__ */ diff --git a/src/plugins/sctp/sctp_error.def b/src/plugins/sctp/sctp_error.def new file mode 100644 index 00000000000..b95b71a9417 --- /dev/null +++ b/src/plugins/sctp/sctp_error.def @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2017 SUSE LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +sctp_error (NONE, "no error") +sctp_error (WRONG_WORKER, "Wrong worker thread") +sctp_error (FILTERED, "Packets filtered") +sctp_error (PKTS_SENT, "Packets sent") +sctp_error (INVALID_CONNECTION, "Invalid connection") +sctp_error (INVALID_TAG, "Invalid verification tag") +sctp_error (INVALID_TAG_FOR_INIT, "Invalid verification tag for INIT chunk") +sctp_error (CONNECTION_CLOSED, "Connection closed") +sctp_error (ENQUEUED, "Packets pushed into rx fifo") +sctp_error (CREATE_EXISTS, "Connection already exists") +sctp_error (INITS_RCVD, "INITs received") +sctp_error (CREATE_SESSION_FAIL, "Sessions couldn't be allocated") +sctp_error (NO_LISTENER, "no listener for dst port") +sctp_error (LENGTH, "inconsistent ip/sctp lengths") +sctp_error (DISPATCH, "Dispatch error") +sctp_error (ACK_DUP, "Duplicate ACK") +sctp_error (DATA_CHUNK_VIOLATION, "DATA chunk received in invalid state") +sctp_error (INIT_CHUNK_VIOLATION, "INIT chunk received in the wrong state") +sctp_error (INIT_ACK_CHUNK_VIOLATION, "INIT_ACK chunk received in the wrong state") +sctp_error (SACK_CHUNK_VIOLATION, "SACK chunk received in invalid state") +sctp_error 
(HEARTBEAT_CHUNK_VIOLATION, "HEARTBEAT chunk received in invalid state") +sctp_error (HEARTBEAT_ACK_CHUNK_VIOLATION, "HEARTBEAT_ACK chunk received in invalid state") +sctp_error (ABORT_CHUNK_VIOLATION, "ABORT_CHUNK chunk received in invalid state") +sctp_error (SHUTDOWN_CHUNK_VIOLATION, "SHUTDOWN chunk received in invalid state") +sctp_error (SHUTDOWN_ACK_CHUNK_VIOLATION, "SHUTDOWN_ACK chunk received in invalid state") +sctp_error (OPERATION_ERROR_VIOLATION, "OPERATION_ERROR chunk received in invalid state") +sctp_error (COOKIE_ECHO_VIOLATION, "COOKIE_ECHO chunk received in invalid state") +sctp_error (COOKIE_ACK_VIOLATION, "COOKIE_ACK chunk received in invalid state") +sctp_error (ECNE_VIOLATION, "ECNE chunk received in invalid state") +sctp_error (CWR_VIOLATION, "CWR chunk received in invalid state") +sctp_error (SHUTDOWN_COMPLETE_VIOLATION, "SHUTDOWN_COMPLETE chunk received in invalid state") +sctp_error (FIFO_FULL, "Packets dropped for lack of rx fifo space") +sctp_error (PARTIALLY_ENQUEUED, "Packets partially pushed into rx fifo") +sctp_error (EVENT_FIFO_FULL, "Events not sent for lack of event fifo space") +sctp_error (UNKNOWN_CHUNK, "Unrecognized / unknown chunk or chunk-state mismatch") +sctp_error (BUNDLING_VIOLATION, "Bundling not allowed") +sctp_error (PUNT, "Packets punted") +sctp_error (MAX_CONNECTIONS, "Reached max supported subconnection") diff --git a/src/plugins/sctp/sctp_format.c b/src/plugins/sctp/sctp_format.c new file mode 100644 index 00000000000..99430c70c2c --- /dev/null +++ b/src/plugins/sctp/sctp_format.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2017 SUSE LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <sctp/sctp.h> + +/* Format SCTP header. */ +u8 * +format_sctp_header (u8 * s, va_list * args) +{ + return NULL; +} + +u8 * +format_sctp_tx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + + return NULL; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/sctp/sctp_input.c b/src/plugins/sctp/sctp_input.c new file mode 100644 index 00000000000..7f52a2f9c32 --- /dev/null +++ b/src/plugins/sctp/sctp_input.c @@ -0,0 +1,2532 @@ +/* + * Copyright (c) 2017 SUSE LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <vppinfra/sparse_vec.h> +#include <sctp/sctp.h> +#include <sctp/sctp_packet.h> +#include <sctp/sctp_debug.h> +#include <vnet/session/session.h> +#include <math.h> + +static char *sctp_error_strings[] = { +#define sctp_error(n,s) s, +#include <sctp/sctp_error.def> +#undef sctp_error +}; + +/* All SCTP nodes have the same outgoing arcs */ +#define foreach_sctp_state_next \ + _ (DROP4, "ip4-drop") \ + _ (DROP6, "ip6-drop") \ + _ (SCTP4_OUTPUT, "sctp4-output") \ + _ (SCTP6_OUTPUT, "sctp6-output") + +typedef enum _sctp_established_phase_next +{ +#define _(s,n) SCTP_ESTABLISHED_PHASE_NEXT_##s, + foreach_sctp_state_next +#undef _ + SCTP_ESTABLISHED_PHASE_N_NEXT, +} sctp_established_phase_next_t; + +typedef enum _sctp_rcv_phase_next +{ +#define _(s,n) SCTP_RCV_PHASE_NEXT_##s, + foreach_sctp_state_next +#undef _ + SCTP_RCV_PHASE_N_NEXT, +} sctp_rcv_phase_next_t; + +typedef enum _sctp_listen_phase_next +{ +#define _(s,n) SCTP_LISTEN_PHASE_NEXT_##s, + foreach_sctp_state_next +#undef _ + SCTP_LISTEN_PHASE_N_NEXT, +} sctp_listen_phase_next_t; + +typedef enum _sctp_shutdown_phase_next +{ +#define _(s,n) SCTP_SHUTDOWN_PHASE_NEXT_##s, + foreach_sctp_state_next +#undef _ + SCTP_SHUTDOWN_PHASE_N_NEXT, +} sctp_shutdown_phase_next_t; + +/* Generic, state independent indices */ +typedef enum _sctp_state_next +{ +#define _(s,n) SCTP_NEXT_##s, + foreach_sctp_state_next +#undef _ + SCTP_STATE_N_NEXT, +} sctp_state_next_t; + +typedef enum _sctp_input_next +{ + SCTP_INPUT_NEXT_DROP, + SCTP_INPUT_NEXT_LISTEN_PHASE, + SCTP_INPUT_NEXT_RCV_PHASE, + SCTP_INPUT_NEXT_ESTABLISHED_PHASE, + SCTP_INPUT_NEXT_SHUTDOWN_PHASE, + SCTP_INPUT_NEXT_PUNT_PHASE, + SCTP_INPUT_N_NEXT +} sctp_input_next_t; + +#ifndef CLIB_MARCH_VARIANT +char * +phase_to_string (u8 phase) +{ + switch (phase) + { + case SCTP_INPUT_NEXT_DROP: + return "SCTP_INPUT_NEXT_DROP"; + case SCTP_INPUT_NEXT_LISTEN_PHASE: + return "SCTP_INPUT_NEXT_LISTEN_PHASE"; + case SCTP_INPUT_NEXT_RCV_PHASE: + return 
"SCTP_INPUT_NEXT_RCV_PHASE"; + case SCTP_INPUT_NEXT_ESTABLISHED_PHASE: + return "SCTP_INPUT_NEXT_ESTABLISHED_PHASE"; + case SCTP_INPUT_NEXT_SHUTDOWN_PHASE: + return "SCTP_INPUT_NEXT_SHUTDOWN_PHASE"; + case SCTP_INPUT_NEXT_PUNT_PHASE: + return "SCTP_INPUT_NEXT_PUNT_PHASE"; + } + return NULL; +} +#endif /* CLIB_MARCH_VARIANT */ + +#define foreach_sctp4_input_next \ + _ (DROP, "error-drop") \ + _ (RCV_PHASE, "sctp4-rcv") \ + _ (LISTEN_PHASE, "sctp4-listen") \ + _ (ESTABLISHED_PHASE, "sctp4-established") \ + _ (SHUTDOWN_PHASE, "sctp4-shutdown") \ + _ (PUNT_PHASE, "ip4-punt") + + +#define foreach_sctp6_input_next \ + _ (DROP, "error-drop") \ + _ (RCV_PHASE, "sctp6-rcv") \ + _ (LISTEN_PHASE, "sctp6-listen") \ + _ (ESTABLISHED_PHASE, "sctp6-established") \ + _ (SHUTDOWN_PHASE, "sctp6-shutdown") \ + _ (PUNT_PHASE, "ip6-punt") + +static u8 +sctp_lookup_is_valid (transport_connection_t * trans_conn, + sctp_header_t * sctp_hdr) +{ + sctp_connection_t *sctp_conn = + sctp_get_connection_from_transport (trans_conn); + + if (!sctp_conn) + return 1; + + u8 is_valid = (trans_conn->lcl_port == sctp_hdr->dst_port + && (sctp_conn->state == SCTP_STATE_CLOSED + || trans_conn->rmt_port == sctp_hdr->src_port)); + + return is_valid; +} + +/** + * Lookup transport connection + */ +static sctp_connection_t * +sctp_lookup_connection (u32 fib_index, vlib_buffer_t * b, u8 thread_index, + u8 is_ip4) +{ + sctp_main_t *tm = vnet_get_sctp_main (); + sctp_header_t *sctp_hdr; + transport_connection_t *trans_conn; + sctp_connection_t *sctp_conn; + u8 is_filtered, i; + if (is_ip4) + { + ip4_header_t *ip4_hdr; + ip4_hdr = vlib_buffer_get_current (b); + sctp_hdr = ip4_next_header (ip4_hdr); + trans_conn = session_lookup_connection_wt4 (fib_index, + &ip4_hdr->dst_address, + &ip4_hdr->src_address, + sctp_hdr->dst_port, + sctp_hdr->src_port, + TRANSPORT_PROTO_SCTP, + thread_index, &is_filtered); + if (trans_conn == 0) /* Not primary connection */ + { + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + if 
((tm->connections[thread_index]->sub_conn[i]. + connection.lcl_ip.ip4.as_u32 == + ip4_hdr->dst_address.as_u32) + && (tm->connections[thread_index]->sub_conn[i]. + connection.rmt_ip.ip4.as_u32 == + ip4_hdr->src_address.as_u32)) + { + trans_conn = + &tm->connections[thread_index]->sub_conn[i].connection; + break; + } + } + } + ASSERT (trans_conn != 0); + ASSERT (sctp_lookup_is_valid (trans_conn, sctp_hdr)); + } + else + { + ip6_header_t *ip6_hdr; + ip6_hdr = vlib_buffer_get_current (b); + sctp_hdr = ip6_next_header (ip6_hdr); + trans_conn = session_lookup_connection_wt6 (fib_index, + &ip6_hdr->dst_address, + &ip6_hdr->src_address, + sctp_hdr->dst_port, + sctp_hdr->src_port, + TRANSPORT_PROTO_SCTP, + thread_index, &is_filtered); + if (trans_conn == 0) /* Not primary connection */ + { + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + if ((tm->connections[thread_index]->sub_conn[i]. + connection.lcl_ip.ip6.as_u64[0] == + ip6_hdr->dst_address.as_u64[0] + && tm->connections[thread_index]->sub_conn[i]. + connection.lcl_ip.ip6.as_u64[1] == + ip6_hdr->dst_address.as_u64[1]) + && (tm->connections[thread_index]->sub_conn[i]. + connection.rmt_ip.ip6.as_u64[0] == + ip6_hdr->src_address.as_u64[0] + && tm->connections[thread_index]-> + sub_conn[i].connection.rmt_ip.ip6.as_u64[1] == + ip6_hdr->src_address.as_u64[1])) + { + trans_conn = + &tm->connections[thread_index]->sub_conn[i].connection; + break; + } + } + } + ASSERT (trans_conn != 0); + ASSERT (sctp_lookup_is_valid (trans_conn, sctp_hdr)); + } + sctp_conn = sctp_get_connection_from_transport (trans_conn); + return sctp_conn; +} + +typedef struct +{ + sctp_header_t sctp_header; + sctp_connection_t sctp_connection; +} sctp_rx_trace_t; + +#define sctp_next_output(is_ip4) (is_ip4 ? SCTP_NEXT_SCTP4_OUTPUT \ + : SCTP_NEXT_SCTP6_OUTPUT) + +#define sctp_next_drop(is_ip4) (is_ip4 ? 
SCTP_NEXT_DROP4 \ + : SCTP_NEXT_DROP6) + +static void +sctp_set_rx_trace_data (sctp_rx_trace_t * rx_trace, + sctp_connection_t * sctp_conn, + sctp_header_t * sctp_hdr, vlib_buffer_t * b0, + u8 is_ip4) +{ + if (sctp_conn) + { + clib_memcpy_fast (&rx_trace->sctp_connection, sctp_conn, + sizeof (rx_trace->sctp_connection)); + } + else + { + sctp_hdr = sctp_buffer_hdr (b0); + } + clib_memcpy_fast (&rx_trace->sctp_header, sctp_hdr, + sizeof (rx_trace->sctp_header)); +} + +always_inline u16 +sctp_calculate_implied_length (ip4_header_t * ip4_hdr, ip6_header_t * ip6_hdr, + int is_ip4) +{ + u16 sctp_implied_packet_length = 0; + + if (is_ip4) + sctp_implied_packet_length = + clib_net_to_host_u16 (ip4_hdr->length) - ip4_header_bytes (ip4_hdr); + else + sctp_implied_packet_length = + clib_net_to_host_u16 (ip6_hdr->payload_length) - sizeof (ip6_hdr); + + return sctp_implied_packet_length; +} + +always_inline u8 +sctp_is_bundling (u16 sctp_implied_length, + sctp_chunks_common_hdr_t * sctp_common_hdr) +{ + if (sctp_implied_length != + sizeof (sctp_header_t) + vnet_sctp_get_chunk_length (sctp_common_hdr)) + return 1; + return 0; +} + +always_inline u16 +sctp_handle_operation_err (sctp_header_t * sctp_hdr, + sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b, u16 * next0) +{ + sctp_operation_error_t *op_err = (sctp_operation_error_t *) sctp_hdr; + + /* Check that the LOCALLY generated tag is being used by the REMOTE peer as the verification tag */ + if (sctp_conn->local_tag != sctp_hdr->verification_tag) + { + return SCTP_ERROR_INVALID_TAG; + } + + if (clib_net_to_host_u16 (op_err->err_causes[0].param_hdr.type) == + STALE_COOKIE_ERROR) + { + if (sctp_conn->state != SCTP_STATE_COOKIE_ECHOED) + *next0 = sctp_next_drop (sctp_conn->sub_conn[idx].c_is_ip4); + else + { + sctp_connection_cleanup (sctp_conn); + + session_transport_closing_notify (&sctp_conn-> + sub_conn[idx].connection); + } + } + + return SCTP_ERROR_NONE; +} + +always_inline u16 +sctp_handle_init (sctp_header_t * 
sctp_hdr, + sctp_chunks_common_hdr_t * sctp_chunk_hdr, + sctp_connection_t * sctp_conn, vlib_buffer_t * b0, + u16 sctp_implied_length) +{ + sctp_init_chunk_t *init_chunk = (sctp_init_chunk_t *) (sctp_hdr); + ip4_address_t ip4_addr; + ip6_address_t ip6_addr; + u8 add_ip4 = 0; + u8 add_ip6 = 0; + char hostname[FQDN_MAX_LENGTH]; + + /* Check the current state of the connection + * + * The logic required by the RFC4960 Section 5.2.2 is already taken care of + * in the code below and by the "sctp_prepare_initack_chunk" function. + * However, for debugging purposes it is nice to have a message printed out + * for these corner-case scenarios. + */ + if (sctp_conn->state != SCTP_STATE_CLOSED) + { /* UNEXPECTED scenario */ + switch (sctp_conn->state) + { + case SCTP_STATE_COOKIE_WAIT: + SCTP_ADV_DBG ("Received INIT chunk while in COOKIE_WAIT state"); + sctp_prepare_initack_chunk_for_collision (sctp_conn, + SCTP_PRIMARY_PATH_IDX, + b0, &ip4_addr, &ip6_addr); + return SCTP_ERROR_NONE; + case SCTP_STATE_COOKIE_ECHOED: + case SCTP_STATE_SHUTDOWN_ACK_SENT: + SCTP_ADV_DBG ("Received INIT chunk while in COOKIE_ECHOED state"); + if (sctp_conn->forming_association_changed == 0) + sctp_prepare_initack_chunk_for_collision (sctp_conn, + SCTP_PRIMARY_PATH_IDX, + b0, &ip4_addr, + &ip6_addr); + else + sctp_prepare_abort_for_collision (sctp_conn, + SCTP_PRIMARY_PATH_IDX, b0, + &ip4_addr, &ip6_addr); + return SCTP_ERROR_NONE; + } + } + + if (sctp_hdr->verification_tag != 0x0) + return SCTP_ERROR_INVALID_TAG_FOR_INIT; + + /* + * It is not possible to bundle any other CHUNK with the INIT chunk + */ + if (sctp_is_bundling (sctp_implied_length, &init_chunk->chunk_hdr)) + return SCTP_ERROR_BUNDLING_VIOLATION; + + /* Save the INITIATE_TAG of the remote peer for this connection: + * it MUST be used for the VERIFICATION_TAG parameter in the SCTP HEADER */ + sctp_conn->remote_tag = init_chunk->initiate_tag; + sctp_conn->remote_initial_tsn = + clib_net_to_host_u32 (init_chunk->initial_tsn); + 
sctp_conn->last_rcvd_tsn = sctp_conn->remote_initial_tsn; + sctp_conn->next_tsn_expected = sctp_conn->remote_initial_tsn + 1; + SCTP_CONN_TRACKING_DBG ("sctp_conn->remote_initial_tsn = %u", + sctp_conn->remote_initial_tsn); + + sctp_conn->peer_rwnd = clib_net_to_host_u32 (init_chunk->a_rwnd); + /* + * If the length specified in the INIT message is bigger than the size in bytes of our structure it means that + * optional parameters have been sent with the INIT chunk and we need to parse them. + */ + u16 length = vnet_sctp_get_chunk_length (sctp_chunk_hdr); + if (length > sizeof (sctp_init_chunk_t)) + { + /* There are optional parameters in the INIT chunk */ + u16 pointer_offset = sizeof (sctp_init_chunk_t); + while (pointer_offset < length) + { + sctp_opt_params_hdr_t *opt_params_hdr = + (sctp_opt_params_hdr_t *) init_chunk + pointer_offset; + + switch (clib_net_to_host_u16 (opt_params_hdr->type)) + { + case SCTP_IPV4_ADDRESS_TYPE: + { + sctp_ipv4_addr_param_t *ipv4 = + (sctp_ipv4_addr_param_t *) opt_params_hdr; + clib_memcpy_fast (&ip4_addr, &ipv4->address, + sizeof (ip4_address_t)); + + if (sctp_sub_connection_add_ip4 (vlib_get_main (), + &sctp_conn->sub_conn + [SCTP_PRIMARY_PATH_IDX].connection. + lcl_ip.ip4, + &ipv4->address) == + SCTP_ERROR_NONE) + add_ip4 = 1; + + break; + } + case SCTP_IPV6_ADDRESS_TYPE: + { + sctp_ipv6_addr_param_t *ipv6 = + (sctp_ipv6_addr_param_t *) opt_params_hdr; + clib_memcpy_fast (&ip6_addr, &ipv6->address, + sizeof (ip6_address_t)); + + if (sctp_sub_connection_add_ip6 (vlib_get_main (), + &sctp_conn->sub_conn + [SCTP_PRIMARY_PATH_IDX].connection. 
+ lcl_ip.ip6, + &ipv6->address) == + SCTP_ERROR_NONE) + add_ip6 = 1; + + break; + } + case SCTP_COOKIE_PRESERVATIVE_TYPE: + { + sctp_cookie_preservative_param_t *cookie_pres = + (sctp_cookie_preservative_param_t *) opt_params_hdr; + sctp_conn->peer_cookie_life_span_increment = + cookie_pres->life_span_inc; + break; + } + case SCTP_HOSTNAME_ADDRESS_TYPE: + { + sctp_hostname_param_t *hostname_addr = + (sctp_hostname_param_t *) opt_params_hdr; + clib_memcpy_fast (hostname, hostname_addr->hostname, + FQDN_MAX_LENGTH); + break; + } + case SCTP_SUPPORTED_ADDRESS_TYPES: + { + /* TODO */ + break; + } + } + pointer_offset += clib_net_to_host_u16 (opt_params_hdr->length); + } + } + + /* Reuse buffer to make init-ack and send */ + sctp_prepare_initack_chunk (sctp_conn, SCTP_PRIMARY_PATH_IDX, b0, &ip4_addr, + add_ip4, &ip6_addr, add_ip6); + return SCTP_ERROR_NONE; +} + +always_inline u16 +sctp_is_valid_init_ack (sctp_header_t * sctp_hdr, + sctp_chunks_common_hdr_t * sctp_chunk_hdr, + sctp_connection_t * sctp_conn, vlib_buffer_t * b0, + u16 sctp_implied_length) +{ + sctp_init_ack_chunk_t *init_ack_chunk = + (sctp_init_ack_chunk_t *) (sctp_hdr); + + /* Check that the LOCALLY generated tag is being used by the REMOTE peer as the verification tag */ + if (sctp_conn->local_tag != init_ack_chunk->sctp_hdr.verification_tag) + { + return SCTP_ERROR_INVALID_TAG; + } + + /* + * It is not possible to bundle any other CHUNK with the INIT_ACK chunk + */ + if (sctp_is_bundling (sctp_implied_length, &init_ack_chunk->chunk_hdr)) + return SCTP_ERROR_BUNDLING_VIOLATION; + + return SCTP_ERROR_NONE; +} + +always_inline u16 +sctp_handle_init_ack (sctp_header_t * sctp_hdr, + sctp_chunks_common_hdr_t * sctp_chunk_hdr, + sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b0, u16 sctp_implied_length) +{ + sctp_init_ack_chunk_t *init_ack_chunk = + (sctp_init_ack_chunk_t *) (sctp_hdr); + + char hostname[FQDN_MAX_LENGTH]; + + /* Check that the LOCALLY generated tag is being used by the REMOTE peer 
as the verification tag */ + if (sctp_conn->local_tag != init_ack_chunk->sctp_hdr.verification_tag) + { + return SCTP_ERROR_INVALID_TAG; + } + + /* + * It is not possible to bundle any other CHUNK with the INIT chunk + */ + if (sctp_is_bundling (sctp_implied_length, &init_ack_chunk->chunk_hdr)) + return SCTP_ERROR_BUNDLING_VIOLATION; + + /* Stop the T1_INIT timer */ + sctp_timer_reset (sctp_conn, idx, SCTP_TIMER_T1_INIT); + + sctp_calculate_rto (sctp_conn, idx); + + /* remote_tag to be placed in the VERIFICATION_TAG field of the COOKIE_ECHO chunk */ + sctp_conn->remote_tag = init_ack_chunk->initiate_tag; + sctp_conn->remote_initial_tsn = + clib_net_to_host_u32 (init_ack_chunk->initial_tsn); + sctp_conn->last_rcvd_tsn = sctp_conn->remote_initial_tsn; + sctp_conn->next_tsn_expected = sctp_conn->remote_initial_tsn + 1; + SCTP_CONN_TRACKING_DBG ("sctp_conn->remote_initial_tsn = %u", + sctp_conn->remote_initial_tsn); + sctp_conn->peer_rwnd = clib_net_to_host_u32 (init_ack_chunk->a_rwnd); + + u16 length = vnet_sctp_get_chunk_length (sctp_chunk_hdr); + + if (length > sizeof (sctp_init_ack_chunk_t)) + /* + * There are optional parameters in the INIT ACK chunk + */ + { + u16 pointer_offset = sizeof (sctp_init_ack_chunk_t); + + while (pointer_offset < length) + { + sctp_opt_params_hdr_t *opt_params_hdr = + (sctp_opt_params_hdr_t *) ((char *) init_ack_chunk + + pointer_offset); + + switch (clib_net_to_host_u16 (opt_params_hdr->type)) + { + case SCTP_IPV4_ADDRESS_TYPE: + { + sctp_ipv4_addr_param_t *ipv4 = + (sctp_ipv4_addr_param_t *) opt_params_hdr; + + sctp_sub_connection_add_ip4 (vlib_get_main (), + &sctp_conn->sub_conn + [SCTP_PRIMARY_PATH_IDX].connection. + lcl_ip.ip4, &ipv4->address); + + break; + } + case SCTP_IPV6_ADDRESS_TYPE: + { + sctp_ipv6_addr_param_t *ipv6 = + (sctp_ipv6_addr_param_t *) opt_params_hdr; + + sctp_sub_connection_add_ip6 (vlib_get_main (), + &sctp_conn->sub_conn + [SCTP_PRIMARY_PATH_IDX].connection. 
+ lcl_ip.ip6, &ipv6->address); + + break; + } + case SCTP_STATE_COOKIE_TYPE: + { + sctp_state_cookie_param_t *state_cookie_param = + (sctp_state_cookie_param_t *) opt_params_hdr; + + clib_memcpy_fast (&(sctp_conn->cookie_param), + state_cookie_param, + sizeof (sctp_state_cookie_param_t)); + + break; + } + case SCTP_HOSTNAME_ADDRESS_TYPE: + { + sctp_hostname_param_t *hostname_addr = + (sctp_hostname_param_t *) opt_params_hdr; + clib_memcpy_fast (hostname, hostname_addr->hostname, + FQDN_MAX_LENGTH); + break; + } + case SCTP_UNRECOGNIZED_TYPE: + { + break; + } + } + u16 increment = clib_net_to_host_u16 (opt_params_hdr->length); + /* This indicates something really bad happened */ + if (increment == 0) + { + return SCTP_ERROR_INVALID_TAG; + } + pointer_offset += increment; + } + } + + sctp_prepare_cookie_echo_chunk (sctp_conn, idx, b0, 1); + + /* Start the T1_COOKIE timer */ + sctp_timer_set (sctp_conn, idx, + SCTP_TIMER_T1_COOKIE, sctp_conn->sub_conn[idx].RTO); + + return SCTP_ERROR_NONE; +} + +/** Enqueue data out-of-order for delivery to application */ +always_inline int +sctp_session_enqueue_data_ooo (sctp_connection_t * sctp_conn, + vlib_buffer_t * b, u16 data_len, u8 conn_idx) +{ + int written, error = SCTP_ERROR_ENQUEUED; + + written = + session_enqueue_stream_connection (&sctp_conn-> + sub_conn[conn_idx].connection, b, 0, + 1 /* queue event */ , + 0); + + /* Update next_tsn_expected */ + if (PREDICT_TRUE (written == data_len)) + { + sctp_conn->next_tsn_expected += written; + + SCTP_ADV_DBG ("CONN = %u, WRITTEN [%u] == DATA_LEN [%d]", + sctp_conn->sub_conn[conn_idx].connection.c_index, + written, data_len); + } + /* If more data written than expected, account for out-of-order bytes. 
*/ + else if (written > data_len) + { + sctp_conn->next_tsn_expected += written; + + SCTP_ADV_DBG ("CONN = %u, WRITTEN [%u] > DATA_LEN [%d]", + sctp_conn->sub_conn[conn_idx].connection.c_index, + written, data_len); + } + else if (written > 0) + { + /* We've written something but FIFO is probably full now */ + sctp_conn->next_tsn_expected += written; + + error = SCTP_ERROR_PARTIALLY_ENQUEUED; + + SCTP_ADV_DBG + ("CONN = %u, WRITTEN [%u] > 0 (SCTP_ERROR_PARTIALLY_ENQUEUED)", + sctp_conn->sub_conn[conn_idx].connection.c_index, written); + } + else + { + SCTP_ADV_DBG ("CONN = %u, WRITTEN == 0 (SCTP_ERROR_FIFO_FULL)", + sctp_conn->sub_conn[conn_idx].connection.c_index); + + return SCTP_ERROR_FIFO_FULL; + } + + /* TODO: Update out_of_order_map & SACK list */ + + return error; +} + +/** Enqueue data for delivery to application */ +always_inline int +sctp_session_enqueue_data (sctp_connection_t * sctp_conn, vlib_buffer_t * b, + u16 data_len, u8 conn_idx) +{ + int written, error = SCTP_ERROR_ENQUEUED; + + written = + session_enqueue_stream_connection (&sctp_conn-> + sub_conn[conn_idx].connection, b, 0, + 1 /* queue event */ , + 1); + + /* Update next_tsn_expected */ + if (PREDICT_TRUE (written == data_len)) + { + sctp_conn->next_tsn_expected += written; + + SCTP_ADV_DBG ("CONN = %u, WRITTEN [%u] == DATA_LEN [%d]", + sctp_conn->sub_conn[conn_idx].connection.c_index, + written, data_len); + } + /* If more data written than expected, account for out-of-order bytes. 
*/ + else if (written > data_len) + { + sctp_conn->next_tsn_expected += written; + + SCTP_ADV_DBG ("CONN = %u, WRITTEN [%u] > DATA_LEN [%d]", + sctp_conn->sub_conn[conn_idx].connection.c_index, + written, data_len); + } + else if (written > 0) + { + /* We've written something but FIFO is probably full now */ + sctp_conn->next_tsn_expected += written; + + error = SCTP_ERROR_PARTIALLY_ENQUEUED; + + SCTP_ADV_DBG + ("CONN = %u, WRITTEN [%u] > 0 (SCTP_ERROR_PARTIALLY_ENQUEUED)", + sctp_conn->sub_conn[conn_idx].connection.c_index, written); + } + else + { + SCTP_ADV_DBG ("CONN = %u, WRITTEN == 0 (SCTP_ERROR_FIFO_FULL)", + sctp_conn->sub_conn[conn_idx].connection.c_index); + + return SCTP_ERROR_FIFO_FULL; + } + + return error; +} + +always_inline u8 +sctp_is_sack_delayable (sctp_connection_t * sctp_conn, u8 idx, u8 is_gapping) +{ + if (sctp_conn->conn_config.never_delay_sack) + { + SCTP_CONN_TRACKING_DBG ("sctp_conn->conn_config.never_delay_sack = ON"); + return 0; + } + + /* Section 4.4 of the RFC4960 */ + if (sctp_conn->state == SCTP_STATE_SHUTDOWN_SENT) + { + SCTP_CONN_TRACKING_DBG ("sctp_conn->state = %s; SACK not delayable", + sctp_state_to_string (sctp_conn->state)); + return 0; + } + + if (is_gapping) + { + SCTP_CONN_TRACKING_DBG + ("gapping != 0: CONN_INDEX = %u, sctp_conn->ack_state = %u", + sctp_conn->sub_conn[idx].connection.c_index, sctp_conn->ack_state); + return 0; + } + + sctp_conn->ack_state += 1; + if (sctp_conn->ack_state >= MAX_ENQUEABLE_SACKS) + { + SCTP_CONN_TRACKING_DBG + ("sctp_conn->ack_state >= MAX_ENQUEABLE_SACKS: CONN_INDEX = %u, sctp_conn->ack_state = %u", + sctp_conn->sub_conn[idx].connection.c_index, sctp_conn->ack_state); + return 0; + } + + return 1; +} + +always_inline void +sctp_is_connection_gapping (sctp_connection_t * sctp_conn, u32 tsn, + u8 * gapping) +{ + if (sctp_conn->next_tsn_expected != tsn) // It means data transmission is GAPPING + { + SCTP_CONN_TRACKING_DBG + ("GAPPING: CONN_INDEX = %u, sctp_conn->next_tsn_expected = %u, tsn 
= %u, diff = %u", + sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.c_index, + sctp_conn->next_tsn_expected, tsn, + sctp_conn->next_tsn_expected - tsn); + + *gapping = 1; + } +} + +always_inline u16 +sctp_handle_data (sctp_payload_data_chunk_t * sctp_data_chunk, + sctp_connection_t * sctp_conn, u8 idx, vlib_buffer_t * b, + u16 * next0) +{ + u32 error = 0, n_data_bytes; + u8 is_gapping = 0; + + /* Check that the LOCALLY generated tag is being used by the REMOTE peer as the verification tag */ + if (sctp_conn->local_tag != sctp_data_chunk->sctp_hdr.verification_tag) + { + *next0 = sctp_next_drop (sctp_conn->sub_conn[idx].c_is_ip4); + sctp_conn->sub_conn[idx].enqueue_state = SCTP_ERROR_INVALID_TAG; + return sctp_conn->sub_conn[idx].enqueue_state; + } + + sctp_buffer_opaque (b)->sctp.sid = sctp_data_chunk->stream_id; + sctp_buffer_opaque (b)->sctp.ssn = sctp_data_chunk->stream_seq; + + u32 tsn = clib_net_to_host_u32 (sctp_data_chunk->tsn); + + vlib_buffer_advance (b, sctp_buffer_opaque (b)->sctp.data_offset); + u32 chunk_len = vnet_sctp_get_chunk_length (&sctp_data_chunk->chunk_hdr) - + (sizeof (sctp_payload_data_chunk_t) - sizeof (sctp_header_t)); + + ASSERT (sctp_buffer_opaque (b)->sctp.data_len); + ASSERT (chunk_len); + + /* Padding was added: see RFC 4096 section 3.3.1 */ + if (sctp_buffer_opaque (b)->sctp.data_len > chunk_len) + { + /* Let's change the data_len to the right amount calculated here now. + * We cannot do that in the generic sctp46_input_dispatcher node since + * that is common to all CHUNKS handling. + */ + sctp_buffer_opaque (b)->sctp.data_len = chunk_len; + /* We need to change b->current_length so that downstream calls to + * session_enqueue_stream_connection (called by sctp_session_enqueue_data) + * push the correct amount of data to be enqueued. 
+ */ + b->current_length = chunk_len; + } + n_data_bytes = sctp_buffer_opaque (b)->sctp.data_len; + + sctp_is_connection_gapping (sctp_conn, tsn, &is_gapping); + + sctp_conn->last_rcvd_tsn = tsn; + + SCTP_ADV_DBG ("POINTER_WITH_DATA = %p", b->data); + + u8 bbit = vnet_sctp_get_bbit (&sctp_data_chunk->chunk_hdr); + u8 ebit = vnet_sctp_get_ebit (&sctp_data_chunk->chunk_hdr); + + if (bbit == 1 && ebit == 1) /* Unfragmented message */ + { + /* In order data, enqueue. Fifo figures out by itself if any out-of-order + * segments can be enqueued after fifo tail offset changes. */ + if (PREDICT_FALSE (is_gapping == 1)) + error = + sctp_session_enqueue_data_ooo (sctp_conn, b, n_data_bytes, idx); + else + error = sctp_session_enqueue_data (sctp_conn, b, n_data_bytes, idx); + } + else if (bbit == 1 && ebit == 0) /* First piece of a fragmented user message */ + { + error = sctp_session_enqueue_data (sctp_conn, b, n_data_bytes, idx); + } + else if (bbit == 0 && ebit == 1) /* Last piece of a fragmented user message */ + { + if (PREDICT_FALSE (is_gapping == 1)) + error = + sctp_session_enqueue_data_ooo (sctp_conn, b, n_data_bytes, idx); + else + error = sctp_session_enqueue_data (sctp_conn, b, n_data_bytes, idx); + } + else /* Middle piece of a fragmented user message */ + { + if (PREDICT_FALSE (is_gapping == 1)) + error = + sctp_session_enqueue_data_ooo (sctp_conn, b, n_data_bytes, idx); + else + error = sctp_session_enqueue_data (sctp_conn, b, n_data_bytes, idx); + } + sctp_conn->last_rcvd_tsn = tsn; + + SCTP_ADV_DBG ("POINTER_WITH_DATA = %p", b->data); + + if (!sctp_is_sack_delayable (sctp_conn, idx, is_gapping)) + { + *next0 = sctp_next_output (sctp_conn->sub_conn[idx].c_is_ip4); + sctp_prepare_sack_chunk (sctp_conn, idx, b); + } + else + *next0 = sctp_next_drop (sctp_conn->sub_conn[idx].c_is_ip4); + + sctp_conn->sub_conn[idx].enqueue_state = error; + + return error; +} + +always_inline u16 +sctp_handle_cookie_echo (sctp_header_t * sctp_hdr, + sctp_chunks_common_hdr_t * 
sctp_chunk_hdr, + sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b0, u16 * next0) +{ + u64 now = sctp_time_now (); + + sctp_cookie_echo_chunk_t *cookie_echo = + (sctp_cookie_echo_chunk_t *) sctp_hdr; + + /* Check that the LOCALLY generated tag is being used by the REMOTE peer as the verification tag */ + if (sctp_conn->local_tag != sctp_hdr->verification_tag) + { + *next0 = sctp_next_drop (sctp_conn->sub_conn[idx].c_is_ip4); + return SCTP_ERROR_INVALID_TAG; + } + + sctp_calculate_rto (sctp_conn, idx); + + u64 creation_time = + clib_net_to_host_u64 (cookie_echo->cookie.creation_time); + u64 cookie_lifespan = + clib_net_to_host_u32 (cookie_echo->cookie.cookie_lifespan); + + if (now > creation_time + cookie_lifespan) + { + SCTP_DBG ("now (%u) > creation_time (%u) + cookie_lifespan (%u)", + now, creation_time, cookie_lifespan); + return SCTP_ERROR_COOKIE_ECHO_VIOLATION; + } + + sctp_prepare_cookie_ack_chunk (sctp_conn, idx, b0); + + /* Change state */ + sctp_conn->state = SCTP_STATE_ESTABLISHED; + sctp_conn->sub_conn[idx].state = SCTP_SUBCONN_STATE_UP; + *next0 = sctp_next_output (sctp_conn->sub_conn[idx].c_is_ip4); + + sctp_timer_set (sctp_conn, idx, SCTP_TIMER_T4_HEARTBEAT, + sctp_conn->sub_conn[idx].RTO); + + session_stream_accept_notify (&sctp_conn->sub_conn[idx].connection); + + return SCTP_ERROR_NONE; + +} + +always_inline u16 +sctp_handle_cookie_ack (sctp_header_t * sctp_hdr, + sctp_chunks_common_hdr_t * sctp_chunk_hdr, + sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b0, u16 * next0) +{ + /* Check that the LOCALLY generated tag is being used by the REMOTE peer as the verification tag */ + if (sctp_conn->local_tag != sctp_hdr->verification_tag) + { + *next0 = sctp_next_drop (sctp_conn->sub_conn[idx].c_is_ip4); + return SCTP_ERROR_INVALID_TAG; + } + + sctp_calculate_rto (sctp_conn, idx); + + sctp_timer_reset (sctp_conn, idx, SCTP_TIMER_T1_COOKIE); + /* Change state */ + sctp_conn->state = SCTP_STATE_ESTABLISHED; + 
sctp_conn->sub_conn[idx].state = SCTP_SUBCONN_STATE_UP; + + *next0 = sctp_next_drop (sctp_conn->sub_conn[idx].c_is_ip4); + + sctp_timer_set (sctp_conn, idx, SCTP_TIMER_T4_HEARTBEAT, + sctp_conn->sub_conn[idx].RTO); + + session_stream_accept_notify (&sctp_conn->sub_conn[idx].connection); + + return SCTP_ERROR_NONE; + +} + +always_inline uword +sctp46_rcv_phase_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + sctp_main_t *tm = vnet_get_sctp_main (); + + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->thread_index; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + sctp_header_t *sctp_hdr = 0; + sctp_chunks_common_hdr_t *sctp_chunk_hdr = 0; + ip4_header_t *ip4_hdr = 0; + ip6_header_t *ip6_hdr = 0; + sctp_connection_t *sctp_conn, *new_sctp_conn; + u16 sctp_implied_length = 0; + u16 error0 = SCTP_ERROR_NONE, next0 = sctp_next_drop (is_ip4); + u8 idx; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* If we are in SCTP_COOKIE_WAIT_STATE then the connection + * will come from the half-open connections pool. 
+ */ + sctp_conn = + sctp_half_open_connection_get (sctp_buffer_opaque (b0)-> + sctp.connection_index); + + if (PREDICT_FALSE (sctp_conn == 0)) + { + SCTP_ADV_DBG + ("sctp_conn == NULL; return SCTP_ERROR_INVALID_CONNECTION"); + error0 = SCTP_ERROR_INVALID_CONNECTION; + goto drop; + } + if (is_ip4) + { + ip4_hdr = vlib_buffer_get_current (b0); + sctp_hdr = ip4_next_header (ip4_hdr); + idx = sctp_sub_conn_id_via_ip4h (sctp_conn, ip4_hdr); + } + else + { + ip6_hdr = vlib_buffer_get_current (b0); + sctp_hdr = ip6_next_header (ip6_hdr); + idx = sctp_sub_conn_id_via_ip6h (sctp_conn, ip6_hdr); + } + + sctp_conn->sub_conn[idx].subconn_idx = idx; + sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr; + + sctp_chunk_hdr = + (sctp_chunks_common_hdr_t *) (&full_hdr->common_hdr); + + sctp_implied_length = + sctp_calculate_implied_length (ip4_hdr, ip6_hdr, is_ip4); + + u8 chunk_type = vnet_sctp_get_chunk_type (&full_hdr->common_hdr); + + switch (chunk_type) + { + case INIT_ACK: + error0 = + sctp_is_valid_init_ack (sctp_hdr, sctp_chunk_hdr, sctp_conn, + b0, sctp_implied_length); + + if (error0 == SCTP_ERROR_NONE) + { + pool_get (tm->connections[my_thread_index], new_sctp_conn); + clib_memcpy_fast (new_sctp_conn, sctp_conn, + sizeof (*new_sctp_conn)); + new_sctp_conn->sub_conn[idx].c_c_index = + new_sctp_conn - tm->connections[my_thread_index]; + new_sctp_conn->sub_conn[idx].c_thread_index = + my_thread_index; + new_sctp_conn->sub_conn[idx].PMTU = + sctp_conn->sub_conn[idx].PMTU; + new_sctp_conn->sub_conn[idx].subconn_idx = idx; + + if (sctp_half_open_connection_cleanup (sctp_conn)) + { + SCTP_DBG + ("Cannot cleanup half-open connection; not the owning thread"); + } + + sctp_connection_timers_init (new_sctp_conn); + + sctp_init_cwnd (new_sctp_conn); + + error0 = + sctp_handle_init_ack (sctp_hdr, sctp_chunk_hdr, + new_sctp_conn, idx, b0, + sctp_implied_length); + + if (session_stream_connect_notify + (&new_sctp_conn->sub_conn[idx].connection, 0)) + { + SCTP_DBG + ("conn_index 
= %u: session_stream_connect_notify error; cleaning up connection", + new_sctp_conn->sub_conn[idx].connection.c_index); + sctp_connection_cleanup (new_sctp_conn); + goto drop; + } + next0 = sctp_next_output (is_ip4); + } + break; + + case OPERATION_ERROR: + error0 = + sctp_handle_operation_err (sctp_hdr, sctp_conn, idx, b0, + &next0); + break; + + /* All UNEXPECTED scenarios (wrong chunk received per state-machine) + * are handled by the input-dispatcher function using the table-lookup + * hence we should never get to the "default" case below. + */ + default: + error0 = SCTP_ERROR_UNKNOWN_CHUNK; + next0 = sctp_next_drop (is_ip4); + goto drop; + } + + if (error0 != SCTP_ERROR_NONE) + { + clib_warning ("error while parsing chunk"); + sctp_connection_cleanup (sctp_conn); + next0 = sctp_next_drop (is_ip4); + goto drop; + } + + drop: + b0->error = node->errors[error0]; + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sctp_rx_trace_t *t0 = + vlib_add_trace (vm, node, b0, sizeof (*t0)); + sctp_set_rx_trace_data (t0, sctp_conn, sctp_hdr, b0, is_ip4); + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +VLIB_NODE_FN (sctp4_rcv_phase_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return sctp46_rcv_phase_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +VLIB_NODE_FN (sctp6_init_phase_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return sctp46_rcv_phase_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +static u8 * +format_sctp_rx_trace_short (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + sctp_rx_trace_t *t = va_arg (*args, sctp_rx_trace_t *); + + s = format (s, "%d -> %d (%U)", + clib_net_to_host_u16 
(t->sctp_header.src_port), + clib_net_to_host_u16 (t->sctp_header.dst_port), + format_sctp_state, t->sctp_connection.state); + + return s; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sctp4_rcv_phase_node) = +{ + .name = "sctp4-rcv", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = SCTP_N_ERROR, + .error_strings = sctp_error_strings, + .n_next_nodes = SCTP_RCV_PHASE_N_NEXT, + .next_nodes = + { +#define _(s,n) [SCTP_RCV_PHASE_NEXT_##s] = n, + foreach_sctp_state_next +#undef _ + }, + .format_trace = format_sctp_rx_trace_short, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sctp6_init_phase_node) = +{ + .name = "sctp6-rcv", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = SCTP_N_ERROR, + .error_strings = sctp_error_strings, + .n_next_nodes = SCTP_RCV_PHASE_N_NEXT, + .next_nodes = + { +#define _(s,n) [SCTP_RCV_PHASE_NEXT_##s] = n, + foreach_sctp_state_next +#undef _ + }, + .format_trace = format_sctp_rx_trace_short, +}; +/* *INDENT-ON* */ + +always_inline u16 +sctp_handle_shutdown (sctp_header_t * sctp_hdr, + sctp_chunks_common_hdr_t * sctp_chunk_hdr, + sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b0, u16 sctp_implied_length, + u16 * next0) +{ + sctp_shutdown_association_chunk_t *shutdown_chunk = + (sctp_shutdown_association_chunk_t *) (sctp_hdr); + + /* Check that the LOCALLY generated tag is being used by the REMOTE peer as the verification tag */ + if (sctp_conn->local_tag != sctp_hdr->verification_tag) + { + *next0 = sctp_next_drop (sctp_conn->sub_conn[idx].c_is_ip4); + return SCTP_ERROR_INVALID_TAG; + } + + /* + * It is not possible to bundle any other CHUNK with the SHUTDOWN chunk + */ + if (sctp_is_bundling (sctp_implied_length, &shutdown_chunk->chunk_hdr)) + return SCTP_ERROR_BUNDLING_VIOLATION; + + switch (sctp_conn->state) + { + case SCTP_STATE_ESTABLISHED: + if (sctp_check_outstanding_data_chunks (sctp_conn) == 0) + sctp_conn->state = SCTP_STATE_SHUTDOWN_RECEIVED; + 
sctp_send_shutdown_ack (sctp_conn, idx, b0); + break; + + case SCTP_STATE_SHUTDOWN_SENT: + sctp_send_shutdown_ack (sctp_conn, idx, b0); + break; + } + + *next0 = sctp_next_output (sctp_conn->sub_conn[idx].c_is_ip4); + + return SCTP_ERROR_NONE; +} + +always_inline u16 +sctp_handle_shutdown_ack (sctp_header_t * sctp_hdr, + sctp_chunks_common_hdr_t * sctp_chunk_hdr, + sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b0, u16 sctp_implied_length, + u16 * next0) +{ + sctp_shutdown_ack_chunk_t *shutdown_ack_chunk = + (sctp_shutdown_ack_chunk_t *) (sctp_hdr); + + /* Check that the LOCALLY generated tag is being used by the REMOTE peer as the verification tag */ + if (sctp_conn->local_tag != sctp_hdr->verification_tag) + { + *next0 = sctp_next_drop (sctp_conn->sub_conn[idx].c_is_ip4); + return SCTP_ERROR_INVALID_TAG; + } + + /* + * It is not possible to bundle any other CHUNK with the SHUTDOWN chunk + */ + if (sctp_is_bundling (sctp_implied_length, &shutdown_ack_chunk->chunk_hdr)) + return SCTP_ERROR_BUNDLING_VIOLATION; + + /* Whether we are in SCTP_STATE_SHUTDOWN_SENT or SCTP_STATE_SHUTDOWN_ACK_SENT + * the reception of a SHUTDOWN_ACK chunk leads to the same actions: + * - STOP T2_SHUTDOWN timer + * - SEND SHUTDOWN_COMPLETE chunk + */ + sctp_timer_reset (sctp_conn, SCTP_PRIMARY_PATH_IDX, SCTP_TIMER_T2_SHUTDOWN); + + sctp_send_shutdown_complete (sctp_conn, idx, b0); + + *next0 = sctp_next_output (sctp_conn->sub_conn[idx].c_is_ip4); + + return SCTP_ERROR_NONE; +} + +always_inline u16 +sctp_handle_shutdown_complete (sctp_header_t * sctp_hdr, + sctp_chunks_common_hdr_t * sctp_chunk_hdr, + sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b0, u16 sctp_implied_length, + u16 * next0) +{ + sctp_shutdown_complete_chunk_t *shutdown_complete = + (sctp_shutdown_complete_chunk_t *) (sctp_hdr); + + /* Check that the LOCALLY generated tag is being used by the REMOTE peer as the verification tag */ + if (sctp_conn->local_tag != sctp_hdr->verification_tag) + { + *next0 = 
sctp_next_drop (sctp_conn->sub_conn[idx].c_is_ip4); + return SCTP_ERROR_INVALID_TAG; + } + + /* + * It is not possible to bundle any other CHUNK with the SHUTDOWN chunk + */ + if (sctp_is_bundling (sctp_implied_length, &shutdown_complete->chunk_hdr)) + return SCTP_ERROR_BUNDLING_VIOLATION; + + sctp_timer_reset (sctp_conn, idx, SCTP_TIMER_T2_SHUTDOWN); + + session_transport_closing_notify (&sctp_conn->sub_conn[idx].connection); + + sctp_conn->state = SCTP_STATE_CLOSED; + + *next0 = sctp_next_drop (sctp_conn->sub_conn[idx].c_is_ip4); + + return SCTP_ERROR_NONE; +} + +always_inline uword +sctp46_shutdown_phase_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->thread_index; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + sctp_rx_trace_t *sctp_trace; + sctp_header_t *sctp_hdr = 0; + sctp_chunks_common_hdr_t *sctp_chunk_hdr = 0; + ip4_header_t *ip4_hdr = 0; + ip6_header_t *ip6_hdr = 0; + sctp_connection_t *sctp_conn; + u16 sctp_implied_length = 0; + u16 error0 = SCTP_ERROR_NONE, next0 = SCTP_RCV_PHASE_N_NEXT; + u8 idx = 0; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + sctp_conn = + sctp_connection_get (sctp_buffer_opaque (b0)-> + sctp.connection_index, my_thread_index); + + if (PREDICT_FALSE (sctp_conn == 0)) + { + SCTP_DBG + ("sctp_conn == NULL; return SCTP_ERROR_INVALID_CONNECTION"); + error0 = SCTP_ERROR_INVALID_CONNECTION; + goto drop; + } + + if (is_ip4) + { + ip4_hdr = vlib_buffer_get_current (b0); + sctp_hdr = ip4_next_header (ip4_hdr); + idx = 
sctp_sub_conn_id_via_ip4h (sctp_conn, ip4_hdr); + } + else + { + ip6_hdr = vlib_buffer_get_current (b0); + sctp_hdr = ip6_next_header (ip6_hdr); + idx = sctp_sub_conn_id_via_ip6h (sctp_conn, ip6_hdr); + } + + sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr; + sctp_chunk_hdr = &full_hdr->common_hdr; + + sctp_implied_length = + sctp_calculate_implied_length (ip4_hdr, ip6_hdr, is_ip4); + + u8 chunk_type = vnet_sctp_get_chunk_type (sctp_chunk_hdr); + switch (chunk_type) + { + case SHUTDOWN: + error0 = + sctp_handle_shutdown (sctp_hdr, sctp_chunk_hdr, sctp_conn, + idx, b0, sctp_implied_length, &next0); + break; + + case SHUTDOWN_ACK: + error0 = + sctp_handle_shutdown_ack (sctp_hdr, sctp_chunk_hdr, sctp_conn, + idx, b0, sctp_implied_length, + &next0); + break; + + case SHUTDOWN_COMPLETE: + error0 = + sctp_handle_shutdown_complete (sctp_hdr, sctp_chunk_hdr, + sctp_conn, idx, b0, + sctp_implied_length, &next0); + + sctp_connection_cleanup (sctp_conn); + break; + + /* + * DATA chunks can still be transmitted/received in the SHUTDOWN-PENDING + * and SHUTDOWN-SENT states (as per RFC4960 Section 6) + */ + case DATA: + error0 = + sctp_handle_data ((sctp_payload_data_chunk_t *) sctp_hdr, + sctp_conn, idx, b0, &next0); + break; + + case OPERATION_ERROR: + error0 = + sctp_handle_operation_err (sctp_hdr, sctp_conn, idx, b0, + &next0); + break; + + case COOKIE_ECHO: /* Cookie Received While Shutting Down */ + sctp_prepare_operation_error (sctp_conn, idx, b0, + COOKIE_RECEIVED_WHILE_SHUTTING_DOWN); + error0 = SCTP_ERROR_NONE; + next0 = sctp_next_output (is_ip4); + break; + /* All UNEXPECTED scenarios (wrong chunk received per state-machine) + * are handled by the input-dispatcher function using the table-lookup + * hence we should never get to the "default" case below. 
+ */ + default: + error0 = SCTP_ERROR_UNKNOWN_CHUNK; + next0 = sctp_next_drop (is_ip4); + goto drop; + } + + if (error0 != SCTP_ERROR_NONE) + { + clib_warning ("error while parsing chunk"); + sctp_connection_cleanup (sctp_conn); + next0 = sctp_next_drop (is_ip4); + goto drop; + } + + drop: + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sctp_trace = + vlib_add_trace (vm, node, b0, sizeof (*sctp_trace)); + + if (sctp_hdr != NULL) + clib_memcpy_fast (&sctp_trace->sctp_header, sctp_hdr, + sizeof (sctp_trace->sctp_header)); + + if (sctp_conn != NULL) + clib_memcpy_fast (&sctp_trace->sctp_connection, sctp_conn, + sizeof (sctp_trace->sctp_connection)); + } + + b0->error = node->errors[error0]; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; + +} + +VLIB_NODE_FN (sctp4_shutdown_phase_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return sctp46_shutdown_phase_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +VLIB_NODE_FN (sctp6_shutdown_phase_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return sctp46_shutdown_phase_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sctp4_shutdown_phase_node) = +{ + .name = "sctp4-shutdown", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = SCTP_N_ERROR, + .error_strings = sctp_error_strings, + .n_next_nodes = SCTP_SHUTDOWN_PHASE_N_NEXT, + .next_nodes = + { +#define _(s,n) [SCTP_SHUTDOWN_PHASE_NEXT_##s] = n, + foreach_sctp_state_next +#undef _ + }, + .format_trace = format_sctp_rx_trace_short, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sctp6_shutdown_phase_node) = +{ + .name = "sctp6-shutdown", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = SCTP_N_ERROR, + .error_strings = sctp_error_strings, + .n_next_nodes = SCTP_SHUTDOWN_PHASE_N_NEXT, + .next_nodes = + { +#define _(s,n) [SCTP_SHUTDOWN_PHASE_NEXT_##s] = n, + foreach_sctp_state_next +#undef _ + }, + .format_trace = format_sctp_rx_trace_short, +}; +/* *INDENT-ON* */ + +always_inline u16 +sctp_handle_sack (sctp_selective_ack_chunk_t * sack_chunk, + sctp_connection_t * sctp_conn, u8 idx, vlib_buffer_t * b0, + u16 * next0) +{ + + /* Check that the LOCALLY generated tag is being used by the REMOTE peer as the verification tag */ + if (sctp_conn->local_tag != sack_chunk->sctp_hdr.verification_tag) + { + *next0 = sctp_next_drop (sctp_conn->sub_conn[idx].c_is_ip4); + return SCTP_ERROR_INVALID_TAG; + } + + sctp_conn->sub_conn[idx].state = SCTP_SUBCONN_SACK_RECEIVED; + + sctp_conn->sub_conn[idx].last_seen = sctp_time_now (); + + /* Section 7.2.2; point (2) */ + if (sctp_conn->sub_conn[idx].cwnd > sctp_conn->sub_conn[idx].ssthresh) + sctp_conn->sub_conn[idx].partially_acked_bytes = + sctp_conn->next_tsn - sack_chunk->cumulative_tsn_ack; + + /* Section 7.2.2; point (5) */ + if (sctp_conn->next_tsn - sack_chunk->cumulative_tsn_ack == 0) + sctp_conn->sub_conn[idx].partially_acked_bytes = 0; + + sctp_conn->last_unacked_tsn = sack_chunk->cumulative_tsn_ack; + + sctp_calculate_rto (sctp_conn, idx); + + sctp_timer_update (sctp_conn, idx, SCTP_TIMER_T3_RXTX, + sctp_conn->sub_conn[idx].RTO); + + sctp_conn->sub_conn[idx].RTO_pending = 0; + + *next0 = sctp_next_drop (sctp_conn->sub_conn[idx].c_is_ip4); + + return SCTP_ERROR_NONE; +} + +always_inline u16 +sctp_handle_heartbeat (sctp_hb_req_chunk_t * sctp_hb_chunk, + sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b0, u16 * next0) +{ + /* Check that the LOCALLY generated tag is being used by the REMOTE peer as the verification tag */ + if (sctp_conn->local_tag != sctp_hb_chunk->sctp_hdr.verification_tag) + { + *next0 = sctp_next_drop 
(sctp_conn->sub_conn[idx].c_is_ip4); + return SCTP_ERROR_INVALID_TAG; + } + + sctp_prepare_heartbeat_ack_chunk (sctp_conn, idx, b0); + + *next0 = sctp_next_output (sctp_conn->sub_conn[idx].connection.is_ip4); + + return SCTP_ERROR_NONE; +} + +always_inline u16 +sctp_handle_heartbeat_ack (sctp_hb_ack_chunk_t * sctp_hb_ack_chunk, + sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b0, u16 * next0) +{ + sctp_conn->sub_conn[idx].last_seen = sctp_time_now (); + + sctp_conn->sub_conn[idx].unacknowledged_hb -= 1; + + sctp_timer_update (sctp_conn, idx, SCTP_TIMER_T4_HEARTBEAT, + sctp_conn->sub_conn[idx].RTO); + + *next0 = sctp_next_drop (sctp_conn->sub_conn[idx].c_is_ip4); + + return SCTP_ERROR_NONE; +} + +always_inline void +sctp_node_inc_counter (vlib_main_t * vm, u32 sctp4_node, u32 sctp6_node, + u8 is_ip4, u8 evt, u8 val) +{ + if (PREDICT_TRUE (!val)) + return; + + if (is_ip4) + vlib_node_increment_counter (vm, sctp4_node, evt, val); + else + vlib_node_increment_counter (vm, sctp6_node, evt, val); +} + +always_inline uword +sctp46_listen_process_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->thread_index; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + sctp_header_t *sctp_hdr = 0; + ip4_header_t *ip4_hdr; + ip6_header_t *ip6_hdr; + sctp_connection_t *child_conn; + sctp_connection_t *sctp_listener; + u16 next0 = sctp_next_drop (is_ip4), error0 = SCTP_ERROR_ENQUEUED; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + sctp_listener = + 
sctp_listener_get (sctp_buffer_opaque (b0)-> + sctp.connection_index); + + if (is_ip4) + { + ip4_hdr = vlib_buffer_get_current (b0); + sctp_hdr = ip4_next_header (ip4_hdr); + } + else + { + ip6_hdr = vlib_buffer_get_current (b0); + sctp_hdr = ip6_next_header (ip6_hdr); + } + + child_conn = + sctp_lookup_connection (sctp_listener->sub_conn + [SCTP_PRIMARY_PATH_IDX].c_fib_index, b0, + my_thread_index, is_ip4); + + if (PREDICT_FALSE (child_conn->state != SCTP_STATE_CLOSED)) + { + SCTP_DBG + ("conn_index = %u: child_conn->state != SCTP_STATE_CLOSED.... STATE=%s", + child_conn->sub_conn[SCTP_PRIMARY_PATH_IDX]. + connection.c_index, + sctp_state_to_string (child_conn->state)); + error0 = SCTP_ERROR_CREATE_EXISTS; + goto drop; + } + + /* Create child session and send SYN-ACK */ + child_conn = sctp_connection_new (my_thread_index); + child_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].subconn_idx = + SCTP_PRIMARY_PATH_IDX; + child_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].c_lcl_port = + sctp_hdr->dst_port; + child_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].c_rmt_port = + sctp_hdr->src_port; + child_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].c_is_ip4 = is_ip4; + child_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.proto = + sctp_listener->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.proto; + child_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].PMTU = + sctp_listener->sub_conn[SCTP_PRIMARY_PATH_IDX].PMTU; + child_conn->state = SCTP_STATE_CLOSED; + child_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].connection.fib_index = + sctp_listener->sub_conn[SCTP_PRIMARY_PATH_IDX]. 
+ connection.fib_index; + + if (is_ip4) + { + child_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].c_lcl_ip4.as_u32 = + ip4_hdr->dst_address.as_u32; + child_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].c_rmt_ip4.as_u32 = + ip4_hdr->src_address.as_u32; + } + else + { + clib_memcpy_fast (&child_conn-> + sub_conn[SCTP_PRIMARY_PATH_IDX].c_lcl_ip6, + &ip6_hdr->dst_address, + sizeof (ip6_address_t)); + clib_memcpy_fast (&child_conn-> + sub_conn[SCTP_PRIMARY_PATH_IDX].c_rmt_ip6, + &ip6_hdr->src_address, + sizeof (ip6_address_t)); + } + + sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr; + sctp_chunks_common_hdr_t *sctp_chunk_hdr = &full_hdr->common_hdr; + + u8 chunk_type = vnet_sctp_get_chunk_type (sctp_chunk_hdr); + if (chunk_type != INIT && chunk_type != DATA + && chunk_type != OPERATION_ERROR) + { + SCTP_DBG + ("conn_index = %u: chunk_type != INIT... chunk_type=%s", + child_conn->sub_conn[SCTP_PRIMARY_PATH_IDX]. + connection.c_index, sctp_chunk_to_string (chunk_type)); + + error0 = SCTP_ERROR_UNKNOWN_CHUNK; + next0 = sctp_next_drop (is_ip4); + goto drop; + } + + u16 sctp_implied_length = + sctp_calculate_implied_length (ip4_hdr, ip6_hdr, is_ip4); + + switch (chunk_type) + { + case INIT: + sctp_connection_timers_init (child_conn); + + sctp_init_snd_vars (child_conn); + + sctp_init_cwnd (child_conn); + + error0 = + sctp_handle_init (sctp_hdr, sctp_chunk_hdr, child_conn, b0, + sctp_implied_length); + + if (error0 == SCTP_ERROR_NONE) + { + if (session_stream_accept + (&child_conn-> + sub_conn[SCTP_PRIMARY_PATH_IDX].connection, + sctp_listener-> + sub_conn[SCTP_PRIMARY_PATH_IDX].c_s_index, + sctp_listener-> + sub_conn[SCTP_PRIMARY_PATH_IDX].c_thread_index, 0)) + { + clib_warning ("session accept fail"); + sctp_connection_cleanup (child_conn); + error0 = SCTP_ERROR_CREATE_SESSION_FAIL; + goto drop; + } + next0 = sctp_next_output (is_ip4); + } + break; + + /* Reception of a DATA chunk whilst in the CLOSED state is called + * "Out of the Blue" packet and handling of the chunk needs 
special treatment + * as per RFC4960 section 8.4 + */ + case DATA: + break; + + case OPERATION_ERROR: + error0 = + sctp_handle_operation_err (sctp_hdr, child_conn, + SCTP_PRIMARY_PATH_IDX, b0, &next0); + break; + } + + drop: + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sctp_rx_trace_t *t0 = + vlib_add_trace (vm, node, b0, sizeof (*t0)); + clib_memcpy_fast (&t0->sctp_header, sctp_hdr, + sizeof (t0->sctp_header)); + clib_memcpy_fast (&t0->sctp_connection, sctp_listener, + sizeof (t0->sctp_connection)); + } + + b0->error = node->errors[error0]; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + + } + return from_frame->n_vectors; +} + +VLIB_NODE_FN (sctp4_listen_phase_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return sctp46_listen_process_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +VLIB_NODE_FN (sctp6_listen_phase_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return sctp46_listen_process_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +always_inline uword +sctp46_established_phase_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + sctp_main_t *sm = vnet_get_sctp_main (); + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->thread_index, errors = 0; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + sctp_header_t *sctp_hdr = 0; + sctp_chunks_common_hdr_t *sctp_chunk_hdr = 0; + ip4_header_t *ip4_hdr = 0; + ip6_header_t *ip6_hdr = 0; + sctp_connection_t *sctp_conn; + u16 error0 = 
SCTP_ERROR_ENQUEUED, next0 = + SCTP_ESTABLISHED_PHASE_N_NEXT; + u8 idx; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + sctp_conn = + sctp_connection_get (sctp_buffer_opaque (b0)-> + sctp.connection_index, my_thread_index); + + if (PREDICT_FALSE (sctp_conn == 0)) + { + SCTP_DBG + ("sctp_conn == NULL; return SCTP_ERROR_INVALID_CONNECTION"); + error0 = SCTP_ERROR_INVALID_CONNECTION; + goto done; + } + if (is_ip4) + { + ip4_hdr = vlib_buffer_get_current (b0); + sctp_hdr = ip4_next_header (ip4_hdr); + idx = sctp_sub_conn_id_via_ip4h (sctp_conn, ip4_hdr); + } + else + { + ip6_hdr = vlib_buffer_get_current (b0); + sctp_hdr = ip6_next_header (ip6_hdr); + idx = sctp_sub_conn_id_via_ip6h (sctp_conn, ip6_hdr); + } + + sctp_conn->sub_conn[idx].subconn_idx = idx; + + sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr; + sctp_chunk_hdr = + (sctp_chunks_common_hdr_t *) (&full_hdr->common_hdr); + + u8 chunk_type = vnet_sctp_get_chunk_type (&full_hdr->common_hdr); + + switch (chunk_type) + { + case COOKIE_ECHO: + error0 = + sctp_handle_cookie_echo (sctp_hdr, sctp_chunk_hdr, sctp_conn, + idx, b0, &next0); + break; + + case COOKIE_ACK: + error0 = + sctp_handle_cookie_ack (sctp_hdr, sctp_chunk_hdr, sctp_conn, + idx, b0, &next0); + break; + + case SACK: + error0 = + sctp_handle_sack ((sctp_selective_ack_chunk_t *) sctp_hdr, + sctp_conn, idx, b0, &next0); + break; + + case HEARTBEAT: + error0 = + sctp_handle_heartbeat ((sctp_hb_req_chunk_t *) sctp_hdr, + sctp_conn, idx, b0, &next0); + break; + + case HEARTBEAT_ACK: + error0 = + sctp_handle_heartbeat_ack ((sctp_hb_ack_chunk_t *) sctp_hdr, + sctp_conn, idx, b0, &next0); + break; + + case DATA: + error0 = + sctp_handle_data ((sctp_payload_data_chunk_t *) sctp_hdr, + sctp_conn, idx, b0, &next0); + break; + + case OPERATION_ERROR: + error0 = + sctp_handle_operation_err (sctp_hdr, sctp_conn, idx, b0, + &next0); + break; + + /* All 
UNEXPECTED scenarios (wrong chunk received per state-machine) + * are handled by the input-dispatcher function using the table-lookup + * hence we should never get to the "default" case below. + */ + default: + error0 = SCTP_ERROR_UNKNOWN_CHUNK; + next0 = sctp_next_drop (is_ip4); + goto done; + } + + done: + b0->error = node->errors[error0]; + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sctp_rx_trace_t *t0 = + vlib_add_trace (vm, node, b0, sizeof (*t0)); + sctp_set_rx_trace_data (t0, sctp_conn, sctp_hdr, b0, is_ip4); + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + errors = session_main_flush_enqueue_events (TRANSPORT_PROTO_SCTP, + my_thread_index); + + sctp_node_inc_counter (vm, is_ip4, sm->sctp4_established_phase_node_index, + sm->sctp6_established_phase_node_index, + SCTP_ERROR_EVENT_FIFO_FULL, errors); + sctp_flush_frame_to_output (vm, my_thread_index, is_ip4); + + return from_frame->n_vectors; +} + +VLIB_NODE_FN (sctp4_established_phase_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return sctp46_established_phase_inline (vm, node, from_frame, + 1 /* is_ip4 */ ); +} + +VLIB_NODE_FN (sctp6_established_phase_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return sctp46_established_phase_inline (vm, node, from_frame, + 0 /* is_ip4 */ ); +} + +static u8 * +format_sctp_rx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + sctp_rx_trace_t *t = va_arg (*args, sctp_rx_trace_t *); + u32 indent = format_get_indent (s); + + s = format (s, "%U\n%U%U", + format_sctp_header, &t->sctp_header, 128, + format_white_space, indent, + format_sctp_connection, &t->sctp_connection, 1); + + return s; +} + +/* *INDENT-OFF* */ 
+VLIB_REGISTER_NODE (sctp4_listen_phase_node) = +{ + .name = "sctp4-listen", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = SCTP_N_ERROR, + .error_strings = sctp_error_strings, + .n_next_nodes = SCTP_LISTEN_PHASE_N_NEXT, + .next_nodes = + { +#define _(s,n) [SCTP_LISTEN_PHASE_NEXT_##s] = n, + foreach_sctp_state_next +#undef _ + }, + .format_trace = format_sctp_rx_trace_short, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sctp6_listen_phase_node) = +{ + .name = "sctp6-listen", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = SCTP_N_ERROR, + .error_strings = sctp_error_strings, + .n_next_nodes = SCTP_LISTEN_PHASE_N_NEXT, + .next_nodes = + { +#define _(s,n) [SCTP_LISTEN_PHASE_NEXT_##s] = n, + foreach_sctp_state_next +#undef _ + }, + .format_trace = format_sctp_rx_trace_short, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sctp4_established_phase_node) = +{ + .name = "sctp4-established", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = SCTP_N_ERROR, + .error_strings = sctp_error_strings, + .n_next_nodes = SCTP_ESTABLISHED_PHASE_N_NEXT, + .next_nodes = + { +#define _(s,n) [SCTP_ESTABLISHED_PHASE_NEXT_##s] = n, + foreach_sctp_state_next +#undef _ + }, + .format_trace = format_sctp_rx_trace_short, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sctp6_established_phase_node) = +{ + .name = "sctp6-established", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = SCTP_N_ERROR, + .error_strings = sctp_error_strings, + .n_next_nodes = SCTP_LISTEN_PHASE_N_NEXT, + .next_nodes = + { +#define _(s,n) [SCTP_LISTEN_PHASE_NEXT_##s] = n, + foreach_sctp_state_next +#undef _ + }, + .format_trace = format_sctp_rx_trace_short, +}; +/* *INDENT-ON* */ + +/* + * This is the function executed first for the SCTP graph. + * It takes care of doing the initial message parsing and + * dispatch to the specialized function. 
+ */ +always_inline uword +sctp46_input_dispatcher (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->thread_index; + u8 result; + sctp_main_t *tm = vnet_get_sctp_main (); + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + next_index = node->cached_next_index; + sctp_set_time_now (my_thread_index); + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + int n_advance_bytes0, n_data_bytes0; + u32 bi0, fib_index0; + vlib_buffer_t *b0; + sctp_header_t *sctp_hdr = 0; + sctp_chunks_common_hdr_t *sctp_chunk_hdr = 0; + sctp_connection_t *sctp_conn; + transport_connection_t *trans_conn; + ip4_header_t *ip4_hdr; + ip6_header_t *ip6_hdr; + u32 error0 = SCTP_ERROR_NO_LISTENER, next0 = SCTP_INPUT_NEXT_DROP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + sctp_buffer_opaque (b0)->sctp.flags = 0; + fib_index0 = vnet_buffer (b0)->ip.fib_index; + + /* Checksum computed by ipx_local no need to compute again */ + + if (is_ip4) + { + ip4_hdr = vlib_buffer_get_current (b0); + sctp_hdr = ip4_next_header (ip4_hdr); + + sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr; + sctp_chunk_hdr = &full_hdr->common_hdr; + + n_advance_bytes0 = + (ip4_header_bytes (ip4_hdr) + + sizeof (sctp_payload_data_chunk_t)); + n_data_bytes0 = + clib_net_to_host_u16 (ip4_hdr->length) - n_advance_bytes0; + + trans_conn = session_lookup_connection_wt4 (fib_index0, + &ip4_hdr->dst_address, + &ip4_hdr->src_address, + sctp_hdr->dst_port, + sctp_hdr->src_port, + TRANSPORT_PROTO_SCTP, + my_thread_index, + &result); + } + else + { + ip6_hdr = vlib_buffer_get_current (b0); + sctp_hdr = ip6_next_header (ip6_hdr); + + sctp_full_hdr_t 
*full_hdr = (sctp_full_hdr_t *) sctp_hdr; + sctp_chunk_hdr = &full_hdr->common_hdr; + + n_advance_bytes0 = sctp_header_bytes (); + n_data_bytes0 = + clib_net_to_host_u16 (ip6_hdr->payload_length) - + n_advance_bytes0; + n_advance_bytes0 += sizeof (ip6_hdr[0]); + + trans_conn = session_lookup_connection_wt6 (fib_index0, + &ip6_hdr->dst_address, + &ip6_hdr->src_address, + sctp_hdr->dst_port, + sctp_hdr->src_port, + TRANSPORT_PROTO_SCTP, + my_thread_index, + &result); + } + + /* Length check */ + if (PREDICT_FALSE (n_advance_bytes0 < 0)) + { + error0 = SCTP_ERROR_LENGTH; + goto done; + } + + sctp_conn = sctp_get_connection_from_transport (trans_conn); + vnet_sctp_common_hdr_params_net_to_host (sctp_chunk_hdr); + + u8 chunk_type = vnet_sctp_get_chunk_type (sctp_chunk_hdr); + if (chunk_type >= UNKNOWN) + { + clib_warning + ("Received an unrecognized chunk; sending back OPERATION_ERROR chunk"); + + sctp_prepare_operation_error (sctp_conn, SCTP_PRIMARY_PATH_IDX, + b0, UNRECOGNIZED_CHUNK_TYPE); + + error0 = SCTP_ERROR_UNKNOWN_CHUNK; + next0 = sctp_next_output (is_ip4); + goto done; + } + + sctp_buffer_opaque (b0)->sctp.hdr_offset = + (u8 *) sctp_hdr - (u8 *) vlib_buffer_get_current (b0); + + /* Session exists */ + if (PREDICT_TRUE (0 != sctp_conn)) + { + /* Save connection index */ + sctp_buffer_opaque (b0)->sctp.connection_index + = trans_conn->c_index; + sctp_buffer_opaque (b0)->sctp.data_offset = n_advance_bytes0; + sctp_buffer_opaque (b0)->sctp.data_len = n_data_bytes0; + + next0 = tm->dispatch_table[sctp_conn->state][chunk_type].next; + error0 = tm->dispatch_table[sctp_conn->state][chunk_type].error; + + SCTP_DBG_STATE_MACHINE + ("S_INDEX = %u, C_INDEX = %u, TRANS_CONN = %p, SCTP_CONN = %p, CURRENT_CONNECTION_STATE = %s," + "CHUNK_TYPE_RECEIVED = %s " "NEXT_PHASE = %s", + sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX]. + connection.s_index, + sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX]. 
+ connection.c_index, trans_conn, sctp_conn, + sctp_state_to_string (sctp_conn->state), + sctp_chunk_to_string (chunk_type), phase_to_string (next0)); + + if (chunk_type == DATA) + SCTP_ADV_DBG ("n_advance_bytes0 = %u, n_data_bytes0 = %u", + n_advance_bytes0, n_data_bytes0); + + } + else + { + if (result) + { + next0 = SCTP_INPUT_NEXT_DROP; + error0 = SCTP_ERROR_NONE + result; + } + else if ((is_ip4 && tm->punt_unknown4) || + (!is_ip4 && tm->punt_unknown6)) + { + next0 = SCTP_INPUT_NEXT_PUNT_PHASE; + error0 = SCTP_ERROR_PUNT; + } + else + { + next0 = SCTP_INPUT_NEXT_DROP; + error0 = SCTP_ERROR_NO_LISTENER; + } + SCTP_DBG_STATE_MACHINE ("sctp_conn == NULL, NEXT_PHASE = %s", + phase_to_string (next0)); + sctp_conn = 0; + } + + done: + b0->error = error0 ? node->errors[error0] : 0; + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sctp_rx_trace_t *t0 = + vlib_add_trace (vm, node, b0, sizeof (*t0)); + sctp_set_rx_trace_data (t0, sctp_conn, sctp_hdr, b0, is_ip4); + } + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +VLIB_NODE_FN (sctp4_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return sctp46_input_dispatcher (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +VLIB_NODE_FN (sctp6_input_node) (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return sctp46_input_dispatcher (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sctp4_input_node) = +{ + .name = "sctp4-input", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = SCTP_N_ERROR, + .error_strings = sctp_error_strings, + .n_next_nodes = SCTP_INPUT_N_NEXT, + .next_nodes = + { +#define _(s,n) [SCTP_INPUT_NEXT_##s] = n, + foreach_sctp4_input_next +#undef _ + }, + .format_buffer = format_sctp_header, + .format_trace = format_sctp_rx_trace, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sctp6_input_node) = +{ + .name = "sctp6-input", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = SCTP_N_ERROR, + .error_strings = sctp_error_strings, + .n_next_nodes = SCTP_INPUT_N_NEXT, + .next_nodes = + { +#define _(s,n) [SCTP_INPUT_NEXT_##s] = n, + foreach_sctp6_input_next +#undef _ + }, + .format_buffer = format_sctp_header, + .format_trace = format_sctp_rx_trace, +}; +/* *INDENT-ON* */ + +#ifndef CLIB_MARCH_VARIANT +static void +sctp_dispatch_table_init (sctp_main_t * tm) +{ + int i, j; + for (i = 0; i < ARRAY_LEN (tm->dispatch_table); i++) + for (j = 0; j < ARRAY_LEN (tm->dispatch_table[i]); j++) + { + tm->dispatch_table[i][j].next = SCTP_INPUT_NEXT_DROP; + tm->dispatch_table[i][j].error = SCTP_ERROR_DISPATCH; + } + +#define _(t,f,n,e) \ +do { \ + tm->dispatch_table[SCTP_STATE_##t][f].next = (n); \ + tm->dispatch_table[SCTP_STATE_##t][f].error = (e); \ +} while (0) + + /* + * SCTP STATE-MACHINE states: + * + * _(CLOSED, "CLOSED") \ + * _(COOKIE_WAIT, "COOKIE_WAIT") \ + * _(COOKIE_ECHOED, "COOKIE_ECHOED") \ + * _(ESTABLISHED, "ESTABLISHED") \ + * _(SHUTDOWN_PENDING, "SHUTDOWN_PENDING") \ + * _(SHUTDOWN_SENT, "SHUTDOWN_SENT") \ + * _(SHUTDOWN_RECEIVED, "SHUTDOWN_RECEIVED") \ + * _(SHUTDOWN_ACK_SENT, "SHUTDOWN_ACK_SENT") + */ + //_(CLOSED, DATA, SCTP_INPUT_NEXT_LISTEN_PHASE, SCTP_ERROR_NONE); /* UNEXPECTED DATA chunk which requires special handling */ + _(CLOSED, INIT, SCTP_INPUT_NEXT_LISTEN_PHASE, SCTP_ERROR_NONE); + _(CLOSED, INIT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED INIT_ACK chunk */ + _(CLOSED, SACK, SCTP_INPUT_NEXT_DROP, 
SCTP_ERROR_SACK_CHUNK_VIOLATION); /* UNEXPECTED SACK chunk */ + _(CLOSED, HEARTBEAT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_CHUNK_VIOLATION); /* UNEXPECTED HEARTBEAT chunk */ + _(CLOSED, HEARTBEAT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_ACK_CHUNK_VIOLATION); /* UNEXPECTED HEARTBEAT_ACK chunk */ + _(CLOSED, ABORT, SCTP_INPUT_NEXT_RCV_PHASE, SCTP_ERROR_NONE); + _(CLOSED, SHUTDOWN, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_CHUNK_VIOLATION); /* UNEXPECTED SHUTDOWN chunk */ + _(CLOSED, SHUTDOWN_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_ACK_CHUNK_VIOLATION); /* UNEXPECTED SHUTDOWN_ACK chunk */ + _(CLOSED, OPERATION_ERROR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_OPERATION_ERROR_VIOLATION); /* UNEXPECTED OPERATION_ERROR chunk */ + _(CLOSED, COOKIE_ECHO, SCTP_INPUT_NEXT_ESTABLISHED_PHASE, SCTP_ERROR_NONE); + _(CLOSED, COOKIE_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED COOKIE_ACK chunk */ + _(CLOSED, ECNE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ECNE_VIOLATION); /* UNEXPECTED ECNE chunk */ + _(CLOSED, CWR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_CWR_VIOLATION); /* UNEXPECTED CWR chunk */ + _(CLOSED, SHUTDOWN_COMPLETE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_COMPLETE_VIOLATION); /* UNEXPECTED SHUTDOWN_COMPLETE chunk */ + _(CLOSED, OPERATION_ERROR, SCTP_INPUT_NEXT_LISTEN_PHASE, SCTP_ERROR_NONE); + + _(COOKIE_WAIT, DATA, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_NONE); /* UNEXPECTED DATA chunk which requires special handling */ + _(COOKIE_WAIT, INIT, SCTP_INPUT_NEXT_RCV_PHASE, SCTP_ERROR_NONE); /* UNEXPECTED INIT chunk which requires special handling */ + _(COOKIE_WAIT, INIT_ACK, SCTP_INPUT_NEXT_RCV_PHASE, SCTP_ERROR_NONE); + _(COOKIE_WAIT, SACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SACK_CHUNK_VIOLATION); /* UNEXPECTED SACK chunk */ + _(COOKIE_WAIT, HEARTBEAT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_CHUNK_VIOLATION); /* UNEXPECTED HEARTBEAT chunk */ + _(COOKIE_WAIT, HEARTBEAT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_ACK_CHUNK_VIOLATION); /* UNEXPECTED HEARTBEAT_ACK 
chunk */ + _(COOKIE_WAIT, ABORT, SCTP_INPUT_NEXT_RCV_PHASE, SCTP_ERROR_NONE); + _(COOKIE_WAIT, SHUTDOWN, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_CHUNK_VIOLATION); /* UNEXPECTED SHUTDOWN chunk */ + _(COOKIE_WAIT, SHUTDOWN_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_ACK_CHUNK_VIOLATION); /* UNEXPECTED SHUTDOWN_ACK chunk */ + _(COOKIE_WAIT, OPERATION_ERROR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_OPERATION_ERROR_VIOLATION); /* UNEXPECTED OPERATION_ERROR chunk */ + _(COOKIE_WAIT, COOKIE_ECHO, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_COOKIE_ECHO_VIOLATION); /* UNEXPECTED COOKIE_ECHO chunk */ + _(COOKIE_WAIT, COOKIE_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED COOKIE_ACK chunk */ + _(COOKIE_WAIT, ECNE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ECNE_VIOLATION); /* UNEXPECTED ECNE chunk */ + _(COOKIE_WAIT, CWR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_CWR_VIOLATION); /* UNEXPECTED CWR chunk */ + _(COOKIE_WAIT, SHUTDOWN_COMPLETE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_COMPLETE_VIOLATION); /* UNEXPECTED SHUTDOWN_COMPLETE chunk */ + _(COOKIE_WAIT, OPERATION_ERROR, SCTP_INPUT_NEXT_LISTEN_PHASE, + SCTP_ERROR_NONE); + + _(COOKIE_ECHOED, DATA, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_NONE); + _(COOKIE_ECHOED, INIT, SCTP_INPUT_NEXT_RCV_PHASE, SCTP_ERROR_NONE); /* UNEXPECTED INIT chunk which requires special handling */ + _(COOKIE_ECHOED, INIT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED INIT_ACK chunk */ + _(COOKIE_ECHOED, SACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SACK_CHUNK_VIOLATION); /* UNEXPECTED SACK chunk */ + _(COOKIE_ECHOED, HEARTBEAT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_CHUNK_VIOLATION); /* UNEXPECTED HEARTBEAT chunk */ + _(COOKIE_ECHOED, HEARTBEAT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_ACK_CHUNK_VIOLATION); /* UNEXPECTED HEARTBEAT_ACK chunk */ + _(COOKIE_ECHOED, ABORT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ABORT_CHUNK_VIOLATION); /* UNEXPECTED ABORT chunk */ + _(COOKIE_ECHOED, SHUTDOWN, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_CHUNK_VIOLATION); /* UNEXPECTED 
SHUTDOWN chunk */ + _(COOKIE_ECHOED, SHUTDOWN_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_ACK_CHUNK_VIOLATION); /* UNEXPECTED SHUTDOWN_ACK chunk */ + _(COOKIE_ECHOED, OPERATION_ERROR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_OPERATION_ERROR_VIOLATION); /* UNEXPECTED OPERATION_ERROR chunk */ + _(COOKIE_ECHOED, COOKIE_ECHO, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_COOKIE_ECHO_VIOLATION); /* UNEXPECTED COOKIE_ECHO chunk */ + _(COOKIE_ECHOED, COOKIE_ACK, SCTP_INPUT_NEXT_ESTABLISHED_PHASE, + SCTP_ERROR_NONE); + _(COOKIE_ECHOED, ECNE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ECNE_VIOLATION); /* UNEXPECTED ECNE chunk */ + _(COOKIE_ECHOED, CWR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_CWR_VIOLATION); /* UNEXPECTED CWR chunk */ + _(COOKIE_ECHOED, SHUTDOWN_COMPLETE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_COMPLETE_VIOLATION); /* UNEXPECTED SHUTDOWN_COMPLETE chunk */ + _(COOKIE_ECHOED, OPERATION_ERROR, SCTP_INPUT_NEXT_LISTEN_PHASE, + SCTP_ERROR_NONE); + + _(ESTABLISHED, DATA, SCTP_INPUT_NEXT_ESTABLISHED_PHASE, SCTP_ERROR_NONE); + _(ESTABLISHED, INIT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_INIT_CHUNK_VIOLATION); /* UNEXPECTED INIT chunk */ + _(ESTABLISHED, INIT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED INIT_ACK chunk */ + _(ESTABLISHED, SACK, SCTP_INPUT_NEXT_ESTABLISHED_PHASE, SCTP_ERROR_NONE); + _(ESTABLISHED, HEARTBEAT, SCTP_INPUT_NEXT_ESTABLISHED_PHASE, + SCTP_ERROR_NONE); + _(ESTABLISHED, HEARTBEAT_ACK, SCTP_INPUT_NEXT_ESTABLISHED_PHASE, + SCTP_ERROR_NONE); + _(ESTABLISHED, ABORT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ABORT_CHUNK_VIOLATION); /* UNEXPECTED ABORT chunk */ + _(ESTABLISHED, SHUTDOWN, SCTP_INPUT_NEXT_SHUTDOWN_PHASE, SCTP_ERROR_NONE); + _(ESTABLISHED, SHUTDOWN_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_ACK_CHUNK_VIOLATION); /* UNEXPECTED SHUTDOWN_ACK chunk */ + _(ESTABLISHED, OPERATION_ERROR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_OPERATION_ERROR_VIOLATION); /* UNEXPECTED OPERATION_ERROR chunk */ + _(ESTABLISHED, COOKIE_ECHO, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_COOKIE_ECHO_VIOLATION); 
/* UNEXPECTED COOKIE_ECHO chunk */ + _(ESTABLISHED, COOKIE_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED COOKIE_ACK chunk */ + _(ESTABLISHED, ECNE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ECNE_VIOLATION); /* UNEXPECTED ECNE chunk */ + _(ESTABLISHED, CWR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_CWR_VIOLATION); /* UNEXPECTED CWR chunk */ + _(ESTABLISHED, SHUTDOWN_COMPLETE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_COMPLETE_VIOLATION); /* UNEXPECTED SHUTDOWN_COMPLETE chunk */ + _(ESTABLISHED, OPERATION_ERROR, SCTP_INPUT_NEXT_LISTEN_PHASE, + SCTP_ERROR_NONE); + + _(SHUTDOWN_PENDING, DATA, SCTP_INPUT_NEXT_SHUTDOWN_PHASE, SCTP_ERROR_NONE); + _(SHUTDOWN_PENDING, INIT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_INIT_CHUNK_VIOLATION); /* UNEXPECTED INIT chunk */ + _(SHUTDOWN_PENDING, INIT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED INIT_ACK chunk */ + _(SHUTDOWN_PENDING, SACK, SCTP_INPUT_NEXT_LISTEN_PHASE, SCTP_ERROR_NONE); + _(SHUTDOWN_PENDING, HEARTBEAT, SCTP_INPUT_NEXT_LISTEN_PHASE, + SCTP_ERROR_NONE); + _(SHUTDOWN_PENDING, HEARTBEAT_ACK, SCTP_INPUT_NEXT_LISTEN_PHASE, + SCTP_ERROR_NONE); + _(SHUTDOWN_PENDING, ABORT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ABORT_CHUNK_VIOLATION); /* UNEXPECTED ABORT chunk */ + _(SHUTDOWN_PENDING, SHUTDOWN, SCTP_INPUT_NEXT_SHUTDOWN_PHASE, + SCTP_ERROR_NONE); + _(SHUTDOWN_PENDING, SHUTDOWN_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_ACK_CHUNK_VIOLATION); /* UNEXPECTED SHUTDOWN_ACK chunk */ + _(SHUTDOWN_PENDING, OPERATION_ERROR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_OPERATION_ERROR_VIOLATION); /* UNEXPECTED OPERATION_ERROR chunk */ + _(SHUTDOWN_PENDING, COOKIE_ECHO, SCTP_INPUT_NEXT_SHUTDOWN_PHASE, + SCTP_ERROR_NONE); + _(SHUTDOWN_PENDING, COOKIE_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED COOKIE_ACK chunk */ + _(SHUTDOWN_PENDING, ECNE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ECNE_VIOLATION); /* UNEXPECTED ECNE chunk */ + _(SHUTDOWN_PENDING, CWR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_CWR_VIOLATION); /* UNEXPECTED CWR chunk */ + 
_(SHUTDOWN_PENDING, SHUTDOWN_COMPLETE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_COMPLETE_VIOLATION); /* UNEXPECTED SHUTDOWN_COMPLETE chunk */ + _(SHUTDOWN_PENDING, OPERATION_ERROR, SCTP_INPUT_NEXT_LISTEN_PHASE, + SCTP_ERROR_NONE); + + _(SHUTDOWN_SENT, DATA, SCTP_INPUT_NEXT_SHUTDOWN_PHASE, SCTP_ERROR_NONE); + _(SHUTDOWN_SENT, INIT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_INIT_CHUNK_VIOLATION); /* UNEXPECTED INIT chunk */ + _(SHUTDOWN_SENT, INIT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED INIT_ACK chunk */ + _(SHUTDOWN_SENT, SACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SACK_CHUNK_VIOLATION); /* UNEXPECTED SACK chunk */ + _(SHUTDOWN_SENT, HEARTBEAT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_CHUNK_VIOLATION); /* UNEXPECTED HEARTBEAT chunk */ + _(SHUTDOWN_SENT, HEARTBEAT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_ACK_CHUNK_VIOLATION); /* UNEXPECTED HEARTBEAT_ACK chunk */ + _(SHUTDOWN_SENT, ABORT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ABORT_CHUNK_VIOLATION); /* UNEXPECTED ABORT chunk */ + _(SHUTDOWN_SENT, SHUTDOWN, SCTP_INPUT_NEXT_SHUTDOWN_PHASE, SCTP_ERROR_NONE); + _(SHUTDOWN_SENT, SHUTDOWN_ACK, SCTP_INPUT_NEXT_SHUTDOWN_PHASE, + SCTP_ERROR_NONE); + _(SHUTDOWN_SENT, COOKIE_ECHO, SCTP_INPUT_NEXT_SHUTDOWN_PHASE, + SCTP_ERROR_NONE); + _(SHUTDOWN_SENT, COOKIE_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED COOKIE_ACK chunk */ + _(SHUTDOWN_SENT, ECNE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ECNE_VIOLATION); /* UNEXPECTED ECNE chunk */ + _(SHUTDOWN_SENT, CWR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_CWR_VIOLATION); /* UNEXPECTED CWR chunk */ + _(SHUTDOWN_SENT, SHUTDOWN_COMPLETE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_COMPLETE_VIOLATION); /* UNEXPECTED SHUTDOWN_COMPLETE chunk */ + _(SHUTDOWN_SENT, OPERATION_ERROR, SCTP_INPUT_NEXT_LISTEN_PHASE, + SCTP_ERROR_NONE); + + _(SHUTDOWN_RECEIVED, DATA, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_DATA_CHUNK_VIOLATION); /* UNEXPECTED DATA chunk */ + _(SHUTDOWN_RECEIVED, INIT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_INIT_CHUNK_VIOLATION); /* 
UNEXPECTED INIT chunk */ + _(SHUTDOWN_RECEIVED, INIT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED INIT_ACK chunk */ + _(SHUTDOWN_RECEIVED, SACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SACK_CHUNK_VIOLATION); /* UNEXPECTED INIT chunk */ + _(SHUTDOWN_RECEIVED, HEARTBEAT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_CHUNK_VIOLATION); /* UNEXPECTED HEARTBEAT chunk */ + _(SHUTDOWN_RECEIVED, HEARTBEAT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_ACK_CHUNK_VIOLATION); /* UNEXPECTED HEARTBEAT_ACK chunk */ + _(SHUTDOWN_RECEIVED, ABORT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ABORT_CHUNK_VIOLATION); /* UNEXPECTED ABORT chunk */ + _(SHUTDOWN_RECEIVED, SHUTDOWN, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_CHUNK_VIOLATION); /* UNEXPECTED SHUTDOWN chunk */ + _(SHUTDOWN_RECEIVED, SHUTDOWN_ACK, SCTP_INPUT_NEXT_SHUTDOWN_PHASE, + SCTP_ERROR_NONE); + _(SHUTDOWN_RECEIVED, COOKIE_ECHO, SCTP_INPUT_NEXT_SHUTDOWN_PHASE, + SCTP_ERROR_NONE); + _(SHUTDOWN_RECEIVED, COOKIE_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED COOKIE_ACK chunk */ + _(SHUTDOWN_RECEIVED, ECNE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ECNE_VIOLATION); /* UNEXPECTED ECNE chunk */ + _(SHUTDOWN_RECEIVED, CWR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_CWR_VIOLATION); /* UNEXPECTED CWR chunk */ + _(SHUTDOWN_RECEIVED, SHUTDOWN_COMPLETE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_COMPLETE_VIOLATION); /* UNEXPECTED SHUTDOWN_COMPLETE chunk */ + _(SHUTDOWN_RECEIVED, OPERATION_ERROR, SCTP_INPUT_NEXT_LISTEN_PHASE, + SCTP_ERROR_NONE); + + _(SHUTDOWN_ACK_SENT, DATA, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_DATA_CHUNK_VIOLATION); /* UNEXPECTED DATA chunk */ + _(SHUTDOWN_ACK_SENT, INIT, SCTP_INPUT_NEXT_RCV_PHASE, SCTP_ERROR_NONE); /* UNEXPECTED INIT chunk */ + _(SHUTDOWN_ACK_SENT, INIT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED INIT_ACK chunk */ + _(SHUTDOWN_ACK_SENT, SACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SACK_CHUNK_VIOLATION); /* UNEXPECTED INIT chunk */ + _(SHUTDOWN_ACK_SENT, HEARTBEAT, SCTP_INPUT_NEXT_DROP, 
SCTP_ERROR_HEARTBEAT_CHUNK_VIOLATION); /* UNEXPECTED HEARTBEAT chunk */ + _(SHUTDOWN_ACK_SENT, HEARTBEAT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_ACK_CHUNK_VIOLATION); /* UNEXPECTED HEARTBEAT_ACK chunk */ + _(SHUTDOWN_ACK_SENT, ABORT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ABORT_CHUNK_VIOLATION); /* UNEXPECTED ABORT chunk */ + _(SHUTDOWN_ACK_SENT, SHUTDOWN, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_CHUNK_VIOLATION); /* UNEXPECTED SHUTDOWN chunk */ + _(SHUTDOWN_ACK_SENT, SHUTDOWN_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_ACK_CHUNK_VIOLATION); /* UNEXPECTED SHUTDOWN_ACK chunk */ + _(SHUTDOWN_ACK_SENT, COOKIE_ECHO, SCTP_INPUT_NEXT_SHUTDOWN_PHASE, + SCTP_ERROR_NONE); + _(SHUTDOWN_ACK_SENT, COOKIE_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED COOKIE_ACK chunk */ + _(SHUTDOWN_ACK_SENT, ECNE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ECNE_VIOLATION); /* UNEXPECTED ECNE chunk */ + _(SHUTDOWN_ACK_SENT, CWR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_CWR_VIOLATION); /* UNEXPECTED CWR chunk */ + _(SHUTDOWN_ACK_SENT, SHUTDOWN_COMPLETE, SCTP_INPUT_NEXT_SHUTDOWN_PHASE, + SCTP_ERROR_NONE); + _(SHUTDOWN_ACK_SENT, OPERATION_ERROR, SCTP_INPUT_NEXT_LISTEN_PHASE, + SCTP_ERROR_NONE); + + /* TODO: Handle COOKIE ECHO when a TCB Exists */ + +#undef _ +} + +clib_error_t * +sctp_input_init (vlib_main_t * vm) +{ + clib_error_t *error = 0; + sctp_main_t *tm = vnet_get_sctp_main (); + + if ((error = vlib_call_init_function (vm, sctp_init))) + return error; + + /* Initialize dispatch table. */ + sctp_dispatch_table_init (tm); + + return error; +} + +VLIB_INIT_FUNCTION (sctp_input_init); +#endif /* CLIB_MARCH_VARIANT */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/sctp/sctp_msg_enum.h b/src/plugins/sctp/sctp_msg_enum.h new file mode 100644 index 00000000000..cbf84c659f6 --- /dev/null +++ b/src/plugins/sctp/sctp_msg_enum.h @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2019 Cisco and/or its affiliates. 
+ * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_sctp_msg_enum_h +#define included_sctp_msg_enum_h + +#include <vppinfra/byte_order.h> + +#define vl_msg_id(n,h) n, +typedef enum { +#include <sctp/sctp_all_api_h.h> + /* We'll want to know how many messages IDs we need... */ + VL_MSG_FIRST_AVAILABLE, +} vl_msg_id_t; +#undef vl_msg_id + +#endif /* included_http_static_msg_enum_h */ diff --git a/src/plugins/sctp/sctp_output.c b/src/plugins/sctp/sctp_output.c new file mode 100644 index 00000000000..955010a34b3 --- /dev/null +++ b/src/plugins/sctp/sctp_output.c @@ -0,0 +1,1568 @@ +/* + * Copyright (c) 2017 SUSE LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <sctp/sctp.h> +#include <sctp/sctp_debug.h> +#include <vppinfra/random.h> +#include <openssl/hmac.h> + +/** + * Flush tx frame populated by retransmits and timer pops + */ +void +sctp_flush_frame_to_output (vlib_main_t * vm, u8 thread_index, u8 is_ip4) +{ + if (sctp_main.tx_frames[!is_ip4][thread_index]) + { + u32 next_index; + next_index = is_ip4 ? sctp4_output_node.index : sctp6_output_node.index; + vlib_put_frame_to_node (vm, next_index, + sctp_main.tx_frames[!is_ip4][thread_index]); + sctp_main.tx_frames[!is_ip4][thread_index] = 0; + } +} + +/** + * Flush ip lookup tx frames populated by timer pops + */ +always_inline void +sctp_flush_frame_to_ip_lookup (vlib_main_t * vm, u8 thread_index, u8 is_ip4) +{ + if (sctp_main.ip_lookup_tx_frames[!is_ip4][thread_index]) + { + u32 next_index; + next_index = is_ip4 ? ip4_lookup_node.index : ip6_lookup_node.index; + vlib_put_frame_to_node (vm, next_index, + sctp_main.ip_lookup_tx_frames[!is_ip4] + [thread_index]); + sctp_main.ip_lookup_tx_frames[!is_ip4][thread_index] = 0; + } +} + +/** + * Flush v4 and v6 sctp and ip-lookup tx frames for thread index + */ +void +sctp_flush_frames_to_output (u8 thread_index) +{ + vlib_main_t *vm = vlib_get_main (); + sctp_flush_frame_to_output (vm, thread_index, 1); + sctp_flush_frame_to_output (vm, thread_index, 0); + sctp_flush_frame_to_ip_lookup (vm, thread_index, 1); + sctp_flush_frame_to_ip_lookup (vm, thread_index, 0); +} + +u32 +ip4_sctp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, + ip4_header_t * ip0) +{ + ip_csum_t checksum; + u32 ip_header_length, payload_length_host_byte_order; + u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer; + void *data_this_buffer; + + /* Initialize checksum with ip header. 
*/ + ip_header_length = ip4_header_bytes (ip0); + payload_length_host_byte_order = + clib_net_to_host_u16 (ip0->length) - ip_header_length; + checksum = + clib_host_to_net_u32 (payload_length_host_byte_order + + (ip0->protocol << 16)); + + if (BITS (uword) == 32) + { + checksum = + ip_csum_with_carry (checksum, + clib_mem_unaligned (&ip0->src_address, u32)); + checksum = + ip_csum_with_carry (checksum, + clib_mem_unaligned (&ip0->dst_address, u32)); + } + else + checksum = + ip_csum_with_carry (checksum, + clib_mem_unaligned (&ip0->src_address, u64)); + + n_bytes_left = n_this_buffer = payload_length_host_byte_order; + data_this_buffer = (void *) ip0 + ip_header_length; + n_ip_bytes_this_buffer = + p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data); + if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer) + { + n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ? + n_ip_bytes_this_buffer - ip_header_length : 0; + } + while (1) + { + checksum = + ip_incremental_checksum (checksum, data_this_buffer, n_this_buffer); + n_bytes_left -= n_this_buffer; + if (n_bytes_left == 0) + break; + + ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT); + p0 = vlib_get_buffer (vm, p0->next_buffer); + data_this_buffer = vlib_buffer_get_current (p0); + n_this_buffer = p0->current_length; + } + + return checksum; +} + +u32 +ip6_sctp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, + ip6_header_t * ip0, int *bogus_lengthp) +{ + ip_csum_t checksum; + u16 payload_length_host_byte_order; + u32 i, n_this_buffer, n_bytes_left; + u32 headers_size = sizeof (ip0[0]); + void *data_this_buffer; + + ASSERT (bogus_lengthp); + *bogus_lengthp = 0; + + /* Initialize checksum with ip header. 
*/ + checksum = ip0->payload_length + clib_host_to_net_u16 (ip0->protocol); + payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length); + data_this_buffer = (void *) (ip0 + 1); + + for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++) + { + checksum = ip_csum_with_carry (checksum, + clib_mem_unaligned (&ip0-> + src_address.as_uword + [i], uword)); + checksum = + ip_csum_with_carry (checksum, + clib_mem_unaligned (&ip0->dst_address.as_uword[i], + uword)); + } + + /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) + * or UDP-Ping packets */ + if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + u32 skip_bytes; + ip6_hop_by_hop_ext_t *ext_hdr = + (ip6_hop_by_hop_ext_t *) data_this_buffer; + + /* validate really icmp6 next */ + ASSERT ((ext_hdr->next_hdr == IP_PROTOCOL_SCTP)); + + skip_bytes = 8 * (1 + ext_hdr->n_data_u64s); + data_this_buffer = (void *) ((u8 *) data_this_buffer + skip_bytes); + + payload_length_host_byte_order -= skip_bytes; + headers_size += skip_bytes; + } + + n_bytes_left = n_this_buffer = payload_length_host_byte_order; + if (p0 && n_this_buffer + headers_size > p0->current_length) + n_this_buffer = + p0->current_length > + headers_size ? 
p0->current_length - headers_size : 0; + while (1) + { + checksum = + ip_incremental_checksum (checksum, data_this_buffer, n_this_buffer); + n_bytes_left -= n_this_buffer; + if (n_bytes_left == 0) + break; + + if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + *bogus_lengthp = 1; + return 0xfefe; + } + p0 = vlib_get_buffer (vm, p0->next_buffer); + data_this_buffer = vlib_buffer_get_current (p0); + n_this_buffer = p0->current_length; + } + + return checksum; +} + +void +sctp_push_ip_hdr (sctp_main_t * tm, sctp_sub_connection_t * sctp_sub_conn, + vlib_buffer_t * b) +{ + sctp_header_t *th = vlib_buffer_get_current (b); + vlib_main_t *vm = vlib_get_main (); + if (sctp_sub_conn->c_is_ip4) + { + ip4_header_t *ih; + ih = vlib_buffer_push_ip4 (vm, b, &sctp_sub_conn->c_lcl_ip4, + &sctp_sub_conn->c_rmt_ip4, IP_PROTOCOL_SCTP, + 1); + th->checksum = ip4_sctp_compute_checksum (vm, b, ih); + } + else + { + ip6_header_t *ih; + int bogus = ~0; + + ih = vlib_buffer_push_ip6 (vm, b, &sctp_sub_conn->c_lcl_ip6, + &sctp_sub_conn->c_rmt_ip6, IP_PROTOCOL_SCTP); + th->checksum = ip6_sctp_compute_checksum (vm, b, ih, &bogus); + ASSERT (!bogus); + } +} + +always_inline void * +sctp_reuse_buffer (vlib_main_t * vm, vlib_buffer_t * b) +{ + if (b->flags & VLIB_BUFFER_NEXT_PRESENT) + vlib_buffer_free_one (vm, b->next_buffer); + /* Zero all flags but free list index and trace flag */ + b->flags &= VLIB_BUFFER_NEXT_PRESENT - 1; + b->current_data = 0; + b->current_length = 0; + b->total_length_not_including_first_buffer = 0; + sctp_buffer_opaque (b)->sctp.flags = 0; + sctp_buffer_opaque (b)->sctp.subconn_idx = MAX_SCTP_CONNECTIONS; + + /* Leave enough space for headers */ + return vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN); +} + +always_inline void * +sctp_init_buffer (vlib_main_t * vm, vlib_buffer_t * b) +{ + ASSERT ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0); + b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; + b->total_length_not_including_first_buffer = 0; + sctp_buffer_opaque 
(b)->sctp.flags = 0; + sctp_buffer_opaque (b)->sctp.subconn_idx = MAX_SCTP_CONNECTIONS; + VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b); + /* Leave enough space for headers */ + return vlib_buffer_make_headroom (b, TRANSPORT_MAX_HDRS_LEN); +} + +always_inline int +sctp_alloc_tx_buffers (sctp_main_t * tm, u8 thread_index, u32 n_free_buffers) +{ + vlib_main_t *vm = vlib_get_main (); + u32 current_length = vec_len (tm->tx_buffers[thread_index]); + u32 n_allocated; + + vec_validate (tm->tx_buffers[thread_index], + current_length + n_free_buffers - 1); + n_allocated = + vlib_buffer_alloc (vm, &tm->tx_buffers[thread_index][current_length], + n_free_buffers); + _vec_len (tm->tx_buffers[thread_index]) = current_length + n_allocated; + /* buffer shortage, report failure */ + if (vec_len (tm->tx_buffers[thread_index]) == 0) + { + clib_warning ("out of buffers"); + return -1; + } + return 0; +} + +always_inline int +sctp_get_free_buffer_index (sctp_main_t * tm, u32 * bidx) +{ + u32 *my_tx_buffers; + u32 thread_index = vlib_get_thread_index (); + if (PREDICT_FALSE (vec_len (tm->tx_buffers[thread_index]) == 0)) + { + if (sctp_alloc_tx_buffers (tm, thread_index, VLIB_FRAME_SIZE)) + return -1; + } + my_tx_buffers = tm->tx_buffers[thread_index]; + *bidx = my_tx_buffers[vec_len (my_tx_buffers) - 1]; + _vec_len (my_tx_buffers) -= 1; + return 0; +} + +always_inline void +sctp_enqueue_to_output_i (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, + u8 is_ip4, u8 flush) +{ + sctp_main_t *tm = vnet_get_sctp_main (); + u32 thread_index = vlib_get_thread_index (); + u32 *to_next, next_index; + vlib_frame_t *f; + + b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; + b->error = 0; + + /* Decide where to send the packet */ + next_index = is_ip4 ? 
sctp4_output_node.index : sctp6_output_node.index; + sctp_trajectory_add_start (b, 2); + + /* Get frame to v4/6 output node */ + f = tm->tx_frames[!is_ip4][thread_index]; + if (!f) + { + f = vlib_get_frame_to_node (vm, next_index); + ASSERT (f); + tm->tx_frames[!is_ip4][thread_index] = f; + } + to_next = vlib_frame_vector_args (f); + to_next[f->n_vectors] = bi; + f->n_vectors += 1; + if (flush || f->n_vectors == VLIB_FRAME_SIZE) + { + vlib_put_frame_to_node (vm, next_index, f); + tm->tx_frames[!is_ip4][thread_index] = 0; + } +} + +always_inline void +sctp_enqueue_to_output_now (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, + u8 is_ip4) +{ + sctp_enqueue_to_output_i (vm, b, bi, is_ip4, 1); +} + +always_inline void +sctp_enqueue_to_ip_lookup_i (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, + u8 is_ip4, u32 fib_index, u8 flush) +{ + sctp_main_t *tm = vnet_get_sctp_main (); + u32 thread_index = vlib_get_thread_index (); + u32 *to_next, next_index; + vlib_frame_t *f; + + b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; + b->error = 0; + + vnet_buffer (b)->sw_if_index[VLIB_TX] = fib_index; + vnet_buffer (b)->sw_if_index[VLIB_RX] = 0; + + /* Send to IP lookup */ + next_index = is_ip4 ? 
ip4_lookup_node.index : ip6_lookup_node.index; + if (VLIB_BUFFER_TRACE_TRAJECTORY > 0) + { + b->pre_data[0] = 2; + b->pre_data[1] = next_index; + } + + f = tm->ip_lookup_tx_frames[!is_ip4][thread_index]; + if (!f) + { + f = vlib_get_frame_to_node (vm, next_index); + ASSERT (f); + tm->ip_lookup_tx_frames[!is_ip4][thread_index] = f; + } + + to_next = vlib_frame_vector_args (f); + to_next[f->n_vectors] = bi; + f->n_vectors += 1; + if (flush || f->n_vectors == VLIB_FRAME_SIZE) + { + vlib_put_frame_to_node (vm, next_index, f); + tm->ip_lookup_tx_frames[!is_ip4][thread_index] = 0; + } +} + +always_inline void +sctp_enqueue_to_ip_lookup (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, + u8 is_ip4, u32 fib_index) +{ + sctp_enqueue_to_ip_lookup_i (vm, b, bi, is_ip4, fib_index, 0); + if (vm->thread_index == 0 && vlib_num_workers ()) + session_flush_frames_main_thread (vm); +} + +/** + * Convert buffer to INIT + */ +void +sctp_prepare_init_chunk (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b) +{ + u32 random_seed = random_default_seed (); + u16 alloc_bytes = sizeof (sctp_init_chunk_t); + sctp_sub_connection_t *sub_conn = &sctp_conn->sub_conn[idx]; + + sctp_ipv4_addr_param_t *ip4_param = 0; + sctp_ipv6_addr_param_t *ip6_param = 0; + + if (sub_conn->c_is_ip4) + alloc_bytes += sizeof (sctp_ipv4_addr_param_t); + else + alloc_bytes += sizeof (sctp_ipv6_addr_param_t); + + /* As per RFC 4960 the chunk_length value does NOT contemplate + * the size of the first header (see sctp_header_t) and any padding + */ + u16 chunk_len = alloc_bytes - sizeof (sctp_header_t); + + alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); + + sctp_init_chunk_t *init_chunk = vlib_buffer_push_uninit (b, alloc_bytes); + + u16 pointer_offset = sizeof (init_chunk); + if (sub_conn->c_is_ip4) + { + ip4_param = (sctp_ipv4_addr_param_t *) init_chunk + pointer_offset; + ip4_param->address.as_u32 = sub_conn->c_lcl_ip.ip4.as_u32; + + pointer_offset += sizeof (sctp_ipv4_addr_param_t); + } + else + { + 
ip6_param = (sctp_ipv6_addr_param_t *) init_chunk + pointer_offset; + ip6_param->address.as_u64[0] = sub_conn->c_lcl_ip.ip6.as_u64[0]; + ip6_param->address.as_u64[1] = sub_conn->c_lcl_ip.ip6.as_u64[1]; + + pointer_offset += sizeof (sctp_ipv6_addr_param_t); + } + + init_chunk->sctp_hdr.src_port = sub_conn->c_lcl_port; /* No need of host_to_net conversion, already in net-byte order */ + init_chunk->sctp_hdr.dst_port = sub_conn->c_rmt_port; /* No need of host_to_net conversion, already in net-byte order */ + init_chunk->sctp_hdr.checksum = 0; + /* The sender of an INIT must set the VERIFICATION_TAG to 0 as per RFC 4960 Section 8.5.1 */ + init_chunk->sctp_hdr.verification_tag = 0x0; + + vnet_sctp_set_chunk_type (&init_chunk->chunk_hdr, INIT); + vnet_sctp_set_chunk_length (&init_chunk->chunk_hdr, chunk_len); + vnet_sctp_common_hdr_params_host_to_net (&init_chunk->chunk_hdr); + + sctp_init_cwnd (sctp_conn); + + init_chunk->a_rwnd = clib_host_to_net_u32 (sctp_conn->sub_conn[idx].cwnd); + init_chunk->initiate_tag = clib_host_to_net_u32 (random_u32 (&random_seed)); + init_chunk->inboud_streams_count = + clib_host_to_net_u16 (INBOUND_STREAMS_COUNT); + init_chunk->outbound_streams_count = + clib_host_to_net_u16 (OUTBOUND_STREAMS_COUNT); + + init_chunk->initial_tsn = + clib_host_to_net_u32 (sctp_conn->local_initial_tsn); + SCTP_CONN_TRACKING_DBG ("sctp_conn->local_initial_tsn = %u", + sctp_conn->local_initial_tsn); + + sctp_conn->local_tag = init_chunk->initiate_tag; + + sctp_buffer_opaque (b)->sctp.connection_index = sub_conn->c_c_index; + sctp_buffer_opaque (b)->sctp.subconn_idx = idx; + + SCTP_DBG_STATE_MACHINE ("CONN_INDEX = %u, CURR_CONN_STATE = %u (%s), " + "CHUNK_TYPE = %s, " + "SRC_PORT = %u, DST_PORT = %u", + sub_conn->connection.c_index, + sctp_conn->state, + sctp_state_to_string (sctp_conn->state), + sctp_chunk_to_string (INIT), + init_chunk->sctp_hdr.src_port, + init_chunk->sctp_hdr.dst_port); +} + +void +sctp_compute_mac (sctp_connection_t * sctp_conn, + 
sctp_state_cookie_param_t * state_cookie) +{ +#if OPENSSL_VERSION_NUMBER >= 0x10100000L + HMAC_CTX *ctx; +#else + HMAC_CTX ctx; +#endif + unsigned int len = 0; + const EVP_MD *md = EVP_sha1 (); +#if OPENSSL_VERSION_NUMBER >= 0x10100000L + ctx = HMAC_CTX_new (); + HMAC_Init_ex (ctx, &state_cookie->creation_time, + sizeof (state_cookie->creation_time), md, NULL); + HMAC_Update (ctx, (const unsigned char *) &sctp_conn, sizeof (sctp_conn)); + HMAC_Final (ctx, state_cookie->mac, &len); +#else + HMAC_CTX_init (&ctx); + HMAC_Init_ex (&ctx, &state_cookie->creation_time, + sizeof (state_cookie->creation_time), md, NULL); + HMAC_Update (&ctx, (const unsigned char *) &sctp_conn, sizeof (sctp_conn)); + HMAC_Final (&ctx, state_cookie->mac, &len); + HMAC_CTX_cleanup (&ctx); +#endif + + ENDIANESS_SWAP (state_cookie->mac); +} + +void +sctp_prepare_cookie_ack_chunk (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b) +{ + vlib_main_t *vm = vlib_get_main (); + + sctp_reuse_buffer (vm, b); + + u16 alloc_bytes = sizeof (sctp_cookie_ack_chunk_t); + + /* As per RFC 4960 the chunk_length value does NOT contemplate + * the size of the first header (see sctp_header_t) and any padding + */ + u16 chunk_len = alloc_bytes - sizeof (sctp_header_t); + + alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); + + sctp_cookie_ack_chunk_t *cookie_ack_chunk = + vlib_buffer_push_uninit (b, alloc_bytes); + + cookie_ack_chunk->sctp_hdr.checksum = 0; + cookie_ack_chunk->sctp_hdr.src_port = + sctp_conn->sub_conn[idx].connection.lcl_port; + cookie_ack_chunk->sctp_hdr.dst_port = + sctp_conn->sub_conn[idx].connection.rmt_port; + cookie_ack_chunk->sctp_hdr.verification_tag = sctp_conn->remote_tag; + vnet_sctp_set_chunk_type (&cookie_ack_chunk->chunk_hdr, COOKIE_ACK); + vnet_sctp_set_chunk_length (&cookie_ack_chunk->chunk_hdr, chunk_len); + + sctp_buffer_opaque (b)->sctp.connection_index = + sctp_conn->sub_conn[idx].connection.c_index; + sctp_buffer_opaque (b)->sctp.subconn_idx = idx; +} + +void 
+sctp_prepare_cookie_echo_chunk (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b, u8 reuse_buffer) +{ + vlib_main_t *vm = vlib_get_main (); + + if (reuse_buffer) + sctp_reuse_buffer (vm, b); + + /* The minimum size of the message is given by the sctp_init_ack_chunk_t */ + u16 alloc_bytes = sizeof (sctp_cookie_echo_chunk_t); + /* As per RFC 4960 the chunk_length value does NOT contemplate + * the size of the first header (see sctp_header_t) and any padding + */ + u16 chunk_len = alloc_bytes - sizeof (sctp_header_t); + alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); + sctp_cookie_echo_chunk_t *cookie_echo_chunk = + vlib_buffer_push_uninit (b, alloc_bytes); + cookie_echo_chunk->sctp_hdr.checksum = 0; + cookie_echo_chunk->sctp_hdr.src_port = + sctp_conn->sub_conn[idx].connection.lcl_port; + cookie_echo_chunk->sctp_hdr.dst_port = + sctp_conn->sub_conn[idx].connection.rmt_port; + cookie_echo_chunk->sctp_hdr.verification_tag = sctp_conn->remote_tag; + vnet_sctp_set_chunk_type (&cookie_echo_chunk->chunk_hdr, COOKIE_ECHO); + vnet_sctp_set_chunk_length (&cookie_echo_chunk->chunk_hdr, chunk_len); + clib_memcpy_fast (&(cookie_echo_chunk->cookie), &sctp_conn->cookie_param, + sizeof (sctp_state_cookie_param_t)); + + sctp_buffer_opaque (b)->sctp.connection_index = + sctp_conn->sub_conn[idx].connection.c_index; + sctp_buffer_opaque (b)->sctp.subconn_idx = idx; +} + + +/* + * Send COOKIE_ECHO + */ +void +sctp_send_cookie_echo (sctp_connection_t * sctp_conn) +{ + vlib_buffer_t *b; + u32 bi; + sctp_main_t *tm = vnet_get_sctp_main (); + vlib_main_t *vm = vlib_get_main (); + + if (PREDICT_FALSE (sctp_conn->init_retransmit_err > SCTP_MAX_INIT_RETRANS)) + { + clib_warning ("Reached MAX_INIT_RETRANS times. 
Aborting connection."); + + session_stream_connect_notify (&sctp_conn->sub_conn + [SCTP_PRIMARY_PATH_IDX].connection, 1); + + sctp_connection_timers_reset (sctp_conn); + + sctp_connection_cleanup (sctp_conn); + } + + if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi))) + return; + + b = vlib_get_buffer (vm, bi); + u8 idx = SCTP_PRIMARY_PATH_IDX; + + sctp_init_buffer (vm, b); + sctp_prepare_cookie_echo_chunk (sctp_conn, idx, b, 0); + sctp_enqueue_to_output_now (vm, b, bi, sctp_conn->sub_conn[idx].c_is_ip4); + + /* Start the T1_INIT timer */ + sctp_timer_set (sctp_conn, idx, SCTP_TIMER_T1_INIT, + sctp_conn->sub_conn[idx].RTO); + + /* Change state to COOKIE_WAIT */ + sctp_conn->state = SCTP_STATE_COOKIE_WAIT; + + /* Measure RTT with this */ + sctp_conn->sub_conn[idx].rtt_ts = sctp_time_now (); +} + + +/** + * Convert buffer to ERROR + */ +void +sctp_prepare_operation_error (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b, u8 err_cause) +{ + vlib_main_t *vm = vlib_get_main (); + + sctp_reuse_buffer (vm, b); + + /* The minimum size of the message is given by the sctp_operation_error_t */ + u16 alloc_bytes = + sizeof (sctp_operation_error_t) + sizeof (sctp_err_cause_param_t); + + /* As per RFC 4960 the chunk_length value does NOT contemplate + * the size of the first header (see sctp_header_t) and any padding + */ + u16 chunk_len = alloc_bytes - sizeof (sctp_header_t); + + alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); + + sctp_operation_error_t *err_chunk = + vlib_buffer_push_uninit (b, alloc_bytes); + + /* src_port & dst_port are already in network byte-order */ + err_chunk->sctp_hdr.checksum = 0; + err_chunk->sctp_hdr.src_port = sctp_conn->sub_conn[idx].connection.lcl_port; + err_chunk->sctp_hdr.dst_port = sctp_conn->sub_conn[idx].connection.rmt_port; + /* As per RFC4960 Section 5.2.2: copy the INITIATE_TAG into the VERIFICATION_TAG of the ABORT chunk */ + err_chunk->sctp_hdr.verification_tag = sctp_conn->local_tag; + + 
err_chunk->err_causes[0].param_hdr.length = + clib_host_to_net_u16 (sizeof (err_chunk->err_causes[0].param_hdr.type) + + sizeof (err_chunk->err_causes[0].param_hdr.length)); + err_chunk->err_causes[0].param_hdr.type = clib_host_to_net_u16 (err_cause); + + vnet_sctp_set_chunk_type (&err_chunk->chunk_hdr, OPERATION_ERROR); + vnet_sctp_set_chunk_length (&err_chunk->chunk_hdr, chunk_len); + + sctp_buffer_opaque (b)->sctp.connection_index = + sctp_conn->sub_conn[idx].connection.c_index; + sctp_buffer_opaque (b)->sctp.subconn_idx = idx; +} + +/** + * Convert buffer to ABORT + */ +void +sctp_prepare_abort_for_collision (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b, ip4_address_t * ip4_addr, + ip6_address_t * ip6_addr) +{ + vlib_main_t *vm = vlib_get_main (); + + sctp_reuse_buffer (vm, b); + + /* The minimum size of the message is given by the sctp_abort_chunk_t */ + u16 alloc_bytes = sizeof (sctp_abort_chunk_t); + + /* As per RFC 4960 the chunk_length value does NOT contemplate + * the size of the first header (see sctp_header_t) and any padding + */ + u16 chunk_len = alloc_bytes - sizeof (sctp_header_t); + + alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); + + sctp_abort_chunk_t *abort_chunk = vlib_buffer_push_uninit (b, alloc_bytes); + + /* src_port & dst_port are already in network byte-order */ + abort_chunk->sctp_hdr.checksum = 0; + abort_chunk->sctp_hdr.src_port = + sctp_conn->sub_conn[idx].connection.lcl_port; + abort_chunk->sctp_hdr.dst_port = + sctp_conn->sub_conn[idx].connection.rmt_port; + /* As per RFC4960 Section 5.2.2: copy the INITIATE_TAG into the VERIFICATION_TAG of the ABORT chunk */ + abort_chunk->sctp_hdr.verification_tag = sctp_conn->local_tag; + + vnet_sctp_set_chunk_type (&abort_chunk->chunk_hdr, ABORT); + vnet_sctp_set_chunk_length (&abort_chunk->chunk_hdr, chunk_len); + + sctp_buffer_opaque (b)->sctp.connection_index = + sctp_conn->sub_conn[idx].connection.c_index; + sctp_buffer_opaque (b)->sctp.subconn_idx = idx; +} + +/** 
+ * Convert buffer to INIT-ACK + */ +void +sctp_prepare_initack_chunk_for_collision (sctp_connection_t * sctp_conn, + u8 idx, vlib_buffer_t * b, + ip4_address_t * ip4_addr, + ip6_address_t * ip6_addr) +{ + vlib_main_t *vm = vlib_get_main (); + sctp_ipv4_addr_param_t *ip4_param = 0; + sctp_ipv6_addr_param_t *ip6_param = 0; + + sctp_reuse_buffer (vm, b); + + /* The minimum size of the message is given by the sctp_init_ack_chunk_t */ + u16 alloc_bytes = + sizeof (sctp_init_ack_chunk_t) + sizeof (sctp_state_cookie_param_t); + + if (PREDICT_TRUE (ip4_addr != NULL)) + { + /* Create room for variable-length fields in the INIT_ACK chunk */ + alloc_bytes += SCTP_IPV4_ADDRESS_TYPE_LENGTH; + } + if (PREDICT_TRUE (ip6_addr != NULL)) + { + /* Create room for variable-length fields in the INIT_ACK chunk */ + alloc_bytes += SCTP_IPV6_ADDRESS_TYPE_LENGTH; + } + + if (sctp_conn->sub_conn[idx].connection.is_ip4) + alloc_bytes += sizeof (sctp_ipv4_addr_param_t); + else + alloc_bytes += sizeof (sctp_ipv6_addr_param_t); + + /* As per RFC 4960 the chunk_length value does NOT contemplate + * the size of the first header (see sctp_header_t) and any padding + */ + u16 chunk_len = alloc_bytes - sizeof (sctp_header_t); + + alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); + + sctp_init_ack_chunk_t *init_ack_chunk = + vlib_buffer_push_uninit (b, alloc_bytes); + + u16 pointer_offset = sizeof (sctp_init_ack_chunk_t); + + /* Create State Cookie parameter */ + sctp_state_cookie_param_t *state_cookie_param = + (sctp_state_cookie_param_t *) ((char *) init_ack_chunk + pointer_offset); + + state_cookie_param->param_hdr.type = + clib_host_to_net_u16 (SCTP_STATE_COOKIE_TYPE); + state_cookie_param->param_hdr.length = + clib_host_to_net_u16 (sizeof (sctp_state_cookie_param_t)); + state_cookie_param->creation_time = clib_host_to_net_u64 (sctp_time_now ()); + state_cookie_param->cookie_lifespan = + clib_host_to_net_u32 (SCTP_VALID_COOKIE_LIFE); + + sctp_compute_mac (sctp_conn, state_cookie_param); 
+ + pointer_offset += sizeof (sctp_state_cookie_param_t); + + if (PREDICT_TRUE (ip4_addr != NULL)) + { + sctp_ipv4_addr_param_t *ipv4_addr = + (sctp_ipv4_addr_param_t *) init_ack_chunk + pointer_offset; + + ipv4_addr->param_hdr.type = + clib_host_to_net_u16 (SCTP_IPV4_ADDRESS_TYPE); + ipv4_addr->param_hdr.length = + clib_host_to_net_u16 (SCTP_IPV4_ADDRESS_TYPE_LENGTH); + ipv4_addr->address.as_u32 = ip4_addr->as_u32; + + pointer_offset += SCTP_IPV4_ADDRESS_TYPE_LENGTH; + } + if (PREDICT_TRUE (ip6_addr != NULL)) + { + sctp_ipv6_addr_param_t *ipv6_addr = + (sctp_ipv6_addr_param_t *) init_ack_chunk + pointer_offset; + + ipv6_addr->param_hdr.type = + clib_host_to_net_u16 (SCTP_IPV6_ADDRESS_TYPE); + ipv6_addr->param_hdr.length = + clib_host_to_net_u16 (SCTP_IPV6_ADDRESS_TYPE_LENGTH); + ipv6_addr->address.as_u64[0] = ip6_addr->as_u64[0]; + ipv6_addr->address.as_u64[1] = ip6_addr->as_u64[1]; + + pointer_offset += SCTP_IPV6_ADDRESS_TYPE_LENGTH; + } + + if (sctp_conn->sub_conn[idx].connection.is_ip4) + { + ip4_param = (sctp_ipv4_addr_param_t *) init_ack_chunk + pointer_offset; + ip4_param->address.as_u32 = + sctp_conn->sub_conn[idx].connection.lcl_ip.ip4.as_u32; + + pointer_offset += sizeof (sctp_ipv4_addr_param_t); + } + else + { + ip6_param = (sctp_ipv6_addr_param_t *) init_ack_chunk + pointer_offset; + ip6_param->address.as_u64[0] = + sctp_conn->sub_conn[idx].connection.lcl_ip.ip6.as_u64[0]; + ip6_param->address.as_u64[1] = + sctp_conn->sub_conn[idx].connection.lcl_ip.ip6.as_u64[1]; + + pointer_offset += sizeof (sctp_ipv6_addr_param_t); + } + + /* src_port & dst_port are already in network byte-order */ + init_ack_chunk->sctp_hdr.checksum = 0; + init_ack_chunk->sctp_hdr.src_port = + sctp_conn->sub_conn[idx].connection.lcl_port; + init_ack_chunk->sctp_hdr.dst_port = + sctp_conn->sub_conn[idx].connection.rmt_port; + /* the sctp_conn->verification_tag is already in network byte-order (being a copy of the init_tag coming with the INIT chunk) */ + 
init_ack_chunk->sctp_hdr.verification_tag = sctp_conn->remote_tag; + init_ack_chunk->initial_tsn = + clib_host_to_net_u32 (sctp_conn->local_initial_tsn); + SCTP_CONN_TRACKING_DBG ("init_ack_chunk->initial_tsn = %u", + init_ack_chunk->initial_tsn); + + vnet_sctp_set_chunk_type (&init_ack_chunk->chunk_hdr, INIT_ACK); + vnet_sctp_set_chunk_length (&init_ack_chunk->chunk_hdr, chunk_len); + + init_ack_chunk->initiate_tag = sctp_conn->local_tag; + + init_ack_chunk->a_rwnd = + clib_host_to_net_u32 (sctp_conn->sub_conn[idx].cwnd); + init_ack_chunk->inboud_streams_count = + clib_host_to_net_u16 (INBOUND_STREAMS_COUNT); + init_ack_chunk->outbound_streams_count = + clib_host_to_net_u16 (OUTBOUND_STREAMS_COUNT); + + sctp_buffer_opaque (b)->sctp.connection_index = + sctp_conn->sub_conn[idx].connection.c_index; + sctp_buffer_opaque (b)->sctp.subconn_idx = idx; +} + +/** + * Convert buffer to INIT-ACK + */ +void +sctp_prepare_initack_chunk (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b, ip4_address_t * ip4_addr, + u8 add_ip4, ip6_address_t * ip6_addr, u8 add_ip6) +{ + vlib_main_t *vm = vlib_get_main (); + sctp_ipv4_addr_param_t *ip4_param = 0; + sctp_ipv6_addr_param_t *ip6_param = 0; + u32 random_seed = random_default_seed (); + + sctp_reuse_buffer (vm, b); + + /* The minimum size of the message is given by the sctp_init_ack_chunk_t */ + u16 alloc_bytes = + sizeof (sctp_init_ack_chunk_t) + sizeof (sctp_state_cookie_param_t); + + if (PREDICT_FALSE (add_ip4 == 1)) + { + /* Create room for variable-length fields in the INIT_ACK chunk */ + alloc_bytes += SCTP_IPV4_ADDRESS_TYPE_LENGTH; + } + if (PREDICT_FALSE (add_ip6 == 1)) + { + /* Create room for variable-length fields in the INIT_ACK chunk */ + alloc_bytes += SCTP_IPV6_ADDRESS_TYPE_LENGTH; + } + + if (sctp_conn->sub_conn[idx].connection.is_ip4) + alloc_bytes += sizeof (sctp_ipv4_addr_param_t); + else + alloc_bytes += sizeof (sctp_ipv6_addr_param_t); + + /* As per RFC 4960 the chunk_length value does NOT contemplate + 
* the size of the first header (see sctp_header_t) and any padding + */ + u16 chunk_len = alloc_bytes - sizeof (sctp_header_t); + + alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); + + sctp_init_ack_chunk_t *init_ack_chunk = + vlib_buffer_push_uninit (b, alloc_bytes); + + u16 pointer_offset = sizeof (sctp_init_ack_chunk_t); + + /* Create State Cookie parameter */ + sctp_state_cookie_param_t *state_cookie_param = + (sctp_state_cookie_param_t *) ((char *) init_ack_chunk + pointer_offset); + + state_cookie_param->param_hdr.type = + clib_host_to_net_u16 (SCTP_STATE_COOKIE_TYPE); + state_cookie_param->param_hdr.length = + clib_host_to_net_u16 (sizeof (sctp_state_cookie_param_t)); + state_cookie_param->creation_time = clib_host_to_net_u64 (sctp_time_now ()); + state_cookie_param->cookie_lifespan = + clib_host_to_net_u32 (SCTP_VALID_COOKIE_LIFE); + + sctp_compute_mac (sctp_conn, state_cookie_param); + + pointer_offset += sizeof (sctp_state_cookie_param_t); + + if (PREDICT_TRUE (ip4_addr != NULL)) + { + sctp_ipv4_addr_param_t *ipv4_addr = + (sctp_ipv4_addr_param_t *) init_ack_chunk + pointer_offset; + + ipv4_addr->param_hdr.type = + clib_host_to_net_u16 (SCTP_IPV4_ADDRESS_TYPE); + ipv4_addr->param_hdr.length = + clib_host_to_net_u16 (SCTP_IPV4_ADDRESS_TYPE_LENGTH); + ipv4_addr->address.as_u32 = ip4_addr->as_u32; + + pointer_offset += SCTP_IPV4_ADDRESS_TYPE_LENGTH; + } + if (PREDICT_TRUE (ip6_addr != NULL)) + { + sctp_ipv6_addr_param_t *ipv6_addr = + (sctp_ipv6_addr_param_t *) init_ack_chunk + pointer_offset; + + ipv6_addr->param_hdr.type = + clib_host_to_net_u16 (SCTP_IPV6_ADDRESS_TYPE); + ipv6_addr->param_hdr.length = + clib_host_to_net_u16 (SCTP_IPV6_ADDRESS_TYPE_LENGTH); + ipv6_addr->address.as_u64[0] = ip6_addr->as_u64[0]; + ipv6_addr->address.as_u64[1] = ip6_addr->as_u64[1]; + + pointer_offset += SCTP_IPV6_ADDRESS_TYPE_LENGTH; + } + + if (sctp_conn->sub_conn[idx].connection.is_ip4) + { + ip4_param = (sctp_ipv4_addr_param_t *) init_ack_chunk + pointer_offset; 
+ ip4_param->address.as_u32 = + sctp_conn->sub_conn[idx].connection.lcl_ip.ip4.as_u32; + + pointer_offset += sizeof (sctp_ipv4_addr_param_t); + } + else + { + ip6_param = (sctp_ipv6_addr_param_t *) init_ack_chunk + pointer_offset; + ip6_param->address.as_u64[0] = + sctp_conn->sub_conn[idx].connection.lcl_ip.ip6.as_u64[0]; + ip6_param->address.as_u64[1] = + sctp_conn->sub_conn[idx].connection.lcl_ip.ip6.as_u64[1]; + + pointer_offset += sizeof (sctp_ipv6_addr_param_t); + } + + /* src_port & dst_port are already in network byte-order */ + init_ack_chunk->sctp_hdr.checksum = 0; + init_ack_chunk->sctp_hdr.src_port = + sctp_conn->sub_conn[idx].connection.lcl_port; + init_ack_chunk->sctp_hdr.dst_port = + sctp_conn->sub_conn[idx].connection.rmt_port; + /* the sctp_conn->verification_tag is already in network byte-order (being a copy of the init_tag coming with the INIT chunk) */ + init_ack_chunk->sctp_hdr.verification_tag = sctp_conn->remote_tag; + init_ack_chunk->initial_tsn = + clib_host_to_net_u32 (sctp_conn->local_initial_tsn); + SCTP_CONN_TRACKING_DBG ("init_ack_chunk->initial_tsn = %u", + init_ack_chunk->initial_tsn); + + vnet_sctp_set_chunk_type (&init_ack_chunk->chunk_hdr, INIT_ACK); + vnet_sctp_set_chunk_length (&init_ack_chunk->chunk_hdr, chunk_len); + + init_ack_chunk->initiate_tag = + clib_host_to_net_u32 (random_u32 (&random_seed)); + + init_ack_chunk->a_rwnd = + clib_host_to_net_u32 (sctp_conn->sub_conn[idx].cwnd); + init_ack_chunk->inboud_streams_count = + clib_host_to_net_u16 (INBOUND_STREAMS_COUNT); + init_ack_chunk->outbound_streams_count = + clib_host_to_net_u16 (OUTBOUND_STREAMS_COUNT); + + sctp_conn->local_tag = init_ack_chunk->initiate_tag; + + sctp_buffer_opaque (b)->sctp.connection_index = + sctp_conn->sub_conn[idx].connection.c_index; + sctp_buffer_opaque (b)->sctp.subconn_idx = idx; +} + +/** + * Convert buffer to SHUTDOWN + */ +void +sctp_prepare_shutdown_chunk (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b) +{ + u16 alloc_bytes = 
sizeof (sctp_shutdown_association_chunk_t); + + /* As per RFC 4960 the chunk_length value does NOT contemplate + * the size of the first header (see sctp_header_t) and any padding + */ + u16 chunk_len = alloc_bytes - sizeof (sctp_header_t); + + alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); + + sctp_shutdown_association_chunk_t *shutdown_chunk = + vlib_buffer_push_uninit (b, alloc_bytes); + + shutdown_chunk->sctp_hdr.checksum = 0; + /* No need of host_to_net conversion, already in net-byte order */ + shutdown_chunk->sctp_hdr.src_port = + sctp_conn->sub_conn[idx].connection.lcl_port; + shutdown_chunk->sctp_hdr.dst_port = + sctp_conn->sub_conn[idx].connection.rmt_port; + shutdown_chunk->sctp_hdr.verification_tag = sctp_conn->remote_tag; + vnet_sctp_set_chunk_type (&shutdown_chunk->chunk_hdr, SHUTDOWN); + vnet_sctp_set_chunk_length (&shutdown_chunk->chunk_hdr, chunk_len); + + shutdown_chunk->cumulative_tsn_ack = sctp_conn->last_rcvd_tsn; + + sctp_buffer_opaque (b)->sctp.connection_index = + sctp_conn->sub_conn[idx].connection.c_index; + sctp_buffer_opaque (b)->sctp.subconn_idx = idx; +} + +/* + * Send SHUTDOWN + */ +void +sctp_send_shutdown (sctp_connection_t * sctp_conn) +{ + vlib_buffer_t *b; + u32 bi; + sctp_main_t *tm = vnet_get_sctp_main (); + vlib_main_t *vm = vlib_get_main (); + + if (sctp_check_outstanding_data_chunks (sctp_conn) > 0) + return; + + if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi))) + return; + + u8 idx = SCTP_PRIMARY_PATH_IDX; + + b = vlib_get_buffer (vm, bi); + sctp_init_buffer (vm, b); + sctp_prepare_shutdown_chunk (sctp_conn, idx, b); + + sctp_enqueue_to_output_now (vm, b, bi, + sctp_conn->sub_conn[idx].connection.is_ip4); +} + +/** + * Convert buffer to SHUTDOWN_ACK + */ +void +sctp_prepare_shutdown_ack_chunk (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b) +{ + u16 alloc_bytes = sizeof (sctp_shutdown_association_chunk_t); + alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); + + u16 chunk_len = 
alloc_bytes - sizeof (sctp_header_t); + + sctp_shutdown_ack_chunk_t *shutdown_ack_chunk = + vlib_buffer_push_uninit (b, alloc_bytes); + + shutdown_ack_chunk->sctp_hdr.checksum = 0; + /* No need of host_to_net conversion, already in net-byte order */ + shutdown_ack_chunk->sctp_hdr.src_port = + sctp_conn->sub_conn[idx].connection.lcl_port; + shutdown_ack_chunk->sctp_hdr.dst_port = + sctp_conn->sub_conn[idx].connection.rmt_port; + shutdown_ack_chunk->sctp_hdr.verification_tag = sctp_conn->remote_tag; + + vnet_sctp_set_chunk_type (&shutdown_ack_chunk->chunk_hdr, SHUTDOWN_ACK); + vnet_sctp_set_chunk_length (&shutdown_ack_chunk->chunk_hdr, chunk_len); + + sctp_buffer_opaque (b)->sctp.connection_index = + sctp_conn->sub_conn[idx].connection.c_index; + sctp_buffer_opaque (b)->sctp.subconn_idx = idx; +} + +/* + * Send SHUTDOWN_ACK + */ +void +sctp_send_shutdown_ack (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b) +{ + vlib_main_t *vm = vlib_get_main (); + + if (sctp_check_outstanding_data_chunks (sctp_conn) > 0) + return; + + sctp_reuse_buffer (vm, b); + + sctp_prepare_shutdown_ack_chunk (sctp_conn, idx, b); +} + +/** + * Convert buffer to SACK + */ +void +sctp_prepare_sack_chunk (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b) +{ + vlib_main_t *vm = vlib_get_main (); + + sctp_reuse_buffer (vm, b); + + u16 alloc_bytes = sizeof (sctp_selective_ack_chunk_t); + + /* As per RFC 4960 the chunk_length value does NOT contemplate + * the size of the first header (see sctp_header_t) and any padding + */ + u16 chunk_len = alloc_bytes - sizeof (sctp_header_t); + + alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); + + sctp_selective_ack_chunk_t *sack = vlib_buffer_push_uninit (b, alloc_bytes); + + sack->sctp_hdr.checksum = 0; + sack->sctp_hdr.src_port = sctp_conn->sub_conn[idx].connection.lcl_port; + sack->sctp_hdr.dst_port = sctp_conn->sub_conn[idx].connection.rmt_port; + sack->sctp_hdr.verification_tag = sctp_conn->remote_tag; + 
vnet_sctp_set_chunk_type (&sack->chunk_hdr, SACK); + vnet_sctp_set_chunk_length (&sack->chunk_hdr, chunk_len); + + sack->cumulative_tsn_ack = sctp_conn->next_tsn_expected; + + sctp_conn->ack_state = 0; + + sctp_buffer_opaque (b)->sctp.connection_index = + sctp_conn->sub_conn[idx].connection.c_index; + sctp_buffer_opaque (b)->sctp.subconn_idx = idx; +} + +/** + * Convert buffer to HEARTBEAT_ACK + */ +void +sctp_prepare_heartbeat_ack_chunk (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b) +{ + vlib_main_t *vm = vlib_get_main (); + + u16 alloc_bytes = sizeof (sctp_hb_ack_chunk_t); + + sctp_reuse_buffer (vm, b); + + /* As per RFC 4960 the chunk_length value does NOT contemplate + * the size of the first header (see sctp_header_t) and any padding + */ + u16 chunk_len = alloc_bytes - sizeof (sctp_header_t); + + alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); + + sctp_hb_ack_chunk_t *hb_ack = vlib_buffer_push_uninit (b, alloc_bytes); + + hb_ack->sctp_hdr.checksum = 0; + /* No need of host_to_net conversion, already in net-byte order */ + hb_ack->sctp_hdr.src_port = sctp_conn->sub_conn[idx].connection.lcl_port; + hb_ack->sctp_hdr.dst_port = sctp_conn->sub_conn[idx].connection.rmt_port; + hb_ack->sctp_hdr.verification_tag = sctp_conn->remote_tag; + hb_ack->hb_info.param_hdr.type = clib_host_to_net_u16 (1); + hb_ack->hb_info.param_hdr.length = + clib_host_to_net_u16 (sizeof (hb_ack->hb_info.hb_info)); + + vnet_sctp_set_chunk_type (&hb_ack->chunk_hdr, HEARTBEAT_ACK); + vnet_sctp_set_chunk_length (&hb_ack->chunk_hdr, chunk_len); + + sctp_buffer_opaque (b)->sctp.connection_index = + sctp_conn->sub_conn[idx].connection.c_index; + sctp_buffer_opaque (b)->sctp.subconn_idx = idx; +} + +/** + * Convert buffer to HEARTBEAT + */ +void +sctp_prepare_heartbeat_chunk (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b) +{ + u16 alloc_bytes = sizeof (sctp_hb_req_chunk_t); + + /* As per RFC 4960 the chunk_length value does NOT contemplate + * the size of the 
first header (see sctp_header_t) and any padding + */ + u16 chunk_len = alloc_bytes - sizeof (sctp_header_t); + + alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); + + sctp_hb_req_chunk_t *hb_req = vlib_buffer_push_uninit (b, alloc_bytes); + + hb_req->sctp_hdr.checksum = 0; + /* No need of host_to_net conversion, already in net-byte order */ + hb_req->sctp_hdr.src_port = sctp_conn->sub_conn[idx].connection.lcl_port; + hb_req->sctp_hdr.dst_port = sctp_conn->sub_conn[idx].connection.rmt_port; + hb_req->sctp_hdr.verification_tag = sctp_conn->remote_tag; + hb_req->hb_info.param_hdr.type = clib_host_to_net_u16 (1); + hb_req->hb_info.param_hdr.length = + clib_host_to_net_u16 (sizeof (hb_req->hb_info.hb_info)); + + vnet_sctp_set_chunk_type (&hb_req->chunk_hdr, HEARTBEAT); + vnet_sctp_set_chunk_length (&hb_req->chunk_hdr, chunk_len); + + sctp_buffer_opaque (b)->sctp.connection_index = + sctp_conn->sub_conn[idx].connection.c_index; + sctp_buffer_opaque (b)->sctp.subconn_idx = idx; +} + +void +sctp_send_heartbeat (sctp_connection_t * sctp_conn) +{ + vlib_buffer_t *b; + u32 bi; + sctp_main_t *tm = vnet_get_sctp_main (); + vlib_main_t *vm = vlib_get_main (); + + u8 i; + u64 now = sctp_time_now (); + + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + if (sctp_conn->sub_conn[i].state == SCTP_SUBCONN_STATE_DOWN) + continue; + + if (now > (sctp_conn->sub_conn[i].last_seen + SCTP_HB_INTERVAL)) + { + if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi))) + return; + + b = vlib_get_buffer (vm, bi); + sctp_init_buffer (vm, b); + sctp_prepare_heartbeat_chunk (sctp_conn, i, b); + + sctp_enqueue_to_output_now (vm, b, bi, + sctp_conn->sub_conn[i]. 
+ connection.is_ip4); + + sctp_conn->sub_conn[i].unacknowledged_hb += 1; + } + } +} + +/** + * Convert buffer to SHUTDOWN_COMPLETE + */ +void +sctp_prepare_shutdown_complete_chunk (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b) +{ + u16 alloc_bytes = sizeof (sctp_shutdown_association_chunk_t); + alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); + + u16 chunk_len = alloc_bytes - sizeof (sctp_header_t); + + sctp_shutdown_complete_chunk_t *shutdown_complete = + vlib_buffer_push_uninit (b, alloc_bytes); + + shutdown_complete->sctp_hdr.checksum = 0; + /* No need of host_to_net conversion, already in net-byte order */ + shutdown_complete->sctp_hdr.src_port = + sctp_conn->sub_conn[idx].connection.lcl_port; + shutdown_complete->sctp_hdr.dst_port = + sctp_conn->sub_conn[idx].connection.rmt_port; + shutdown_complete->sctp_hdr.verification_tag = sctp_conn->remote_tag; + + vnet_sctp_set_chunk_type (&shutdown_complete->chunk_hdr, SHUTDOWN_COMPLETE); + vnet_sctp_set_chunk_length (&shutdown_complete->chunk_hdr, chunk_len); + + sctp_buffer_opaque (b)->sctp.connection_index = + sctp_conn->sub_conn[idx].connection.c_index; + sctp_buffer_opaque (b)->sctp.subconn_idx = idx; +} + +void +sctp_send_shutdown_complete (sctp_connection_t * sctp_conn, u8 idx, + vlib_buffer_t * b0) +{ + vlib_main_t *vm = vlib_get_main (); + + if (sctp_check_outstanding_data_chunks (sctp_conn) > 0) + return; + + sctp_reuse_buffer (vm, b0); + + sctp_prepare_shutdown_complete_chunk (sctp_conn, idx, b0); +} + +/* + * Send INIT + */ +void +sctp_send_init (sctp_connection_t * sctp_conn) +{ + vlib_buffer_t *b; + u32 bi; + sctp_main_t *tm = vnet_get_sctp_main (); + vlib_main_t *vm = vlib_get_main (); + + if (PREDICT_FALSE (sctp_conn->init_retransmit_err > SCTP_MAX_INIT_RETRANS)) + { + clib_warning ("Reached MAX_INIT_RETRANS times. 
Aborting connection."); + + session_stream_connect_notify (&sctp_conn->sub_conn + [SCTP_PRIMARY_PATH_IDX].connection, 1); + + sctp_connection_timers_reset (sctp_conn); + + sctp_connection_cleanup (sctp_conn); + + return; + } + + if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi))) + return; + + b = vlib_get_buffer (vm, bi); + u8 idx = SCTP_PRIMARY_PATH_IDX; + + sctp_init_buffer (vm, b); + sctp_prepare_init_chunk (sctp_conn, idx, b); + + sctp_push_ip_hdr (tm, &sctp_conn->sub_conn[idx], b); + sctp_enqueue_to_ip_lookup (vm, b, bi, sctp_conn->sub_conn[idx].c_is_ip4, + sctp_conn->sub_conn[idx].c_fib_index); + + /* Start the T1_INIT timer */ + sctp_timer_set (sctp_conn, idx, SCTP_TIMER_T1_INIT, + sctp_conn->sub_conn[idx].RTO); + + /* Change state to COOKIE_WAIT */ + sctp_conn->state = SCTP_STATE_COOKIE_WAIT; + + /* Measure RTT with this */ + sctp_conn->sub_conn[idx].rtt_ts = sctp_time_now (); +} + +/** + * Push SCTP header and update connection variables + */ +static void +sctp_push_hdr_i (sctp_connection_t * sctp_conn, vlib_buffer_t * b, + sctp_state_t next_state) +{ + u16 data_len = b->current_length; + + if (b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID) + data_len += b->total_length_not_including_first_buffer; + + ASSERT (!b->total_length_not_including_first_buffer + || (b->flags & VLIB_BUFFER_NEXT_PRESENT) + || !(b->flags & VLIB_BUFFER_TOTAL_LENGTH_VALID)); + + SCTP_ADV_DBG_OUTPUT ("b->current_length = %u, " + "b->current_data = %p " + "data_len = %u", + b->current_length, b->current_data, data_len); + + u16 data_padding = vnet_sctp_calculate_padding (b->current_length); + if (data_padding > 0) + { + u8 *p_tail = vlib_buffer_put_uninit (b, data_padding); + clib_memset_u8 (p_tail, 0, data_padding); + } + + u16 bytes_to_add = sizeof (sctp_payload_data_chunk_t); + u16 chunk_length = data_len + bytes_to_add - sizeof (sctp_header_t); + + sctp_payload_data_chunk_t *data_chunk = + vlib_buffer_push_uninit (b, bytes_to_add); + + u8 idx = sctp_data_subconn_select 
(sctp_conn); + SCTP_DBG_OUTPUT + ("SCTP_CONN = %p, IDX = %u, S_INDEX = %u, C_INDEX = %u, sctp_conn->[...].LCL_PORT = %u, sctp_conn->[...].RMT_PORT = %u", + sctp_conn, idx, sctp_conn->sub_conn[idx].connection.s_index, + sctp_conn->sub_conn[idx].connection.c_index, + sctp_conn->sub_conn[idx].connection.lcl_port, + sctp_conn->sub_conn[idx].connection.rmt_port); + data_chunk->sctp_hdr.checksum = 0; + data_chunk->sctp_hdr.src_port = + sctp_conn->sub_conn[idx].connection.lcl_port; + data_chunk->sctp_hdr.dst_port = + sctp_conn->sub_conn[idx].connection.rmt_port; + data_chunk->sctp_hdr.verification_tag = sctp_conn->remote_tag; + + data_chunk->tsn = clib_host_to_net_u32 (sctp_conn->next_tsn); + data_chunk->stream_id = clib_host_to_net_u16 (0); + data_chunk->stream_seq = clib_host_to_net_u16 (0); + + vnet_sctp_set_chunk_type (&data_chunk->chunk_hdr, DATA); + vnet_sctp_set_chunk_length (&data_chunk->chunk_hdr, chunk_length); + + vnet_sctp_set_bbit (&data_chunk->chunk_hdr); + vnet_sctp_set_ebit (&data_chunk->chunk_hdr); + + SCTP_ADV_DBG_OUTPUT ("POINTER_WITH_DATA = %p, DATA_OFFSET = %u", + b->data, b->current_data); + + if (sctp_conn->sub_conn[idx].state != SCTP_SUBCONN_AWAITING_SACK) + { + sctp_conn->sub_conn[idx].state = SCTP_SUBCONN_AWAITING_SACK; + sctp_conn->last_unacked_tsn = sctp_conn->next_tsn; + } + + sctp_conn->next_tsn += data_len; + + u32 inflight = sctp_conn->next_tsn - sctp_conn->last_unacked_tsn; + /* Section 7.2.2; point (3) */ + if (sctp_conn->sub_conn[idx].partially_acked_bytes >= + sctp_conn->sub_conn[idx].cwnd + && inflight >= sctp_conn->sub_conn[idx].cwnd) + { + sctp_conn->sub_conn[idx].cwnd += sctp_conn->sub_conn[idx].PMTU; + sctp_conn->sub_conn[idx].partially_acked_bytes -= + sctp_conn->sub_conn[idx].cwnd; + } + + sctp_conn->sub_conn[idx].last_data_ts = sctp_time_now (); + + sctp_buffer_opaque (b)->sctp.connection_index = + sctp_conn->sub_conn[idx].connection.c_index; + + sctp_buffer_opaque (b)->sctp.subconn_idx = idx; +} + +u32 +sctp_push_header 
(transport_connection_t * trans_conn, vlib_buffer_t * b) +{ + sctp_connection_t *sctp_conn = + sctp_get_connection_from_transport (trans_conn); + + SCTP_DBG_OUTPUT ("TRANS_CONN = %p, SCTP_CONN = %p, " + "S_INDEX = %u, C_INDEX = %u," + "trans_conn->LCL_PORT = %u, trans_conn->RMT_PORT = %u", + trans_conn, + sctp_conn, + trans_conn->s_index, + trans_conn->c_index, + trans_conn->lcl_port, trans_conn->rmt_port); + + sctp_push_hdr_i (sctp_conn, b, SCTP_STATE_ESTABLISHED); + + sctp_trajectory_add_start (b, 3); + + return 0; +} + +u32 +sctp_prepare_data_retransmit (sctp_connection_t * sctp_conn, + u8 idx, + u32 offset, + u32 max_deq_bytes, vlib_buffer_t ** b) +{ + sctp_main_t *tm = vnet_get_sctp_main (); + vlib_main_t *vm = vlib_get_main (); + int n_bytes = 0; + u32 bi, available_bytes, seg_size; + u8 *data; + + ASSERT (sctp_conn->state >= SCTP_STATE_ESTABLISHED); + ASSERT (max_deq_bytes != 0); + + /* + * Make sure we can retransmit something + */ + available_bytes = + transport_max_tx_dequeue (&sctp_conn->sub_conn[idx].connection); + ASSERT (available_bytes >= offset); + available_bytes -= offset; + if (!available_bytes) + return 0; + max_deq_bytes = clib_min (sctp_conn->sub_conn[idx].cwnd, max_deq_bytes); + max_deq_bytes = clib_min (available_bytes, max_deq_bytes); + + seg_size = max_deq_bytes; + + /* + * Allocate and fill in buffer(s) + */ + + if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi))) + return 0; + *b = vlib_get_buffer (vm, bi); + data = sctp_init_buffer (vm, *b); + + /* Easy case, buffer size greater than mss */ + if (PREDICT_TRUE (seg_size <= tm->bytes_per_buffer)) + { + n_bytes = + session_tx_fifo_peek_bytes (&sctp_conn->sub_conn[idx].connection, + data, offset, max_deq_bytes); + ASSERT (n_bytes == max_deq_bytes); + b[0]->current_length = n_bytes; + sctp_push_hdr_i (sctp_conn, *b, sctp_conn->state); + } + + return n_bytes; +} + +void +sctp_data_retransmit (sctp_connection_t * sctp_conn) +{ + vlib_main_t *vm = vlib_get_main (); + vlib_buffer_t *b = 0; 
+ u32 bi, n_bytes = 0; + + u8 idx = sctp_data_subconn_select (sctp_conn); + + SCTP_DBG_OUTPUT + ("SCTP_CONN = %p, IDX = %u, S_INDEX = %u, C_INDEX = %u, sctp_conn->[...].LCL_PORT = %u, sctp_conn->[...].RMT_PORT = %u", + sctp_conn, idx, sctp_conn->sub_conn[idx].connection.s_index, + sctp_conn->sub_conn[idx].connection.c_index, + sctp_conn->sub_conn[idx].connection.lcl_port, + sctp_conn->sub_conn[idx].connection.rmt_port); + + if (sctp_conn->state >= SCTP_STATE_ESTABLISHED) + { + return; + } + + n_bytes = + sctp_prepare_data_retransmit (sctp_conn, idx, 0, + sctp_conn->sub_conn[idx].cwnd, &b); + if (n_bytes > 0) + SCTP_DBG_OUTPUT ("We have data (%u bytes) to retransmit", n_bytes); + + bi = vlib_get_buffer_index (vm, b); + + sctp_enqueue_to_output_now (vm, b, bi, + sctp_conn->sub_conn[idx].connection.is_ip4); + + return; +} + +#if SCTP_DEBUG_STATE_MACHINE +always_inline u8 +sctp_validate_output_state_machine (sctp_connection_t * sctp_conn, + u8 chunk_type) +{ + u8 result = 0; + switch (sctp_conn->state) + { + case SCTP_STATE_CLOSED: + if (chunk_type != INIT && chunk_type != INIT_ACK) + result = 1; + break; + case SCTP_STATE_ESTABLISHED: + if (chunk_type != DATA && chunk_type != HEARTBEAT && + chunk_type != HEARTBEAT_ACK && chunk_type != SACK && + chunk_type != COOKIE_ACK && chunk_type != SHUTDOWN) + result = 1; + break; + case SCTP_STATE_COOKIE_WAIT: + if (chunk_type != COOKIE_ECHO) + result = 1; + break; + case SCTP_STATE_SHUTDOWN_SENT: + if (chunk_type != SHUTDOWN_COMPLETE) + result = 1; + break; + case SCTP_STATE_SHUTDOWN_RECEIVED: + if (chunk_type != SHUTDOWN_ACK) + result = 1; + break; + } + return result; +} +#endif + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/sctp/sctp_output_node.c b/src/plugins/sctp/sctp_output_node.c new file mode 100644 index 00000000000..7bf2e896acc --- /dev/null +++ b/src/plugins/sctp/sctp_output_node.c @@ -0,0 +1,397 @@ +/* + * Copyright 
(c) 2018 SUSE LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <sctp/sctp.h> +#include <sctp/sctp_debug.h> +#include <vppinfra/random.h> +#include <openssl/hmac.h> + +u32 +ip6_sctp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, + ip6_header_t * ip0, int *bogus_lengthp); + +u32 +ip4_sctp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, + ip4_header_t * ip0); + +#define foreach_sctp4_output_next \ + _ (DROP, "error-drop") \ + _ (IP_LOOKUP, "ip4-lookup") + +#define foreach_sctp6_output_next \ + _ (DROP, "error-drop") \ + _ (IP_LOOKUP, "ip6-lookup") + +static char *sctp_error_strings[] = { +#define sctp_error(n,s) s, +#include <sctp/sctp_error.def> +#undef sctp_error +}; + +typedef enum _sctp_output_next +{ + SCTP_OUTPUT_NEXT_DROP, + SCTP_OUTPUT_NEXT_IP_LOOKUP, + SCTP_OUTPUT_N_NEXT +} sctp_output_next_t; + +typedef struct +{ + sctp_header_t sctp_header; + sctp_connection_t sctp_connection; +} sctp_tx_trace_t; + +always_inline u8 +sctp_is_retransmitting (sctp_connection_t * sctp_conn, u8 idx) +{ + return sctp_conn->sub_conn[idx].is_retransmitting; +} + +always_inline uword +sctp46_output_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->thread_index; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + next_index = node->cached_next_index; + sctp_set_time_now 
(my_thread_index); + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + sctp_header_t *sctp_hdr = 0; + sctp_connection_t *sctp_conn; + sctp_tx_trace_t *t0; + sctp_header_t *th0 = 0; + u32 error0 = SCTP_ERROR_PKTS_SENT, next0 = + SCTP_OUTPUT_NEXT_IP_LOOKUP; + +#if SCTP_DEBUG_STATE_MACHINE + u16 packet_length = 0; +#endif + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + sctp_conn = + sctp_connection_get (sctp_buffer_opaque (b0)-> + sctp.connection_index, my_thread_index); + + if (PREDICT_FALSE (sctp_conn == 0)) + { + error0 = SCTP_ERROR_INVALID_CONNECTION; + next0 = SCTP_OUTPUT_NEXT_DROP; + goto done; + } + + u8 idx = sctp_buffer_opaque (b0)->sctp.subconn_idx; + + th0 = vlib_buffer_get_current (b0); + + if (is_ip4) + { + ip4_header_t *iph4 = vlib_buffer_push_ip4 (vm, + b0, + &sctp_conn->sub_conn + [idx].connection. + lcl_ip.ip4, + &sctp_conn-> + sub_conn + [idx].connection. + rmt_ip.ip4, + IP_PROTOCOL_SCTP, 1); + + u32 checksum = ip4_sctp_compute_checksum (vm, b0, iph4); + + sctp_hdr = ip4_next_header (iph4); + sctp_hdr->checksum = checksum; + + vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data; + +#if SCTP_DEBUG_STATE_MACHINE + packet_length = clib_net_to_host_u16 (iph4->length); +#endif + } + else + { + ip6_header_t *iph6 = vlib_buffer_push_ip6 (vm, + b0, + &sctp_conn->sub_conn + [idx]. + connection.lcl_ip. + ip6, + &sctp_conn->sub_conn + [idx]. + connection.rmt_ip. 
+ ip6, + IP_PROTOCOL_SCTP); + + int bogus = ~0; + u32 checksum = ip6_sctp_compute_checksum (vm, b0, iph6, &bogus); + ASSERT (!bogus); + + sctp_hdr = ip6_next_header (iph6); + sctp_hdr->checksum = checksum; + + vnet_buffer (b0)->l3_hdr_offset = (u8 *) iph6 - b0->data; + vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data; + +#if SCTP_DEBUG_STATE_MACHINE + packet_length = clib_net_to_host_u16 (iph6->payload_length); +#endif + } + + sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr; + u8 chunk_type = vnet_sctp_get_chunk_type (&full_hdr->common_hdr); + if (chunk_type >= UNKNOWN) + { + clib_warning + ("Trying to send an unrecognized chunk... something is really bad."); + error0 = SCTP_ERROR_UNKNOWN_CHUNK; + next0 = SCTP_OUTPUT_NEXT_DROP; + goto done; + } + +#if SCTP_DEBUG_STATE_MACHINE + u8 is_valid = + (sctp_conn->sub_conn[idx].connection.lcl_port == + sctp_hdr->src_port + || sctp_conn->sub_conn[idx].connection.lcl_port == + sctp_hdr->dst_port) + && (sctp_conn->sub_conn[idx].connection.rmt_port == + sctp_hdr->dst_port + || sctp_conn->sub_conn[idx].connection.rmt_port == + sctp_hdr->src_port); + + if (!is_valid) + { + SCTP_DBG_STATE_MACHINE ("BUFFER IS INCORRECT: conn_index = %u, " + "packet_length = %u, " + "chunk_type = %u [%s], " + "connection.lcl_port = %u, sctp_hdr->src_port = %u, " + "connection.rmt_port = %u, sctp_hdr->dst_port = %u", + sctp_conn->sub_conn[idx]. + connection.c_index, packet_length, + chunk_type, + sctp_chunk_to_string (chunk_type), + sctp_conn->sub_conn[idx]. + connection.lcl_port, sctp_hdr->src_port, + sctp_conn->sub_conn[idx]. 
+ connection.rmt_port, + sctp_hdr->dst_port); + + error0 = SCTP_ERROR_UNKNOWN_CHUNK; + next0 = SCTP_OUTPUT_NEXT_DROP; + goto done; + } +#endif + SCTP_DBG_STATE_MACHINE + ("SESSION_INDEX = %u, CONN_INDEX = %u, CURR_CONN_STATE = %u (%s), " + "CHUNK_TYPE = %s, " "SRC_PORT = %u, DST_PORT = %u", + sctp_conn->sub_conn[idx].connection.s_index, + sctp_conn->sub_conn[idx].connection.c_index, + sctp_conn->state, sctp_state_to_string (sctp_conn->state), + sctp_chunk_to_string (chunk_type), full_hdr->hdr.src_port, + full_hdr->hdr.dst_port); + + /* Let's make sure the state-machine does not send anything crazy */ +#if SCTP_DEBUG_STATE_MACHINE + if (sctp_validate_output_state_machine (sctp_conn, chunk_type) != 0) + { + SCTP_DBG_STATE_MACHINE + ("Sending the wrong chunk (%s) based on state-machine status (%s)", + sctp_chunk_to_string (chunk_type), + sctp_state_to_string (sctp_conn->state)); + + error0 = SCTP_ERROR_UNKNOWN_CHUNK; + next0 = SCTP_OUTPUT_NEXT_DROP; + goto done; + + } +#endif + + /* Karn's algorithm: RTT measurements MUST NOT be made using + * packets that were retransmitted + */ + if (!sctp_is_retransmitting (sctp_conn, idx)) + { + /* Measure RTT with this */ + if (chunk_type == DATA + && sctp_conn->sub_conn[idx].RTO_pending == 0) + { + sctp_conn->sub_conn[idx].RTO_pending = 1; + sctp_conn->sub_conn[idx].rtt_ts = sctp_time_now (); + } + else + sctp_conn->sub_conn[idx].rtt_ts = sctp_time_now (); + } + + /* Let's take care of TIMERS */ + switch (chunk_type) + { + case COOKIE_ECHO: + { + sctp_conn->state = SCTP_STATE_COOKIE_ECHOED; + break; + } + case DATA: + { + SCTP_ADV_DBG_OUTPUT ("PACKET_LENGTH = %u", packet_length); + + sctp_timer_update (sctp_conn, idx, SCTP_TIMER_T3_RXTX, + sctp_conn->sub_conn[idx].RTO); + break; + } + case SHUTDOWN: + { + /* Start the SCTP_TIMER_T2_SHUTDOWN timer */ + sctp_timer_set (sctp_conn, idx, SCTP_TIMER_T2_SHUTDOWN, + sctp_conn->sub_conn[idx].RTO); + sctp_conn->state = SCTP_STATE_SHUTDOWN_SENT; + break; + } + case SHUTDOWN_ACK: + { + /* 
Start the SCTP_TIMER_T2_SHUTDOWN timer */ + sctp_timer_set (sctp_conn, idx, SCTP_TIMER_T2_SHUTDOWN, + sctp_conn->sub_conn[idx].RTO); + sctp_conn->state = SCTP_STATE_SHUTDOWN_ACK_SENT; + break; + } + case SHUTDOWN_COMPLETE: + { + sctp_conn->state = SCTP_STATE_CLOSED; + break; + } + } + + vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = + sctp_conn->sub_conn[idx].c_fib_index; + + b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; + + SCTP_DBG_STATE_MACHINE + ("SESSION_INDEX = %u, CONNECTION_INDEX = %u, " "NEW_STATE = %s, " + "CHUNK_SENT = %s", sctp_conn->sub_conn[idx].connection.s_index, + sctp_conn->sub_conn[idx].connection.c_index, + sctp_state_to_string (sctp_conn->state), + sctp_chunk_to_string (chunk_type)); + + vnet_sctp_common_hdr_params_host_to_net (&full_hdr->common_hdr); + + done: + b0->error = node->errors[error0]; + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + t0 = vlib_add_trace (vm, node, b0, sizeof (*t0)); + if (th0) + { + clib_memcpy_fast (&t0->sctp_header, th0, + sizeof (t0->sctp_header)); + } + else + { + clib_memset (&t0->sctp_header, 0, sizeof (t0->sctp_header)); + } + clib_memcpy_fast (&t0->sctp_connection, sctp_conn, + sizeof (t0->sctp_connection)); + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; +} + +VLIB_NODE_FN (sctp4_output_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return sctp46_output_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +VLIB_NODE_FN (sctp6_output_node) (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return sctp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sctp4_output_node) = +{ + .name = "sctp4-output", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = SCTP_N_ERROR, + .error_strings = sctp_error_strings, + .n_next_nodes = SCTP_OUTPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [SCTP_OUTPUT_NEXT_##s] = n, + foreach_sctp4_output_next +#undef _ + }, + .format_buffer = format_sctp_header, + .format_trace = format_sctp_tx_trace, +}; +/* *INDENT-ON* */ + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sctp6_output_node) = +{ + .name = "sctp6-output", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = SCTP_N_ERROR, + .error_strings = sctp_error_strings, + .n_next_nodes = SCTP_OUTPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [SCTP_OUTPUT_NEXT_##s] = n, + foreach_sctp6_output_next +#undef _ + }, + .format_buffer = format_sctp_header, + .format_trace = format_sctp_tx_trace, +}; +/* *INDENT-ON* */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/sctp/sctp_packet.h b/src/plugins/sctp/sctp_packet.h new file mode 100644 index 00000000000..04995aa7ee3 --- /dev/null +++ b/src/plugins/sctp/sctp_packet.h @@ -0,0 +1,1470 @@ +/* + * Copyright (c) 2017 SUSE LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef included_vnet_sctp_packet_h +#define included_vnet_sctp_packet_h + +#include <stdbool.h> + +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> + +/* + * As per RFC 4960 + * https://tools.ietf.org/html/rfc4960 + */ + +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Source Port Number | Destination Port Number | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Verification Tag | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Checksum | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + /* + * This is the SCTP sender's port number. It can be used by the + * receiver in combination with the source IP address, the SCTP + * destination port, and possibly the destination IP address to + * identify the association to which this packet belongs. + * The port number 0 MUST NOT be used. + */ + u16 src_port; + + /* + * This is the SCTP port number to which this packet is destined. + * The receiving host will use this port number to de-multiplex the + * SCTP packet to the correct receiving endpoint/application. + * The port number 0 MUST NOT be used. + */ + u16 dst_port; + + /* + * The receiver of this packet uses the Verification Tag to validate + * the sender of this SCTP packet. On transmit, the value of this + * Verification Tag MUST be set to the value of the Initiate Tag + * received from the peer endpoint during the association + * initialization, with the following exceptions: + * - A packet containing an INIT chunk MUST have a zero Verification + * Tag. + * - A packet containing a SHUTDOWN COMPLETE chunk with the T bit + * set MUST have the Verification Tag copied from the packet with + * the SHUTDOWN ACK chunk. 
+ * - A packet containing an ABORT chunk may have the verification tag + * copied from the packet that caused the ABORT to be sent. + * An INIT chunk MUST be the only chunk in the SCTP packet carrying it. + */ + u32 verification_tag; + + /* + * This field contains the checksum of this SCTP packet. + * SCTP uses the CRC32c algorithm. + */ + u32 checksum; + +} sctp_header_t; + +always_inline void +vnet_set_sctp_src_port (sctp_header_t * h, u16 src_port) +{ + h->src_port = clib_host_to_net_u16 (src_port); +} + +always_inline u16 +vnet_get_sctp_src_port (sctp_header_t * h) +{ + return (clib_net_to_host_u16 (h->src_port)); +} + +always_inline void +vnet_set_sctp_dst_port (sctp_header_t * h, u16 dst_port) +{ + h->dst_port = clib_host_to_net_u16 (dst_port); +} + +always_inline u16 +vnet_get_sctp_dst_port (sctp_header_t * h) +{ + return (clib_net_to_host_u16 (h->dst_port)); +} + +always_inline void +vnet_set_sctp_verification_tag (sctp_header_t * h, u32 verification_tag) +{ + h->verification_tag = clib_host_to_net_u32 (verification_tag); +} + +always_inline u32 +vnet_get_sctp_verification_tag (sctp_header_t * h) +{ + return (clib_net_to_host_u32 (h->verification_tag)); +} + +always_inline void +vnet_set_sctp_checksum (sctp_header_t * h, u32 checksum) +{ + h->checksum = clib_host_to_net_u32 (checksum); +} + +always_inline u32 +vnet_get_sctp_checksum (sctp_header_t * h) +{ + return (clib_net_to_host_u32 (h->checksum)); +} + +/* + * Multiple chunks can be bundled into one SCTP packet up to the MTU + * size, except for the INIT, INIT ACK, and SHUTDOWN COMPLETE chunks. + * These chunks MUST NOT be bundled with any other chunk in a packet. 
+ * + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Common Header | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Chunk #1 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ... | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Chunk #n | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ + +typedef enum +{ + DATA = 0, + INIT, + INIT_ACK, + SACK, + HEARTBEAT, + HEARTBEAT_ACK, + ABORT, + SHUTDOWN, + SHUTDOWN_ACK, + OPERATION_ERROR, + COOKIE_ECHO, + COOKIE_ACK, + ECNE, + CWR, + SHUTDOWN_COMPLETE, + UNKNOWN +} sctp_chunk_type; + +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Chunk Type | Chunk Flags | Chunk Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + /* + * This field identifies the type of information contained in the + * Chunk Value field. It takes a value from 0 to 254. + * The value of 255 is reserved for future use as an extension field. 
+ * + * The values of Chunk Types are defined as follows: + * ID Value Chunk Type + * ----- ---------- + * 0 - Payload Data (DATA) + * 1 - Initiation (INIT) + * 2 - Initiation Acknowledgement (INIT ACK) + * 3 - Selective Acknowledgement (SACK) + * 4 - Heartbeat Request (HEARTBEAT) + * 5 - Heartbeat Acknowledgement (HEARTBEAT ACK) + * 6 - Abort (ABORT) + * 7 - Shutdown (SHUTDOWN) + * 8 - Shutdown Acknowledgement (SHUTDOWN ACK) + * 9 - Operation Error (ERROR) + * 10 - State Cookie (COOKIE ECHO) + * 11 - Cookie Acknowledgement (COOKIE ACK) + * 12 - Reserved for Explicit Congestion Notification Echo (ECNE) + * 13 - Reserved for Congestion Window Reduced (CWR) + * 14 - Shutdown Complete (SHUTDOWN COMPLETE) + * 15 to 62 - available + * 63 - reserved for IETF-defined Chunk Extensions + * 64 to 126 - available + * 127 - reserved for IETF-defined Chunk Extensions + * 128 to 190 - available + * 191 - reserved for IETF-defined Chunk Extensions + * 192 to 254 - available + * 255 - reserved for IETF-defined Chunk Extensions + * + * Chunk Types are encoded such that the highest-order 2 bits specify + * the action that must be taken if the processing endpoint does not + * recognize the Chunk Type. + * 00 - Stop processing this SCTP packet and discard it, do not + * process any further chunks within it. + * 01 - Stop processing this SCTP packet and discard it, do not + * process any further chunks within it, and report the + * unrecognized chunk in an 'Unrecognized Chunk Type'. + * 10 - Skip this chunk and continue processing. + * 11 - Skip this chunk and continue processing, but report in an + * ERROR chunk using the 'Unrecognized Chunk Type' cause of error. + * + * Note: The ECNE and CWR chunk types are reserved for future use of + * Explicit Congestion Notification (ECN); + */ + //u8 type; + + /* + * The usage of these bits depends on the Chunk type as given by the + * Chunk Type field. Unless otherwise specified, they are set to 0 on + * transmit and are ignored on receipt. 
+ */ + //u8 flags; + + /* + * This value represents the size of the chunk in bytes, including + * the Chunk Type, Chunk Flags, Chunk Length, and Chunk Value fields. + * Therefore, if the Chunk Value field is zero-length, the Length + * field will be set to 4. + * The Chunk Length field does not count any chunk padding. + * Chunks (including Type, Length, and Value fields) are padded out + * by the sender with all zero bytes to be a multiple of 4 bytes + * long. This padding MUST NOT be more than 3 bytes in total. The + * Chunk Length value does not include terminating padding of the + * chunk. However, it does include padding of any variable-length + * parameter except the last parameter in the chunk. The receiver + * MUST ignore the padding. + * + * Note: A robust implementation should accept the chunk whether or + * not the final padding has been included in the Chunk Length. + */ + //u16 length; + + u32 params; + +} sctp_chunks_common_hdr_t; + +typedef struct +{ + sctp_header_t hdr; + sctp_chunks_common_hdr_t common_hdr; + +} sctp_full_hdr_t; + +#define CHUNK_TYPE_MASK 0xFF000000 +#define CHUNK_TYPE_SHIFT 24 + +#define CHUNK_FLAGS_MASK 0x00FF0000 +#define CHUNK_FLAGS_SHIFT 16 + +#define CHUNK_UBIT_MASK 0x00040000 +#define CHUNK_UBIT_SHIFT 18 + +#define CHUNK_BBIT_MASK 0x00020000 +#define CHUNK_BBIT_SHIFT 17 + +#define CHUNK_EBIT_MASK 0x00010000 +#define CHUNK_EBIT_SHIFT 16 + +#define CHUNK_LENGTH_MASK 0x0000FFFF +#define CHUNK_LENGTH_SHIFT 0 + +always_inline void +vnet_sctp_common_hdr_params_host_to_net (sctp_chunks_common_hdr_t * h) +{ + h->params = clib_host_to_net_u32 (h->params); +} + +always_inline void +vnet_sctp_common_hdr_params_net_to_host (sctp_chunks_common_hdr_t * h) +{ + h->params = clib_net_to_host_u32 (h->params); +} + +always_inline void +vnet_sctp_set_ubit (sctp_chunks_common_hdr_t * h) +{ + h->params &= ~(CHUNK_UBIT_MASK); + h->params |= (1 << CHUNK_UBIT_SHIFT) & CHUNK_UBIT_MASK; +} + +always_inline u8 +vnet_sctp_get_ubit 
(sctp_chunks_common_hdr_t * h) +{ + return ((h->params & CHUNK_UBIT_MASK) >> CHUNK_UBIT_SHIFT); +} + +always_inline void +vnet_sctp_set_bbit (sctp_chunks_common_hdr_t * h) +{ + h->params &= ~(CHUNK_BBIT_MASK); + h->params |= (1 << CHUNK_BBIT_SHIFT) & CHUNK_BBIT_MASK; +} + +always_inline u8 +vnet_sctp_get_bbit (sctp_chunks_common_hdr_t * h) +{ + return ((h->params & CHUNK_BBIT_MASK) >> CHUNK_BBIT_SHIFT); +} + +always_inline void +vnet_sctp_set_ebit (sctp_chunks_common_hdr_t * h) +{ + h->params &= ~(CHUNK_EBIT_MASK); + h->params |= (1 << CHUNK_EBIT_SHIFT) & CHUNK_EBIT_MASK; +} + +always_inline u8 +vnet_sctp_get_ebit (sctp_chunks_common_hdr_t * h) +{ + return ((h->params & CHUNK_EBIT_MASK) >> CHUNK_EBIT_SHIFT); +} + +always_inline void +vnet_sctp_set_chunk_type (sctp_chunks_common_hdr_t * h, sctp_chunk_type t) +{ + h->params &= ~(CHUNK_TYPE_MASK); + h->params |= (t << CHUNK_TYPE_SHIFT) & CHUNK_TYPE_MASK; +} + +always_inline u8 +vnet_sctp_get_chunk_type (sctp_chunks_common_hdr_t * h) +{ + return ((h->params & CHUNK_TYPE_MASK) >> CHUNK_TYPE_SHIFT); +} + +always_inline void +vnet_sctp_set_chunk_length (sctp_chunks_common_hdr_t * h, u16 length) +{ + h->params &= ~(CHUNK_LENGTH_MASK); + h->params |= (length << CHUNK_LENGTH_SHIFT) & CHUNK_LENGTH_MASK; +} + +always_inline u16 +vnet_sctp_get_chunk_length (sctp_chunks_common_hdr_t * h) +{ + return ((h->params & CHUNK_LENGTH_MASK) >> CHUNK_LENGTH_SHIFT); +} + +/* + * Payload chunk + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 0 | Reserved|U|B|E| Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | TSN | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Stream Identifier S | Stream Sequence Number n | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Payload Protocol Identifier | + * 
+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * \ \ + * / User Data (seq n of Stream S) / + * \ \ + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + sctp_header_t sctp_hdr; + /* + * Type (8 bits): 0 + * Flags (8 bits): + * -- Reserved (5 bits): all 0s + * -- U bit + * -- B bit + * -- E bit + * Length (16 bits): This field indicates the length of the DATA chunk in + * bytes from the beginning of the type field to the end of the User Data + * field excluding any padding. + * A DATA chunk with one byte of user data will have Length set to 17 + * (indicating 17 bytes). A DATA chunk with a User Data field of length L + * will have the Length field set to (16 + L) (indicating 16+L bytes) where + * L MUST be greater than 0. + */ + + /* + * Fragment Description Table: + * + * B E Description + * ============================================================ + * | 1 0 | First piece of a fragmented user message | + * +----------------------------------------------------------+ + * | 0 0 | Middle piece of a fragmented user message | + * +----------------------------------------------------------+ + * | 0 1 | Last piece of a fragmented user message | + * +----------------------------------------------------------+ + * | 1 1 | Unfragmented message | + * ============================================================ + */ + sctp_chunks_common_hdr_t chunk_hdr; + + /* + * This value represents the TSN for this DATA chunk. + * The valid range of TSN is from 0 to 4294967295 (2**32 - 1). + * TSN wraps back to 0 after reaching 4294967295. + */ + u32 tsn; + + /* + * Identifies the stream to which the following user data belongs. + */ + u16 stream_id; + + /* + * This value represents the Stream Sequence Number of the following user data + * within the stream S. Valid range is 0 to 65535. 
+ * When a user message is fragmented by SCTP for transport, the same Stream + * Sequence Number MUST be carried in each of the fragments of the message. + */ + u16 stream_seq; + + /* + * This value represents an application (or upper layer) specified protocol + * identifier. This value is passed to SCTP by its upper layer and sent to its + * peer. This identifier is not used by SCTP but can be used by certain network + * entities, as well as by the peer application, to identify the type of + * information being carried in this DATA chunk. This field must be sent even + * in fragmented DATA chunks (to make sure it is available for agents in the + * middle of the network). Note that this field is NOT touched by an SCTP + * implementation; therefore, its byte order is NOT necessarily big endian. + * The upper layer is responsible for any byte order conversions to this field. + * The value 0 indicates that no application identifier is specified by the + * upper layer for this payload data. + */ + u32 payload_id; + + /* + * This is the payload user data. The implementation MUST pad the end of the + * data to a 4-byte boundary with all-zero bytes. Any padding MUST NOT be + * included in the Length field. A sender MUST never add more than 3 bytes of + * padding. 
+ */ + u32 data[]; + +} sctp_payload_data_chunk_t; + +always_inline void +vnet_sctp_set_tsn (sctp_payload_data_chunk_t * p, u32 tsn) +{ + p->tsn = clib_host_to_net_u32 (tsn); +} + +always_inline u32 +vnet_sctp_get_tsn (sctp_payload_data_chunk_t * p) +{ + return (clib_net_to_host_u32 (p->tsn)); +} + +always_inline void +vnet_sctp_set_stream_id (sctp_payload_data_chunk_t * p, u16 stream_id) +{ + p->stream_id = clib_host_to_net_u16 (stream_id); +} + +always_inline u16 +vnet_sctp_get_stream_id (sctp_payload_data_chunk_t * p) +{ + return (clib_net_to_host_u16 (p->stream_id)); +} + +always_inline void +vnet_sctp_set_stream_seq (sctp_payload_data_chunk_t * p, u16 stream_seq) +{ + p->stream_seq = clib_host_to_net_u16 (stream_seq); +} + +always_inline u16 +vnet_sctp_get_stream_seq (sctp_payload_data_chunk_t * p) +{ + return (clib_net_to_host_u16 (p->stream_seq)); +} + +always_inline void +vnet_sctp_set_payload_id (sctp_payload_data_chunk_t * p, u32 payload_id) +{ + p->payload_id = clib_host_to_net_u32 (payload_id); +} + +always_inline u32 +vnet_sctp_get_payload_id (sctp_payload_data_chunk_t * p) +{ + return (clib_net_to_host_u32 (p->payload_id)); +} + +always_inline u16 +vnet_sctp_calculate_padding (u16 base_length) +{ + if (base_length % 4 == 0) + return 0; + + return (4 - base_length % 4); +} + +#define INBOUND_STREAMS_COUNT 1 +#define OUTBOUND_STREAMS_COUNT 1 + +/* + * INIT chunk + * + * This chunk is used to initiate an SCTP association between two + * endpoints. 
+ * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 1 | Chunk Flags | Chunk Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Initiate Tag | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Advertised Receiver Window Credit (a_rwnd) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Number of Outbound Streams | Number of Inbound Streams | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Initial TSN | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * \ \ + * / Optional/Variable-Length Parameters / + * \ \ + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * The INIT chunk contains the following parameters. Unless otherwise + * noted, each parameter MUST only be included once in the INIT chunk. + * + * Fixed Parameters Status + * ---------------------------------------------- + * Initiate Tag Mandatory + * Advertised Receiver Window Credit Mandatory + * Number of Outbound Streams Mandatory + * Number of Inbound Streams Mandatory + * Initial TSN Mandatory + * + * Variable Parameters Status Type Value + * ------------------------------------------------------------- + * IPv4 Address (Note 1) Optional 5 + * IPv6 Address (Note 1) Optional 6 + * Cookie Preservative Optional 9 + * Reserved for ECN Capable (Note 2) Optional 32768 (0x8000) + * Host Name Address (Note 3) Optional 11 + * Supported Address Types (Note 4) Optional 12 + * + * Note 1: The INIT chunks can contain multiple addresses that can be + * IPv4 and/or IPv6 in any combination. + * + * Note 2: The ECN Capable field is reserved for future use of Explicit + * Congestion Notification. + * + * Note 3: An INIT chunk MUST NOT contain more than one Host Name Address + * parameter. 
Moreover, the sender of the INIT MUST NOT combine any other + * address types with the Host Name Address in the INIT. The receiver of + * INIT MUST ignore any other address types if the Host Name Address parameter + * is present in the received INIT chunk. + * + * Note 4: This parameter, when present, specifies all the address types the + * sending endpoint can support. The absence of this parameter indicates that + * the sending endpoint can support any address type. + * + * IMPLEMENTATION NOTE: If an INIT chunk is received with known parameters that + * are not optional parameters of the INIT chunk, then the receiver SHOULD + * process the INIT chunk and send back an INIT ACK. The receiver of the INIT + * chunk MAY bundle an ERROR chunk with the COOKIE ACK chunk later. + * However, restrictive implementations MAY send back an ABORT chunk in response + * to the INIT chunk. The Chunk Flags field in INIT is reserved, and all bits + * in it should be set to 0 by the sender and ignored by the receiver. + * The sequence of parameters within an INIT can be processed in any order. + */ +typedef struct +{ + sctp_header_t sctp_hdr; + sctp_chunks_common_hdr_t chunk_hdr; + + /* + * The receiver of the INIT (the responding end) records the value of + * the Initiate Tag parameter. + * This value MUST be placed into the Verification Tag field of every + * SCTP packet that the receiver of the INIT transmits within this association. + * The Initiate Tag is allowed to have any value except 0. + * + * If the value of the Initiate Tag in a received INIT chunk is found + * to be 0, the receiver MUST treat it as an error and close the + * association by transmitting an ABORT. + * + * The value of the INIT TAG is recommended to be random for security + * reasons. 
A good method is described in https://tools.ietf.org/html/rfc4086 + */ + u32 initiate_tag; + + /* + * This value represents the dedicated buffer space, in number of bytes, + * the sender of the INIT has reserved in association with this window. + * During the life of the association, this buffer space SHOULD NOT be + * lessened (i.e., dedicated buffers taken away from this association); + * however, an endpoint MAY change the value of a_rwnd it sends in SACK + * chunks. + */ + u32 a_rwnd; + + /* + * Defines the number of outbound streams the sender of this INIT chunk + * wishes to create in this association. + * The value of 0 MUST NOT be used. + * + * Note: A receiver of an INIT with the OS value set to 0 SHOULD abort + * the association. + */ + u16 outbound_streams_count; + + /* + * Defines the maximum number of streams the sender of this INIT + * chunk allows the peer end to create in this association. + * The value 0 MUST NOT be used. + * + * Note: There is no negotiation of the actual number of streams but + * instead the two endpoints will use the min(requested, offered). + * + * Note: A receiver of an INIT with the MIS value of 0 SHOULD abort + * the association. + */ + u16 inboud_streams_count; + + /* + * Defines the initial TSN that the sender will use. + * The valid range is from 0 to 4294967295. + * This field MAY be set to the value of the Initiate Tag field. + */ + u32 initial_tsn; + + /* The following field allows to have multiple optional fields which are: + * - sctp_ipv4_address + * - sctp_ipv6_address + * - sctp_cookie_preservative + * - sctp_hostname_address + * - sctp_supported_address_types + */ + u32 optional_fields[]; + +} sctp_init_chunk_t; + +/* + * INIT ACK chunk + * + * The INIT ACK chunk is used to acknowledge the initiation of an SCTP + * association. The parameter part of INIT ACK is formatted similarly to the + * INIT chunk. 
+ * + * It uses two extra variable parameters: + * - the State Cookie and + * - the Unrecognized Parameter: + */ +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 2 | Chunk Flags | Chunk Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Initiate Tag | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Advertised Receiver Window Credit | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Number of Outbound Streams | Number of Inbound Streams | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Initial TSN | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * \ \ + * / Optional/Variable-Length Parameters / + * \ \ + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef sctp_init_chunk_t sctp_init_ack_chunk_t; + +typedef struct +{ + u16 type; + u16 length; + +} sctp_opt_params_hdr_t; + +#define SHA1_OUTPUT_LENGTH 20 +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Parameter Type | Parameter Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * \ \ + * / Parameter Value / + * \ \ + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + sctp_opt_params_hdr_t param_hdr; + + unsigned char mac[SHA1_OUTPUT_LENGTH]; /* RFC 2104 */ + u64 creation_time; + u32 cookie_lifespan; + +} sctp_state_cookie_param_t; + +/* + * This chunk is used only during the initialization of an association. + * It is sent by the initiator of an association to its peer to complete + * the initialization process. This chunk MUST precede any DATA chunk + * sent within the association, but MAY be bundled with one or more DATA + * chunks in the same packet. 
+ * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 10 |Chunk Flags | Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * / Cookie / + * \ \ + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + sctp_header_t sctp_hdr; + sctp_chunks_common_hdr_t chunk_hdr; + + sctp_state_cookie_param_t cookie; + +} sctp_cookie_echo_chunk_t; + + +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 11 |Chunk Flags | Length = 4 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + sctp_header_t sctp_hdr; + sctp_chunks_common_hdr_t chunk_hdr; + +} sctp_cookie_ack_chunk_t; + +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 14 |Chunk Flags | Length = 4 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + sctp_header_t sctp_hdr; + sctp_chunks_common_hdr_t chunk_hdr; + +} sctp_shutdown_complete_chunk_t; + +/* OPTIONAL or VARIABLE-LENGTH parameters for INIT */ +#define SCTP_IPV4_ADDRESS_TYPE 5 +#define SCTP_IPV4_ADDRESS_TYPE_LENGTH 8 +#define SCTP_IPV6_ADDRESS_TYPE 6 +#define SCTP_IPV6_ADDRESS_TYPE_LENGTH 20 +#define SCTP_STATE_COOKIE_TYPE 7 +#define SCTP_UNRECOGNIZED_TYPE 8 +#define SCTP_COOKIE_PRESERVATIVE_TYPE 9 +#define SCTP_COOKIE_PRESERVATIVE_TYPE_LENGTH 8 +#define SCTP_HOSTNAME_ADDRESS_TYPE 11 +#define SCTP_SUPPORTED_ADDRESS_TYPES 12 + +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 5 | Length = 8 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 
+ * | IPv4 Address | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + sctp_opt_params_hdr_t param_hdr; + + /* + * Contains an IPv4 address of the sending endpoint. + * It is binary encoded. + */ + ip4_address_t address; + +} sctp_ipv4_addr_param_t; + +always_inline void +vnet_sctp_set_ipv4_address (sctp_ipv4_addr_param_t * a, ip4_address_t address) +{ + a->param_hdr.type = clib_host_to_net_u16 (SCTP_IPV4_ADDRESS_TYPE); + a->param_hdr.length = clib_host_to_net_u16 (8); + a->address.as_u32 = clib_host_to_net_u32 (address.as_u32); +} + +always_inline u32 +vnet_sctp_get_ipv4_address (sctp_ipv4_addr_param_t * a) +{ + return (clib_net_to_host_u32 (a->address.as_u32)); +} + +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 6 | Length = 20 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | | + * | IPv6 Address | + * | | + * | | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + sctp_opt_params_hdr_t param_hdr; + + /* + * Contains an IPv6 address of the sending endpoint. + * It is binary encoded. 
+ */ + ip6_address_t address; + +} sctp_ipv6_addr_param_t; + +always_inline void +vnet_sctp_set_ipv6_address (sctp_ipv6_addr_param_t * a, ip6_address_t address) +{ + a->param_hdr.type = clib_host_to_net_u16 (SCTP_IPV6_ADDRESS_TYPE); + a->param_hdr.length = clib_host_to_net_u16 (20); + a->address.as_u64[0] = clib_host_to_net_u64 (address.as_u64[0]); + a->address.as_u64[1] = clib_host_to_net_u64 (address.as_u64[1]); +} + +always_inline ip6_address_t +vnet_sctp_get_ipv6_address (sctp_ipv6_addr_param_t * a) +{ + ip6_address_t ip6_address; + + ip6_address.as_u64[0] = clib_net_to_host_u64 (a->address.as_u64[0]); + ip6_address.as_u64[1] = clib_net_to_host_u64 (a->address.as_u64[1]); + + return ip6_address; +} + +/* + * The sender of the INIT shall use this parameter to suggest to the + * receiver of the INIT for a longer life-span of the State Cookie. + */ +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 9 | Length = 8 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Suggested Cookie Life-Span Increment (msec.) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + sctp_opt_params_hdr_t param_hdr; + + /* + * This parameter indicates to the receiver how much increment in + * milliseconds the sender wishes the receiver to add to its default + * cookie life-span. + * + * This optional parameter should be added to the INIT chunk by the + * sender when it reattempts establishing an association with a peer + * to which its previous attempt of establishing the association + * failed due to a stale cookie operation error. The receiver MAY + * choose to ignore the suggested cookie life-span increase for its + * own security reasons. 
+ */ + u32 life_span_inc; + +} sctp_cookie_preservative_param_t; + +always_inline void +vnet_sctp_set_cookie_preservative (sctp_cookie_preservative_param_t * c, + u32 life_span_inc) +{ + c->param_hdr.type = clib_host_to_net_u16 (SCTP_COOKIE_PRESERVATIVE_TYPE); + c->param_hdr.length = clib_host_to_net_u16 (8); + c->life_span_inc = clib_host_to_net_u32 (life_span_inc); +} + +always_inline u32 +vnet_sctp_get_cookie_preservative (sctp_cookie_preservative_param_t * c) +{ + return (clib_net_to_host_u32 (c->life_span_inc)); +} + +#define FQDN_MAX_LENGTH 256 + +/* + * The sender of INIT uses this parameter to pass its Host Name (in + * place of its IP addresses) to its peer. + * The peer is responsible for resolving the name. + * Using this parameter might make it more likely for the association to work + * across a NAT box. + */ +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 11 | Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * / Host Name / + * \ \ + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + sctp_opt_params_hdr_t param_hdr; + + + /* + * This field contains a host name in "host name syntax" per RFC 1123 + * Section 2.1 + * + * Note: At least one null terminator is included in the Host Name + * string and must be included in the length. + */ + char hostname[FQDN_MAX_LENGTH]; + +} sctp_hostname_param_t; + +always_inline void +vnet_sctp_set_hostname_address (sctp_hostname_param_t * h, char *hostname) +{ + h->param_hdr.length = FQDN_MAX_LENGTH; + h->param_hdr.type = clib_host_to_net_u16 (SCTP_HOSTNAME_ADDRESS_TYPE); + clib_memset (h->hostname, '0', FQDN_MAX_LENGTH); + memcpy (h->hostname, hostname, FQDN_MAX_LENGTH); +} + +#define MAX_SUPPORTED_ADDRESS_TYPES 3 + +/* + * The sender of INIT uses this parameter to list all the address types + * it can support. 
+ */ +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 12 | Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Address Type #1 | Address Type #2 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ...... | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-++-+-+-+-+-+-+-+-+-+-+-+-+-+-++-+-+-+ + */ +typedef struct +{ + sctp_opt_params_hdr_t param_hdr; + + u16 address_type[MAX_SUPPORTED_ADDRESS_TYPES]; + +} sctp_supported_addr_types_param_t; + +always_inline void +vnet_sctp_set_supported_address_types (sctp_supported_addr_types_param_t * s) +{ + s->param_hdr.type = clib_host_to_net_u16 (SCTP_SUPPORTED_ADDRESS_TYPES); + s->param_hdr.length = 4 /* base = type + length */ + + MAX_SUPPORTED_ADDRESS_TYPES * 4; /* each address type is 4 bytes */ + + s->address_type[0] = clib_host_to_net_u16 (SCTP_IPV4_ADDRESS_TYPE); + s->address_type[1] = clib_host_to_net_u16 (SCTP_IPV6_ADDRESS_TYPE); + s->address_type[2] = clib_host_to_net_u16 (SCTP_HOSTNAME_ADDRESS_TYPE); +} + +/* + * Error cause codes to be used for the sctp_error_cause.cause_code field + */ +#define INVALID_STREAM_IDENTIFIER 1 +#define MISSING_MANDATORY_PARAMETER 2 +#define STALE_COOKIE_ERROR 3 +#define OUT_OF_RESOURCE 4 +#define UNRESOLVABLE_ADDRESS 5 +#define UNRECOGNIZED_CHUNK_TYPE 6 +#define INVALID_MANDATORY_PARAMETER 7 +#define UNRECOGNIZED_PARAMETER 8 +#define NO_USER_DATA 9 +#define COOKIE_RECEIVED_WHILE_SHUTTING_DOWN 10 +#define RESTART_OF_ASSOCIATION_WITH_NEW_ADDR 11 +#define USER_INITIATED_ABORT 12 +#define PROTOCOL_VIOLATION 13 + +always_inline void +vnet_sctp_set_state_cookie (sctp_state_cookie_param_t * s) +{ + s->param_hdr.type = clib_host_to_net_u16 (SCTP_STATE_COOKIE_TYPE); + + /* TODO: length & value to be populated */ +} + +typedef struct +{ + sctp_opt_params_hdr_t param_hdr; + + u32 value[]; + +} sctp_unrecognized_param_t; + +always_inline 
void +vnet_sctp_set_unrecognized_param (sctp_unrecognized_param_t * u) +{ + u->param_hdr.type = clib_host_to_net_u16 (UNRECOGNIZED_PARAMETER); + + /* TODO: length & value to be populated */ +} + +/* + * Selective ACK (SACK) chunk + * + * This chunk is sent to the peer endpoint to acknowledge received DATA + * chunks and to inform the peer endpoint of gaps in the received + * subsequences of DATA chunks as represented by their TSNs. + */ +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 3 |Chunk Flags | Chunk Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Cumulative TSN Ack | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Advertised Receiver Window Credit (a_rwnd) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Number of Gap Ack Blocks = N | Number of Duplicate TSNs = X | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Gap Ack Block #1 Start | Gap Ack Block #1 End | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * / / + * \ ... \ + * / / + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Gap Ack Block #N Start | Gap Ack Block #N End | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Duplicate TSN 1 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * / / + * \ ... \ + * / / + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Duplicate TSN X | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + sctp_header_t sctp_hdr; + sctp_chunks_common_hdr_t chunk_hdr; + /* + * This parameter contains the TSN of the last DATA chunk received in + * sequence before a gap. In the case where no DATA chunk has been + * received, this value is set to the peer's Initial TSN minus one. 
+ */ + u32 cumulative_tsn_ack; + + /* + * This field indicates the updated receive buffer space in bytes of + * the sender of this SACK. + */ + u32 a_rwnd; + + /* + * Indicates the number of Gap Ack Blocks included in this SACK. + */ + u16 gap_ack_blocks_count; + + /* + * This field contains the number of duplicate TSNs the endpoint has + * received. Each duplicate TSN is listed following the Gap Ack Block + * list. + */ + u16 duplicate_tsn_count; + + /* + * Indicates the Start offset TSN for this Gap Ack Block. To calculate + * the actual TSN number the Cumulative TSN Ack is added to this offset + * number. This calculated TSN identifies the first TSN in this Gap Ack + * Block that has been received. + */ + u16 *gap_ack_block_start; + + /* + * Indicates the End offset TSN for this Gap Ack Block. To calculate + * the actual TSN number, the Cumulative TSN Ack is added to this offset + * number. This calculated TSN identifies the TSN of the last DATA chunk + * received in this Gap Ack Block. + */ + u16 *gap_ack_block_end; + + /* + * Indicates the number of times a TSN was received in duplicate since + * the last SACK was sent. Every time a receiver gets a duplicate TSN + * (before sending the SACK), it adds it to the list of duplicates. + * The duplicate count is reinitialized to zero after sending each SACK. 
+ */ + u32 duplicate_tsn; + +} sctp_selective_ack_chunk_t; + +always_inline void +vnet_sctp_set_cumulative_tsn_ack (sctp_selective_ack_chunk_t * s, + u32 cumulative_tsn_ack) +{ + vnet_sctp_set_chunk_type (&s->chunk_hdr, SACK); + s->cumulative_tsn_ack = clib_host_to_net_u32 (cumulative_tsn_ack); +} + +always_inline u32 +vnet_sctp_get_cumulative_tsn_ack (sctp_selective_ack_chunk_t * s) +{ + return clib_net_to_host_u32 (s->cumulative_tsn_ack); +} + +always_inline void +vnet_sctp_set_arwnd (sctp_selective_ack_chunk_t * s, u32 a_rwnd) +{ + vnet_sctp_set_chunk_type (&s->chunk_hdr, SACK); + s->a_rwnd = clib_host_to_net_u32 (a_rwnd); +} + +always_inline u32 +vnet_sctp_get_arwnd (sctp_selective_ack_chunk_t * s) +{ + return clib_net_to_host_u32 (s->a_rwnd); +} + +always_inline void +vnet_sctp_set_gap_ack_blocks_count (sctp_selective_ack_chunk_t * s, + u16 gap_ack_blocks_count) +{ + vnet_sctp_set_chunk_type (&s->chunk_hdr, SACK); + s->gap_ack_blocks_count = clib_host_to_net_u16 (gap_ack_blocks_count); + + if (s->gap_ack_block_start == NULL) + s->gap_ack_block_start = + clib_mem_alloc (sizeof (u16) * gap_ack_blocks_count); + if (s->gap_ack_block_end == NULL) + s->gap_ack_block_end = + clib_mem_alloc (sizeof (u16) * gap_ack_blocks_count); +} + +always_inline u16 +vnet_sctp_get_gap_ack_blocks_count (sctp_selective_ack_chunk_t * s) +{ + return clib_net_to_host_u32 (s->gap_ack_blocks_count); +} + +always_inline void +vnet_sctp_set_duplicate_tsn_count (sctp_selective_ack_chunk_t * s, + u16 duplicate_tsn_count) +{ + vnet_sctp_set_chunk_type (&s->chunk_hdr, SACK); + s->duplicate_tsn_count = clib_host_to_net_u16 (duplicate_tsn_count); +} + +always_inline u16 +vnet_sctp_get_duplicate_tsn_count (sctp_selective_ack_chunk_t * s) +{ + return clib_net_to_host_u16 (s->duplicate_tsn_count); +} + +/* + * Heartbeat Info + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Heartbeat Info 
Type=1 | HB Info Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * / Sender-Specific Heartbeat Info / + * \ \ + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + sctp_opt_params_hdr_t param_hdr; + + /* + * The Sender-Specific Heartbeat Info field should normally include + * information about the sender's current time when this HEARTBEAT + * chunk is sent and the destination transport address to which this + * HEARTBEAT is sent. + * This information is simply reflected back by the receiver in the + * HEARTBEAT ACK message. + * + * Note also that the HEARTBEAT message is both for reachability + * checking and for path verification. + * When a HEARTBEAT chunk is being used for path verification purposes, + * it MUST hold a 64-bit random nonce. + */ + u64 hb_info; + +} sctp_hb_info_param_t; + +always_inline void +vnet_sctp_set_heartbeat_info (sctp_hb_info_param_t * h, u64 hb_info, + u16 hb_info_length) +{ + h->hb_info = clib_host_to_net_u16 (1); + h->param_hdr.length = clib_host_to_net_u16 (hb_info_length); + h->hb_info = clib_host_to_net_u64 (hb_info); +} + +/* + * Heartbeat Request + * + * An endpoint should send this chunk to its peer endpoint to probe the + * reachability of a particular destination transport address defined in + * the present association. + * The parameter field contains the Heartbeat Information, which is a + * variable-length opaque data structure understood only by the sender. 
+ */ +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 4 | Chunk Flags | Heartbeat Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * \ \ + * / Heartbeat Information TLV (Variable-Length) / + * \ \ + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + sctp_header_t sctp_hdr; + sctp_chunks_common_hdr_t chunk_hdr; + sctp_hb_info_param_t hb_info; + +} sctp_hb_req_chunk_t; + +always_inline void +vnet_sctp_set_hb_request_info (sctp_hb_req_chunk_t * h, + sctp_hb_info_param_t * hb_info) +{ + vnet_sctp_set_chunk_type (&h->chunk_hdr, HEARTBEAT); + memcpy (&h->hb_info, hb_info, sizeof (h->hb_info)); +} + +/* + * Heartbeat Acknowledgement + * + * An endpoint should send this chunk to its peer endpoint as a response + * to a HEARTBEAT chunk. + * A HEARTBEAT ACK is always sent to the source IP address of the IP datagram + * containing the HEARTBEAT chunk to which this ack is responding. 
+ */ +/* + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 5 | Chunk Flags | Heartbeat Ack Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * \ \ + * / Heartbeat Information TLV (Variable-Length) / + * \ \ + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef sctp_hb_req_chunk_t sctp_hb_ack_chunk_t; + +always_inline void +vnet_sctp_set_hb_ack_info (sctp_hb_ack_chunk_t * h, + sctp_hb_info_param_t * hb_info) +{ + vnet_sctp_set_chunk_type (&h->chunk_hdr, HEARTBEAT_ACK); + memcpy (&h->hb_info, hb_info, sizeof (h->hb_info)); +} + +/* + * Error cause + */ +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Cause Code | Cause Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * / Cause-Specific Information / + * \ \ + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +*/ +typedef struct +{ + + sctp_opt_params_hdr_t param_hdr; + u64 cause_info; + +} sctp_err_cause_param_t; + + +/* + * An end-point sends this chunk to its peer end-point to notify it of + * certain error conditions. It contains one or more error causes. + * An Operation Error is not considered fatal in and of itself, but may be + * used with an ABORT chunk to report a fatal condition. 
It has the + * following parameters: + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 9 | Chunk Flags | Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * \ \ + * / one or more Error Causes / + * \ \ + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + sctp_header_t sctp_hdr; + sctp_chunks_common_hdr_t chunk_hdr; + sctp_err_cause_param_t err_causes[]; + /* err_causes is a flexible array member: the struct itself reserves no room for causes, so the buffer holding it must provide that space. */ +} sctp_operation_error_t; + +/* + * Abort Association (ABORT) + * + * The ABORT chunk is sent to the peer of an association to close the + * association. The ABORT chunk may contain Cause Parameters to inform + * the receiver about the reason of the abort. DATA chunks MUST NOT be + * bundled with ABORT. Control chunks (except for INIT, INIT ACK, and + * SHUTDOWN COMPLETE) MAY be bundled with an ABORT, but they MUST be + * placed before the ABORT in the SCTP packet or they will be ignored by + * the receiver. + * + * If an endpoint receives an ABORT with a format error or no TCB is + * found, it MUST silently discard it. Moreover, under any + * circumstances, an endpoint that receives an ABORT MUST NOT respond to + * that ABORT by sending an ABORT of its own. 
+ */ +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 6 |Reserved |T| Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * \ \ + * / zero or more Error Causes / + * \ \ + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * err_causes below is a flexible array member, so the struct itself + * reserves no room for causes (the diagram allows zero of them). + */ +typedef struct +{ + sctp_header_t sctp_hdr; + sctp_chunks_common_hdr_t chunk_hdr; + sctp_err_cause_param_t err_causes[]; + +} sctp_abort_chunk_t; + /* Sets the chunk type of a to ABORT. NOTE(review): despite the name, the T bit is never written - the flags assignment in the body is commented out. */ +always_inline void +vnet_sctp_set_tbit (sctp_abort_chunk_t * a) +{ + vnet_sctp_set_chunk_type (&a->chunk_hdr, ABORT); + // a->chunk_hdr.flags = clib_host_to_net_u16 (1); +} + /* Sets the chunk type of a to ABORT. NOTE(review): the flags assignment is commented out, so this currently has the same effect as vnet_sctp_set_tbit. */ +always_inline void +vnet_sctp_unset_tbit (sctp_abort_chunk_t * a) +{ + vnet_sctp_set_chunk_type (&a->chunk_hdr, ABORT); + // a->chunk_hdr.flags = clib_host_to_net_u16 (0); +} + +/* + * Shutdown Association (SHUTDOWN) + * + * An endpoint in an association MUST use this chunk to initiate a + * graceful close of the association with its peer. + */ +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 7 | Chunk Flags | Length = 8 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Cumulative TSN Ack | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + sctp_header_t sctp_hdr; + sctp_chunks_common_hdr_t chunk_hdr; + /* + * This parameter contains the TSN of the last chunk received in + * sequence before any gaps. + * + * Note: Since the SHUTDOWN message does not contain Gap Ack Blocks, + * it cannot be used to acknowledge TSNs received out of order. In a + * SACK, lack of Gap Ack Blocks that were previously included + * indicates that the data receiver reneged on the associated DATA + * chunks. 
Since SHUTDOWN does not contain Gap Ack Blocks, the + * receiver of the SHUTDOWN shouldn't interpret the lack of a Gap Ack + * Block as a renege. + */ + u32 cumulative_tsn_ack; + +} sctp_shutdown_association_chunk_t; + /* Marks the chunk header of s as SHUTDOWN and stores tsn_last_chunk, converted with clib_host_to_net_u32, in s->cumulative_tsn_ack; vnet_sctp_set_chunk_length is not called here. */ +always_inline void +vnet_sctp_set_tsn_last_received_chunk (sctp_shutdown_association_chunk_t * s, + u32 tsn_last_chunk) +{ + vnet_sctp_set_chunk_type (&s->chunk_hdr, SHUTDOWN); + s->cumulative_tsn_ack = clib_host_to_net_u32 (tsn_last_chunk); +} + +/* + * Shutdown Acknowledgement (SHUTDOWN ACK) + * + * This chunk MUST be used to acknowledge the receipt of the SHUTDOWN + * chunk at the completion of the shutdown process. + */ +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 8 |Chunk Flags | Length = 4 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * The struct below holds only an sctp_header_t and the common chunk + * header; it declares no further members. + */ +typedef struct +{ + sctp_header_t sctp_hdr; + sctp_chunks_common_hdr_t chunk_hdr; +} sctp_shutdown_ack_chunk_t; + /* Marks the chunk header of s as SHUTDOWN_ACK and sets its chunk length to 4, matching Length = 4 in the diagram above. */ +always_inline void +vnet_sctp_fill_shutdown_ack (sctp_shutdown_ack_chunk_t * s) +{ + vnet_sctp_set_chunk_type (&s->chunk_hdr, SHUTDOWN_ACK); + vnet_sctp_set_chunk_length (&s->chunk_hdr, 4); +} + +#endif /* included_vnet_sctp_packet_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/sctp/sctp_pg.c b/src/plugins/sctp/sctp_pg.c new file mode 100644 index 00000000000..d253330143c --- /dev/null +++ b/src/plugins/sctp/sctp_pg.c @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2017 SUSE LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vnet/ip/ip.h> +#include <vnet/pg/pg.h> + /* Stub: ignores input and args and always returns 1. NOTE(review): presumably meant to parse an SCTP header for the packet generator - confirm. */ +uword +unformat_pg_sctp_header (unformat_input_t * input, va_list * args) +{ + return 1; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/plugins/sctp/sctp_timer.h b/src/plugins/sctp/sctp_timer.h new file mode 100644 index 00000000000..259dea92e09 --- /dev/null +++ b/src/plugins/sctp/sctp_timer.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2017 SUSE LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_sctp_timer_h__ +#define included_sctp_timer_h__ + +#include <vppinfra/tw_timer_16t_2w_512sl.h> +#include <vppinfra/tw_timer_16t_1w_2048sl.h> + +#endif /* included_sctp_timer_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ |