diff options
author | Marco Varlese <marco.varlese@suse.com> | 2017-10-30 18:17:21 +0100 |
---|---|---|
committer | Florin Coras <florin.coras@gmail.com> | 2018-01-24 15:45:35 +0000 |
commit | 191a59401c0552e5ea79041f34456eb9fcc1f311 (patch) | |
tree | 286d871aee72cb37e6d751715e89c6b178c7b527 /src/vnet | |
parent | ce111d2ee3bacec2a09d8f5b664dcfafa0dd50c7 (diff) |
SCTP stack (RFC4960)
== CONTENT ==
* SCTP chunks definition as per RFC4960;
* Helper functions to set/get values to/from the corresponding chunks;
* Hooks to the session/application layers;
* Complete state-machine handling;
* Handling of unexpected chunks received in a given
state (state-machine error handling);
* Support for a single connection only;
* Sample application to test receive/transmit data-path;
* Test to validate SCTP stack;
Change-Id: I1b55c455ab400be9513f4e094dadfc3181d2ebc9
Signed-off-by: Marco Varlese <marco.varlese@suse.com>
Diffstat (limited to 'src/vnet')
-rw-r--r-- | src/vnet/buffer.h | 13 | ||||
-rw-r--r-- | src/vnet/ip/format.h | 5 | ||||
-rw-r--r-- | src/vnet/ip/punt.c | 20 | ||||
-rw-r--r-- | src/vnet/ipsec/ipsec_output.c | 8 | ||||
-rw-r--r-- | src/vnet/sctp/builtin_client.c | 834 | ||||
-rw-r--r-- | src/vnet/sctp/builtin_client.h | 121 | ||||
-rw-r--r-- | src/vnet/sctp/builtin_server.c | 472 | ||||
-rw-r--r-- | src/vnet/sctp/sctp.c | 848 | ||||
-rw-r--r-- | src/vnet/sctp/sctp.h | 645 | ||||
-rw-r--r-- | src/vnet/sctp/sctp_debug.h | 62 | ||||
-rw-r--r-- | src/vnet/sctp/sctp_error.def | 50 | ||||
-rw-r--r-- | src/vnet/sctp/sctp_format.c | 40 | ||||
-rw-r--r-- | src/vnet/sctp/sctp_input.c | 2202 | ||||
-rw-r--r-- | src/vnet/sctp/sctp_output.c | 1331 | ||||
-rw-r--r-- | src/vnet/sctp/sctp_packet.h | 1445 | ||||
-rw-r--r-- | src/vnet/sctp/sctp_pg.c | 30 | ||||
-rw-r--r-- | src/vnet/sctp/sctp_timer.h | 29 | ||||
-rw-r--r-- | src/vnet/session/application_interface.c | 17 | ||||
-rw-r--r-- | src/vnet/session/transport.c | 7 | ||||
-rw-r--r-- | src/vnet/session/transport.h | 3 |
20 files changed, 8170 insertions, 12 deletions
diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h index 317f8bb8e4a..097f68f6c06 100644 --- a/src/vnet/buffer.h +++ b/src/vnet/buffer.h @@ -329,6 +329,19 @@ typedef struct u8 flags; } tcp; + /* SCTP */ + struct + { + u32 connection_index; + u16 sid; /**< Stream ID */ + u16 ssn; /**< Stream Sequence Number */ + u32 tsn; /**< Transmission Sequence Number */ + u16 hdr_offset; /**< offset relative to ip hdr */ + u16 data_offset; /**< offset relative to ip hdr */ + u16 data_len; /**< data len */ + u8 flags; + } sctp; + /* SNAT */ struct { diff --git a/src/vnet/ip/format.h b/src/vnet/ip/format.h index c35f0f4bb74..d527e31a05e 100644 --- a/src/vnet/ip/format.h +++ b/src/vnet/ip/format.h @@ -99,9 +99,10 @@ format_function_t format_ip6_header; unformat_function_t unformat_pg_ip6_header; /* Format a TCP/UDP headers. */ -format_function_t format_tcp_header, format_udp_header; +format_function_t format_tcp_header, format_udp_header, format_sctp_header; -unformat_function_t unformat_pg_tcp_header, unformat_pg_udp_header; +unformat_function_t unformat_pg_tcp_header, unformat_pg_udp_header, + unformat_pg_sctp_header; #endif /* included_ip_format_h */ diff --git a/src/vnet/ip/punt.c b/src/vnet/ip/punt.c index b417427288c..4a027bfdadb 100644 --- a/src/vnet/ip/punt.c +++ b/src/vnet/ip/punt.c @@ -27,6 +27,7 @@ #include <vnet/pg/pg.h> #include <vnet/udp/udp.h> #include <vnet/tcp/tcp.h> +#include <vnet/sctp/sctp.h> #include <vnet/ip/punt.h> #include <vppinfra/sparse_vec.h> #include <vlib/unix/unix.h> @@ -689,11 +690,13 @@ vnet_punt_add_del (vlib_main_t * vm, u8 ipv, u8 protocol, u16 port, bool is_add) { - /* For now we only support UDP punt */ - if (protocol != IP_PROTOCOL_UDP && protocol != IP_PROTOCOL_TCP) + /* For now we only support TCP, UDP and SCTP punt */ + if (protocol != IP_PROTOCOL_UDP && + protocol != IP_PROTOCOL_TCP && protocol != IP_PROTOCOL_SCTP) return clib_error_return (0, - "only UDP (%d) and TCP (%d) protocols are supported, got %d", - IP_PROTOCOL_UDP, 
IP_PROTOCOL_TCP, protocol); + "only UDP (%d), TCP (%d) and SCTP (%d) protocols are supported, got %d", + IP_PROTOCOL_UDP, IP_PROTOCOL_TCP, + IP_PROTOCOL_SCTP, protocol); if (ipv != (u8) ~ 0 && ipv != 4 && ipv != 6) return clib_error_return (0, "IP version must be 4 or 6, got %d", ipv); @@ -706,6 +709,8 @@ vnet_punt_add_del (vlib_main_t * vm, u8 ipv, u8 protocol, u16 port, udp_punt_unknown (vm, 1, is_add); else if (protocol == IP_PROTOCOL_TCP) tcp_punt_unknown (vm, 1, is_add); + else if (protocol == IP_PROTOCOL_SCTP) + sctp_punt_unknown (vm, 1, is_add); } if ((ipv == 6) || (ipv == (u8) ~ 0)) @@ -714,6 +719,8 @@ vnet_punt_add_del (vlib_main_t * vm, u8 ipv, u8 protocol, u16 port, udp_punt_unknown (vm, 0, is_add); else if (protocol == IP_PROTOCOL_TCP) tcp_punt_unknown (vm, 0, is_add); + else if (protocol == IP_PROTOCOL_SCTP) + sctp_punt_unknown (vm, 0, is_add); } return 0; @@ -721,8 +728,9 @@ vnet_punt_add_del (vlib_main_t * vm, u8 ipv, u8 protocol, u16 port, else if (is_add) { - if (protocol == IP_PROTOCOL_TCP) - return clib_error_return (0, "punt TCP ports is not supported yet"); + if (protocol == IP_PROTOCOL_TCP || protocol == IP_PROTOCOL_SCTP) + return clib_error_return (0, + "punt TCP/SCTP ports is not supported yet"); if (ipv == 4 || ipv == (u8) ~ 0) udp_register_dst_port (vm, port, udp4_punt_node.index, 1); diff --git a/src/vnet/ipsec/ipsec_output.c b/src/vnet/ipsec/ipsec_output.c index e86292c0d17..d56b665157d 100644 --- a/src/vnet/ipsec/ipsec_output.c +++ b/src/vnet/ipsec/ipsec_output.c @@ -100,7 +100,9 @@ ipsec_output_policy_match (ipsec_spd_t * spd, u8 pr, u32 la, u32 ra, u16 lp, if (ra > clib_net_to_host_u32 (p->raddr.stop.ip4.as_u32)) continue; - if (PREDICT_FALSE ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP))) + if (PREDICT_FALSE + ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP) + && (pr != IP_PROTOCOL_SCTP))) return p; if (lp < p->lport.start) @@ -153,7 +155,9 @@ ipsec_output_ip6_policy_match (ipsec_spd_t * spd, if (!ip6_addr_match_range (la, 
&p->laddr.start.ip6, &p->laddr.stop.ip6)) continue; - if (PREDICT_FALSE ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP))) + if (PREDICT_FALSE + ((pr != IP_PROTOCOL_TCP) && (pr != IP_PROTOCOL_UDP) + && (pr != IP_PROTOCOL_SCTP))) return p; if (lp < p->lport.start) diff --git a/src/vnet/sctp/builtin_client.c b/src/vnet/sctp/builtin_client.c new file mode 100644 index 00000000000..4e50c0ae2ea --- /dev/null +++ b/src/vnet/sctp/builtin_client.c @@ -0,0 +1,834 @@ +/* + * Copyright (c) 2018 SUSE LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <vnet/vnet.h> +#include <vnet/plugin/plugin.h> +#include <vnet/sctp/builtin_client.h> + +#include <vlibapi/api.h> +#include <vlibmemory/api.h> +#include <vpp/app/version.h> + +tclient_main_t tclient_main; + +#define SCTP_BUILTIN_CLIENT_DBG (0) + +static void +signal_evt_to_cli_i (int *code) +{ + tclient_main_t *tm = &tclient_main; + ASSERT (vlib_get_thread_index () == 0); + vlib_process_signal_event (tm->vlib_main, tm->cli_node_index, *code, 0); +} + +static void +signal_evt_to_cli (int code) +{ + if (vlib_get_thread_index () != 0) + vl_api_rpc_call_main_thread (signal_evt_to_cli_i, (u8 *) & code, + sizeof (code)); + else + signal_evt_to_cli_i (&code); +} + +static void +send_test_chunk (tclient_main_t * tm, session_t * s) +{ + u8 *test_data = tm->connect_test_data; + int test_buf_offset; + u32 bytes_this_chunk; + session_fifo_event_t evt; + svm_fifo_t *txf; + int rv; + + ASSERT (vec_len (test_data) > 0); + + test_buf_offset = s->bytes_sent % vec_len (test_data); + bytes_this_chunk = vec_len (test_data) - test_buf_offset; + bytes_this_chunk = bytes_this_chunk < s->bytes_to_send + ? bytes_this_chunk : s->bytes_to_send; + + txf = s->server_tx_fifo; + rv = svm_fifo_enqueue_nowait (txf, bytes_this_chunk, + test_data + test_buf_offset); + + /* If we managed to enqueue data... */ + if (rv > 0) + { + /* Account for it... 
*/ + s->bytes_to_send -= rv; + s->bytes_sent += rv; + + if (SCTP_BUILTIN_CLIENT_DBG) + { + /* *INDENT-OFF* */ + ELOG_TYPE_DECLARE (e) = + { + .format = "tx-enq: xfer %d bytes, sent %u remain %u", + .format_args = "i4i4i4", + }; + /* *INDENT-ON* */ + struct + { + u32 data[3]; + } *ed; + ed = ELOG_DATA (&vlib_global_main.elog_main, e); + ed->data[0] = rv; + ed->data[1] = s->bytes_sent; + ed->data[2] = s->bytes_to_send; + } + + /* Poke the session layer */ + if (svm_fifo_set_event (txf)) + { + /* Fabricate TX event, send to vpp */ + evt.fifo = txf; + evt.event_type = FIFO_EVENT_APP_TX; + + if (svm_queue_add + (tm->vpp_event_queue[txf->master_thread_index], (u8 *) & evt, + 0 /* do wait for mutex */ )) + clib_warning ("could not enqueue event"); + } + } +} + +static void +receive_test_chunk (tclient_main_t * tm, session_t * s) +{ + svm_fifo_t *rx_fifo = s->server_rx_fifo; + u32 my_thread_index = vlib_get_thread_index (); + int n_read, i; + + /* Allow enqueuing of new event */ + // svm_fifo_unset_event (rx_fifo); + + if (tm->test_bytes) + { + n_read = svm_fifo_dequeue_nowait (rx_fifo, + vec_len (tm->rx_buf[my_thread_index]), + tm->rx_buf[my_thread_index]); + } + else + { + n_read = svm_fifo_max_dequeue (rx_fifo); + svm_fifo_dequeue_drop (rx_fifo, n_read); + } + + if (SCTP_BUILTIN_CLIENT_DBG) + clib_warning ("Receiving test chunk; n_read = %d", n_read); + + if (n_read > 0) + { + if (SCTP_BUILTIN_CLIENT_DBG) + { + /* *INDENT-OFF* */ + ELOG_TYPE_DECLARE (e) = + { + .format = "rx-deq: %d bytes", + .format_args = "i4", + }; + /* *INDENT-ON* */ + struct + { + u32 data[1]; + } *ed; + ed = ELOG_DATA (&vlib_global_main.elog_main, e); + ed->data[0] = n_read; + } + + if (tm->test_bytes) + { + for (i = 0; i < n_read; i++) + { + if (tm->rx_buf[my_thread_index][i] + != ((s->bytes_received + i) & 0xff)) + { + clib_warning ("read %d error at byte %lld, 0x%x not 0x%x", + n_read, s->bytes_received + i, + tm->rx_buf[my_thread_index][i], + ((s->bytes_received + i) & 0xff)); + 
tm->test_failed = 1; + } + } + } + + if (s->bytes_to_receive < n_read) + { + s->bytes_to_receive = 0; + s->bytes_received += s->bytes_received; + } + else + { + s->bytes_to_receive -= n_read; + s->bytes_received += n_read; + } + } +} + +static uword +builtin_client_node_fn (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * frame) +{ + tclient_main_t *tm = &tclient_main; + int my_thread_index = vlib_get_thread_index (); + session_t *sp; + int i; + int delete_session; + u32 *connection_indices; + u32 *connections_this_batch; + u32 nconnections_this_batch; + + connection_indices = tm->connection_index_by_thread[my_thread_index]; + connections_this_batch = + tm->connections_this_batch_by_thread[my_thread_index]; + + if ((tm->run_test == 0) || + ((vec_len (connection_indices) == 0) + && vec_len (connections_this_batch) == 0)) + return 0; + + /* Grab another pile of connections */ + if (PREDICT_FALSE (vec_len (connections_this_batch) == 0)) + { + nconnections_this_batch = + clib_min (tm->connections_per_batch, vec_len (connection_indices)); + + ASSERT (nconnections_this_batch > 0); + vec_validate (connections_this_batch, nconnections_this_batch - 1); + clib_memcpy (connections_this_batch, + connection_indices + vec_len (connection_indices) + - nconnections_this_batch, + nconnections_this_batch * sizeof (u32)); + _vec_len (connection_indices) -= nconnections_this_batch; + } + + if (PREDICT_FALSE (tm->prev_conns != tm->connections_per_batch + && tm->prev_conns == vec_len (connections_this_batch))) + { + tm->repeats++; + tm->prev_conns = vec_len (connections_this_batch); + if (tm->repeats == 500000) + { + clib_warning ("stuck clients"); + } + } + else + { + tm->prev_conns = vec_len (connections_this_batch); + tm->repeats = 0; + } + + for (i = 0; i < vec_len (connections_this_batch); i++) + { + delete_session = 1; + + sp = pool_elt_at_index (tm->sessions, connections_this_batch[i]); + + if (sp->bytes_to_send > 0) + { + send_test_chunk (tm, sp); + delete_session 
= 0; + } + + if (sp->bytes_to_receive > 0) + { + receive_test_chunk (tm, sp); + delete_session = 0; + } + if (PREDICT_FALSE (delete_session == 1)) + { + u32 index, thread_index; + stream_session_t *s; + + __sync_fetch_and_add (&tm->tx_total, sp->bytes_sent); + __sync_fetch_and_add (&tm->rx_total, sp->bytes_received); + + session_parse_handle (sp->vpp_session_handle, + &index, &thread_index); + s = session_get_if_valid (index, thread_index); + + if (s) + { + vnet_disconnect_args_t _a, *a = &_a; + a->handle = session_handle (s); + a->app_index = tm->app_index; + vnet_disconnect_session (a); + + vec_delete (connections_this_batch, 1, i); + i--; + __sync_fetch_and_add (&tm->ready_connections, -1); + } + else + clib_warning ("session AWOL?"); + + /* Kick the debug CLI process */ + if (tm->ready_connections == 0) + { + signal_evt_to_cli (2); + } + } + } + + tm->connection_index_by_thread[my_thread_index] = connection_indices; + tm->connections_this_batch_by_thread[my_thread_index] = + connections_this_batch; + return 0; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (builtin_sctp_client_node) = +{ + .function = builtin_client_node_fn, + .name = "builtin-sctp-client", + .type = VLIB_NODE_TYPE_INPUT, + .state = VLIB_NODE_STATE_DISABLED, +}; +/* *INDENT-ON* */ + +static int +create_api_loopback (tclient_main_t * tm) +{ + api_main_t *am = &api_main; + vl_shmem_hdr_t *shmem_hdr; + + shmem_hdr = am->shmem_hdr; + tm->vl_input_queue = shmem_hdr->vl_input_queue; + tm->my_client_index = + vl_api_memclnt_create_internal ("sctp_test_client", tm->vl_input_queue); + return 0; +} + +static int +sctp_test_clients_init (vlib_main_t * vm) +{ + tclient_main_t *tm = &tclient_main; + vlib_thread_main_t *vtm = vlib_get_thread_main (); + u32 num_threads; + int i; + + if (create_api_loopback (tm)) + return -1; + + num_threads = 1 /* main thread */ + vtm->n_threads; + + /* Init test data. 
Big buffer */ + vec_validate (tm->connect_test_data, 1024 * 1024 - 1); + for (i = 0; i < vec_len (tm->connect_test_data); i++) + tm->connect_test_data[i] = i & 0xff; + + vec_validate (tm->rx_buf, num_threads - 1); + for (i = 0; i < num_threads; i++) + vec_validate (tm->rx_buf[i], vec_len (tm->connect_test_data) - 1); + + tm->is_init = 1; + + vec_validate (tm->connection_index_by_thread, vtm->n_vlib_mains); + vec_validate (tm->connections_this_batch_by_thread, vtm->n_vlib_mains); + vec_validate (tm->vpp_event_queue, vtm->n_vlib_mains); + + return 0; +} + +static int +builtin_session_connected_callback (u32 app_index, u32 api_context, + stream_session_t * s, u8 is_fail) +{ + tclient_main_t *tm = &tclient_main; + session_t *session; + u32 session_index; + u8 thread_index = vlib_get_thread_index (); + + if (is_fail) + { + clib_warning ("connection %d failed!", api_context); + signal_evt_to_cli (-1); + return 0; + } + + ASSERT (s->thread_index == thread_index); + + if (!tm->vpp_event_queue[thread_index]) + tm->vpp_event_queue[thread_index] = + session_manager_get_vpp_event_queue (thread_index); + + /* + * Setup session + */ + clib_spinlock_lock_if_init (&tm->sessions_lock); + pool_get (tm->sessions, session); + clib_spinlock_unlock_if_init (&tm->sessions_lock); + + memset (session, 0, sizeof (*session)); + session_index = session - tm->sessions; + session->bytes_to_send = tm->bytes_to_send; + session->bytes_to_receive = tm->no_return ? 
0ULL : tm->bytes_to_send; + session->server_rx_fifo = s->server_rx_fifo; + session->server_rx_fifo->client_session_index = session_index; + session->server_tx_fifo = s->server_tx_fifo; + session->server_tx_fifo->client_session_index = session_index; + session->vpp_session_handle = session_handle (s); + + vec_add1 (tm->connection_index_by_thread[thread_index], session_index); + __sync_fetch_and_add (&tm->ready_connections, 1); + if (tm->ready_connections == tm->expected_connections) + { + tm->run_test = 1; + /* Signal the CLI process that the action is starting... */ + signal_evt_to_cli (1); + } + + return 0; +} + +static void +builtin_session_reset_callback (stream_session_t * s) +{ + if (s->session_state == SESSION_STATE_READY) + clib_warning ("Reset active connection %U", format_stream_session, s, 2); + stream_session_cleanup (s); + return; +} + +static int +builtin_session_create_callback (stream_session_t * s) +{ + return 0; +} + +static void +builtin_session_disconnect_callback (stream_session_t * s) +{ + tclient_main_t *tm = &tclient_main; + vnet_disconnect_args_t _a, *a = &_a; + a->handle = session_handle (s); + a->app_index = tm->app_index; + vnet_disconnect_session (a); + return; +} + +static int +builtin_server_rx_callback (stream_session_t * s) +{ + return 0; +} + +/* *INDENT-OFF* */ +static session_cb_vft_t builtin_clients = { + .session_reset_callback = builtin_session_reset_callback, + .session_connected_callback = builtin_session_connected_callback, + .session_accept_callback = builtin_session_create_callback, + .session_disconnect_callback = builtin_session_disconnect_callback, + .builtin_server_rx_callback = builtin_server_rx_callback +}; +/* *INDENT-ON* */ + +static clib_error_t * +attach_builtin_test_clients_app (u8 * appns_id, u64 appns_flags, + u64 appns_secret) +{ + u32 segment_name_length, prealloc_fifos, segment_size = 2 << 20; + tclient_main_t *tm = &tclient_main; + vnet_app_attach_args_t _a, *a = &_a; + u8 segment_name[128]; + u64 
options[16]; + clib_error_t *error = 0; + + segment_name_length = ARRAY_LEN (segment_name); + + memset (a, 0, sizeof (*a)); + memset (options, 0, sizeof (options)); + + a->api_client_index = tm->my_client_index; + a->segment_name = segment_name; + a->segment_name_length = segment_name_length; + a->session_cb_vft = &builtin_clients; + + prealloc_fifos = tm->prealloc_fifos ? tm->expected_connections : 1; + + if (tm->private_segment_size) + segment_size = tm->private_segment_size; + + options[APP_OPTIONS_ACCEPT_COOKIE] = 0x12345678; + options[APP_OPTIONS_SEGMENT_SIZE] = segment_size; + options[APP_OPTIONS_RX_FIFO_SIZE] = tm->fifo_size; + options[APP_OPTIONS_TX_FIFO_SIZE] = tm->fifo_size; + options[APP_OPTIONS_PRIVATE_SEGMENT_COUNT] = tm->private_segment_count; + options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = prealloc_fifos; + + options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN; + if (appns_id) + { + options[APP_OPTIONS_FLAGS] |= appns_flags; + options[APP_OPTIONS_NAMESPACE_SECRET] = appns_secret; + } + a->options = options; + a->namespace_id = appns_id; + + if ((error = vnet_application_attach (a))) + return error; + + tm->app_index = a->app_index; + return 0; +} + +static void * +tclient_thread_fn (void *arg) +{ + return 0; +} + +/** Start a transmit thread */ +int +start_tx_pthread_sctp (tclient_main_t * tm) +{ + if (tm->client_thread_handle == 0) + { + int rv = pthread_create (&tm->client_thread_handle, + NULL /*attr */ , + tclient_thread_fn, 0); + if (rv) + { + tm->client_thread_handle = 0; + return -1; + } + } + return 0; +} + +clib_error_t * +clients_connect_sctp (vlib_main_t * vm, u8 * uri, u32 n_clients) +{ + tclient_main_t *tm = &tclient_main; + vnet_connect_args_t _a, *a = &_a; + clib_error_t *error = 0; + int i; + for (i = 0; i < n_clients; i++) + { + memset (a, 0, sizeof (*a)); + + a->uri = (char *) uri; + a->api_context = i; + a->app_index = tm->app_index; + a->mp = 0; + + if ((error = vnet_connect_uri (a))) + return error; + + + /* Crude pacing for 
call setups */ + if ((i % 4) == 0) + vlib_process_suspend (vm, 10e-6); + ASSERT (i + 1 >= tm->ready_connections); + while (i + 1 - tm->ready_connections > 1000) + { + vlib_process_suspend (vm, 100e-6); + } + } + return 0; +} + +#define CLI_OUTPUT(_fmt, _args...) \ + if (!tm->no_output) \ + vlib_cli_output(vm, _fmt, ##_args) + +static clib_error_t * +test_sctp_clients_command_fn (vlib_main_t * vm, + unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + tclient_main_t *tm = &tclient_main; + vlib_thread_main_t *thread_main = vlib_get_thread_main (); + uword *event_data = 0, event_type; + u8 *default_connect_uri = (u8 *) "sctp://6.0.1.1/1234", *uri, *appns_id = 0; + u64 tmp, total_bytes, appns_flags = 0, appns_secret = 0; + f64 test_timeout = 20.0, syn_timeout = 20.0, delta; + f64 time_before_connects; + u32 n_clients = 1; + int preallocate_sessions = 0; + char *transfer_type; + clib_error_t *error = 0; + int i; + + tm->bytes_to_send = 8192; + tm->no_return = 0; + tm->fifo_size = 64 << 10; + tm->connections_per_batch = 1000; + tm->private_segment_count = 0; + tm->private_segment_size = 0; + tm->no_output = 0; + tm->test_bytes = 0; + tm->test_failed = 0; + tm->vlib_main = vm; + if (thread_main->n_vlib_mains > 1) + clib_spinlock_init (&tm->sessions_lock); + vec_free (tm->connect_uri); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "nclients %d", &n_clients)) + ; + else if (unformat (input, "mbytes %lld", &tmp)) + tm->bytes_to_send = tmp << 20; + else if (unformat (input, "gbytes %lld", &tmp)) + tm->bytes_to_send = tmp << 30; + else if (unformat (input, "bytes %lld", &tm->bytes_to_send)) + ; + else if (unformat (input, "uri %s", &tm->connect_uri)) + ; + else if (unformat (input, "test-timeout %f", &test_timeout)) + ; + else if (unformat (input, "syn-timeout %f", &syn_timeout)) + ; + else if (unformat (input, "no-return")) + tm->no_return = 1; + else if (unformat (input, "fifo-size %d", &tm->fifo_size)) + tm->fifo_size <<= 
10; + else if (unformat (input, "private-segment-count %d", + &tm->private_segment_count)) + ; + else if (unformat (input, "private-segment-size %U", + unformat_memory_size, &tmp)) + { + if (tmp >= 0x100000000ULL) + return clib_error_return + (0, "private segment size %lld (%llu) too large", tmp, tmp); + tm->private_segment_size = tmp; + } + else if (unformat (input, "preallocate-fifos")) + tm->prealloc_fifos = 1; + else if (unformat (input, "preallocate-sessions")) + preallocate_sessions = 1; + else + if (unformat (input, "client-batch %d", &tm->connections_per_batch)) + ; + else if (unformat (input, "appns %_%v%_", &appns_id)) + ; + else if (unformat (input, "all-scope")) + appns_flags |= (APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE + | APP_OPTIONS_FLAGS_USE_LOCAL_SCOPE); + else if (unformat (input, "local-scope")) + appns_flags = APP_OPTIONS_FLAGS_USE_LOCAL_SCOPE; + else if (unformat (input, "global-scope")) + appns_flags = APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE; + else if (unformat (input, "secret %lu", &appns_secret)) + ; + else if (unformat (input, "no-output")) + tm->no_output = 1; + else if (unformat (input, "test-bytes")) + tm->test_bytes = 1; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + /* Store cli process node index for signalling */ + tm->cli_node_index = vlib_get_current_process (vm)->node_runtime.node_index; + + if (tm->is_init == 0) + { + if (sctp_test_clients_init (vm)) + return clib_error_return (0, "failed init"); + } + + tm->ready_connections = 0; + tm->expected_connections = n_clients; + tm->rx_total = 0; + tm->tx_total = 0; + + uri = default_connect_uri; + if (tm->connect_uri) + uri = tm->connect_uri; + +#if SCTP_BUILTIN_CLIENT_PTHREAD + start_tx_pthread (); +#endif + + vlib_worker_thread_barrier_sync (vm); + vnet_session_enable_disable (vm, 1 /* turn on SCTP, etc. 
*/ ); + vlib_worker_thread_barrier_release (vm); + + if (tm->test_client_attached == 0) + { + if ((error = attach_builtin_test_clients_app (appns_id, appns_flags, + appns_secret))) + { + vec_free (appns_id); + clib_error_report (error); + return error; + } + vec_free (appns_id); + } + tm->test_client_attached = 1; + + /* Turn on the builtin client input nodes */ + for (i = 0; i < thread_main->n_vlib_mains; i++) + vlib_node_set_state (vlib_mains[i], builtin_sctp_client_node.index, + VLIB_NODE_STATE_POLLING); + + if (preallocate_sessions) + { + session_t *sp __attribute__ ((unused)); + for (i = 0; i < n_clients; i++) + pool_get (tm->sessions, sp); + for (i = 0; i < n_clients; i++) + pool_put_index (tm->sessions, i); + } + + /* Fire off connect requests */ + time_before_connects = vlib_time_now (vm); + if ((error = clients_connect_sctp (vm, uri, n_clients))) + return error; + + /* Park until the sessions come up, or ten seconds elapse... */ + vlib_process_wait_for_event_or_clock (vm, syn_timeout); + + event_type = vlib_process_get_events (vm, &event_data); + switch (event_type) + { + case ~0: + CLI_OUTPUT ("Timeout with only %d sessions active...", + tm->ready_connections); + error = + clib_error_return (0, "failed: syn timeout (%f) with %d sessions", + syn_timeout, tm->ready_connections); + goto cleanup; + + case 1: + delta = vlib_time_now (vm) - time_before_connects; + if (delta != 0.0) + CLI_OUTPUT ("%d three-way handshakes in %.2f seconds %.2f/s", + n_clients, delta, ((f64) n_clients) / delta); + + tm->test_start_time = vlib_time_now (tm->vlib_main); + CLI_OUTPUT ("Test started at %.6f", tm->test_start_time); + break; + + default: + CLI_OUTPUT ("unexpected event(1): %d", event_type); + error = clib_error_return (0, "failed: unexpected event(1): %d", + event_type); + goto cleanup; + } + + /* Now wait for the sessions to finish... 
*/ + vlib_process_wait_for_event_or_clock (vm, test_timeout); + event_type = vlib_process_get_events (vm, &event_data); + switch (event_type) + { + case ~0: + CLI_OUTPUT ("Timeout with %d sessions still active...", + tm->ready_connections); + error = clib_error_return (0, "failed: timeout with %d sessions", + tm->ready_connections); + goto cleanup; + + case 2: + tm->test_end_time = vlib_time_now (vm); + CLI_OUTPUT ("Test finished at %.6f", tm->test_end_time); + break; + + default: + CLI_OUTPUT ("unexpected event(2): %d", event_type); + error = clib_error_return (0, "failed: unexpected event(2): %d", + event_type); + goto cleanup; + } + + delta = tm->test_end_time - tm->test_start_time; + + if (delta != 0.0) + { + total_bytes = (tm->no_return ? tm->tx_total : tm->rx_total); + transfer_type = tm->no_return ? "half-duplex" : "full-duplex"; + CLI_OUTPUT ("%lld bytes (%lld mbytes, %lld gbytes) in %.2f seconds", + total_bytes, total_bytes / (1ULL << 20), + total_bytes / (1ULL << 30), delta); + CLI_OUTPUT ("%.2f bytes/second %s", ((f64) total_bytes) / (delta), + transfer_type); + CLI_OUTPUT ("%.4f gbit/second %s", + (((f64) total_bytes * 8.0) / delta / 1e9), transfer_type); + } + else + { + CLI_OUTPUT ("zero delta-t?"); + error = clib_error_return (0, "failed: zero delta-t"); + goto cleanup; + } + + if (tm->test_bytes && tm->test_failed) + error = clib_error_return (0, "failed: test bytes"); + +cleanup: + tm->run_test = 0; + for (i = 0; i < vec_len (tm->connection_index_by_thread); i++) + { + vec_reset_length (tm->connection_index_by_thread[i]); + vec_reset_length (tm->connections_this_batch_by_thread[i]); + } + + pool_free (tm->sessions); + + /* Detach the application, so we can use different fifo sizes next time */ + if (tm->test_client_attached) + { + vnet_app_detach_args_t _da, *da = &_da; + int rv; + + da->app_index = tm->app_index; + rv = vnet_application_detach (da); + if (rv) + { + error = clib_error_return (0, "failed: app detach"); + CLI_OUTPUT ("WARNING: app 
detach failed..."); + } + tm->test_client_attached = 0; + tm->app_index = ~0; + } + if (error) + CLI_OUTPUT ("test failed"); + return error; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (test_clients_command, static) = +{ + .path = "test sctp clients", + .short_help = "test sctp clients [nclients %d] [[m|g]bytes <bytes>] " + "[test-timeout <time>][syn-timeout <time>][no-return][fifo-size <size>]" + "[private-segment-count <count>][private-segment-size <bytes>[m|g]]" + "[preallocate-fifos][preallocate-sessions][client-batch <batch-size>]" + "[uri <sctp://ip/port>][test-bytes][no-output]", + .function = test_sctp_clients_command_fn, + .is_mp_safe = 1, +}; +/* *INDENT-ON* */ + +clib_error_t * +sctp_test_clients_main_init (vlib_main_t * vm) +{ + tclient_main_t *tm = &tclient_main; + tm->is_init = 0; + return 0; +} + +VLIB_INIT_FUNCTION (sctp_test_clients_main_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/sctp/builtin_client.h b/src/vnet/sctp/builtin_client.h new file mode 100644 index 00000000000..ecf22d8e420 --- /dev/null +++ b/src/vnet/sctp/builtin_client.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2018 SUSE LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef __included_tclient_h__ +#define __included_tclient_h__ + +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/ethernet/ethernet.h> + +#include <vppinfra/hash.h> +#include <vppinfra/error.h> +#include <svm/queue.h> +#include <svm/svm_fifo_segment.h> +#include <vnet/session/session.h> +#include <vnet/session/application_interface.h> + +typedef struct +{ + u64 bytes_to_send; + u64 bytes_sent; + u64 bytes_to_receive; + u64 bytes_received; + + svm_fifo_t *server_rx_fifo; + svm_fifo_t *server_tx_fifo; + + u64 vpp_session_handle; +} session_t; + +typedef struct +{ + /* + * Application setup parameters + */ + svm_queue_t *vl_input_queue; /**< vpe input queue */ + svm_queue_t **vpp_event_queue; + + u32 cli_node_index; /**< cli process node index */ + u32 my_client_index; /**< loopback API client handle */ + u32 app_index; /**< app index after attach */ + + /* + * Configuration params + */ + u8 *connect_uri; /**< URI for slave's connect */ + u64 bytes_to_send; /**< Bytes to send */ + u32 configured_segment_size; + u32 fifo_size; + u32 expected_connections; /**< Number of clients/connections */ + u32 connections_per_batch; /**< Connections to rx/tx at once */ + u32 private_segment_count; /**< Number of private fifo segs */ + u32 private_segment_size; /**< size of private fifo segs */ + + /* + * Test state variables + */ + session_t *sessions; /**< Session pool, shared */ + clib_spinlock_t sessions_lock; + u8 **rx_buf; /**< intermediate rx buffers */ + u8 *connect_test_data; /**< Pre-computed test data */ + u32 **connection_index_by_thread; + u32 **connections_this_batch_by_thread; /**< active connection batch */ + pthread_t client_thread_handle; + + volatile u32 ready_connections; + volatile u32 finished_connections; + volatile u64 rx_total; + volatile u64 tx_total; + volatile int run_test; /**< Signal start of test */ + + f64 test_start_time; + f64 test_end_time; + u32 prev_conns; + u32 repeats; + /* + * Flags + */ + u8 is_init; + u8 
test_client_attached; + u8 no_return; + u8 test_return_packets; + int i_am_master; + int drop_packets; /**< drop all packets */ + u8 prealloc_fifos; /**< Request fifo preallocation */ + u8 no_output; + u8 test_bytes; + u8 test_failed; + + /* + * Convenience + */ + vlib_main_t *vlib_main; + vnet_main_t *vnet_main; + ethernet_main_t *ethernet_main; +} tclient_main_t; + +extern tclient_main_t tclient_main; + +vlib_node_registration_t tclient_node; + +#endif /* __included_tclient_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/sctp/builtin_server.c b/src/vnet/sctp/builtin_server.c new file mode 100644 index 00000000000..81267e78434 --- /dev/null +++ b/src/vnet/sctp/builtin_server.c @@ -0,0 +1,472 @@ +/* + * Copyright (c) 2018 SUSE LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <vnet/vnet.h> +#include <vlibmemory/api.h> +#include <vnet/session/application.h> +#include <vnet/session/application_interface.h> + +typedef struct +{ + /* + * Server app parameters + */ + svm_queue_t **vpp_queue; + svm_queue_t *vl_input_queue; /**< Sever's event queue */ + + u32 app_index; /**< Server app index */ + u32 my_client_index; /**< API client handle */ + u32 node_index; /**< process node index for evnt scheduling */ + + /* + * Config params + */ + u8 no_echo; /**< Don't echo traffic */ + u32 fifo_size; /**< Fifo size */ + u32 rcv_buffer_size; /**< Rcv buffer size */ + u32 prealloc_fifos; /**< Preallocate fifos */ + u32 private_segment_count; /**< Number of private segments */ + u32 private_segment_size; /**< Size of private segments */ + char *server_uri; /**< Server URI */ + + /* + * Test state + */ + u8 **rx_buf; /**< Per-thread RX buffer */ + u64 byte_index; + u32 **rx_retries; + + vlib_main_t *vlib_main; +} builtin_server_main_t; + +builtin_server_main_t builtin_server_main; + +int +builtin_sctp_session_accept_callback (stream_session_t * s) +{ + builtin_server_main_t *bsm = &builtin_server_main; + + bsm->vpp_queue[s->thread_index] = + session_manager_get_vpp_event_queue (s->thread_index); + s->session_state = SESSION_STATE_READY; + bsm->byte_index = 0; + vec_validate (bsm->rx_retries[s->thread_index], s->session_index); + bsm->rx_retries[s->thread_index][s->session_index] = 0; + return 0; +} + +void +builtin_sctp_session_disconnect_callback (stream_session_t * s) +{ + builtin_server_main_t *bsm = &builtin_server_main; + vnet_disconnect_args_t _a, *a = &_a; + + a->handle = session_handle (s); + a->app_index = bsm->app_index; + vnet_disconnect_session (a); +} + +void +builtin_sctp_session_reset_callback (stream_session_t * s) +{ + clib_warning ("Reset session %U", format_stream_session, s, 2); + stream_session_cleanup (s); +} + + +int +builtin_sctp_session_connected_callback (u32 app_index, u32 api_context, + stream_session_t * s, u8 
is_fail) +{ + clib_warning ("called..."); + return -1; +} + +int +builtin_sctp_add_segment_callback (u32 client_index, + const u8 * seg_name, u32 seg_size) +{ + clib_warning ("called..."); + return -1; +} + +int +builtin_sctp_redirect_connect_callback (u32 client_index, void *mp) +{ + clib_warning ("called..."); + return -1; +} + +void +test_bytes_sctp (builtin_server_main_t * bsm, int actual_transfer) +{ + int i; + u32 my_thread_id = vlib_get_thread_index (); + + for (i = 0; i < actual_transfer; i++) + { + if (bsm->rx_buf[my_thread_id][i] != ((bsm->byte_index + i) & 0xff)) + { + clib_warning ("at %lld expected %d got %d", bsm->byte_index + i, + (bsm->byte_index + i) & 0xff, + bsm->rx_buf[my_thread_id][i]); + } + } + bsm->byte_index += actual_transfer; +} + +/* + * If no-echo, just read the data and be done with it + */ +int +builtin_sctp_server_rx_callback_no_echo (stream_session_t * s) +{ + builtin_server_main_t *bsm = &builtin_server_main; + u32 my_thread_id = vlib_get_thread_index (); + int actual_transfer; + svm_fifo_t *rx_fifo; + + rx_fifo = s->server_rx_fifo; + + do + { + actual_transfer = + svm_fifo_dequeue_nowait (rx_fifo, bsm->rcv_buffer_size, + bsm->rx_buf[my_thread_id]); + } + while (actual_transfer > 0); + return 0; +} + +int +builtin_sctp_server_rx_callback (stream_session_t * s) +{ + u32 n_written, max_dequeue, max_enqueue, max_transfer; + int actual_transfer; + svm_fifo_t *tx_fifo, *rx_fifo; + builtin_server_main_t *bsm = &builtin_server_main; + session_fifo_event_t evt; + u32 thread_index = vlib_get_thread_index (); + + ASSERT (s->thread_index == thread_index); + + rx_fifo = s->server_rx_fifo; + tx_fifo = s->server_tx_fifo; + + ASSERT (rx_fifo->master_thread_index == thread_index); + ASSERT (tx_fifo->master_thread_index == thread_index); + + max_dequeue = svm_fifo_max_dequeue (s->server_rx_fifo); + max_enqueue = svm_fifo_max_enqueue (s->server_tx_fifo); + + if (PREDICT_FALSE (max_dequeue == 0)) + return 0; + + /* Number of bytes we're going to copy 
*/ + max_transfer = (max_dequeue < max_enqueue) ? max_dequeue : max_enqueue; + + /* No space in tx fifo */ + if (PREDICT_FALSE (max_transfer == 0)) + { + /* XXX timeout for session that are stuck */ + + rx_event: + /* Program self-tap to retry */ + if (svm_fifo_set_event (rx_fifo)) + { + svm_queue_t *q; + evt.fifo = rx_fifo; + evt.event_type = FIFO_EVENT_BUILTIN_RX; + + q = bsm->vpp_queue[thread_index]; + if (PREDICT_FALSE (q->cursize == q->maxsize)) + clib_warning ("out of event queue space"); + else if (svm_queue_add (q, (u8 *) & evt, 0)) + clib_warning ("failed to enqueue self-tap"); + + if (bsm->rx_retries[thread_index][s->session_index] == 500000) + { + clib_warning ("session stuck: %U", format_stream_session, s, 2); + } + if (bsm->rx_retries[thread_index][s->session_index] < 500001) + bsm->rx_retries[thread_index][s->session_index]++; + } + + return 0; + } + + _vec_len (bsm->rx_buf[thread_index]) = max_transfer; + + actual_transfer = svm_fifo_dequeue_nowait (rx_fifo, max_transfer, + bsm->rx_buf[thread_index]); + ASSERT (actual_transfer == max_transfer); + +// test_bytes (bsm, actual_transfer); + + /* + * Echo back + */ + + n_written = svm_fifo_enqueue_nowait (tx_fifo, actual_transfer, + bsm->rx_buf[thread_index]); + + if (n_written != max_transfer) + clib_warning ("short trout!"); + + if (svm_fifo_set_event (tx_fifo)) + { + /* Fabricate TX event, send to vpp */ + evt.fifo = tx_fifo; + evt.event_type = FIFO_EVENT_APP_TX; + + if (svm_queue_add (bsm->vpp_queue[s->thread_index], + (u8 *) & evt, 0 /* do wait for mutex */ )) + clib_warning ("failed to enqueue tx evt"); + } + + if (PREDICT_FALSE (n_written < max_dequeue)) + goto rx_event; + + return 0; +} + +static session_cb_vft_t builtin_session_cb_vft = { + .session_accept_callback = builtin_sctp_session_accept_callback, + .session_disconnect_callback = builtin_sctp_session_disconnect_callback, + .session_connected_callback = builtin_sctp_session_connected_callback, + .add_segment_callback = 
builtin_sctp_add_segment_callback, + .redirect_connect_callback = builtin_sctp_redirect_connect_callback, + .builtin_server_rx_callback = builtin_sctp_server_rx_callback, + .session_reset_callback = builtin_sctp_session_reset_callback +}; + +/* Abuse VPP's input queue */ +static int +create_api_loopback (vlib_main_t * vm) +{ + builtin_server_main_t *bsm = &builtin_server_main; + api_main_t *am = &api_main; + vl_shmem_hdr_t *shmem_hdr; + + shmem_hdr = am->shmem_hdr; + bsm->vl_input_queue = shmem_hdr->vl_input_queue; + bsm->my_client_index = + vl_api_memclnt_create_internal ("sctp_test_server", bsm->vl_input_queue); + return 0; +} + +static int +server_attach (u8 * appns_id, u64 appns_flags, u64 appns_secret) +{ + builtin_server_main_t *bsm = &builtin_server_main; + u8 segment_name[128]; + u64 options[APP_OPTIONS_N_OPTIONS]; + vnet_app_attach_args_t _a, *a = &_a; + u32 segment_size = 512 << 20; + + memset (a, 0, sizeof (*a)); + memset (options, 0, sizeof (options)); + + if (bsm->no_echo) + builtin_session_cb_vft.builtin_server_rx_callback = + builtin_sctp_server_rx_callback_no_echo; + else + builtin_session_cb_vft.builtin_server_rx_callback = + builtin_sctp_server_rx_callback; + + if (bsm->private_segment_size) + segment_size = bsm->private_segment_size; + + a->api_client_index = bsm->my_client_index; + a->session_cb_vft = &builtin_session_cb_vft; + a->options = options; + a->options[APP_OPTIONS_SEGMENT_SIZE] = segment_size; + a->options[APP_OPTIONS_RX_FIFO_SIZE] = bsm->fifo_size; + a->options[APP_OPTIONS_TX_FIFO_SIZE] = bsm->fifo_size; + a->options[APP_OPTIONS_PRIVATE_SEGMENT_COUNT] = bsm->private_segment_count; + a->options[APP_OPTIONS_PREALLOC_FIFO_PAIRS] = + bsm->prealloc_fifos ? 
bsm->prealloc_fifos : 1; + + a->options[APP_OPTIONS_FLAGS] = APP_OPTIONS_FLAGS_IS_BUILTIN; + if (appns_id) + { + a->namespace_id = appns_id; + a->options[APP_OPTIONS_FLAGS] |= appns_flags; + a->options[APP_OPTIONS_NAMESPACE_SECRET] = appns_secret; + } + a->segment_name = segment_name; + a->segment_name_length = ARRAY_LEN (segment_name); + + if (vnet_application_attach (a)) + { + clib_warning ("failed to attach server"); + return -1; + } + bsm->app_index = a->app_index; + return 0; +} + +static int +server_listen () +{ + builtin_server_main_t *bsm = &builtin_server_main; + vnet_bind_args_t _a, *a = &_a; + memset (a, 0, sizeof (*a)); + a->app_index = bsm->app_index; + a->uri = bsm->server_uri; + return vnet_bind_uri (a); +} + +static int +server_create (vlib_main_t * vm, u8 * appns_id, u64 appns_flags, + u64 appns_secret) +{ + builtin_server_main_t *bsm = &builtin_server_main; + vlib_thread_main_t *vtm = vlib_get_thread_main (); + u32 num_threads; + int i; + + if (bsm->my_client_index == (u32) ~ 0) + { + if (create_api_loopback (vm)) + { + clib_warning ("failed to create api loopback"); + return -1; + } + } + + num_threads = 1 /* main thread */ + vtm->n_threads; + vec_validate (builtin_server_main.vpp_queue, num_threads - 1); + vec_validate (bsm->rx_buf, num_threads - 1); + vec_validate (bsm->rx_retries, num_threads - 1); + + for (i = 0; i < num_threads; i++) + vec_validate (bsm->rx_buf[i], bsm->rcv_buffer_size); + + if (server_attach (appns_id, appns_flags, appns_secret)) + { + clib_warning ("failed to attach server"); + return -1; + } + if (server_listen ()) + { + clib_warning ("failed to start listening"); + return -1; + } + return 0; +} + +static clib_error_t * +server_create_command_fn (vlib_main_t * vm, unformat_input_t * input, + vlib_cli_command_t * cmd) +{ + builtin_server_main_t *bsm = &builtin_server_main; + u8 server_uri_set = 0, *appns_id = 0; + u64 tmp, appns_flags = 0, appns_secret = 0; + int rv; + + bsm->no_echo = 0; + bsm->fifo_size = 64 << 10; + 
bsm->rcv_buffer_size = 128 << 10; + bsm->prealloc_fifos = 0; + bsm->private_segment_count = 0; + bsm->private_segment_size = 0; + vec_free (bsm->server_uri); + + while (unformat_check_input (input) != UNFORMAT_END_OF_INPUT) + { + if (unformat (input, "no-echo")) + bsm->no_echo = 1; + else if (unformat (input, "fifo-size %d", &bsm->fifo_size)) + bsm->fifo_size <<= 10; + else if (unformat (input, "rcv-buf-size %d", &bsm->rcv_buffer_size)) + ; + else if (unformat (input, "prealloc-fifos %d", &bsm->prealloc_fifos)) + ; + else if (unformat (input, "private-segment-count %d", + &bsm->private_segment_count)) + ; + else if (unformat (input, "private-segment-size %U", + unformat_memory_size, &tmp)) + { + if (tmp >= 0x100000000ULL) + return clib_error_return + (0, "private segment size %lld (%llu) too large", tmp, tmp); + bsm->private_segment_size = tmp; + } + else if (unformat (input, "uri %s", &bsm->server_uri)) + server_uri_set = 1; + else if (unformat (input, "appns %_%v%_", &appns_id)) + ; + else if (unformat (input, "all-scope")) + appns_flags |= (APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE + | APP_OPTIONS_FLAGS_USE_LOCAL_SCOPE); + else if (unformat (input, "local-scope")) + appns_flags |= APP_OPTIONS_FLAGS_USE_LOCAL_SCOPE; + else if (unformat (input, "global-scope")) + appns_flags |= APP_OPTIONS_FLAGS_USE_GLOBAL_SCOPE; + else if (unformat (input, "secret %lu", &appns_secret)) + ; + else + return clib_error_return (0, "unknown input `%U'", + format_unformat_error, input); + } + + vnet_session_enable_disable (vm, 1 /* turn on SCTP, etc. 
*/ ); + + if (!server_uri_set) + bsm->server_uri = (char *) format (0, "sctp://0.0.0.0/1234%c", 0); + + rv = server_create (vm, appns_id, appns_flags, appns_secret); + vec_free (appns_id); + switch (rv) + { + case 0: + break; + default: + return clib_error_return (0, "server_create returned %d", rv); + } + + return 0; +} + +/* *INDENT-OFF* */ +VLIB_CLI_COMMAND (server_create_command, static) = +{ + .path = "test sctp server", + .short_help = "test sctp server [no echo][fifo-size <mbytes>] " + "[rcv-buf-size <bytes>][prealloc-fifos <count>]" + "[private-segment-count <count>][private-segment-size <bytes[m|g]>]" + "[uri <sctp://ip/port>]", + .function = server_create_command_fn, +}; +/* *INDENT-ON* */ + +clib_error_t * +builtin_sctp_server_main_init (vlib_main_t * vm) +{ + builtin_server_main_t *bsm = &builtin_server_main; + bsm->my_client_index = ~0; + return 0; +} + +VLIB_INIT_FUNCTION (builtin_sctp_server_main_init); + +/* +* fd.io coding-style-patch-verification: ON +* +* Local Variables: +* eval: (c-set-style "gnu") +* End: +*/ diff --git a/src/vnet/sctp/sctp.c b/src/vnet/sctp/sctp.c new file mode 100644 index 00000000000..2e37a91dbac --- /dev/null +++ b/src/vnet/sctp/sctp.c @@ -0,0 +1,848 @@ +/* + * Copyright (c) 2017 SUSE LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#include <vnet/sctp/sctp.h> +#include <vnet/sctp/sctp_debug.h> + +sctp_main_t sctp_main; + +static u32 +sctp_connection_bind (u32 session_index, transport_endpoint_t * tep) +{ + sctp_main_t *tm = &sctp_main; + sctp_connection_t *listener; + void *iface_ip; + + pool_get (tm->listener_pool, listener); + memset (listener, 0, sizeof (*listener)); + + listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].parent = listener; + listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_c_index = + listener - tm->listener_pool; + listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.lcl_port = tep->port; + + /* If we are provided a sw_if_index, bind using one of its IPs */ + if (ip_is_zero (&tep->ip, 1) && tep->sw_if_index != ENDPOINT_INVALID_INDEX) + { + if ((iface_ip = ip_interface_get_first_ip (tep->sw_if_index, + tep->is_ip4))) + ip_set (&tep->ip, iface_ip, tep->is_ip4); + } + ip_copy (&listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.lcl_ip, + &tep->ip, tep->is_ip4); + + listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.is_ip4 = tep->is_ip4; + listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.proto = + TRANSPORT_PROTO_SCTP; + listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_s_index = session_index; + listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.fib_index = + tep->fib_index; + listener->state = SCTP_STATE_CLOSED; + + sctp_connection_timers_init (listener); + + return listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_c_index; +} + +u32 +sctp_session_bind (u32 session_index, transport_endpoint_t * tep) +{ + return sctp_connection_bind (session_index, tep); +} + +static void +sctp_connection_unbind (u32 listener_index) +{ + sctp_main_t *tm = vnet_get_sctp_main (); + sctp_connection_t *tc; + + tc = pool_elt_at_index (tm->listener_pool, listener_index); + + /* Poison the entry */ + if (CLIB_DEBUG > 0) + memset (tc, 0xFA, sizeof (*tc)); + + pool_put_index (tm->listener_pool, listener_index); +} + +u32 +sctp_session_unbind (u32 listener_index) +{ + sctp_connection_unbind 
(listener_index); + return 0; +} + +void +sctp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add) +{ + sctp_main_t *tm = &sctp_main; + if (is_ip4) + tm->punt_unknown4 = is_add; + else + tm->punt_unknown6 = is_add; +} + +static int +sctp_alloc_custom_local_endpoint (sctp_main_t * tm, ip46_address_t * lcl_addr, + u16 * lcl_port, u8 is_ip4) +{ + int index, port; + if (is_ip4) + { + index = tm->last_v4_address_rotor++; + if (tm->last_v4_address_rotor >= vec_len (tm->ip4_src_addresses)) + tm->last_v4_address_rotor = 0; + lcl_addr->ip4.as_u32 = tm->ip4_src_addresses[index].as_u32; + } + else + { + index = tm->last_v6_address_rotor++; + if (tm->last_v6_address_rotor >= vec_len (tm->ip6_src_addresses)) + tm->last_v6_address_rotor = 0; + clib_memcpy (&lcl_addr->ip6, &tm->ip6_src_addresses[index], + sizeof (ip6_address_t)); + } + port = transport_alloc_local_port (TRANSPORT_PROTO_SCTP, lcl_addr); + if (port < 1) + { + clib_warning ("Failed to allocate src port"); + return -1; + } + *lcl_port = port; + return 0; +} + +/** + * Initialize all connection timers as invalid + */ +void +sctp_connection_timers_init (sctp_connection_t * tc) +{ + int i, j; + + /* Set all to invalid */ + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + for (j = 0; j < SCTP_N_TIMERS; j++) + { + tc->sub_conn[i].timers[j] = SCTP_TIMER_HANDLE_INVALID; + } + + tc->rto = SCTP_RTO_INIT; +} + +/** + * Stop all connection timers + */ +void +sctp_connection_timers_reset (sctp_connection_t * tc) +{ + int i, j; + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + for (j = 0; j < SCTP_N_TIMERS; j++) + sctp_timer_reset (tc, i, j); + } +} + +const char *sctp_fsm_states[] = { +#define _(sym, str) str, + foreach_sctp_fsm_state +#undef _ +}; + +u8 * +format_sctp_state (u8 * s, va_list * args) +{ + u32 state = va_arg (*args, u32); + + if (state < SCTP_N_STATES) + s = format (s, "%s", sctp_fsm_states[state]); + else + s = format (s, "UNKNOWN (%d (0x%x))", state, state); + return s; +} + +u8 * +format_sctp_connection_id (u8 * 
s, va_list * args) +{ + /* + sctp_connection_t *tc = va_arg (*args, sctp_connection_t *); + if (!tc) + return s; + if (tc->c_is_ip4) + { + s = format (s, "[#%d][%s] %U:%d->%U:%d", tc->c_thread_index, "T", + format_ip4_address, &tc->c_lcl_ip4, + clib_net_to_host_u16 (tc->c_lcl_port), format_ip4_address, + &tc->c_rmt_ip4, clib_net_to_host_u16 (tc->c_rmt_port)); + } + else + { + s = format (s, "[#%d][%s] %U:%d->%U:%d", tc->c_thread_index, "T", + format_ip6_address, &tc->c_lcl_ip6, + clib_net_to_host_u16 (tc->c_lcl_port), format_ip6_address, + &tc->c_rmt_ip6, clib_net_to_host_u16 (tc->c_rmt_port)); + } + */ + return s; +} + +u8 * +format_sctp_connection (u8 * s, va_list * args) +{ + sctp_connection_t *tc = va_arg (*args, sctp_connection_t *); + u32 verbose = va_arg (*args, u32); + + if (!tc) + return s; + s = format (s, "%-50U", format_sctp_connection_id, tc); + if (verbose) + { + s = format (s, "%-15U", format_sctp_state, tc->state); + } + + return s; +} + +/** + * Initialize connection send variables. + */ +void +sctp_init_snd_vars (sctp_connection_t * tc) +{ + u32 time_now; + + /* + * We use the time to randomize iss and for setting up the initial + * timestamp. Make sure it's updated otherwise syn and ack in the + * handshake may make it look as if time has flown in the opposite + * direction for us. + */ + sctp_set_time_now (vlib_get_thread_index ()); + time_now = sctp_time_now (); + + tc->iss = random_u32 (&time_now); + tc->snd_una = tc->iss; + tc->snd_nxt = tc->iss + 1; + tc->snd_una_max = tc->snd_nxt; +} + +/** + * Update max segment size we're able to process. + * + * The value is constrained by our interface's MTU and IP options. It is + * also what we advertise to our peer. 
+ */ +void +sctp_update_rcv_mss (sctp_connection_t * tc) +{ + /* TODO find our iface MTU */ + tc->a_rwnd = DEFAULT_A_RWND - sizeof (sctp_full_hdr_t); + tc->rcv_opts.a_rwnd = tc->a_rwnd; + tc->rcv_a_rwnd = tc->a_rwnd; /* This will be updated by our congestion algos */ +} + +void +sctp_init_mss (sctp_connection_t * tc) +{ + SCTP_DBG ("CONN_INDEX = %u", + tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.c_index); + + u16 default_a_rwnd = 536; + sctp_update_rcv_mss (tc); + + /* TODO cache mss and consider PMTU discovery */ + tc->snd_a_rwnd = clib_min (tc->rcv_opts.a_rwnd, tc->a_rwnd); + + if (tc->snd_a_rwnd < sizeof (sctp_full_hdr_t)) + { + SCTP_ADV_DBG ("tc->snd_a_rwnd < sizeof(sctp_full_hdr_t)"); + /* Assume that at least the min default mss works */ + tc->snd_a_rwnd = default_a_rwnd; + tc->rcv_opts.a_rwnd = default_a_rwnd; + } + + ASSERT (tc->snd_a_rwnd > sizeof (sctp_full_hdr_t)); +} + +/** Initialize sctp connection variables + * + * Should be called after having received a msg from the peer, i.e., a SYN or + * a SYNACK, such that connection options have already been exchanged. 
*/ +void +sctp_connection_init_vars (sctp_connection_t * tc) +{ + sctp_init_mss (tc); + sctp_init_snd_vars (tc); +} + +always_inline sctp_connection_t * +sctp_sub_connection_add (u8 thread_index) +{ + sctp_main_t *tm = vnet_get_sctp_main (); + sctp_connection_t *tc = tm->connections[thread_index]; + + tc->sub_conn[tc->next_avail_sub_conn].connection.c_index = + tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.c_index; + tc->sub_conn[tc->next_avail_sub_conn].connection.thread_index = + thread_index; + tc->sub_conn[tc->next_avail_sub_conn].parent = tc; + + tc->next_avail_sub_conn += 1; + + return tc; +} + +void +sctp_sub_connection_add_ip4 (u8 thread_index, + sctp_ipv4_addr_param_t * ipv4_addr) +{ + sctp_connection_t *tc = sctp_sub_connection_add (thread_index); + + clib_memcpy (&tc->sub_conn[tc->next_avail_sub_conn].connection.lcl_ip.ip4, + &ipv4_addr->address, sizeof (ipv4_addr->address)); +} + +void +sctp_sub_connection_add_ip6 (u8 thread_index, + sctp_ipv6_addr_param_t * ipv6_addr) +{ + sctp_connection_t *tc = sctp_sub_connection_add (thread_index); + + clib_memcpy (&tc->sub_conn[tc->next_avail_sub_conn].connection.lcl_ip.ip6, + &ipv6_addr->address, sizeof (ipv6_addr->address)); +} + +sctp_connection_t * +sctp_connection_new (u8 thread_index) +{ + sctp_main_t *tm = vnet_get_sctp_main (); + sctp_connection_t *tc; + + pool_get (tm->connections[thread_index], tc); + memset (tc, 0, sizeof (*tc)); + tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].parent = tc; + tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_c_index = + tc - tm->connections[thread_index]; + tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_thread_index = thread_index; + tc->local_tag = 0; + tc->next_avail_sub_conn = 1; + + return tc; +} + +sctp_connection_t * +sctp_half_open_connection_new (u8 thread_index) +{ + sctp_main_t *tm = vnet_get_sctp_main (); + sctp_connection_t *tc = 0; + ASSERT (vlib_get_thread_index () == 0); + pool_get (tm->half_open_connections, tc); + memset (tc, 0, sizeof (*tc)); + 
tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_c_index = + tc - tm->half_open_connections; + tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].parent = tc; + return tc; +} + +static inline int +sctp_connection_open (transport_endpoint_t * rmt) +{ + sctp_main_t *tm = vnet_get_sctp_main (); + sctp_connection_t *tc; + ip46_address_t lcl_addr; + u16 lcl_port; + uword thread_id; + int rv; + + u8 idx = sctp_pick_conn_idx_on_state (SCTP_STATE_CLOSED); + + /* + * Allocate local endpoint + */ + if ((rmt->is_ip4 && vec_len (tm->ip4_src_addresses)) + || (!rmt->is_ip4 && vec_len (tm->ip6_src_addresses))) + rv = sctp_alloc_custom_local_endpoint (tm, &lcl_addr, &lcl_port, + rmt->is_ip4); + else + rv = transport_alloc_local_endpoint (TRANSPORT_PROTO_SCTP, + rmt, &lcl_addr, &lcl_port); + + if (rv) + return -1; + + /* + * Create connection and send INIT CHUNK + */ + thread_id = vlib_get_thread_index (); + ASSERT (thread_id == 0); + + clib_spinlock_lock_if_init (&tm->half_open_lock); + tc = sctp_half_open_connection_new (thread_id); + + transport_connection_t *t_conn = &tc->sub_conn[idx].connection; + ip_copy (&t_conn->rmt_ip, &rmt->ip, rmt->is_ip4); + ip_copy (&t_conn->lcl_ip, &lcl_addr, rmt->is_ip4); + tc->sub_conn[idx].parent = tc; + t_conn->rmt_port = rmt->port; + t_conn->lcl_port = clib_host_to_net_u16 (lcl_port); + t_conn->is_ip4 = rmt->is_ip4; + t_conn->proto = TRANSPORT_PROTO_SCTP; + t_conn->fib_index = rmt->fib_index; + + sctp_connection_timers_init (tc); + /* The other connection vars will be initialized after INIT_ACK chunk received */ + sctp_init_snd_vars (tc); + + sctp_send_init (tc); + + clib_spinlock_unlock_if_init (&tm->half_open_lock); + + return tc->sub_conn[idx].connection.c_index; +} + +/** + * Cleans up connection state. + * + * No notifications. 
+ */ +void +sctp_connection_cleanup (sctp_connection_t * tc) +{ + sctp_main_t *tm = &sctp_main; + u8 i; + + /* Cleanup local endpoint if this was an active connect */ + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + transport_endpoint_cleanup (TRANSPORT_PROTO_SCTP, + &tc->sub_conn[i].connection.lcl_ip, + tc->sub_conn[i].connection.lcl_port); + + /* Check if connection is not yet fully established */ + if (tc->state == SCTP_STATE_COOKIE_WAIT) + { + + } + else + { + int thread_index = + tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.thread_index; + + /* Make sure all timers are cleared */ + sctp_connection_timers_reset (tc); + + /* Poison the entry */ + if (CLIB_DEBUG > 0) + memset (tc, 0xFA, sizeof (*tc)); + pool_put (tm->connections[thread_index], tc); + } +} + +int +sctp_session_open (transport_endpoint_t * tep) +{ + return sctp_connection_open (tep); +} + +u16 +sctp_check_outstanding_data_chunks (sctp_connection_t * tc) +{ + return 0; /* Indicates no more data to be read/sent */ +} + +void +sctp_connection_close (sctp_connection_t * tc) +{ + SCTP_DBG ("Closing connection %u...", + tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.c_index); + + tc->state = SCTP_STATE_SHUTDOWN_PENDING; + + sctp_send_shutdown (tc); +} + +void +sctp_session_close (u32 conn_index, u32 thread_index) +{ + ASSERT (thread_index == 0); + + sctp_connection_t *tc; + tc = sctp_connection_get (conn_index, thread_index); + sctp_connection_close (tc); +} + +void +sctp_session_cleanup (u32 conn_index, u32 thread_index) +{ + sctp_connection_t *tc; + tc = sctp_connection_get (conn_index, thread_index); + sctp_connection_timers_reset (tc); + + /* Wait for the session tx events to clear */ + tc->state = SCTP_STATE_CLOSED; +} + +/** + * Update snd_mss to reflect the effective segment size that we can send + */ +void +sctp_update_snd_mss (sctp_connection_t * tc) +{ + /* The overhead for the sctp_header_t and sctp_chunks_common_hdr_t + * (the sum equals to sctp_full_hdr_t) is already taken into account + * 
for the tc->a_rwnd computation. + * So let's not account it again here. + */ + tc->snd_hdr_length = + sizeof (sctp_payload_data_chunk_t) - sizeof (sctp_full_hdr_t); + tc->snd_a_rwnd = + clib_min (tc->a_rwnd, tc->rcv_opts.a_rwnd) - tc->snd_hdr_length; + + SCTP_DBG ("tc->snd_a_rwnd = %u, tc->snd_hdr_length = %u ", + tc->snd_a_rwnd, tc->snd_hdr_length); + + ASSERT (tc->snd_a_rwnd > 0); +} + +u16 +sctp_session_send_mss (transport_connection_t * trans_conn) +{ + SCTP_DBG ("CONN_INDEX: %u", trans_conn->c_index); + + sctp_connection_t *tc = sctp_get_connection_from_transport (trans_conn); + + if (trans_conn == NULL) + { + SCTP_DBG ("trans_conn == NULL"); + return 0; + } + + if (tc == NULL) + { + SCTP_DBG ("tc == NULL"); + return 0; + } + /* Ensure snd_mss does accurately reflect the amount of data we can push + * in a segment. This also makes sure that options are updated according to + * the current state of the connection. */ + sctp_update_snd_mss (tc); + + return tc->snd_a_rwnd; +} + +u16 +sctp_snd_space (sctp_connection_t * sctp_conn) +{ + /* TODO: This requires a real implementation */ + if (sctp_conn == NULL) + { + SCTP_DBG ("sctp_conn == NULL"); + return 0; + } + + if (sctp_conn->state != SCTP_STATE_ESTABLISHED) + { + SCTP_DBG_STATE_MACHINE + ("Trying to send DATA while not in SCTP_STATE_ESTABLISHED"); + return 0; + } + + return sctp_conn->snd_a_rwnd; +} + +u32 +sctp_session_send_space (transport_connection_t * trans_conn) +{ + SCTP_DBG ("CONN_INDEX: %u", trans_conn->c_index); + + sctp_connection_t *tc = sctp_get_connection_from_transport (trans_conn); + + return sctp_snd_space (tc); +} + +u32 +sctp_session_tx_fifo_offset (transport_connection_t * trans_conn) +{ + SCTP_DBG ("CONN_INDEX: %u", trans_conn->c_index); + + sctp_connection_t *tc = sctp_get_connection_from_transport (trans_conn); + + if (tc == NULL) + { + SCTP_DBG ("tc == NULL"); + return 0; + } + + /* This still works if fast retransmit is on */ + return (tc->snd_nxt - tc->snd_una); +} + 
+transport_connection_t * +sctp_session_get_transport (u32 conn_index, u32 thread_index) +{ + sctp_connection_t *tc = sctp_connection_get (conn_index, thread_index); + return &tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection; +} + +transport_connection_t * +sctp_session_get_listener (u32 listener_index) +{ + sctp_main_t *tm = vnet_get_sctp_main (); + sctp_connection_t *tc; + tc = pool_elt_at_index (tm->listener_pool, listener_index); + return &tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection; +} + +u8 * +format_sctp_session (u8 * s, va_list * args) +{ + return NULL; +} + +u8 * +format_sctp_listener_session (u8 * s, va_list * args) +{ + return NULL; +} + +void +sctp_timer_init_handler (u32 conn_index) +{ + sctp_connection_t *tc; + + tc = sctp_connection_get (conn_index, vlib_get_thread_index ()); + /* note: the connection may have already disappeared */ + if (PREDICT_FALSE (tc == 0)) + return; + ASSERT (tc->state == SCTP_STATE_COOKIE_ECHOED); + /* Start cleanup. App wasn't notified yet so use delete notify as + * opposed to delete to cleanup session layer state. 
*/ + stream_session_delete_notify (&tc-> + sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection); + tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].timers[SCTP_TIMER_T1_INIT] = + SCTP_TIMER_HANDLE_INVALID; + + sctp_connection_cleanup (tc); +} + +/* *INDENT OFF* */ +static timer_expiration_handler *sctp_timer_expiration_handlers[SCTP_N_TIMERS] + = { + sctp_timer_init_handler +}; + +/* *INDENT ON* */ + +static void +sctp_expired_timers_dispatch (u32 * expired_timers) +{ + int i; + u32 connection_index, timer_id; + + for (i = 0; i < vec_len (expired_timers); i++) + { + /* Get session index and timer id */ + connection_index = expired_timers[i] & 0x0FFFFFFF; + timer_id = expired_timers[i] >> 28; + + /* Handle expiration */ + (*sctp_timer_expiration_handlers[timer_id]) (connection_index); + } +} + +void +sctp_initialize_timer_wheels (sctp_main_t * tm) +{ + tw_timer_wheel_16t_2w_512sl_t *tw; + /* *INDENT-OFF* */ + foreach_vlib_main (({ + tw = &tm->timer_wheels[ii]; + tw_timer_wheel_init_16t_2w_512sl (tw, sctp_expired_timers_dispatch, + 100e-3 /* timer period 100ms */ , ~0); + tw->last_run_time = vlib_time_now (this_vlib_main); + })); + /* *INDENT-ON* */ +} + +clib_error_t * +sctp_main_enable (vlib_main_t * vm) +{ + sctp_main_t *tm = vnet_get_sctp_main (); + vlib_thread_main_t *vtm = vlib_get_thread_main (); + clib_error_t *error = 0; + u32 num_threads; + int thread; + sctp_connection_t *tc __attribute__ ((unused)); + u32 preallocated_connections_per_thread; + + if ((error = vlib_call_init_function (vm, ip_main_init))) + return error; + if ((error = vlib_call_init_function (vm, ip4_lookup_init))) + return error; + if ((error = vlib_call_init_function (vm, ip6_lookup_init))) + return error; + + /* + * Registrations + */ + + ip4_register_protocol (IP_PROTOCOL_SCTP, sctp4_input_node.index); + ip6_register_protocol (IP_PROTOCOL_SCTP, sctp6_input_node.index); + + /* + * Initialize data structures + */ + + num_threads = 1 /* main thread */ + vtm->n_threads; + vec_validate (tm->connections, 
num_threads - 1); + + /* + * Preallocate connections. Assume that thread 0 won't + * use preallocated threads when running multi-core + */ + if (num_threads == 1) + { + thread = 0; + preallocated_connections_per_thread = tm->preallocated_connections; + } + else + { + thread = 1; + preallocated_connections_per_thread = + tm->preallocated_connections / (num_threads - 1); + } + for (; thread < num_threads; thread++) + { + if (preallocated_connections_per_thread) + pool_init_fixed (tm->connections[thread], + preallocated_connections_per_thread); + } + + /* Initialize per worker thread tx buffers (used for control messages) */ + vec_validate (tm->tx_buffers, num_threads - 1); + + /* Initialize timer wheels */ + vec_validate (tm->timer_wheels, num_threads - 1); + sctp_initialize_timer_wheels (tm); + + /* Initialize clocks per tick for SCTP timestamp. Used to compute + * monotonically increasing timestamps. */ + tm->tstamp_ticks_per_clock = vm->clib_time.seconds_per_clock + / SCTP_TSTAMP_RESOLUTION; + + if (num_threads > 1) + { + } + + vec_validate (tm->tx_frames[0], num_threads - 1); + vec_validate (tm->tx_frames[1], num_threads - 1); + vec_validate (tm->ip_lookup_tx_frames[0], num_threads - 1); + vec_validate (tm->ip_lookup_tx_frames[1], num_threads - 1); + + tm->bytes_per_buffer = vlib_buffer_free_list_buffer_size + (vm, VLIB_BUFFER_DEFAULT_FREE_LIST_INDEX); + + vec_validate (tm->time_now, num_threads - 1); + return error; +} + +clib_error_t * +sctp_enable_disable (vlib_main_t * vm, u8 is_en) +{ + if (is_en) + { + if (sctp_main.is_enabled) + return 0; + + return sctp_main_enable (vm); + } + else + { + sctp_main.is_enabled = 0; + } + + return 0; +} + +transport_connection_t * +sctp_half_open_session_get_transport (u32 conn_index) +{ + sctp_connection_t *tc = sctp_half_open_connection_get (conn_index); + return &tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection; +} + +u8 * +format_sctp_half_open (u8 * s, va_list * args) +{ + u32 tci = va_arg (*args, u32); + 
sctp_connection_t *tc = sctp_half_open_connection_get (tci); + return format (s, "%U", format_sctp_connection_id, tc); +} + +/* *INDENT OFF* */ +const static transport_proto_vft_t sctp_proto = { + .enable = sctp_enable_disable, + .bind = sctp_session_bind, + .unbind = sctp_session_unbind, + .open = sctp_session_open, + .close = sctp_session_close, + .cleanup = sctp_session_cleanup, + .push_header = sctp_push_header, + .send_mss = sctp_session_send_mss, + .send_space = sctp_session_send_space, + .tx_fifo_offset = NULL, //sctp_session_tx_fifo_offset, + .get_connection = sctp_session_get_transport, + .get_listener = sctp_session_get_listener, + .get_half_open = sctp_half_open_session_get_transport, + .format_connection = format_sctp_session, + .format_listener = format_sctp_listener_session, + .format_half_open = format_sctp_half_open, +}; + +/* *INDENT ON* */ + +clib_error_t * +sctp_init (vlib_main_t * vm) +{ + sctp_main_t *tm = vnet_get_sctp_main (); + ip_main_t *im = &ip_main; + ip_protocol_info_t *pi; + /* Session layer, and by implication SCTP, are disabled by default */ + tm->is_enabled = 0; + + /* Register with IP for header parsing */ + pi = ip_get_protocol_info (im, IP_PROTOCOL_SCTP); + if (pi == 0) + return clib_error_return (0, "SCTP protocol info AWOL"); + pi->format_header = format_sctp_header; + pi->unformat_pg_edit = unformat_pg_sctp_header; + + /* Register as transport with session layer */ + transport_register_protocol (TRANSPORT_PROTO_SCTP, &sctp_proto, + FIB_PROTOCOL_IP4, sctp4_output_node.index); + transport_register_protocol (TRANSPORT_PROTO_SCTP, &sctp_proto, + FIB_PROTOCOL_IP6, sctp6_output_node.index); + + return 0; +} + +VLIB_INIT_FUNCTION (sctp_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/sctp/sctp.h b/src/vnet/sctp/sctp.h new file mode 100644 index 00000000000..7c4df309906 --- /dev/null +++ b/src/vnet/sctp/sctp.h @@ -0,0 +1,645 @@ +/* + * 
Copyright (c) 2017 SUSE LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_vnet_sctp_h +#define included_vnet_sctp_h + +#include <vnet/vnet.h> +#include <vnet/ip/ip.h> +#include <vnet/sctp/sctp_timer.h> +#include <vnet/sctp/sctp_packet.h> +#include <vnet/session/transport.h> +#include <vnet/session/session.h> + +/* SCTP timers */ +#define foreach_sctp_timer \ + _(T1_INIT, "T1_INIT") \ + _(T1_COOKIE, "T1_COOKIE") \ + _(T2_SHUTDOWN, "T2_SHUTDOWN") \ + _(T3_RXTX, "T3_RXTX") \ + _(T5_SHUTDOWN_GUARD, "T5_SHUTDOWN_GUARD") + +typedef enum _sctp_timers +{ +#define _(sym, str) SCTP_TIMER_##sym, + foreach_sctp_timer +#undef _ + SCTP_N_TIMERS +} sctp_timers_e; + +#define SCTP_TIMER_HANDLE_INVALID ((u32) ~0) + +typedef enum _sctp_error +{ +#define sctp_error(n,s) SCTP_ERROR_##n, +#include <vnet/sctp/sctp_error.def> +#undef sctp_error + SCTP_N_ERROR, +} sctp_error_t; + +#define NO_FLAG 0 + +#define IS_T_BIT_SET(var) ((var) & (1)) +#define IS_E_BIT_SET(var) ((var) & (1)) +#define IS_B_BIT_SET(var) ((var) & (1<<1)) +#define IS_U_BIT_SET(var) ((var) & (1<<2)) + +#define MAX_SCTP_CONNECTIONS 32 +#define MAIN_SCTP_SUB_CONN_IDX 0 + +#if (VLIB_BUFFER_TRACE_TRAJECTORY) +#define sctp_trajectory_add_start(b, start) \ +{ \ + (*vlib_buffer_trace_trajectory_cb) (b, start); \ +} +#else +#define sctp_trajectory_add_start(b, start) +#endif + +typedef struct _sctp_sub_connection +{ + transport_connection_t connection; /**< Common transport data. First! 
*/ + void *parent; /**< Link to the parent-super connection */ + u32 timers[SCTP_N_TIMERS]; /**< Timer handles into timer wheel */ + +} sctp_sub_connection_t; + +typedef struct +{ + u32 a_rwnd; /**< Maximum segment size advertised */ + +} sctp_options_t; + +typedef struct _sctp_connection +{ + sctp_sub_connection_t sub_conn[MAX_SCTP_CONNECTIONS]; /**< Common transport data. First! */ + + u8 state; /**< SCTP state as per sctp_state_t */ + u16 flags; /**< Chunk flag (see sctp_chunks_common_hdr_t) */ + u32 local_tag; /**< INIT_TAG generated locally */ + u32 remote_tag; /**< INIT_TAG generated by the remote peer */ + u16 life_span_inc; + + /** Send sequence variables RFC4960 */ + u32 snd_una; /**< oldest unacknowledged sequence number */ + u32 snd_una_max; /**< newest unacknowledged sequence number + 1*/ + u32 snd_wl1; /**< seq number used for last snd.wnd update */ + u32 snd_wl2; /**< ack number used for last snd.wnd update */ + u32 snd_nxt; /**< next seq number to be sent */ + + /** Receive sequence variables RFC4960 */ + u32 rcv_nxt; /**< next sequence number expected */ + u32 rcv_las; /**< rcv_nxt at last ack sent/rcv_wnd update */ + u32 iss; /**< initial sent sequence */ + u32 irs; /**< initial remote sequence */ + + /* RTT and RTO */ + u32 rto; /**< Retransmission timeout */ + u32 rto_boff; /**< Index for RTO backoff */ + u32 srtt; /**< Smoothed RTT */ + u32 rttvar; /**< Smoothed mean RTT difference. Approximates variance */ + u32 rtt_ts; /**< Timestamp for tracked ACK */ + u32 rtt_seq; /**< Sequence number for tracked ACK */ + + u32 a_rwnd; /** Constrained by medium / IP / etc. */ + u32 rcv_a_rwnd; /**< LOCAL max seg size that includes options. To be updated by congestion algos, etc. */ + u32 snd_a_rwnd; /**< REMOTE max seg size that includes options. 
To be updated if peer pushes back on window, etc.*/ + sctp_options_t rcv_opts; + sctp_options_t snd_opts; + u32 snd_hdr_length; /**< BASE HEADER LENGTH for the DATA chunk when sending */ + + u8 next_avail_sub_conn; /**< Represent the index of the next free slot in sub_conn */ +} sctp_connection_t; + +typedef void (timer_expiration_handler) (u32 index); + +sctp_connection_t *sctp_connection_new (u8 thread_index); +void sctp_sub_connection_add_ip4 (u8 thread_index, + sctp_ipv4_addr_param_t * ipv4_addr); +void sctp_sub_connection_add_ip6 (u8 thread_index, + sctp_ipv6_addr_param_t * ipv6_addr); +void sctp_connection_close (sctp_connection_t * tc); +void sctp_connection_cleanup (sctp_connection_t * tc); +void sctp_connection_del (sctp_connection_t * tc); + +u32 sctp_push_header (transport_connection_t * tconn, vlib_buffer_t * b); +void sctp_send_init (sctp_connection_t * tc); +void sctp_send_shutdown (sctp_connection_t * tc); +void sctp_send_shutdown_ack (sctp_connection_t * tc); +void sctp_send_shutdown_complete (sctp_connection_t * tc); +void sctp_flush_frame_to_output (vlib_main_t * vm, u8 thread_index, + u8 is_ip4); +void sctp_flush_frames_to_output (u8 thread_index); +void sctp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add); + +format_function_t format_sctp_state; + +u8 *format_sctp_connection_id (u8 * s, va_list * args); +u8 *format_sctp_connection (u8 * s, va_list * args); +u8 *format_sctp_scoreboard (u8 * s, va_list * args); +u8 *format_sctp_header (u8 * s, va_list * args); +u8 *format_sctp_tx_trace (u8 * s, va_list * args); + +clib_error_t *sctp_init (vlib_main_t * vm); +void sctp_connection_timers_init (sctp_connection_t * tc); +void sctp_connection_timers_reset (sctp_connection_t * tc); +void sctp_init_snd_vars (sctp_connection_t * tc); +void sctp_connection_init_vars (sctp_connection_t * tc); + +void sctp_prepare_initack_chunk (sctp_connection_t * ts, vlib_buffer_t * b, + ip4_address_t * ip4_addr, + ip6_address_t * ip6_addr); +void 
sctp_prepare_cookie_echo_chunk (sctp_connection_t * tc, + vlib_buffer_t * b, + sctp_state_cookie_param_t * sc); +void sctp_prepare_cookie_ack_chunk (sctp_connection_t * tc, + vlib_buffer_t * b); +void sctp_prepare_sack_chunk (sctp_connection_t * tc, vlib_buffer_t * b); + +u16 sctp_check_outstanding_data_chunks (sctp_connection_t * tc); + +#define SCTP_TICK 0.001 /**< SCTP tick period (s) */ +#define STHZ (u32) (1/SCTP_TICK) /**< SCTP tick frequency */ +#define SCTP_TSTAMP_RESOLUTION SCTP_TICK /**< Time stamp resolution */ +#define SCTP_PAWS_IDLE 24 * 24 * 60 * 60 * THZ /**< 24 days */ +#define SCTP_FIB_RECHECK_PERIOD 1 * THZ /**< Recheck every 1s */ +#define SCTP_MAX_OPTION_SPACE 40 + +#define SCTP_DUPACK_THRESHOLD 3 +#define SCTP_MAX_RX_FIFO_SIZE 4 << 20 +#define SCTP_MIN_RX_FIFO_SIZE 4 << 10 +#define SCTP_IW_N_SEGMENTS 10 +#define SCTP_ALWAYS_ACK 1 /**< On/off delayed acks */ +#define SCTP_USE_SACKS 1 /**< Disable only for testing */ + +#define IP_PROTOCOL_SCTP 132 + +/** SSCTP FSM state definitions as per RFC4960. 
*/ +#define foreach_sctp_fsm_state \ + _(CLOSED, "CLOSED") \ + _(COOKIE_WAIT, "COOKIE_WAIT") \ + _(COOKIE_ECHOED, "COOKIE_ECHOED") \ + _(ESTABLISHED, "ESTABLISHED") \ + _(SHUTDOWN_PENDING, "SHUTDOWN_PENDING") \ + _(SHUTDOWN_SENT, "SHUTDOWN_SENT") \ + _(SHUTDOWN_RECEIVED, "SHUTDOWN_RECEIVED") \ + _(SHUTDOWN_ACK_SENT, "SHUTDOWN_ACK_SENT") + +typedef enum _sctp_state +{ +#define _(sym, str) SCTP_STATE_##sym, + foreach_sctp_fsm_state +#undef _ + SCTP_N_STATES +} sctp_state_t; + +always_inline char * +sctp_state_to_string (u8 state) +{ + switch (state) + { + case SCTP_STATE_CLOSED: + return "SCTP_STATE_CLOSED"; + case SCTP_STATE_COOKIE_WAIT: + return "SCTP_STATE_COOKIE_WAIT"; + case SCTP_STATE_COOKIE_ECHOED: + return "SCTP_STATE_COOKIE_ECHOED"; + case SCTP_STATE_ESTABLISHED: + return "SCTP_STATE_ESTABLISHED"; + case SCTP_STATE_SHUTDOWN_PENDING: + return "SCTP_STATE_SHUTDOWN_PENDING"; + case SCTP_STATE_SHUTDOWN_SENT: + return "SCTP_STATE_SHUTDOWN_SENT"; + case SCTP_STATE_SHUTDOWN_RECEIVED: + return "SCTP_STATE_SHUTDOWN_RECEIVED"; + case SCTP_STATE_SHUTDOWN_ACK_SENT: + return "SCTP_STATE_SHUTDOWN_ACK_SENT"; + } + return NULL; +} + +always_inline char * +sctp_chunk_to_string (u8 type) +{ + switch (type) + { + case DATA: + return "DATA"; + case INIT: + return "INIT"; + case INIT_ACK: + return "INIT_ACK"; + case SACK: + return "SACK"; + case HEARTBEAT: + return "HEARTBEAT"; + case HEARTBEAT_ACK: + return "HEARTBEAT_ACK"; + case ABORT: + return "ABORT"; + case SHUTDOWN: + return "SHUTDOWN"; + case SHUTDOWN_ACK: + return "SHUTDOWN_ACK"; + case OPERATION_ERROR: + return "OPERATION_ERROR"; + case COOKIE_ECHO: + return "COOKIE_ECHO"; + case COOKIE_ACK: + return "COOKIE_ACK"; + case ECNE: + return "ECNE"; + case CWR: + return "CWR"; + case SHUTDOWN_COMPLETE: + return "SHUTDOWN_COMPLETE"; + } + return NULL; +} + +always_inline char * +sctp_optparam_type_to_string (u8 type) +{ + switch (type) + { + case SCTP_IPV4_ADDRESS_TYPE: + return "SCTP_IPV4_ADDRESS_TYPE"; + case 
SCTP_IPV6_ADDRESS_TYPE: + return "SCTP_IPV6_ADDRESS_TYPE"; + case SCTP_STATE_COOKIE_TYPE: + return "SCTP_STATE_COOKIE_TYPE"; + case SCTP_UNRECOGNIZED_TYPE: + return "SCTP_UNRECOGNIZED_TYPE"; + case SCTP_COOKIE_PRESERVATIVE_TYPE: + return "SCTP_COOKIE_PRESERVATIVE_TYPE"; + case SCTP_HOSTNAME_ADDRESS_TYPE: + return "SCTP_HOSTNAME_ADDRESS_TYPE"; + case SCTP_SUPPORTED_ADDRESS_TYPES: + return "SCTP_SUPPORTED_ADDRESS_TYPES"; + } + return NULL; +} + +#define SCTP_TICK 0.001 /**< SCTP tick period (s) */ +#define SHZ (u32) (1/SCTP_TICK) /**< SCTP tick frequency */ + +/* As per RFC4960, page 83 */ +#define SCTP_RTO_INIT 3 * SHZ /* 3 seconds */ +#define SCTP_RTO_MIN 1 * SHZ /* 1 second */ +#define SCTP_RTO_MAX 60 * SHZ /* 60 seconds */ +#define SCTP_RTO_BURST 4 +#define SCTP_RTO_ALPHA 1/8 +#define SCTP_RTO_BETA 1/4 +#define SCTP_VALID_COOKIE_LIFE 60 * SHZ /* 60 seconds */ +#define SCTP_ASSOCIATION_MAX_RETRANS 10 + +#define SCTP_TO_TIMER_TICK SCTP_TICK*10 /* Period for converting from SCTP_TICK */ + +typedef struct _sctp_lookup_dispatch +{ + u8 next, error; +} sctp_lookup_dispatch_t; + +typedef struct _sctp_main +{ + /* Per-worker thread SCTP connection pools */ + sctp_connection_t **connections; + + /* Pool of listeners. 
*/ + sctp_connection_t *listener_pool; + + /** Dispatch table by state and flags */ + sctp_lookup_dispatch_t dispatch_table[SCTP_N_STATES][64]; + + u8 log2_tstamp_clocks_per_tick; + f64 tstamp_ticks_per_clock; + u32 *time_now; + + /** per-worker tx buffer free lists */ + u32 **tx_buffers; + /** per-worker tx frames to SCTP 4/6 output nodes */ + vlib_frame_t **tx_frames[2]; + /** per-worker tx frames to ip 4/6 lookup nodes */ + vlib_frame_t **ip_lookup_tx_frames[2]; + + /* Per worker-thread timer wheel for connections timers */ + tw_timer_wheel_16t_2w_512sl_t *timer_wheels; + + /* Pool of half-open connections on which we've sent a SYN */ + sctp_connection_t *half_open_connections; + clib_spinlock_t half_open_lock; + + /* TODO: Congestion control algorithms registered */ + /* sctp_cc_algorithm_t *cc_algos; */ + + /* Flag that indicates if stack is on or off */ + u8 is_enabled; + + /** Number of preallocated connections */ + u32 preallocated_connections; + + /** Transport table (preallocation) size parameters */ + u32 local_endpoints_table_memory; + u32 local_endpoints_table_buckets; + + /** Vectors of src addresses. 
Optional unless one needs > 63K active-opens */ + ip4_address_t *ip4_src_addresses; + u32 last_v4_address_rotor; + u32 last_v6_address_rotor; + ip6_address_t *ip6_src_addresses; + + /** vlib buffer size */ + u32 bytes_per_buffer; + + u8 punt_unknown4; + u8 punt_unknown6; + +} sctp_main_t; + +extern sctp_main_t sctp_main; +extern vlib_node_registration_t sctp4_input_node; +extern vlib_node_registration_t sctp6_input_node; +extern vlib_node_registration_t sctp4_output_node; +extern vlib_node_registration_t sctp6_output_node; + +always_inline sctp_main_t * +vnet_get_sctp_main () +{ + return &sctp_main; +} + +always_inline sctp_header_t * +sctp_buffer_hdr (vlib_buffer_t * b) +{ + ASSERT ((signed) b->current_data >= (signed) -VLIB_BUFFER_PRE_DATA_SIZE); + return (sctp_header_t *) (b->data + b->current_data + + vnet_buffer (b)->sctp.hdr_offset); +} + +clib_error_t *vnet_sctp_enable_disable (vlib_main_t * vm, u8 is_en); + +always_inline sctp_connection_t * +sctp_half_open_connection_get (u32 conn_index) +{ + sctp_connection_t *tc = 0; + clib_spinlock_lock_if_init (&sctp_main.half_open_lock); + if (!pool_is_free_index (sctp_main.half_open_connections, conn_index)) + tc = pool_elt_at_index (sctp_main.half_open_connections, conn_index); + tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].parent = tc; + clib_spinlock_unlock_if_init (&sctp_main.half_open_lock); + return tc; +} + +/** + * Cleanup half-open connection + * + */ +always_inline void +sctp_half_open_connection_del (sctp_connection_t * tc) +{ + sctp_main_t *tm = vnet_get_sctp_main (); + clib_spinlock_lock_if_init (&tm->half_open_lock); + pool_put_index (tm->half_open_connections, + tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_c_index); + if (CLIB_DEBUG) + memset (tc, 0xFA, sizeof (*tc)); + clib_spinlock_unlock_if_init (&tm->half_open_lock); +} + +always_inline u32 +sctp_set_time_now (u32 thread_index) +{ + sctp_main.time_now[thread_index] = clib_cpu_time_now () + * sctp_main.tstamp_ticks_per_clock; + return 
sctp_main.time_now[thread_index]; +} + +always_inline void +sctp_timer_set (sctp_connection_t * tc, u8 conn_idx, u8 timer_id, + u32 interval) +{ + ASSERT (tc->sub_conn[conn_idx].connection.thread_index == + vlib_get_thread_index ()); + ASSERT (tc->sub_conn[conn_idx].timers[timer_id] == + SCTP_TIMER_HANDLE_INVALID); + + sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx]; + tc->sub_conn[conn_idx].timers[timer_id] = + tw_timer_start_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index], + sub->c_c_index, timer_id, interval); +} + +always_inline void +sctp_timer_reset (sctp_connection_t * tc, u8 conn_idx, u8 timer_id) +{ + ASSERT (tc->sub_conn[conn_idx].c_thread_index == vlib_get_thread_index ()); + if (tc->sub_conn[conn_idx].timers[timer_id] == SCTP_TIMER_HANDLE_INVALID) + return; + + sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx]; + + tw_timer_stop_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index], + sub->timers[timer_id]); + sub->timers[timer_id] = SCTP_TIMER_HANDLE_INVALID; +} + +always_inline void +sctp_update_time (f64 now, u32 thread_index) +{ + sctp_set_time_now (thread_index); + tw_timer_expire_timers_16t_2w_512sl (&sctp_main.timer_wheels[thread_index], + now); + sctp_flush_frames_to_output (thread_index); +} + +/** + * Try to cleanup half-open connection + * + * If called from a thread that doesn't own tc, the call won't have any + * effect. + * + * @param tc - connection to be cleaned up + * @return non-zero if cleanup failed. 
+ */ +always_inline int +sctp_half_open_connection_cleanup (sctp_connection_t * tc) +{ + /* Make sure this is the owning thread */ + if (tc->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_thread_index != + vlib_get_thread_index ()) + return 1; + sctp_timer_reset (tc, MAIN_SCTP_SUB_CONN_IDX, SCTP_TIMER_T1_INIT); + sctp_half_open_connection_del (tc); + return 0; +} + +always_inline u32 +sctp_header_bytes () +{ + return sizeof (sctp_header_t); +} + +always_inline sctp_connection_t * +sctp_get_connection_from_transport (transport_connection_t * tconn) +{ + ASSERT (tconn != NULL); + + sctp_sub_connection_t *sub = (sctp_sub_connection_t *) tconn; +#if SCTP_ADV_DEBUG + if (sub == NULL) + SCTP_ADV_DBG ("sub == NULL"); + if (sub->parent == NULL) + SCTP_ADV_DBG ("sub->parent == NULL"); +#endif + return (sctp_connection_t *) sub->parent; +} + +always_inline u32 +sctp_time_now (void) +{ + return sctp_main.time_now[vlib_get_thread_index ()]; +} + +always_inline void +sctp_timer_update (sctp_connection_t * tc, u8 conn_idx, u8 timer_id, + u32 interval) +{ + ASSERT (tc->sub_conn[conn_idx].connection.thread_index == + vlib_get_thread_index ()); + sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx]; + + if (tc->sub_conn[conn_idx].timers[timer_id] != SCTP_TIMER_HANDLE_INVALID) + tw_timer_stop_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index], + sub->timers[timer_id]); + tc->sub_conn[conn_idx].timers[timer_id] = + tw_timer_start_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index], + sub->c_c_index, timer_id, interval); +} + +always_inline sctp_connection_t * +sctp_listener_get (u32 tli) +{ + return pool_elt_at_index (sctp_main.listener_pool, tli); +} + +#endif + +always_inline sctp_connection_t * +sctp_connection_get (u32 conn_index, u32 thread_index) +{ + if (PREDICT_FALSE + (pool_is_free_index (sctp_main.connections[thread_index], conn_index))) + return 0; + return pool_elt_at_index (sctp_main.connections[thread_index], conn_index); +} + +always_inline u8 
+sctp_pick_conn_idx_on_chunk (sctp_chunk_type chunk_type) +{ + u8 idx = MAIN_SCTP_SUB_CONN_IDX; + + switch (chunk_type) + { + case DATA: + case INIT: + case INIT_ACK: + case SACK: + case HEARTBEAT: + case HEARTBEAT_ACK: + case ABORT: + case SHUTDOWN: + case SHUTDOWN_ACK: + case OPERATION_ERROR: + case COOKIE_ECHO: + case COOKIE_ACK: + case ECNE: + case CWR: + case SHUTDOWN_COMPLETE: + idx = MAIN_SCTP_SUB_CONN_IDX; + } + return idx; +} + +always_inline u8 +sctp_pick_conn_idx_on_state (sctp_state_t state) +{ + u8 idx = MAIN_SCTP_SUB_CONN_IDX; + + switch (state) + { + case SCTP_STATE_CLOSED: + case SCTP_STATE_COOKIE_WAIT: + case SCTP_STATE_COOKIE_ECHOED: + case SCTP_STATE_ESTABLISHED: + case SCTP_STATE_SHUTDOWN_PENDING: + case SCTP_STATE_SHUTDOWN_SENT: + case SCTP_STATE_SHUTDOWN_RECEIVED: + case SCTP_STATE_SHUTDOWN_ACK_SENT: + idx = MAIN_SCTP_SUB_CONN_IDX; + default: + idx = MAIN_SCTP_SUB_CONN_IDX; + } + return idx; +} + +/** + * Push SCTP header to buffer + * + * @param vm - vlib_main + * @param b - buffer to write the header to + * @param sp_net - source port net order + * @param dp_net - destination port net order + * @param sctp_hdr_opts_len - header and options length in bytes + * + * @return - pointer to start of SCTP header + */ +always_inline void * +vlib_buffer_push_sctp_net_order (vlib_buffer_t * b, u16 sp, u16 dp, + u8 sctp_hdr_opts_len) +{ + sctp_full_hdr_t *full_hdr; + + full_hdr = vlib_buffer_push_uninit (b, sctp_hdr_opts_len); + + full_hdr->hdr.src_port = sp; + full_hdr->hdr.dst_port = dp; + full_hdr->hdr.checksum = 0; + return full_hdr; +} + +/** + * Push SCTP header to buffer + * + * @param b - buffer to write the header to + * @param sp_net - source port net order + * @param dp_net - destination port net order + * @param sctp_hdr_opts_len - header and options length in bytes + * + * @return - pointer to start of SCTP header + */ +always_inline void * +vlib_buffer_push_sctp (vlib_buffer_t * b, u16 sp_net, u16 dp_net, + u8 sctp_hdr_opts_len) +{ + 
return vlib_buffer_push_sctp_net_order (b, sp_net, dp_net, + sctp_hdr_opts_len); +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/sctp/sctp_debug.h b/src/vnet/sctp/sctp_debug.h new file mode 100644 index 00000000000..b422d199640 --- /dev/null +++ b/src/vnet/sctp/sctp_debug.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2017 SUSE LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#ifndef included_sctp_debug_h__ +#define included_sctp_debug_h__ + +#include <vlib/vlib.h> + +typedef enum _sctp_dbg +{ +#define _(sym, str) SCTP_DBG_##sym, + foreach_sctp_dbg_evt +#undef _ +} sctp_dbg_e; + +#define SCTP_DEBUG_STATE_MACHINE (0) +#if SCTP_DEBUG_STATE_MACHINE +#define SCTP_DBG_STATE_MACHINE(_fmt, _args...) clib_warning (_fmt, ##_args) +#else +#define SCTP_DBG_STATE_MACHINE(_fmt, _args...) +#endif + +#define SCTP_DEBUG (0) +#if SCTP_DEBUG +#define SCTP_DBG(_fmt, _args...) clib_warning (_fmt, ##_args) +#else +#define SCTP_DBG(_fmt, _args...) +#endif + +#define SCTP_ADV_DEBUG (0) +#if SCTP_ADV_DEBUG +#define SCTP_ADV_DBG(_fmt, _args...) clib_warning (_fmt, ##_args) +#else +#define SCTP_ADV_DBG(_fmt, _args...) +#endif + +#define SCTP_DEBUG_OUTPUT (0) +#if SCTP_DEBUG_OUTPUT +#define SCTP_DBG_OUTPUT(_fmt, _args...) clib_warning (_fmt, ##_args) +#else +#define SCTP_DBG_OUTPUT(_fmt, _args...) 
+#endif + +#define SCTP_ADV_DEBUG_OUTPUT (0) +#if SCTP_ADV_DEBUG_OUTPUT +#define SCTP_ADV_DBG_OUTPUT(_fmt, _args...) clib_warning (_fmt, ##_args) +#else +#define SCTP_ADV_DBG_OUTPUT(_fmt, _args...) +#endif + +#endif /* included_sctp_debug_h__ */ diff --git a/src/vnet/sctp/sctp_error.def b/src/vnet/sctp/sctp_error.def new file mode 100644 index 00000000000..a244fac9e63 --- /dev/null +++ b/src/vnet/sctp/sctp_error.def @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2017 SUSE LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +sctp_error (NONE, "no error") +sctp_error (PKTS_SENT, "Packets sent") +sctp_error (INVALID_CONNECTION, "Invalid connection") +sctp_error (INVALID_TAG, "Invalid verification tag") +sctp_error (INVALID_TAG_FOR_INIT, "Invalid verification tag for INIT chunk") +sctp_error (CONNECTION_CLOSED, "Connection closed") +sctp_error (ENQUEUED, "Packets pushed into rx fifo") +sctp_error (CREATE_EXISTS, "Connection already exists") +sctp_error (INITS_RCVD, "INITs received") +sctp_error (CREATE_SESSION_FAIL, "Sessions couldn't be allocated") +sctp_error (NO_LISTENER, "no listener for dst port") +sctp_error (LENGTH, "inconsistent ip/tcp lengths") +sctp_error (DISPATCH, "Dispatch error") +sctp_error (ACK_DUP, "Duplicate ACK") +sctp_error (DATA_CHUNK_VIOLATION, "DATA chunk received in invalid state") +sctp_error (INIT_CHUNK_VIOLATION, "INIT chunk received in the wrong state") +sctp_error (INIT_ACK_CHUNK_VIOLATION, "INIT_ACK chunk received in the wrong state") +sctp_error (SACK_CHUNK_VIOLATION, "SACK chunk received in invalid state") +sctp_error (HEARTBEAT_CHUNK_VIOLATION, "HEARTBEAT chunk received in invalid state") +sctp_error (HEARTBEAT_ACK_CHUNK_VIOLATION, "HEARTBEAT_ACK chunk received in invalid state") +sctp_error (ABORT_CHUNK_VIOLATION, "ABORT_CHUNK chunk received in invalid state") +sctp_error (SHUTDOWN_CHUNK_VIOLATION, "SHUTDOWN chunk received in invalid state") +sctp_error (SHUTDOWN_ACK_CHUNK_VIOLATION, "SHUTDOWN_ACK chunk received in invalid state") +sctp_error (OPERATION_ERROR_VIOLATION, "OPERATION_ERROR chunk received in invalid state") +sctp_error (COOKIE_ECHO_VIOLATION, "COOKIE_ECHO chunk received in invalid state") +sctp_error (COOKIE_ACK_VIOLATION, "COOKIE_ACK chunk received in invalid state") +sctp_error (ECNE_VIOLATION, "ECNE chunk received in invalid state") +sctp_error (CWR_VIOLATION, "CWR chunk received in invalid state") +sctp_error (SHUTDOWN_COMPLETE_VIOLATION, "SHUTDOWN_COMPLETE chunk received in invalid state") +sctp_error (FIFO_FULL, "Packets dropped 
for lack of rx fifo space") +sctp_error (PARTIALLY_ENQUEUED, "Packets partially pushed into rx fifo") +sctp_error (EVENT_FIFO_FULL, "Events not sent for lack of event fifo space") +sctp_error (UNKOWN_CHUNK, "Unrecognized / unknown chunk or chunk-state mismatch") +sctp_error (BUNDLING_VIOLATION, "Bundling not allowed") +sctp_error (PUNT, "Packets punted") +sctp_error (FILTERED, "Packets filtered")
\ No newline at end of file diff --git a/src/vnet/sctp/sctp_format.c b/src/vnet/sctp/sctp_format.c new file mode 100644 index 00000000000..49ee04def65 --- /dev/null +++ b/src/vnet/sctp/sctp_format.c @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2017 SUSE LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include <vnet/sctp/sctp.h> + +/* Format SCTP header. */ +u8 * +format_sctp_header (u8 * s, va_list * args) +{ + return NULL; +} + +u8 * +format_sctp_tx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + + return NULL; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/sctp/sctp_input.c b/src/vnet/sctp/sctp_input.c new file mode 100644 index 00000000000..4e5ea9194e8 --- /dev/null +++ b/src/vnet/sctp/sctp_input.c @@ -0,0 +1,2202 @@ +/* + * Copyright (c) 2017 SUSE LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vppinfra/sparse_vec.h> +#include <vnet/sctp/sctp.h> +#include <vnet/sctp/sctp_packet.h> +#include <vnet/sctp/sctp_debug.h> +#include <vnet/session/session.h> +#include <math.h> + +static char *sctp_error_strings[] = { +#define sctp_error(n,s) s, +#include <vnet/sctp/sctp_error.def> +#undef sctp_error +}; + +/* All SCTP nodes have the same outgoing arcs */ +#define foreach_sctp_state_next \ + _ (DROP, "error-drop") \ + _ (SCTP4_OUTPUT, "sctp4-output") \ + _ (SCTP6_OUTPUT, "sctp6-output") + +typedef enum _sctp_established_phase_next +{ +#define _(s,n) SCTP_ESTABLISHED_PHASE_NEXT_##s, + foreach_sctp_state_next +#undef _ + SCTP_ESTABLISHED_PHASE_N_NEXT, +} sctp_established_phase_next_t; + +typedef enum _sctp_rcv_phase_next +{ +#define _(s,n) SCTP_RCV_PHASE_NEXT_##s, + foreach_sctp_state_next +#undef _ + SCTP_RCV_PHASE_N_NEXT, +} sctp_rcv_phase_next_t; + +typedef enum _sctp_listen_phase_next +{ +#define _(s,n) SCTP_LISTEN_PHASE_NEXT_##s, + foreach_sctp_state_next +#undef _ + SCTP_LISTEN_PHASE_N_NEXT, +} sctp_listen_phase_next_t; + +typedef enum _sctp_shutdown_phase_next +{ +#define _(s,n) SCTP_SHUTDOWN_PHASE_NEXT_##s, + foreach_sctp_state_next +#undef _ + SCTP_SHUTDOWN_PHASE_N_NEXT, +} sctp_shutdown_phase_next_t; + +/* Generic, state independent indices */ +typedef enum _sctp_state_next +{ +#define _(s,n) SCTP_NEXT_##s, + foreach_sctp_state_next +#undef _ + SCTP_STATE_N_NEXT, +} sctp_state_next_t; + +typedef enum _sctp_input_next +{ + SCTP_INPUT_NEXT_DROP, + SCTP_INPUT_NEXT_LISTEN_PHASE, + SCTP_INPUT_NEXT_RCV_PHASE, + SCTP_INPUT_NEXT_ESTABLISHED_PHASE, + SCTP_INPUT_NEXT_SHUTDOWN_PHASE, + SCTP_INPUT_NEXT_PUNT_PHASE, + SCTP_INPUT_N_NEXT +} sctp_input_next_t; + +char * +phase_to_string (u8 phase) +{ + switch (phase) + { + case SCTP_INPUT_NEXT_DROP: + return "SCTP_INPUT_NEXT_DROP"; + case SCTP_INPUT_NEXT_LISTEN_PHASE: + return 
"SCTP_INPUT_NEXT_LISTEN_PHASE"; + case SCTP_INPUT_NEXT_RCV_PHASE: + return "SCTP_INPUT_NEXT_RCV_PHASE"; + case SCTP_INPUT_NEXT_ESTABLISHED_PHASE: + return "SCTP_INPUT_NEXT_ESTABLISHED_PHASE"; + case SCTP_INPUT_NEXT_SHUTDOWN_PHASE: + return "SCTP_INPUT_NEXT_SHUTDOWN_PHASE"; + case SCTP_INPUT_NEXT_PUNT_PHASE: + return "SCTP_INPUT_NEXT_PUNT_PHASE"; + } + return NULL; +} + +#define foreach_sctp4_input_next \ + _ (DROP, "error-drop") \ + _ (RCV_PHASE, "sctp4-rcv") \ + _ (LISTEN_PHASE, "sctp4-listen") \ + _ (ESTABLISHED_PHASE, "sctp4-established") \ + _ (SHUTDOWN_PHASE, "sctp4-shutdown") \ + _ (PUNT_PHASE, "ip4-punt") + + +#define foreach_sctp6_input_next \ + _ (DROP, "error-drop") \ + _ (RCV_PHASE, "sctp6-rcv") \ + _ (LISTEN_PHASE, "sctp6-listen") \ + _ (ESTABLISHED_PHASE, "sctp6-established") \ + _ (SHUTDOWN_PHASE, "sctp6-shutdown") \ + _ (PUNT_PHASE, "ip6-punt") + +static u8 +sctp_lookup_is_valid (transport_connection_t * t_conn, + sctp_header_t * sctp_hdr) +{ + sctp_connection_t *sctp_conn = sctp_get_connection_from_transport (t_conn); + + if (!sctp_conn) + return 1; + + u8 is_valid = (t_conn->lcl_port == sctp_hdr->dst_port + && (sctp_conn->state == SCTP_STATE_CLOSED + || t_conn->rmt_port == sctp_hdr->src_port)); + + return is_valid; +} + +/** + * Lookup transport connection + */ +static sctp_connection_t * +sctp_lookup_connection (u32 fib_index, vlib_buffer_t * b, u8 thread_index, + u8 is_ip4) +{ + sctp_main_t *tm = vnet_get_sctp_main (); + sctp_header_t *sctp_hdr; + transport_connection_t *tconn; + sctp_connection_t *sctp_conn; + u8 is_filtered, i; + if (is_ip4) + { + ip4_header_t *ip4_hdr; + ip4_hdr = vlib_buffer_get_current (b); + sctp_hdr = ip4_next_header (ip4_hdr); + tconn = session_lookup_connection_wt4 (fib_index, + &ip4_hdr->dst_address, + &ip4_hdr->src_address, + sctp_hdr->dst_port, + sctp_hdr->src_port, + TRANSPORT_PROTO_SCTP, + thread_index, &is_filtered); + if (tconn == 0) /* Not primary connection */ + { + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { 
+ if ((tm->connections[thread_index]->sub_conn[i]. + connection.lcl_ip.ip4.as_u32 == + ip4_hdr->dst_address.as_u32) + && (tm->connections[thread_index]->sub_conn[i]. + connection.rmt_ip.ip4.as_u32 == + ip4_hdr->src_address.as_u32)) + { + tconn = + &tm->connections[thread_index]->sub_conn[i].connection; + break; + } + } + } + ASSERT (tconn != 0); + ASSERT (sctp_lookup_is_valid (tconn, sctp_hdr)); + } + else + { + ip6_header_t *ip6_hdr; + ip6_hdr = vlib_buffer_get_current (b); + sctp_hdr = ip6_next_header (ip6_hdr); + tconn = session_lookup_connection_wt6 (fib_index, + &ip6_hdr->dst_address, + &ip6_hdr->src_address, + sctp_hdr->dst_port, + sctp_hdr->src_port, + TRANSPORT_PROTO_SCTP, + thread_index, &is_filtered); + if (tconn == 0) /* Not primary connection */ + { + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + if ((tm->connections[thread_index]->sub_conn[i]. + connection.lcl_ip.ip6.as_u64[0] == + ip6_hdr->dst_address.as_u64[0] + && tm->connections[thread_index]->sub_conn[i]. + connection.lcl_ip.ip6.as_u64[1] == + ip6_hdr->dst_address.as_u64[1]) + && (tm->connections[thread_index]->sub_conn[i]. + connection.rmt_ip.ip6.as_u64[0] == + ip6_hdr->src_address.as_u64[0] + && tm->connections[thread_index]-> + sub_conn[i].connection.rmt_ip.ip6.as_u64[1] == + ip6_hdr->src_address.as_u64[1])) + { + tconn = + &tm->connections[thread_index]->sub_conn[i].connection; + break; + } + } + } + ASSERT (tconn != 0); + ASSERT (sctp_lookup_is_valid (tconn, sctp_hdr)); + } + sctp_conn = sctp_get_connection_from_transport (tconn); + return sctp_conn; +} + +typedef struct +{ + sctp_header_t sctp_header; + sctp_connection_t sctp_connection; +} sctp_rx_trace_t; + +#define sctp_next_output(is_ip4) (is_ip4 ? 
SCTP_NEXT_SCTP4_OUTPUT \ + : SCTP_NEXT_SCTP6_OUTPUT) + + +void +sctp_set_rx_trace_data (sctp_rx_trace_t * rx_trace, + sctp_connection_t * sctp_conn, + sctp_header_t * sctp_hdr, vlib_buffer_t * b0, + u8 is_ip4) +{ + if (sctp_conn) + { + clib_memcpy (&rx_trace->sctp_connection, sctp_conn, + sizeof (rx_trace->sctp_connection)); + } + else + { + sctp_hdr = sctp_buffer_hdr (b0); + } + clib_memcpy (&rx_trace->sctp_header, sctp_hdr, + sizeof (rx_trace->sctp_header)); +} + +always_inline u16 +sctp_calculate_implied_length (ip4_header_t * ip4_hdr, ip6_header_t * ip6_hdr, + int is_ip4) +{ + u16 sctp_implied_packet_length = 0; + + if (is_ip4) + sctp_implied_packet_length = + clib_net_to_host_u16 (ip4_hdr->length) - ip4_header_bytes (ip4_hdr); + else + sctp_implied_packet_length = + clib_net_to_host_u16 (ip6_hdr->payload_length) - sizeof (ip6_hdr); + + return sctp_implied_packet_length; +} + +always_inline u8 +sctp_is_bundling (u16 sctp_implied_length, + sctp_chunks_common_hdr_t * sctp_common_hdr) +{ + if (sctp_implied_length != + sizeof (sctp_header_t) + vnet_sctp_get_chunk_length (sctp_common_hdr)) + return 1; + return 0; +} + +always_inline u16 +sctp_handle_init (sctp_header_t * sctp_hdr, + sctp_chunks_common_hdr_t * sctp_chunk_hdr, + sctp_connection_t * sctp_conn, vlib_buffer_t * b0, + u16 sctp_implied_length) +{ + sctp_init_chunk_t *init_chunk = (sctp_init_chunk_t *) (sctp_hdr); + ip4_address_t *ip4_addr = 0; + ip6_address_t *ip6_addr = 0; + char hostname[FQDN_MAX_LENGTH]; + + /* Check the current state of the connection + * + * The logic required by the RFC4960 Section 5.2.2 is already taken care of + * in the code below and by the "sctp_prepare_initack_chunk" function. + * However, for debugging purposes it is nice to have a message printed out + * for these corner-case scenarios. 
+ */ + if (sctp_conn->state != SCTP_STATE_CLOSED) + { /* UNEXPECTED scenario */ + switch (sctp_conn->state) + { + case SCTP_STATE_COOKIE_WAIT: /* TODO */ + SCTP_ADV_DBG ("Received INIT chunk while in COOKIE_WAIT state"); + break; + case SCTP_STATE_COOKIE_ECHOED: /* TODO */ + SCTP_ADV_DBG ("Received INIT chunk while in COOKIE_ECHOED state"); + break; + } + } + + if (sctp_hdr->verification_tag != 0x0) + return SCTP_ERROR_INVALID_TAG_FOR_INIT; + + /* + * It is not possible to bundle any other CHUNK with the INIT chunk + */ + if (sctp_is_bundling (sctp_implied_length, &init_chunk->chunk_hdr)) + return SCTP_ERROR_BUNDLING_VIOLATION; + + /* Save the INITIATE_TAG of the remote peer for this connection: + * it MUST be used for the VERIFICATION_TAG parameter in the SCTP HEADER */ + sctp_conn->remote_tag = init_chunk->initiate_tag; + sctp_conn->snd_opts.a_rwnd = clib_net_to_host_u32 (init_chunk->a_rwnd); + + /* + * If the length specified in the INIT message is bigger than the size in bytes of our structure it means that + * optional parameters have been sent with the INIT chunk and we need to parse them. 
+ */ + u16 length = vnet_sctp_get_chunk_length (sctp_chunk_hdr); + if (length > sizeof (sctp_init_chunk_t)) + { + /* There are optional parameters in the INIT chunk */ + u16 pointer_offset = sizeof (sctp_init_chunk_t); + while (pointer_offset < length) + { + sctp_opt_params_hdr_t *opt_params_hdr = + (sctp_opt_params_hdr_t *) init_chunk + pointer_offset; + + switch (clib_net_to_host_u16 (opt_params_hdr->type)) + { + case SCTP_IPV4_ADDRESS_TYPE: + { + sctp_ipv4_addr_param_t *ipv4 = + (sctp_ipv4_addr_param_t *) opt_params_hdr; + clib_memcpy (ip4_addr, &ipv4->address, + sizeof (ip4_address_t)); + + sctp_sub_connection_add_ip4 (vlib_get_thread_index (), ipv4); + + break; + } + case SCTP_IPV6_ADDRESS_TYPE: + { + sctp_ipv6_addr_param_t *ipv6 = + (sctp_ipv6_addr_param_t *) opt_params_hdr; + clib_memcpy (ip6_addr, &ipv6->address, + sizeof (ip6_address_t)); + + sctp_sub_connection_add_ip6 (vlib_get_thread_index (), ipv6); + + break; + } + case SCTP_COOKIE_PRESERVATIVE_TYPE: + { + sctp_cookie_preservative_param_t *cookie_pres = + (sctp_cookie_preservative_param_t *) opt_params_hdr; + sctp_conn->life_span_inc = cookie_pres->life_span_inc; + break; + } + case SCTP_HOSTNAME_ADDRESS_TYPE: + { + sctp_hostname_param_t *hostname_addr = + (sctp_hostname_param_t *) opt_params_hdr; + clib_memcpy (hostname, hostname_addr->hostname, + FQDN_MAX_LENGTH); + break; + } + case SCTP_SUPPORTED_ADDRESS_TYPES: + { + /* TODO */ + break; + } + } + pointer_offset += clib_net_to_host_u16 (opt_params_hdr->length); + } + } + + /* Reuse buffer to make init-ack and send */ + sctp_prepare_initack_chunk (sctp_conn, b0, ip4_addr, ip6_addr); + return SCTP_ERROR_NONE; +} + +always_inline u16 +sctp_is_valid_init_ack (sctp_header_t * sctp_hdr, + sctp_chunks_common_hdr_t * sctp_chunk_hdr, + sctp_connection_t * sctp_conn, vlib_buffer_t * b0, + u16 sctp_implied_length) +{ + sctp_init_ack_chunk_t *init_ack_chunk = + (sctp_init_ack_chunk_t *) (sctp_hdr); + + /* Check that the LOCALLY generated tag is being used by 
the REMOTE peer as the verification tag */ + if (sctp_conn->local_tag != init_ack_chunk->sctp_hdr.verification_tag) + { + return SCTP_ERROR_INVALID_TAG; + } + + /* + * It is not possible to bundle any other CHUNK with the INIT_ACK chunk + */ + if (sctp_is_bundling (sctp_implied_length, &init_ack_chunk->chunk_hdr)) + return SCTP_ERROR_BUNDLING_VIOLATION; + + return SCTP_ERROR_NONE; +} + +always_inline u16 +sctp_handle_init_ack (sctp_header_t * sctp_hdr, + sctp_chunks_common_hdr_t * sctp_chunk_hdr, + sctp_connection_t * sctp_conn, vlib_buffer_t * b0, + u16 sctp_implied_length) +{ + sctp_init_ack_chunk_t *init_ack_chunk = + (sctp_init_ack_chunk_t *) (sctp_hdr); + ip4_address_t *ip4_addr = 0; + ip6_address_t *ip6_addr = 0; + sctp_state_cookie_param_t state_cookie; + + char hostname[FQDN_MAX_LENGTH]; + + /* Check that the LOCALLY generated tag is being used by the REMOTE peer as the verification tag */ + if (sctp_conn->local_tag != init_ack_chunk->sctp_hdr.verification_tag) + { + return SCTP_ERROR_INVALID_TAG; + } + + /* + * It is not possible to bundle any other CHUNK with the INIT chunk + */ + if (sctp_is_bundling (sctp_implied_length, &init_ack_chunk->chunk_hdr)) + return SCTP_ERROR_BUNDLING_VIOLATION; + + /* remote_tag to be placed in the VERIFICATION_TAG field of the COOKIE_ECHO chunk */ + sctp_conn->remote_tag = init_ack_chunk->initiate_tag; + sctp_conn->snd_opts.a_rwnd = clib_net_to_host_u32 (init_ack_chunk->a_rwnd); + + u16 length = vnet_sctp_get_chunk_length (sctp_chunk_hdr); + + if (length > sizeof (sctp_init_ack_chunk_t)) + /* + * There are optional parameters in the INIT ACK chunk + */ + { + u16 pointer_offset = sizeof (sctp_init_ack_chunk_t); + + while (pointer_offset < length) + { + sctp_opt_params_hdr_t *opt_params_hdr = + (sctp_opt_params_hdr_t *) ((char *) init_ack_chunk + + pointer_offset); + + switch (clib_net_to_host_u16 (opt_params_hdr->type)) + { + case SCTP_IPV4_ADDRESS_TYPE: + { + sctp_ipv4_addr_param_t *ipv4 = + (sctp_ipv4_addr_param_t *) 
opt_params_hdr; + clib_memcpy (ip4_addr, &ipv4->address, + sizeof (ip4_address_t)); + + sctp_sub_connection_add_ip4 (vlib_get_thread_index (), ipv4); + + break; + } + case SCTP_IPV6_ADDRESS_TYPE: + { + sctp_ipv6_addr_param_t *ipv6 = + (sctp_ipv6_addr_param_t *) opt_params_hdr; + clib_memcpy (ip6_addr, &ipv6->address, + sizeof (ip6_address_t)); + + sctp_sub_connection_add_ip6 (vlib_get_thread_index (), ipv6); + + break; + } + case SCTP_STATE_COOKIE_TYPE: + { + sctp_state_cookie_param_t *state_cookie_param = + (sctp_state_cookie_param_t *) opt_params_hdr; + + clib_memcpy (&state_cookie, state_cookie_param, + sizeof (sctp_state_cookie_param_t)); + break; + } + case SCTP_HOSTNAME_ADDRESS_TYPE: + { + sctp_hostname_param_t *hostname_addr = + (sctp_hostname_param_t *) opt_params_hdr; + clib_memcpy (hostname, hostname_addr->hostname, + FQDN_MAX_LENGTH); + break; + } + case SCTP_UNRECOGNIZED_TYPE: + { + break; + } + } + u16 increment = clib_net_to_host_u16 (opt_params_hdr->length); + /* This indicates something really bad happened */ + if (increment == 0) + { + return SCTP_ERROR_INVALID_TAG; + } + pointer_offset += increment; + } + } + + sctp_prepare_cookie_echo_chunk (sctp_conn, b0, &state_cookie); + + /* Start the T1_COOKIE timer */ + sctp_timer_set (sctp_conn, sctp_pick_conn_idx_on_chunk (COOKIE_ECHO), + SCTP_TIMER_T1_COOKIE, SCTP_RTO_INIT); + + return SCTP_ERROR_NONE; +} + +/** Enqueue data for delivery to application */ +always_inline int +sctp_session_enqueue_data (sctp_connection_t * tc, vlib_buffer_t * b, + u16 data_len, u8 conn_idx) +{ + int written, error = SCTP_ERROR_ENQUEUED; + + written = + session_enqueue_stream_connection (&tc->sub_conn[conn_idx].connection, b, + 0, 1 /* queue event */ , 1); + + /* Update rcv_nxt */ + if (PREDICT_TRUE (written == data_len)) + { + tc->rcv_nxt += written; + + SCTP_ADV_DBG ("CONN = %u, WRITTEN [%u] == DATA_LEN [%d]", + tc->sub_conn[conn_idx].connection.c_index, + written, data_len); + } + /* If more data written than expected, 
account for out-of-order bytes. */ + else if (written > data_len) + { + tc->rcv_nxt += written; + + SCTP_ADV_DBG ("CONN = %u, WRITTEN [%u] > DATA_LEN [%d]", + tc->sub_conn[conn_idx].connection.c_index, + written, data_len); + } + else if (written > 0) + { + /* We've written something but FIFO is probably full now */ + tc->rcv_nxt += written; + + error = SCTP_ERROR_PARTIALLY_ENQUEUED; + + SCTP_ADV_DBG + ("CONN = %u, WRITTEN [%u] > 0 (SCTP_ERROR_PARTIALLY_ENQUEUED)", + tc->sub_conn[conn_idx].connection.c_index, written); + } + else + { + SCTP_ADV_DBG ("CONN = %u, WRITTEN == 0 (SCTP_ERROR_FIFO_FULL)", + tc->sub_conn[conn_idx].connection.c_index); + + return SCTP_ERROR_FIFO_FULL; + } + + return error; +} + +always_inline u16 +sctp_handle_data (sctp_payload_data_chunk_t * sctp_data_chunk, + sctp_connection_t * sctp_conn, vlib_buffer_t * b, + u16 * next0) +{ + u32 error = 0, n_data_bytes; + u8 idx = sctp_pick_conn_idx_on_state (sctp_conn->state); + + /* Check that the LOCALLY generated tag is being used by the REMOTE peer as the verification tag */ + if (sctp_conn->local_tag != sctp_data_chunk->sctp_hdr.verification_tag) + { + return SCTP_ERROR_INVALID_TAG; + } + + vnet_buffer (b)->sctp.sid = sctp_data_chunk->stream_id; + vnet_buffer (b)->sctp.ssn = sctp_data_chunk->stream_seq; + vnet_buffer (b)->sctp.tsn = sctp_data_chunk->tsn; + + vlib_buffer_advance (b, vnet_buffer (b)->sctp.data_offset); + n_data_bytes = vnet_buffer (b)->sctp.data_len; + ASSERT (n_data_bytes); + + SCTP_ADV_DBG ("POINTER_WITH_DATA = %p", b->data); + + /* In order data, enqueue. Fifo figures out by itself if any out-of-order + * segments can be enqueued after fifo tail offset changes. 
*/ + error = sctp_session_enqueue_data (sctp_conn, b, n_data_bytes, idx); + + sctp_timer_update (sctp_conn, idx, SCTP_TIMER_T3_RXTX, SCTP_RTO_INIT); + + *next0 = sctp_next_output (sctp_conn->sub_conn[idx].c_is_ip4); + sctp_prepare_sack_chunk (sctp_conn, b); + + return error; +} + +always_inline u16 +sctp_handle_cookie_echo (sctp_header_t * sctp_hdr, + sctp_chunks_common_hdr_t * sctp_chunk_hdr, + sctp_connection_t * sctp_conn, vlib_buffer_t * b0) +{ + + /* Build TCB */ + u8 idx = sctp_pick_conn_idx_on_chunk (COOKIE_ECHO); + + /* Check that the LOCALLY generated tag is being used by the REMOTE peer as the verification tag */ + if (sctp_conn->local_tag != sctp_hdr->verification_tag) + { + return SCTP_ERROR_INVALID_TAG; + } + + sctp_prepare_cookie_ack_chunk (sctp_conn, b0); + + /* Change state */ + sctp_conn->state = SCTP_STATE_ESTABLISHED; + + stream_session_accept_notify (&sctp_conn->sub_conn[idx].connection); + + return SCTP_ERROR_NONE; + +} + +always_inline u16 +sctp_handle_cookie_ack (sctp_header_t * sctp_hdr, + sctp_chunks_common_hdr_t * sctp_chunk_hdr, + sctp_connection_t * sctp_conn, vlib_buffer_t * b0) +{ + + /* Stop T1_COOKIE timer */ + u8 idx = sctp_pick_conn_idx_on_chunk (COOKIE_ACK); + + /* Check that the LOCALLY generated tag is being used by the REMOTE peer as the verification tag */ + if (sctp_conn->local_tag != sctp_hdr->verification_tag) + { + return SCTP_ERROR_INVALID_TAG; + } + + sctp_timer_reset (sctp_conn, idx, SCTP_TIMER_T1_COOKIE); + /* Change state */ + sctp_conn->state = SCTP_STATE_ESTABLISHED; + + stream_session_accept_notify (&sctp_conn->sub_conn[idx].connection); + + sctp_timer_set (sctp_conn, idx, SCTP_TIMER_T3_RXTX, SCTP_RTO_INIT); + + return SCTP_ERROR_NONE; + +} + +always_inline uword +sctp46_rcv_phase_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + sctp_main_t *tm = vnet_get_sctp_main (); + + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->thread_index; + + 
from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + sctp_header_t *sctp_hdr = 0; + sctp_chunks_common_hdr_t *sctp_chunk_hdr = 0; + ip4_header_t *ip4_hdr = 0; + ip6_header_t *ip6_hdr = 0; + sctp_connection_t *sctp_conn, *new_sctp_conn; + u16 sctp_implied_length = 0; + u16 error0 = SCTP_ERROR_NONE, next0 = SCTP_RCV_PHASE_N_NEXT; + u8 idx; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + + /* If we are in SCTP_COOKIE_WAIT_STATE then the connection + * will come from the half-open connections pool. + */ + sctp_conn = + sctp_half_open_connection_get (vnet_buffer (b0)-> + sctp.connection_index); + + if (PREDICT_FALSE (sctp_conn == 0)) + { + error0 = SCTP_ERROR_INVALID_CONNECTION; + goto drop; + } + + if (PREDICT_FALSE (sctp_conn == 0)) + { + SCTP_ADV_DBG + ("sctp_conn == NULL; return SCTP_ERROR_INVALID_CONNECTION"); + error0 = SCTP_ERROR_INVALID_CONNECTION; + goto drop; + } + if (is_ip4) + { + ip4_hdr = vlib_buffer_get_current (b0); + sctp_hdr = ip4_next_header (ip4_hdr); + } + else + { + ip6_hdr = vlib_buffer_get_current (b0); + sctp_hdr = ip6_next_header (ip6_hdr); + } + idx = sctp_pick_conn_idx_on_state (sctp_conn->state); + + sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr; + + transport_connection_t *t_conn = + &sctp_conn->sub_conn[idx].connection; + + t_conn->lcl_port = sctp_hdr->dst_port; + t_conn->rmt_port = sctp_hdr->src_port; + t_conn->is_ip4 = is_ip4; + + if (is_ip4) + { + t_conn->lcl_ip.ip4.as_u32 = ip4_hdr->dst_address.as_u32; + t_conn->rmt_ip.ip4.as_u32 = ip4_hdr->src_address.as_u32; + } + else + { + clib_memcpy (&t_conn->lcl_ip.ip6, &ip6_hdr->dst_address, + sizeof 
(ip6_address_t)); + clib_memcpy (&t_conn->rmt_ip.ip6, &ip6_hdr->src_address, + sizeof (ip6_address_t)); + } + + sctp_chunk_hdr = + (sctp_chunks_common_hdr_t *) (&full_hdr->common_hdr); + + sctp_implied_length = + sctp_calculate_implied_length (ip4_hdr, ip6_hdr, is_ip4); + + u8 chunk_type = vnet_sctp_get_chunk_type (&full_hdr->common_hdr); + + switch (chunk_type) + { + case INIT_ACK: + error0 = + sctp_is_valid_init_ack (sctp_hdr, sctp_chunk_hdr, sctp_conn, + b0, sctp_implied_length); + + if (error0 == SCTP_ERROR_NONE) + { + pool_get (tm->connections[my_thread_index], new_sctp_conn); + clib_memcpy (new_sctp_conn, sctp_conn, + sizeof (*new_sctp_conn)); + new_sctp_conn->sub_conn[idx].c_c_index = + new_sctp_conn - tm->connections[my_thread_index]; + new_sctp_conn->sub_conn[idx].c_thread_index = + my_thread_index; + new_sctp_conn->sub_conn[idx].parent = new_sctp_conn; + + if (sctp_half_open_connection_cleanup (sctp_conn)) + { + SCTP_DBG + ("Cannot cleanup half-open connection; not the owning thread"); + } + + sctp_connection_timers_init (new_sctp_conn); + + error0 = + sctp_handle_init_ack (sctp_hdr, sctp_chunk_hdr, + new_sctp_conn, b0, + sctp_implied_length); + + sctp_connection_init_vars (new_sctp_conn); + + if (session_stream_connect_notify + (&new_sctp_conn->sub_conn[idx].connection, 0)) + { + SCTP_DBG + ("conn_index = %u: session_stream_connect_notify error; cleaning up connection", + new_sctp_conn->sub_conn[idx].connection.c_index); + sctp_connection_cleanup (new_sctp_conn); + goto drop; + } + } + next0 = sctp_next_output (is_ip4); + break; + + /* All UNEXPECTED scenarios (wrong chunk received per state-machine) + * are handled by the input-dispatcher function using the table-lookup + * hence we should never get to the "default" case below. 
+ */ + default: + error0 = SCTP_ERROR_UNKOWN_CHUNK; + next0 = SCTP_NEXT_DROP; + goto drop; + } + + if (error0 != SCTP_ERROR_NONE) + { + clib_warning ("error while parsing chunk"); + sctp_connection_cleanup (sctp_conn); + next0 = SCTP_NEXT_DROP; + goto drop; + } + + drop: + b0->error = node->errors[error0]; + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sctp_rx_trace_t *t0 = + vlib_add_trace (vm, node, b0, sizeof (*t0)); + sctp_set_rx_trace_data (t0, sctp_conn, sctp_hdr, b0, is_ip4); + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +static uword +sctp4_rcv_phase (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return sctp46_rcv_phase_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +sctp6_rcv_phase (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return sctp46_rcv_phase_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +u8 * +format_sctp_rx_trace_short (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + sctp_rx_trace_t *t = va_arg (*args, sctp_rx_trace_t *); + + s = format (s, "%d -> %d (%U)", + clib_net_to_host_u16 (t->sctp_header.src_port), + clib_net_to_host_u16 (t->sctp_header.dst_port), + format_sctp_state, t->sctp_connection.state); + + return s; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sctp4_rcv_phase_node) = +{ + .function = sctp4_rcv_phase, + .name = "sctp4-rcv", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = SCTP_N_ERROR, + .error_strings = sctp_error_strings, + .n_next_nodes = SCTP_RCV_PHASE_N_NEXT, + .next_nodes = + { +#define _(s,n) [SCTP_RCV_PHASE_NEXT_##s] = n, + foreach_sctp_state_next +#undef _ + }, + .format_trace = format_sctp_rx_trace_short, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (sctp4_rcv_phase_node, sctp4_rcv_phase); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sctp6_init_phase_node) = +{ + .function = sctp6_rcv_phase, + .name = "sctp6-rcv", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = SCTP_N_ERROR, + .error_strings = sctp_error_strings, + .n_next_nodes = SCTP_RCV_PHASE_N_NEXT, + .next_nodes = + { +#define _(s,n) [SCTP_RCV_PHASE_NEXT_##s] = n, + foreach_sctp_state_next +#undef _ + }, + .format_trace = format_sctp_rx_trace_short, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (sctp6_init_phase_node, sctp6_rcv_phase); + +vlib_node_registration_t sctp4_shutdown_phase_node; +vlib_node_registration_t sctp6_shutdown_phase_node; + +always_inline u16 +sctp_handle_shutdown (sctp_header_t * sctp_hdr, + sctp_chunks_common_hdr_t * sctp_chunk_hdr, + sctp_connection_t * sctp_conn, vlib_buffer_t * b0, + u16 sctp_implied_length) +{ + sctp_shutdown_association_chunk_t *shutdown_chunk = + (sctp_shutdown_association_chunk_t *) (sctp_hdr); + + /* Check that the LOCALLY generated tag is being used by the REMOTE peer as the verification tag */ + if (sctp_conn->local_tag != sctp_hdr->verification_tag) + { + return SCTP_ERROR_INVALID_TAG; + } + + /* + * It is not possible to bundle any other CHUNK with the SHUTDOWN chunk + */ + if (sctp_is_bundling (sctp_implied_length, &shutdown_chunk->chunk_hdr)) + return SCTP_ERROR_BUNDLING_VIOLATION; + + switch (sctp_conn->state) + { + case SCTP_STATE_ESTABLISHED: + if (sctp_check_outstanding_data_chunks (sctp_conn) == 0) + sctp_conn->state = SCTP_STATE_SHUTDOWN_RECEIVED; + break; + + case SCTP_STATE_SHUTDOWN_SENT: + sctp_send_shutdown_ack 
(sctp_conn); + break; + } + + return SCTP_ERROR_NONE; +} + +always_inline u16 +sctp_handle_shutdown_ack (sctp_header_t * sctp_hdr, + sctp_chunks_common_hdr_t * sctp_chunk_hdr, + sctp_connection_t * sctp_conn, vlib_buffer_t * b0, + u16 sctp_implied_length) +{ + sctp_shutdown_ack_chunk_t *shutdown_ack_chunk = + (sctp_shutdown_ack_chunk_t *) (sctp_hdr); + + /* Check that the LOCALLY generated tag is being used by the REMOTE peer as the verification tag */ + if (sctp_conn->local_tag != sctp_hdr->verification_tag) + { + return SCTP_ERROR_INVALID_TAG; + } + + /* + * It is not possible to bundle any other CHUNK with the SHUTDOWN chunk + */ + if (sctp_is_bundling (sctp_implied_length, &shutdown_ack_chunk->chunk_hdr)) + return SCTP_ERROR_BUNDLING_VIOLATION; + + /* Whether we are in SCTP_STATE_SHUTDOWN_SENT or SCTP_STATE_SHUTDOWN_ACK_SENT + * the reception of a SHUTDOWN_ACK chunk leads to the same actions: + * - STOP T2_SHUTDOWN timer + * - SEND SHUTDOWN_COMPLETE chunk + */ + sctp_timer_reset (sctp_conn, MAIN_SCTP_SUB_CONN_IDX, + SCTP_TIMER_T2_SHUTDOWN); + sctp_send_shutdown_complete (sctp_conn); + + return SCTP_ERROR_NONE; +} + +always_inline u16 +sctp_handle_shutdown_complete (sctp_header_t * sctp_hdr, + sctp_chunks_common_hdr_t * sctp_chunk_hdr, + sctp_connection_t * sctp_conn, + vlib_buffer_t * b0, u16 sctp_implied_length) +{ + sctp_shutdown_complete_chunk_t *shutdown_complete = + (sctp_shutdown_complete_chunk_t *) (sctp_hdr); + + /* Check that the LOCALLY generated tag is being used by the REMOTE peer as the verification tag */ + if (sctp_conn->local_tag != sctp_hdr->verification_tag) + { + return SCTP_ERROR_INVALID_TAG; + } + + /* + * It is not possible to bundle any other CHUNK with the SHUTDOWN chunk + */ + if (sctp_is_bundling (sctp_implied_length, &shutdown_complete->chunk_hdr)) + return SCTP_ERROR_BUNDLING_VIOLATION; + + sctp_timer_reset (sctp_conn, MAIN_SCTP_SUB_CONN_IDX, + SCTP_TIMER_T2_SHUTDOWN); + + sctp_conn->state = SCTP_STATE_CLOSED; + + 
stream_session_disconnect_notify (&sctp_conn->sub_conn + [MAIN_SCTP_SUB_CONN_IDX].connection); + + return SCTP_ERROR_NONE; +} + +always_inline uword +sctp46_shutdown_phase_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->thread_index; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + sctp_rx_trace_t *sctp_trace; + sctp_header_t *sctp_hdr = 0; + sctp_chunks_common_hdr_t *sctp_chunk_hdr = 0; + ip4_header_t *ip4_hdr = 0; + ip6_header_t *ip6_hdr = 0; + sctp_connection_t *sctp_conn; + u16 sctp_implied_length = 0; + u16 error0 = SCTP_ERROR_NONE, next0 = SCTP_RCV_PHASE_N_NEXT; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + sctp_conn = + sctp_connection_get (vnet_buffer (b0)->sctp.connection_index, + my_thread_index); + + if (PREDICT_FALSE (sctp_conn == 0)) + { + SCTP_DBG + ("sctp_conn == NULL; return SCTP_ERROR_INVALID_CONNECTION"); + error0 = SCTP_ERROR_INVALID_CONNECTION; + goto drop; + } + + if (is_ip4) + { + ip4_hdr = vlib_buffer_get_current (b0); + sctp_hdr = ip4_next_header (ip4_hdr); + } + else + { + ip6_hdr = vlib_buffer_get_current (b0); + sctp_hdr = ip6_next_header (ip6_hdr); + } + + sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr; + sctp_chunk_hdr = &full_hdr->common_hdr; + + sctp_implied_length = + sctp_calculate_implied_length (ip4_hdr, ip6_hdr, is_ip4); + + switch (vnet_sctp_get_chunk_type (sctp_chunk_hdr)) + { + case SHUTDOWN: + error0 = + sctp_handle_shutdown (sctp_hdr, sctp_chunk_hdr, sctp_conn, b0, + sctp_implied_length); + next0 = 
sctp_next_output (is_ip4); + break; + + case SHUTDOWN_ACK: + error0 = + sctp_handle_shutdown_ack (sctp_hdr, sctp_chunk_hdr, sctp_conn, + b0, sctp_implied_length); + next0 = sctp_next_output (is_ip4); + break; + + case SHUTDOWN_COMPLETE: + error0 = + sctp_handle_shutdown_complete (sctp_hdr, sctp_chunk_hdr, + sctp_conn, b0, + sctp_implied_length); + + sctp_connection_cleanup (sctp_conn); + next0 = sctp_next_output (is_ip4); + break; + + /* + * DATA chunks can still be transmitted/received in the SHUTDOWN-PENDING + * and SHUTDOWN-SENT states (as per RFC4960 Section 6) + */ + case DATA: + error0 = + sctp_handle_data ((sctp_payload_data_chunk_t *) sctp_hdr, + sctp_conn, b0, &next0); + next0 = sctp_next_output (is_ip4); + break; + + /* All UNEXPECTED scenarios (wrong chunk received per state-machine) + * are handled by the input-dispatcher function using the table-lookup + * hence we should never get to the "default" case below. + */ + default: + error0 = SCTP_ERROR_UNKOWN_CHUNK; + next0 = SCTP_NEXT_DROP; + goto drop; + } + + if (error0 != SCTP_ERROR_NONE) + { + clib_warning ("error while parsing chunk"); + sctp_connection_cleanup (sctp_conn); + next0 = SCTP_NEXT_DROP; + goto drop; + } + + drop: + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sctp_trace = + vlib_add_trace (vm, node, b0, sizeof (*sctp_trace)); + clib_memcpy (&sctp_trace->sctp_header, sctp_hdr, + sizeof (sctp_trace->sctp_header)); + clib_memcpy (&sctp_trace->sctp_connection, sctp_conn, + sizeof (sctp_trace->sctp_connection)); + } + + b0->error = node->errors[error0]; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; + +} + +static uword +sctp4_shutdown_phase (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return sctp46_shutdown_phase_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword 
+sctp6_shutdown_phase (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return sctp46_shutdown_phase_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sctp4_shutdown_phase_node) = +{ + .function = sctp4_shutdown_phase, + .name = "sctp4-shutdown", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = SCTP_N_ERROR, + .error_strings = sctp_error_strings, + .n_next_nodes = SCTP_SHUTDOWN_PHASE_N_NEXT, + .next_nodes = + { +#define _(s,n) [SCTP_SHUTDOWN_PHASE_NEXT_##s] = n, + foreach_sctp_state_next +#undef _ + }, + .format_trace = format_sctp_rx_trace_short, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (sctp4_shutdown_phase_node, + sctp4_shutdown_phase); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sctp6_shutdown_phase_node) = +{ + .function = sctp6_shutdown_phase, + .name = "sctp6-shutdown", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = SCTP_N_ERROR, + .error_strings = sctp_error_strings, + .n_next_nodes = SCTP_SHUTDOWN_PHASE_N_NEXT, + .next_nodes = + { +#define _(s,n) [SCTP_SHUTDOWN_PHASE_NEXT_##s] = n, + foreach_sctp_state_next +#undef _ + }, + .format_trace = format_sctp_rx_trace_short, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (sctp6_shutdown_phase_node, + sctp6_shutdown_phase); + +vlib_node_registration_t sctp4_listen_phase_node; +vlib_node_registration_t sctp6_listen_phase_node; + +vlib_node_registration_t sctp4_established_phase_node; +vlib_node_registration_t sctp6_established_phase_node; + +always_inline u16 +sctp_handle_sack (sctp_selective_ack_chunk_t * sack_chunk, + sctp_connection_t * sctp_conn, vlib_buffer_t * b0, + u16 * next0) +{ + *next0 = + sctp_next_output (sctp_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX]. 
+ connection.is_ip4); + + return SCTP_ERROR_NONE; +} + +always_inline u16 +sctp_handle_heartbeat (sctp_hb_req_chunk_t * sctp_hb_chunk, + sctp_connection_t * sctp_conn, vlib_buffer_t * b0, + u16 * next0) +{ + return SCTP_ERROR_NONE; +} + +always_inline u16 +sctp_handle_heartbeat_ack (sctp_hb_ack_chunk_t * sctp_hb_ack_chunk, + sctp_connection_t * sctp_conn, vlib_buffer_t * b0, + u16 * next0) +{ + return SCTP_ERROR_NONE; +} + +always_inline void +sctp_node_inc_counter (vlib_main_t * vm, u32 tcp4_node, u32 tcp6_node, + u8 is_ip4, u8 evt, u8 val) +{ + if (PREDICT_TRUE (!val)) + return; + + if (is_ip4) + vlib_node_increment_counter (vm, tcp4_node, evt, val); + else + vlib_node_increment_counter (vm, tcp6_node, evt, val); +} + +always_inline uword +sctp46_listen_process_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->thread_index; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + sctp_header_t *sctp_hdr = 0; + ip4_header_t *ip4_hdr; + ip6_header_t *ip6_hdr; + sctp_connection_t *child_conn; + sctp_connection_t *sctp_listener; + u16 next0 = SCTP_LISTEN_PHASE_N_NEXT, error0 = SCTP_ERROR_ENQUEUED; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + sctp_listener = + sctp_listener_get (vnet_buffer (b0)->sctp.connection_index); + + if (is_ip4) + { + ip4_hdr = vlib_buffer_get_current (b0); + sctp_hdr = ip4_next_header (ip4_hdr); + } + else + { + ip6_hdr = vlib_buffer_get_current (b0); + sctp_hdr = ip6_next_header (ip6_hdr); + } + + child_conn = + sctp_lookup_connection 
(sctp_listener->sub_conn + [MAIN_SCTP_SUB_CONN_IDX].c_fib_index, b0, + my_thread_index, is_ip4); + + if (PREDICT_FALSE (child_conn->state != SCTP_STATE_CLOSED)) + { + SCTP_DBG + ("conn_index = %u: child_conn->state != SCTP_STATE_CLOSED.... STATE=%s", + child_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX]. + connection.c_index, + sctp_state_to_string (child_conn->state)); + error0 = SCTP_ERROR_CREATE_EXISTS; + goto drop; + } + + /* Create child session and send SYN-ACK */ + child_conn = sctp_connection_new (my_thread_index); + child_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].parent = child_conn; + child_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_lcl_port = + sctp_hdr->dst_port; + child_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_rmt_port = + sctp_hdr->src_port; + child_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_is_ip4 = is_ip4; + child_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.proto = + sctp_listener->sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection.proto; + child_conn->state = SCTP_STATE_CLOSED; + + if (is_ip4) + { + child_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_lcl_ip4.as_u32 = + ip4_hdr->dst_address.as_u32; + child_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_rmt_ip4.as_u32 = + ip4_hdr->src_address.as_u32; + } + else + { + clib_memcpy (&child_conn-> + sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_lcl_ip6, + &ip6_hdr->dst_address, sizeof (ip6_address_t)); + clib_memcpy (&child_conn-> + sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_rmt_ip6, + &ip6_hdr->src_address, sizeof (ip6_address_t)); + } + + sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr; + sctp_chunks_common_hdr_t *sctp_chunk_hdr = &full_hdr->common_hdr; + + u8 chunk_type = vnet_sctp_get_chunk_type (sctp_chunk_hdr); + if (chunk_type != INIT) + { + SCTP_DBG + ("conn_index = %u: chunk_type != INIT... chunk_type=%s", + child_conn->sub_conn[MAIN_SCTP_SUB_CONN_IDX]. 
+ connection.c_index, sctp_chunk_to_string (chunk_type)); + + error0 = SCTP_ERROR_UNKOWN_CHUNK; + next0 = SCTP_NEXT_DROP; + goto drop; + } + + u16 sctp_implied_length = + sctp_calculate_implied_length (ip4_hdr, ip6_hdr, is_ip4); + + switch (chunk_type) + { + case INIT: + sctp_connection_timers_init (child_conn); + + error0 = + sctp_handle_init (sctp_hdr, sctp_chunk_hdr, child_conn, b0, + sctp_implied_length); + + sctp_connection_init_vars (child_conn); + + + if (error0 == SCTP_ERROR_NONE) + { + if (stream_session_accept + (&child_conn-> + sub_conn[MAIN_SCTP_SUB_CONN_IDX].connection, + sctp_listener-> + sub_conn[MAIN_SCTP_SUB_CONN_IDX].c_s_index, 0)) + { + clib_warning ("session accept fail"); + sctp_connection_cleanup (child_conn); + error0 = SCTP_ERROR_CREATE_SESSION_FAIL; + goto drop; + } + } + next0 = sctp_next_output (is_ip4); + break; + + /* Reception of a DATA chunk whilst in the CLOSED state is called + * "Out of the Blue" packet and handling of the chunk needs special treatment + * as per RFC4960 section 8.4 + */ + case DATA: + break; + } + + drop: + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sctp_rx_trace_t *t0 = + vlib_add_trace (vm, node, b0, sizeof (*t0)); + clib_memcpy (&t0->sctp_header, sctp_hdr, + sizeof (t0->sctp_header)); + clib_memcpy (&t0->sctp_connection, sctp_listener, + sizeof (t0->sctp_connection)); + } + + b0->error = node->errors[error0]; + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + + } + return from_frame->n_vectors; +} + +static uword +sctp4_listen_phase (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return sctp46_listen_process_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +sctp6_listen_phase (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return sctp46_listen_process_inline (vm, node, from_frame, 0 /* is_ip4 */ 
); +} + +/* + * Per-packet handler for chunks dispatched to the established phase + * (COOKIE_ECHO, COOKIE_ACK, SACK, HEARTBEAT, HEARTBEAT_ACK, DATA). + * Looks up the connection saved in the buffer metadata, refreshes the + * sub-connection addressing from the received headers and hands the chunk + * to the matching sctp_handle_* routine. + */ +always_inline uword +sctp46_established_phase_inline (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->thread_index, errors = 0; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + + next_index = node->cached_next_index; + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + sctp_header_t *sctp_hdr = 0; + sctp_chunks_common_hdr_t *sctp_chunk_hdr = 0; + ip4_header_t *ip4_hdr = 0; + ip6_header_t *ip6_hdr = 0; + sctp_connection_t *sctp_conn; + u16 error0 = SCTP_ERROR_NONE, next0 = SCTP_ESTABLISHED_PHASE_N_NEXT; + u8 idx; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + sctp_conn = + sctp_connection_get (vnet_buffer (b0)->sctp.connection_index, + my_thread_index); + + if (PREDICT_FALSE (sctp_conn == 0)) + { + SCTP_DBG + ("sctp_conn == NULL; return SCTP_ERROR_INVALID_CONNECTION"); + error0 = SCTP_ERROR_INVALID_CONNECTION; + /* next0 still holds the N_NEXT count, which is not a valid next + * index for this node: drop the buffer explicitly. */ + next0 = SCTP_NEXT_DROP; + goto done; + } + if (is_ip4) + { + ip4_hdr = vlib_buffer_get_current (b0); + sctp_hdr = ip4_next_header (ip4_hdr); + } + else + { + ip6_hdr = vlib_buffer_get_current (b0); + sctp_hdr = ip6_next_header (ip6_hdr); + } + + idx = sctp_pick_conn_idx_on_state (sctp_conn->state); + + sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr; + + transport_connection_t *t_conn = + &sctp_conn->sub_conn[idx].connection; + + t_conn->lcl_port = sctp_hdr->dst_port; + t_conn->rmt_port = sctp_hdr->src_port; + t_conn->is_ip4 = is_ip4; + + sctp_conn->sub_conn[idx].parent = sctp_conn; + + if (is_ip4) + { + t_conn->lcl_ip.ip4.as_u32 = ip4_hdr->dst_address.as_u32; + t_conn->rmt_ip.ip4.as_u32 = ip4_hdr->src_address.as_u32; + } + else + { + clib_memcpy
(&t_conn->lcl_ip.ip6, &ip6_hdr->dst_address, + sizeof (ip6_address_t)); + clib_memcpy (&t_conn->rmt_ip.ip6, &ip6_hdr->src_address, + sizeof (ip6_address_t)); + } + + sctp_chunk_hdr = + (sctp_chunks_common_hdr_t *) (&full_hdr->common_hdr); + + u8 chunk_type = vnet_sctp_get_chunk_type (&full_hdr->common_hdr); + + switch (chunk_type) + { + case COOKIE_ECHO: + error0 = + sctp_handle_cookie_echo (sctp_hdr, sctp_chunk_hdr, sctp_conn, + b0); + next0 = sctp_next_output (is_ip4); + break; + + case COOKIE_ACK: + error0 = + sctp_handle_cookie_ack (sctp_hdr, sctp_chunk_hdr, sctp_conn, + b0); + next0 = sctp_next_output (is_ip4); + break; + + case SACK: + error0 = + sctp_handle_sack ((sctp_selective_ack_chunk_t *) sctp_hdr, + sctp_conn, b0, &next0); + break; + + case HEARTBEAT: + error0 = + sctp_handle_heartbeat ((sctp_hb_req_chunk_t *) sctp_hdr, + sctp_conn, b0, &next0); + break; + + case HEARTBEAT_ACK: + error0 = + sctp_handle_heartbeat_ack ((sctp_hb_ack_chunk_t *) sctp_hdr, + sctp_conn, b0, &next0); + break; + + case DATA: + error0 = + sctp_handle_data ((sctp_payload_data_chunk_t *) sctp_hdr, + sctp_conn, b0, &next0); + break; + + /* All UNEXPECTED scenarios (wrong chunk received per state-machine) + * are handled by the input-dispatcher function using the table-lookup + * hence we should never get to the "default" case below. 
+ */ + default: + error0 = SCTP_ERROR_UNKOWN_CHUNK; + next0 = SCTP_NEXT_DROP; + goto done; + } + + done: + b0->error = node->errors[error0]; + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sctp_rx_trace_t *t0 = + vlib_add_trace (vm, node, b0, sizeof (*t0)); + sctp_set_rx_trace_data (t0, sctp_conn, sctp_hdr, b0, is_ip4); + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + errors = session_manager_flush_enqueue_events (TRANSPORT_PROTO_SCTP, + my_thread_index); + + sctp_node_inc_counter (vm, is_ip4, sctp4_established_phase_node.index, + sctp6_established_phase_node.index, + SCTP_ERROR_EVENT_FIFO_FULL, errors); + sctp_flush_frame_to_output (vm, my_thread_index, is_ip4); + + return from_frame->n_vectors; +} + +static uword +sctp4_established_phase (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return sctp46_established_phase_inline (vm, node, from_frame, + 1 /* is_ip4 */ ); +} + +static uword +sctp6_established_phase (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return sctp46_established_phase_inline (vm, node, from_frame, + 0 /* is_ip4 */ ); +} + +u8 * +format_sctp_rx_trace (u8 * s, va_list * args) +{ + CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); + CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); + sctp_rx_trace_t *t = va_arg (*args, sctp_rx_trace_t *); + u32 indent = format_get_indent (s); + + s = format (s, "%U\n%U%U", + format_sctp_header, &t->sctp_header, 128, + format_white_space, indent, + format_sctp_connection, &t->sctp_connection, 1); + + return s; +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sctp4_listen_phase_node) = +{ + .function = sctp4_listen_phase, + .name = "sctp4-listen", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = SCTP_N_ERROR, + .error_strings = sctp_error_strings, + .n_next_nodes = SCTP_LISTEN_PHASE_N_NEXT, + .next_nodes = + { +#define _(s,n) [SCTP_LISTEN_PHASE_NEXT_##s] = n, + foreach_sctp_state_next +#undef _ + }, + .format_trace = format_sctp_rx_trace_short, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (sctp4_listen_phase_node, sctp4_listen_phase); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sctp6_listen_phase_node) = +{ + .function = sctp6_listen_phase, + .name = "sctp6-listen", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = SCTP_N_ERROR, + .error_strings = sctp_error_strings, + .n_next_nodes = SCTP_LISTEN_PHASE_N_NEXT, + .next_nodes = + { +#define _(s,n) [SCTP_LISTEN_PHASE_NEXT_##s] = n, + foreach_sctp_state_next +#undef _ + }, + .format_trace = format_sctp_rx_trace_short, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (sctp6_listen_phase_node, sctp6_listen_phase); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sctp4_established_phase_node) = +{ + .function = sctp4_established_phase, + .name = "sctp4-established", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = SCTP_N_ERROR, + .error_strings = sctp_error_strings, + .n_next_nodes = SCTP_ESTABLISHED_PHASE_N_NEXT, + .next_nodes = + { +#define _(s,n) [SCTP_ESTABLISHED_PHASE_NEXT_##s] = n, + foreach_sctp_state_next +#undef _ + }, + .format_trace = format_sctp_rx_trace_short, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (sctp4_established_phase_node, + sctp4_established_phase); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sctp6_established_phase_node) = +{ + .function = sctp6_established_phase, + .name = "sctp6-established", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = SCTP_N_ERROR, + .error_strings = sctp_error_strings, + /* Same next-node enumeration as sctp4_established_phase_node */ + .n_next_nodes = SCTP_ESTABLISHED_PHASE_N_NEXT, + .next_nodes = + { +#define _(s,n) [SCTP_ESTABLISHED_PHASE_NEXT_##s] = n, + foreach_sctp_state_next +#undef _ + }, + .format_trace = format_sctp_rx_trace_short, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (sctp6_established_phase_node, + sctp6_established_phase); + +/* + * This is the function executed first for the SCTP graph. + * It takes care of doing the initial message parsing and + * dispatch to the specialized function. + */ +always_inline uword +sctp46_input_dispatcher (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->thread_index; + u8 is_filtered; + sctp_main_t *tm = vnet_get_sctp_main (); + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + next_index = node->cached_next_index; + sctp_set_time_now (my_thread_index); + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + int n_advance_bytes0, n_data_bytes0; + u32 bi0, fib_index0; + vlib_buffer_t *b0; + sctp_header_t *sctp_hdr = 0; + sctp_chunks_common_hdr_t *sctp_chunk_hdr = 0; + /* Zeroed because the length-check failure path reaches the trace + * code at "done" before any connection lookup result is stored. */ + sctp_connection_t *sctp_conn = 0; + transport_connection_t *tconn; + ip4_header_t *ip4_hdr; + ip6_header_t *ip6_hdr; + u32 error0 = SCTP_ERROR_NO_LISTENER, next0 = SCTP_INPUT_NEXT_DROP; + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + /* Clear the SCTP (not TCP) per-buffer flags */ + vnet_buffer (b0)->sctp.flags = 0; + fib_index0 = vnet_buffer (b0)->ip.fib_index; + + /* Checksum computed by ipx_local no need to compute again */ + + if (is_ip4) + { + ip4_hdr = vlib_buffer_get_current (b0); + sctp_hdr = ip4_next_header (ip4_hdr); + + sctp_full_hdr_t *full_hdr =
(sctp_full_hdr_t *) sctp_hdr; + sctp_chunk_hdr = &full_hdr->common_hdr; + + n_advance_bytes0 = + (ip4_header_bytes (ip4_hdr) + + sizeof (sctp_payload_data_chunk_t)); + n_data_bytes0 = + clib_net_to_host_u16 (ip4_hdr->length) - n_advance_bytes0; + + tconn = session_lookup_connection_wt4 (fib_index0, + &ip4_hdr->dst_address, + &ip4_hdr->src_address, + sctp_hdr->dst_port, + sctp_hdr->src_port, + TRANSPORT_PROTO_SCTP, + my_thread_index, + &is_filtered); + } + else + { + ip6_hdr = vlib_buffer_get_current (b0); + sctp_hdr = ip6_next_header (ip6_hdr); + + sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr; + sctp_chunk_hdr = &full_hdr->common_hdr; + + n_advance_bytes0 = sctp_header_bytes (); + n_data_bytes0 = + clib_net_to_host_u16 (ip6_hdr->payload_length) - + n_advance_bytes0; + n_advance_bytes0 += sizeof (ip6_hdr[0]); + + tconn = session_lookup_connection_wt6 (fib_index0, + &ip6_hdr->dst_address, + &ip6_hdr->src_address, + sctp_hdr->dst_port, + sctp_hdr->src_port, + TRANSPORT_PROTO_SCTP, + my_thread_index, + &is_filtered); + } + + /* Length check */ + if (PREDICT_FALSE (n_advance_bytes0 < 0)) + { + error0 = SCTP_ERROR_LENGTH; + goto done; + } + + sctp_conn = sctp_get_connection_from_transport (tconn); + vnet_sctp_common_hdr_params_net_to_host (sctp_chunk_hdr); + + u8 type = vnet_sctp_get_chunk_type (sctp_chunk_hdr); + +#if SCTP_DEBUG_STATE_MACHINE + u8 idx = sctp_pick_conn_idx_on_state (sctp_conn->state); +#endif + vnet_buffer (b0)->sctp.hdr_offset = + (u8 *) sctp_hdr - (u8 *) vlib_buffer_get_current (b0); + + /* Session exists */ + if (PREDICT_TRUE (0 != sctp_conn)) + { + /* Save connection index */ + vnet_buffer (b0)->sctp.connection_index = tconn->c_index; + vnet_buffer (b0)->sctp.data_offset = n_advance_bytes0; + vnet_buffer (b0)->sctp.data_len = n_data_bytes0; + + next0 = tm->dispatch_table[sctp_conn->state][type].next; + error0 = tm->dispatch_table[sctp_conn->state][type].error; + + SCTP_DBG_STATE_MACHINE ("CONNECTION_INDEX = %u: " + 
"CURRENT_CONNECTION_STATE = %s," + "CHUNK_TYPE_RECEIVED = %s " + "NEXT_PHASE = %s", + sctp_conn->sub_conn + [idx].connection.c_index, + sctp_state_to_string (sctp_conn->state), + sctp_chunk_to_string (type), + phase_to_string (next0)); + + if (type == DATA) + SCTP_ADV_DBG ("n_advance_bytes0 = %u, n_data_bytes0 = %u", + n_advance_bytes0, n_data_bytes0); + + } + else + { + if (is_filtered) + { + next0 = SCTP_INPUT_NEXT_DROP; + error0 = SCTP_ERROR_FILTERED; + } + else if ((is_ip4 && tm->punt_unknown4) || + (!is_ip4 && tm->punt_unknown6)) + { + next0 = SCTP_INPUT_NEXT_PUNT_PHASE; + error0 = SCTP_ERROR_PUNT; + } + else + { + next0 = SCTP_INPUT_NEXT_DROP; + error0 = SCTP_ERROR_NO_LISTENER; + } + SCTP_DBG_STATE_MACHINE ("sctp_conn == NULL, NEXT_PHASE = %s", + phase_to_string (next0)); + sctp_conn = 0; + } + + done: + b0->error = error0 ? node->errors[error0] : 0; + + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + sctp_rx_trace_t *t0 = + vlib_add_trace (vm, node, b0, sizeof (*t0)); + sctp_set_rx_trace_data (t0, sctp_conn, sctp_hdr, b0, is_ip4); + } + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + return from_frame->n_vectors; +} + +static uword +sctp4_input_dispatcher (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return sctp46_input_dispatcher (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +sctp6_input_dispatcher (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return sctp46_input_dispatcher (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sctp4_input_node) = +{ + .function = sctp4_input_dispatcher, + .name = "sctp4-input", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = SCTP_N_ERROR, + .error_strings = sctp_error_strings, + .n_next_nodes = SCTP_INPUT_N_NEXT, + .next_nodes = + { +#define _(s,n) [SCTP_INPUT_NEXT_##s] = n, + foreach_sctp4_input_next +#undef _ + }, + .format_buffer = format_sctp_header, + .format_trace = format_sctp_rx_trace, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (sctp4_input_node, sctp4_input_dispatcher); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sctp6_input_node) = +{ + .function = sctp6_input_dispatcher, + .name = "sctp6-input", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = SCTP_N_ERROR, + .error_strings = sctp_error_strings, + .n_next_nodes = SCTP_INPUT_N_NEXT, + .next_nodes = + { +#define _(s,n) [SCTP_INPUT_NEXT_##s] = n, + foreach_sctp6_input_next +#undef _ + }, + .format_buffer = format_sctp_header, + .format_trace = format_sctp_rx_trace, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (sctp6_input_node, sctp6_input_dispatcher); + +vlib_node_registration_t sctp4_input_node; +vlib_node_registration_t sctp6_input_node; + +static void +sctp_dispatch_table_init (sctp_main_t * tm) +{ + int i, j; + for (i = 0; i < ARRAY_LEN (tm->dispatch_table); i++) + for (j = 0; j < ARRAY_LEN (tm->dispatch_table[i]); j++) + { + tm->dispatch_table[i][j].next = SCTP_INPUT_NEXT_DROP; + tm->dispatch_table[i][j].error = SCTP_ERROR_DISPATCH; + } + +#define _(t,f,n,e) \ +do { \ + tm->dispatch_table[SCTP_STATE_##t][f].next = (n); \ + tm->dispatch_table[SCTP_STATE_##t][f].error = (e); \ +} while (0) + + /* + * SCTP STATE-MACHINE states: + * + * _(CLOSED, "CLOSED") \ + * _(COOKIE_WAIT, "COOKIE_WAIT") \ + * _(COOKIE_ECHOED, "COOKIE_ECHOED") \ + * _(ESTABLISHED, "ESTABLISHED") \ + * _(SHUTDOWN_PENDING, "SHUTDOWN_PENDING") \ + * _(SHUTDOWN_SENT, "SHUTDOWN_SENT") \ + * _(SHUTDOWN_RECEIVED, "SHUTDOWN_RECEIVED") \ + * _(SHUTDOWN_ACK_SENT, "SHUTDOWN_ACK_SENT") + */ + //_(CLOSED, DATA, SCTP_INPUT_NEXT_LISTEN_PHASE, SCTP_ERROR_NONE); /* 
UNEXPECTED DATA chunk which requires special handling */ + _(CLOSED, INIT, SCTP_INPUT_NEXT_LISTEN_PHASE, SCTP_ERROR_NONE); + _(CLOSED, INIT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED INIT_ACK chunk */ + _(CLOSED, SACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SACK_CHUNK_VIOLATION); /* UNEXPECTED SACK chunk */ + _(CLOSED, HEARTBEAT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_CHUNK_VIOLATION); /* UNEXPECTED HEARTBEAT chunk */ + _(CLOSED, HEARTBEAT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_ACK_CHUNK_VIOLATION); /* UNEXPECTED HEARTBEAT_ACK chunk */ + _(CLOSED, ABORT, SCTP_INPUT_NEXT_RCV_PHASE, SCTP_ERROR_NONE); + _(CLOSED, SHUTDOWN, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_CHUNK_VIOLATION); /* UNEXPECTED SHUTDOWN chunk */ + _(CLOSED, SHUTDOWN_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_ACK_CHUNK_VIOLATION); /* UNEXPECTED SHUTDOWN_ACK chunk */ + _(CLOSED, OPERATION_ERROR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_OPERATION_ERROR_VIOLATION); /* UNEXPECTED OPERATION_ERROR chunk */ + _(CLOSED, COOKIE_ECHO, SCTP_INPUT_NEXT_ESTABLISHED_PHASE, SCTP_ERROR_NONE); + _(CLOSED, COOKIE_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED COOKIE_ACK chunk */ + _(CLOSED, ECNE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ECNE_VIOLATION); /* UNEXPECTED ECNE chunk */ + _(CLOSED, CWR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_CWR_VIOLATION); /* UNEXPECTED CWR chunk */ + _(CLOSED, SHUTDOWN_COMPLETE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_COMPLETE_VIOLATION); /* UNEXPECTED SHUTDOWN_COMPLETE chunk */ + + _(COOKIE_WAIT, DATA, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_NONE); + _(COOKIE_WAIT, INIT, SCTP_INPUT_NEXT_RCV_PHASE, SCTP_ERROR_NONE); /* UNEXPECTED INIT chunk which requires special handling */ + _(COOKIE_WAIT, INIT_ACK, SCTP_INPUT_NEXT_RCV_PHASE, SCTP_ERROR_NONE); + _(COOKIE_WAIT, SACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SACK_CHUNK_VIOLATION); /* UNEXPECTED SACK chunk */ + _(COOKIE_WAIT, HEARTBEAT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_CHUNK_VIOLATION); /* UNEXPECTED HEARTBEAT chunk */ + 
_(COOKIE_WAIT, HEARTBEAT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_ACK_CHUNK_VIOLATION); /* UNEXPECTED HEARTBEAT_ACK chunk */ + _(COOKIE_WAIT, ABORT, SCTP_INPUT_NEXT_RCV_PHASE, SCTP_ERROR_NONE); + _(COOKIE_WAIT, SHUTDOWN, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_CHUNK_VIOLATION); /* UNEXPECTED SHUTDOWN chunk */ + _(COOKIE_WAIT, SHUTDOWN_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_ACK_CHUNK_VIOLATION); /* UNEXPECTED SHUTDOWN_ACK chunk */ + _(COOKIE_WAIT, OPERATION_ERROR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_OPERATION_ERROR_VIOLATION); /* UNEXPECTED OPERATION_ERROR chunk */ + _(COOKIE_WAIT, COOKIE_ECHO, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_COOKIE_ECHO_VIOLATION); /* UNEXPECTED COOKIE_ECHO chunk */ + _(COOKIE_WAIT, COOKIE_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED COOKIE_ACK chunk */ + _(COOKIE_WAIT, ECNE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ECNE_VIOLATION); /* UNEXPECTED ECNE chunk */ + _(COOKIE_WAIT, CWR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_CWR_VIOLATION); /* UNEXPECTED CWR chunk */ + _(COOKIE_WAIT, SHUTDOWN_COMPLETE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_COMPLETE_VIOLATION); /* UNEXPECTED SHUTDOWN_COMPLETE chunk */ + + _(COOKIE_ECHOED, DATA, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_NONE); + _(COOKIE_ECHOED, INIT, SCTP_INPUT_NEXT_RCV_PHASE, SCTP_ERROR_NONE); /* UNEXPECTED INIT chunk which requires special handling */ + _(COOKIE_ECHOED, INIT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED INIT_ACK chunk */ + _(COOKIE_ECHOED, SACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SACK_CHUNK_VIOLATION); /* UNEXPECTED SACK chunk */ + _(COOKIE_ECHOED, HEARTBEAT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_CHUNK_VIOLATION); /* UNEXPECTED HEARTBEAT chunk */ + _(COOKIE_ECHOED, HEARTBEAT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_ACK_CHUNK_VIOLATION); /* UNEXPECTED HEARTBEAT_ACK chunk */ + _(COOKIE_ECHOED, ABORT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ABORT_CHUNK_VIOLATION); /* UNEXPECTED ABORT chunk */ + _(COOKIE_ECHOED, SHUTDOWN, SCTP_INPUT_NEXT_DROP, 
SCTP_ERROR_SHUTDOWN_CHUNK_VIOLATION); /* UNEXPECTED SHUTDOWN chunk */ + _(COOKIE_ECHOED, SHUTDOWN_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_ACK_CHUNK_VIOLATION); /* UNEXPECTED SHUTDOWN_ACK chunk */ + _(COOKIE_ECHOED, OPERATION_ERROR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_OPERATION_ERROR_VIOLATION); /* UNEXPECTED OPERATION_ERROR chunk */ + _(COOKIE_ECHOED, COOKIE_ECHO, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_COOKIE_ECHO_VIOLATION); /* UNEXPECTED COOKIE_ECHO chunk */ + _(COOKIE_ECHOED, COOKIE_ACK, SCTP_INPUT_NEXT_ESTABLISHED_PHASE, + SCTP_ERROR_NONE); + _(COOKIE_ECHOED, ECNE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ECNE_VIOLATION); /* UNEXPECTED ECNE chunk */ + _(COOKIE_ECHOED, CWR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_CWR_VIOLATION); /* UNEXPECTED CWR chunk */ + _(COOKIE_ECHOED, SHUTDOWN_COMPLETE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_COMPLETE_VIOLATION); /* UNEXPECTED SHUTDOWN_COMPLETE chunk */ + + _(ESTABLISHED, DATA, SCTP_INPUT_NEXT_ESTABLISHED_PHASE, SCTP_ERROR_NONE); + _(ESTABLISHED, INIT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_INIT_CHUNK_VIOLATION); /* UNEXPECTED INIT chunk */ + _(ESTABLISHED, INIT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED INIT_ACK chunk */ + _(ESTABLISHED, SACK, SCTP_INPUT_NEXT_ESTABLISHED_PHASE, SCTP_ERROR_NONE); + _(ESTABLISHED, HEARTBEAT, SCTP_INPUT_NEXT_ESTABLISHED_PHASE, + SCTP_ERROR_NONE); + _(ESTABLISHED, HEARTBEAT_ACK, SCTP_INPUT_NEXT_ESTABLISHED_PHASE, + SCTP_ERROR_NONE); + _(ESTABLISHED, ABORT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ABORT_CHUNK_VIOLATION); /* UNEXPECTED ABORT chunk */ + _(ESTABLISHED, SHUTDOWN, SCTP_INPUT_NEXT_SHUTDOWN_PHASE, SCTP_ERROR_NONE); + _(ESTABLISHED, SHUTDOWN_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_ACK_CHUNK_VIOLATION); /* UNEXPECTED SHUTDOWN_ACK chunk */ + _(ESTABLISHED, OPERATION_ERROR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_OPERATION_ERROR_VIOLATION); /* UNEXPECTED OPERATION_ERROR chunk */ + _(ESTABLISHED, COOKIE_ECHO, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_COOKIE_ECHO_VIOLATION); /* UNEXPECTED COOKIE_ECHO chunk 
*/ + _(ESTABLISHED, COOKIE_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED COOKIE_ACK chunk */ + _(ESTABLISHED, ECNE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ECNE_VIOLATION); /* UNEXPECTED ECNE chunk */ + _(ESTABLISHED, CWR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_CWR_VIOLATION); /* UNEXPECTED CWR chunk */ + _(ESTABLISHED, SHUTDOWN_COMPLETE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_COMPLETE_VIOLATION); /* UNEXPECTED SHUTDOWN_COMPLETE chunk */ + + _(SHUTDOWN_PENDING, DATA, SCTP_INPUT_NEXT_SHUTDOWN_PHASE, SCTP_ERROR_NONE); + _(SHUTDOWN_PENDING, INIT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_INIT_CHUNK_VIOLATION); /* UNEXPECTED INIT chunk */ + _(SHUTDOWN_PENDING, INIT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED INIT_ACK chunk */ + _(SHUTDOWN_PENDING, SACK, SCTP_INPUT_NEXT_LISTEN_PHASE, SCTP_ERROR_NONE); + _(SHUTDOWN_PENDING, HEARTBEAT, SCTP_INPUT_NEXT_LISTEN_PHASE, + SCTP_ERROR_NONE); + _(SHUTDOWN_PENDING, HEARTBEAT_ACK, SCTP_INPUT_NEXT_LISTEN_PHASE, + SCTP_ERROR_NONE); + _(SHUTDOWN_PENDING, ABORT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ABORT_CHUNK_VIOLATION); /* UNEXPECTED ABORT chunk */ + _(SHUTDOWN_PENDING, SHUTDOWN, SCTP_INPUT_NEXT_SHUTDOWN_PHASE, + SCTP_ERROR_NONE); + _(SHUTDOWN_PENDING, SHUTDOWN_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_ACK_CHUNK_VIOLATION); /* UNEXPECTED SHUTDOWN_ACK chunk */ + _(SHUTDOWN_PENDING, OPERATION_ERROR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_OPERATION_ERROR_VIOLATION); /* UNEXPECTED OPERATION_ERROR chunk */ + _(SHUTDOWN_PENDING, COOKIE_ECHO, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_COOKIE_ECHO_VIOLATION); /* UNEXPECTED COOKIE_ECHO chunk */ + _(SHUTDOWN_PENDING, COOKIE_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED COOKIE_ACK chunk */ + _(SHUTDOWN_PENDING, ECNE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ECNE_VIOLATION); /* UNEXPECTED ECNE chunk */ + _(SHUTDOWN_PENDING, CWR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_CWR_VIOLATION); /* UNEXPECTED CWR chunk */ + _(SHUTDOWN_PENDING, SHUTDOWN_COMPLETE, SCTP_INPUT_NEXT_DROP, 
SCTP_ERROR_SHUTDOWN_COMPLETE_VIOLATION); /* UNEXPECTED SHUTDOWN_COMPLETE chunk */ + + _(SHUTDOWN_SENT, DATA, SCTP_INPUT_NEXT_SHUTDOWN_PHASE, SCTP_ERROR_NONE); + _(SHUTDOWN_SENT, INIT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_INIT_CHUNK_VIOLATION); /* UNEXPECTED INIT chunk */ + _(SHUTDOWN_SENT, INIT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED INIT_ACK chunk */ + _(SHUTDOWN_SENT, SACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SACK_CHUNK_VIOLATION); /* UNEXPECTED SACK chunk */ + _(SHUTDOWN_SENT, HEARTBEAT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_CHUNK_VIOLATION); /* UNEXPECTED HEARTBEAT chunk */ + _(SHUTDOWN_SENT, HEARTBEAT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_ACK_CHUNK_VIOLATION); /* UNEXPECTED HEARTBEAT_ACK chunk */ + _(SHUTDOWN_SENT, ABORT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ABORT_CHUNK_VIOLATION); /* UNEXPECTED ABORT chunk */ + _(SHUTDOWN_SENT, SHUTDOWN, SCTP_INPUT_NEXT_SHUTDOWN_PHASE, SCTP_ERROR_NONE); + _(SHUTDOWN_SENT, SHUTDOWN_ACK, SCTP_INPUT_NEXT_SHUTDOWN_PHASE, + SCTP_ERROR_NONE); + _(SHUTDOWN_SENT, COOKIE_ECHO, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_COOKIE_ECHO_VIOLATION); /* UNEXPECTED COOKIE_ECHO chunk */ + _(SHUTDOWN_SENT, COOKIE_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED COOKIE_ACK chunk */ + _(SHUTDOWN_SENT, ECNE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ECNE_VIOLATION); /* UNEXPECTED ECNE chunk */ + _(SHUTDOWN_SENT, CWR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_CWR_VIOLATION); /* UNEXPECTED CWR chunk */ + _(SHUTDOWN_SENT, SHUTDOWN_COMPLETE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_COMPLETE_VIOLATION); /* UNEXPECTED SHUTDOWN_COMPLETE chunk */ + + _(SHUTDOWN_RECEIVED, DATA, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_DATA_CHUNK_VIOLATION); /* UNEXPECTED DATA chunk */ + _(SHUTDOWN_RECEIVED, INIT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_INIT_CHUNK_VIOLATION); /* UNEXPECTED INIT chunk */ + _(SHUTDOWN_RECEIVED, INIT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED INIT_ACK chunk */ + _(SHUTDOWN_RECEIVED, SACK, SCTP_INPUT_NEXT_DROP, 
SCTP_ERROR_SACK_CHUNK_VIOLATION); /* UNEXPECTED SACK chunk */ + _(SHUTDOWN_RECEIVED, HEARTBEAT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_CHUNK_VIOLATION); /* UNEXPECTED HEARTBEAT chunk */ + _(SHUTDOWN_RECEIVED, HEARTBEAT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_ACK_CHUNK_VIOLATION); /* UNEXPECTED HEARTBEAT_ACK chunk */ + _(SHUTDOWN_RECEIVED, ABORT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ABORT_CHUNK_VIOLATION); /* UNEXPECTED ABORT chunk */ + _(SHUTDOWN_RECEIVED, SHUTDOWN, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_CHUNK_VIOLATION); /* UNEXPECTED SHUTDOWN chunk */ + _(SHUTDOWN_RECEIVED, SHUTDOWN_ACK, SCTP_INPUT_NEXT_SHUTDOWN_PHASE, + SCTP_ERROR_NONE); + _(SHUTDOWN_RECEIVED, COOKIE_ECHO, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_COOKIE_ECHO_VIOLATION); /* UNEXPECTED COOKIE_ECHO chunk */ + _(SHUTDOWN_RECEIVED, COOKIE_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED COOKIE_ACK chunk */ + _(SHUTDOWN_RECEIVED, ECNE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ECNE_VIOLATION); /* UNEXPECTED ECNE chunk */ + _(SHUTDOWN_RECEIVED, CWR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_CWR_VIOLATION); /* UNEXPECTED CWR chunk */ + _(SHUTDOWN_RECEIVED, SHUTDOWN_COMPLETE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_COMPLETE_VIOLATION); /* UNEXPECTED SHUTDOWN_COMPLETE chunk */ + + _(SHUTDOWN_ACK_SENT, DATA, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_DATA_CHUNK_VIOLATION); /* UNEXPECTED DATA chunk */ + _(SHUTDOWN_ACK_SENT, INIT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_INIT_CHUNK_VIOLATION); /* UNEXPECTED INIT chunk */ + _(SHUTDOWN_ACK_SENT, INIT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED INIT_ACK chunk */ + _(SHUTDOWN_ACK_SENT, SACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SACK_CHUNK_VIOLATION); /* UNEXPECTED SACK chunk */ + _(SHUTDOWN_ACK_SENT, HEARTBEAT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_CHUNK_VIOLATION); /* UNEXPECTED HEARTBEAT chunk */ + _(SHUTDOWN_ACK_SENT, HEARTBEAT_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_HEARTBEAT_ACK_CHUNK_VIOLATION); /* UNEXPECTED HEARTBEAT_ACK chunk */ + _(SHUTDOWN_ACK_SENT,
ABORT, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ABORT_CHUNK_VIOLATION); /* UNEXPECTED ABORT chunk */ + _(SHUTDOWN_ACK_SENT, SHUTDOWN, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_CHUNK_VIOLATION); /* UNEXPECTED SHUTDOWN chunk */ + _(SHUTDOWN_ACK_SENT, SHUTDOWN_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_SHUTDOWN_ACK_CHUNK_VIOLATION); /* UNEXPECTED SHUTDOWN_ACK chunk */ + _(SHUTDOWN_ACK_SENT, COOKIE_ECHO, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_COOKIE_ECHO_VIOLATION); /* UNEXPECTED COOKIE_ECHO chunk */ + _(SHUTDOWN_ACK_SENT, COOKIE_ACK, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ACK_DUP); /* UNEXPECTED COOKIE_ACK chunk */ + _(SHUTDOWN_ACK_SENT, ECNE, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_ECNE_VIOLATION); /* UNEXPECTED ECNE chunk */ + _(SHUTDOWN_ACK_SENT, CWR, SCTP_INPUT_NEXT_DROP, SCTP_ERROR_CWR_VIOLATION); /* UNEXPECTED CWR chunk */ + _(SHUTDOWN_ACK_SENT, SHUTDOWN_COMPLETE, SCTP_INPUT_NEXT_SHUTDOWN_PHASE, + SCTP_ERROR_NONE); + + /* TODO: Handle COOKIE ECHO when a TCB Exists */ + +#undef _ +} + +clib_error_t * +sctp_input_init (vlib_main_t * vm) +{ + clib_error_t *error = 0; + sctp_main_t *tm = vnet_get_sctp_main (); + + if ((error = vlib_call_init_function (vm, sctp_init))) + return error; + + /* Initialize dispatch table. */ + sctp_dispatch_table_init (tm); + + return error; +} + +VLIB_INIT_FUNCTION (sctp_input_init); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/sctp/sctp_output.c b/src/vnet/sctp/sctp_output.c new file mode 100644 index 00000000000..841444e20a3 --- /dev/null +++ b/src/vnet/sctp/sctp_output.c @@ -0,0 +1,1331 @@ +/* + * Copyright (c) 2017 SUSE LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vnet/sctp/sctp.h> +#include <vnet/sctp/sctp_debug.h> +#include <vppinfra/random.h> + +vlib_node_registration_t sctp4_output_node; +vlib_node_registration_t sctp6_output_node; + +typedef enum _sctp_output_next +{ + SCTP_OUTPUT_NEXT_DROP, + SCTP_OUTPUT_NEXT_IP_LOOKUP, + SCTP_OUTPUT_N_NEXT +} sctp_output_next_t; + +#define foreach_sctp4_output_next \ + _ (DROP, "error-drop") \ + _ (IP_LOOKUP, "ip4-lookup") + +#define foreach_sctp6_output_next \ + _ (DROP, "error-drop") \ + _ (IP_LOOKUP, "ip6-lookup") + +static char *sctp_error_strings[] = { +#define sctp_error(n,s) s, +#include <vnet/sctp/sctp_error.def> +#undef sctp_error +}; + +typedef struct +{ + sctp_header_t sctp_header; + sctp_connection_t sctp_connection; +} sctp_tx_trace_t; + +/** + * Flush tx frame populated by retransmits and timer pops + */ +void +sctp_flush_frame_to_output (vlib_main_t * vm, u8 thread_index, u8 is_ip4) +{ + if (sctp_main.tx_frames[!is_ip4][thread_index]) + { + u32 next_index; + next_index = is_ip4 ? sctp4_output_node.index : sctp6_output_node.index; + vlib_put_frame_to_node (vm, next_index, + sctp_main.tx_frames[!is_ip4][thread_index]); + sctp_main.tx_frames[!is_ip4][thread_index] = 0; + } +} + +/** + * Flush ip lookup tx frames populated by timer pops + */ +always_inline void +sctp_flush_frame_to_ip_lookup (vlib_main_t * vm, u8 thread_index, u8 is_ip4) +{ + if (sctp_main.ip_lookup_tx_frames[!is_ip4][thread_index]) + { + u32 next_index; + next_index = is_ip4 ? 
ip4_lookup_node.index : ip6_lookup_node.index; + vlib_put_frame_to_node (vm, next_index, + sctp_main.ip_lookup_tx_frames[!is_ip4] + [thread_index]); + sctp_main.ip_lookup_tx_frames[!is_ip4][thread_index] = 0; + } +} + +/** + * Flush v4 and v6 sctp and ip-lookup tx frames for thread index + */ +void +sctp_flush_frames_to_output (u8 thread_index) +{ + vlib_main_t *vm = vlib_get_main (); + sctp_flush_frame_to_output (vm, thread_index, 1); + sctp_flush_frame_to_output (vm, thread_index, 0); + sctp_flush_frame_to_ip_lookup (vm, thread_index, 1); + sctp_flush_frame_to_ip_lookup (vm, thread_index, 0); +} + +u32 +ip4_sctp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, + ip4_header_t * ip0) +{ + ip_csum_t checksum; + u32 ip_header_length, payload_length_host_byte_order; + u32 n_this_buffer, n_bytes_left, n_ip_bytes_this_buffer; + void *data_this_buffer; + + /* Initialize checksum with ip header. */ + ip_header_length = ip4_header_bytes (ip0); + payload_length_host_byte_order = + clib_net_to_host_u16 (ip0->length) - ip_header_length; + checksum = + clib_host_to_net_u32 (payload_length_host_byte_order + + (ip0->protocol << 16)); + + if (BITS (uword) == 32) + { + checksum = + ip_csum_with_carry (checksum, + clib_mem_unaligned (&ip0->src_address, u32)); + checksum = + ip_csum_with_carry (checksum, + clib_mem_unaligned (&ip0->dst_address, u32)); + } + else + checksum = + ip_csum_with_carry (checksum, + clib_mem_unaligned (&ip0->src_address, u64)); + + n_bytes_left = n_this_buffer = payload_length_host_byte_order; + data_this_buffer = (void *) ip0 + ip_header_length; + n_ip_bytes_this_buffer = + p0->current_length - (((u8 *) ip0 - p0->data) - p0->current_data); + if (n_this_buffer + ip_header_length > n_ip_bytes_this_buffer) + { + n_this_buffer = n_ip_bytes_this_buffer > ip_header_length ? 
+ n_ip_bytes_this_buffer - ip_header_length : 0; + } + while (1) + { + checksum = + ip_incremental_checksum (checksum, data_this_buffer, n_this_buffer); + n_bytes_left -= n_this_buffer; + if (n_bytes_left == 0) + break; + + ASSERT (p0->flags & VLIB_BUFFER_NEXT_PRESENT); + p0 = vlib_get_buffer (vm, p0->next_buffer); + data_this_buffer = vlib_buffer_get_current (p0); + n_this_buffer = p0->current_length; + } + + return checksum; +} + +u32 +ip6_sctp_compute_checksum (vlib_main_t * vm, vlib_buffer_t * p0, + ip6_header_t * ip0, int *bogus_lengthp) +{ + ip_csum_t checksum; + u16 payload_length_host_byte_order; + u32 i, n_this_buffer, n_bytes_left; + u32 headers_size = sizeof (ip0[0]); + void *data_this_buffer; + + ASSERT (bogus_lengthp); + *bogus_lengthp = 0; + + /* Initialize checksum with ip header. */ + checksum = ip0->payload_length + clib_host_to_net_u16 (ip0->protocol); + payload_length_host_byte_order = clib_net_to_host_u16 (ip0->payload_length); + data_this_buffer = (void *) (ip0 + 1); + + for (i = 0; i < ARRAY_LEN (ip0->src_address.as_uword); i++) + { + checksum = ip_csum_with_carry (checksum, + clib_mem_unaligned (&ip0-> + src_address.as_uword + [i], uword)); + checksum = + ip_csum_with_carry (checksum, + clib_mem_unaligned (&ip0->dst_address.as_uword[i], + uword)); + } + + /* some icmp packets may come with a "router alert" hop-by-hop extension header (e.g., mldv2 packets) + * or UDP-Ping packets */ + if (PREDICT_FALSE (ip0->protocol == IP_PROTOCOL_IP6_HOP_BY_HOP_OPTIONS)) + { + u32 skip_bytes; + ip6_hop_by_hop_ext_t *ext_hdr = + (ip6_hop_by_hop_ext_t *) data_this_buffer; + + /* validate really icmp6 next */ + ASSERT ((ext_hdr->next_hdr == IP_PROTOCOL_SCTP)); + + skip_bytes = 8 * (1 + ext_hdr->n_data_u64s); + data_this_buffer = (void *) ((u8 *) data_this_buffer + skip_bytes); + + payload_length_host_byte_order -= skip_bytes; + headers_size += skip_bytes; + } + + n_bytes_left = n_this_buffer = payload_length_host_byte_order; + if (p0 && n_this_buffer + 
headers_size > p0->current_length) + n_this_buffer = + p0->current_length > + headers_size ? p0->current_length - headers_size : 0; + while (1) + { + checksum = + ip_incremental_checksum (checksum, data_this_buffer, n_this_buffer); + n_bytes_left -= n_this_buffer; + if (n_bytes_left == 0) + break; + + if (!(p0->flags & VLIB_BUFFER_NEXT_PRESENT)) + { + *bogus_lengthp = 1; + return 0xfefe; + } + p0 = vlib_get_buffer (vm, p0->next_buffer); + data_this_buffer = vlib_buffer_get_current (p0); + n_this_buffer = p0->current_length; + } + + return checksum; +} + +void +sctp_push_ip_hdr (sctp_main_t * tm, sctp_sub_connection_t * tc, + vlib_buffer_t * b) +{ + sctp_header_t *th = vlib_buffer_get_current (b); + vlib_main_t *vm = vlib_get_main (); + if (tc->c_is_ip4) + { + ip4_header_t *ih; + ih = vlib_buffer_push_ip4 (vm, b, &tc->c_lcl_ip4, + &tc->c_rmt_ip4, IP_PROTOCOL_SCTP, 1); + th->checksum = ip4_sctp_compute_checksum (vm, b, ih); + } + else + { + ip6_header_t *ih; + int bogus = ~0; + + ih = vlib_buffer_push_ip6 (vm, b, &tc->c_lcl_ip6, + &tc->c_rmt_ip6, IP_PROTOCOL_SCTP); + th->checksum = ip6_sctp_compute_checksum (vm, b, ih, &bogus); + ASSERT (!bogus); + } +} + +always_inline void * +sctp_reuse_buffer (vlib_main_t * vm, vlib_buffer_t * b) +{ + if (b->flags & VLIB_BUFFER_NEXT_PRESENT) + vlib_buffer_free_one (vm, b->next_buffer); + /* Zero all flags but free list index and trace flag */ + b->flags &= VLIB_BUFFER_NEXT_PRESENT - 1; + b->current_data = 0; + b->current_length = 0; + b->total_length_not_including_first_buffer = 0; + vnet_buffer (b)->sctp.flags = 0; + + /* Leave enough space for headers */ + return vlib_buffer_make_headroom (b, MAX_HDRS_LEN); +} + +always_inline void * +sctp_init_buffer (vlib_main_t * vm, vlib_buffer_t * b) +{ + ASSERT ((b->flags & VLIB_BUFFER_NEXT_PRESENT) == 0); + b->flags &= VLIB_BUFFER_FREE_LIST_INDEX_MASK; + b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; + b->total_length_not_including_first_buffer = 0; + vnet_buffer (b)->sctp.flags = 0; + 
VLIB_BUFFER_TRACE_TRAJECTORY_INIT (b); + /* Leave enough space for headers */ + return vlib_buffer_make_headroom (b, MAX_HDRS_LEN); +} + +always_inline int +sctp_alloc_tx_buffers (sctp_main_t * tm, u8 thread_index, u32 n_free_buffers) +{ + vlib_main_t *vm = vlib_get_main (); + u32 current_length = vec_len (tm->tx_buffers[thread_index]); + u32 n_allocated; + + vec_validate (tm->tx_buffers[thread_index], + current_length + n_free_buffers - 1); + n_allocated = + vlib_buffer_alloc (vm, &tm->tx_buffers[thread_index][current_length], + n_free_buffers); + _vec_len (tm->tx_buffers[thread_index]) = current_length + n_allocated; + /* buffer shortage, report failure */ + if (vec_len (tm->tx_buffers[thread_index]) == 0) + { + clib_warning ("out of buffers"); + return -1; + } + return 0; +} + +always_inline int +sctp_get_free_buffer_index (sctp_main_t * tm, u32 * bidx) +{ + u32 *my_tx_buffers; + u32 thread_index = vlib_get_thread_index (); + if (PREDICT_FALSE (vec_len (tm->tx_buffers[thread_index]) == 0)) + { + if (sctp_alloc_tx_buffers (tm, thread_index, VLIB_FRAME_SIZE)) + return -1; + } + my_tx_buffers = tm->tx_buffers[thread_index]; + *bidx = my_tx_buffers[vec_len (my_tx_buffers) - 1]; + _vec_len (my_tx_buffers) -= 1; + return 0; +} + +always_inline void +sctp_enqueue_to_output_i (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, + u8 is_ip4, u8 flush) +{ + sctp_main_t *tm = vnet_get_sctp_main (); + u32 thread_index = vlib_get_thread_index (); + u32 *to_next, next_index; + vlib_frame_t *f; + + b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; + b->error = 0; + + /* Decide where to send the packet */ + next_index = is_ip4 ? 
sctp4_output_node.index : sctp6_output_node.index; + sctp_trajectory_add_start (b, 2); + + /* Get frame to v4/6 output node */ + f = tm->tx_frames[!is_ip4][thread_index]; + if (!f) + { + f = vlib_get_frame_to_node (vm, next_index); + ASSERT (f); + tm->tx_frames[!is_ip4][thread_index] = f; + } + to_next = vlib_frame_vector_args (f); + to_next[f->n_vectors] = bi; + f->n_vectors += 1; + if (flush || f->n_vectors == VLIB_FRAME_SIZE) + { + vlib_put_frame_to_node (vm, next_index, f); + tm->tx_frames[!is_ip4][thread_index] = 0; + } +} + +always_inline void +sctp_enqueue_to_output_now (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, + u8 is_ip4) +{ + sctp_enqueue_to_output_i (vm, b, bi, is_ip4, 1); +} + +always_inline void +sctp_enqueue_to_ip_lookup_i (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, + u8 is_ip4, u8 flush) +{ + sctp_main_t *tm = vnet_get_sctp_main (); + u32 thread_index = vlib_get_thread_index (); + u32 *to_next, next_index; + vlib_frame_t *f; + + b->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; + b->error = 0; + + /* Default FIB for now */ + vnet_buffer (b)->sw_if_index[VLIB_TX] = 0; + + /* Send to IP lookup */ + next_index = is_ip4 ? 
ip4_lookup_node.index : ip6_lookup_node.index; + if (VLIB_BUFFER_TRACE_TRAJECTORY > 0) + { + b->pre_data[0] = 2; + b->pre_data[1] = next_index; + } + + f = tm->ip_lookup_tx_frames[!is_ip4][thread_index]; + if (!f) + { + f = vlib_get_frame_to_node (vm, next_index); + ASSERT (f); + tm->ip_lookup_tx_frames[!is_ip4][thread_index] = f; + } + + to_next = vlib_frame_vector_args (f); + to_next[f->n_vectors] = bi; + f->n_vectors += 1; + if (flush || f->n_vectors == VLIB_FRAME_SIZE) + { + vlib_put_frame_to_node (vm, next_index, f); + tm->ip_lookup_tx_frames[!is_ip4][thread_index] = 0; + } +} + +always_inline void +sctp_enqueue_to_ip_lookup (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, + u8 is_ip4) +{ + sctp_enqueue_to_ip_lookup_i (vm, b, bi, is_ip4, 0); +} + +always_inline void +sctp_enqueue_to_ip_lookup_now (vlib_main_t * vm, vlib_buffer_t * b, u32 bi, + u8 is_ip4) +{ + sctp_enqueue_to_ip_lookup_i (vm, b, bi, is_ip4, 1); +} + +/** + * Convert buffer to INIT + */ +void +sctp_prepare_init_chunk (sctp_connection_t * sctp_conn, vlib_buffer_t * b) +{ + u32 random_seed = random_default_seed (); + u16 alloc_bytes = sizeof (sctp_init_chunk_t); + sctp_sub_connection_t *sub_conn = + &sctp_conn->sub_conn[sctp_pick_conn_idx_on_chunk (INIT)]; + + sctp_ipv4_addr_param_t *ip4_param = 0; + sctp_ipv6_addr_param_t *ip6_param = 0; + + if (sub_conn->c_is_ip4) + alloc_bytes += sizeof (sctp_ipv4_addr_param_t); + else + alloc_bytes += sizeof (sctp_ipv6_addr_param_t); + + /* As per RFC 4960 the chunk_length value does NOT contemplate + * the size of the first header (see sctp_header_t) and any padding + */ + u16 chunk_len = alloc_bytes - sizeof (sctp_header_t); + + alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); + + sctp_init_chunk_t *init_chunk = vlib_buffer_push_uninit (b, alloc_bytes); + + u16 pointer_offset = sizeof (init_chunk); + if (sub_conn->c_is_ip4) + { + ip4_param = (sctp_ipv4_addr_param_t *) init_chunk + pointer_offset; + ip4_param->address.as_u32 = 
sub_conn->c_lcl_ip.ip4.as_u32; + + pointer_offset += sizeof (sctp_ipv4_addr_param_t); + } + else + { + ip6_param = (sctp_ipv6_addr_param_t *) init_chunk + pointer_offset; + ip6_param->address.as_u64[0] = sub_conn->c_lcl_ip.ip6.as_u64[0]; + ip6_param->address.as_u64[1] = sub_conn->c_lcl_ip.ip6.as_u64[1]; + + pointer_offset += sizeof (sctp_ipv6_addr_param_t); + } + + init_chunk->sctp_hdr.src_port = sub_conn->c_lcl_port; /* No need of host_to_net conversion, already in net-byte order */ + init_chunk->sctp_hdr.dst_port = sub_conn->c_rmt_port; /* No need of host_to_net conversion, already in net-byte order */ + init_chunk->sctp_hdr.checksum = 0; + /* The sender of an INIT must set the VERIFICATION_TAG to 0 as per RFC 4960 Section 8.5.1 */ + init_chunk->sctp_hdr.verification_tag = 0x0; + + vnet_sctp_set_chunk_type (&init_chunk->chunk_hdr, INIT); + vnet_sctp_set_chunk_length (&init_chunk->chunk_hdr, chunk_len); + vnet_sctp_common_hdr_params_host_to_net (&init_chunk->chunk_hdr); + + init_chunk->a_rwnd = clib_host_to_net_u32 (DEFAULT_A_RWND); + init_chunk->initiate_tag = clib_host_to_net_u32 (random_u32 (&random_seed)); + init_chunk->inboud_streams_count = + clib_host_to_net_u16 (INBOUND_STREAMS_COUNT); + init_chunk->outbound_streams_count = + clib_host_to_net_u16 (OUTBOUND_STREAMS_COUNT); + + sctp_conn->local_tag = init_chunk->initiate_tag; + + vnet_buffer (b)->sctp.connection_index = sub_conn->c_c_index; + + SCTP_DBG_STATE_MACHINE ("CONN_INDEX = %u, CURR_CONN_STATE = %u (%s), " + "CHUNK_TYPE = %s, " + "SRC_PORT = %u, DST_PORT = %u", + sub_conn->connection.c_index, + sctp_conn->state, + sctp_state_to_string (sctp_conn->state), + sctp_chunk_to_string (INIT), + init_chunk->sctp_hdr.src_port, + init_chunk->sctp_hdr.dst_port); +} + +u64 +sctp_compute_mac () +{ + return 0x0; +} + +void +sctp_prepare_cookie_ack_chunk (sctp_connection_t * tc, vlib_buffer_t * b) +{ + vlib_main_t *vm = vlib_get_main (); + u8 idx = sctp_pick_conn_idx_on_chunk (COOKIE_ACK); + + sctp_reuse_buffer (vm, 
b); + + u16 alloc_bytes = sizeof (sctp_cookie_ack_chunk_t); + + /* As per RFC 4960 the chunk_length value does NOT contemplate + * the size of the first header (see sctp_header_t) and any padding + */ + u16 chunk_len = alloc_bytes - sizeof (sctp_header_t); + + alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); + + sctp_cookie_ack_chunk_t *cookie_ack_chunk = + vlib_buffer_push_uninit (b, alloc_bytes); + + cookie_ack_chunk->sctp_hdr.checksum = 0; + cookie_ack_chunk->sctp_hdr.src_port = tc->sub_conn[idx].connection.lcl_port; + cookie_ack_chunk->sctp_hdr.dst_port = tc->sub_conn[idx].connection.rmt_port; + cookie_ack_chunk->sctp_hdr.verification_tag = tc->remote_tag; + vnet_sctp_set_chunk_type (&cookie_ack_chunk->chunk_hdr, COOKIE_ACK); + vnet_sctp_set_chunk_length (&cookie_ack_chunk->chunk_hdr, chunk_len); + + vnet_buffer (b)->sctp.connection_index = + tc->sub_conn[idx].connection.c_index; +} + +void +sctp_prepare_cookie_echo_chunk (sctp_connection_t * tc, vlib_buffer_t * b, + sctp_state_cookie_param_t * sc) +{ + vlib_main_t *vm = vlib_get_main (); + u8 idx = sctp_pick_conn_idx_on_chunk (COOKIE_ECHO); + + sctp_reuse_buffer (vm, b); + + /* The minimum size of the message is given by the sctp_init_ack_chunk_t */ + u16 alloc_bytes = sizeof (sctp_cookie_echo_chunk_t); + /* As per RFC 4960 the chunk_length value does NOT contemplate + * the size of the first header (see sctp_header_t) and any padding + */ + u16 chunk_len = alloc_bytes - sizeof (sctp_header_t); + alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); + sctp_cookie_echo_chunk_t *cookie_echo_chunk = + vlib_buffer_push_uninit (b, alloc_bytes); + cookie_echo_chunk->sctp_hdr.checksum = 0; + cookie_echo_chunk->sctp_hdr.src_port = + tc->sub_conn[idx].connection.lcl_port; + cookie_echo_chunk->sctp_hdr.dst_port = + tc->sub_conn[idx].connection.rmt_port; + cookie_echo_chunk->sctp_hdr.verification_tag = tc->remote_tag; + vnet_sctp_set_chunk_type (&cookie_echo_chunk->chunk_hdr, COOKIE_ECHO); + 
vnet_sctp_set_chunk_length (&cookie_echo_chunk->chunk_hdr, chunk_len); + clib_memcpy (&(cookie_echo_chunk->cookie), sc, + sizeof (sctp_state_cookie_param_t)); + vnet_buffer (b)->sctp.connection_index = + tc->sub_conn[idx].connection.c_index; +} + +/** + * Convert buffer to INIT-ACK + */ +void +sctp_prepare_initack_chunk (sctp_connection_t * tc, vlib_buffer_t * b, + ip4_address_t * ip4_addr, + ip6_address_t * ip6_addr) +{ + vlib_main_t *vm = vlib_get_main (); + sctp_ipv4_addr_param_t *ip4_param = 0; + sctp_ipv6_addr_param_t *ip6_param = 0; + u8 idx = sctp_pick_conn_idx_on_chunk (INIT_ACK); + u32 random_seed = random_default_seed (); + + sctp_reuse_buffer (vm, b); + + /* The minimum size of the message is given by the sctp_init_ack_chunk_t */ + u16 alloc_bytes = + sizeof (sctp_init_ack_chunk_t) + sizeof (sctp_state_cookie_param_t); + + if (PREDICT_TRUE (ip4_addr != NULL)) + { + /* Create room for variable-length fields in the INIT_ACK chunk */ + alloc_bytes += SCTP_IPV4_ADDRESS_TYPE_LENGTH; + } + if (PREDICT_TRUE (ip6_addr != NULL)) + { + /* Create room for variable-length fields in the INIT_ACK chunk */ + alloc_bytes += SCTP_IPV6_ADDRESS_TYPE_LENGTH; + } + + if (tc->sub_conn[idx].connection.is_ip4) + alloc_bytes += sizeof (sctp_ipv4_addr_param_t); + else + alloc_bytes += sizeof (sctp_ipv6_addr_param_t); + + /* As per RFC 4960 the chunk_length value does NOT contemplate + * the size of the first header (see sctp_header_t) and any padding + */ + u16 chunk_len = alloc_bytes - sizeof (sctp_header_t); + + alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); + + sctp_init_ack_chunk_t *init_ack_chunk = + vlib_buffer_push_uninit (b, alloc_bytes); + + u16 pointer_offset = sizeof (sctp_init_ack_chunk_t); + + /* Create State Cookie parameter */ + sctp_state_cookie_param_t *state_cookie_param = + (sctp_state_cookie_param_t *) ((char *) init_ack_chunk + pointer_offset); + + state_cookie_param->param_hdr.type = + clib_host_to_net_u16 (SCTP_STATE_COOKIE_TYPE); + 
state_cookie_param->param_hdr.length = + clib_host_to_net_u16 (sizeof (sctp_state_cookie_param_t)); + state_cookie_param->creation_time = clib_host_to_net_u32 (sctp_time_now ()); + state_cookie_param->cookie_lifespan = + clib_host_to_net_u32 (SCTP_VALID_COOKIE_LIFE); + state_cookie_param->mac = clib_host_to_net_u64 (sctp_compute_mac ()); + + pointer_offset += sizeof (sctp_state_cookie_param_t); + + if (PREDICT_TRUE (ip4_addr != NULL)) + { + sctp_ipv4_addr_param_t *ipv4_addr = + (sctp_ipv4_addr_param_t *) init_ack_chunk + pointer_offset; + + ipv4_addr->param_hdr.type = + clib_host_to_net_u16 (SCTP_IPV4_ADDRESS_TYPE); + ipv4_addr->param_hdr.length = + clib_host_to_net_u16 (SCTP_IPV4_ADDRESS_TYPE_LENGTH); + ipv4_addr->address.as_u32 = ip4_addr->as_u32; + + pointer_offset += SCTP_IPV4_ADDRESS_TYPE_LENGTH; + } + if (PREDICT_TRUE (ip6_addr != NULL)) + { + sctp_ipv6_addr_param_t *ipv6_addr = + (sctp_ipv6_addr_param_t *) init_ack_chunk + + sizeof (sctp_init_chunk_t) + pointer_offset; + + ipv6_addr->param_hdr.type = + clib_host_to_net_u16 (SCTP_IPV6_ADDRESS_TYPE); + ipv6_addr->param_hdr.length = + clib_host_to_net_u16 (SCTP_IPV6_ADDRESS_TYPE_LENGTH); + ipv6_addr->address.as_u64[0] = ip6_addr->as_u64[0]; + ipv6_addr->address.as_u64[1] = ip6_addr->as_u64[1]; + + pointer_offset += SCTP_IPV6_ADDRESS_TYPE_LENGTH; + } + + if (tc->sub_conn[idx].connection.is_ip4) + { + ip4_param = (sctp_ipv4_addr_param_t *) init_ack_chunk + pointer_offset; + ip4_param->address.as_u32 = + tc->sub_conn[idx].connection.lcl_ip.ip4.as_u32; + + pointer_offset += sizeof (sctp_ipv4_addr_param_t); + } + else + { + ip6_param = (sctp_ipv6_addr_param_t *) init_ack_chunk + pointer_offset; + ip6_param->address.as_u64[0] = + tc->sub_conn[idx].connection.lcl_ip.ip6.as_u64[0]; + ip6_param->address.as_u64[1] = + tc->sub_conn[idx].connection.lcl_ip.ip6.as_u64[1]; + + pointer_offset += sizeof (sctp_ipv6_addr_param_t); + } + + /* src_port & dst_port are already in network byte-order */ + 
init_ack_chunk->sctp_hdr.checksum = 0; + init_ack_chunk->sctp_hdr.src_port = tc->sub_conn[idx].connection.lcl_port; + init_ack_chunk->sctp_hdr.dst_port = tc->sub_conn[idx].connection.rmt_port; + /* the tc->verification_tag is already in network byte-order (being a copy of the init_tag coming with the INIT chunk) */ + init_ack_chunk->sctp_hdr.verification_tag = tc->remote_tag; + + vnet_sctp_set_chunk_type (&init_ack_chunk->chunk_hdr, INIT_ACK); + vnet_sctp_set_chunk_length (&init_ack_chunk->chunk_hdr, chunk_len); + + init_ack_chunk->initiate_tag = + clib_host_to_net_u32 (random_u32 (&random_seed)); + /* As per RFC 4960, the initial_tsn may be the same value as the initiate_tag */ + init_ack_chunk->initial_tsn = init_ack_chunk->initiate_tag; + init_ack_chunk->a_rwnd = clib_host_to_net_u32 (DEFAULT_A_RWND); + init_ack_chunk->inboud_streams_count = + clib_host_to_net_u16 (INBOUND_STREAMS_COUNT); + init_ack_chunk->outbound_streams_count = + clib_host_to_net_u16 (OUTBOUND_STREAMS_COUNT); + + tc->local_tag = init_ack_chunk->initiate_tag; + + vnet_buffer (b)->sctp.connection_index = + tc->sub_conn[idx].connection.c_index; +} + +/** + * Convert buffer to SHUTDOWN + */ +void +sctp_prepare_shutdown_chunk (sctp_connection_t * tc, vlib_buffer_t * b) +{ + vlib_main_t *vm = vlib_get_main (); + u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN); + u16 alloc_bytes = sizeof (sctp_shutdown_association_chunk_t); + + b = sctp_reuse_buffer (vm, b); + + /* As per RFC 4960 the chunk_length value does NOT contemplate + * the size of the first header (see sctp_header_t) and any padding + */ + u16 chunk_len = alloc_bytes - sizeof (sctp_header_t); + + alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); + + sctp_shutdown_association_chunk_t *shutdown_chunk = + vlib_buffer_push_uninit (b, alloc_bytes); + + shutdown_chunk->sctp_hdr.checksum = 0; + /* No need of host_to_net conversion, already in net-byte order */ + shutdown_chunk->sctp_hdr.src_port = tc->sub_conn[idx].connection.lcl_port; + 
shutdown_chunk->sctp_hdr.dst_port = tc->sub_conn[idx].connection.rmt_port; + shutdown_chunk->sctp_hdr.verification_tag = tc->remote_tag; + vnet_sctp_set_chunk_type (&shutdown_chunk->chunk_hdr, SHUTDOWN); + vnet_sctp_set_chunk_length (&shutdown_chunk->chunk_hdr, chunk_len); + + shutdown_chunk->cumulative_tsn_ack = tc->rcv_las; + + vnet_buffer (b)->sctp.connection_index = + tc->sub_conn[idx].connection.c_index; +} + +/* + * Send SHUTDOWN + */ +void +sctp_send_shutdown (sctp_connection_t * tc) +{ + vlib_buffer_t *b; + u32 bi; + sctp_main_t *tm = vnet_get_sctp_main (); + vlib_main_t *vm = vlib_get_main (); + + if (sctp_check_outstanding_data_chunks (tc) > 0) + return; + + if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi))) + return; + + b = vlib_get_buffer (vm, bi); + sctp_init_buffer (vm, b); + sctp_prepare_shutdown_chunk (tc, b); + + u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN); + sctp_push_ip_hdr (tm, &tc->sub_conn[idx], b); + sctp_enqueue_to_output_now (vm, b, bi, tc->sub_conn[idx].connection.is_ip4); +} + +/** + * Convert buffer to SHUTDOWN_ACK + */ +void +sctp_prepare_shutdown_ack_chunk (sctp_connection_t * tc, vlib_buffer_t * b) +{ + u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN_ACK); + u16 alloc_bytes = sizeof (sctp_shutdown_association_chunk_t); + alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); + + u16 chunk_len = alloc_bytes - sizeof (sctp_header_t); + + sctp_shutdown_ack_chunk_t *shutdown_ack_chunk = + vlib_buffer_push_uninit (b, alloc_bytes); + + shutdown_ack_chunk->sctp_hdr.checksum = 0; + /* No need of host_to_net conversion, already in net-byte order */ + shutdown_ack_chunk->sctp_hdr.src_port = + tc->sub_conn[idx].connection.lcl_port; + shutdown_ack_chunk->sctp_hdr.dst_port = + tc->sub_conn[idx].connection.rmt_port; + shutdown_ack_chunk->sctp_hdr.verification_tag = tc->remote_tag; + + vnet_sctp_set_chunk_type (&shutdown_ack_chunk->chunk_hdr, SHUTDOWN_ACK); + vnet_sctp_set_chunk_length (&shutdown_ack_chunk->chunk_hdr, chunk_len); + + 
vnet_buffer (b)->sctp.connection_index = + tc->sub_conn[idx].connection.c_index; +} + +/* + * Send SHUTDOWN_ACK + */ +void +sctp_send_shutdown_ack (sctp_connection_t * tc) +{ + vlib_buffer_t *b; + u32 bi; + sctp_main_t *tm = vnet_get_sctp_main (); + vlib_main_t *vm = vlib_get_main (); + + if (sctp_check_outstanding_data_chunks (tc) > 0) + return; + + if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi))) + return; + + b = vlib_get_buffer (vm, bi); + sctp_init_buffer (vm, b); + sctp_prepare_shutdown_ack_chunk (tc, b); + + u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN_ACK); + sctp_push_ip_hdr (tm, &tc->sub_conn[idx], b); + sctp_enqueue_to_ip_lookup (vm, b, bi, tc->sub_conn[idx].connection.is_ip4); + + /* Start the SCTP_TIMER_T2_SHUTDOWN timer */ + sctp_timer_set (tc, idx, SCTP_TIMER_T2_SHUTDOWN, SCTP_RTO_INIT); + tc->state = SCTP_STATE_SHUTDOWN_ACK_SENT; +} + +/** + * Convert buffer to SACK + */ +void +sctp_prepare_sack_chunk (sctp_connection_t * tc, vlib_buffer_t * b) +{ + vlib_main_t *vm = vlib_get_main (); + u8 idx = sctp_pick_conn_idx_on_chunk (SACK); + + sctp_reuse_buffer (vm, b); + + u16 alloc_bytes = sizeof (sctp_selective_ack_chunk_t); + + /* As per RFC 4960 the chunk_length value does NOT contemplate + * the size of the first header (see sctp_header_t) and any padding + */ + u16 chunk_len = alloc_bytes - sizeof (sctp_header_t); + + alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); + + sctp_selective_ack_chunk_t *sack = vlib_buffer_push_uninit (b, alloc_bytes); + + sack->sctp_hdr.checksum = 0; + sack->sctp_hdr.src_port = tc->sub_conn[idx].connection.lcl_port; + sack->sctp_hdr.dst_port = tc->sub_conn[idx].connection.rmt_port; + sack->sctp_hdr.verification_tag = tc->remote_tag; + vnet_sctp_set_chunk_type (&sack->chunk_hdr, SACK); + vnet_sctp_set_chunk_length (&sack->chunk_hdr, chunk_len); + + vnet_buffer (b)->sctp.connection_index = + tc->sub_conn[idx].connection.c_index; +} + +/** + * Convert buffer to SHUTDOWN_COMPLETE + */ +void 
+sctp_prepare_shutdown_complete_chunk (sctp_connection_t * tc, + vlib_buffer_t * b) +{ + u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN_COMPLETE); + u16 alloc_bytes = sizeof (sctp_shutdown_association_chunk_t); + alloc_bytes += vnet_sctp_calculate_padding (alloc_bytes); + + u16 chunk_len = alloc_bytes - sizeof (sctp_header_t); + + sctp_shutdown_complete_chunk_t *shutdown_complete = + vlib_buffer_push_uninit (b, alloc_bytes); + + shutdown_complete->sctp_hdr.checksum = 0; + /* No need of host_to_net conversion, already in net-byte order */ + shutdown_complete->sctp_hdr.src_port = + tc->sub_conn[idx].connection.lcl_port; + shutdown_complete->sctp_hdr.dst_port = + tc->sub_conn[idx].connection.rmt_port; + shutdown_complete->sctp_hdr.verification_tag = tc->remote_tag; + + vnet_sctp_set_chunk_type (&shutdown_complete->chunk_hdr, SHUTDOWN_COMPLETE); + vnet_sctp_set_chunk_length (&shutdown_complete->chunk_hdr, chunk_len); + + vnet_buffer (b)->sctp.connection_index = + tc->sub_conn[idx].connection.c_index; +} + +void +sctp_send_shutdown_complete (sctp_connection_t * tc) +{ + vlib_buffer_t *b; + u32 bi; + sctp_main_t *tm = vnet_get_sctp_main (); + vlib_main_t *vm = vlib_get_main (); + + if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi))) + return; + + b = vlib_get_buffer (vm, bi); + sctp_init_buffer (vm, b); + sctp_prepare_shutdown_complete_chunk (tc, b); + + u8 idx = sctp_pick_conn_idx_on_chunk (SHUTDOWN_COMPLETE); + sctp_push_ip_hdr (tm, &tc->sub_conn[idx], b); + sctp_enqueue_to_ip_lookup (vm, b, bi, tc->sub_conn[idx].connection.is_ip4); + + tc->state = SCTP_STATE_CLOSED; +} + + +/* + * Send INIT + */ +void +sctp_send_init (sctp_connection_t * tc) +{ + vlib_buffer_t *b; + u32 bi; + sctp_main_t *tm = vnet_get_sctp_main (); + vlib_main_t *vm = vlib_get_main (); + + if (PREDICT_FALSE (sctp_get_free_buffer_index (tm, &bi))) + return; + + b = vlib_get_buffer (vm, bi); + u8 idx = sctp_pick_conn_idx_on_chunk (INIT); + + sctp_init_buffer (vm, b); + sctp_prepare_init_chunk 
(tc, b); + + /* Measure RTT with this */ + tc->rtt_ts = sctp_time_now (); + tc->rtt_seq = tc->snd_nxt; + tc->rto_boff = 0; + + sctp_push_ip_hdr (tm, &tc->sub_conn[idx], b); + sctp_enqueue_to_ip_lookup_now (vm, b, bi, tc->sub_conn[idx].c_is_ip4); + + /* Start the T1_INIT timer */ + sctp_timer_set (tc, idx, SCTP_TIMER_T1_INIT, SCTP_RTO_INIT); + /* Change state to COOKIE_WAIT */ + tc->state = SCTP_STATE_COOKIE_WAIT; +} + +always_inline u8 +sctp_in_cong_recovery (sctp_connection_t * sctp_conn) +{ + return 0; +} + +/** + * Push SCTP header and update connection variables + */ +static void +sctp_push_hdr_i (sctp_connection_t * tc, vlib_buffer_t * b, + sctp_state_t next_state) +{ + u8 idx = sctp_pick_conn_idx_on_chunk (DATA); + + u16 data_len = + b->current_length + b->total_length_not_including_first_buffer; + ASSERT (!b->total_length_not_including_first_buffer + || (b->flags & VLIB_BUFFER_NEXT_PRESENT)); + + SCTP_ADV_DBG_OUTPUT ("b->current_length = %u, " + "b->current_data = %p " + "data_len = %u", + b->current_length, b->current_data, data_len); + + u16 bytes_to_add = sizeof (sctp_payload_data_chunk_t); + u16 chunk_length = data_len + bytes_to_add - sizeof (sctp_header_t); + + bytes_to_add += vnet_sctp_calculate_padding (bytes_to_add + data_len); + + sctp_payload_data_chunk_t *data_chunk = + vlib_buffer_push_uninit (b, bytes_to_add); + + data_chunk->sctp_hdr.checksum = 0; + data_chunk->sctp_hdr.src_port = tc->sub_conn[idx].connection.lcl_port; + data_chunk->sctp_hdr.dst_port = tc->sub_conn[idx].connection.rmt_port; + data_chunk->sctp_hdr.verification_tag = tc->remote_tag; + + data_chunk->tsn = clib_host_to_net_u32 (0); + data_chunk->stream_id = clib_host_to_net_u16 (0); + data_chunk->stream_seq = clib_host_to_net_u16 (0); + + vnet_sctp_set_chunk_type (&data_chunk->chunk_hdr, DATA); + vnet_sctp_set_chunk_length (&data_chunk->chunk_hdr, chunk_length); + + SCTP_ADV_DBG_OUTPUT ("POINTER_WITH_DATA = %p, DATA_OFFSET = %u", + b->data, b->current_data); + + vnet_buffer 
(b)->sctp.connection_index = + tc->sub_conn[idx].connection.c_index; +} + +u32 +sctp_push_header (transport_connection_t * tconn, vlib_buffer_t * b) +{ + sctp_connection_t *tc = sctp_get_connection_from_transport (tconn); + sctp_push_hdr_i (tc, b, SCTP_STATE_ESTABLISHED); + + if (tc->rtt_ts == 0 && !sctp_in_cong_recovery (tc)) + { + tc->rtt_ts = sctp_time_now (); + tc->rtt_seq = tc->snd_nxt; + } + sctp_trajectory_add_start (b0, 3); + + return 0; + +} + +always_inline uword +sctp46_output_inline (vlib_main_t * vm, + vlib_node_runtime_t * node, + vlib_frame_t * from_frame, int is_ip4) +{ + u32 n_left_from, next_index, *from, *to_next; + u32 my_thread_index = vm->thread_index; + + from = vlib_frame_vector_args (from_frame); + n_left_from = from_frame->n_vectors; + next_index = node->cached_next_index; + sctp_set_time_now (my_thread_index); + + while (n_left_from > 0) + { + u32 n_left_to_next; + + vlib_get_next_frame (vm, node, next_index, to_next, n_left_to_next); + + while (n_left_from > 0 && n_left_to_next > 0) + { + u32 bi0; + vlib_buffer_t *b0; + sctp_header_t *sctp_hdr = 0; + sctp_connection_t *tc0; + sctp_tx_trace_t *t0; + sctp_header_t *th0 = 0; + u32 error0 = SCTP_ERROR_PKTS_SENT, next0 = + SCTP_OUTPUT_NEXT_IP_LOOKUP; + +#if SCTP_DEBUG_STATE_MACHINE + u16 packet_length = 0; +#endif + + bi0 = from[0]; + to_next[0] = bi0; + from += 1; + to_next += 1; + n_left_from -= 1; + n_left_to_next -= 1; + + b0 = vlib_get_buffer (vm, bi0); + tc0 = sctp_connection_get (vnet_buffer (b0)->sctp.connection_index, + my_thread_index); + + if (PREDICT_FALSE (tc0 == 0)) + { + error0 = SCTP_ERROR_INVALID_CONNECTION; + next0 = SCTP_OUTPUT_NEXT_DROP; + goto done; + } + + u8 idx = sctp_pick_conn_idx_on_state (tc0->state); + + th0 = vlib_buffer_get_current (b0); + + if (is_ip4) + { + ip4_header_t *th0 = vlib_buffer_push_ip4 (vm, + b0, + &tc0->sub_conn + [idx].connection. + lcl_ip.ip4, + &tc0-> + sub_conn + [idx].connection. 
+ rmt_ip.ip4, + IP_PROTOCOL_SCTP, 1); + + u32 checksum = ip4_sctp_compute_checksum (vm, b0, th0); + + sctp_hdr = ip4_next_header (th0); + sctp_hdr->checksum = checksum; + + vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data; + th0->checksum = 0; + +#if SCTP_DEBUG_STATE_MACHINE + packet_length = clib_net_to_host_u16 (th0->length); +#endif + } + else + { + ip6_header_t *ih0; + ih0 = vlib_buffer_push_ip6 (vm, + b0, + &tc0->sub_conn[idx]. + connection.lcl_ip.ip6, + &tc0->sub_conn[idx]. + connection.rmt_ip.ip6, + IP_PROTOCOL_SCTP); + + int bogus = ~0; + u32 checksum = ip6_sctp_compute_checksum (vm, b0, ih0, &bogus); + ASSERT (!bogus); + + sctp_hdr = ip6_next_header (ih0); + sctp_hdr->checksum = checksum; + + vnet_buffer (b0)->l3_hdr_offset = (u8 *) ih0 - b0->data; + vnet_buffer (b0)->l4_hdr_offset = (u8 *) th0 - b0->data; + th0->checksum = 0; + +#if SCTP_DEBUG_STATE_MACHINE + packet_length = clib_net_to_host_u16 (ih0->payload_length); +#endif + } + + u8 is_valid = + (tc0->sub_conn[idx].connection.lcl_port == + sctp_hdr->src_port + || tc0->sub_conn[idx].connection.lcl_port == + sctp_hdr->dst_port) + && (tc0->sub_conn[idx].connection.rmt_port == + sctp_hdr->dst_port + || tc0->sub_conn[idx].connection.rmt_port == + sctp_hdr->src_port); + + sctp_full_hdr_t *full_hdr = (sctp_full_hdr_t *) sctp_hdr; + u8 chunk_type = vnet_sctp_get_chunk_type (&full_hdr->common_hdr); + + if (!is_valid) + { + SCTP_DBG_STATE_MACHINE ("BUFFER IS INCORRECT: conn_index = %u, " + "packet_length = %u, " + "chunk_type = %u [%s], " + "connection.lcl_port = %u, sctp_hdr->src_port = %u, " + "connection.rmt_port = %u, sctp_hdr->dst_port = %u", + tc0->sub_conn + [idx].connection.c_index, packet_length, + chunk_type, + sctp_chunk_to_string (chunk_type), + tc0->sub_conn[idx].connection.lcl_port, + sctp_hdr->src_port, + tc0->sub_conn[idx].connection.rmt_port, + sctp_hdr->dst_port); + + error0 = SCTP_ERROR_UNKOWN_CHUNK; + next0 = SCTP_OUTPUT_NEXT_DROP; + goto done; + } + + SCTP_DBG_STATE_MACHINE + 
("CONN_INDEX = %u, CURR_CONN_STATE = %u (%s), " + "CHUNK_TYPE = %s, " "SRC_PORT = %u, DST_PORT = %u", + tc0->sub_conn[idx].connection.c_index, + tc0->state, sctp_state_to_string (tc0->state), + sctp_chunk_to_string (chunk_type), full_hdr->hdr.src_port, + full_hdr->hdr.dst_port); + + if (chunk_type == DATA) + SCTP_ADV_DBG_OUTPUT ("PACKET_LENGTH = %u", packet_length); + + /* Let's make sure the state-machine does not send anything crazy */ + switch (tc0->state) + { + case SCTP_STATE_CLOSED: + { + if (chunk_type != INIT && chunk_type != INIT_ACK) + { + SCTP_DBG_STATE_MACHINE + ("Sending the wrong chunk (%s) based on state-machine status (%s)", + sctp_chunk_to_string (chunk_type), + sctp_state_to_string (tc0->state)); + + error0 = SCTP_ERROR_UNKOWN_CHUNK; + next0 = SCTP_OUTPUT_NEXT_DROP; + goto done; + } + break; + } + case SCTP_STATE_ESTABLISHED: + if (chunk_type != DATA && chunk_type != HEARTBEAT && + chunk_type != HEARTBEAT_ACK && chunk_type != SACK && + chunk_type != COOKIE_ACK && chunk_type != SHUTDOWN) + { + SCTP_DBG_STATE_MACHINE + ("Sending the wrong chunk (%s) based on state-machine status (%s)", + sctp_chunk_to_string (chunk_type), + sctp_state_to_string (tc0->state)); + + error0 = SCTP_ERROR_UNKOWN_CHUNK; + next0 = SCTP_OUTPUT_NEXT_DROP; + goto done; + } + break; + case SCTP_STATE_COOKIE_WAIT: + if (chunk_type != COOKIE_ECHO) + { + SCTP_DBG_STATE_MACHINE + ("Sending the wrong chunk (%s) based on state-machine status (%s)", + sctp_chunk_to_string (chunk_type), + sctp_state_to_string (tc0->state)); + + error0 = SCTP_ERROR_UNKOWN_CHUNK; + next0 = SCTP_OUTPUT_NEXT_DROP; + goto done; + } + /* Change state */ + tc0->state = SCTP_STATE_COOKIE_ECHOED; + break; + default: + SCTP_DBG_STATE_MACHINE + ("Sending chunk (%s) based on state-machine status (%s)", + sctp_chunk_to_string (chunk_type), + sctp_state_to_string (tc0->state)); + break; + } + + if (chunk_type == SHUTDOWN) + { + /* Start the SCTP_TIMER_T2_SHUTDOWN timer */ + sctp_timer_set (tc0, idx, 
SCTP_TIMER_T2_SHUTDOWN, + SCTP_RTO_INIT); + tc0->state = SCTP_STATE_SHUTDOWN_SENT; + } + + vnet_buffer (b0)->sw_if_index[VLIB_RX] = 0; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = ~0; + + b0->flags |= VNET_BUFFER_F_LOCALLY_ORIGINATED; + + SCTP_DBG_STATE_MACHINE ("CONNECTION_INDEX = %u, " + "NEW_STATE = %s, " + "CHUNK_SENT = %s", + tc0->sub_conn[idx].connection.c_index, + sctp_state_to_string (tc0->state), + sctp_chunk_to_string (chunk_type)); + + vnet_sctp_common_hdr_params_host_to_net (&full_hdr->common_hdr); + + done: + b0->error = node->errors[error0]; + if (PREDICT_FALSE (b0->flags & VLIB_BUFFER_IS_TRACED)) + { + t0 = vlib_add_trace (vm, node, b0, sizeof (*t0)); + if (th0) + { + clib_memcpy (&t0->sctp_header, th0, + sizeof (t0->sctp_header)); + } + else + { + memset (&t0->sctp_header, 0, sizeof (t0->sctp_header)); + } + clib_memcpy (&t0->sctp_connection, tc0, + sizeof (t0->sctp_connection)); + } + + vlib_validate_buffer_enqueue_x1 (vm, node, next_index, to_next, + n_left_to_next, bi0, next0); + } + + vlib_put_next_frame (vm, node, next_index, n_left_to_next); + } + + return from_frame->n_vectors; +} + +static uword +sctp4_output (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return sctp46_output_inline (vm, node, from_frame, 1 /* is_ip4 */ ); +} + +static uword +sctp6_output (vlib_main_t * vm, vlib_node_runtime_t * node, + vlib_frame_t * from_frame) +{ + return sctp46_output_inline (vm, node, from_frame, 0 /* is_ip4 */ ); +} + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sctp4_output_node) = +{ + .function = sctp4_output,.name = "sctp4-output", + /* Takes a vector of packets. 
*/ + .vector_size = sizeof (u32), + .n_errors = SCTP_N_ERROR, + .error_strings = sctp_error_strings, + .n_next_nodes = SCTP_OUTPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [SCTP_OUTPUT_NEXT_##s] = n, + foreach_sctp4_output_next +#undef _ + }, + .format_buffer = format_sctp_header, + .format_trace = format_sctp_tx_trace, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (sctp4_output_node, sctp4_output); + +/* *INDENT-OFF* */ +VLIB_REGISTER_NODE (sctp6_output_node) = +{ + .function = sctp6_output, + .name = "sctp6-output", + /* Takes a vector of packets. */ + .vector_size = sizeof (u32), + .n_errors = SCTP_N_ERROR, + .error_strings = sctp_error_strings, + .n_next_nodes = SCTP_OUTPUT_N_NEXT, + .next_nodes = { +#define _(s,n) [SCTP_OUTPUT_NEXT_##s] = n, + foreach_sctp6_output_next +#undef _ + }, + .format_buffer = format_sctp_header, + .format_trace = format_sctp_tx_trace, +}; +/* *INDENT-ON* */ + +VLIB_NODE_FUNCTION_MULTIARCH (sctp6_output_node, sctp6_output); + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/sctp/sctp_packet.h b/src/vnet/sctp/sctp_packet.h new file mode 100644 index 00000000000..4c358db6d3b --- /dev/null +++ b/src/vnet/sctp/sctp_packet.h @@ -0,0 +1,1445 @@ +/* + * Copyright (c) 2017 SUSE LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef included_vnet_sctp_packet_h +#define included_vnet_sctp_packet_h + +#include <stdbool.h> + +#include <vnet/ip/ip4_packet.h> +#include <vnet/ip/ip6_packet.h> + +/* + * As per RFC 4960 + * https://tools.ietf.org/html/rfc4960 + */ + +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Source Port Number | Destination Port Number | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Verification Tag | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Checksum | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + /* + * This is the SCTP sender's port number. It can be used by the + * receiver in combination with the source IP address, the SCTP + * destination port, and possibly the destination IP address to + * identify the association to which this packet belongs. + * The port number 0 MUST NOT be used. + */ + u16 src_port; + + /* + * This is the SCTP port number to which this packet is destined. + * The receiving host will use this port number to de-multiplex the + * SCTP packet to the correct receiving endpoint/application. + * The port number 0 MUST NOT be used. + */ + u16 dst_port; + + /* + * The receiver of this packet uses the Verification Tag to validate + * the sender of this SCTP packet. On transmit, the value of this + * Verification Tag MUST be set to the value of the Initiate Tag + * received from the peer endpoint during the association + * initialization, with the following exceptions: + * - A packet containing an INIT chunk MUST have a zero Verification + * Tag. + * - A packet containing a SHUTDOWN COMPLETE chunk with the T bit + * set MUST have the Verification Tag copied from the packet with + * the SHUTDOWN ACK chunk. 
+ * - A packet containing an ABORT chunk may have the verification tag + * copied from the packet that caused the ABORT to be sent. + * An INIT chunk MUST be the only chunk in the SCTP packet carrying it. + */ + u32 verification_tag; + + /* + * This field contains the checksum of this SCTP packet. + * SCTP uses the CRC32c algorithm. + */ + u32 checksum; + +} sctp_header_t; + +always_inline void +vnet_set_sctp_src_port (sctp_header_t * h, u16 src_port) +{ + h->src_port = clib_host_to_net_u16 (src_port); +} + +always_inline u16 +vnet_get_sctp_src_port (sctp_header_t * h) +{ + return (clib_net_to_host_u16 (h->src_port)); +} + +always_inline void +vnet_set_sctp_dst_port (sctp_header_t * h, u16 dst_port) +{ + h->dst_port = clib_host_to_net_u16 (dst_port); +} + +always_inline u16 +vnet_get_sctp_dst_port (sctp_header_t * h) +{ + return (clib_net_to_host_u16 (h->dst_port)); +} + +always_inline void +vnet_set_sctp_verification_tag (sctp_header_t * h, u32 verification_tag) +{ + h->verification_tag = clib_host_to_net_u32 (verification_tag); +} + +always_inline u32 +vnet_get_sctp_verification_tag (sctp_header_t * h) +{ + return (clib_net_to_host_u32 (h->verification_tag)); +} + +always_inline void +vnet_set_sctp_checksum (sctp_header_t * h, u32 checksum) +{ + h->checksum = clib_host_to_net_u32 (checksum); +} + +always_inline u32 +vnet_get_sctp_checksum (sctp_header_t * h) +{ + return (clib_net_to_host_u32 (h->checksum)); +} + +/* + * Multiple chunks can be bundled into one SCTP packet up to the MTU + * size, except for the INIT, INIT ACK, and SHUTDOWN COMPLETE chunks. + * These chunks MUST NOT be bundled with any other chunk in a packet. 
+ * + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Common Header | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Chunk #1 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | ... | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Chunk #n | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ + +typedef enum +{ + DATA = 0, + INIT, + INIT_ACK, + SACK, + HEARTBEAT, + HEARTBEAT_ACK, + ABORT, + SHUTDOWN, + SHUTDOWN_ACK, + OPERATION_ERROR, + COOKIE_ECHO, + COOKIE_ACK, + ECNE, + CWR, + SHUTDOWN_COMPLETE +} sctp_chunk_type; + +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Chunk Type | Chunk Flags | Chunk Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + /* + * This field identifies the type of information contained in the + * Chunk Value field. It takes a value from 0 to 254. + * The value of 255 is reserved for future use as an extension field. 
+ * + * The values of Chunk Types are defined as follows: + * ID Value Chunk Type + * ----- ---------- + * 0 - Payload Data (DATA) + * 1 - Initiation (INIT) + * 2 - Initiation Acknowledgement (INIT ACK) + * 3 - Selective Acknowledgement (SACK) + * 4 - Heartbeat Request (HEARTBEAT) + * 5 - Heartbeat Acknowledgement (HEARTBEAT ACK) + * 6 - Abort (ABORT) + * 7 - Shutdown (SHUTDOWN) + * 8 - Shutdown Acknowledgement (SHUTDOWN ACK) + * 9 - Operation Error (ERROR) + * 10 - State Cookie (COOKIE ECHO) + * 11 - Cookie Acknowledgement (COOKIE ACK) + * 12 - Reserved for Explicit Congestion Notification Echo (ECNE) + * 13 - Reserved for Congestion Window Reduced (CWR) + * 14 - Shutdown Complete (SHUTDOWN COMPLETE) + * 15 to 62 - available + * 63 - reserved for IETF-defined Chunk Extensions + * 64 to 126 - available + * 127 - reserved for IETF-defined Chunk Extensions + * 128 to 190 - available + * 191 - reserved for IETF-defined Chunk Extensions + * 192 to 254 - available + * 255 - reserved for IETF-defined Chunk Extensions + * + * Chunk Types are encoded such that the highest-order 2 bits specify + * the action that must be taken if the processing endpoint does not + * recognize the Chunk Type. + * 00 - Stop processing this SCTP packet and discard it, do not + * process any further chunks within it. + * 01 - Stop processing this SCTP packet and discard it, do not + * process any further chunks within it, and report the + * unrecognized chunk in an 'Unrecognized Chunk Type'. + * 10 - Skip this chunk and continue processing. + * 11 - Skip this chunk and continue processing, but report in an + * ERROR chunk using the 'Unrecognized Chunk Type' cause of error. + * + * Note: The ECNE and CWR chunk types are reserved for future use of + * Explicit Congestion Notification (ECN); + */ + //u8 type; + + /* + * The usage of these bits depends on the Chunk type as given by the + * Chunk Type field. Unless otherwise specified, they are set to 0 on + * transmit and are ignored on receipt. 
+ */ + //u8 flags; + + /* + * This value represents the size of the chunk in bytes, including + * the Chunk Type, Chunk Flags, Chunk Length, and Chunk Value fields. + * Therefore, if the Chunk Value field is zero-length, the Length + * field will be set to 4. + * The Chunk Length field does not count any chunk padding. + * Chunks (including Type, Length, and Value fields) are padded out + * by the sender with all zero bytes to be a multiple of 4 bytes + * long. This padding MUST NOT be more than 3 bytes in total. The + * Chunk Length value does not include terminating padding of the + * chunk. However, it does include padding of any variable-length + * parameter except the last parameter in the chunk. The receiver + * MUST ignore the padding. + * + * Note: A robust implementation should accept the chunk whether or + * not the final padding has been included in the Chunk Length. + */ + //u16 length; + + u32 params; + +} sctp_chunks_common_hdr_t; + +typedef struct +{ + sctp_header_t hdr; + sctp_chunks_common_hdr_t common_hdr; + +} sctp_full_hdr_t; + +#define CHUNK_TYPE_MASK 0xFF000000 +#define CHUNK_TYPE_SHIFT 24 + +#define CHUNK_FLAGS_MASK 0x00FF0000 +#define CHUNK_FLAGS_SHIFT 16 + +#define CHUNK_LENGTH_MASK 0x0000FFFF +#define CHUNK_LENGTH_SHIFT 0 + +always_inline void +vnet_sctp_common_hdr_params_host_to_net (sctp_chunks_common_hdr_t * h) +{ + h->params = clib_host_to_net_u32 (h->params); +} + +always_inline void +vnet_sctp_common_hdr_params_net_to_host (sctp_chunks_common_hdr_t * h) +{ + h->params = clib_net_to_host_u32 (h->params); +} + +always_inline void +vnet_sctp_set_chunk_type (sctp_chunks_common_hdr_t * h, sctp_chunk_type t) +{ + h->params &= ~(CHUNK_TYPE_MASK); + h->params |= (t << CHUNK_TYPE_SHIFT) & CHUNK_TYPE_MASK; +} + +always_inline u8 +vnet_sctp_get_chunk_type (sctp_chunks_common_hdr_t * h) +{ + return ((h->params & CHUNK_TYPE_MASK) >> CHUNK_TYPE_SHIFT); +} + +always_inline void +vnet_sctp_set_chunk_length (sctp_chunks_common_hdr_t * h, u16 length) 
+{ + h->params &= ~(CHUNK_LENGTH_MASK); + h->params |= (length << CHUNK_LENGTH_SHIFT) & CHUNK_LENGTH_MASK; +} + +always_inline u16 +vnet_sctp_get_chunk_length (sctp_chunks_common_hdr_t * h) +{ + return ((h->params & CHUNK_LENGTH_MASK) >> CHUNK_LENGTH_SHIFT); +} + +/* + * Payload chunk + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 0 | Reserved|U|B|E| Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | TSN | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Stream Identifier S | Stream Sequence Number n | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Payload Protocol Identifier | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * \ \ + * / User Data (seq n of Stream S) / + * \ \ + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + sctp_header_t sctp_hdr; + /* + * Type (8 bits): 0 + * Flags (8 bits): + * -- Reserved (5 bits): all 0s + * -- U bit + * -- B bit + * -- E bit + * Length (16 bits): This field indicates the length of the DATA chunk in + * bytes from the beginning of the type field to the end of the User Data + * field excluding any padding. + * A DATA chunk with one byte of user data will have Length set to 17 + * (indicating 17 bytes). A DATA chunk with a User Data field of length L + * will have the Length field set to (16 + L) (indicating 16+L bytes) where + * L MUST be greater than 0. 
+ */ + + /* + * Fragment Description Table: + * + * B E Description + * ============================================================ + * | 1 0 | First piece of a fragmented user message | + * +----------------------------------------------------------+ + * | 0 0 | Middle piece of a fragmented user message | + * +----------------------------------------------------------+ + * | 0 1 | Last piece of a fragmented user message | + * +----------------------------------------------------------+ + * | 1 1 | Unfragmented message | + * ============================================================ + */ + sctp_chunks_common_hdr_t chunk_hdr; + + /* + * This value represents the TSN for this DATA chunk. + * The valid range of TSN is from 0 to 4294967295 (2**32 - 1). + * TSN wraps back to 0 after reaching 4294967295. + */ + u32 tsn; + + /* + * Identifies the stream to which the following user data belongs. + */ + u16 stream_id; + + /* + * This value represents the Stream Sequence Number of the following user data + * within the stream S. Valid range is 0 to 65535. + * When a user message is fragmented by SCTP for transport, the same Stream + * Sequence Number MUST be carried in each of the fragments of the message. + */ + u16 stream_seq; + + /* + * This value represents an application (or upper layer) specified protocol + * identifier. This value is passed to SCTP by its upper layer and sent to its + * peer. This identifier is not used by SCTP but can be used by certain network + * entities, as well as by the peer application, to identify the type of + * information being carried in this DATA chunk. This field must be sent even + * in fragmented DATA chunks (to make sure it is available for agents in the + * middle of the network). Note that this field is NOT touched by an SCTP + * implementation; therefore, its byte order is NOT necessarily big endian. + * The upper layer is responsible for any byte order conversions to this field. 
+ * The value 0 indicates that no application identifier is specified by the + * upper layer for this payload data. + */ + u32 payload_id; + + /* + * This is the payload user data. The implementation MUST pad the end of the + * data to a 4-byte boundary with all-zero bytes. Any padding MUST NOT be + * included in the Length field. A sender MUST never add more than 3 bytes of + * padding. + */ + u32 data[]; + +} sctp_payload_data_chunk_t; + +always_inline void +vnet_sctp_set_ebit (sctp_payload_data_chunk_t * p, u8 enable) +{ + //p->chunk_hdr.flags = clib_host_to_net_u16 (enable); +} + +always_inline u8 +vnet_sctp_get_ebit (sctp_payload_data_chunk_t * p) +{ + //return (clib_net_to_host_u16 (p->chunk_hdr.flags)); + return 0; +} + +always_inline void +vnet_sctp_set_bbit (sctp_payload_data_chunk_t * p, u8 enable) +{ + //p->chunk_hdr.flags = clib_host_to_net_u16 (enable << 1); +} + +always_inline u8 +vnet_sctp_get_bbit (sctp_payload_data_chunk_t * p) +{ + //return (clib_net_to_host_u16 (p->chunk_hdr.flags >> 1)); + return 0; +} + +always_inline void +vnet_sctp_set_ubit (sctp_payload_data_chunk_t * p, u8 enable) +{ + //p->chunk_hdr.flags = clib_host_to_net_u16 (enable << 2); +} + +always_inline u8 +vnet_sctp_get_ubit (sctp_payload_data_chunk_t * p) +{ + //return (clib_net_to_host_u16 (p->chunk_hdr.flags >> 2)); + return 0; +} + +always_inline void +vnet_sctp_set_tsn (sctp_payload_data_chunk_t * p, u32 tsn) +{ + p->tsn = clib_host_to_net_u32 (tsn); +} + +always_inline u32 +vnet_sctp_get_tsn (sctp_payload_data_chunk_t * p) +{ + return (clib_net_to_host_u32 (p->tsn)); +} + +always_inline void +vnet_sctp_set_stream_id (sctp_payload_data_chunk_t * p, u16 stream_id) +{ + p->stream_id = clib_host_to_net_u16 (stream_id); +} + +always_inline u16 +vnet_sctp_get_stream_id (sctp_payload_data_chunk_t * p) +{ + return (clib_net_to_host_u16 (p->stream_id)); +} + +always_inline void +vnet_sctp_set_stream_seq (sctp_payload_data_chunk_t * p, u16 stream_seq) +{ + p->stream_seq = 
clib_host_to_net_u16 (stream_seq); +} + +always_inline u16 +vnet_sctp_get_stream_seq (sctp_payload_data_chunk_t * p) +{ + return (clib_net_to_host_u16 (p->stream_seq)); +} + +always_inline void +vnet_sctp_set_payload_id (sctp_payload_data_chunk_t * p, u32 payload_id) +{ + p->payload_id = clib_host_to_net_u32 (payload_id); +} + +always_inline u32 +vnet_sctp_get_payload_id (sctp_payload_data_chunk_t * p) +{ + return (clib_net_to_host_u32 (p->payload_id)); +} + +always_inline u16 +vnet_sctp_calculate_padding (u16 base_length) +{ + if (base_length % 4 == 0) + return 0; + + return (4 - base_length % 4); +} + +always_inline u16 +vnet_sctp_calculate_payload_data_padding (sctp_payload_data_chunk_t * p) +{ + u16 payload_length = vnet_sctp_get_chunk_length (&p->chunk_hdr) - + sizeof (p->chunk_hdr) - + sizeof (p->tsn) - + sizeof (p->stream_id) - sizeof (p->stream_seq) - sizeof (p->payload_id); + + return vnet_sctp_calculate_padding (payload_length); +} + +#define DEFAULT_A_RWND 1480 +#define INBOUND_STREAMS_COUNT 1 +#define OUTBOUND_STREAMS_COUNT 1 + +/* + * INIT chunk + * + * This chunk is used to initiate an SCTP association between two + * endpoints. 
+ * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 1 | Chunk Flags | Chunk Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Initiate Tag | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Advertised Receiver Window Credit (a_rwnd) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Number of Outbound Streams | Number of Inbound Streams | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Initial TSN | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * \ \ + * / Optional/Variable-Length Parameters / + * \ \ + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * + * The INIT chunk contains the following parameters. Unless otherwise + * noted, each parameter MUST only be included once in the INIT chunk. + * + * Fixed Parameters Status + * ---------------------------------------------- + * Initiate Tag Mandatory + * Advertised Receiver Window Credit Mandatory + * Number of Outbound Streams Mandatory + * Number of Inbound Streams Mandatory + * Initial TSN Mandatory + * + * Variable Parameters Status Type Value + * ------------------------------------------------------------- + * IPv4 Address (Note 1) Optional 5 + * IPv6 Address (Note 1) Optional 6 + * Cookie Preservative Optional 9 + * Reserved for ECN Capable (Note 2) Optional 32768 (0x8000) + * Host Name Address (Note 3) Optional 11 + * Supported Address Types (Note 4) Optional 12 + * + * Note 1: The INIT chunks can contain multiple addresses that can be + * IPv4 and/or IPv6 in any combination. + * + * Note 2: The ECN Capable field is reserved for future use of Explicit + * Congestion Notification. + * + * Note 3: An INIT chunk MUST NOT contain more than one Host Name Address + * parameter. 
Moreover, the sender of the INIT MUST NOT combine any other + * address types with the Host Name Address in the INIT. The receiver of + * INIT MUST ignore any other address types if the Host Name Address parameter + * is present in the received INIT chunk. + * + * Note 4: This parameter, when present, specifies all the address types the + * sending endpoint can support. The absence of this parameter indicates that + * the sending endpoint can support any address type. + * + * IMPLEMENTATION NOTE: If an INIT chunk is received with known parameters that + * are not optional parameters of the INIT chunk, then the receiver SHOULD + * process the INIT chunk and send back an INIT ACK. The receiver of the INIT + * chunk MAY bundle an ERROR chunk with the COOKIE ACK chunk later. + * However, restrictive implementations MAY send back an ABORT chunk in response + * to the INIT chunk. The Chunk Flags field in INIT is reserved, and all bits + * in it should be set to 0 by the sender and ignored by the receiver. + * The sequence of parameters within an INIT can be processed in any order. + */ +typedef struct +{ + sctp_header_t sctp_hdr; + sctp_chunks_common_hdr_t chunk_hdr; + + /* + * The receiver of the INIT (the responding end) records the value of + * the Initiate Tag parameter. + * This value MUST be placed into the Verification Tag field of every + * SCTP packet that the receiver of the INIT transmits within this association. + * The Initiate Tag is allowed to have any value except 0. + * + * If the value of the Initiate Tag in a received INIT chunk is found + * to be 0, the receiver MUST treat it as an error and close the + * association by transmitting an ABORT. + * + * The value of the INIT TAG is recommended to be random for security + * reasons. 
A good method is described in https://tools.ietf.org/html/rfc4086 + */ + u32 initiate_tag; + + /* + * This value represents the dedicated buffer space, in number of bytes, + * the sender of the INIT has reserved in association with this window. + * During the life of the association, this buffer space SHOULD NOT be + * lessened (i.e., dedicated buffers taken away from this association); + * however, an endpoint MAY change the value of a_rwnd it sends in SACK + * chunks. + */ + u32 a_rwnd; + + /* + * Defines the number of outbound streams the sender of this INIT chunk + * wishes to create in this association. + * The value of 0 MUST NOT be used. + * + * Note: A receiver of an INIT with the OS value set to 0 SHOULD abort + * the association. + */ + u16 outbound_streams_count; + + /* + * Defines the maximum number of streams the sender of this INIT + * chunk allows the peer end to create in this association. + * The value 0 MUST NOT be used. + * + * Note: There is no negotiation of the actual number of streams but + * instead the two endpoints will use the min(requested, offered). + * + * Note: A receiver of an INIT with the MIS value of 0 SHOULD abort + * the association. + */ + u16 inboud_streams_count; + + /* + * Defines the initial TSN that the sender will use. + * The valid range is from 0 to 4294967295. + * This field MAY be set to the value of the Initiate Tag field. + */ + u32 initial_tsn; + + /* The following field allows to have multiple optional fields which are: + * - sctp_ipv4_address + * - sctp_ipv6_address + * - sctp_cookie_preservative + * - sctp_hostname_address + * - sctp_supported_address_types + */ + u32 optional_fields[]; + +} sctp_init_chunk_t; + +/* + * INIT ACK chunk + * + * The INIT ACK chunk is used to acknowledge the initiation of an SCTP + * association. The parameter part of INIT ACK is formatted similarly to the + * INIT chunk. 
+ * + * It uses two extra variable parameters: + * - the State Cookie and + * - the Unrecognized Parameter: + */ +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 2 | Chunk Flags | Chunk Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Initiate Tag | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Advertised Receiver Window Credit | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Number of Outbound Streams | Number of Inbound Streams | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Initial TSN | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * \ \ + * / Optional/Variable-Length Parameters / + * \ \ + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef sctp_init_chunk_t sctp_init_ack_chunk_t; + +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Parameter Type | Parameter Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * \ \ + * / Parameter Value / + * \ \ + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + u16 type; + u16 length; + +} sctp_opt_params_hdr_t; + +typedef struct +{ + sctp_opt_params_hdr_t param_hdr; + + u64 mac; /* RFC 2104 */ + u32 creation_time; + u32 cookie_lifespan; + +} sctp_state_cookie_param_t; + +/* + * This chunk is used only during the initialization of an association. + * It is sent by the initiator of an association to its peer to complete + * the initialization process. This chunk MUST precede any DATA chunk + * sent within the association, but MAY be bundled with one or more DATA + * chunks in the same packet. 
+ * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 10 |Chunk Flags | Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * / Cookie / + * \ \ + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + sctp_header_t sctp_hdr; + sctp_chunks_common_hdr_t chunk_hdr; + + sctp_state_cookie_param_t cookie; + +} sctp_cookie_echo_chunk_t; + + +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 11 |Chunk Flags | Length = 4 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + sctp_header_t sctp_hdr; + sctp_chunks_common_hdr_t chunk_hdr; + +} sctp_cookie_ack_chunk_t; + +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 14 |Chunk Flags | Length = 4 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + sctp_header_t sctp_hdr; + sctp_chunks_common_hdr_t chunk_hdr; + +} sctp_shutdown_complete_chunk_t; + +/* OPTIONAL or VARIABLE-LENGTH parameters for INIT */ +#define SCTP_IPV4_ADDRESS_TYPE 5 +#define SCTP_IPV4_ADDRESS_TYPE_LENGTH 8 +#define SCTP_IPV6_ADDRESS_TYPE 6 +#define SCTP_IPV6_ADDRESS_TYPE_LENGTH 20 +#define SCTP_STATE_COOKIE_TYPE 7 +#define SCTP_UNRECOGNIZED_TYPE 8 +#define SCTP_COOKIE_PRESERVATIVE_TYPE 9 +#define SCTP_COOKIE_PRESERVATIVE_TYPE_LENGTH 8 +#define SCTP_HOSTNAME_ADDRESS_TYPE 11 +#define SCTP_SUPPORTED_ADDRESS_TYPES 12 + +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 5 | Length = 8 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ 
+ * |                        IPv4 Address                           |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+typedef struct
+{
+  sctp_opt_params_hdr_t param_hdr;
+
+  /*
+   * Contains an IPv4 address of the sending endpoint.
+   * It is binary encoded.
+   */
+  ip4_address_t address;
+
+} sctp_ipv4_addr_param_t;
+
+/*
+ * Fill in an IPv4 Address parameter: type, length and the address itself.
+ * Each of the three values goes through the host-to-network conversion
+ * before being stored.
+ *
+ * NOTE(review): the u32 inside ip4_address_t is byte-swapped here and
+ * swapped back by the getter below; confirm that callers really hand in
+ * a host-order value.
+ */
+always_inline void
+vnet_sctp_set_ipv4_address (sctp_ipv4_addr_param_t * a, ip4_address_t address)
+{
+  a->param_hdr.type = clib_host_to_net_u16 (SCTP_IPV4_ADDRESS_TYPE);
+  a->param_hdr.length = clib_host_to_net_u16 (SCTP_IPV4_ADDRESS_TYPE_LENGTH);
+  a->address.as_u32 = clib_host_to_net_u32 (address.as_u32);
+}
+
+/*
+ * Return the stored IPv4 address after the network-to-host conversion.
+ */
+always_inline u32
+vnet_sctp_get_ipv4_address (sctp_ipv4_addr_param_t * a)
+{
+  return (clib_net_to_host_u32 (a->address.as_u32));
+}
+
+/*
+ *  0                   1                   2                   3
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |            Type = 6           |          Length = 20          |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                                                               |
+ * |                         IPv6 Address                          |
+ * |                                                               |
+ * |                                                               |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+typedef struct
+{
+  sctp_opt_params_hdr_t param_hdr;
+
+  /*
+   * Contains an IPv6 address of the sending endpoint.
+   * It is binary encoded.
+   */
+  ip6_address_t address;
+
+} sctp_ipv6_addr_param_t;
+
+/*
+ * Fill in an IPv6 Address parameter: type, length and the address, the
+ * latter handled as two u64 halves that are converted independently.
+ *
+ * NOTE(review): converting each 64-bit half on its own reorders the bytes
+ * inside each half only; the getter below undoes exactly the same
+ * operation.  Confirm this is the intended wire representation.
+ */
+always_inline void
+vnet_sctp_set_ipv6_address (sctp_ipv6_addr_param_t * a, ip6_address_t address)
+{
+  a->param_hdr.type = clib_host_to_net_u16 (SCTP_IPV6_ADDRESS_TYPE);
+  a->param_hdr.length = clib_host_to_net_u16 (SCTP_IPV6_ADDRESS_TYPE_LENGTH);
+  a->address.as_u64[0] = clib_host_to_net_u64 (address.as_u64[0]);
+  a->address.as_u64[1] = clib_host_to_net_u64 (address.as_u64[1]);
+}
+
+/*
+ * Return a copy of the stored IPv6 address with both u64 halves passed
+ * through the network-to-host conversion.
+ */
+always_inline ip6_address_t
+vnet_sctp_get_ipv6_address (sctp_ipv6_addr_param_t * a)
+{
+  ip6_address_t ip6_address;
+
+  ip6_address.as_u64[0] = clib_net_to_host_u64 (a->address.as_u64[0]);
+  ip6_address.as_u64[1] = clib_net_to_host_u64 (a->address.as_u64[1]);
+
+  return ip6_address;
+}
+
+/*
+ * The sender of the INIT shall use this parameter to suggest to the
+ * receiver of the INIT for a longer life-span of the State Cookie.
+ */
+/*
+ *  0                   1                   2                   3
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |            Type = 9           |          Length = 8           |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |         Suggested Cookie Life-Span Increment (msec.)          |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+typedef struct
+{
+  sctp_opt_params_hdr_t param_hdr;
+
+  /*
+   * This parameter indicates to the receiver how much increment in
+   * milliseconds the sender wishes the receiver to add to its default
+   * cookie life-span.
+   *
+   * This optional parameter should be added to the INIT chunk by the
+   * sender when it reattempts establishing an association with a peer
+   * to which its previous attempt of establishing the association
+   * failed due to a stale cookie operation error. The receiver MAY
+   * choose to ignore the suggested cookie life-span increase for its
+   * own security reasons.
+   */
+  u32 life_span_inc;
+
+} sctp_cookie_preservative_param_t;
+
+/*
+ * Fill in a Cookie Preservative parameter: type, length and the suggested
+ * life-span increment, each converted from host to network order.
+ */
+always_inline void
+vnet_sctp_set_cookie_preservative (sctp_cookie_preservative_param_t * c,
+				   u32 life_span_inc)
+{
+  c->param_hdr.type = clib_host_to_net_u16 (SCTP_COOKIE_PRESERVATIVE_TYPE);
+  c->param_hdr.length =
+    clib_host_to_net_u16 (SCTP_COOKIE_PRESERVATIVE_TYPE_LENGTH);
+  c->life_span_inc = clib_host_to_net_u32 (life_span_inc);
+}
+
+/*
+ * Return the stored life-span increment after the network-to-host
+ * conversion.
+ */
+always_inline u32
+vnet_sctp_get_cookie_preservative (sctp_cookie_preservative_param_t * c)
+{
+  return (clib_net_to_host_u32 (c->life_span_inc));
+}
+
+#define FQDN_MAX_LENGTH 256
+
+/*
+ * The sender of INIT uses this parameter to pass its Host Name (in
+ * place of its IP addresses) to its peer.
+ * The peer is responsible for resolving the name.
+ * Using this parameter might make it more likely for the association to work
+ * across a NAT box.
+ */
+/*
+ *  0                   1                   2                   3
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |           Type = 11           |            Length             |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * /                           Host Name                           /
+ * \                                                               \
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+typedef struct
+{
+  sctp_opt_params_hdr_t param_hdr;
+
+  /*
+   * This field contains a host name in "host name syntax" per RFC 1123
+   * Section 2.1
+   *
+   * Note: At least one null terminator is included in the Host Name
+   * string and must be included in the length.
+   */
+  char hostname[FQDN_MAX_LENGTH];
+
+} sctp_hostname_param_t;
+
+/*
+ * Fill in a Host Name Address parameter.
+ *
+ * The hostname field is zero-filled first and then at most
+ * FQDN_MAX_LENGTH - 1 bytes of the caller's string are copied, stopping
+ * at its terminator.  The stored name therefore always ends in at least
+ * one NUL and the source is never read beyond its own end.  A NULL
+ * hostname leaves the field empty.
+ *
+ * The length covers the parameter header plus the whole fixed-size,
+ * NUL-padded hostname field and is stored in network byte order, as done
+ * by the other parameter setters in this file.
+ *
+ * NOTE(review): RFC 4960 counts only the name and its terminator in the
+ * length; the full fixed field is reported here because that is what the
+ * struct carries.  Confirm against the code that serializes this
+ * parameter.
+ */
+always_inline void
+vnet_sctp_set_hostname_address (sctp_hostname_param_t * h, char *hostname)
+{
+  u32 i;
+
+  h->param_hdr.type = clib_host_to_net_u16 (SCTP_HOSTNAME_ADDRESS_TYPE);
+  h->param_hdr.length =
+    clib_host_to_net_u16 (sizeof (h->param_hdr) + FQDN_MAX_LENGTH);
+
+  /* Pad with NUL bytes (not the character '0') so a terminator is present. */
+  memset (h->hostname, 0, FQDN_MAX_LENGTH);
+
+  if (!hostname)
+    return;
+
+  /* Bounded copy: keep the last byte of the field as terminator. */
+  for (i = 0; i < FQDN_MAX_LENGTH - 1 && hostname[i] != '\0'; i++)
+    h->hostname[i] = hostname[i];
+}
+
+#define MAX_SUPPORTED_ADDRESS_TYPES 3
+
+/*
+ * The sender of INIT uses this parameter to list all the address types
+ * it can support.
+ */
+/*
+ *  0                   1                   2                   3
+ *  0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |           Type = 12           |            Length             |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |        Address Type #1        |        Address Type #2        |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ * |                            ......                             |
+ * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ */
+typedef struct
+{
+  sctp_opt_params_hdr_t param_hdr;
+
+  u16 address_type[MAX_SUPPORTED_ADDRESS_TYPES];
+
+} sctp_supported_addr_types_param_t;
+
+/*
+ * Fill in a Supported Address Types parameter advertising the IPv4, IPv6
+ * and Host Name address types, in that order.
+ *
+ * The length is the parameter header plus the address_type array (each
+ * entry is a u16, i.e. two bytes) and, like the type and the entries
+ * themselves, is stored in network byte order.
+ */
+always_inline void
+vnet_sctp_set_supported_address_types (sctp_supported_addr_types_param_t * s)
+{
+  s->param_hdr.type = clib_host_to_net_u16 (SCTP_SUPPORTED_ADDRESS_TYPES);
+  s->param_hdr.length =
+    clib_host_to_net_u16 (sizeof (s->param_hdr) + sizeof (s->address_type));
+
+  s->address_type[0] = clib_host_to_net_u16 (SCTP_IPV4_ADDRESS_TYPE);
+  s->address_type[1] = clib_host_to_net_u16 (SCTP_IPV6_ADDRESS_TYPE);
+  s->address_type[2] = clib_host_to_net_u16 (SCTP_HOSTNAME_ADDRESS_TYPE);
+}
+
+/*
+ * Error cause codes to be used for the sctp_error_cause.cause_code field
+ */
+#define INVALID_STREAM_IDENTIFIER 1
+#define MISSING_MANDATORY_PARAMETER 2
+#define STALE_COOKIE_ERROR 3
+#define OUT_OF_RESOURCE 4
+#define UNRESOLVABLE_ADDRESS 5
+#define UNRECOGNIZED_CHUNK_TYPE 6
+#define INVALID_MANDATORY_PARAMETER 7
+#define UNRECOGNIZED_PARAMETER 8
+#define NO_USER_DATA 9
+#define COOKIE_RECEIVED_WHILE_SHUTTING_DOWN 10
+#define RESTART_OF_ASSOCIATION_WITH_NEW_ADDR 11
+#define USER_INITIATED_ABORT 12
+#define PROTOCOL_VIOLATION 13
+
+/*
+ * Stamp the State Cookie parameter type into the parameter header.
+ * Only the type is written; the length and the cookie fields are left
+ * as they are (see the TODO in the body).
+ */
+always_inline void
+vnet_sctp_set_state_cookie (sctp_state_cookie_param_t * s)
+{
+  s->param_hdr.type = clib_host_to_net_u16 (SCTP_STATE_COOKIE_TYPE);
+
+  /* TODO: length & value to be populated */
+}
+
+/*
+ * Parameter header followed by a variable-length value.
+ */
+typedef struct
+{
+  sctp_opt_params_hdr_t param_hdr;
+
+  u32 value[];
+
+} sctp_unrecognized_param_t;
+
+always_inline
void +vnet_sctp_set_unrecognized_param (sctp_unrecognized_param_t * u) +{ + u->param_hdr.type = clib_host_to_net_u16 (UNRECOGNIZED_PARAMETER); + + /* TODO: length & value to be populated */ +} + +/* + * Selective ACK (SACK) chunk + * + * This chunk is sent to the peer endpoint to acknowledge received DATA + * chunks and to inform the peer endpoint of gaps in the received + * subsequences of DATA chunks as represented by their TSNs. + */ +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 3 |Chunk Flags | Chunk Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Cumulative TSN Ack | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Advertised Receiver Window Credit (a_rwnd) | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Number of Gap Ack Blocks = N | Number of Duplicate TSNs = X | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Gap Ack Block #1 Start | Gap Ack Block #1 End | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * / / + * \ ... \ + * / / + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Gap Ack Block #N Start | Gap Ack Block #N End | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Duplicate TSN 1 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * / / + * \ ... \ + * / / + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Duplicate TSN X | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + sctp_header_t sctp_hdr; + sctp_chunks_common_hdr_t chunk_hdr; + /* + * This parameter contains the TSN of the last DATA chunk received in + * sequence before a gap. In the case where no DATA chunk has been + * received, this value is set to the peer's Initial TSN minus one. 
+ */ + u32 cumulative_tsn_ack; + + /* + * This field indicates the updated receive buffer space in bytes of + * the sender of this SACK. + */ + u32 a_rwnd; + + /* + * Indicates the number of Gap Ack Blocks included in this SACK. + */ + u16 gap_ack_blocks_count; + + /* + * This field contains the number of duplicate TSNs the endpoint has + * received. Each duplicate TSN is listed following the Gap Ack Block + * list. + */ + u16 duplicate_tsn_count; + + /* + * Indicates the Start offset TSN for this Gap Ack Block. To calculate + * the actual TSN number the Cumulative TSN Ack is added to this offset + * number. This calculated TSN identifies the first TSN in this Gap Ack + * Block that has been received. + */ + u16 *gap_ack_block_start; + + /* + * Indicates the End offset TSN for this Gap Ack Block. To calculate + * the actual TSN number, the Cumulative TSN Ack is added to this offset + * number. This calculated TSN identifies the TSN of the last DATA chunk + * received in this Gap Ack Block. + */ + u16 *gap_ack_block_end; + + /* + * Indicates the number of times a TSN was received in duplicate since + * the last SACK was sent. Every time a receiver gets a duplicate TSN + * (before sending the SACK), it adds it to the list of duplicates. + * The duplicate count is reinitialized to zero after sending each SACK. 
+ */ + u32 duplicate_tsn; + +} sctp_selective_ack_chunk_t; + +always_inline void +vnet_sctp_set_cumulative_tsn_ack (sctp_selective_ack_chunk_t * s, + u32 cumulative_tsn_ack) +{ + vnet_sctp_set_chunk_type (&s->chunk_hdr, SACK); + s->cumulative_tsn_ack = clib_host_to_net_u32 (cumulative_tsn_ack); +} + +always_inline u32 +vnet_sctp_get_cumulative_tsn_ack (sctp_selective_ack_chunk_t * s) +{ + return clib_net_to_host_u32 (s->cumulative_tsn_ack); +} + +always_inline void +vnet_sctp_set_arwnd (sctp_selective_ack_chunk_t * s, u32 a_rwnd) +{ + vnet_sctp_set_chunk_type (&s->chunk_hdr, SACK); + s->a_rwnd = clib_host_to_net_u32 (a_rwnd); +} + +always_inline u32 +vnet_sctp_get_arwnd (sctp_selective_ack_chunk_t * s) +{ + return clib_net_to_host_u32 (s->a_rwnd); +} + +always_inline void +vnet_sctp_set_gap_ack_blocks_count (sctp_selective_ack_chunk_t * s, + u16 gap_ack_blocks_count) +{ + vnet_sctp_set_chunk_type (&s->chunk_hdr, SACK); + s->gap_ack_blocks_count = clib_host_to_net_u16 (gap_ack_blocks_count); + + if (s->gap_ack_block_start == NULL) + s->gap_ack_block_start = + clib_mem_alloc (sizeof (u16) * gap_ack_blocks_count); + if (s->gap_ack_block_end == NULL) + s->gap_ack_block_end = + clib_mem_alloc (sizeof (u16) * gap_ack_blocks_count); +} + +always_inline u16 +vnet_sctp_get_gap_ack_blocks_count (sctp_selective_ack_chunk_t * s) +{ + return clib_net_to_host_u32 (s->gap_ack_blocks_count); +} + +always_inline void +vnet_sctp_set_duplicate_tsn_count (sctp_selective_ack_chunk_t * s, + u16 duplicate_tsn_count) +{ + vnet_sctp_set_chunk_type (&s->chunk_hdr, SACK); + s->duplicate_tsn_count = clib_host_to_net_u16 (duplicate_tsn_count); +} + +always_inline u16 +vnet_sctp_get_duplicate_tsn_count (sctp_selective_ack_chunk_t * s) +{ + return clib_net_to_host_u16 (s->duplicate_tsn_count); +} + +/* + * Heartbeat Info + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Heartbeat Info 
Type=1 | HB Info Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * / Sender-Specific Heartbeat Info / + * \ \ + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + sctp_opt_params_hdr_t param_hdr; + + /* + * The Sender-Specific Heartbeat Info field should normally include + * information about the sender's current time when this HEARTBEAT + * chunk is sent and the destination transport address to which this + * HEARTBEAT is sent. + * This information is simply reflected back by the receiver in the + * HEARTBEAT ACK message. + * + * Note also that the HEARTBEAT message is both for reachability + * checking and for path verification. + * When a HEARTBEAT chunk is being used for path verification purposes, + * it MUST hold a 64-bit random nonce. + */ + u64 hb_info; + +} sctp_hb_info_param_t; + +always_inline void +vnet_sctp_set_heartbeat_info (sctp_hb_info_param_t * h, u64 hb_info, + u16 hb_info_length) +{ + h->hb_info = clib_host_to_net_u16 (1); + h->param_hdr.length = clib_host_to_net_u16 (hb_info_length); + h->hb_info = clib_host_to_net_u64 (hb_info); +} + +/* + * Heartbeat Request + * + * An endpoint should send this chunk to its peer endpoint to probe the + * reachability of a particular destination transport address defined in + * the present association. + * The parameter field contains the Heartbeat Information, which is a + * variable-length opaque data structure understood only by the sender. 
+ */ +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 4 | Chunk Flags | Heartbeat Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * \ \ + * / Heartbeat Information TLV (Variable-Length) / + * \ \ + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + sctp_header_t sctp_hdr; + sctp_chunks_common_hdr_t chunk_hdr; + sctp_hb_info_param_t hb_info; + +} sctp_hb_req_chunk_t; + +always_inline void +vnet_sctp_set_hb_request_info (sctp_hb_req_chunk_t * h, + sctp_hb_info_param_t * hb_info) +{ + vnet_sctp_set_chunk_type (&h->chunk_hdr, HEARTBEAT); + memcpy (&h->hb_info, hb_info, sizeof (h->hb_info)); +} + +/* + * Heartbeat Acknowledgement + * + * An endpoint should send this chunk to its peer endpoint as a response + * to a HEARTBEAT chunk. + * A HEARTBEAT ACK is always sent to the source IP address of the IP datagram + * containing the HEARTBEAT chunk to which this ack is responding. 
+ */ +/* + * + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 5 | Chunk Flags | Heartbeat Ack Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * \ \ + * / Heartbeat Information TLV (Variable-Length) / + * \ \ + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef sctp_hb_req_chunk_t sctp_hb_ack_chunk_t; + +always_inline void +vnet_sctp_set_hb_ack_info (sctp_hb_ack_chunk_t * h, + sctp_hb_info_param_t * hb_info) +{ + vnet_sctp_set_chunk_type (&h->chunk_hdr, HEARTBEAT_ACK); + memcpy (&h->hb_info, hb_info, sizeof (h->hb_info)); +} + +/* + * Error cause + */ +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Cause Code | Cause Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * / Cause-Specific Information / + * \ \ + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ +*/ +typedef struct +{ + + sctp_opt_params_hdr_t param_hdr; + u64 cause_info; + +} sctp_err_cause_param_t; + +/* + * Abort Association (ABORT) + * + * The ABORT chunk is sent to the peer of an association to close the + * association. The ABORT chunk may contain Cause Parameters to inform + * the receiver about the reason of the abort. DATA chunks MUST NOT be + * bundled with ABORT. Control chunks (except for INIT, INIT ACK, and + * SHUTDOWN COMPLETE) MAY be bundled with an ABORT, but they MUST be + * placed before the ABORT in the SCTP packet or they will be ignored by + * the receiver. + * + * If an endpoint receives an ABORT with a format error or no TCB is + * found, it MUST silently discard it. Moreover, under any + * circumstances, an endpoint that receives an ABORT MUST NOT respond to + * that ABORT by sending an ABORT of its own. 
+ */ +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 6 |Reserved |T| Length | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * \ \ + * / zero or more Error Causes / + * \ \ + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + sctp_header_t sctp_hdr; + sctp_chunks_common_hdr_t chunk_hdr; + sctp_err_cause_param_t err_causes[]; + +} sctp_abort_chunk_t; + +always_inline void +vnet_sctp_set_tbit (sctp_abort_chunk_t * a) +{ + vnet_sctp_set_chunk_type (&a->chunk_hdr, ABORT); + // a->chunk_hdr.flags = clib_host_to_net_u16 (1); +} + +always_inline void +vnet_sctp_unset_tbit (sctp_abort_chunk_t * a) +{ + vnet_sctp_set_chunk_type (&a->chunk_hdr, ABORT); + // a->chunk_hdr.flags = clib_host_to_net_u16 (0); +} + +/* + * Shutdown Association (SHUTDOWN) + * + * An endpoint in an association MUST use this chunk to initiate a + * graceful close of the association with its peer. + */ +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 7 | Chunk Flags | Length = 8 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Cumulative TSN Ack | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + sctp_header_t sctp_hdr; + sctp_chunks_common_hdr_t chunk_hdr; + /* + * This parameter contains the TSN of the last chunk received in + * sequence before any gaps. + * + * Note: Since the SHUTDOWN message does not contain Gap Ack Blocks, + * it cannot be used to acknowledge TSNs received out of order. In a + * SACK, lack of Gap Ack Blocks that were previously included + * indicates that the data receiver reneged on the associated DATA + * chunks. 
Since SHUTDOWN does not contain Gap Ack Blocks, the + * receiver of the SHUTDOWN shouldn't interpret the lack of a Gap Ack + * Block as a renege. + */ + u32 cumulative_tsn_ack; + +} sctp_shutdown_association_chunk_t; + +always_inline void +vnet_sctp_set_tsn_last_received_chunk (sctp_shutdown_association_chunk_t * s, + u32 tsn_last_chunk) +{ + vnet_sctp_set_chunk_type (&s->chunk_hdr, SHUTDOWN); + s->cumulative_tsn_ack = clib_host_to_net_u32 (tsn_last_chunk); +} + +/* + * Shutdown Acknowledgement (SHUTDOWN ACK) + * + * This chunk MUST be used to acknowledge the receipt of the SHUTDOWN + * chunk at the completion of the shutdown process. + */ +/* + * 0 1 2 3 + * 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + * | Type = 8 |Chunk Flags | Length = 4 | + * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+ + */ +typedef struct +{ + sctp_header_t sctp_hdr; + sctp_chunks_common_hdr_t chunk_hdr; +} sctp_shutdown_ack_chunk_t; + +always_inline void +vnet_sctp_fill_shutdown_ack (sctp_shutdown_ack_chunk_t * s) +{ + vnet_sctp_set_chunk_type (&s->chunk_hdr, SHUTDOWN_ACK); + vnet_sctp_set_chunk_length (&s->chunk_hdr, 4); +} + +#endif /* included_vnet_sctp_packet_h */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/sctp/sctp_pg.c b/src/vnet/sctp/sctp_pg.c new file mode 100644 index 00000000000..d253330143c --- /dev/null +++ b/src/vnet/sctp/sctp_pg.c @@ -0,0 +1,30 @@ +/* + * Copyright (c) 2017 SUSE LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +#include <vnet/ip/ip.h> +#include <vnet/pg/pg.h> + +uword +unformat_pg_sctp_header (unformat_input_t * input, va_list * args) +{ + return 1; +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/sctp/sctp_timer.h b/src/vnet/sctp/sctp_timer.h new file mode 100644 index 00000000000..259dea92e09 --- /dev/null +++ b/src/vnet/sctp/sctp_timer.h @@ -0,0 +1,29 @@ +/* + * Copyright (c) 2017 SUSE LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef included_sctp_timer_h__ +#define included_sctp_timer_h__ + +#include <vppinfra/tw_timer_16t_2w_512sl.h> +#include <vppinfra/tw_timer_16t_1w_2048sl.h> + +#endif /* included_sctp_timer_h__ */ + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ diff --git a/src/vnet/session/application_interface.c b/src/vnet/session/application_interface.c index 8dab3d67bce..ec317896f14 100644 --- a/src/vnet/session/application_interface.c +++ b/src/vnet/session/application_interface.c @@ -326,7 +326,22 @@ unformat_vnet_uri (unformat_input_t * input, va_list * args) sep->is_ip4 = 0; return 1; } - + if (unformat (input, "sctp://%U/%d", unformat_ip4_address, &sep->ip.ip4, + &sep->port)) + { + sep->transport_proto = TRANSPORT_PROTO_SCTP; + sep->port = clib_host_to_net_u16 (sep->port); + sep->is_ip4 = 1; + return 1; + } + if (unformat (input, "sctp://%U/%d", unformat_ip6_address, &sep->ip.ip6, + &sep->port)) + { + sep->transport_proto = TRANSPORT_PROTO_SCTP; + sep->port = clib_host_to_net_u16 (sep->port); + sep->is_ip4 = 0; + return 1; + } return 0; } diff --git a/src/vnet/session/transport.c b/src/vnet/session/transport.c index 2f01ac6c092..964e739550e 100644 --- a/src/vnet/session/transport.c +++ b/src/vnet/session/transport.c @@ -54,6 +54,9 @@ format_transport_proto (u8 * s, va_list * args) case TRANSPORT_PROTO_UDP: s = format (s, "UDP"); break; + case TRANSPORT_PROTO_SCTP: + s = format (s, "SCTP"); + break; } return s; } @@ -86,6 +89,10 @@ unformat_transport_proto (unformat_input_t * input, va_list * args) *proto = TRANSPORT_PROTO_UDP; else if (unformat (input, "UDP")) *proto = TRANSPORT_PROTO_UDP; + if (unformat (input, "sctp")) + *proto = TRANSPORT_PROTO_SCTP; + else if (unformat (input, "SCTP")) + *proto = TRANSPORT_PROTO_SCTP; else return 0; return 1; diff --git a/src/vnet/session/transport.h b/src/vnet/session/transport.h index 61a2b7b8aa6..12b6a0551d8 100644 --- a/src/vnet/session/transport.h +++ 
b/src/vnet/session/transport.h @@ -70,6 +70,7 @@ typedef enum _transport_proto { TRANSPORT_PROTO_TCP, TRANSPORT_PROTO_UDP, + TRANSPORT_PROTO_SCTP, TRANSPORT_N_PROTO } transport_proto_t; @@ -110,7 +111,7 @@ transport_endpoint_fib_proto (transport_endpoint_t * tep) always_inline u8 transport_is_stream (u8 proto) { - return (proto == TRANSPORT_PROTO_TCP); + return ((proto == TRANSPORT_PROTO_TCP) || (proto == TRANSPORT_PROTO_SCTP)); } always_inline u8 |