summaryrefslogtreecommitdiffstats
path: root/src/plugins/sctp/sctp.h
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/sctp/sctp.h')
-rw-r--r--src/plugins/sctp/sctp.h1019
1 files changed, 1019 insertions, 0 deletions
diff --git a/src/plugins/sctp/sctp.h b/src/plugins/sctp/sctp.h
new file mode 100644
index 00000000000..a99b01c1c0a
--- /dev/null
+++ b/src/plugins/sctp/sctp.h
@@ -0,0 +1,1019 @@
+/*
+ * Copyright (c) 2017 SUSE LLC.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+#ifndef included_vnet_sctp_h
+#define included_vnet_sctp_h
+
+#include <vnet/vnet.h>
+#include <vnet/ip/ip.h>
+#include <sctp/sctp_timer.h>
+#include <sctp/sctp_packet.h>
+#include <vnet/session/transport.h>
+#include <vnet/session/session.h>
+
+/* SCTP buffer opaque definition */
+typedef struct
+{
+ struct
+ {
+ u32 connection_index;
+ u16 sid; /**< Stream ID */
+ u16 ssn; /**< Stream Sequence Number */
+ u32 tsn; /**< Transmission Sequence Number */
+ u16 hdr_offset; /**< offset relative to ip hdr */
+ u16 data_offset; /**< offset relative to ip hdr */
+ u16 data_len; /**< data len */
+ u8 subconn_idx; /**< index of the sub_connection being used */
+ u8 flags;
+ } sctp;
+} sctp_buffer_opaque_t;
+
+STATIC_ASSERT (sizeof (sctp_buffer_opaque_t) <=
+ STRUCT_SIZE_OF (vnet_buffer_opaque_t, unused),
+ "sctp_buffer_opaque_t too large for vnet_buffer_opaque_t");
+
+#define sctp_buffer_opaque(b) \
+ ((sctp_buffer_opaque_t *)((u8 *)((b)->opaque) + \
+STRUCT_OFFSET_OF (vnet_buffer_opaque_t, unused)))
+
+
+/* SCTP timers */
+#define foreach_sctp_timer \
+ _(T1_INIT, "T1_INIT") \
+ _(T1_COOKIE, "T1_COOKIE") \
+ _(T2_SHUTDOWN, "T2_SHUTDOWN") \
+ _(T3_RXTX, "T3_RXTX") \
+ _(T4_HEARTBEAT, "T4_HB") \
+ _(T5_SHUTDOWN_GUARD, "T5_SHUTDOWN_GUARD")
+
+typedef enum _sctp_timers
+{
+#define _(sym, str) SCTP_TIMER_##sym,
+ foreach_sctp_timer
+#undef _
+ SCTP_N_TIMERS
+} sctp_timers_e;
+
+#define SCTP_TIMER_HANDLE_INVALID ((u32) ~0)
+
+always_inline char *
+sctp_timer_to_string (u8 timer_id)
+{
+ switch (timer_id)
+ {
+ case SCTP_TIMER_T1_INIT:
+ return "SCTP_TIMER_T1_INIT";
+ case SCTP_TIMER_T1_COOKIE:
+ return "SCTP_TIMER_T1_COOKIE";
+ case SCTP_TIMER_T2_SHUTDOWN:
+ return "SCTP_TIMER_T2_SHUTDOWN";
+ case SCTP_TIMER_T3_RXTX:
+ return "SCTP_TIMER_T3_RXTX";
+ case SCTP_TIMER_T4_HEARTBEAT:
+ return "SCTP_TIMER_T4_HEARTBEAT";
+ case SCTP_TIMER_T5_SHUTDOWN_GUARD:
+ return "SCTP_TIMER_T5_SHUTDOWN_GUARD";
+ }
+ return NULL;
+}
+
+typedef enum _sctp_error
+{
+#define sctp_error(n,s) SCTP_ERROR_##n,
+#include <sctp/sctp_error.def>
+#undef sctp_error
+ SCTP_N_ERROR,
+} sctp_error_t;
+
+#define NO_FLAG 0
+
+#define IS_T_BIT_SET(var) ((var) & (1))
+#define IS_E_BIT_SET(var) ((var) & (1))
+#define IS_B_BIT_SET(var) ((var) & (1<<1))
+#define IS_U_BIT_SET(var) ((var) & (1<<2))
+
+#define MAX_SCTP_CONNECTIONS 8
+#define SCTP_PRIMARY_PATH_IDX 0
+
+#if (VLIB_BUFFER_TRACE_TRAJECTORY)
+#define sctp_trajectory_add_start(b, start) \
+{ \
+ (*vlib_buffer_trace_trajectory_cb) (b, start); \
+}
+#else
+#define sctp_trajectory_add_start(b, start)
+#endif
+
+enum _sctp_subconn_state
+{
+ SCTP_SUBCONN_STATE_DOWN = 0,
+ SCTP_SUBCONN_STATE_UP,
+ SCTP_SUBCONN_STATE_ALLOW_HB,
+ SCTP_SUBCONN_AWAITING_SACK,
+ SCTP_SUBCONN_SACK_RECEIVED
+};
+
+#define SCTP_INITIAL_SSHTRESH 65535
+typedef struct _sctp_sub_connection
+{
+ transport_connection_t connection; /**< Common transport data. First! */
+
+ u8 subconn_idx; /**< This indicates the position of this sub-connection in the super-set container of connections pool */
+ u32 error_count; /**< The current error count for this destination. */
+ u32 error_threshold; /**< Current error threshold for this destination,
+ i.e. what value marks the destination down if error count reaches this value. */
+ u32 cwnd; /**< Congestion control window (cwnd, in bytes), which is adjusted by
+ the sender based on observed network conditions. */
+ u32 ssthresh; /**< Slow-start threshold (in bytes), which is used by the
+ sender to distinguish slow-start and congestion avoidance phases. */
+
+ u64 rtt_ts; /**< USED to hold the timestamp of when the packet has been sent */
+
+ u32 RTO; /**< The current retransmission timeout value. */
+ u64 SRTT; /**< The current smoothed round-trip time. */
+ f64 RTTVAR; /**< The current RTT variation. */
+
+ u32 partially_acked_bytes; /**< The tracking method for increase of cwnd when in
+ congestion avoidance mode (see Section 7.2.2).*/
+
+ u8 state; /**< The current state of this destination, i.e., DOWN, UP, ALLOW-HB, NO-HEARTBEAT, etc. */
+
+ u16 PMTU; /**< The current known path MTU. */
+
+ u32 timers[SCTP_N_TIMERS]; /**< A timer used by each destination. */
+
+ u8 RTO_pending; /**< A flag used to track if one of the DATA chunks sent to
+ this address is currently being used to compute an RTT.
+ If this flag is 0, the next DATA chunk sent to this destination
+ should be used to compute an RTT and this flag should be set.
+ Every time the RTT calculation completes (i.e., the DATA chunk is SACK'd),
+ clear this flag. */
+
+ u64 last_seen; /**< The time to which this destination was last sent a packet to.
+ This can be used to determine if a HEARTBEAT is needed. */
+
+ u64 last_data_ts; /**< Used to hold the timestamp value of last time we sent a DATA chunk */
+
+ u8 unacknowledged_hb; /**< Used to track how many unacknowledged heartbeats we had;
+ If more than SCTP_PATH_MAX_RETRANS then connection is considered unreachable. */
+
+ u8 is_retransmitting; /**< A flag (0 = no, 1 = yes) indicating whether the connection is retransmitting a previous packet */
+
+ u8 enqueue_state; /**< if set to 1 indicates that DATA is still being handled hence cannot shutdown this connection yet */
+
+} sctp_sub_connection_t;
+
+typedef struct
+{
+ u32 a_rwnd; /**< Maximum segment size advertised */
+
+} sctp_options_t;
+
+/* Useful macros to deal with the out_of_order_map (array of bit) */
+#define SET_BIT(A,k) ( A[(k/32)] |= (1 << (k%32)) )
+#define CLEAR_BIT(A,k) ( A[(k/32)] &= ~(1 << (k%32)) )
+#define TEST_BIT(A,k) ( A[(k/32)] & (1 << (k%32)) )
+
+always_inline void
+_bytes_swap (void *pv, size_t n)
+{
+ char *p = pv;
+ size_t lo, hi;
+ for (lo = 0, hi = n - 1; hi > lo; lo++, hi--)
+ {
+ char tmp = p[lo];
+ p[lo] = p[hi];
+ p[hi] = tmp;
+ }
+}
+
+#define ENDIANESS_SWAP(x) _bytes_swap(&x, sizeof(x));
+
+#define MAX_INFLIGHT_PACKETS 128
+#define MAX_ENQUEABLE_SACKS 2
+
+/* This parameter indicates to the receiver how much increment in
+ * milliseconds the sender wishes the receiver to add to its default
+ * cookie life-span.
+ */
+#define SUGGESTED_COOKIE_LIFE_SPAN_INCREMENT 1000
+
+typedef struct _sctp_user_configuration
+{
+ u8 never_delay_sack;
+ u8 never_bundle;
+
+} sctp_user_configuration_t;
+
+typedef struct _sctp_connection
+{
+ /** Required for pool_get_aligned */
+ CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+
+ sctp_sub_connection_t sub_conn[MAX_SCTP_CONNECTIONS]; /**< Common transport data. First! */
+ sctp_user_configuration_t conn_config; /**< Allows tuning of some SCTP behaviors */
+
+ u8 state; /**< SCTP state as per sctp_state_t */
+ u16 flags; /**< Chunk flag (see sctp_chunks_common_hdr_t) */
+
+ u32 local_tag; /**< INIT_TAG generated locally */
+ u32 remote_tag; /**< INIT_TAG generated by the remote peer */
+
+ u32 local_initial_tsn; /**< Initial TSN generated locally */
+ u32 remote_initial_tsn; /**< Initial TSN generated by the remote-peer */
+
+ u32 peer_cookie_life_span_increment;
+
+ u32 overall_err_count; /**< The overall association error count. */
+ u32 overall_err_treshold; /**< The threshold for this association that if the Overall Error Count
+ reaches will cause this association to be torn down. */
+
+ u8 init_retransmit_err; /**< Error counter for the INIT transmission phase */
+
+ u32 peer_rwnd; /**< Current calculated value of the peer's rwnd. */
+
+ u32 next_tsn; /**< The next TSN number to be assigned to a new DATA chunk.
+ This is sent in the INIT or INIT ACK chunk to the peer
+ and incremented each time a DATA chunk is assigned a
+ TSN (normally just prior to transmit or during
+ fragmentation). */
+
+ u32 last_unacked_tsn; /** < Last TSN number still unacked */
+ u32 next_tsn_expected; /**< The next TSN number expected to be received. */
+
+ u32 last_rcvd_tsn; /**< This is the last TSN received in sequence. This value
+ is set initially by taking the peer's initial TSN,
+ received in the INIT or INIT ACK chunk, and
+ subtracting one from it. */
+
+ u32 out_of_order_map[MAX_INFLIGHT_PACKETS]; /**< An array of bits or bytes indicating which out-of-order
+ TSNs have been received (relative to the Last Rcvd TSN).
+ If no gaps exist, i.e., no out-of-order packets have been received,
+ this array will be set to all zero. */
+
+ u8 ack_state; /**< This flag indicates if the next received packet is set to be responded to with a SACK.
+ This is initialized to 0. When a packet is received it is incremented.
+ If this value reaches 2 or more, a SACK is sent and the value is reset to 0.
+ Note: This is used only when no DATA chunks are received out-of-order.
+ When DATA chunks are out-of-order, SACKs are not delayed (see Section 6). */
+
+ u8 smallest_PMTU_idx; /** The index of the sub-connection with the smallest PMTU discovered across all peer's transport addresses. */
+
+ u8 overall_sending_status; /**< 0 indicates first fragment of a user message
+ 1 indicates normal stream
+ 2 indicates last fragment of a user message */
+
+ u8 forming_association_changed; /**< This is a flag indicating whether the original association has been modified during
+ the life-span of the association itself. For instance, a new sub-connection might have been added. */
+
+ sctp_state_cookie_param_t cookie_param; /**< Temporary location to save cookie information; it can be used to
+ when timeout expires and sending again a COOKIE is require. */
+
+} sctp_connection_t;
+
+typedef void (sctp_timer_expiration_handler) (u32 conn_index, u32 timer_id);
+
+sctp_connection_t *sctp_connection_new (u8 thread_index);
+
+u8
+sctp_sub_connection_add_ip4 (vlib_main_t * vm,
+ ip4_address_t * lcl_addr,
+ ip4_address_t * rmt_addr);
+
+u8
+sctp_sub_connection_add_ip6 (vlib_main_t * vm,
+ ip6_address_t * lcl_addr,
+ ip6_address_t * rmt_addr);
+
+u8
+sctp_sub_connection_del_ip4 (ip4_address_t * lcl_addr,
+ ip4_address_t * rmt_addr);
+
+u8
+sctp_sub_connection_del_ip6 (ip6_address_t * lcl_addr,
+ ip6_address_t * rmt_addr);
+
+u8 sctp_configure (sctp_user_configuration_t config);
+
+void sctp_connection_close (sctp_connection_t * sctp_conn);
+void sctp_connection_cleanup (sctp_connection_t * sctp_conn);
+void sctp_connection_del (sctp_connection_t * sctp_conn);
+
+u32 sctp_push_header (transport_connection_t * tconn, vlib_buffer_t * b);
+void sctp_send_init (sctp_connection_t * sctp_conn);
+void sctp_send_cookie_echo (sctp_connection_t * sctp_conn);
+void sctp_send_shutdown (sctp_connection_t * sctp_conn);
+void sctp_send_shutdown_ack (sctp_connection_t * sctp_conn, u8 idx,
+ vlib_buffer_t * b);
+void sctp_send_shutdown_complete (sctp_connection_t * sctp_conn, u8 idx,
+ vlib_buffer_t * b0);
+void sctp_send_heartbeat (sctp_connection_t * sctp_conn);
+void sctp_data_retransmit (sctp_connection_t * sctp_conn);
+void sctp_flush_frame_to_output (vlib_main_t * vm, u8 thread_index,
+ u8 is_ip4);
+void sctp_flush_frames_to_output (u8 thread_index);
+void sctp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add);
+
+format_function_t format_sctp_state;
+
+u8 *format_sctp_connection_id (u8 * s, va_list * args);
+u8 *format_sctp_connection (u8 * s, va_list * args);
+u8 *format_sctp_scoreboard (u8 * s, va_list * args);
+u8 *format_sctp_header (u8 * s, va_list * args);
+u8 *format_sctp_tx_trace (u8 * s, va_list * args);
+unformat_function_t unformat_pg_sctp_header;
+
+clib_error_t *sctp_init (vlib_main_t * vm);
+void sctp_connection_timers_init (sctp_connection_t * sctp_conn);
+void sctp_connection_timers_reset (sctp_connection_t * sctp_conn);
+void sctp_init_snd_vars (sctp_connection_t * sctp_conn);
+void sctp_init_mss (sctp_connection_t * sctp_conn);
+
+void sctp_prepare_initack_chunk (sctp_connection_t * sctp_conn, u8 idx,
+ vlib_buffer_t * b, ip4_address_t * ip4_addr,
+ u8 add_ip4, ip6_address_t * ip6_addr,
+ u8 add_ip6);
+void sctp_prepare_initack_chunk_for_collision (sctp_connection_t * sctp_conn,
+ u8 idx, vlib_buffer_t * b,
+ ip4_address_t * ip4_addr,
+ ip6_address_t * ip6_addr);
+void sctp_prepare_abort_for_collision (sctp_connection_t * sctp_conn, u8 idx,
+ vlib_buffer_t * b,
+ ip4_address_t * ip4_addr,
+ ip6_address_t * ip6_addr);
+void sctp_prepare_operation_error (sctp_connection_t * sctp_conn, u8 idx,
+ vlib_buffer_t * b, u8 err_cause);
+void sctp_prepare_cookie_echo_chunk (sctp_connection_t * sctp_conn, u8 idx,
+ vlib_buffer_t * b, u8 reuse_buffer);
+void sctp_prepare_cookie_ack_chunk (sctp_connection_t * sctp_conn, u8 idx,
+ vlib_buffer_t * b);
+void sctp_prepare_sack_chunk (sctp_connection_t * sctp_conn, u8 idx,
+ vlib_buffer_t * b);
+void sctp_prepare_heartbeat_ack_chunk (sctp_connection_t * sctp_conn, u8 idx,
+ vlib_buffer_t * b);
+
+u16 sctp_check_outstanding_data_chunks (sctp_connection_t * sctp_conn);
+
+void sctp_api_reference (void);
+
+#define IP_PROTOCOL_SCTP 132
+
+/** SSCTP FSM state definitions as per RFC4960. */
+#define foreach_sctp_fsm_state \
+ _(CLOSED, "CLOSED") \
+ _(COOKIE_WAIT, "COOKIE_WAIT") \
+ _(COOKIE_ECHOED, "COOKIE_ECHOED") \
+ _(ESTABLISHED, "ESTABLISHED") \
+ _(SHUTDOWN_PENDING, "SHUTDOWN_PENDING") \
+ _(SHUTDOWN_SENT, "SHUTDOWN_SENT") \
+ _(SHUTDOWN_RECEIVED, "SHUTDOWN_RECEIVED") \
+ _(SHUTDOWN_ACK_SENT, "SHUTDOWN_ACK_SENT")
+
+typedef enum _sctp_state
+{
+#define _(sym, str) SCTP_STATE_##sym,
+ foreach_sctp_fsm_state
+#undef _
+ SCTP_N_STATES
+} sctp_state_t;
+
+always_inline char *
+sctp_state_to_string (u8 state)
+{
+ switch (state)
+ {
+ case SCTP_STATE_CLOSED:
+ return "SCTP_STATE_CLOSED";
+ case SCTP_STATE_COOKIE_WAIT:
+ return "SCTP_STATE_COOKIE_WAIT";
+ case SCTP_STATE_COOKIE_ECHOED:
+ return "SCTP_STATE_COOKIE_ECHOED";
+ case SCTP_STATE_ESTABLISHED:
+ return "SCTP_STATE_ESTABLISHED";
+ case SCTP_STATE_SHUTDOWN_PENDING:
+ return "SCTP_STATE_SHUTDOWN_PENDING";
+ case SCTP_STATE_SHUTDOWN_SENT:
+ return "SCTP_STATE_SHUTDOWN_SENT";
+ case SCTP_STATE_SHUTDOWN_RECEIVED:
+ return "SCTP_STATE_SHUTDOWN_RECEIVED";
+ case SCTP_STATE_SHUTDOWN_ACK_SENT:
+ return "SCTP_STATE_SHUTDOWN_ACK_SENT";
+ }
+ return NULL;
+}
+
+always_inline char *
+sctp_chunk_to_string (u8 type)
+{
+ switch (type)
+ {
+ case DATA:
+ return "DATA";
+ case INIT:
+ return "INIT";
+ case INIT_ACK:
+ return "INIT_ACK";
+ case SACK:
+ return "SACK";
+ case HEARTBEAT:
+ return "HEARTBEAT";
+ case HEARTBEAT_ACK:
+ return "HEARTBEAT_ACK";
+ case ABORT:
+ return "ABORT";
+ case SHUTDOWN:
+ return "SHUTDOWN";
+ case SHUTDOWN_ACK:
+ return "SHUTDOWN_ACK";
+ case OPERATION_ERROR:
+ return "OPERATION_ERROR";
+ case COOKIE_ECHO:
+ return "COOKIE_ECHO";
+ case COOKIE_ACK:
+ return "COOKIE_ACK";
+ case ECNE:
+ return "ECNE";
+ case CWR:
+ return "CWR";
+ case SHUTDOWN_COMPLETE:
+ return "SHUTDOWN_COMPLETE";
+ }
+ return NULL;
+}
+
+always_inline char *
+sctp_optparam_type_to_string (u8 type)
+{
+ switch (type)
+ {
+ case SCTP_IPV4_ADDRESS_TYPE:
+ return "SCTP_IPV4_ADDRESS_TYPE";
+ case SCTP_IPV6_ADDRESS_TYPE:
+ return "SCTP_IPV6_ADDRESS_TYPE";
+ case SCTP_STATE_COOKIE_TYPE:
+ return "SCTP_STATE_COOKIE_TYPE";
+ case SCTP_UNRECOGNIZED_TYPE:
+ return "SCTP_UNRECOGNIZED_TYPE";
+ case SCTP_COOKIE_PRESERVATIVE_TYPE:
+ return "SCTP_COOKIE_PRESERVATIVE_TYPE";
+ case SCTP_HOSTNAME_ADDRESS_TYPE:
+ return "SCTP_HOSTNAME_ADDRESS_TYPE";
+ case SCTP_SUPPORTED_ADDRESS_TYPES:
+ return "SCTP_SUPPORTED_ADDRESS_TYPES";
+ }
+ return NULL;
+}
+
+#define SCTP_TICK 0.001 /**< SCTP tick period (s) */
+#define SHZ (u32) (1/SCTP_TICK) /**< SCTP tick frequency */
+#define SCTP_TSTAMP_RESOLUTION SCTP_TICK /**< Time stamp resolution */
+
+/* As per RFC4960, page 83 */
+#define SCTP_RTO_INIT 3 * SHZ /* 3 seconds */
+#define SCTP_RTO_MIN 1 * SHZ /* 1 second */
+#define SCTP_RTO_MAX 60 * SHZ /* 60 seconds */
+#define SCTP_RTO_BURST 4
+#define SCTP_RTO_ALPHA 1/8
+#define SCTP_RTO_BETA 1/4
+#define SCTP_VALID_COOKIE_LIFE 60 * SHZ /* 60 seconds */
+#define SCTP_ASSOCIATION_MAX_RETRANS 10 // the overall connection
+#define SCTP_PATH_MAX_RETRANS 5 // number of attempts per destination address
+#define SCTP_MAX_INIT_RETRANS 8 // number of attempts
+#define SCTP_HB_INTERVAL 30 * SHZ
+#define SCTP_HB_MAX_BURST 1
+#define SCTP_DATA_IDLE_INTERVAL 15 * SHZ /* 15 seconds; the time-interval after which the connetion is considered IDLE */
+#define SCTP_TO_TIMER_TICK SCTP_TICK*10 /* Period for converting from SCTP_TICK */
+
+#define SCTP_CONN_RECOVERY 1 << 1
+#define SCTP_FAST_RECOVERY 1 << 2
+
+typedef struct _sctp_lookup_dispatch
+{
+ u8 next, error;
+} sctp_lookup_dispatch_t;
+
+typedef struct _sctp_main
+{
+ /* Per-worker thread SCTP connection pools */
+ sctp_connection_t **connections;
+
+ /* Pool of listeners. */
+ sctp_connection_t *listener_pool;
+
+ /** Dispatch table by state and flags */
+ sctp_lookup_dispatch_t dispatch_table[SCTP_N_STATES][64];
+
+ u8 log2_tstamp_clocks_per_tick;
+ f64 tstamp_ticks_per_clock;
+ u64 *time_now;
+
+ /** per-worker tx buffer free lists */
+ u32 **tx_buffers;
+ /** per-worker tx frames to SCTP 4/6 output nodes */
+ vlib_frame_t **tx_frames[2];
+ /** per-worker tx frames to ip 4/6 lookup nodes */
+ vlib_frame_t **ip_lookup_tx_frames[2];
+
+ /* Per worker-thread timer wheel for connections timers */
+ tw_timer_wheel_16t_2w_512sl_t *timer_wheels;
+
+ /* Pool of half-open connections on which we've sent a SYN */
+ sctp_connection_t *half_open_connections;
+ clib_spinlock_t half_open_lock;
+
+ /* TODO: Congestion control algorithms registered */
+ /* sctp_cc_algorithm_t *cc_algos; */
+
+ /* Flag that indicates if stack is on or off */
+ u8 is_enabled;
+ u8 is_init;
+
+ /** Number of preallocated connections */
+ u32 preallocated_connections;
+
+ /** Transport table (preallocation) size parameters */
+ u32 local_endpoints_table_memory;
+ u32 local_endpoints_table_buckets;
+
+ /** Vectors of src addresses. Optional unless one needs > 63K active-opens */
+ ip4_address_t *ip4_src_addresses;
+ u32 last_v4_address_rotor;
+ u32 last_v6_address_rotor;
+ ip6_address_t *ip6_src_addresses;
+
+ /** vlib buffer size */
+ u32 bytes_per_buffer;
+
+ u8 punt_unknown4;
+ u8 punt_unknown6;
+
+ u32 sctp4_established_phase_node_index;
+ u32 sctp6_established_phase_node_index;
+
+ u16 msg_id_base;
+} sctp_main_t;
+
+extern sctp_main_t sctp_main;
+extern vlib_node_registration_t sctp4_input_node;
+extern vlib_node_registration_t sctp6_input_node;
+extern vlib_node_registration_t sctp4_output_node;
+extern vlib_node_registration_t sctp6_output_node;
+
+always_inline sctp_main_t *
+vnet_get_sctp_main ()
+{
+ return &sctp_main;
+}
+
+always_inline sctp_header_t *
+sctp_buffer_hdr (vlib_buffer_t * b)
+{
+ ASSERT ((signed) b->current_data >= (signed) -VLIB_BUFFER_PRE_DATA_SIZE);
+ return (sctp_header_t *) (b->data + b->current_data
+ + sctp_buffer_opaque (b)->sctp.hdr_offset);
+}
+
+clib_error_t *vnet_sctp_enable_disable (vlib_main_t * vm, u8 is_en);
+clib_error_t *sctp_plugin_api_hookup (vlib_main_t * vm);
+
+always_inline sctp_connection_t *
+sctp_half_open_connection_get (u32 conn_index)
+{
+ sctp_connection_t *tc = 0;
+ clib_spinlock_lock_if_init (&sctp_main.half_open_lock);
+ if (!pool_is_free_index (sctp_main.half_open_connections, conn_index))
+ tc = pool_elt_at_index (sctp_main.half_open_connections, conn_index);
+ tc->sub_conn[SCTP_PRIMARY_PATH_IDX].subconn_idx = SCTP_PRIMARY_PATH_IDX;
+ clib_spinlock_unlock_if_init (&sctp_main.half_open_lock);
+ return tc;
+}
+
+/**
+ * Cleanup half-open connection
+ *
+ */
+always_inline void
+sctp_half_open_connection_del (sctp_connection_t * tc)
+{
+ sctp_main_t *sctp_main = vnet_get_sctp_main ();
+ clib_spinlock_lock_if_init (&sctp_main->half_open_lock);
+ pool_put_index (sctp_main->half_open_connections,
+ tc->sub_conn[SCTP_PRIMARY_PATH_IDX].c_c_index);
+ if (CLIB_DEBUG)
+ clib_memset (tc, 0xFA, sizeof (*tc));
+ clib_spinlock_unlock_if_init (&sctp_main->half_open_lock);
+}
+
+always_inline u64
+sctp_set_time_now (u32 thread_index)
+{
+ sctp_main.time_now[thread_index] = clib_cpu_time_now ()
+ * sctp_main.tstamp_ticks_per_clock;
+ return sctp_main.time_now[thread_index];
+}
+
+always_inline void
+sctp_timer_set (sctp_connection_t * tc, u8 conn_idx, u8 timer_id,
+ u32 interval)
+{
+ ASSERT (tc->sub_conn[conn_idx].connection.thread_index ==
+ vlib_get_thread_index ());
+ ASSERT (tc->sub_conn[conn_idx].timers[timer_id] ==
+ SCTP_TIMER_HANDLE_INVALID);
+
+ sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx];
+ sub->timers[timer_id] =
+ tw_timer_start_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
+ sub->c_c_index, timer_id, interval);
+}
+
+always_inline void
+sctp_timer_reset (sctp_connection_t * tc, u8 conn_idx, u8 timer_id)
+{
+ ASSERT (tc->sub_conn[conn_idx].c_thread_index == vlib_get_thread_index ());
+ if (tc->sub_conn[conn_idx].timers[timer_id] == SCTP_TIMER_HANDLE_INVALID)
+ return;
+
+ sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx];
+
+ tw_timer_stop_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
+ sub->timers[timer_id]);
+ sub->timers[timer_id] = SCTP_TIMER_HANDLE_INVALID;
+}
+
+/**
+ * Try to cleanup half-open connection
+ *
+ * If called from a thread that doesn't own tc, the call won't have any
+ * effect.
+ *
+ * @param tc - connection to be cleaned up
+ * @return non-zero if cleanup failed.
+ */
+always_inline int
+sctp_half_open_connection_cleanup (sctp_connection_t * tc)
+{
+ /* Make sure this is the owning thread */
+ if (tc->sub_conn[SCTP_PRIMARY_PATH_IDX].c_thread_index !=
+ vlib_get_thread_index ())
+ return 1;
+ sctp_timer_reset (tc, SCTP_PRIMARY_PATH_IDX, SCTP_TIMER_T1_INIT);
+ sctp_half_open_connection_del (tc);
+ return 0;
+}
+
+always_inline u32
+sctp_header_bytes ()
+{
+ return sizeof (sctp_header_t);
+}
+
+always_inline sctp_connection_t *
+sctp_get_connection_from_transport (transport_connection_t * tconn)
+{
+ ASSERT (tconn != NULL);
+
+ sctp_sub_connection_t *sub = (sctp_sub_connection_t *) tconn;
+#if SCTP_ADV_DEBUG
+ if (sub == NULL)
+ SCTP_ADV_DBG ("sub == NULL");
+ if (sub->parent == NULL)
+ SCTP_ADV_DBG ("sub->parent == NULL");
+#endif
+ if (sub->subconn_idx > 0)
+ return (sctp_connection_t *) sub -
+ (sizeof (sctp_sub_connection_t) * (sub->subconn_idx - 1));
+
+ return (sctp_connection_t *) sub;
+}
+
+always_inline u64
+sctp_time_now (void)
+{
+ return sctp_main.time_now[vlib_get_thread_index ()];
+}
+
+#define ABS(x) ((x) > 0) ? (x) : -(x);
+
+always_inline void
+sctp_calculate_rto (sctp_connection_t * sctp_conn, u8 conn_idx)
+{
+ /* See RFC4960, 6.3.1. RTO Calculation */
+ u64 RTO = 0;
+ f64 RTTVAR = 0;
+ u64 now = sctp_time_now ();
+ u64 prev_ts = sctp_conn->sub_conn[conn_idx].rtt_ts;
+ u64 R = prev_ts - now;
+
+ if (sctp_conn->sub_conn[conn_idx].RTO == 0) // C1: Let's initialize our RTO
+ {
+ sctp_conn->sub_conn[conn_idx].RTO = SCTP_RTO_MIN;
+ return;
+ }
+
+ if (sctp_conn->sub_conn[conn_idx].RTO == SCTP_RTO_MIN && sctp_conn->sub_conn[conn_idx].SRTT == 0) // C2: First RTT calculation
+ {
+ sctp_conn->sub_conn[conn_idx].SRTT = R;
+ RTTVAR = R / 2;
+
+ if (RTTVAR == 0)
+ RTTVAR = 100e-3; /* 100 ms */
+
+ sctp_conn->sub_conn[conn_idx].RTTVAR = RTTVAR;
+ }
+ else // C3: RTT already exists; let's recalculate
+ {
+ RTTVAR = (1 - SCTP_RTO_BETA) * sctp_conn->sub_conn[conn_idx].RTTVAR +
+ SCTP_RTO_BETA * ABS (sctp_conn->sub_conn[conn_idx].SRTT - R);
+
+ if (RTTVAR == 0)
+ RTTVAR = 100e-3; /* 100 ms */
+
+ sctp_conn->sub_conn[conn_idx].RTTVAR = RTTVAR;
+
+ sctp_conn->sub_conn[conn_idx].SRTT =
+ (1 - SCTP_RTO_ALPHA) * sctp_conn->sub_conn[conn_idx].SRTT +
+ SCTP_RTO_ALPHA * R;
+ }
+
+ RTO =
+ sctp_conn->sub_conn[conn_idx].SRTT +
+ 4 * sctp_conn->sub_conn[conn_idx].RTTVAR;
+ if (RTO < SCTP_RTO_MIN) // C6
+ RTO = SCTP_RTO_MIN;
+
+ if (RTO > SCTP_RTO_MAX) // C7
+ RTO = SCTP_RTO_MAX;
+
+ sctp_conn->sub_conn[conn_idx].RTO = RTO;
+}
+
+always_inline void
+sctp_timer_update (sctp_connection_t * tc, u8 conn_idx, u8 timer_id,
+ u32 interval)
+{
+ ASSERT (tc->sub_conn[conn_idx].connection.thread_index ==
+ vlib_get_thread_index ());
+ sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx];
+
+ if (tc->sub_conn[conn_idx].timers[timer_id] != SCTP_TIMER_HANDLE_INVALID)
+ tw_timer_stop_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
+ sub->timers[timer_id]);
+
+ tc->sub_conn[conn_idx].timers[timer_id] =
+ tw_timer_start_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index],
+ sub->c_c_index, timer_id, interval);
+}
+
+always_inline sctp_connection_t *
+sctp_listener_get (u32 tli)
+{
+ return pool_elt_at_index (sctp_main.listener_pool, tli);
+}
+
+#endif
+
+always_inline sctp_connection_t *
+sctp_connection_get (u32 conn_index, u32 thread_index)
+{
+ if (PREDICT_FALSE
+ (pool_is_free_index (sctp_main.connections[thread_index], conn_index)))
+ return 0;
+ return pool_elt_at_index (sctp_main.connections[thread_index], conn_index);
+}
+
+#define SELECT_MAX_RETRIES 8
+
+always_inline u8
+sctp_data_subconn_select (sctp_connection_t * sctp_conn)
+{
+ u32 sub = SCTP_PRIMARY_PATH_IDX;
+ u8 i, cwnd = sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].cwnd;
+ for (i = 1; i < MAX_SCTP_CONNECTIONS; i++)
+ {
+ if (sctp_conn->sub_conn[i].state == SCTP_SUBCONN_STATE_DOWN)
+ continue;
+
+ if (sctp_conn->sub_conn[i].cwnd > cwnd)
+ {
+ sub = i;
+ cwnd = sctp_conn->sub_conn[i].cwnd;
+ }
+ }
+ return sub;
+}
+
+always_inline u8
+sctp_sub_conn_id_via_ip6h (sctp_connection_t * sctp_conn, ip6_header_t * ip6h)
+{
+ u8 i;
+
+ for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
+ {
+ if (sctp_conn->sub_conn[i].connection.lcl_ip.ip6.as_u64[0] ==
+ ip6h->dst_address.as_u64[0] &&
+ sctp_conn->sub_conn[i].connection.lcl_ip.ip6.as_u64[1] ==
+ ip6h->dst_address.as_u64[1] &&
+ sctp_conn->sub_conn[i].connection.rmt_ip.ip6.as_u64[0] ==
+ ip6h->src_address.as_u64[0] &&
+ sctp_conn->sub_conn[i].connection.rmt_ip.ip6.as_u64[1] ==
+ ip6h->src_address.as_u64[1])
+ return i;
+ }
+ clib_warning ("Did not find a sub-connection; defaulting to %u",
+ SCTP_PRIMARY_PATH_IDX);
+ return SCTP_PRIMARY_PATH_IDX;
+}
+
+always_inline u8
+sctp_sub_conn_id_via_ip4h (sctp_connection_t * sctp_conn, ip4_header_t * ip4h)
+{
+ u8 i;
+
+ for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
+ {
+ if (sctp_conn->sub_conn[i].connection.lcl_ip.ip4.as_u32 ==
+ ip4h->dst_address.as_u32
+ && sctp_conn->sub_conn[i].connection.rmt_ip.ip4.as_u32 ==
+ ip4h->src_address.as_u32)
+ return i;
+ }
+ clib_warning ("Did not find a sub-connection; defaulting to %u",
+ SCTP_PRIMARY_PATH_IDX);
+ return SCTP_PRIMARY_PATH_IDX;
+}
+
+/**
+ * Push SCTP header to buffer
+ *
+ * @param vm - vlib_main
+ * @param b - buffer to write the header to
+ * @param sp_net - source port net order
+ * @param dp_net - destination port net order
+ * @param sctp_hdr_opts_len - header and options length in bytes
+ *
+ * @return - pointer to start of SCTP header
+ */
+always_inline void *
+vlib_buffer_push_sctp_net_order (vlib_buffer_t * b, u16 sp, u16 dp,
+ u8 sctp_hdr_opts_len)
+{
+ sctp_full_hdr_t *full_hdr;
+
+ full_hdr = vlib_buffer_push_uninit (b, sctp_hdr_opts_len);
+
+ full_hdr->hdr.src_port = sp;
+ full_hdr->hdr.dst_port = dp;
+ full_hdr->hdr.checksum = 0;
+ return full_hdr;
+}
+
+/**
+ * Push SCTP header to buffer
+ *
+ * @param b - buffer to write the header to
+ * @param sp_net - source port net order
+ * @param dp_net - destination port net order
+ * @param sctp_hdr_opts_len - header and options length in bytes
+ *
+ * @return - pointer to start of SCTP header
+ */
+always_inline void *
+vlib_buffer_push_sctp (vlib_buffer_t * b, u16 sp_net, u16 dp_net,
+ u8 sctp_hdr_opts_len)
+{
+ return vlib_buffer_push_sctp_net_order (b, sp_net, dp_net,
+ sctp_hdr_opts_len);
+}
+
+always_inline u8
+sctp_next_avail_subconn (sctp_connection_t * sctp_conn)
+{
+ u8 i;
+
+ for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
+ {
+ if (sctp_conn->sub_conn[i].state == SCTP_SUBCONN_STATE_DOWN)
+ return i;
+ }
+ return MAX_SCTP_CONNECTIONS;
+}
+
+always_inline void
+update_smallest_pmtu_idx (sctp_connection_t * sctp_conn)
+{
+ u8 i;
+ u8 smallest_pmtu_index = SCTP_PRIMARY_PATH_IDX;
+
+ for (i = 1; i < MAX_SCTP_CONNECTIONS; i++)
+ {
+ if (sctp_conn->sub_conn[i].state != SCTP_SUBCONN_STATE_DOWN)
+ {
+ if (sctp_conn->sub_conn[i].PMTU <
+ sctp_conn->sub_conn[smallest_pmtu_index].PMTU)
+ smallest_pmtu_index = i;
+ }
+ }
+
+ sctp_conn->smallest_PMTU_idx = smallest_pmtu_index;
+}
+
+/* As per RFC4960; section 7.2.1: Slow-Start */
+always_inline void
+sctp_init_cwnd (sctp_connection_t * sctp_conn)
+{
+ u8 i;
+ for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
+ {
+ /* Section 7.2.1; point (1) */
+ sctp_conn->sub_conn[i].cwnd =
+ clib_min (4 * sctp_conn->sub_conn[i].PMTU,
+ clib_max (2 * sctp_conn->sub_conn[i].PMTU, 4380));
+
+ /* Section 7.2.1; point (3) */
+ sctp_conn->sub_conn[i].ssthresh = SCTP_INITIAL_SSHTRESH;
+
+ /* Section 7.2.2; point (1) */
+ sctp_conn->sub_conn[i].partially_acked_bytes = 0;
+ }
+}
+
+always_inline u8
+sctp_in_cong_recovery (sctp_connection_t * sctp_conn, u8 idx)
+{
+ return 0;
+}
+
+always_inline u8
+cwnd_fully_utilized (sctp_connection_t * sctp_conn, u8 idx)
+{
+ if (sctp_conn->sub_conn[idx].cwnd == 0)
+ return 1;
+ return 0;
+}
+
+/* As per RFC4960; section 7.2.1: Slow-Start */
+always_inline void
+update_cwnd (sctp_connection_t * sctp_conn)
+{
+ u8 i;
+ u32 inflight = sctp_conn->next_tsn - sctp_conn->last_unacked_tsn;
+
+ for (i = 0; i < MAX_SCTP_CONNECTIONS; i++)
+ {
+ /* Section 7.2.1; point (2) */
+ if (sctp_conn->sub_conn[i].is_retransmitting)
+ {
+ sctp_conn->sub_conn[i].cwnd = 1 * sctp_conn->sub_conn[i].PMTU;
+ continue;
+ }
+
+ /* Section 7.2.2; point (4) */
+ if (sctp_conn->sub_conn[i].last_data_ts >
+ sctp_time_now () + SCTP_DATA_IDLE_INTERVAL)
+ {
+ sctp_conn->sub_conn[i].cwnd =
+ clib_max (sctp_conn->sub_conn[i].cwnd / 2,
+ 4 * sctp_conn->sub_conn[i].PMTU);
+ continue;
+ }
+
+ /* Section 7.2.1; point (5) */
+ if (sctp_conn->sub_conn[i].cwnd <= sctp_conn->sub_conn[i].ssthresh)
+ {
+ if (!cwnd_fully_utilized (sctp_conn, i))
+ continue;
+
+ if (sctp_in_cong_recovery (sctp_conn, i))
+ continue;
+
+ sctp_conn->sub_conn[i].cwnd =
+ clib_min (sctp_conn->sub_conn[i].PMTU, 1);
+ }
+
+ /* Section 6.1; point (D) */
+ if ((inflight + SCTP_RTO_BURST * sctp_conn->sub_conn[i].PMTU) <
+ sctp_conn->sub_conn[i].cwnd)
+ sctp_conn->sub_conn[i].cwnd =
+ inflight + SCTP_RTO_BURST * sctp_conn->sub_conn[i].PMTU;
+ }
+}
+
+/*
+ * fd.io coding-style-patch-verification: ON
+ *
+ * Local Variables:
+ * eval: (c-set-style "gnu")
+ * End:
+ */