From 3ffe6cadf083d1a0bc32e4a37d56b42a1153ff7b Mon Sep 17 00:00:00 2001 From: Florin Coras Date: Wed, 26 Jun 2019 16:27:13 -0700 Subject: sctp: move to plugins, disabled by default Removed sctp buffer metadata from vnet/buffer.h, added it to the plugin. Add registration APIs for plugin-based vlib_buffer_opaque / opaque2 decoders, used by "pcap dispatch trace ..." for display in the wireshark dissector. Type:refactor Not actively maintained. Change-Id: Ie4cb6ba66f68b3b3a7d7d2c63c917fdccf994371 Signed-off-by: Florin Coras Signed-off-by: Dave Barach --- src/plugins/sctp/sctp.h | 1019 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 1019 insertions(+) create mode 100644 src/plugins/sctp/sctp.h (limited to 'src/plugins/sctp/sctp.h') diff --git a/src/plugins/sctp/sctp.h b/src/plugins/sctp/sctp.h new file mode 100644 index 00000000000..a99b01c1c0a --- /dev/null +++ b/src/plugins/sctp/sctp.h @@ -0,0 +1,1019 @@ +/* + * Copyright (c) 2017 SUSE LLC. + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at: + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +#ifndef included_vnet_sctp_h +#define included_vnet_sctp_h + +#include +#include +#include +#include +#include +#include + +/* SCTP buffer opaque definition */ +typedef struct +{ + struct + { + u32 connection_index; + u16 sid; /**< Stream ID */ + u16 ssn; /**< Stream Sequence Number */ + u32 tsn; /**< Transmission Sequence Number */ + u16 hdr_offset; /**< offset relative to ip hdr */ + u16 data_offset; /**< offset relative to ip hdr */ + u16 data_len; /**< data len */ + u8 subconn_idx; /**< index of the sub_connection being used */ + u8 flags; + } sctp; +} sctp_buffer_opaque_t; + +STATIC_ASSERT (sizeof (sctp_buffer_opaque_t) <= + STRUCT_SIZE_OF (vnet_buffer_opaque_t, unused), + "sctp_buffer_opaque_t too large for vnet_buffer_opaque_t"); + +#define sctp_buffer_opaque(b) \ + ((sctp_buffer_opaque_t *)((u8 *)((b)->opaque) + \ +STRUCT_OFFSET_OF (vnet_buffer_opaque_t, unused))) + + +/* SCTP timers */ +#define foreach_sctp_timer \ + _(T1_INIT, "T1_INIT") \ + _(T1_COOKIE, "T1_COOKIE") \ + _(T2_SHUTDOWN, "T2_SHUTDOWN") \ + _(T3_RXTX, "T3_RXTX") \ + _(T4_HEARTBEAT, "T4_HB") \ + _(T5_SHUTDOWN_GUARD, "T5_SHUTDOWN_GUARD") + +typedef enum _sctp_timers +{ +#define _(sym, str) SCTP_TIMER_##sym, + foreach_sctp_timer +#undef _ + SCTP_N_TIMERS +} sctp_timers_e; + +#define SCTP_TIMER_HANDLE_INVALID ((u32) ~0) + +always_inline char * +sctp_timer_to_string (u8 timer_id) +{ + switch (timer_id) + { + case SCTP_TIMER_T1_INIT: + return "SCTP_TIMER_T1_INIT"; + case SCTP_TIMER_T1_COOKIE: + return "SCTP_TIMER_T1_COOKIE"; + case SCTP_TIMER_T2_SHUTDOWN: + return "SCTP_TIMER_T2_SHUTDOWN"; + case SCTP_TIMER_T3_RXTX: + return "SCTP_TIMER_T3_RXTX"; + case SCTP_TIMER_T4_HEARTBEAT: + return "SCTP_TIMER_T4_HEARTBEAT"; + case SCTP_TIMER_T5_SHUTDOWN_GUARD: + return "SCTP_TIMER_T5_SHUTDOWN_GUARD"; + } + return NULL; +} + +typedef enum _sctp_error +{ +#define sctp_error(n,s) SCTP_ERROR_##n, +#include +#undef sctp_error + SCTP_N_ERROR, +} sctp_error_t; + +#define NO_FLAG 0 + +#define IS_T_BIT_SET(var) 
((var) & (1)) +#define IS_E_BIT_SET(var) ((var) & (1)) +#define IS_B_BIT_SET(var) ((var) & (1<<1)) +#define IS_U_BIT_SET(var) ((var) & (1<<2)) + +#define MAX_SCTP_CONNECTIONS 8 +#define SCTP_PRIMARY_PATH_IDX 0 + +#if (VLIB_BUFFER_TRACE_TRAJECTORY) +#define sctp_trajectory_add_start(b, start) \ +{ \ + (*vlib_buffer_trace_trajectory_cb) (b, start); \ +} +#else +#define sctp_trajectory_add_start(b, start) +#endif + +enum _sctp_subconn_state +{ + SCTP_SUBCONN_STATE_DOWN = 0, + SCTP_SUBCONN_STATE_UP, + SCTP_SUBCONN_STATE_ALLOW_HB, + SCTP_SUBCONN_AWAITING_SACK, + SCTP_SUBCONN_SACK_RECEIVED +}; + +#define SCTP_INITIAL_SSHTRESH 65535 +typedef struct _sctp_sub_connection +{ + transport_connection_t connection; /**< Common transport data. First! */ + + u8 subconn_idx; /**< This indicates the position of this sub-connection in the super-set container of connections pool */ + u32 error_count; /**< The current error count for this destination. */ + u32 error_threshold; /**< Current error threshold for this destination, + i.e. what value marks the destination down if error count reaches this value. */ + u32 cwnd; /**< Congestion control window (cwnd, in bytes), which is adjusted by + the sender based on observed network conditions. */ + u32 ssthresh; /**< Slow-start threshold (in bytes), which is used by the + sender to distinguish slow-start and congestion avoidance phases. */ + + u64 rtt_ts; /**< USED to hold the timestamp of when the packet has been sent */ + + u32 RTO; /**< The current retransmission timeout value. */ + u64 SRTT; /**< The current smoothed round-trip time. */ + f64 RTTVAR; /**< The current RTT variation. */ + + u32 partially_acked_bytes; /**< The tracking method for increase of cwnd when in + congestion avoidance mode (see Section 7.2.2).*/ + + u8 state; /**< The current state of this destination, i.e., DOWN, UP, ALLOW-HB, NO-HEARTBEAT, etc. */ + + u16 PMTU; /**< The current known path MTU. 
*/ + + u32 timers[SCTP_N_TIMERS]; /**< A timer used by each destination. */ + + u8 RTO_pending; /**< A flag used to track if one of the DATA chunks sent to + this address is currently being used to compute an RTT. + If this flag is 0, the next DATA chunk sent to this destination + should be used to compute an RTT and this flag should be set. + Every time the RTT calculation completes (i.e., the DATA chunk is SACK'd), + clear this flag. */ + + u64 last_seen; /**< The time to which this destination was last sent a packet to. + This can be used to determine if a HEARTBEAT is needed. */ + + u64 last_data_ts; /**< Used to hold the timestamp value of last time we sent a DATA chunk */ + + u8 unacknowledged_hb; /**< Used to track how many unacknowledged heartbeats we had; + If more than SCTP_PATH_MAX_RETRANS then connection is considered unreachable. */ + + u8 is_retransmitting; /**< A flag (0 = no, 1 = yes) indicating whether the connection is retransmitting a previous packet */ + + u8 enqueue_state; /**< if set to 1 indicates that DATA is still being handled hence cannot shutdown this connection yet */ + +} sctp_sub_connection_t; + +typedef struct +{ + u32 a_rwnd; /**< Maximum segment size advertised */ + +} sctp_options_t; + +/* Useful macros to deal with the out_of_order_map (array of bit) */ +#define SET_BIT(A,k) ( A[(k/32)] |= (1 << (k%32)) ) +#define CLEAR_BIT(A,k) ( A[(k/32)] &= ~(1 << (k%32)) ) +#define TEST_BIT(A,k) ( A[(k/32)] & (1 << (k%32)) ) + +always_inline void +_bytes_swap (void *pv, size_t n) +{ + char *p = pv; + size_t lo, hi; + for (lo = 0, hi = n - 1; hi > lo; lo++, hi--) + { + char tmp = p[lo]; + p[lo] = p[hi]; + p[hi] = tmp; + } +} + +#define ENDIANESS_SWAP(x) _bytes_swap(&x, sizeof(x)); + +#define MAX_INFLIGHT_PACKETS 128 +#define MAX_ENQUEABLE_SACKS 2 + +/* This parameter indicates to the receiver how much increment in + * milliseconds the sender wishes the receiver to add to its default + * cookie life-span. 
+ */
+#define SUGGESTED_COOKIE_LIFE_SPAN_INCREMENT 1000
+
+/** User-tunable switches; stored per connection in sctp_connection_t.conn_config. */
+typedef struct _sctp_user_configuration
+{
+  u8 never_delay_sack;
+  u8 never_bundle;
+
+} sctp_user_configuration_t;
+
+/** One SCTP connection: up to MAX_SCTP_CONNECTIONS sub-connections plus shared state. */
+typedef struct _sctp_connection
+{
+  /** Required for pool_get_aligned */
+  CLIB_CACHE_LINE_ALIGN_MARK (cacheline0);
+
+  sctp_sub_connection_t sub_conn[MAX_SCTP_CONNECTIONS];	/**< Sub-connections, each starting with its common transport data. First! */
+  sctp_user_configuration_t conn_config; /**< Allows tuning of some SCTP behaviors */
+
+  u8 state;			/**< SCTP state as per sctp_state_t */
+  u16 flags;		/**< Chunk flag (see sctp_chunks_common_hdr_t) */
+
+  u32 local_tag;	/**< INIT_TAG generated locally */
+  u32 remote_tag;	/**< INIT_TAG generated by the remote peer */
+
+  u32 local_initial_tsn; /**< Initial TSN generated locally */
+  u32 remote_initial_tsn; /**< Initial TSN generated by the remote-peer */
+
+  u32 peer_cookie_life_span_increment;
+
+  u32 overall_err_count; /**< The overall association error count. */
+  u32 overall_err_treshold; /**< The threshold for this association that if the Overall Error Count
+				  reaches will cause this association to be torn down. */
+
+  u8 init_retransmit_err; /**< Error counter for the INIT transmission phase */
+
+  u32 peer_rwnd; /**< Current calculated value of the peer's rwnd. */
+
+  u32 next_tsn;	/**< The next TSN number to be assigned to a new DATA chunk.
+                 This is sent in the INIT or INIT ACK chunk to the peer
+                 and incremented each time a DATA chunk is assigned a
+                 TSN (normally just prior to transmit or during
+                 fragmentation). */
+
+  u32 last_unacked_tsn;	/**< Last TSN number still unacked */
+  u32 next_tsn_expected; /**< The next TSN number expected to be received. */
+
+  u32 last_rcvd_tsn; /**< This is the last TSN received in sequence. This value
+			 is set initially by taking the peer's initial TSN,
+                 received in the INIT or INIT ACK chunk, and
+                 subtracting one from it. */
+
+  u32 out_of_order_map[MAX_INFLIGHT_PACKETS]; /**< An array of bits or bytes indicating which out-of-order
+				TSNs have been received (relative to the Last Rcvd TSN).
+				If no gaps exist, i.e., no out-of-order packets have been received,
+				this array will be set to all zero. */
+
+  u8 ack_state;	/**< This flag indicates if the next received packet is set to be responded to with a SACK.
+				This is initialized to 0. When a packet is received it is incremented.
+				If this value reaches 2 or more, a SACK is sent and the value is reset to 0.
+				Note: This is used only when no DATA chunks are received out-of-order.
+				When DATA chunks are out-of-order, SACKs are not delayed (see Section 6). */
+
+  u8 smallest_PMTU_idx;	/**< The index of the sub-connection with the smallest PMTU discovered across all peer's transport addresses. */
+
+  u8 overall_sending_status; /**< 0 indicates first fragment of a user message
+				  1 indicates normal stream
+				  2 indicates last fragment of a user message */
+
+  u8 forming_association_changed; /**< This is a flag indicating whether the original association has been modified during
+				  the life-span of the association itself. For instance, a new sub-connection might have been added. */
+
+  sctp_state_cookie_param_t cookie_param; /**< Temporary location to save cookie information; it can be used
+				  when the timeout expires and a COOKIE has to be sent again. */
+
+} sctp_connection_t;
+
+/** Signature of a timer expiration callback: connection index and timer id. */
+typedef void (sctp_timer_expiration_handler) (u32 conn_index, u32 timer_id);
+
+sctp_connection_t *sctp_connection_new (u8 thread_index);
+
+/* Sub-connection add/delete, keyed by local/remote address pair */
+u8
+sctp_sub_connection_add_ip4 (vlib_main_t * vm,
+			     ip4_address_t * lcl_addr,
+			     ip4_address_t * rmt_addr);
+
+u8
+sctp_sub_connection_add_ip6 (vlib_main_t * vm,
+			     ip6_address_t * lcl_addr,
+			     ip6_address_t * rmt_addr);
+
+u8
+sctp_sub_connection_del_ip4 (ip4_address_t * lcl_addr,
+			     ip4_address_t * rmt_addr);
+
+u8
+sctp_sub_connection_del_ip6 (ip6_address_t * lcl_addr,
+			     ip6_address_t * rmt_addr);
+
+u8 sctp_configure (sctp_user_configuration_t config);
+
+/* Connection teardown */
+void sctp_connection_close (sctp_connection_t * sctp_conn);
+void sctp_connection_cleanup (sctp_connection_t * sctp_conn);
+void sctp_connection_del (sctp_connection_t * sctp_conn);
+
+/* Output path */
+u32 sctp_push_header (transport_connection_t * tconn, vlib_buffer_t * b);
+void sctp_send_init (sctp_connection_t * sctp_conn);
+void sctp_send_cookie_echo (sctp_connection_t * sctp_conn);
+void sctp_send_shutdown (sctp_connection_t * sctp_conn);
+void sctp_send_shutdown_ack (sctp_connection_t * sctp_conn, u8 idx,
+			     vlib_buffer_t * b);
+void sctp_send_shutdown_complete (sctp_connection_t * sctp_conn, u8 idx,
+				  vlib_buffer_t * b0);
+void sctp_send_heartbeat (sctp_connection_t * sctp_conn);
+void sctp_data_retransmit (sctp_connection_t * sctp_conn);
+void sctp_flush_frame_to_output (vlib_main_t * vm, u8 thread_index,
+				 u8 is_ip4);
+void sctp_flush_frames_to_output (u8 thread_index);
+void sctp_punt_unknown (vlib_main_t * vm, u8 is_ip4, u8 is_add);
+
+/* Formatters / unformatters */
+format_function_t format_sctp_state;
+
+u8 *format_sctp_connection_id (u8 * s, va_list * args);
+u8 *format_sctp_connection (u8 * s, va_list * args);
+u8 *format_sctp_scoreboard (u8 * s, va_list * args);
+u8 *format_sctp_header (u8 * s, va_list * args);
+u8 *format_sctp_tx_trace (u8 * s, va_list * args);
+unformat_function_t unformat_pg_sctp_header;
+
+/* Initialization */
+clib_error_t *sctp_init (vlib_main_t * vm);
+void sctp_connection_timers_init (sctp_connection_t * sctp_conn);
+void sctp_connection_timers_reset (sctp_connection_t * sctp_conn);
+void sctp_init_snd_vars (sctp_connection_t * sctp_conn);
+void sctp_init_mss (sctp_connection_t * sctp_conn);
+
+/* Chunk builders; idx selects the sub-connection, b is the buffer to fill */
+void sctp_prepare_initack_chunk (sctp_connection_t * sctp_conn, u8 idx,
+				 vlib_buffer_t * b, ip4_address_t * ip4_addr,
+				 u8 add_ip4, ip6_address_t * ip6_addr,
+				 u8 add_ip6);
+void sctp_prepare_initack_chunk_for_collision (sctp_connection_t * sctp_conn,
+					       u8 idx, vlib_buffer_t * b,
+					       ip4_address_t * ip4_addr,
+					       ip6_address_t * ip6_addr);
+void sctp_prepare_abort_for_collision (sctp_connection_t * sctp_conn, u8 idx,
+				       vlib_buffer_t * b,
+				       ip4_address_t * ip4_addr,
+				       ip6_address_t * ip6_addr);
+void sctp_prepare_operation_error (sctp_connection_t * sctp_conn, u8 idx,
+				   vlib_buffer_t * b, u8 err_cause);
+void sctp_prepare_cookie_echo_chunk (sctp_connection_t * sctp_conn, u8 idx,
+				     vlib_buffer_t * b, u8 reuse_buffer);
+void sctp_prepare_cookie_ack_chunk (sctp_connection_t * sctp_conn, u8 idx,
+				    vlib_buffer_t * b);
+void sctp_prepare_sack_chunk (sctp_connection_t * sctp_conn, u8 idx,
+			      vlib_buffer_t * b);
+void sctp_prepare_heartbeat_ack_chunk (sctp_connection_t * sctp_conn, u8 idx,
+				       vlib_buffer_t * b);
+
+u16 sctp_check_outstanding_data_chunks (sctp_connection_t * sctp_conn);
+
+void sctp_api_reference (void);
+
+#define IP_PROTOCOL_SCTP	132
+
+/** SCTP FSM state definitions as per RFC4960.
*/ +#define foreach_sctp_fsm_state \ + _(CLOSED, "CLOSED") \ + _(COOKIE_WAIT, "COOKIE_WAIT") \ + _(COOKIE_ECHOED, "COOKIE_ECHOED") \ + _(ESTABLISHED, "ESTABLISHED") \ + _(SHUTDOWN_PENDING, "SHUTDOWN_PENDING") \ + _(SHUTDOWN_SENT, "SHUTDOWN_SENT") \ + _(SHUTDOWN_RECEIVED, "SHUTDOWN_RECEIVED") \ + _(SHUTDOWN_ACK_SENT, "SHUTDOWN_ACK_SENT") + +typedef enum _sctp_state +{ +#define _(sym, str) SCTP_STATE_##sym, + foreach_sctp_fsm_state +#undef _ + SCTP_N_STATES +} sctp_state_t; + +always_inline char * +sctp_state_to_string (u8 state) +{ + switch (state) + { + case SCTP_STATE_CLOSED: + return "SCTP_STATE_CLOSED"; + case SCTP_STATE_COOKIE_WAIT: + return "SCTP_STATE_COOKIE_WAIT"; + case SCTP_STATE_COOKIE_ECHOED: + return "SCTP_STATE_COOKIE_ECHOED"; + case SCTP_STATE_ESTABLISHED: + return "SCTP_STATE_ESTABLISHED"; + case SCTP_STATE_SHUTDOWN_PENDING: + return "SCTP_STATE_SHUTDOWN_PENDING"; + case SCTP_STATE_SHUTDOWN_SENT: + return "SCTP_STATE_SHUTDOWN_SENT"; + case SCTP_STATE_SHUTDOWN_RECEIVED: + return "SCTP_STATE_SHUTDOWN_RECEIVED"; + case SCTP_STATE_SHUTDOWN_ACK_SENT: + return "SCTP_STATE_SHUTDOWN_ACK_SENT"; + } + return NULL; +} + +always_inline char * +sctp_chunk_to_string (u8 type) +{ + switch (type) + { + case DATA: + return "DATA"; + case INIT: + return "INIT"; + case INIT_ACK: + return "INIT_ACK"; + case SACK: + return "SACK"; + case HEARTBEAT: + return "HEARTBEAT"; + case HEARTBEAT_ACK: + return "HEARTBEAT_ACK"; + case ABORT: + return "ABORT"; + case SHUTDOWN: + return "SHUTDOWN"; + case SHUTDOWN_ACK: + return "SHUTDOWN_ACK"; + case OPERATION_ERROR: + return "OPERATION_ERROR"; + case COOKIE_ECHO: + return "COOKIE_ECHO"; + case COOKIE_ACK: + return "COOKIE_ACK"; + case ECNE: + return "ECNE"; + case CWR: + return "CWR"; + case SHUTDOWN_COMPLETE: + return "SHUTDOWN_COMPLETE"; + } + return NULL; +} + +always_inline char * +sctp_optparam_type_to_string (u8 type) +{ + switch (type) + { + case SCTP_IPV4_ADDRESS_TYPE: + return "SCTP_IPV4_ADDRESS_TYPE"; + case 
SCTP_IPV6_ADDRESS_TYPE: + return "SCTP_IPV6_ADDRESS_TYPE"; + case SCTP_STATE_COOKIE_TYPE: + return "SCTP_STATE_COOKIE_TYPE"; + case SCTP_UNRECOGNIZED_TYPE: + return "SCTP_UNRECOGNIZED_TYPE"; + case SCTP_COOKIE_PRESERVATIVE_TYPE: + return "SCTP_COOKIE_PRESERVATIVE_TYPE"; + case SCTP_HOSTNAME_ADDRESS_TYPE: + return "SCTP_HOSTNAME_ADDRESS_TYPE"; + case SCTP_SUPPORTED_ADDRESS_TYPES: + return "SCTP_SUPPORTED_ADDRESS_TYPES"; + } + return NULL; +} + +#define SCTP_TICK 0.001 /**< SCTP tick period (s) */ +#define SHZ (u32) (1/SCTP_TICK) /**< SCTP tick frequency */ +#define SCTP_TSTAMP_RESOLUTION SCTP_TICK /**< Time stamp resolution */ + +/* As per RFC4960, page 83 */ +#define SCTP_RTO_INIT 3 * SHZ /* 3 seconds */ +#define SCTP_RTO_MIN 1 * SHZ /* 1 second */ +#define SCTP_RTO_MAX 60 * SHZ /* 60 seconds */ +#define SCTP_RTO_BURST 4 +#define SCTP_RTO_ALPHA 1/8 +#define SCTP_RTO_BETA 1/4 +#define SCTP_VALID_COOKIE_LIFE 60 * SHZ /* 60 seconds */ +#define SCTP_ASSOCIATION_MAX_RETRANS 10 // the overall connection +#define SCTP_PATH_MAX_RETRANS 5 // number of attempts per destination address +#define SCTP_MAX_INIT_RETRANS 8 // number of attempts +#define SCTP_HB_INTERVAL 30 * SHZ +#define SCTP_HB_MAX_BURST 1 +#define SCTP_DATA_IDLE_INTERVAL 15 * SHZ /* 15 seconds; the time-interval after which the connetion is considered IDLE */ +#define SCTP_TO_TIMER_TICK SCTP_TICK*10 /* Period for converting from SCTP_TICK */ + +#define SCTP_CONN_RECOVERY 1 << 1 +#define SCTP_FAST_RECOVERY 1 << 2 + +typedef struct _sctp_lookup_dispatch +{ + u8 next, error; +} sctp_lookup_dispatch_t; + +typedef struct _sctp_main +{ + /* Per-worker thread SCTP connection pools */ + sctp_connection_t **connections; + + /* Pool of listeners. 
*/ + sctp_connection_t *listener_pool; + + /** Dispatch table by state and flags */ + sctp_lookup_dispatch_t dispatch_table[SCTP_N_STATES][64]; + + u8 log2_tstamp_clocks_per_tick; + f64 tstamp_ticks_per_clock; + u64 *time_now; + + /** per-worker tx buffer free lists */ + u32 **tx_buffers; + /** per-worker tx frames to SCTP 4/6 output nodes */ + vlib_frame_t **tx_frames[2]; + /** per-worker tx frames to ip 4/6 lookup nodes */ + vlib_frame_t **ip_lookup_tx_frames[2]; + + /* Per worker-thread timer wheel for connections timers */ + tw_timer_wheel_16t_2w_512sl_t *timer_wheels; + + /* Pool of half-open connections on which we've sent a SYN */ + sctp_connection_t *half_open_connections; + clib_spinlock_t half_open_lock; + + /* TODO: Congestion control algorithms registered */ + /* sctp_cc_algorithm_t *cc_algos; */ + + /* Flag that indicates if stack is on or off */ + u8 is_enabled; + u8 is_init; + + /** Number of preallocated connections */ + u32 preallocated_connections; + + /** Transport table (preallocation) size parameters */ + u32 local_endpoints_table_memory; + u32 local_endpoints_table_buckets; + + /** Vectors of src addresses. 
Optional unless one needs > 63K active-opens */ + ip4_address_t *ip4_src_addresses; + u32 last_v4_address_rotor; + u32 last_v6_address_rotor; + ip6_address_t *ip6_src_addresses; + + /** vlib buffer size */ + u32 bytes_per_buffer; + + u8 punt_unknown4; + u8 punt_unknown6; + + u32 sctp4_established_phase_node_index; + u32 sctp6_established_phase_node_index; + + u16 msg_id_base; +} sctp_main_t; + +extern sctp_main_t sctp_main; +extern vlib_node_registration_t sctp4_input_node; +extern vlib_node_registration_t sctp6_input_node; +extern vlib_node_registration_t sctp4_output_node; +extern vlib_node_registration_t sctp6_output_node; + +always_inline sctp_main_t * +vnet_get_sctp_main () +{ + return &sctp_main; +} + +always_inline sctp_header_t * +sctp_buffer_hdr (vlib_buffer_t * b) +{ + ASSERT ((signed) b->current_data >= (signed) -VLIB_BUFFER_PRE_DATA_SIZE); + return (sctp_header_t *) (b->data + b->current_data + + sctp_buffer_opaque (b)->sctp.hdr_offset); +} + +clib_error_t *vnet_sctp_enable_disable (vlib_main_t * vm, u8 is_en); +clib_error_t *sctp_plugin_api_hookup (vlib_main_t * vm); + +always_inline sctp_connection_t * +sctp_half_open_connection_get (u32 conn_index) +{ + sctp_connection_t *tc = 0; + clib_spinlock_lock_if_init (&sctp_main.half_open_lock); + if (!pool_is_free_index (sctp_main.half_open_connections, conn_index)) + tc = pool_elt_at_index (sctp_main.half_open_connections, conn_index); + tc->sub_conn[SCTP_PRIMARY_PATH_IDX].subconn_idx = SCTP_PRIMARY_PATH_IDX; + clib_spinlock_unlock_if_init (&sctp_main.half_open_lock); + return tc; +} + +/** + * Cleanup half-open connection + * + */ +always_inline void +sctp_half_open_connection_del (sctp_connection_t * tc) +{ + sctp_main_t *sctp_main = vnet_get_sctp_main (); + clib_spinlock_lock_if_init (&sctp_main->half_open_lock); + pool_put_index (sctp_main->half_open_connections, + tc->sub_conn[SCTP_PRIMARY_PATH_IDX].c_c_index); + if (CLIB_DEBUG) + clib_memset (tc, 0xFA, sizeof (*tc)); + clib_spinlock_unlock_if_init 
(&sctp_main->half_open_lock); +} + +always_inline u64 +sctp_set_time_now (u32 thread_index) +{ + sctp_main.time_now[thread_index] = clib_cpu_time_now () + * sctp_main.tstamp_ticks_per_clock; + return sctp_main.time_now[thread_index]; +} + +always_inline void +sctp_timer_set (sctp_connection_t * tc, u8 conn_idx, u8 timer_id, + u32 interval) +{ + ASSERT (tc->sub_conn[conn_idx].connection.thread_index == + vlib_get_thread_index ()); + ASSERT (tc->sub_conn[conn_idx].timers[timer_id] == + SCTP_TIMER_HANDLE_INVALID); + + sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx]; + sub->timers[timer_id] = + tw_timer_start_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index], + sub->c_c_index, timer_id, interval); +} + +always_inline void +sctp_timer_reset (sctp_connection_t * tc, u8 conn_idx, u8 timer_id) +{ + ASSERT (tc->sub_conn[conn_idx].c_thread_index == vlib_get_thread_index ()); + if (tc->sub_conn[conn_idx].timers[timer_id] == SCTP_TIMER_HANDLE_INVALID) + return; + + sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx]; + + tw_timer_stop_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index], + sub->timers[timer_id]); + sub->timers[timer_id] = SCTP_TIMER_HANDLE_INVALID; +} + +/** + * Try to cleanup half-open connection + * + * If called from a thread that doesn't own tc, the call won't have any + * effect. + * + * @param tc - connection to be cleaned up + * @return non-zero if cleanup failed. 
+ */ +always_inline int +sctp_half_open_connection_cleanup (sctp_connection_t * tc) +{ + /* Make sure this is the owning thread */ + if (tc->sub_conn[SCTP_PRIMARY_PATH_IDX].c_thread_index != + vlib_get_thread_index ()) + return 1; + sctp_timer_reset (tc, SCTP_PRIMARY_PATH_IDX, SCTP_TIMER_T1_INIT); + sctp_half_open_connection_del (tc); + return 0; +} + +always_inline u32 +sctp_header_bytes () +{ + return sizeof (sctp_header_t); +} + +always_inline sctp_connection_t * +sctp_get_connection_from_transport (transport_connection_t * tconn) +{ + ASSERT (tconn != NULL); + + sctp_sub_connection_t *sub = (sctp_sub_connection_t *) tconn; +#if SCTP_ADV_DEBUG + if (sub == NULL) + SCTP_ADV_DBG ("sub == NULL"); + if (sub->parent == NULL) + SCTP_ADV_DBG ("sub->parent == NULL"); +#endif + if (sub->subconn_idx > 0) + return (sctp_connection_t *) sub - + (sizeof (sctp_sub_connection_t) * (sub->subconn_idx - 1)); + + return (sctp_connection_t *) sub; +} + +always_inline u64 +sctp_time_now (void) +{ + return sctp_main.time_now[vlib_get_thread_index ()]; +} + +#define ABS(x) ((x) > 0) ? (x) : -(x); + +always_inline void +sctp_calculate_rto (sctp_connection_t * sctp_conn, u8 conn_idx) +{ + /* See RFC4960, 6.3.1. 
RTO Calculation */ + u64 RTO = 0; + f64 RTTVAR = 0; + u64 now = sctp_time_now (); + u64 prev_ts = sctp_conn->sub_conn[conn_idx].rtt_ts; + u64 R = prev_ts - now; + + if (sctp_conn->sub_conn[conn_idx].RTO == 0) // C1: Let's initialize our RTO + { + sctp_conn->sub_conn[conn_idx].RTO = SCTP_RTO_MIN; + return; + } + + if (sctp_conn->sub_conn[conn_idx].RTO == SCTP_RTO_MIN && sctp_conn->sub_conn[conn_idx].SRTT == 0) // C2: First RTT calculation + { + sctp_conn->sub_conn[conn_idx].SRTT = R; + RTTVAR = R / 2; + + if (RTTVAR == 0) + RTTVAR = 100e-3; /* 100 ms */ + + sctp_conn->sub_conn[conn_idx].RTTVAR = RTTVAR; + } + else // C3: RTT already exists; let's recalculate + { + RTTVAR = (1 - SCTP_RTO_BETA) * sctp_conn->sub_conn[conn_idx].RTTVAR + + SCTP_RTO_BETA * ABS (sctp_conn->sub_conn[conn_idx].SRTT - R); + + if (RTTVAR == 0) + RTTVAR = 100e-3; /* 100 ms */ + + sctp_conn->sub_conn[conn_idx].RTTVAR = RTTVAR; + + sctp_conn->sub_conn[conn_idx].SRTT = + (1 - SCTP_RTO_ALPHA) * sctp_conn->sub_conn[conn_idx].SRTT + + SCTP_RTO_ALPHA * R; + } + + RTO = + sctp_conn->sub_conn[conn_idx].SRTT + + 4 * sctp_conn->sub_conn[conn_idx].RTTVAR; + if (RTO < SCTP_RTO_MIN) // C6 + RTO = SCTP_RTO_MIN; + + if (RTO > SCTP_RTO_MAX) // C7 + RTO = SCTP_RTO_MAX; + + sctp_conn->sub_conn[conn_idx].RTO = RTO; +} + +always_inline void +sctp_timer_update (sctp_connection_t * tc, u8 conn_idx, u8 timer_id, + u32 interval) +{ + ASSERT (tc->sub_conn[conn_idx].connection.thread_index == + vlib_get_thread_index ()); + sctp_sub_connection_t *sub = &tc->sub_conn[conn_idx]; + + if (tc->sub_conn[conn_idx].timers[timer_id] != SCTP_TIMER_HANDLE_INVALID) + tw_timer_stop_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index], + sub->timers[timer_id]); + + tc->sub_conn[conn_idx].timers[timer_id] = + tw_timer_start_16t_2w_512sl (&sctp_main.timer_wheels[sub->c_thread_index], + sub->c_c_index, timer_id, interval); +} + +always_inline sctp_connection_t * +sctp_listener_get (u32 tli) +{ + return pool_elt_at_index 
(sctp_main.listener_pool, tli); +} + +#endif + +always_inline sctp_connection_t * +sctp_connection_get (u32 conn_index, u32 thread_index) +{ + if (PREDICT_FALSE + (pool_is_free_index (sctp_main.connections[thread_index], conn_index))) + return 0; + return pool_elt_at_index (sctp_main.connections[thread_index], conn_index); +} + +#define SELECT_MAX_RETRIES 8 + +always_inline u8 +sctp_data_subconn_select (sctp_connection_t * sctp_conn) +{ + u32 sub = SCTP_PRIMARY_PATH_IDX; + u8 i, cwnd = sctp_conn->sub_conn[SCTP_PRIMARY_PATH_IDX].cwnd; + for (i = 1; i < MAX_SCTP_CONNECTIONS; i++) + { + if (sctp_conn->sub_conn[i].state == SCTP_SUBCONN_STATE_DOWN) + continue; + + if (sctp_conn->sub_conn[i].cwnd > cwnd) + { + sub = i; + cwnd = sctp_conn->sub_conn[i].cwnd; + } + } + return sub; +} + +always_inline u8 +sctp_sub_conn_id_via_ip6h (sctp_connection_t * sctp_conn, ip6_header_t * ip6h) +{ + u8 i; + + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + if (sctp_conn->sub_conn[i].connection.lcl_ip.ip6.as_u64[0] == + ip6h->dst_address.as_u64[0] && + sctp_conn->sub_conn[i].connection.lcl_ip.ip6.as_u64[1] == + ip6h->dst_address.as_u64[1] && + sctp_conn->sub_conn[i].connection.rmt_ip.ip6.as_u64[0] == + ip6h->src_address.as_u64[0] && + sctp_conn->sub_conn[i].connection.rmt_ip.ip6.as_u64[1] == + ip6h->src_address.as_u64[1]) + return i; + } + clib_warning ("Did not find a sub-connection; defaulting to %u", + SCTP_PRIMARY_PATH_IDX); + return SCTP_PRIMARY_PATH_IDX; +} + +always_inline u8 +sctp_sub_conn_id_via_ip4h (sctp_connection_t * sctp_conn, ip4_header_t * ip4h) +{ + u8 i; + + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + if (sctp_conn->sub_conn[i].connection.lcl_ip.ip4.as_u32 == + ip4h->dst_address.as_u32 + && sctp_conn->sub_conn[i].connection.rmt_ip.ip4.as_u32 == + ip4h->src_address.as_u32) + return i; + } + clib_warning ("Did not find a sub-connection; defaulting to %u", + SCTP_PRIMARY_PATH_IDX); + return SCTP_PRIMARY_PATH_IDX; +} + +/** + * Push SCTP header to buffer + * + * 
@param vm - vlib_main + * @param b - buffer to write the header to + * @param sp_net - source port net order + * @param dp_net - destination port net order + * @param sctp_hdr_opts_len - header and options length in bytes + * + * @return - pointer to start of SCTP header + */ +always_inline void * +vlib_buffer_push_sctp_net_order (vlib_buffer_t * b, u16 sp, u16 dp, + u8 sctp_hdr_opts_len) +{ + sctp_full_hdr_t *full_hdr; + + full_hdr = vlib_buffer_push_uninit (b, sctp_hdr_opts_len); + + full_hdr->hdr.src_port = sp; + full_hdr->hdr.dst_port = dp; + full_hdr->hdr.checksum = 0; + return full_hdr; +} + +/** + * Push SCTP header to buffer + * + * @param b - buffer to write the header to + * @param sp_net - source port net order + * @param dp_net - destination port net order + * @param sctp_hdr_opts_len - header and options length in bytes + * + * @return - pointer to start of SCTP header + */ +always_inline void * +vlib_buffer_push_sctp (vlib_buffer_t * b, u16 sp_net, u16 dp_net, + u8 sctp_hdr_opts_len) +{ + return vlib_buffer_push_sctp_net_order (b, sp_net, dp_net, + sctp_hdr_opts_len); +} + +always_inline u8 +sctp_next_avail_subconn (sctp_connection_t * sctp_conn) +{ + u8 i; + + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + if (sctp_conn->sub_conn[i].state == SCTP_SUBCONN_STATE_DOWN) + return i; + } + return MAX_SCTP_CONNECTIONS; +} + +always_inline void +update_smallest_pmtu_idx (sctp_connection_t * sctp_conn) +{ + u8 i; + u8 smallest_pmtu_index = SCTP_PRIMARY_PATH_IDX; + + for (i = 1; i < MAX_SCTP_CONNECTIONS; i++) + { + if (sctp_conn->sub_conn[i].state != SCTP_SUBCONN_STATE_DOWN) + { + if (sctp_conn->sub_conn[i].PMTU < + sctp_conn->sub_conn[smallest_pmtu_index].PMTU) + smallest_pmtu_index = i; + } + } + + sctp_conn->smallest_PMTU_idx = smallest_pmtu_index; +} + +/* As per RFC4960; section 7.2.1: Slow-Start */ +always_inline void +sctp_init_cwnd (sctp_connection_t * sctp_conn) +{ + u8 i; + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + /* Section 7.2.1; point 
(1) */ + sctp_conn->sub_conn[i].cwnd = + clib_min (4 * sctp_conn->sub_conn[i].PMTU, + clib_max (2 * sctp_conn->sub_conn[i].PMTU, 4380)); + + /* Section 7.2.1; point (3) */ + sctp_conn->sub_conn[i].ssthresh = SCTP_INITIAL_SSHTRESH; + + /* Section 7.2.2; point (1) */ + sctp_conn->sub_conn[i].partially_acked_bytes = 0; + } +} + +always_inline u8 +sctp_in_cong_recovery (sctp_connection_t * sctp_conn, u8 idx) +{ + return 0; +} + +always_inline u8 +cwnd_fully_utilized (sctp_connection_t * sctp_conn, u8 idx) +{ + if (sctp_conn->sub_conn[idx].cwnd == 0) + return 1; + return 0; +} + +/* As per RFC4960; section 7.2.1: Slow-Start */ +always_inline void +update_cwnd (sctp_connection_t * sctp_conn) +{ + u8 i; + u32 inflight = sctp_conn->next_tsn - sctp_conn->last_unacked_tsn; + + for (i = 0; i < MAX_SCTP_CONNECTIONS; i++) + { + /* Section 7.2.1; point (2) */ + if (sctp_conn->sub_conn[i].is_retransmitting) + { + sctp_conn->sub_conn[i].cwnd = 1 * sctp_conn->sub_conn[i].PMTU; + continue; + } + + /* Section 7.2.2; point (4) */ + if (sctp_conn->sub_conn[i].last_data_ts > + sctp_time_now () + SCTP_DATA_IDLE_INTERVAL) + { + sctp_conn->sub_conn[i].cwnd = + clib_max (sctp_conn->sub_conn[i].cwnd / 2, + 4 * sctp_conn->sub_conn[i].PMTU); + continue; + } + + /* Section 7.2.1; point (5) */ + if (sctp_conn->sub_conn[i].cwnd <= sctp_conn->sub_conn[i].ssthresh) + { + if (!cwnd_fully_utilized (sctp_conn, i)) + continue; + + if (sctp_in_cong_recovery (sctp_conn, i)) + continue; + + sctp_conn->sub_conn[i].cwnd = + clib_min (sctp_conn->sub_conn[i].PMTU, 1); + } + + /* Section 6.1; point (D) */ + if ((inflight + SCTP_RTO_BURST * sctp_conn->sub_conn[i].PMTU) < + sctp_conn->sub_conn[i].cwnd) + sctp_conn->sub_conn[i].cwnd = + inflight + SCTP_RTO_BURST * sctp_conn->sub_conn[i].PMTU; + } +} + +/* + * fd.io coding-style-patch-verification: ON + * + * Local Variables: + * eval: (c-set-style "gnu") + * End: + */ -- cgit 1.2.3-korg