aboutsummaryrefslogtreecommitdiffstats
path: root/vnet/vnet/ip/tcp.h
diff options
context:
space:
mode:
Diffstat (limited to 'vnet/vnet/ip/tcp.h')
-rw-r--r--vnet/vnet/ip/tcp.h396
1 files changed, 396 insertions, 0 deletions
diff --git a/vnet/vnet/ip/tcp.h b/vnet/vnet/ip/tcp.h
new file mode 100644
index 00000000000..98d8e34f0d5
--- /dev/null
+++ b/vnet/vnet/ip/tcp.h
@@ -0,0 +1,396 @@
+/*
+ * Copyright (c) 2015 Cisco and/or its affiliates.
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at:
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+/*
+ * ip/tcp.h: tcp protocol
+ *
+ * Copyright (c) 2011 Eliot Dresselhaus
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ */
+
+#ifndef included_tcp_protocol_h
+#define included_tcp_protocol_h
+
+#include <vppinfra/vector.h>
+
+/* No support for e.g. Altivec. */
+#if defined (__SSE2__)
+#define TCP_HAVE_VEC128
+#endif
+
+typedef union {
+ struct {
+ u16 src, dst;
+ };
+ u32 as_u32;
+} tcp_udp_ports_t;
+
+typedef union {
+#ifdef TCP_HAVE_VEC128
+ u32x4 as_u32x4;
+#endif
+ tcp_udp_ports_t as_ports[4];
+} tcp_udp_ports_x4_t;
+
+typedef struct {
+ union {
+#ifdef TCP_HAVE_VEC128
+ u32x4 as_u32x4;
+#endif
+ ip4_address_t as_ip4_address[4];
+ } src, dst;
+ tcp_udp_ports_x4_t ports;
+} ip4_tcp_udp_address_x4_t;
+
+typedef struct {
+ union {
+#ifdef TCP_HAVE_VEC128
+ u32x4 as_u32x4[4];
+#endif
+ u32 as_u32[4][4];
+ } src, dst;
+ tcp_udp_ports_x4_t ports;
+} ip6_tcp_udp_address_x4_t;
+
+typedef struct {
+ u32 his, ours;
+} tcp_sequence_pair_t;
+
+/* Time stamps saved from options. */
+typedef struct {
+ u32 ours_host_byte_order, his_net_byte_order;
+} tcp_time_stamp_pair_t;
+
+typedef struct {
+ ip4_tcp_udp_address_x4_t address_x4;
+ u32 time_stamps[4];
+} ip4_tcp_udp_address_x4_and_timestamps_t;
+
+typedef struct {
+ ip6_tcp_udp_address_x4_t address_x4;
+ u32 time_stamps[4];
+} ip6_tcp_udp_address_x4_and_timestamps_t;
+
+#define foreach_tcp_connection_state \
+ /* unused */ \
+ _ (unused) \
+ /* Sent SYN-ACK waiting for ACK if he ever feels like sending one. */ \
+ _ (listen_ack_wait) \
+ /* Sent SYN waiting for ACK or RST. */ \
+ _ (connecting) \
+ /* Pseudo-type for established connections. */ \
+ _ (established)
+
+typedef enum {
+#define _(f) TCP_CONNECTION_STATE_##f,
+ foreach_tcp_connection_state
+#undef _
+ TCP_N_CONNECTION_STATE,
+} tcp_connection_state_t;
+
+/* Kept small to fight off syn flood attacks. */
+typedef struct {
+ tcp_sequence_pair_t sequence_numbers;
+
+ tcp_time_stamp_pair_t time_stamps;
+
+ /* segment size and window scale (saved from options
+ or set to defaults). */
+ u16 max_segment_size;
+
+ u8 window_scale;
+
+ tcp_connection_state_t state : 8;
+} tcp_mini_connection_t;
+
+typedef struct {
+ /* Sum and sum^2 of measurements.
+ Used to compute average and RMS. */
+ f64 sum, sum2;
+
+ /* Number of measurements. */
+ f64 count;
+} tcp_round_trip_time_stats_t;
+
+typedef struct {
+ u32 first_buffer_index_this_packet;
+
+ u16 data_ip_checksum;
+
+ u16 n_data_bytes;
+} tcp_tx_packet_t;
+
+typedef struct {
+ tcp_sequence_pair_t sequence_numbers;
+
+ tcp_time_stamp_pair_t time_stamps;
+
+ tcp_tx_packet_t head_packet, tx_tail_packet, write_tail_packet;
+
+ u32 write_tail_buffer_index;
+
+ tcp_round_trip_time_stats_t round_trip_time_stats;
+
+ /* Number of un-acknowledged bytes we've sent. */
+ u32 n_tx_unacked_bytes;
+
+ /* segment size and window scale (saved from options
+ or set to defaults). */
+ u16 max_segment_size;
+
+ /* Window from latest received packet. */
+ u16 his_window;
+
+ u16 my_window;
+
+ u8 his_window_scale;
+
+ u8 my_window_scale;
+
+ /* ip4/ip6 tos/ttl to use for packets we send. */
+ u8 tos, ttl;
+
+ u16 flags;
+#define foreach_tcp_connection_flag \
+ _ (ack_pending) \
+ _ (fin_received) \
+ _ (fin_sent) \
+ _ (application_requested_close)
+
+ u8 listener_opaque[128
+ - 1 * sizeof (tcp_sequence_pair_t)
+ - 1 * sizeof (tcp_time_stamp_pair_t)
+ - 3 * sizeof (tcp_tx_packet_t)
+ - 1 * sizeof (tcp_round_trip_time_stats_t)
+ - 2 * sizeof (u32)
+ - 4 * sizeof (u16)
+ - 4 * sizeof (u8)];
+} tcp_connection_t;
+
+typedef enum {
+ TCP_IP4,
+ TCP_IP6,
+ TCP_N_IP46,
+} tcp_ip_4_or_6_t;
+
+typedef enum {
+#define _(f) LOG2_TCP_CONNECTION_FLAG_##f,
+ foreach_tcp_connection_flag
+#undef _
+ N_TCP_CONNECTION_FLAG,
+#define _(f) TCP_CONNECTION_FLAG_##f = 1 << LOG2_TCP_CONNECTION_FLAG_##f,
+ foreach_tcp_connection_flag
+#undef _
+} tcp_connection_flag_t;
+
+typedef enum {
+ TCP_PACKET_TEMPLATE_SYN,
+ TCP_PACKET_TEMPLATE_SYN_ACK,
+ TCP_PACKET_TEMPLATE_ACK,
+ TCP_PACKET_TEMPLATE_FIN_ACK,
+ TCP_PACKET_TEMPLATE_RST_ACK,
+ TCP_N_PACKET_TEMPLATE,
+} tcp_packet_template_type_t;
+
+typedef struct {
+ vlib_packet_template_t vlib;
+
+ /* TCP checksum of template with zeros for all
+ variable fields. Network byte order. */
+ u16 tcp_checksum_net_byte_order;
+
+ /* IP4 checksum. */
+ u16 ip4_checksum_net_byte_order;
+} tcp_packet_template_t;
+
+typedef struct {
+ u8 log2_n_mini_connection_hash_elts;
+ u8 log2_n_established_connection_hash_elts;
+ u8 is_ip6;
+
+ u32 mini_connection_hash_mask;
+ u32 established_connection_hash_mask;
+
+ uword * established_connection_overflow_hash;
+
+ tcp_mini_connection_t * mini_connections;
+
+ tcp_connection_t * established_connections;
+
+ /* Vector of established connection indices which need ACKs sent. */
+ u32 * connections_pending_acks;
+
+ /* Default valid_local_adjacency_bitmap for listeners who want to listen
+ for a given port in on all interfaces. */
+ uword * default_valid_local_adjacency_bitmap;
+
+ u32 output_node_index;
+
+ tcp_packet_template_t packet_templates[TCP_N_PACKET_TEMPLATE];
+} ip46_tcp_main_t;
+
+#define foreach_tcp_event \
+ /* Received a SYN-ACK after sending a SYN to connect. */ \
+ _ (connection_established) \
+ /* Received a reset (RST) after sending a SYN to connect. */ \
+ _ (connect_failed) \
+ /* Received a FIN from an established connection. */ \
+ _ (fin_received) \
+ _ (connection_closed) \
+ /* Received a reset RST from an established connection. */ \
+ _ (reset_received)
+
+typedef enum {
+#define _(f) TCP_EVENT_##f,
+ foreach_tcp_event
+#undef _
+} tcp_event_type_t;
+
+typedef void (tcp_event_function_t)
+ (u32 * connections,
+ tcp_event_type_t event_type);
+
+typedef struct {
+ /* Bitmap indicating which of local (interface) addresses
+ we should listen on for this destination port. */
+ uword * valid_local_adjacency_bitmap;
+
+ /* Destination tcp/udp port to listen for connections. */
+ u16 dst_port;
+
+ u16 next_index;
+
+ u32 flags;
+
+ /* Connection indices for which event in event_function applies to. */
+ u32 * event_connections[TCP_N_IP46];
+ u32 * eof_connections[TCP_N_IP46];
+ u32 * close_connections[TCP_N_IP46];
+
+ tcp_event_function_t * event_function;
+} tcp_listener_t;
+
+typedef struct {
+ u8 next, error;
+} tcp_lookup_disposition_t;
+
+#define foreach_tcp_timer \
+ /* Used to rank mini connections. */ \
+ _ (mini_connection, 10e-3) \
+ /* Used for timestamps. */ \
+ _ (timestamp, 1e-6)
+
+typedef enum {
+#define _(f,s) TCP_TIMER_##f,
+ foreach_tcp_timer
+#undef _
+ TCP_N_TIMER,
+} tcp_timer_type_t;
+
+typedef struct {
+ ip46_tcp_main_t ip4, ip6;
+
+ /* Array of non-established connections, but soon-to be established connections. */
+ ip4_tcp_udp_address_x4_and_timestamps_t * ip4_mini_connection_address_hash;
+ ip6_tcp_udp_address_x4_and_timestamps_t * ip6_mini_connection_address_hash;
+
+ /* Vector of size log2_n_established_connection_hash_elts plus overflow. */
+ ip4_tcp_udp_address_x4_t * ip4_established_connection_address_hash;
+ ip6_tcp_udp_address_x4_t * ip6_established_connection_address_hash;
+
+ /* Jenkins hash seeds for established and mini hash tables. */
+ u32x4_union_t connection_hash_seeds[2][3];
+ u32x4_union_t connection_hash_masks[2];
+
+ /* Pool of listeners. */
+ tcp_listener_t * listener_pool;
+
+ /* Table mapping destination port to listener index. */
+ u16 * listener_index_by_dst_port;
+
+ tcp_lookup_disposition_t disposition_by_state_and_flags[TCP_N_CONNECTION_STATE][64];
+
+ u8 log2_clocks_per_tick[TCP_N_TIMER];
+
+ f64 secs_per_tick[TCP_N_TIMER];
+
+ /* Holds pointers to default and per-packet TCP options while
+ parsing a TCP packet's options. */
+ tcp_mini_connection_t option_decode_mini_connection_template;
+
+ /* Count of currently established connections. */
+ u32 n_established_connections[TCP_N_IP46];
+
+ u32 tx_buffer_free_list;
+ u32 tx_buffer_free_list_n_buffer_bytes;
+} tcp_main_t;
+
+/* Global TCP main structure. */
+tcp_main_t tcp_main;
+
+typedef struct {
+ /* Listen on this port. */
+ u16 port;
+
+#define TCP_LISTENER_IP4 (1 << 0)
+#define TCP_LISTENER_IP6 (1 << 1)
+ u16 flags;
+
+ /* Next node index for data packets. */
+ u32 data_node_index;
+
+ /* Event function: called on new connections, etc. */
+ tcp_event_function_t * event_function;
+} tcp_listener_registration_t;
+
+uword
+tcp_register_listener (vlib_main_t * vm, tcp_listener_registration_t * r);
+
+always_inline tcp_ip_4_or_6_t
+tcp_connection_is_ip6 (u32 h)
+{ return h & 1; }
+
+always_inline tcp_ip_4_or_6_t
+tcp_connection_handle_set (u32 iest, tcp_ip_4_or_6_t is_ip6)
+{ return is_ip6 + 2*iest; }
+
+always_inline tcp_connection_t *
+tcp_get_connection (u32 connection_handle)
+{
+ u32 iest = connection_handle / 2;
+ tcp_ip_4_or_6_t is_ip6 = tcp_connection_is_ip6 (connection_handle);
+ tcp_main_t * tm = &tcp_main;
+ ip46_tcp_main_t * tm46 = is_ip6 ? &tm->ip6 : &tm->ip4;
+ return vec_elt_at_index (tm46->established_connections, iest);
+}
+
+#endif /* included_tcp_protocol_h */