diff options
author | Klement Sekera <ksekera@cisco.com> | 2022-01-10 21:57:27 +0000 |
---|---|---|
committer | Ole Tr�an <otroan@employees.org> | 2022-01-24 12:59:46 +0000 |
commit | 56c492aa0502751de2dd9d890096a82c5f04776d (patch) | |
tree | a2b8a1c300853070b26f9953a10bd1a4e41f3bdd /src | |
parent | 4634d02501235d3803a17839eeaf076110abcb18 (diff) |
nat: TCP state tracking based on RFC 7857/RFC 6146
Implement proper state machine based on above RFCs. ACKs to SYNs/FINs
are no longer required/tracked. This is more friendly to peers and
accounts for lost packets and retransmits.
This change also means that all traffic is translated and forwarded
while in transitory timeout, which helps delivering e.g. retransmitted
FINs, FINACKs and other messages.
Also support reopening a session in transitory timeout after seeing both
FINs by seeing both SYNs again. This helps quick connection
reestablishment if the peers want to.
Type: improvement
Signed-off-by: Klement Sekera <ksekera@cisco.com>
Signed-off-by: Miklos Tirpak <miklos.tirpak@gmail.com>
Change-Id: Ibf521c79463472db97e593bfa02b32b4a06dfd2a
Diffstat (limited to 'src')
-rw-r--r-- | src/plugins/nat/lib/log.h | 15 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ed/nat44_ed.c | 47 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ed/nat44_ed.h | 88 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ed/nat44_ed_api.c | 23 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ed/nat44_ed_cli.c | 166 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ed/nat44_ed_format.c | 26 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ed/nat44_ed_in2out.c | 57 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ed/nat44_ed_inlines.h | 380 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ed/nat44_ed_out2in.c | 54 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ed/tcp_conn_track.rst | 65 |
10 files changed, 589 insertions, 332 deletions
diff --git a/src/plugins/nat/lib/log.h b/src/plugins/nat/lib/log.h index 26bd93f2589..a82028ed8bf 100644 --- a/src/plugins/nat/lib/log.h +++ b/src/plugins/nat/lib/log.h @@ -21,20 +21,7 @@ #include <vppinfra/elog.h> -#define foreach_nat_log_level \ - _ (0x00, LOG_NONE) \ - _ (0x01, LOG_ERROR) \ - _ (0x02, LOG_WARNING) \ - _ (0x03, LOG_NOTICE) \ - _ (0x04, LOG_INFO) \ - _ (0x05, LOG_DEBUG) - -typedef enum nat_log_level_t_ -{ -#define _(n, f) NAT_##f = n, - foreach_nat_log_level -#undef _ -} nat_log_level_t; +#include <nat/lib/nat_types.api_types.h> #define nat_elog(_pm, _level, _str) \ do \ diff --git a/src/plugins/nat/nat44-ed/nat44_ed.c b/src/plugins/nat/nat44-ed/nat44_ed.c index d3ef3d54f89..e389a81aab1 100644 --- a/src/plugins/nat/nat44-ed/nat44_ed.c +++ b/src/plugins/nat/nat44-ed/nat44_ed.c @@ -2390,6 +2390,8 @@ nat44_plugin_enable (nat44_config_t c) nat44_ed_db_init (sm->max_translations_per_thread, sm->translation_buckets); + nat44_ed_init_tcp_state_stable (sm); + nat_affinity_enable (); nat_reset_timeouts (&sm->timeouts); @@ -4066,6 +4068,51 @@ nat_syslog_nat44_sdel (u32 ssubix, u32 sfibix, ip4_address_t *isaddr, is_twicenat); } +u8 * +format_nat44_ed_tcp_state (u8 *s, va_list *args) +{ + nat44_ed_tcp_state_e e = va_arg (*args, nat44_ed_tcp_state_e); + switch (e) + { + case NAT44_ED_TCP_STATE_CLOSED: + s = format (s, "closed"); + break; + case NAT44_ED_TCP_STATE_SYN_I2O: + s = format (s, "SYN seen in in2out direction"); + break; + case NAT44_ED_TCP_STATE_SYN_O2I: + s = format (s, "SYN seen in out2in direction"); + break; + case NAT44_ED_TCP_STATE_ESTABLISHED: + s = format (s, "SYN seen in both directions/established"); + break; + case NAT44_ED_TCP_STATE_FIN_I2O: + s = format (s, "FIN seen in in2out direction"); + break; + case NAT44_ED_TCP_STATE_FIN_O2I: + s = format (s, "FIN seen in out2in direction"); + break; + case NAT44_ED_TCP_STATE_RST_TRANS: + s = format (s, "RST seen/transitory timeout"); + break; + case NAT44_ED_TCP_STATE_FIN_TRANS: + s = format (s, "FIN seen in both directions/transitory timeout"); + break; + case NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I: + s = format (s, "FIN seen in both directions/transitory timeout/session " + "reopening in out2in direction"); + break; + case NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O: + s = format (s, "FIN seen in both directions/transitory timeout/session " + "reopening in in2out direction"); + break; + case NAT44_ED_TCP_N_STATE: + s = format (s, "BUG! unexpected N_STATE! BUG!"); + break; + } + return s; +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/plugins/nat/nat44-ed/nat44_ed.h b/src/plugins/nat/nat44-ed/nat44_ed.h index 9772f1ec79a..0706785514b 100644 --- a/src/plugins/nat/nat44-ed/nat44_ed.h +++ b/src/plugins/nat/nat44-ed/nat44_ed.h @@ -105,33 +105,12 @@ typedef enum #undef _ } nat_addr_and_port_alloc_alg_t; -/* Session state */ -#define foreach_snat_session_state \ - _(0, UNKNOWN, "unknown") \ - _(1, UDP_ACTIVE, "udp-active") \ - _(2, TCP_SYN_SENT, "tcp-syn-sent") \ - _(3, TCP_ESTABLISHED, "tcp-established") \ - _(4, TCP_FIN_WAIT, "tcp-fin-wait") \ - _(5, TCP_CLOSE_WAIT, "tcp-close-wait") \ - _(6, TCP_CLOSING, "tcp-closing") \ - _(7, TCP_LAST_ACK, "tcp-last-ack") \ - _(8, TCP_CLOSED, "tcp-closed") \ - _(9, ICMP_ACTIVE, "icmp-active") - -typedef enum -{ -#define _(v, N, s) SNAT_SESSION_##N = v, - foreach_snat_session_state -#undef _ -} snat_session_state_t; - #define foreach_nat_in2out_ed_error \ _ (UNSUPPORTED_PROTOCOL, "unsupported protocol") \ _ (OUT_OF_PORTS, "out of ports") \ _ (BAD_ICMP_TYPE, "unsupported ICMP type") \ _ (MAX_SESSIONS_EXCEEDED, "maximum sessions exceeded") \ _ (NON_SYN, "non-SYN packet try to create session") \ - _ (TCP_CLOSED, "drops due to TCP in transitory timeout") \ _ (TRNSL_FAILED, "couldn't translate packet") typedef enum @@ -161,15 +140,43 @@ typedef enum NAT_OUT2IN_ED_N_ERROR, } nat_out2in_ed_error_t; +typedef enum +{ + NAT44_ED_TCP_FLAG_NONE = 0, + NAT44_ED_TCP_FLAG_FIN, + NAT44_ED_TCP_FLAG_SYN, + NAT44_ED_TCP_FLAG_SYNFIN, + NAT44_ED_TCP_FLAG_RST, + NAT44_ED_TCP_FLAG_FINRST, + NAT44_ED_TCP_FLAG_SYNRST, + NAT44_ED_TCP_FLAG_SYNFINRST, + NAT44_ED_TCP_N_FLAG, +} nat44_ed_tcp_flag_e; + +typedef enum +{ + NAT44_ED_DIR_I2O = 0, + NAT44_ED_DIR_O2I, + NAT44_ED_N_DIR, +} nat44_ed_dir_e; /* Endpoint dependent TCP session state */ -#define NAT44_SES_I2O_FIN 1 -#define NAT44_SES_O2I_FIN 2 -#define NAT44_SES_I2O_FIN_ACK 4 -#define NAT44_SES_O2I_FIN_ACK 8 -#define NAT44_SES_I2O_SYN 16 -#define NAT44_SES_O2I_SYN 32 -#define NAT44_SES_RST 64 +typedef enum +{ + NAT44_ED_TCP_STATE_CLOSED = 0, + NAT44_ED_TCP_STATE_SYN_I2O, + NAT44_ED_TCP_STATE_SYN_O2I, + NAT44_ED_TCP_STATE_ESTABLISHED, + NAT44_ED_TCP_STATE_FIN_I2O, + NAT44_ED_TCP_STATE_FIN_O2I, + NAT44_ED_TCP_STATE_RST_TRANS, + NAT44_ED_TCP_STATE_FIN_TRANS, + NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O, + NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I, + NAT44_ED_TCP_N_STATE, +} nat44_ed_tcp_state_e; + +format_function_t format_nat44_ed_tcp_state; /* Session flags */ #define SNAT_SESSION_FLAG_STATIC_MAPPING (1 << 0) @@ -341,10 +348,7 @@ typedef CLIB_PACKED(struct u16 ext_host_nat_port; /* TCP session state */ - u8 state; - u32 i2o_fin_seq; - u32 o2i_fin_seq; - u64 tcp_closed_timestamp; + nat44_ed_tcp_state_e tcp_state; /* per vrf sessions index */ u32 per_vrf_sessions_index; @@ -668,6 +672,16 @@ typedef struct snat_main_s vnet_main_t *vnet_main; + /* TCP session state machine table: + * first dimension is possible states + * second dimension is direction (in2out/out2in) + * third dimension is TCP flag (SYN, RST, FIN) + * + * value is next state to change to + */ + nat44_ed_tcp_state_e tcp_state_change_table[NAT44_ED_TCP_N_STATE] + [NAT44_ED_N_DIR] + [NAT44_ED_TCP_N_FLAG]; } snat_main_t; typedef struct @@ -789,16 +803,6 @@ nat44_ed_is_interface_outside (snat_interface_t *i) return i->flags & NAT_INTERFACE_FLAG_IS_OUTSIDE; } -/** \brief Check if NAT44 endpoint-dependent TCP session is closed. - @param s NAT session - @return true if session is closed -*/ -always_inline bool -nat44_is_ses_closed (snat_session_t *s) -{ - return s->state == 0xf; -} - /** \brief Check if client initiating TCP connection (received SYN from client) @param t TCP header @return true if client initiating TCP connection diff --git a/src/plugins/nat/nat44-ed/nat44_ed_api.c b/src/plugins/nat/nat44-ed/nat44_ed_api.c index 4664fabfec5..6ab3aaa35d9 100644 --- a/src/plugins/nat/nat44-ed/nat44_ed_api.c +++ b/src/plugins/nat/nat44-ed/nat44_ed_api.c @@ -31,6 +31,8 @@ #include <nat/nat44-ed/nat44_ed.api_enum.h> #include <nat/nat44-ed/nat44_ed.api_types.h> +#include <nat/nat44-ed/nat44_ed_inlines.h> + #define REPLY_MSG_ID_BASE sm->msg_id_base #include <vlibapi/api_helper_macros.h> @@ -1806,26 +1808,7 @@ send_nat44_user_session_v2_details (snat_session_t *s, rmp->ext_host_nat_port = s->ext_host_nat_port; } - sess_timeout_time = s->last_heard; - switch (s->proto) - { - case IP_PROTOCOL_TCP: - if (s->state) - sess_timeout_time += sm->timeouts.tcp.established; - else - sess_timeout_time += sm->timeouts.tcp.transitory; - break; - case IP_PROTOCOL_UDP: - sess_timeout_time += sm->timeouts.udp; - break; - case IP_PROTOCOL_ICMP: - sess_timeout_time += sm->timeouts.icmp; - break; - default: - sess_timeout_time += sm->timeouts.udp; - break; - } - + sess_timeout_time = s->last_heard + nat44_session_get_timeout (sm, s); rmp->is_timed_out = (now >= sess_timeout_time); vl_api_send_msg (reg, (u8 *) rmp); diff --git a/src/plugins/nat/nat44-ed/nat44_ed_cli.c b/src/plugins/nat/nat44-ed/nat44_ed_cli.c index cfd36278674..7693063b8d8 100644 --- a/src/plugins/nat/nat44-ed/nat44_ed_cli.c +++ b/src/plugins/nat/nat44-ed/nat44_ed_cli.c @@ -478,16 +478,12 @@ nat44_show_summary_command_fn (vlib_main_t * vm, unformat_input_t * input, u64 now = vlib_time_now (vm); u64 sess_timeout_time = 0; - u32 udp_sessions = 0; - u32 tcp_sessions = 0; - u32 icmp_sessions = 0; - u32 other_sessions = 0; - - u32 timed_out = 0; - u32 transitory = 0; - u32 transitory_wait_closed = 0; - u32 transitory_closed = 0; - u32 established = 0; + struct + { + u32 total; + u32 timed_out; + } udp = { 0 }, tcp = { 0 }, tcp_established = { 0 }, tcp_transitory = { 0 }, + icmp = { 0 }, other = { 0 }; u32 fib; @@ -501,43 +497,44 @@ nat44_show_summary_command_fn (vlib_main_t * vm, unformat_input_t * input, { pool_foreach (s, tsm->sessions) { - sess_timeout_time = s->last_heard + - (f64) nat44_session_get_timeout (sm, s); - if (now >= sess_timeout_time) - timed_out++; - - switch (s->proto) - { - case IP_PROTOCOL_ICMP: - icmp_sessions++; - break; - case IP_PROTOCOL_TCP: - tcp_sessions++; - if (s->state) - { - if (s->tcp_closed_timestamp) - { - if (now >= s->tcp_closed_timestamp) - { - ++transitory_closed; - } - else - { - ++transitory_wait_closed; - } - } - transitory++; - } - else - established++; - break; - case IP_PROTOCOL_UDP: - udp_sessions++; - break; - default: - ++other_sessions; - break; - } + sess_timeout_time = + s->last_heard + (f64) nat44_session_get_timeout (sm, s); + + switch (s->proto) + { + case IP_PROTOCOL_ICMP: + ++icmp.total; + if (now >= sess_timeout_time) + ++icmp.timed_out; + break; + case IP_PROTOCOL_TCP: + ++tcp.total; + if (now >= sess_timeout_time) + ++tcp.timed_out; + if (nat44_ed_tcp_is_established (s->tcp_state)) + { + ++tcp_established.total; + if (now >= sess_timeout_time) + ++tcp_established.timed_out; + } + else + { + ++tcp_transitory.total; + if (now >= sess_timeout_time) + ++tcp_transitory.timed_out; + } + break; + case IP_PROTOCOL_UDP: + ++udp.total; + if (now >= sess_timeout_time) + ++udp.timed_out; + break; + default: + ++other.total; + if (now >= sess_timeout_time) + ++other.timed_out; + break; + } } nat44_show_lru_summary (vm, tsm, now, sess_timeout_time); count += pool_elts (tsm->sessions); @@ -550,39 +547,40 @@ nat44_show_summary_command_fn (vlib_main_t * vm, unformat_input_t * input, { sess_timeout_time = s->last_heard + (f64) nat44_session_get_timeout (sm, s); - if (now >= sess_timeout_time) - timed_out++; switch (s->proto) { case IP_PROTOCOL_ICMP: - icmp_sessions++; + ++icmp.total; + if (now >= sess_timeout_time) + ++icmp.timed_out; break; case IP_PROTOCOL_TCP: - tcp_sessions++; - if (s->state) + ++tcp.total; + if (now >= sess_timeout_time) + ++tcp.timed_out; + if (nat44_ed_tcp_is_established (s->tcp_state)) { - if (s->tcp_closed_timestamp) - { - if (now >= s->tcp_closed_timestamp) - { - ++transitory_closed; - } - else - { - ++transitory_wait_closed; - } - } - transitory++; + ++tcp_established.total; + if (now >= sess_timeout_time) + ++tcp_established.timed_out; } else - established++; + { + ++tcp_transitory.total; + if (now >= sess_timeout_time) + ++tcp_transitory.timed_out; + } break; case IP_PROTOCOL_UDP: - udp_sessions++; + ++udp.total; + if (now >= sess_timeout_time) + ++udp.timed_out; break; default: - ++other_sessions; + ++other.total; + if (now >= sess_timeout_time) + ++other.timed_out; break; } } @@ -590,18 +588,25 @@ nat44_show_summary_command_fn (vlib_main_t * vm, unformat_input_t * input, count = pool_elts (tsm->sessions); } - vlib_cli_output (vm, "total timed out sessions: %u", timed_out); - vlib_cli_output (vm, "total sessions: %u", count); - vlib_cli_output (vm, "total tcp sessions: %u", tcp_sessions); - vlib_cli_output (vm, "total tcp established sessions: %u", established); - vlib_cli_output (vm, "total tcp transitory sessions: %u", transitory); - vlib_cli_output (vm, "total tcp transitory (WAIT-CLOSED) sessions: %u", - transitory_wait_closed); - vlib_cli_output (vm, "total tcp transitory (CLOSED) sessions: %u", - transitory_closed); - vlib_cli_output (vm, "total udp sessions: %u", udp_sessions); - vlib_cli_output (vm, "total icmp sessions: %u", icmp_sessions); - vlib_cli_output (vm, "total other sessions: %u", other_sessions); + u32 timed_out = + tcp.timed_out + icmp.timed_out + udp.timed_out + other.timed_out; + vlib_cli_output (vm, "total sessions: %u (timed out: %u)", count, timed_out); + vlib_cli_output (vm, "tcp sessions:"); + vlib_cli_output (vm, " total: %u (timed out: %u)", tcp.total, + tcp.timed_out); + vlib_cli_output (vm, " established: %u (timed out: %u)", + tcp_established.total, tcp_established.timed_out); + vlib_cli_output (vm, " transitory: %u (timed out: %u)", + tcp_transitory.total, tcp_transitory.timed_out); + vlib_cli_output (vm, "udp sessions:"); + vlib_cli_output (vm, " total: %u (timed out: %u)", udp.total, + udp.timed_out); + vlib_cli_output (vm, "icmp sessions:"); + vlib_cli_output (vm, " total: %u (timed out: %u)", icmp.total, + icmp.timed_out); + vlib_cli_output (vm, "other sessions:"); + vlib_cli_output (vm, " total: %u (timed out: %u)", other.total, + other.timed_out); return 0; } @@ -1456,7 +1461,8 @@ print: continue; showed_sessions++; } - vlib_cli_output (vm, " %U\n", format_snat_session, tsm, s); + vlib_cli_output (vm, " %U\n", format_snat_session, sm, tsm, s, + vlib_time_now (vm)); } if (filtering) { diff --git a/src/plugins/nat/nat44-ed/nat44_ed_format.c b/src/plugins/nat/nat44-ed/nat44_ed_format.c index a6f63026897..4598c02208f 100644 --- a/src/plugins/nat/nat44-ed/nat44_ed_format.c +++ b/src/plugins/nat/nat44-ed/nat44_ed_format.c @@ -40,29 +40,13 @@ format_nat_addr_and_port_alloc_alg (u8 * s, va_list * args) } u8 * -format_snat_session_state (u8 * s, va_list * args) -{ - u32 i = va_arg (*args, u32); - u8 *t = 0; - - switch (i) - { -#define _(v, N, str) case SNAT_SESSION_##N: t = (u8 *) str; break; - foreach_snat_session_state -#undef _ - default: - t = format (t, "unknown"); - } - s = format (s, "%s", t); - return s; -} - -u8 * format_snat_session (u8 * s, va_list * args) { + snat_main_t *sm = va_arg (*args, snat_main_t *); snat_main_per_thread_data_t *tsm = va_arg (*args, snat_main_per_thread_data_t *); snat_session_t *sess = va_arg (*args, snat_session_t *); + f64 now = va_arg (*args, f64); if (nat44_ed_is_unk_proto (sess->proto)) { @@ -103,8 +87,10 @@ format_snat_session (u8 * s, va_list * args) s = format (s, " o2i flow: %U\n", format_nat_6t_flow, &sess->o2i); s = format (s, " index %llu\n", sess - tsm->sessions); s = format (s, " last heard %.2f\n", sess->last_heard); - s = format (s, " total pkts %d, total bytes %lld\n", - sess->total_pkts, sess->total_bytes); + s = format (s, " timeout in %.2f\n", + nat44_session_get_timeout (sm, sess) - (now - sess->last_heard)); + s = format (s, " total pkts %d, total bytes %lld\n", sess->total_pkts, + sess->total_bytes); if (nat44_ed_is_session_static (sess)) s = format (s, " static translation\n"); else diff --git a/src/plugins/nat/nat44-ed/nat44_ed_in2out.c b/src/plugins/nat/nat44-ed/nat44_ed_in2out.c index 23e0957dabe..99db6010783 100644 --- a/src/plugins/nat/nat44-ed/nat44_ed_in2out.c +++ b/src/plugins/nat/nat44-ed/nat44_ed_in2out.c @@ -49,6 +49,7 @@ typedef struct u8 is_slow_path; u8 translation_via_i2of; u8 lookup_skipped; + u8 tcp_state; } nat_in2out_ed_trace_t; static u8 * @@ -78,7 +79,7 @@ format_nat_in2out_ed_trace (u8 * s, va_list * args) { if (t->lookup_skipped) { - s = format (s, "\n lookup skipped - cached session index used"); + s = format (s, "\n lookup skipped - cached session index used"); } else { @@ -86,6 +87,11 @@ format_nat_in2out_ed_trace (u8 * s, va_list * args) &t->search_key); } } + if (IP_PROTOCOL_TCP == t->i2of.match.proto) + { + s = format (s, "\n TCP state: %U", format_nat44_ed_tcp_state, + t->tcp_state); + } return s; } @@ -675,7 +681,9 @@ nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip, { if (ip->protocol == IP_PROTOCOL_TCP) { - nat44_set_tcp_session_state_i2o (sm, now, s, b, thread_index); + nat44_set_tcp_session_state_i2o ( + sm, now, s, vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags, + thread_index); } /* Accounting */ nat44_session_update_counters (s, now, @@ -697,7 +705,7 @@ nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip, u16 src_port, u16 dst_port, u32 thread_index, u32 rx_sw_if_index, u32 tx_sw_if_index, - f64 now, int is_multi_worker) + int is_multi_worker) { clib_bihash_kv_16_8_t kv, value; snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; @@ -715,12 +723,6 @@ nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b, s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value)); - if (nat44_is_ses_closed (s) - && (!s->tcp_closed_timestamp || now >= s->tcp_closed_timestamp)) - { - nat44_ed_free_session_data (sm, s, thread_index, 0); - nat_ed_session_delete (sm, s, thread_index, 1); - } return 1; } @@ -800,7 +802,7 @@ icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip, { if (PREDICT_FALSE (nat44_ed_not_translate_output_feature ( sm, b, ip, lookup_sport, lookup_dport, thread_index, sw_if_index, - tx_sw_if_index, now, is_multi_worker))) + tx_sw_if_index, is_multi_worker))) { return next; } @@ -1173,22 +1175,6 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm, goto trace0; } - if (s0->tcp_closed_timestamp) - { - if (now >= s0->tcp_closed_timestamp) - { - // session is closed, go slow path, freed in slow path - next[0] = def_slow; - } - else - { - // session in transitory timeout, drop - b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TCP_CLOSED]; - next[0] = NAT_NEXT_DROP; - } - goto trace0; - } - // drop if session expired u64 sess_timeout_time; sess_timeout_time = @@ -1239,7 +1225,9 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm, case IP_PROTOCOL_TCP: vlib_increment_simple_counter (&sm->counters.fastpath.in2out.tcp, thread_index, cntr_sw_if_index0, 1); - nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index); + nat44_set_tcp_session_state_i2o ( + sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags, + thread_index); break; case IP_PROTOCOL_UDP: vlib_increment_simple_counter (&sm->counters.fastpath.in2out.udp, @@ -1282,6 +1270,7 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm, clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of)); clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if)); t->translation_via_i2of = (&s0->i2o == f); + t->tcp_state = s0->tcp_state; } else { @@ -1426,13 +1415,6 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm, s0 = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value0)); - - if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp) - { - nat44_ed_free_session_data (sm, s0, thread_index, 0); - nat_ed_session_delete (sm, s0, thread_index, 1); - s0 = NULL; - } } if (!s0) @@ -1442,7 +1424,7 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm, if (PREDICT_FALSE (nat44_ed_not_translate_output_feature ( sm, b0, ip0, vnet_buffer (b0)->ip.reass.l4_src_port, vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index, - rx_sw_if_index0, tx_sw_if_index0, now, is_multi_worker))) + rx_sw_if_index0, tx_sw_if_index0, is_multi_worker))) goto trace0; /* @@ -1496,7 +1478,9 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm, { vlib_increment_simple_counter (&sm->counters.slowpath.in2out.tcp, thread_index, cntr_sw_if_index0, 1); - nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index); + nat44_set_tcp_session_state_i2o ( + sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags, + thread_index); } else { @@ -1529,6 +1513,7 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm, clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of)); clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if)); t->translation_via_i2of = 1; + t->tcp_state = s0->tcp_state; } else diff --git a/src/plugins/nat/nat44-ed/nat44_ed_inlines.h b/src/plugins/nat/nat44-ed/nat44_ed_inlines.h index cb418960249..a13f250cd3b 100644 --- a/src/plugins/nat/nat44-ed/nat44_ed_inlines.h +++ b/src/plugins/nat/nat44-ed/nat44_ed_inlines.h @@ -24,6 +24,7 @@ #include <vnet/fib/ip4_fib.h> #include <nat/lib/log.h> +#include <nat/lib/ipfix_logging.h> #include <nat/nat44-ed/nat44_ed.h> always_inline void @@ -171,6 +172,26 @@ nat_get_icmp_session_lookup_values (vlib_buffer_t *b, ip4_header_t *ip0, return 0; } +always_inline int +nat44_ed_tcp_is_established (nat44_ed_tcp_state_e state) +{ + static int lookup[] = { + [NAT44_ED_TCP_STATE_CLOSED] = 0, + [NAT44_ED_TCP_STATE_SYN_I2O] = 0, + [NAT44_ED_TCP_STATE_SYN_O2I] = 0, + [NAT44_ED_TCP_STATE_ESTABLISHED] = 1, + [NAT44_ED_TCP_STATE_FIN_I2O] = 1, + [NAT44_ED_TCP_STATE_FIN_O2I] = 1, + [NAT44_ED_TCP_STATE_RST_TRANS] = 0, + [NAT44_ED_TCP_STATE_FIN_TRANS] = 0, + [NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O] = 0, + [NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I] = 0, + [NAT44_ED_TCP_N_STATE] = 0, + }; + ASSERT (state <= ARRAY_LEN (lookup)); + return lookup[state]; +} + always_inline u32 nat44_session_get_timeout (snat_main_t *sm, snat_session_t *s) { @@ -184,10 +205,10 @@ nat44_session_get_timeout (snat_main_t *sm, snat_session_t *s) return sm->timeouts.udp; case IP_PROTOCOL_TCP: { - if (s->state) - return sm->timeouts.tcp.transitory; - else + if (nat44_ed_tcp_is_established (s->tcp_state)) return sm->timeouts.tcp.established; + else + return sm->timeouts.tcp.transitory; } default: return sm->timeouts.udp; @@ -340,8 +361,7 @@ nat_lru_free_one_with_head (snat_main_t *sm, int thread_index, f64 now, sess_timeout_time = s->last_heard + (f64) nat44_session_get_timeout (sm, s); - if (now >= sess_timeout_time || - (s->tcp_closed_timestamp && now >= s->tcp_closed_timestamp)) + if (now >= sess_timeout_time) { nat44_ed_free_session_data (sm, s, thread_index, 0); nat_ed_session_delete (sm, s, thread_index, 0); @@ -701,101 +721,303 @@ is_interface_addr (snat_main_t *sm, vlib_node_runtime_t *node, } always_inline void -nat44_set_tcp_session_state_i2o (snat_main_t *sm, f64 now, snat_session_t *ses, - vlib_buffer_t *b, u32 thread_index) +nat44_ed_session_reopen (u32 thread_index, snat_session_t *s) { - snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; - u8 tcp_flags = vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags; - u32 tcp_ack_number = vnet_buffer (b)->ip.reass.tcp_ack_number; - u32 tcp_seq_number = vnet_buffer (b)->ip.reass.tcp_seq_number; - if ((ses->state == 0) && (tcp_flags & TCP_FLAG_RST)) - ses->state = NAT44_SES_RST; - if ((ses->state == NAT44_SES_RST) && !(tcp_flags & TCP_FLAG_RST)) - ses->state = 0; - if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_SYN) && - (ses->state & NAT44_SES_O2I_SYN)) - ses->state = 0; - if (tcp_flags & TCP_FLAG_SYN) - ses->state |= NAT44_SES_I2O_SYN; - if (tcp_flags & TCP_FLAG_FIN) - { - ses->i2o_fin_seq = clib_net_to_host_u32 (tcp_seq_number); - ses->state |= NAT44_SES_I2O_FIN; - } - if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_O2I_FIN)) + nat_syslog_nat44_sdel (0, s->in2out.fib_index, &s->in2out.addr, + s->in2out.port, &s->ext_host_nat_addr, + s->ext_host_nat_port, &s->out2in.addr, s->out2in.port, + &s->ext_host_addr, s->ext_host_port, s->proto, + nat44_ed_is_twice_nat_session (s)); + + nat_ipfix_logging_nat44_ses_delete ( + thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto, + s->in2out.port, s->out2in.port, s->in2out.fib_index); + nat_ipfix_logging_nat44_ses_create ( + thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto, + s->in2out.port, s->out2in.port, s->in2out.fib_index); + + nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr, + s->in2out.port, &s->ext_host_nat_addr, + s->ext_host_nat_port, &s->out2in.addr, s->out2in.port, + &s->ext_host_addr, s->ext_host_port, s->proto, 0); + s->total_pkts = 0; + s->total_bytes = 0; +} + +always_inline void +nat44_ed_init_tcp_state_stable (snat_main_t *sm) +{ + /* first make sure whole table is initialised in a way where state + * is not changed, then define special cases */ + nat44_ed_tcp_state_e s; + for (s = 0; s < NAT44_ED_TCP_N_STATE; ++s) { - if (clib_net_to_host_u32 (tcp_ack_number) > ses->o2i_fin_seq) + int i; + for (i = 0; i < NAT44_ED_N_DIR; ++i) { - ses->state |= NAT44_SES_O2I_FIN_ACK; - if (nat44_is_ses_closed (ses)) - { // if session is now closed, save the timestamp - ses->tcp_closed_timestamp = now + sm->timeouts.tcp.transitory; - ses->last_lru_update = now; + int j = 0; + for (j = 0; j < NAT44_ED_TCP_N_FLAG; ++j) + { + sm->tcp_state_change_table[s][i][j] = s; } } } - // move the session to proper LRU - if (ses->state) - { - ses->lru_head_index = tsm->tcp_trans_lru_head_index; - } - else - { - ses->lru_head_index = tsm->tcp_estab_lru_head_index; - } - clib_dlist_remove (tsm->lru_pool, ses->lru_index); - clib_dlist_addtail (tsm->lru_pool, ses->lru_head_index, ses->lru_index); + /* CLOSED and any kind of SYN -> HALF-OPEN */ + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_CLOSED][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYN] = + NAT44_ED_TCP_STATE_SYN_I2O; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_CLOSED][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYN] = + NAT44_ED_TCP_STATE_SYN_O2I; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_CLOSED][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYNFIN] = + NAT44_ED_TCP_STATE_SYN_I2O; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_CLOSED][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYNFIN] = + NAT44_ED_TCP_STATE_SYN_O2I; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_CLOSED][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYNFINRST] = + NAT44_ED_TCP_STATE_SYN_I2O; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_CLOSED][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYNFINRST] = + NAT44_ED_TCP_STATE_SYN_O2I; + + /* HALF-OPEN and any kind of SYN in right direction -> ESTABLISHED */ + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_SYN_I2O][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYN] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_SYN_O2I][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYN] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_SYN_I2O][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYNFIN] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_SYN_O2I][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYNFIN] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_SYN_I2O][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYNFINRST] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_SYN_O2I][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYNFINRST] = + NAT44_ED_TCP_STATE_ESTABLISHED; + + /* ESTABLISHED and any kind of RST -> RST_TRANS */ + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_RST] = + NAT44_ED_TCP_STATE_RST_TRANS; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_RST] = + NAT44_ED_TCP_STATE_RST_TRANS; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYNRST] = + NAT44_ED_TCP_STATE_RST_TRANS; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYNRST] = + NAT44_ED_TCP_STATE_RST_TRANS; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_FINRST] = + NAT44_ED_TCP_STATE_RST_TRANS; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_FINRST] = + NAT44_ED_TCP_STATE_RST_TRANS; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYNFINRST] = + NAT44_ED_TCP_STATE_RST_TRANS; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYNFINRST] = + NAT44_ED_TCP_STATE_RST_TRANS; + + /* ESTABLISHED and any kind of FIN without RST -> HALF-CLOSED */ + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_FIN] = + NAT44_ED_TCP_STATE_FIN_I2O; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_FIN] = + NAT44_ED_TCP_STATE_FIN_O2I; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYNFIN] = + NAT44_ED_TCP_STATE_FIN_I2O; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYNFIN] = + NAT44_ED_TCP_STATE_FIN_O2I; + + /* HALF-CLOSED and any kind of FIN -> FIN_TRANS */ + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_I2O][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_FIN] = + NAT44_ED_TCP_STATE_FIN_TRANS; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_O2I][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_FIN] = + NAT44_ED_TCP_STATE_FIN_TRANS; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_I2O][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYNFIN] = + NAT44_ED_TCP_STATE_FIN_TRANS; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_O2I][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYNFIN] = + NAT44_ED_TCP_STATE_FIN_TRANS; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_I2O][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_FINRST] = + NAT44_ED_TCP_STATE_FIN_TRANS; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_O2I][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_FINRST] = + NAT44_ED_TCP_STATE_FIN_TRANS; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_I2O][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYNFINRST] = + NAT44_ED_TCP_STATE_FIN_TRANS; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_O2I][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYNFINRST] = + NAT44_ED_TCP_STATE_FIN_TRANS; + + /* RST_TRANS and anything non-RST -> ESTABLISHED */ + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_NONE] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_NONE] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYN] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYN] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_FIN] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_FIN] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYNFIN] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYNFIN] = + NAT44_ED_TCP_STATE_ESTABLISHED; + + /* FIN_TRANS and any kind of SYN -> HALF-REOPEN */ + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYN] = + NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYN] = + NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYNRST] = + NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYNRST] = + NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYNFIN] = + NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYNFIN] = + NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYNFINRST] = + NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYNFINRST] = + NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I; + + /* HALF-REOPEN and any kind of SYN in right direction -> ESTABLISHED */ + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O] + [NAT44_ED_DIR_O2I][NAT44_ED_TCP_FLAG_SYN] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I] + [NAT44_ED_DIR_I2O][NAT44_ED_TCP_FLAG_SYN] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O] + [NAT44_ED_DIR_O2I][NAT44_ED_TCP_FLAG_SYNRST] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I] + [NAT44_ED_DIR_I2O][NAT44_ED_TCP_FLAG_SYNRST] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O] + [NAT44_ED_DIR_O2I][NAT44_ED_TCP_FLAG_SYNFIN] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I] + [NAT44_ED_DIR_I2O][NAT44_ED_TCP_FLAG_SYNFIN] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O] + [NAT44_ED_DIR_O2I][NAT44_ED_TCP_FLAG_SYNFINRST] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I] + [NAT44_ED_DIR_I2O][NAT44_ED_TCP_FLAG_SYNFINRST] = + NAT44_ED_TCP_STATE_ESTABLISHED; } +/* TCP state tracking according to RFC 7857 (and RFC 6146, which is referenced + * by RFC 7857). Our implementation also goes beyond by supporting creation of + * a new session while old session is in transitory timeout after seeing FIN + * packets from both sides. */ always_inline void -nat44_set_tcp_session_state_o2i (snat_main_t *sm, f64 now, snat_session_t *ses, - u8 tcp_flags, u32 tcp_ack_number, - u32 tcp_seq_number, u32 thread_index) +nat44_set_tcp_session_state (snat_main_t *sm, f64 now, snat_session_t *ses, + u8 tcp_flags, u32 thread_index, + nat44_ed_dir_e dir) { snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; - if ((ses->state == 0) && (tcp_flags & TCP_FLAG_RST)) - ses->state = NAT44_SES_RST; - if ((ses->state == NAT44_SES_RST) && !(tcp_flags & TCP_FLAG_RST)) - ses->state = 0; - if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_SYN) && - (ses->state & NAT44_SES_O2I_SYN)) - ses->state = 0; - if (tcp_flags & TCP_FLAG_SYN) - ses->state |= NAT44_SES_O2I_SYN; - if (tcp_flags & TCP_FLAG_FIN) - { - ses->o2i_fin_seq = clib_net_to_host_u32 (tcp_seq_number); - ses->state |= NAT44_SES_O2I_FIN; - } - if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_FIN)) + nat44_ed_tcp_flag_e flags = + tcp_flags & (TCP_FLAG_SYN | TCP_FLAG_FIN | TCP_FLAG_RST); + + u8 old_state = ses->tcp_state; + ses->tcp_state = sm->tcp_state_change_table[ses->tcp_state][dir][flags]; + + if (old_state != ses->tcp_state) { - if (clib_net_to_host_u32 (tcp_ack_number) > ses->i2o_fin_seq) - ses->state |= NAT44_SES_I2O_FIN_ACK; - if (nat44_is_ses_closed (ses)) - { // if session is now closed, save the timestamp - ses->tcp_closed_timestamp = now + sm->timeouts.tcp.transitory; - ses->last_lru_update = now; + if (nat44_ed_tcp_is_established (ses->tcp_state)) + { + if (NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O == old_state || + NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I == old_state) + { + nat44_ed_session_reopen (thread_index, ses); + } + ses->lru_head_index = tsm->tcp_estab_lru_head_index; } + else + { + if (NAT44_ED_TCP_STATE_ESTABLISHED == old_state) + { // need to update last heard otherwise session might get + // immediately timed out if it has been idle longer than + // transitory timeout + ses->last_heard = now; + } + ses->lru_head_index = tsm->tcp_trans_lru_head_index; + } + ses->last_lru_update = now; + clib_dlist_remove (tsm->lru_pool, ses->lru_index); + clib_dlist_addtail (tsm->lru_pool, ses->lru_head_index, ses->lru_index); } - // move the session to proper LRU - if (ses->state) - { - ses->lru_head_index = tsm->tcp_trans_lru_head_index; - } - else - { - ses->lru_head_index = tsm->tcp_estab_lru_head_index; - } - clib_dlist_remove (tsm->lru_pool, ses->lru_index); - clib_dlist_addtail (tsm->lru_pool, ses->lru_head_index, ses->lru_index); +} + +always_inline void +nat44_set_tcp_session_state_i2o (snat_main_t *sm, f64 now, snat_session_t *ses, + u8 tcp_flags, u32 thread_index) +{ + return nat44_set_tcp_session_state (sm, now, ses, tcp_flags, thread_index, + NAT44_ED_DIR_I2O); +} + +always_inline void +nat44_set_tcp_session_state_o2i (snat_main_t *sm, f64 now, snat_session_t *ses, + u8 tcp_flags, u32 thread_index) +{ + return nat44_set_tcp_session_state (sm, now, ses, tcp_flags, thread_index, + NAT44_ED_DIR_O2I); } always_inline void nat44_session_update_counters (snat_session_t *s, f64 now, uword bytes, u32 thread_index) { - s->last_heard = now; + if (NAT44_ED_TCP_STATE_RST_TRANS != s->tcp_state && + NAT44_ED_TCP_STATE_FIN_TRANS != s->tcp_state && + NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O != s->tcp_state && + NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I != s->tcp_state) + { + s->last_heard = now; + } s->total_pkts++; s->total_bytes += bytes; } diff --git a/src/plugins/nat/nat44-ed/nat44_ed_out2in.c b/src/plugins/nat/nat44-ed/nat44_ed_out2in.c index 5ad57a17098..40a72122140 100644 --- a/src/plugins/nat/nat44-ed/nat44_ed_out2in.c +++ b/src/plugins/nat/nat44-ed/nat44_ed_out2in.c @@ -41,7 +41,6 @@ typedef enum NAT_ED_SP_REASON_NO_REASON, NAT_ED_SP_REASON_LOOKUP_FAILED, NAT_ED_SP_REASON_VRF_EXPIRED, - NAT_ED_SP_TCP_CLOSED, NAT_ED_SP_SESS_EXPIRED, } nat_slow_path_reason_e; @@ -57,6 +56,7 @@ typedef struct u8 is_slow_path; u8 translation_via_i2of; u8 lookup_skipped; + u8 tcp_state; nat_slow_path_reason_e slow_path_reason; } nat44_ed_out2in_trace_t; @@ -72,8 +72,6 @@ format_slow_path_reason (u8 *s, va_list *args) return format (s, "slow path because lookup failed"); case NAT_ED_SP_REASON_VRF_EXPIRED: return format (s, "slow path because vrf expired"); - case NAT_ED_SP_TCP_CLOSED: - return format (s, "slow path because tcp closed"); case NAT_ED_SP_SESS_EXPIRED: return format (s, "slow path because session expired"); } @@ -107,14 +105,19 @@ format_nat44_ed_out2in_trace (u8 * s, va_list * args) { if (t->lookup_skipped) { - s = format (s, "\n lookup skipped - cached session index used"); + s = format (s, "\n lookup skipped - cached session index used"); } else { s = format (s, "\n search key %U", format_ed_session_kvp, &t->search_key); } - s = format (s, "\n %U", format_slow_path_reason, t->slow_path_reason); + s = format (s, "\n %U", format_slow_path_reason, t->slow_path_reason); + } + if (IP_PROTOCOL_TCP == t->i2of.match.proto) + { + s = format (s, "\n TCP state: %U", format_nat44_ed_tcp_state, + t->tcp_state); } return s; @@ -645,10 +648,9 @@ create_bypass_for_fwd (snat_main_t *sm, vlib_buffer_t *b, snat_session_t *s, if (ip->protocol == IP_PROTOCOL_TCP) { - tcp_header_t *tcp = ip4_next_header (ip); - nat44_set_tcp_session_state_o2i (sm, now, s, tcp->flags, - tcp->ack_number, tcp->seq_number, - thread_index); + nat44_set_tcp_session_state_o2i ( + sm, now, s, vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags, + thread_index); } /* Accounting */ @@ -883,23 +885,6 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm, goto trace0; } - if (s0->tcp_closed_timestamp) - { - if (now >= s0->tcp_closed_timestamp) - { - // session is closed, go slow path, freed in slow path - slow_path_reason = NAT_ED_SP_TCP_CLOSED; - next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH; - } - else - { - // session in transitory timeout, drop - b0->error = node->errors[NAT_OUT2IN_ED_ERROR_TCP_CLOSED]; - next[0] = NAT_NEXT_DROP; - } - goto trace0; - } - // drop if session expired u64 sess_timeout_time; sess_timeout_time = @@ -981,10 +966,6 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm, nat44_set_tcp_session_state_o2i (sm, now, s0, vnet_buffer (b0)->ip. reass.icmp_type_or_tcp_flags, - vnet_buffer (b0)->ip. - reass.tcp_ack_number, - vnet_buffer (b0)->ip. - reass.tcp_seq_number, thread_index); break; case IP_PROTOCOL_UDP: @@ -1028,6 +1009,7 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm, clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of)); clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if)); t->translation_via_i2of = (&s0->i2o == f); + t->tcp_state = s0->tcp_state; } else { @@ -1170,13 +1152,6 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm, s0 = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value0)); - - if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp) - { - nat44_ed_free_session_data (sm, s0, thread_index, 0); - nat_ed_session_delete (sm, s0, thread_index, 1); - s0 = NULL; - } } if (!s0) @@ -1264,10 +1239,6 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm, nat44_set_tcp_session_state_o2i (sm, now, s0, vnet_buffer (b0)->ip. reass.icmp_type_or_tcp_flags, - vnet_buffer (b0)->ip. - reass.tcp_ack_number, - vnet_buffer (b0)->ip. - reass.tcp_seq_number, thread_index); } else @@ -1300,6 +1271,7 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm, t->session_index = s0 - tsm->sessions; clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of)); clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if)); + t->tcp_state = s0->tcp_state; } else { diff --git a/src/plugins/nat/nat44-ed/tcp_conn_track.rst b/src/plugins/nat/nat44-ed/tcp_conn_track.rst new file mode 100644 index 00000000000..faf0dec8b06 --- /dev/null +++ b/src/plugins/nat/nat44-ed/tcp_conn_track.rst @@ -0,0 +1,65 @@ +NAT44ED TCP connection tracking +=============================== + +TCP connection tracking in endpoint-dependent NAT is based on RFC 7857 +and RFC 6146, which RFC 7857 references. + +See RFC 7857 for the original graph - our graph is slightly different, +allowing creation of new session, while an old session is in transitory +timeout after seeing FIN packets from both sides: + +After discussion on vpp-dev and with Andrew Yourtschenko we agreed that +it's friendly behaviour to allow creating a new session while the old +one is closed and in transitory timeout. The alternative means VPP is +insisting that a 5-tuple connection cannot be created while an old one +is finished and timing out. There is no apparent reason why our change +would break anything and we agreed that it could only help users. + +:: + + + +------------transitory timeout----------------+ + | | + | +-------------+ | + | session created---->+ CLOSED | | + | +-------------+ | + | | | | ++-----+ | SYN SYN | +| v v IN2OUT OUT2IN | +| +->session removed | | | +| | ^ ^ ^ ^ ^ v v | +| | | | | | | +-------+ +-------+ | +| | | | | | +----transitory timeout---+SYN_I2O| |SYN_O2I+--+ +| | | | | | +---------+ |-------| |-------| +| | | | | +-transitory---+RST_TRANS| | | +| | | | | timeout +---------+ SYN SYN +| | | | | | ^ OUT2IN IN2OUT +| | | | | | | | | +| | | | | | | v v +| | | | | | | +-----------+ +| | | | | | +--RST----+ESTABLISHED+<-SYN IN2OUT-+ +| | | | | | +-----------+ | +| | | | | +---data pkt-----^ | | | ^ | +| | | | | | | | | | +| | | | +----established timeout---------------+ | | | | +| | | | | | | | +| | | | +-----FIN IN2OUT---------+ | | | +| | | | v | | | +| | | | +-------+ +--FIN OUT2IN----+ | | +| | | +--established---+FIN_I2O| | | | +| | | timeout +-------+ v +-SYN OUT2IN-+ | +| | | | +-------+ | | +| | +----established-------------+FIN_O2I| +--------------+ | +| | timeout | +-------+ |REOPEN_SYN_I2O| +--------------+ +| | | | +--------------+ |REOPEN_SYN_O2I| +| | FIN FIN ^ | +--------------+ +| | OUT2IN IN2OUT | | ^ | +| | | | | | | | +| | v v | | | | +| | +-------------+ | | | | +| +--transitory timeout---+ FIN_TRANS +-SYN IN2OUT-+ | | | +| +-------------+ | | | +| | | | | +| +--------SYN OUT2IN----|-----------+ | +| v | ++------------------transitory timeout-------------------+<-------------+ |