diff options
Diffstat (limited to 'src/plugins')
-rw-r--r-- | src/plugins/nat/lib/log.h | 15 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ed/nat44_ed.c | 47 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ed/nat44_ed.h | 88 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ed/nat44_ed_api.c | 23 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ed/nat44_ed_cli.c | 166 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ed/nat44_ed_format.c | 26 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ed/nat44_ed_in2out.c | 57 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ed/nat44_ed_inlines.h | 380 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ed/nat44_ed_out2in.c | 54 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ed/tcp_conn_track.rst | 65 |
10 files changed, 589 insertions, 332 deletions
diff --git a/src/plugins/nat/lib/log.h b/src/plugins/nat/lib/log.h index 26bd93f2589..a82028ed8bf 100644 --- a/src/plugins/nat/lib/log.h +++ b/src/plugins/nat/lib/log.h @@ -21,20 +21,7 @@ #include <vppinfra/elog.h> -#define foreach_nat_log_level \ - _ (0x00, LOG_NONE) \ - _ (0x01, LOG_ERROR) \ - _ (0x02, LOG_WARNING) \ - _ (0x03, LOG_NOTICE) \ - _ (0x04, LOG_INFO) \ - _ (0x05, LOG_DEBUG) - -typedef enum nat_log_level_t_ -{ -#define _(n, f) NAT_##f = n, - foreach_nat_log_level -#undef _ -} nat_log_level_t; +#include <nat/lib/nat_types.api_types.h> #define nat_elog(_pm, _level, _str) \ do \ diff --git a/src/plugins/nat/nat44-ed/nat44_ed.c b/src/plugins/nat/nat44-ed/nat44_ed.c index d3ef3d54f89..e389a81aab1 100644 --- a/src/plugins/nat/nat44-ed/nat44_ed.c +++ b/src/plugins/nat/nat44-ed/nat44_ed.c @@ -2390,6 +2390,8 @@ nat44_plugin_enable (nat44_config_t c) nat44_ed_db_init (sm->max_translations_per_thread, sm->translation_buckets); + nat44_ed_init_tcp_state_stable (sm); + nat_affinity_enable (); nat_reset_timeouts (&sm->timeouts); @@ -4066,6 +4068,51 @@ nat_syslog_nat44_sdel (u32 ssubix, u32 sfibix, ip4_address_t *isaddr, is_twicenat); } +u8 * +format_nat44_ed_tcp_state (u8 *s, va_list *args) +{ + nat44_ed_tcp_state_e e = va_arg (*args, nat44_ed_tcp_state_e); + switch (e) + { + case NAT44_ED_TCP_STATE_CLOSED: + s = format (s, "closed"); + break; + case NAT44_ED_TCP_STATE_SYN_I2O: + s = format (s, "SYN seen in in2out direction"); + break; + case NAT44_ED_TCP_STATE_SYN_O2I: + s = format (s, "SYN seen in out2in direction"); + break; + case NAT44_ED_TCP_STATE_ESTABLISHED: + s = format (s, "SYN seen in both directions/established"); + break; + case NAT44_ED_TCP_STATE_FIN_I2O: + s = format (s, "FIN seen in in2out direction"); + break; + case NAT44_ED_TCP_STATE_FIN_O2I: + s = format (s, "FIN seen in out2in direction"); + break; + case NAT44_ED_TCP_STATE_RST_TRANS: + s = format (s, "RST seen/transitory timeout"); + break; + case NAT44_ED_TCP_STATE_FIN_TRANS: + s = format (s, "FIN seen in both directions/transitory timeout"); + break; + case NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I: + s = format (s, "FIN seen in both directions/transitory timeout/session " + "reopening in out2in direction"); + break; + case NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O: + s = format (s, "FIN seen in both directions/transitory timeout/session " + "reopening in in2out direction"); + break; + case NAT44_ED_TCP_N_STATE: + s = format (s, "BUG! unexpected N_STATE! BUG!"); + break; + } + return s; +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/plugins/nat/nat44-ed/nat44_ed.h b/src/plugins/nat/nat44-ed/nat44_ed.h index 9772f1ec79a..0706785514b 100644 --- a/src/plugins/nat/nat44-ed/nat44_ed.h +++ b/src/plugins/nat/nat44-ed/nat44_ed.h @@ -105,33 +105,12 @@ typedef enum #undef _ } nat_addr_and_port_alloc_alg_t; -/* Session state */ -#define foreach_snat_session_state \ - _(0, UNKNOWN, "unknown") \ - _(1, UDP_ACTIVE, "udp-active") \ - _(2, TCP_SYN_SENT, "tcp-syn-sent") \ - _(3, TCP_ESTABLISHED, "tcp-established") \ - _(4, TCP_FIN_WAIT, "tcp-fin-wait") \ - _(5, TCP_CLOSE_WAIT, "tcp-close-wait") \ - _(6, TCP_CLOSING, "tcp-closing") \ - _(7, TCP_LAST_ACK, "tcp-last-ack") \ - _(8, TCP_CLOSED, "tcp-closed") \ - _(9, ICMP_ACTIVE, "icmp-active") - -typedef enum -{ -#define _(v, N, s) SNAT_SESSION_##N = v, - foreach_snat_session_state -#undef _ -} snat_session_state_t; - #define foreach_nat_in2out_ed_error \ _ (UNSUPPORTED_PROTOCOL, "unsupported protocol") \ _ (OUT_OF_PORTS, "out of ports") \ _ (BAD_ICMP_TYPE, "unsupported ICMP type") \ _ (MAX_SESSIONS_EXCEEDED, "maximum sessions exceeded") \ _ (NON_SYN, "non-SYN packet try to create session") \ - _ (TCP_CLOSED, "drops due to TCP in transitory timeout") \ _ (TRNSL_FAILED, "couldn't translate packet") typedef enum @@ -161,15 +140,43 @@ typedef enum NAT_OUT2IN_ED_N_ERROR, } nat_out2in_ed_error_t; +typedef enum +{ + NAT44_ED_TCP_FLAG_NONE = 0, + NAT44_ED_TCP_FLAG_FIN, + NAT44_ED_TCP_FLAG_SYN, + NAT44_ED_TCP_FLAG_SYNFIN, + NAT44_ED_TCP_FLAG_RST, + NAT44_ED_TCP_FLAG_FINRST, + NAT44_ED_TCP_FLAG_SYNRST, + NAT44_ED_TCP_FLAG_SYNFINRST, + NAT44_ED_TCP_N_FLAG, +} nat44_ed_tcp_flag_e; + +typedef enum +{ + NAT44_ED_DIR_I2O = 0, + NAT44_ED_DIR_O2I, + NAT44_ED_N_DIR, +} nat44_ed_dir_e; /* Endpoint dependent TCP session state */ -#define NAT44_SES_I2O_FIN 1 -#define NAT44_SES_O2I_FIN 2 -#define NAT44_SES_I2O_FIN_ACK 4 -#define NAT44_SES_O2I_FIN_ACK 8 -#define NAT44_SES_I2O_SYN 16 -#define NAT44_SES_O2I_SYN 32 -#define NAT44_SES_RST 64 +typedef enum +{ + NAT44_ED_TCP_STATE_CLOSED = 0, + NAT44_ED_TCP_STATE_SYN_I2O, + NAT44_ED_TCP_STATE_SYN_O2I, + NAT44_ED_TCP_STATE_ESTABLISHED, + NAT44_ED_TCP_STATE_FIN_I2O, + NAT44_ED_TCP_STATE_FIN_O2I, + NAT44_ED_TCP_STATE_RST_TRANS, + NAT44_ED_TCP_STATE_FIN_TRANS, + NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O, + NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I, + NAT44_ED_TCP_N_STATE, +} nat44_ed_tcp_state_e; + +format_function_t format_nat44_ed_tcp_state; /* Session flags */ #define SNAT_SESSION_FLAG_STATIC_MAPPING (1 << 0) @@ -341,10 +348,7 @@ typedef CLIB_PACKED(struct u16 ext_host_nat_port; /* TCP session state */ - u8 state; - u32 i2o_fin_seq; - u32 o2i_fin_seq; - u64 tcp_closed_timestamp; + nat44_ed_tcp_state_e tcp_state; /* per vrf sessions index */ u32 per_vrf_sessions_index; @@ -668,6 +672,16 @@ typedef struct snat_main_s vnet_main_t *vnet_main; + /* TCP session state machine table: + * first dimension is possible states + * second dimension is direction (in2out/out2in) + * third dimension is TCP flag (SYN, RST, FIN) + * + * value is next state to change to + */ + nat44_ed_tcp_state_e tcp_state_change_table[NAT44_ED_TCP_N_STATE] + [NAT44_ED_N_DIR] + [NAT44_ED_TCP_N_FLAG]; } snat_main_t; typedef struct @@ -789,16 +803,6 @@ nat44_ed_is_interface_outside (snat_interface_t *i) return i->flags & NAT_INTERFACE_FLAG_IS_OUTSIDE; } -/** \brief Check if NAT44 endpoint-dependent TCP session is closed. - @param s NAT session - @return true if session is closed -*/ -always_inline bool -nat44_is_ses_closed (snat_session_t *s) -{ - return s->state == 0xf; -} - /** \brief Check if client initiating TCP connection (received SYN from client) @param t TCP header @return true if client initiating TCP connection diff --git a/src/plugins/nat/nat44-ed/nat44_ed_api.c b/src/plugins/nat/nat44-ed/nat44_ed_api.c index 4664fabfec5..6ab3aaa35d9 100644 --- a/src/plugins/nat/nat44-ed/nat44_ed_api.c +++ b/src/plugins/nat/nat44-ed/nat44_ed_api.c @@ -31,6 +31,8 @@ #include <nat/nat44-ed/nat44_ed.api_enum.h> #include <nat/nat44-ed/nat44_ed.api_types.h> +#include <nat/nat44-ed/nat44_ed_inlines.h> + #define REPLY_MSG_ID_BASE sm->msg_id_base #include <vlibapi/api_helper_macros.h> @@ -1806,26 +1808,7 @@ send_nat44_user_session_v2_details (snat_session_t *s, rmp->ext_host_nat_port = s->ext_host_nat_port; } - sess_timeout_time = s->last_heard; - switch (s->proto) - { - case IP_PROTOCOL_TCP: - if (s->state) - sess_timeout_time += sm->timeouts.tcp.established; - else - sess_timeout_time += sm->timeouts.tcp.transitory; - break; - case IP_PROTOCOL_UDP: - sess_timeout_time += sm->timeouts.udp; - break; - case IP_PROTOCOL_ICMP: - sess_timeout_time += sm->timeouts.icmp; - break; - default: - sess_timeout_time += sm->timeouts.udp; - break; - } - + sess_timeout_time = s->last_heard + nat44_session_get_timeout (sm, s); rmp->is_timed_out = (now >= sess_timeout_time); vl_api_send_msg (reg, (u8 *) rmp); diff --git a/src/plugins/nat/nat44-ed/nat44_ed_cli.c b/src/plugins/nat/nat44-ed/nat44_ed_cli.c index cfd36278674..7693063b8d8 100644 --- a/src/plugins/nat/nat44-ed/nat44_ed_cli.c +++ b/src/plugins/nat/nat44-ed/nat44_ed_cli.c @@ -478,16 +478,12 @@ nat44_show_summary_command_fn (vlib_main_t * vm, unformat_input_t * input, u64 now = vlib_time_now (vm); u64 sess_timeout_time = 0; - u32 udp_sessions = 0; - u32 tcp_sessions = 0; - u32 icmp_sessions = 0; - u32 other_sessions = 0; - - u32 timed_out = 0; - u32 transitory = 0; - u32 transitory_wait_closed = 0; - u32 transitory_closed = 0; - u32 established = 0; + struct + { + u32 total; + u32 timed_out; + } udp = { 0 }, tcp = { 0 }, tcp_established = { 0 }, tcp_transitory = { 0 }, + icmp = { 0 }, other = { 0 }; u32 fib; @@ -501,43 +497,44 @@ nat44_show_summary_command_fn (vlib_main_t * vm, unformat_input_t * input, { pool_foreach (s, tsm->sessions) { - sess_timeout_time = s->last_heard + - (f64) nat44_session_get_timeout (sm, s); - if (now >= sess_timeout_time) - timed_out++; - - switch (s->proto) - { - case IP_PROTOCOL_ICMP: - icmp_sessions++; - break; - case IP_PROTOCOL_TCP: - tcp_sessions++; - if (s->state) - { - if (s->tcp_closed_timestamp) - { - if (now >= s->tcp_closed_timestamp) - { - ++transitory_closed; - } - else - { - ++transitory_wait_closed; - } - } - transitory++; - } - else - established++; - break; - case IP_PROTOCOL_UDP: - udp_sessions++; - break; - default: - ++other_sessions; - break; - } + sess_timeout_time = + s->last_heard + (f64) nat44_session_get_timeout (sm, s); + + switch (s->proto) + { + case IP_PROTOCOL_ICMP: + ++icmp.total; + if (now >= sess_timeout_time) + ++icmp.timed_out; + break; + case IP_PROTOCOL_TCP: + ++tcp.total; + if (now >= sess_timeout_time) + ++tcp.timed_out; + if (nat44_ed_tcp_is_established (s->tcp_state)) + { + ++tcp_established.total; + if (now >= sess_timeout_time) + ++tcp_established.timed_out; + } + else + { + ++tcp_transitory.total; + if (now >= sess_timeout_time) + ++tcp_transitory.timed_out; + } + break; + case IP_PROTOCOL_UDP: + ++udp.total; + if (now >= sess_timeout_time) + ++udp.timed_out; + break; + default: + ++other.total; + if (now >= sess_timeout_time) + ++other.timed_out; + break; + } } nat44_show_lru_summary (vm, tsm, now, sess_timeout_time); count += pool_elts (tsm->sessions); @@ -550,39 +547,40 @@ nat44_show_summary_command_fn (vlib_main_t * vm, unformat_input_t * input, { sess_timeout_time = s->last_heard + (f64) nat44_session_get_timeout (sm, s); - if (now >= sess_timeout_time) - timed_out++; switch (s->proto) { case IP_PROTOCOL_ICMP: - icmp_sessions++; + ++icmp.total; + if (now >= sess_timeout_time) + ++icmp.timed_out; break; case IP_PROTOCOL_TCP: - tcp_sessions++; - if (s->state) + ++tcp.total; + if (now >= sess_timeout_time) + ++tcp.timed_out; + if (nat44_ed_tcp_is_established (s->tcp_state)) { - if (s->tcp_closed_timestamp) - { - if (now >= s->tcp_closed_timestamp) - { - ++transitory_closed; - } - else - { - ++transitory_wait_closed; - } - } - transitory++; + ++tcp_established.total; + if (now >= sess_timeout_time) + ++tcp_established.timed_out; } else - established++; + { + ++tcp_transitory.total; + if (now >= sess_timeout_time) + ++tcp_transitory.timed_out; + } break; case IP_PROTOCOL_UDP: - udp_sessions++; + ++udp.total; + if (now >= sess_timeout_time) + ++udp.timed_out; break; default: - ++other_sessions; + ++other.total; + if (now >= sess_timeout_time) + ++other.timed_out; break; } } @@ -590,18 +588,25 @@ nat44_show_summary_command_fn (vlib_main_t * vm, unformat_input_t * input, count = pool_elts (tsm->sessions); } - vlib_cli_output (vm, "total timed out sessions: %u", timed_out); - vlib_cli_output (vm, "total sessions: %u", count); - vlib_cli_output (vm, "total tcp sessions: %u", tcp_sessions); - vlib_cli_output (vm, "total tcp established sessions: %u", established); - vlib_cli_output (vm, "total tcp transitory sessions: %u", transitory); - vlib_cli_output (vm, "total tcp transitory (WAIT-CLOSED) sessions: %u", - transitory_wait_closed); - vlib_cli_output (vm, "total tcp transitory (CLOSED) sessions: %u", - transitory_closed); - vlib_cli_output (vm, "total udp sessions: %u", udp_sessions); - vlib_cli_output (vm, "total icmp sessions: %u", icmp_sessions); - vlib_cli_output (vm, "total other sessions: %u", other_sessions); + u32 timed_out = + tcp.timed_out + icmp.timed_out + udp.timed_out + other.timed_out; + vlib_cli_output (vm, "total sessions: %u (timed out: %u)", count, timed_out); + vlib_cli_output (vm, "tcp sessions:"); + vlib_cli_output (vm, " total: %u (timed out: %u)", tcp.total, + tcp.timed_out); + vlib_cli_output (vm, " established: %u (timed out: %u)", + tcp_established.total, tcp_established.timed_out); + vlib_cli_output (vm, " transitory: %u (timed out: %u)", + tcp_transitory.total, tcp_transitory.timed_out); + vlib_cli_output (vm, "udp sessions:"); + vlib_cli_output (vm, " total: %u (timed out: %u)", udp.total, + udp.timed_out); + vlib_cli_output (vm, "icmp sessions:"); + vlib_cli_output (vm, " total: %u (timed out: %u)", icmp.total, + icmp.timed_out); + vlib_cli_output (vm, "other sessions:"); + vlib_cli_output (vm, " total: %u (timed out: %u)", other.total, + other.timed_out); return 0; } @@ -1456,7 +1461,8 @@ print: continue; showed_sessions++; } - vlib_cli_output (vm, " %U\n", format_snat_session, tsm, s); + vlib_cli_output (vm, " %U\n", format_snat_session, sm, tsm, s, + vlib_time_now (vm)); } if (filtering) { diff --git a/src/plugins/nat/nat44-ed/nat44_ed_format.c b/src/plugins/nat/nat44-ed/nat44_ed_format.c index a6f63026897..4598c02208f 100644 --- a/src/plugins/nat/nat44-ed/nat44_ed_format.c +++ b/src/plugins/nat/nat44-ed/nat44_ed_format.c @@ -40,29 +40,13 @@ format_nat_addr_and_port_alloc_alg (u8 * s, va_list * args) } u8 * -format_snat_session_state (u8 * s, va_list * args) -{ - u32 i = va_arg (*args, u32); - u8 *t = 0; - - switch (i) - { -#define _(v, N, str) case SNAT_SESSION_##N: t = (u8 *) str; break; - foreach_snat_session_state -#undef _ - default: - t = format (t, "unknown"); - } - s = format (s, "%s", t); - return s; -} - -u8 * format_snat_session (u8 * s, va_list * args) { + snat_main_t *sm = va_arg (*args, snat_main_t *); snat_main_per_thread_data_t *tsm = va_arg (*args, snat_main_per_thread_data_t *); snat_session_t *sess = va_arg (*args, snat_session_t *); + f64 now = va_arg (*args, f64); if (nat44_ed_is_unk_proto (sess->proto)) { @@ -103,8 +87,10 @@ format_snat_session (u8 * s, va_list * args) s = format (s, " o2i flow: %U\n", format_nat_6t_flow, &sess->o2i); s = format (s, " index %llu\n", sess - tsm->sessions); s = format (s, " last heard %.2f\n", sess->last_heard); - s = format (s, " total pkts %d, total bytes %lld\n", - sess->total_pkts, sess->total_bytes); + s = format (s, " timeout in %.2f\n", + nat44_session_get_timeout (sm, sess) - (now - sess->last_heard)); + s = format (s, " total pkts %d, total bytes %lld\n", sess->total_pkts, + sess->total_bytes); if (nat44_ed_is_session_static (sess)) s = format (s, " static translation\n"); else diff --git a/src/plugins/nat/nat44-ed/nat44_ed_in2out.c b/src/plugins/nat/nat44-ed/nat44_ed_in2out.c index 23e0957dabe..99db6010783 100644 --- a/src/plugins/nat/nat44-ed/nat44_ed_in2out.c +++ b/src/plugins/nat/nat44-ed/nat44_ed_in2out.c @@ -49,6 +49,7 @@ typedef struct u8 is_slow_path; u8 translation_via_i2of; u8 lookup_skipped; + u8 tcp_state; } nat_in2out_ed_trace_t; static u8 * @@ -78,7 +79,7 @@ format_nat_in2out_ed_trace (u8 * s, va_list * args) { if (t->lookup_skipped) { - s = format (s, "\n lookup skipped - cached session index used"); + s = format (s, "\n lookup skipped - cached session index used"); } else { @@ -86,6 +87,11 @@ format_nat_in2out_ed_trace (u8 * s, va_list * args) &t->search_key); } } + if (IP_PROTOCOL_TCP == t->i2of.match.proto) + { + s = format (s, "\n TCP state: %U", format_nat44_ed_tcp_state, + t->tcp_state); + } return s; } @@ -675,7 +681,9 @@ nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip, { if (ip->protocol == IP_PROTOCOL_TCP) { - nat44_set_tcp_session_state_i2o (sm, now, s, b, thread_index); + nat44_set_tcp_session_state_i2o ( + sm, now, s, vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags, + thread_index); } /* Accounting */ nat44_session_update_counters (s, now, @@ -697,7 +705,7 @@ nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip, u16 src_port, u16 dst_port, u32 thread_index, u32 rx_sw_if_index, u32 tx_sw_if_index, - f64 now, int is_multi_worker) + int is_multi_worker) { clib_bihash_kv_16_8_t kv, value; snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; @@ -715,12 +723,6 @@ nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b, s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value)); - if (nat44_is_ses_closed (s) - && (!s->tcp_closed_timestamp || now >= s->tcp_closed_timestamp)) - { - nat44_ed_free_session_data (sm, s, thread_index, 0); - nat_ed_session_delete (sm, s, thread_index, 1); - } return 1; } @@ -800,7 +802,7 @@ icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip, { if (PREDICT_FALSE (nat44_ed_not_translate_output_feature ( sm, b, ip, lookup_sport, lookup_dport, thread_index, sw_if_index, - tx_sw_if_index, now, is_multi_worker))) + tx_sw_if_index, is_multi_worker))) { return next; } @@ -1173,22 +1175,6 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm, goto trace0; } - if (s0->tcp_closed_timestamp) - { - if (now >= s0->tcp_closed_timestamp) - { - // session is closed, go slow path, freed in slow path - next[0] = def_slow; - } - else - { - // session in transitory timeout, drop - b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TCP_CLOSED]; - next[0] = NAT_NEXT_DROP; - } - goto trace0; - } - // drop if session expired u64 sess_timeout_time; sess_timeout_time = @@ -1239,7 +1225,9 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm, case IP_PROTOCOL_TCP: vlib_increment_simple_counter (&sm->counters.fastpath.in2out.tcp, thread_index, cntr_sw_if_index0, 1); - nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index); + nat44_set_tcp_session_state_i2o ( + sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags, + thread_index); break; case IP_PROTOCOL_UDP: vlib_increment_simple_counter (&sm->counters.fastpath.in2out.udp, @@ -1282,6 +1270,7 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm, clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of)); clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if)); t->translation_via_i2of = (&s0->i2o == f); + t->tcp_state = s0->tcp_state; } else { @@ -1426,13 +1415,6 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm, s0 = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value0)); - - if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp) - { - nat44_ed_free_session_data (sm, s0, thread_index, 0); - nat_ed_session_delete (sm, s0, thread_index, 1); - s0 = NULL; - } } if (!s0) @@ -1442,7 +1424,7 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm, if (PREDICT_FALSE (nat44_ed_not_translate_output_feature ( sm, b0, ip0, vnet_buffer (b0)->ip.reass.l4_src_port, vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index, - rx_sw_if_index0, tx_sw_if_index0, now, is_multi_worker))) + rx_sw_if_index0, tx_sw_if_index0, is_multi_worker))) goto trace0; /* @@ -1496,7 +1478,9 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm, { vlib_increment_simple_counter (&sm->counters.slowpath.in2out.tcp, thread_index, cntr_sw_if_index0, 1); - nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index); + nat44_set_tcp_session_state_i2o ( + sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags, + thread_index); } else { @@ -1529,6 +1513,7 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm, clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of)); clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if)); t->translation_via_i2of = 1; + t->tcp_state = s0->tcp_state; } else diff --git a/src/plugins/nat/nat44-ed/nat44_ed_inlines.h b/src/plugins/nat/nat44-ed/nat44_ed_inlines.h index cb418960249..a13f250cd3b 100644 --- a/src/plugins/nat/nat44-ed/nat44_ed_inlines.h +++ b/src/plugins/nat/nat44-ed/nat44_ed_inlines.h @@ -24,6 +24,7 @@ #include <vnet/fib/ip4_fib.h> #include <nat/lib/log.h> +#include <nat/lib/ipfix_logging.h> #include <nat/nat44-ed/nat44_ed.h> always_inline void @@ -171,6 +172,26 @@ nat_get_icmp_session_lookup_values (vlib_buffer_t *b, ip4_header_t *ip0, return 0; } +always_inline int +nat44_ed_tcp_is_established (nat44_ed_tcp_state_e state) +{ + static int lookup[] = { + [NAT44_ED_TCP_STATE_CLOSED] = 0, + [NAT44_ED_TCP_STATE_SYN_I2O] = 0, + [NAT44_ED_TCP_STATE_SYN_O2I] = 0, + [NAT44_ED_TCP_STATE_ESTABLISHED] = 1, + [NAT44_ED_TCP_STATE_FIN_I2O] = 1, + [NAT44_ED_TCP_STATE_FIN_O2I] = 1, + [NAT44_ED_TCP_STATE_RST_TRANS] = 0, + [NAT44_ED_TCP_STATE_FIN_TRANS] = 0, + [NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O] = 0, + [NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I] = 0, + [NAT44_ED_TCP_N_STATE] = 0, + }; + ASSERT (state <= ARRAY_LEN (lookup)); + return lookup[state]; +} + always_inline u32 nat44_session_get_timeout (snat_main_t *sm, snat_session_t *s) { @@ -184,10 +205,10 @@ nat44_session_get_timeout (snat_main_t *sm, snat_session_t *s) return sm->timeouts.udp; case IP_PROTOCOL_TCP: { - if (s->state) - return sm->timeouts.tcp.transitory; - else + if (nat44_ed_tcp_is_established (s->tcp_state)) return sm->timeouts.tcp.established; + else + return sm->timeouts.tcp.transitory; } default: return sm->timeouts.udp; @@ -340,8 +361,7 @@ nat_lru_free_one_with_head (snat_main_t *sm, int thread_index, f64 now, sess_timeout_time = s->last_heard + (f64) nat44_session_get_timeout (sm, s); - if (now >= sess_timeout_time || - (s->tcp_closed_timestamp && now >= s->tcp_closed_timestamp)) + if (now >= sess_timeout_time) { nat44_ed_free_session_data (sm, s, thread_index, 0); nat_ed_session_delete (sm, s, thread_index, 0); @@ -701,101 +721,303 @@ is_interface_addr (snat_main_t *sm, vlib_node_runtime_t *node, } always_inline void -nat44_set_tcp_session_state_i2o (snat_main_t *sm, f64 now, snat_session_t *ses, - vlib_buffer_t *b, u32 thread_index) +nat44_ed_session_reopen (u32 thread_index, snat_session_t *s) { - snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; - u8 tcp_flags = vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags; - u32 tcp_ack_number = vnet_buffer (b)->ip.reass.tcp_ack_number; - u32 tcp_seq_number = vnet_buffer (b)->ip.reass.tcp_seq_number; - if ((ses->state == 0) && (tcp_flags & TCP_FLAG_RST)) - ses->state = NAT44_SES_RST; - if ((ses->state == NAT44_SES_RST) && !(tcp_flags & TCP_FLAG_RST)) - ses->state = 0; - if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_SYN) && - (ses->state & NAT44_SES_O2I_SYN)) - ses->state = 0; - if (tcp_flags & TCP_FLAG_SYN) - ses->state |= NAT44_SES_I2O_SYN; - if (tcp_flags & TCP_FLAG_FIN) - { - ses->i2o_fin_seq = clib_net_to_host_u32 (tcp_seq_number); - ses->state |= NAT44_SES_I2O_FIN; - } - if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_O2I_FIN)) + nat_syslog_nat44_sdel (0, s->in2out.fib_index, &s->in2out.addr, + s->in2out.port, &s->ext_host_nat_addr, + s->ext_host_nat_port, &s->out2in.addr, s->out2in.port, + &s->ext_host_addr, s->ext_host_port, s->proto, + nat44_ed_is_twice_nat_session (s)); + + nat_ipfix_logging_nat44_ses_delete ( + thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto, + s->in2out.port, s->out2in.port, s->in2out.fib_index); + nat_ipfix_logging_nat44_ses_create ( + thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto, + s->in2out.port, s->out2in.port, s->in2out.fib_index); + + nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr, + s->in2out.port, &s->ext_host_nat_addr, + s->ext_host_nat_port, &s->out2in.addr, s->out2in.port, + &s->ext_host_addr, s->ext_host_port, s->proto, 0); + s->total_pkts = 0; + s->total_bytes = 0; +} + +always_inline void +nat44_ed_init_tcp_state_stable (snat_main_t *sm) +{ + /* first make sure whole table is initialised in a way where state + * is not changed, then define special cases */ + nat44_ed_tcp_state_e s; + for (s = 0; s < NAT44_ED_TCP_N_STATE; ++s) { - if (clib_net_to_host_u32 (tcp_ack_number) > ses->o2i_fin_seq) + int i; + for (i = 0; i < NAT44_ED_N_DIR; ++i) { - ses->state |= NAT44_SES_O2I_FIN_ACK; - if (nat44_is_ses_closed (ses)) - { // if session is now closed, save the timestamp - ses->tcp_closed_timestamp = now + sm->timeouts.tcp.transitory; - ses->last_lru_update = now; + int j = 0; + for (j = 0; j < NAT44_ED_TCP_N_FLAG; ++j) + { + sm->tcp_state_change_table[s][i][j] = s; } } } - // move the session to proper LRU - if (ses->state) - { - ses->lru_head_index = tsm->tcp_trans_lru_head_index; - } - else - { - ses->lru_head_index = tsm->tcp_estab_lru_head_index; - } - clib_dlist_remove (tsm->lru_pool, ses->lru_index); - clib_dlist_addtail (tsm->lru_pool, ses->lru_head_index, ses->lru_index); + /* CLOSED and any kind of SYN -> HALF-OPEN */ + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_CLOSED][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYN] = + NAT44_ED_TCP_STATE_SYN_I2O; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_CLOSED][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYN] = + NAT44_ED_TCP_STATE_SYN_O2I; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_CLOSED][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYNFIN] = + NAT44_ED_TCP_STATE_SYN_I2O; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_CLOSED][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYNFIN] = + NAT44_ED_TCP_STATE_SYN_O2I; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_CLOSED][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYNFINRST] = + NAT44_ED_TCP_STATE_SYN_I2O; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_CLOSED][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYNFINRST] = + NAT44_ED_TCP_STATE_SYN_O2I; + + /* HALF-OPEN and any kind of SYN in right direction -> ESTABLISHED */ + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_SYN_I2O][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYN] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_SYN_O2I][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYN] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_SYN_I2O][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYNFIN] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_SYN_O2I][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYNFIN] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_SYN_I2O][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYNFINRST] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_SYN_O2I][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYNFINRST] = + NAT44_ED_TCP_STATE_ESTABLISHED; + + /* ESTABLISHED and any kind of RST -> RST_TRANS */ + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_RST] = + NAT44_ED_TCP_STATE_RST_TRANS; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_RST] = + NAT44_ED_TCP_STATE_RST_TRANS; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYNRST] = + NAT44_ED_TCP_STATE_RST_TRANS; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYNRST] = + NAT44_ED_TCP_STATE_RST_TRANS; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_FINRST] = + NAT44_ED_TCP_STATE_RST_TRANS; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_FINRST] = + NAT44_ED_TCP_STATE_RST_TRANS; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYNFINRST] = + NAT44_ED_TCP_STATE_RST_TRANS; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYNFINRST] = + NAT44_ED_TCP_STATE_RST_TRANS; + + /* ESTABLISHED and any kind of FIN without RST -> HALF-CLOSED */ + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_FIN] = + NAT44_ED_TCP_STATE_FIN_I2O; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_FIN] = + NAT44_ED_TCP_STATE_FIN_O2I; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYNFIN] = + NAT44_ED_TCP_STATE_FIN_I2O; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYNFIN] = + NAT44_ED_TCP_STATE_FIN_O2I; + + /* HALF-CLOSED and any kind of FIN -> FIN_TRANS */ + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_I2O][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_FIN] = + NAT44_ED_TCP_STATE_FIN_TRANS; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_O2I][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_FIN] = + NAT44_ED_TCP_STATE_FIN_TRANS; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_I2O][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYNFIN] = + NAT44_ED_TCP_STATE_FIN_TRANS; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_O2I][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYNFIN] = + NAT44_ED_TCP_STATE_FIN_TRANS; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_I2O][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_FINRST] = + NAT44_ED_TCP_STATE_FIN_TRANS; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_O2I][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_FINRST] = + NAT44_ED_TCP_STATE_FIN_TRANS; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_I2O][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYNFINRST] = + NAT44_ED_TCP_STATE_FIN_TRANS; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_O2I][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYNFINRST] = + NAT44_ED_TCP_STATE_FIN_TRANS; + + /* RST_TRANS and anything non-RST -> ESTABLISHED */ + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_NONE] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_NONE] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYN] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYN] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_FIN] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_FIN] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYNFIN] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYNFIN] = + NAT44_ED_TCP_STATE_ESTABLISHED; + + /* FIN_TRANS and any kind of SYN -> HALF-REOPEN */ + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYN] = + NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYN] = + NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYNRST] = + NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYNRST] = + NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYNFIN] = + NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYNFIN] = + NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_I2O] + [NAT44_ED_TCP_FLAG_SYNFINRST] = + NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_O2I] + [NAT44_ED_TCP_FLAG_SYNFINRST] = + NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I; + + /* HALF-REOPEN and any kind of SYN in right direction -> ESTABLISHED */ + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O] + [NAT44_ED_DIR_O2I][NAT44_ED_TCP_FLAG_SYN] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I] + [NAT44_ED_DIR_I2O][NAT44_ED_TCP_FLAG_SYN] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O] + [NAT44_ED_DIR_O2I][NAT44_ED_TCP_FLAG_SYNRST] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I] + [NAT44_ED_DIR_I2O][NAT44_ED_TCP_FLAG_SYNRST] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O] + [NAT44_ED_DIR_O2I][NAT44_ED_TCP_FLAG_SYNFIN] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I] + [NAT44_ED_DIR_I2O][NAT44_ED_TCP_FLAG_SYNFIN] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O] + [NAT44_ED_DIR_O2I][NAT44_ED_TCP_FLAG_SYNFINRST] = + NAT44_ED_TCP_STATE_ESTABLISHED; + sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I] + [NAT44_ED_DIR_I2O][NAT44_ED_TCP_FLAG_SYNFINRST] = + NAT44_ED_TCP_STATE_ESTABLISHED; } +/* TCP state tracking according to RFC 7857 (and RFC 6146, which is referenced + * by RFC 7857). Our implementation also goes beyond by supporting creation of + * a new session while old session is in transitory timeout after seeing FIN + * packets from both sides. */ always_inline void -nat44_set_tcp_session_state_o2i (snat_main_t *sm, f64 now, snat_session_t *ses, - u8 tcp_flags, u32 tcp_ack_number, - u32 tcp_seq_number, u32 thread_index) +nat44_set_tcp_session_state (snat_main_t *sm, f64 now, snat_session_t *ses, + u8 tcp_flags, u32 thread_index, + nat44_ed_dir_e dir) { snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; - if ((ses->state == 0) && (tcp_flags & TCP_FLAG_RST)) - ses->state = NAT44_SES_RST; - if ((ses->state == NAT44_SES_RST) && !(tcp_flags & TCP_FLAG_RST)) - ses->state = 0; - if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_SYN) && - (ses->state & NAT44_SES_O2I_SYN)) - ses->state = 0; - if (tcp_flags & TCP_FLAG_SYN) - ses->state |= NAT44_SES_O2I_SYN; - if (tcp_flags & TCP_FLAG_FIN) - { - ses->o2i_fin_seq = clib_net_to_host_u32 (tcp_seq_number); - ses->state |= NAT44_SES_O2I_FIN; - } - if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_FIN)) + nat44_ed_tcp_flag_e flags = + tcp_flags & (TCP_FLAG_SYN | TCP_FLAG_FIN | TCP_FLAG_RST); + + u8 old_state = ses->tcp_state; + ses->tcp_state = sm->tcp_state_change_table[ses->tcp_state][dir][flags]; + + if (old_state != ses->tcp_state) { - if (clib_net_to_host_u32 (tcp_ack_number) > ses->i2o_fin_seq) - ses->state |= NAT44_SES_I2O_FIN_ACK; - if (nat44_is_ses_closed (ses)) - { // if session is now closed, save the timestamp - ses->tcp_closed_timestamp = now + sm->timeouts.tcp.transitory; - ses->last_lru_update = now; + if (nat44_ed_tcp_is_established (ses->tcp_state)) + { + if (NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O == old_state || + NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I == old_state) + { + nat44_ed_session_reopen (thread_index, ses); + } + ses->lru_head_index = tsm->tcp_estab_lru_head_index; } + else + { + if (NAT44_ED_TCP_STATE_ESTABLISHED == old_state) + { // need to update last heard otherwise session might get + // immediately timed out if it has been idle longer than + // transitory timeout + ses->last_heard = now; + } + ses->lru_head_index = tsm->tcp_trans_lru_head_index; + } + ses->last_lru_update = now; + clib_dlist_remove (tsm->lru_pool, ses->lru_index); + clib_dlist_addtail (tsm->lru_pool, ses->lru_head_index, ses->lru_index); } - // move the session to proper LRU - if (ses->state) - { - ses->lru_head_index = tsm->tcp_trans_lru_head_index; - } - else - { - ses->lru_head_index = tsm->tcp_estab_lru_head_index; - } - clib_dlist_remove (tsm->lru_pool, ses->lru_index); - clib_dlist_addtail (tsm->lru_pool, ses->lru_head_index, ses->lru_index); +} + +always_inline void +nat44_set_tcp_session_state_i2o (snat_main_t *sm, f64 now, snat_session_t *ses, + u8 tcp_flags, u32 thread_index) +{ + return nat44_set_tcp_session_state (sm, now, ses, tcp_flags, thread_index, + NAT44_ED_DIR_I2O); +} + +always_inline void +nat44_set_tcp_session_state_o2i (snat_main_t *sm, f64 now, snat_session_t *ses, + u8 tcp_flags, u32 thread_index) +{ + return nat44_set_tcp_session_state (sm, now, ses, tcp_flags, thread_index, + NAT44_ED_DIR_O2I); } always_inline void nat44_session_update_counters (snat_session_t *s, f64 now, uword bytes, u32 thread_index) { - s->last_heard = now; + if (NAT44_ED_TCP_STATE_RST_TRANS != s->tcp_state && + NAT44_ED_TCP_STATE_FIN_TRANS != s->tcp_state && + NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O != s->tcp_state && + NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I != s->tcp_state) + { + s->last_heard = now; + } s->total_pkts++; s->total_bytes += bytes; } diff --git a/src/plugins/nat/nat44-ed/nat44_ed_out2in.c b/src/plugins/nat/nat44-ed/nat44_ed_out2in.c index 5ad57a17098..40a72122140 100644 --- a/src/plugins/nat/nat44-ed/nat44_ed_out2in.c +++ b/src/plugins/nat/nat44-ed/nat44_ed_out2in.c @@ -41,7 +41,6 @@ typedef enum NAT_ED_SP_REASON_NO_REASON, NAT_ED_SP_REASON_LOOKUP_FAILED, NAT_ED_SP_REASON_VRF_EXPIRED, - NAT_ED_SP_TCP_CLOSED, NAT_ED_SP_SESS_EXPIRED, } nat_slow_path_reason_e; @@ -57,6 +56,7 @@ typedef struct u8 is_slow_path; u8 translation_via_i2of; u8 lookup_skipped; + u8 tcp_state; nat_slow_path_reason_e slow_path_reason; } nat44_ed_out2in_trace_t; @@ -72,8 +72,6 @@ format_slow_path_reason (u8 *s, va_list *args) return format (s, "slow path because lookup failed"); case NAT_ED_SP_REASON_VRF_EXPIRED: return format (s, "slow path because vrf expired"); - case NAT_ED_SP_TCP_CLOSED: - return format (s, "slow path because tcp closed"); case NAT_ED_SP_SESS_EXPIRED: return format (s, "slow path because session expired"); } @@ -107,14 +105,19 @@ format_nat44_ed_out2in_trace (u8 * s, va_list * args) { if (t->lookup_skipped) { - s = format (s, "\n lookup skipped - cached session index used"); + s = format (s, "\n lookup skipped - cached session index used"); } else { s = format (s, "\n search key %U", format_ed_session_kvp, &t->search_key); } - s = format (s, "\n %U", format_slow_path_reason, t->slow_path_reason); + s = format (s, "\n %U", format_slow_path_reason, t->slow_path_reason); + } + if (IP_PROTOCOL_TCP == t->i2of.match.proto) + { + s = format (s, "\n TCP state: %U", format_nat44_ed_tcp_state, + t->tcp_state); } return s; @@ -645,10 +648,9 @@ create_bypass_for_fwd (snat_main_t *sm, vlib_buffer_t *b, snat_session_t *s, if (ip->protocol == IP_PROTOCOL_TCP) { - tcp_header_t *tcp = ip4_next_header (ip); - nat44_set_tcp_session_state_o2i (sm, now, s, tcp->flags, - tcp->ack_number, tcp->seq_number, - thread_index); + nat44_set_tcp_session_state_o2i ( + sm, now, s, vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags, + thread_index); } /* Accounting */ @@ -883,23 +885,6 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm, goto trace0; } - if (s0->tcp_closed_timestamp) - { - if (now >= s0->tcp_closed_timestamp) - { - // session is closed, go slow path, freed in slow path - slow_path_reason = NAT_ED_SP_TCP_CLOSED; - next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH; - } - else - { - // session in transitory timeout, drop - b0->error = node->errors[NAT_OUT2IN_ED_ERROR_TCP_CLOSED]; - next[0] = NAT_NEXT_DROP; - } - goto trace0; - } - // drop if session expired u64 sess_timeout_time; sess_timeout_time = @@ -981,10 +966,6 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm, nat44_set_tcp_session_state_o2i (sm, now, s0, vnet_buffer (b0)->ip. reass.icmp_type_or_tcp_flags, - vnet_buffer (b0)->ip. - reass.tcp_ack_number, - vnet_buffer (b0)->ip. - reass.tcp_seq_number, thread_index); break; case IP_PROTOCOL_UDP: @@ -1028,6 +1009,7 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm, clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of)); clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if)); t->translation_via_i2of = (&s0->i2o == f); + t->tcp_state = s0->tcp_state; } else { @@ -1170,13 +1152,6 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm, s0 = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value0)); - - if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp) - { - nat44_ed_free_session_data (sm, s0, thread_index, 0); - nat_ed_session_delete (sm, s0, thread_index, 1); - s0 = NULL; - } } if (!s0) @@ -1264,10 +1239,6 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm, nat44_set_tcp_session_state_o2i (sm, now, s0, vnet_buffer (b0)->ip. reass.icmp_type_or_tcp_flags, - vnet_buffer (b0)->ip. - reass.tcp_ack_number, - vnet_buffer (b0)->ip. - reass.tcp_seq_number, thread_index); } else @@ -1300,6 +1271,7 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm, t->session_index = s0 - tsm->sessions; clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of)); clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if)); + t->tcp_state = s0->tcp_state; } else { diff --git a/src/plugins/nat/nat44-ed/tcp_conn_track.rst b/src/plugins/nat/nat44-ed/tcp_conn_track.rst new file mode 100644 index 00000000000..faf0dec8b06 --- /dev/null +++ b/src/plugins/nat/nat44-ed/tcp_conn_track.rst @@ -0,0 +1,65 @@ +NAT44ED TCP connection tracking +=============================== + +TCP connection tracking in endpoint-dependent NAT is based on RFC 7857 +and RFC 6146, which RFC 7857 references. + +See RFC 7857 for the original graph - our graph is slightly different, +allowing creation of new session, while an old session is in transitory +timeout after seeing FIN packets from both sides: + +After discussion on vpp-dev and with Andrew Yourtschenko we agreed that +it's friendly behaviour to allow creating a new session while the old +one is closed and in transitory timeout. The alternative means VPP is +insisting that a 5-tuple connection cannot be created while an old one +is finished and timing out. There is no apparent reason why our change +would break anything and we agreed that it could only help users. + +:: + + + +------------transitory timeout----------------+ + | | + | +-------------+ | + | session created---->+ CLOSED | | + | +-------------+ | + | | | | ++-----+ | SYN SYN | +| v v IN2OUT OUT2IN | +| +->session removed | | | +| | ^ ^ ^ ^ ^ v v | +| | | | | | | +-------+ +-------+ | +| | | | | | +----transitory timeout---+SYN_I2O| |SYN_O2I+--+ +| | | | | | +---------+ |-------| |-------| +| | | | | +-transitory---+RST_TRANS| | | +| | | | | timeout +---------+ SYN SYN +| | | | | | ^ OUT2IN IN2OUT +| | | | | | | | | +| | | | | | | v v +| | | | | | | +-----------+ +| | | | | | +--RST----+ESTABLISHED+<-SYN IN2OUT-+ +| | | | | | +-----------+ | +| | | | | +---data pkt-----^ | | | ^ | +| | | | | | | | | | +| | | | +----established timeout---------------+ | | | | +| | | | | | | | +| | | | +-----FIN IN2OUT---------+ | | | +| | | | v | | | +| | | | +-------+ +--FIN OUT2IN----+ | | +| | | +--established---+FIN_I2O| | | | +| | | timeout +-------+ v +-SYN OUT2IN-+ | +| | | | +-------+ | | +| | +----established-------------+FIN_O2I| +--------------+ | +| | timeout | +-------+ |REOPEN_SYN_I2O| +--------------+ +| | | | +--------------+ |REOPEN_SYN_O2I| +| | FIN FIN ^ | +--------------+ +| | OUT2IN IN2OUT | | ^ | +| | | | | | | | +| | v v | | | | +| | +-------------+ | | | | +| +--transitory timeout---+ FIN_TRANS +-SYN IN2OUT-+ | | | +| +-------------+ | | | +| | | | | +| +--------SYN OUT2IN----|-----------+ | +| v | ++------------------transitory timeout-------------------+<-------------+ |