summaryrefslogtreecommitdiffstats
path: root/src/plugins/nat
diff options
context:
space:
mode:
Diffstat (limited to 'src/plugins/nat')
-rw-r--r-- src/plugins/nat/lib/log.h 15
-rw-r--r-- src/plugins/nat/nat44-ed/nat44_ed.c 47
-rw-r--r-- src/plugins/nat/nat44-ed/nat44_ed.h 88
-rw-r--r-- src/plugins/nat/nat44-ed/nat44_ed_api.c 23
-rw-r--r-- src/plugins/nat/nat44-ed/nat44_ed_cli.c 166
-rw-r--r-- src/plugins/nat/nat44-ed/nat44_ed_format.c 26
-rw-r--r-- src/plugins/nat/nat44-ed/nat44_ed_in2out.c 57
-rw-r--r-- src/plugins/nat/nat44-ed/nat44_ed_inlines.h 380
-rw-r--r-- src/plugins/nat/nat44-ed/nat44_ed_out2in.c 54
-rw-r--r-- src/plugins/nat/nat44-ed/tcp_conn_track.rst 65
10 files changed, 589 insertions, 332 deletions
diff --git a/src/plugins/nat/lib/log.h b/src/plugins/nat/lib/log.h
index 26bd93f2589..a82028ed8bf 100644
--- a/src/plugins/nat/lib/log.h
+++ b/src/plugins/nat/lib/log.h
@@ -21,20 +21,7 @@
#include <vppinfra/elog.h>
-#define foreach_nat_log_level \
- _ (0x00, LOG_NONE) \
- _ (0x01, LOG_ERROR) \
- _ (0x02, LOG_WARNING) \
- _ (0x03, LOG_NOTICE) \
- _ (0x04, LOG_INFO) \
- _ (0x05, LOG_DEBUG)
-
-typedef enum nat_log_level_t_
-{
-#define _(n, f) NAT_##f = n,
- foreach_nat_log_level
-#undef _
-} nat_log_level_t;
+#include <nat/lib/nat_types.api_types.h>
#define nat_elog(_pm, _level, _str) \
do \
diff --git a/src/plugins/nat/nat44-ed/nat44_ed.c b/src/plugins/nat/nat44-ed/nat44_ed.c
index d3ef3d54f89..e389a81aab1 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed.c
+++ b/src/plugins/nat/nat44-ed/nat44_ed.c
@@ -2390,6 +2390,8 @@ nat44_plugin_enable (nat44_config_t c)
nat44_ed_db_init (sm->max_translations_per_thread, sm->translation_buckets);
+ nat44_ed_init_tcp_state_stable (sm);
+
nat_affinity_enable ();
nat_reset_timeouts (&sm->timeouts);
@@ -4066,6 +4068,51 @@ nat_syslog_nat44_sdel (u32 ssubix, u32 sfibix, ip4_address_t *isaddr,
is_twicenat);
}
+/* Format callback: append a human-readable description of a NAT44-ED TCP
+ * tracking state to s.
+ *
+ * Expects one variadic argument, the nat44_ed_tcp_state_e to describe.
+ * Returns s with the description appended; a value matching none of the
+ * enumerators leaves s untouched. */
+u8 *
+format_nat44_ed_tcp_state (u8 *s, va_list *args)
+{
+  nat44_ed_tcp_state_e state = va_arg (*args, nat44_ed_tcp_state_e);
+
+  switch (state)
+    {
+    case NAT44_ED_TCP_STATE_CLOSED:
+      return format (s, "closed");
+    case NAT44_ED_TCP_STATE_SYN_I2O:
+      return format (s, "SYN seen in in2out direction");
+    case NAT44_ED_TCP_STATE_SYN_O2I:
+      return format (s, "SYN seen in out2in direction");
+    case NAT44_ED_TCP_STATE_ESTABLISHED:
+      return format (s, "SYN seen in both directions/established");
+    case NAT44_ED_TCP_STATE_FIN_I2O:
+      return format (s, "FIN seen in in2out direction");
+    case NAT44_ED_TCP_STATE_FIN_O2I:
+      return format (s, "FIN seen in out2in direction");
+    case NAT44_ED_TCP_STATE_RST_TRANS:
+      return format (s, "RST seen/transitory timeout");
+    case NAT44_ED_TCP_STATE_FIN_TRANS:
+      return format (s, "FIN seen in both directions/transitory timeout");
+    case NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I:
+      return format (s,
+		     "FIN seen in both directions/transitory timeout/session "
+		     "reopening in out2in direction");
+    case NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O:
+      return format (s,
+		     "FIN seen in both directions/transitory timeout/session "
+		     "reopening in in2out direction");
+    case NAT44_ED_TCP_N_STATE:
+      /* the enumerator count is not a state a session may be in */
+      return format (s, "BUG! unexpected N_STATE! BUG!");
+    }
+  return s;
+}
+
/*
* fd.io coding-style-patch-verification: ON
*
diff --git a/src/plugins/nat/nat44-ed/nat44_ed.h b/src/plugins/nat/nat44-ed/nat44_ed.h
index 9772f1ec79a..0706785514b 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed.h
+++ b/src/plugins/nat/nat44-ed/nat44_ed.h
@@ -105,33 +105,12 @@ typedef enum
#undef _
} nat_addr_and_port_alloc_alg_t;
-/* Session state */
-#define foreach_snat_session_state \
- _(0, UNKNOWN, "unknown") \
- _(1, UDP_ACTIVE, "udp-active") \
- _(2, TCP_SYN_SENT, "tcp-syn-sent") \
- _(3, TCP_ESTABLISHED, "tcp-established") \
- _(4, TCP_FIN_WAIT, "tcp-fin-wait") \
- _(5, TCP_CLOSE_WAIT, "tcp-close-wait") \
- _(6, TCP_CLOSING, "tcp-closing") \
- _(7, TCP_LAST_ACK, "tcp-last-ack") \
- _(8, TCP_CLOSED, "tcp-closed") \
- _(9, ICMP_ACTIVE, "icmp-active")
-
-typedef enum
-{
-#define _(v, N, s) SNAT_SESSION_##N = v,
- foreach_snat_session_state
-#undef _
-} snat_session_state_t;
-
#define foreach_nat_in2out_ed_error \
_ (UNSUPPORTED_PROTOCOL, "unsupported protocol") \
_ (OUT_OF_PORTS, "out of ports") \
_ (BAD_ICMP_TYPE, "unsupported ICMP type") \
_ (MAX_SESSIONS_EXCEEDED, "maximum sessions exceeded") \
_ (NON_SYN, "non-SYN packet try to create session") \
- _ (TCP_CLOSED, "drops due to TCP in transitory timeout") \
_ (TRNSL_FAILED, "couldn't translate packet")
typedef enum
@@ -161,15 +140,43 @@ typedef enum
NAT_OUT2IN_ED_N_ERROR,
} nat_out2in_ed_error_t;
+/* Combinations of the TCP FIN, SYN and RST flags, encoded as a bitmask
+ * (FIN = 1, SYN = 2, RST = 4) so that every combination has its own
+ * enumerator. */
+typedef enum
+{
+  NAT44_ED_TCP_FLAG_NONE = 0,
+  NAT44_ED_TCP_FLAG_FIN = 1,
+  NAT44_ED_TCP_FLAG_SYN = 2,
+  NAT44_ED_TCP_FLAG_SYNFIN = NAT44_ED_TCP_FLAG_SYN | NAT44_ED_TCP_FLAG_FIN,
+  NAT44_ED_TCP_FLAG_RST = 4,
+  NAT44_ED_TCP_FLAG_FINRST = NAT44_ED_TCP_FLAG_FIN | NAT44_ED_TCP_FLAG_RST,
+  NAT44_ED_TCP_FLAG_SYNRST = NAT44_ED_TCP_FLAG_SYN | NAT44_ED_TCP_FLAG_RST,
+  NAT44_ED_TCP_FLAG_SYNFINRST =
+    NAT44_ED_TCP_FLAG_SYN | NAT44_ED_TCP_FLAG_FIN | NAT44_ED_TCP_FLAG_RST,
+  /* number of flag combinations; must stay last */
+  NAT44_ED_TCP_N_FLAG,
+} nat44_ed_tcp_flag_e;
+
+/* Direction in which a packet crosses the NAT. */
+typedef enum
+{
+  NAT44_ED_DIR_I2O = 0, /* in2out */
+  NAT44_ED_DIR_O2I = 1, /* out2in */
+  /* number of directions; must stay last */
+  NAT44_ED_N_DIR,
+} nat44_ed_dir_e;
/* Endpoint dependent TCP session state */
-#define NAT44_SES_I2O_FIN 1
-#define NAT44_SES_O2I_FIN 2
-#define NAT44_SES_I2O_FIN_ACK 4
-#define NAT44_SES_O2I_FIN_ACK 8
-#define NAT44_SES_I2O_SYN 16
-#define NAT44_SES_O2I_SYN 32
-#define NAT44_SES_RST 64
+typedef enum
+{
+  /* closed */
+  NAT44_ED_TCP_STATE_CLOSED = 0,
+  /* SYN seen in in2out direction */
+  NAT44_ED_TCP_STATE_SYN_I2O = 1,
+  /* SYN seen in out2in direction */
+  NAT44_ED_TCP_STATE_SYN_O2I = 2,
+  /* SYN seen in both directions */
+  NAT44_ED_TCP_STATE_ESTABLISHED = 3,
+  /* FIN seen in in2out direction */
+  NAT44_ED_TCP_STATE_FIN_I2O = 4,
+  /* FIN seen in out2in direction */
+  NAT44_ED_TCP_STATE_FIN_O2I = 5,
+  /* RST seen, transitory timeout applies */
+  NAT44_ED_TCP_STATE_RST_TRANS = 6,
+  /* FIN seen in both directions, transitory timeout applies */
+  NAT44_ED_TCP_STATE_FIN_TRANS = 7,
+  /* as FIN_TRANS, session being reopened in in2out direction */
+  NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O = 8,
+  /* as FIN_TRANS, session being reopened in out2in direction */
+  NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I = 9,
+  /* number of states; must stay last */
+  NAT44_ED_TCP_N_STATE,
+} nat44_ed_tcp_state_e;
+
+format_function_t format_nat44_ed_tcp_state;
/* Session flags */
#define SNAT_SESSION_FLAG_STATIC_MAPPING (1 << 0)
@@ -341,10 +348,7 @@ typedef CLIB_PACKED(struct
u16 ext_host_nat_port;
/* TCP session state */
- u8 state;
- u32 i2o_fin_seq;
- u32 o2i_fin_seq;
- u64 tcp_closed_timestamp;
+ nat44_ed_tcp_state_e tcp_state;
/* per vrf sessions index */
u32 per_vrf_sessions_index;
@@ -668,6 +672,16 @@ typedef struct snat_main_s
vnet_main_t *vnet_main;
+ /* TCP session state machine table:
+ * first dimension is possible states
+ * second dimension is direction (in2out/out2in)
+ * third dimension is TCP flag (SYN, RST, FIN)
+ *
+ * value is next state to change to
+ */
+ nat44_ed_tcp_state_e tcp_state_change_table[NAT44_ED_TCP_N_STATE]
+ [NAT44_ED_N_DIR]
+ [NAT44_ED_TCP_N_FLAG];
} snat_main_t;
typedef struct
@@ -789,16 +803,6 @@ nat44_ed_is_interface_outside (snat_interface_t *i)
return i->flags & NAT_INTERFACE_FLAG_IS_OUTSIDE;
}
-/** \brief Check if NAT44 endpoint-dependent TCP session is closed.
- @param s NAT session
- @return true if session is closed
-*/
-always_inline bool
-nat44_is_ses_closed (snat_session_t *s)
-{
- return s->state == 0xf;
-}
-
/** \brief Check if client initiating TCP connection (received SYN from client)
@param t TCP header
@return true if client initiating TCP connection
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_api.c b/src/plugins/nat/nat44-ed/nat44_ed_api.c
index 4664fabfec5..6ab3aaa35d9 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed_api.c
+++ b/src/plugins/nat/nat44-ed/nat44_ed_api.c
@@ -31,6 +31,8 @@
#include <nat/nat44-ed/nat44_ed.api_enum.h>
#include <nat/nat44-ed/nat44_ed.api_types.h>
+#include <nat/nat44-ed/nat44_ed_inlines.h>
+
#define REPLY_MSG_ID_BASE sm->msg_id_base
#include <vlibapi/api_helper_macros.h>
@@ -1806,26 +1808,7 @@ send_nat44_user_session_v2_details (snat_session_t *s,
rmp->ext_host_nat_port = s->ext_host_nat_port;
}
- sess_timeout_time = s->last_heard;
- switch (s->proto)
- {
- case IP_PROTOCOL_TCP:
- if (s->state)
- sess_timeout_time += sm->timeouts.tcp.established;
- else
- sess_timeout_time += sm->timeouts.tcp.transitory;
- break;
- case IP_PROTOCOL_UDP:
- sess_timeout_time += sm->timeouts.udp;
- break;
- case IP_PROTOCOL_ICMP:
- sess_timeout_time += sm->timeouts.icmp;
- break;
- default:
- sess_timeout_time += sm->timeouts.udp;
- break;
- }
-
+ sess_timeout_time = s->last_heard + nat44_session_get_timeout (sm, s);
rmp->is_timed_out = (now >= sess_timeout_time);
vl_api_send_msg (reg, (u8 *) rmp);
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_cli.c b/src/plugins/nat/nat44-ed/nat44_ed_cli.c
index cfd36278674..7693063b8d8 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed_cli.c
+++ b/src/plugins/nat/nat44-ed/nat44_ed_cli.c
@@ -478,16 +478,12 @@ nat44_show_summary_command_fn (vlib_main_t * vm, unformat_input_t * input,
u64 now = vlib_time_now (vm);
u64 sess_timeout_time = 0;
- u32 udp_sessions = 0;
- u32 tcp_sessions = 0;
- u32 icmp_sessions = 0;
- u32 other_sessions = 0;
-
- u32 timed_out = 0;
- u32 transitory = 0;
- u32 transitory_wait_closed = 0;
- u32 transitory_closed = 0;
- u32 established = 0;
+ struct
+ {
+ u32 total;
+ u32 timed_out;
+ } udp = { 0 }, tcp = { 0 }, tcp_established = { 0 }, tcp_transitory = { 0 },
+ icmp = { 0 }, other = { 0 };
u32 fib;
@@ -501,43 +497,44 @@ nat44_show_summary_command_fn (vlib_main_t * vm, unformat_input_t * input,
{
pool_foreach (s, tsm->sessions)
{
- sess_timeout_time = s->last_heard +
- (f64) nat44_session_get_timeout (sm, s);
- if (now >= sess_timeout_time)
- timed_out++;
-
- switch (s->proto)
- {
- case IP_PROTOCOL_ICMP:
- icmp_sessions++;
- break;
- case IP_PROTOCOL_TCP:
- tcp_sessions++;
- if (s->state)
- {
- if (s->tcp_closed_timestamp)
- {
- if (now >= s->tcp_closed_timestamp)
- {
- ++transitory_closed;
- }
- else
- {
- ++transitory_wait_closed;
- }
- }
- transitory++;
- }
- else
- established++;
- break;
- case IP_PROTOCOL_UDP:
- udp_sessions++;
- break;
- default:
- ++other_sessions;
- break;
- }
+ sess_timeout_time =
+ s->last_heard + (f64) nat44_session_get_timeout (sm, s);
+
+ switch (s->proto)
+ {
+ case IP_PROTOCOL_ICMP:
+ ++icmp.total;
+ if (now >= sess_timeout_time)
+ ++icmp.timed_out;
+ break;
+ case IP_PROTOCOL_TCP:
+ ++tcp.total;
+ if (now >= sess_timeout_time)
+ ++tcp.timed_out;
+ if (nat44_ed_tcp_is_established (s->tcp_state))
+ {
+ ++tcp_established.total;
+ if (now >= sess_timeout_time)
+ ++tcp_established.timed_out;
+ }
+ else
+ {
+ ++tcp_transitory.total;
+ if (now >= sess_timeout_time)
+ ++tcp_transitory.timed_out;
+ }
+ break;
+ case IP_PROTOCOL_UDP:
+ ++udp.total;
+ if (now >= sess_timeout_time)
+ ++udp.timed_out;
+ break;
+ default:
+ ++other.total;
+ if (now >= sess_timeout_time)
+ ++other.timed_out;
+ break;
+ }
}
nat44_show_lru_summary (vm, tsm, now, sess_timeout_time);
count += pool_elts (tsm->sessions);
@@ -550,39 +547,40 @@ nat44_show_summary_command_fn (vlib_main_t * vm, unformat_input_t * input,
{
sess_timeout_time = s->last_heard +
(f64) nat44_session_get_timeout (sm, s);
- if (now >= sess_timeout_time)
- timed_out++;
switch (s->proto)
{
case IP_PROTOCOL_ICMP:
- icmp_sessions++;
+ ++icmp.total;
+ if (now >= sess_timeout_time)
+ ++icmp.timed_out;
break;
case IP_PROTOCOL_TCP:
- tcp_sessions++;
- if (s->state)
+ ++tcp.total;
+ if (now >= sess_timeout_time)
+ ++tcp.timed_out;
+ if (nat44_ed_tcp_is_established (s->tcp_state))
{
- if (s->tcp_closed_timestamp)
- {
- if (now >= s->tcp_closed_timestamp)
- {
- ++transitory_closed;
- }
- else
- {
- ++transitory_wait_closed;
- }
- }
- transitory++;
+ ++tcp_established.total;
+ if (now >= sess_timeout_time)
+ ++tcp_established.timed_out;
}
else
- established++;
+ {
+ ++tcp_transitory.total;
+ if (now >= sess_timeout_time)
+ ++tcp_transitory.timed_out;
+ }
break;
case IP_PROTOCOL_UDP:
- udp_sessions++;
+ ++udp.total;
+ if (now >= sess_timeout_time)
+ ++udp.timed_out;
break;
default:
- ++other_sessions;
+ ++other.total;
+ if (now >= sess_timeout_time)
+ ++other.timed_out;
break;
}
}
@@ -590,18 +588,25 @@ nat44_show_summary_command_fn (vlib_main_t * vm, unformat_input_t * input,
count = pool_elts (tsm->sessions);
}
- vlib_cli_output (vm, "total timed out sessions: %u", timed_out);
- vlib_cli_output (vm, "total sessions: %u", count);
- vlib_cli_output (vm, "total tcp sessions: %u", tcp_sessions);
- vlib_cli_output (vm, "total tcp established sessions: %u", established);
- vlib_cli_output (vm, "total tcp transitory sessions: %u", transitory);
- vlib_cli_output (vm, "total tcp transitory (WAIT-CLOSED) sessions: %u",
- transitory_wait_closed);
- vlib_cli_output (vm, "total tcp transitory (CLOSED) sessions: %u",
- transitory_closed);
- vlib_cli_output (vm, "total udp sessions: %u", udp_sessions);
- vlib_cli_output (vm, "total icmp sessions: %u", icmp_sessions);
- vlib_cli_output (vm, "total other sessions: %u", other_sessions);
+ u32 timed_out =
+ tcp.timed_out + icmp.timed_out + udp.timed_out + other.timed_out;
+ vlib_cli_output (vm, "total sessions: %u (timed out: %u)", count, timed_out);
+ vlib_cli_output (vm, "tcp sessions:");
+ vlib_cli_output (vm, " total: %u (timed out: %u)", tcp.total,
+ tcp.timed_out);
+ vlib_cli_output (vm, " established: %u (timed out: %u)",
+ tcp_established.total, tcp_established.timed_out);
+ vlib_cli_output (vm, " transitory: %u (timed out: %u)",
+ tcp_transitory.total, tcp_transitory.timed_out);
+ vlib_cli_output (vm, "udp sessions:");
+ vlib_cli_output (vm, " total: %u (timed out: %u)", udp.total,
+ udp.timed_out);
+ vlib_cli_output (vm, "icmp sessions:");
+ vlib_cli_output (vm, " total: %u (timed out: %u)", icmp.total,
+ icmp.timed_out);
+ vlib_cli_output (vm, "other sessions:");
+ vlib_cli_output (vm, " total: %u (timed out: %u)", other.total,
+ other.timed_out);
return 0;
}
@@ -1456,7 +1461,8 @@ print:
continue;
showed_sessions++;
}
- vlib_cli_output (vm, " %U\n", format_snat_session, tsm, s);
+ vlib_cli_output (vm, " %U\n", format_snat_session, sm, tsm, s,
+ vlib_time_now (vm));
}
if (filtering)
{
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_format.c b/src/plugins/nat/nat44-ed/nat44_ed_format.c
index a6f63026897..4598c02208f 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed_format.c
+++ b/src/plugins/nat/nat44-ed/nat44_ed_format.c
@@ -40,29 +40,13 @@ format_nat_addr_and_port_alloc_alg (u8 * s, va_list * args)
}
u8 *
-format_snat_session_state (u8 * s, va_list * args)
-{
- u32 i = va_arg (*args, u32);
- u8 *t = 0;
-
- switch (i)
- {
-#define _(v, N, str) case SNAT_SESSION_##N: t = (u8 *) str; break;
- foreach_snat_session_state
-#undef _
- default:
- t = format (t, "unknown");
- }
- s = format (s, "%s", t);
- return s;
-}
-
-u8 *
format_snat_session (u8 * s, va_list * args)
{
+ snat_main_t *sm = va_arg (*args, snat_main_t *);
snat_main_per_thread_data_t *tsm =
va_arg (*args, snat_main_per_thread_data_t *);
snat_session_t *sess = va_arg (*args, snat_session_t *);
+ f64 now = va_arg (*args, f64);
if (nat44_ed_is_unk_proto (sess->proto))
{
@@ -103,8 +87,10 @@ format_snat_session (u8 * s, va_list * args)
s = format (s, " o2i flow: %U\n", format_nat_6t_flow, &sess->o2i);
s = format (s, " index %llu\n", sess - tsm->sessions);
s = format (s, " last heard %.2f\n", sess->last_heard);
- s = format (s, " total pkts %d, total bytes %lld\n",
- sess->total_pkts, sess->total_bytes);
+ s = format (s, " timeout in %.2f\n",
+ nat44_session_get_timeout (sm, sess) - (now - sess->last_heard));
+ s = format (s, " total pkts %d, total bytes %lld\n", sess->total_pkts,
+ sess->total_bytes);
if (nat44_ed_is_session_static (sess))
s = format (s, " static translation\n");
else
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_in2out.c b/src/plugins/nat/nat44-ed/nat44_ed_in2out.c
index 23e0957dabe..99db6010783 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed_in2out.c
+++ b/src/plugins/nat/nat44-ed/nat44_ed_in2out.c
@@ -49,6 +49,7 @@ typedef struct
u8 is_slow_path;
u8 translation_via_i2of;
u8 lookup_skipped;
+ u8 tcp_state;
} nat_in2out_ed_trace_t;
static u8 *
@@ -78,7 +79,7 @@ format_nat_in2out_ed_trace (u8 * s, va_list * args)
{
if (t->lookup_skipped)
{
- s = format (s, "\n lookup skipped - cached session index used");
+ s = format (s, "\n lookup skipped - cached session index used");
}
else
{
@@ -86,6 +87,11 @@ format_nat_in2out_ed_trace (u8 * s, va_list * args)
&t->search_key);
}
}
+ if (IP_PROTOCOL_TCP == t->i2of.match.proto)
+ {
+ s = format (s, "\n TCP state: %U", format_nat44_ed_tcp_state,
+ t->tcp_state);
+ }
return s;
}
@@ -675,7 +681,9 @@ nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip,
{
if (ip->protocol == IP_PROTOCOL_TCP)
{
- nat44_set_tcp_session_state_i2o (sm, now, s, b, thread_index);
+ nat44_set_tcp_session_state_i2o (
+ sm, now, s, vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags,
+ thread_index);
}
/* Accounting */
nat44_session_update_counters (s, now,
@@ -697,7 +705,7 @@ nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
ip4_header_t *ip, u16 src_port,
u16 dst_port, u32 thread_index,
u32 rx_sw_if_index, u32 tx_sw_if_index,
- f64 now, int is_multi_worker)
+ int is_multi_worker)
{
clib_bihash_kv_16_8_t kv, value;
snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
@@ -715,12 +723,6 @@ nat44_ed_not_translate_output_feature (snat_main_t *sm, vlib_buffer_t *b,
s =
pool_elt_at_index (tsm->sessions,
ed_value_get_session_index (&value));
- if (nat44_is_ses_closed (s)
- && (!s->tcp_closed_timestamp || now >= s->tcp_closed_timestamp))
- {
- nat44_ed_free_session_data (sm, s, thread_index, 0);
- nat_ed_session_delete (sm, s, thread_index, 1);
- }
return 1;
}
@@ -800,7 +802,7 @@ icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip,
{
if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
sm, b, ip, lookup_sport, lookup_dport, thread_index, sw_if_index,
- tx_sw_if_index, now, is_multi_worker)))
+ tx_sw_if_index, is_multi_worker)))
{
return next;
}
@@ -1173,22 +1175,6 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
goto trace0;
}
- if (s0->tcp_closed_timestamp)
- {
- if (now >= s0->tcp_closed_timestamp)
- {
- // session is closed, go slow path, freed in slow path
- next[0] = def_slow;
- }
- else
- {
- // session in transitory timeout, drop
- b0->error = node->errors[NAT_IN2OUT_ED_ERROR_TCP_CLOSED];
- next[0] = NAT_NEXT_DROP;
- }
- goto trace0;
- }
-
// drop if session expired
u64 sess_timeout_time;
sess_timeout_time =
@@ -1239,7 +1225,9 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
case IP_PROTOCOL_TCP:
vlib_increment_simple_counter (&sm->counters.fastpath.in2out.tcp,
thread_index, cntr_sw_if_index0, 1);
- nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
+ nat44_set_tcp_session_state_i2o (
+ sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags,
+ thread_index);
break;
case IP_PROTOCOL_UDP:
vlib_increment_simple_counter (&sm->counters.fastpath.in2out.udp,
@@ -1282,6 +1270,7 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t *vm,
clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
t->translation_via_i2of = (&s0->i2o == f);
+ t->tcp_state = s0->tcp_state;
}
else
{
@@ -1426,13 +1415,6 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
s0 =
pool_elt_at_index (tsm->sessions,
ed_value_get_session_index (&value0));
-
- if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp)
- {
- nat44_ed_free_session_data (sm, s0, thread_index, 0);
- nat_ed_session_delete (sm, s0, thread_index, 1);
- s0 = NULL;
- }
}
if (!s0)
@@ -1442,7 +1424,7 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
if (PREDICT_FALSE (nat44_ed_not_translate_output_feature (
sm, b0, ip0, vnet_buffer (b0)->ip.reass.l4_src_port,
vnet_buffer (b0)->ip.reass.l4_dst_port, thread_index,
- rx_sw_if_index0, tx_sw_if_index0, now, is_multi_worker)))
+ rx_sw_if_index0, tx_sw_if_index0, is_multi_worker)))
goto trace0;
/*
@@ -1496,7 +1478,9 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
{
vlib_increment_simple_counter (&sm->counters.slowpath.in2out.tcp,
thread_index, cntr_sw_if_index0, 1);
- nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index);
+ nat44_set_tcp_session_state_i2o (
+ sm, now, s0, vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags,
+ thread_index);
}
else
{
@@ -1529,6 +1513,7 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t *vm,
clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
t->translation_via_i2of = 1;
+ t->tcp_state = s0->tcp_state;
}
else
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_inlines.h b/src/plugins/nat/nat44-ed/nat44_ed_inlines.h
index cb418960249..a13f250cd3b 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed_inlines.h
+++ b/src/plugins/nat/nat44-ed/nat44_ed_inlines.h
@@ -24,6 +24,7 @@
#include <vnet/fib/ip4_fib.h>
#include <nat/lib/log.h>
+#include <nat/lib/ipfix_logging.h>
#include <nat/nat44-ed/nat44_ed.h>
always_inline void
@@ -171,6 +172,26 @@ nat_get_icmp_session_lookup_values (vlib_buffer_t *b, ip4_header_t *ip0,
return 0;
}
+/* Tell whether a TCP tracking state counts as established.
+ *
+ * @param state  TCP tracking state of a session
+ * @return 1 for ESTABLISHED, FIN_I2O and FIN_O2I, 0 for every other state
+ *
+ * nat44_session_get_timeout uses the result to choose between the
+ * established and the transitory TCP timeout. */
+always_inline int
+nat44_ed_tcp_is_established (nat44_ed_tcp_state_e state)
+{
+  static const int lookup[] = {
+    [NAT44_ED_TCP_STATE_CLOSED] = 0,
+    [NAT44_ED_TCP_STATE_SYN_I2O] = 0,
+    [NAT44_ED_TCP_STATE_SYN_O2I] = 0,
+    [NAT44_ED_TCP_STATE_ESTABLISHED] = 1,
+    [NAT44_ED_TCP_STATE_FIN_I2O] = 1,
+    [NAT44_ED_TCP_STATE_FIN_O2I] = 1,
+    [NAT44_ED_TCP_STATE_RST_TRANS] = 0,
+    [NAT44_ED_TCP_STATE_FIN_TRANS] = 0,
+    [NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O] = 0,
+    [NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I] = 0,
+    [NAT44_ED_TCP_N_STATE] = 0,
+  };
+  /* state is used as an index, so it has to be strictly below the number
+   * of elements; "<=" would let a read one past the end of the table slip
+   * through the check */
+  ASSERT (state < ARRAY_LEN (lookup));
+  return lookup[state];
+}
+
always_inline u32
nat44_session_get_timeout (snat_main_t *sm, snat_session_t *s)
{
@@ -184,10 +205,10 @@ nat44_session_get_timeout (snat_main_t *sm, snat_session_t *s)
return sm->timeouts.udp;
case IP_PROTOCOL_TCP:
{
- if (s->state)
- return sm->timeouts.tcp.transitory;
- else
+ if (nat44_ed_tcp_is_established (s->tcp_state))
return sm->timeouts.tcp.established;
+ else
+ return sm->timeouts.tcp.transitory;
}
default:
return sm->timeouts.udp;
@@ -340,8 +361,7 @@ nat_lru_free_one_with_head (snat_main_t *sm, int thread_index, f64 now,
sess_timeout_time =
s->last_heard + (f64) nat44_session_get_timeout (sm, s);
- if (now >= sess_timeout_time ||
- (s->tcp_closed_timestamp && now >= s->tcp_closed_timestamp))
+ if (now >= sess_timeout_time)
{
nat44_ed_free_session_data (sm, s, thread_index, 0);
nat_ed_session_delete (sm, s, thread_index, 0);
@@ -701,101 +721,303 @@ is_interface_addr (snat_main_t *sm, vlib_node_runtime_t *node,
}
+/* Log the reuse of an existing session as the end of the old session
+ * followed by the start of a new one, and restart its traffic counters.
+ *
+ * Emits a syslog session-delete and an IPFIX session-delete record, then an
+ * IPFIX session-create and a syslog session-add record, all built from the
+ * addresses/ports stored in s, and finally zeroes total_pkts/total_bytes.
+ *
+ * @param thread_index  thread index handed to the IPFIX logging calls
+ * @param s             session being reopened
+ */
always_inline void
-nat44_set_tcp_session_state_i2o (snat_main_t *sm, f64 now, snat_session_t *ses,
- vlib_buffer_t *b, u32 thread_index)
+nat44_ed_session_reopen (u32 thread_index, snat_session_t *s)
{
- snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
- u8 tcp_flags = vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags;
- u32 tcp_ack_number = vnet_buffer (b)->ip.reass.tcp_ack_number;
- u32 tcp_seq_number = vnet_buffer (b)->ip.reass.tcp_seq_number;
- if ((ses->state == 0) && (tcp_flags & TCP_FLAG_RST))
- ses->state = NAT44_SES_RST;
- if ((ses->state == NAT44_SES_RST) && !(tcp_flags & TCP_FLAG_RST))
- ses->state = 0;
- if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_SYN) &&
- (ses->state & NAT44_SES_O2I_SYN))
- ses->state = 0;
- if (tcp_flags & TCP_FLAG_SYN)
- ses->state |= NAT44_SES_I2O_SYN;
- if (tcp_flags & TCP_FLAG_FIN)
- {
- ses->i2o_fin_seq = clib_net_to_host_u32 (tcp_seq_number);
- ses->state |= NAT44_SES_I2O_FIN;
- }
- if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_O2I_FIN))
+ /* report the end of the previous incarnation of the session */
+ nat_syslog_nat44_sdel (0, s->in2out.fib_index, &s->in2out.addr,
+ s->in2out.port, &s->ext_host_nat_addr,
+ s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
+ &s->ext_host_addr, s->ext_host_port, s->proto,
+ nat44_ed_is_twice_nat_session (s));
+
+ nat_ipfix_logging_nat44_ses_delete (
+ thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto,
+ s->in2out.port, s->out2in.port, s->in2out.fib_index);
+ /* ... and the start of the new one, with the same addresses and ports */
+ nat_ipfix_logging_nat44_ses_create (
+ thread_index, s->in2out.addr.as_u32, s->out2in.addr.as_u32, s->proto,
+ s->in2out.port, s->out2in.port, s->in2out.fib_index);
+
+ /* NOTE(review): the delete record above passes
+ * nat44_ed_is_twice_nat_session (s) as its last argument while this add
+ * record passes a literal 0 - presumably the same is_twicenat flag;
+ * confirm the asymmetry is intended. */
+ nat_syslog_nat44_sadd (0, s->in2out.fib_index, &s->in2out.addr,
+ s->in2out.port, &s->ext_host_nat_addr,
+ s->ext_host_nat_port, &s->out2in.addr, s->out2in.port,
+ &s->ext_host_addr, s->ext_host_port, s->proto, 0);
+ /* accounting starts over for the reopened session */
+ s->total_pkts = 0;
+ s->total_bytes = 0;
+}
+
+always_inline void
+nat44_ed_init_tcp_state_stable (snat_main_t *sm)
+{
+ /* first make sure whole table is initialised in a way where state
+ * is not changed, then define special cases */
+ nat44_ed_tcp_state_e s;
+ for (s = 0; s < NAT44_ED_TCP_N_STATE; ++s)
{
- if (clib_net_to_host_u32 (tcp_ack_number) > ses->o2i_fin_seq)
+ int i;
+ for (i = 0; i < NAT44_ED_N_DIR; ++i)
{
- ses->state |= NAT44_SES_O2I_FIN_ACK;
- if (nat44_is_ses_closed (ses))
- { // if session is now closed, save the timestamp
- ses->tcp_closed_timestamp = now + sm->timeouts.tcp.transitory;
- ses->last_lru_update = now;
+ int j = 0;
+ for (j = 0; j < NAT44_ED_TCP_N_FLAG; ++j)
+ {
+ sm->tcp_state_change_table[s][i][j] = s;
}
}
}
- // move the session to proper LRU
- if (ses->state)
- {
- ses->lru_head_index = tsm->tcp_trans_lru_head_index;
- }
- else
- {
- ses->lru_head_index = tsm->tcp_estab_lru_head_index;
- }
- clib_dlist_remove (tsm->lru_pool, ses->lru_index);
- clib_dlist_addtail (tsm->lru_pool, ses->lru_head_index, ses->lru_index);
+ /* CLOSED and any kind of SYN -> HALF-OPEN */
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_CLOSED][NAT44_ED_DIR_I2O]
+ [NAT44_ED_TCP_FLAG_SYN] =
+ NAT44_ED_TCP_STATE_SYN_I2O;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_CLOSED][NAT44_ED_DIR_O2I]
+ [NAT44_ED_TCP_FLAG_SYN] =
+ NAT44_ED_TCP_STATE_SYN_O2I;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_CLOSED][NAT44_ED_DIR_I2O]
+ [NAT44_ED_TCP_FLAG_SYNFIN] =
+ NAT44_ED_TCP_STATE_SYN_I2O;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_CLOSED][NAT44_ED_DIR_O2I]
+ [NAT44_ED_TCP_FLAG_SYNFIN] =
+ NAT44_ED_TCP_STATE_SYN_O2I;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_CLOSED][NAT44_ED_DIR_I2O]
+ [NAT44_ED_TCP_FLAG_SYNFINRST] =
+ NAT44_ED_TCP_STATE_SYN_I2O;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_CLOSED][NAT44_ED_DIR_O2I]
+ [NAT44_ED_TCP_FLAG_SYNFINRST] =
+ NAT44_ED_TCP_STATE_SYN_O2I;
+
+ /* HALF-OPEN and any kind of SYN in right direction -> ESTABLISHED */
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_SYN_I2O][NAT44_ED_DIR_O2I]
+ [NAT44_ED_TCP_FLAG_SYN] =
+ NAT44_ED_TCP_STATE_ESTABLISHED;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_SYN_O2I][NAT44_ED_DIR_I2O]
+ [NAT44_ED_TCP_FLAG_SYN] =
+ NAT44_ED_TCP_STATE_ESTABLISHED;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_SYN_I2O][NAT44_ED_DIR_O2I]
+ [NAT44_ED_TCP_FLAG_SYNFIN] =
+ NAT44_ED_TCP_STATE_ESTABLISHED;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_SYN_O2I][NAT44_ED_DIR_I2O]
+ [NAT44_ED_TCP_FLAG_SYNFIN] =
+ NAT44_ED_TCP_STATE_ESTABLISHED;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_SYN_I2O][NAT44_ED_DIR_O2I]
+ [NAT44_ED_TCP_FLAG_SYNFINRST] =
+ NAT44_ED_TCP_STATE_ESTABLISHED;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_SYN_O2I][NAT44_ED_DIR_I2O]
+ [NAT44_ED_TCP_FLAG_SYNFINRST] =
+ NAT44_ED_TCP_STATE_ESTABLISHED;
+
+ /* ESTABLISHED and any kind of RST -> RST_TRANS */
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_I2O]
+ [NAT44_ED_TCP_FLAG_RST] =
+ NAT44_ED_TCP_STATE_RST_TRANS;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_O2I]
+ [NAT44_ED_TCP_FLAG_RST] =
+ NAT44_ED_TCP_STATE_RST_TRANS;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_I2O]
+ [NAT44_ED_TCP_FLAG_SYNRST] =
+ NAT44_ED_TCP_STATE_RST_TRANS;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_O2I]
+ [NAT44_ED_TCP_FLAG_SYNRST] =
+ NAT44_ED_TCP_STATE_RST_TRANS;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_I2O]
+ [NAT44_ED_TCP_FLAG_FINRST] =
+ NAT44_ED_TCP_STATE_RST_TRANS;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_O2I]
+ [NAT44_ED_TCP_FLAG_FINRST] =
+ NAT44_ED_TCP_STATE_RST_TRANS;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_I2O]
+ [NAT44_ED_TCP_FLAG_SYNFINRST] =
+ NAT44_ED_TCP_STATE_RST_TRANS;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_O2I]
+ [NAT44_ED_TCP_FLAG_SYNFINRST] =
+ NAT44_ED_TCP_STATE_RST_TRANS;
+
+ /* ESTABLISHED and any kind of FIN without RST -> HALF-CLOSED */
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_I2O]
+ [NAT44_ED_TCP_FLAG_FIN] =
+ NAT44_ED_TCP_STATE_FIN_I2O;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_O2I]
+ [NAT44_ED_TCP_FLAG_FIN] =
+ NAT44_ED_TCP_STATE_FIN_O2I;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_I2O]
+ [NAT44_ED_TCP_FLAG_SYNFIN] =
+ NAT44_ED_TCP_STATE_FIN_I2O;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_ESTABLISHED][NAT44_ED_DIR_O2I]
+ [NAT44_ED_TCP_FLAG_SYNFIN] =
+ NAT44_ED_TCP_STATE_FIN_O2I;
+
+ /* HALF-CLOSED and any kind of FIN -> FIN_TRANS */
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_I2O][NAT44_ED_DIR_O2I]
+ [NAT44_ED_TCP_FLAG_FIN] =
+ NAT44_ED_TCP_STATE_FIN_TRANS;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_O2I][NAT44_ED_DIR_I2O]
+ [NAT44_ED_TCP_FLAG_FIN] =
+ NAT44_ED_TCP_STATE_FIN_TRANS;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_I2O][NAT44_ED_DIR_O2I]
+ [NAT44_ED_TCP_FLAG_SYNFIN] =
+ NAT44_ED_TCP_STATE_FIN_TRANS;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_O2I][NAT44_ED_DIR_I2O]
+ [NAT44_ED_TCP_FLAG_SYNFIN] =
+ NAT44_ED_TCP_STATE_FIN_TRANS;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_I2O][NAT44_ED_DIR_O2I]
+ [NAT44_ED_TCP_FLAG_FINRST] =
+ NAT44_ED_TCP_STATE_FIN_TRANS;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_O2I][NAT44_ED_DIR_I2O]
+ [NAT44_ED_TCP_FLAG_FINRST] =
+ NAT44_ED_TCP_STATE_FIN_TRANS;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_I2O][NAT44_ED_DIR_O2I]
+ [NAT44_ED_TCP_FLAG_SYNFINRST] =
+ NAT44_ED_TCP_STATE_FIN_TRANS;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_O2I][NAT44_ED_DIR_I2O]
+ [NAT44_ED_TCP_FLAG_SYNFINRST] =
+ NAT44_ED_TCP_STATE_FIN_TRANS;
+
+ /* RST_TRANS and anything non-RST -> ESTABLISHED */
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_O2I]
+ [NAT44_ED_TCP_FLAG_NONE] =
+ NAT44_ED_TCP_STATE_ESTABLISHED;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_I2O]
+ [NAT44_ED_TCP_FLAG_NONE] =
+ NAT44_ED_TCP_STATE_ESTABLISHED;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_O2I]
+ [NAT44_ED_TCP_FLAG_SYN] =
+ NAT44_ED_TCP_STATE_ESTABLISHED;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_I2O]
+ [NAT44_ED_TCP_FLAG_SYN] =
+ NAT44_ED_TCP_STATE_ESTABLISHED;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_O2I]
+ [NAT44_ED_TCP_FLAG_FIN] =
+ NAT44_ED_TCP_STATE_ESTABLISHED;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_I2O]
+ [NAT44_ED_TCP_FLAG_FIN] =
+ NAT44_ED_TCP_STATE_ESTABLISHED;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_O2I]
+ [NAT44_ED_TCP_FLAG_SYNFIN] =
+ NAT44_ED_TCP_STATE_ESTABLISHED;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_RST_TRANS][NAT44_ED_DIR_I2O]
+ [NAT44_ED_TCP_FLAG_SYNFIN] =
+ NAT44_ED_TCP_STATE_ESTABLISHED;
+
+ /* FIN_TRANS and any kind of SYN -> HALF-REOPEN */
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_I2O]
+ [NAT44_ED_TCP_FLAG_SYN] =
+ NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_O2I]
+ [NAT44_ED_TCP_FLAG_SYN] =
+ NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_I2O]
+ [NAT44_ED_TCP_FLAG_SYNRST] =
+ NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_O2I]
+ [NAT44_ED_TCP_FLAG_SYNRST] =
+ NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_I2O]
+ [NAT44_ED_TCP_FLAG_SYNFIN] =
+ NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_O2I]
+ [NAT44_ED_TCP_FLAG_SYNFIN] =
+ NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_I2O]
+ [NAT44_ED_TCP_FLAG_SYNFINRST] =
+ NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_TRANS][NAT44_ED_DIR_O2I]
+ [NAT44_ED_TCP_FLAG_SYNFINRST] =
+ NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I;
+
+ /* HALF-REOPEN and any kind of SYN in right direction -> ESTABLISHED */
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O]
+ [NAT44_ED_DIR_O2I][NAT44_ED_TCP_FLAG_SYN] =
+ NAT44_ED_TCP_STATE_ESTABLISHED;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I]
+ [NAT44_ED_DIR_I2O][NAT44_ED_TCP_FLAG_SYN] =
+ NAT44_ED_TCP_STATE_ESTABLISHED;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O]
+ [NAT44_ED_DIR_O2I][NAT44_ED_TCP_FLAG_SYNRST] =
+ NAT44_ED_TCP_STATE_ESTABLISHED;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I]
+ [NAT44_ED_DIR_I2O][NAT44_ED_TCP_FLAG_SYNRST] =
+ NAT44_ED_TCP_STATE_ESTABLISHED;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O]
+ [NAT44_ED_DIR_O2I][NAT44_ED_TCP_FLAG_SYNFIN] =
+ NAT44_ED_TCP_STATE_ESTABLISHED;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I]
+ [NAT44_ED_DIR_I2O][NAT44_ED_TCP_FLAG_SYNFIN] =
+ NAT44_ED_TCP_STATE_ESTABLISHED;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O]
+ [NAT44_ED_DIR_O2I][NAT44_ED_TCP_FLAG_SYNFINRST] =
+ NAT44_ED_TCP_STATE_ESTABLISHED;
+ sm->tcp_state_change_table[NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I]
+ [NAT44_ED_DIR_I2O][NAT44_ED_TCP_FLAG_SYNFINRST] =
+ NAT44_ED_TCP_STATE_ESTABLISHED;
}
+/* TCP state tracking according to RFC 7857 (and RFC 6146, which RFC 7857
+ * references). Our implementation goes beyond the RFCs by supporting creation
+ * of a new session while the old session is in transitory timeout after seeing
+ * FIN packets from both sides. */
always_inline void
-nat44_set_tcp_session_state_o2i (snat_main_t *sm, f64 now, snat_session_t *ses,
- u8 tcp_flags, u32 tcp_ack_number,
- u32 tcp_seq_number, u32 thread_index)
+nat44_set_tcp_session_state (snat_main_t *sm, f64 now, snat_session_t *ses,
+ u8 tcp_flags, u32 thread_index,
+ nat44_ed_dir_e dir)
{
snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index];
- if ((ses->state == 0) && (tcp_flags & TCP_FLAG_RST))
- ses->state = NAT44_SES_RST;
- if ((ses->state == NAT44_SES_RST) && !(tcp_flags & TCP_FLAG_RST))
- ses->state = 0;
- if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_SYN) &&
- (ses->state & NAT44_SES_O2I_SYN))
- ses->state = 0;
- if (tcp_flags & TCP_FLAG_SYN)
- ses->state |= NAT44_SES_O2I_SYN;
- if (tcp_flags & TCP_FLAG_FIN)
- {
- ses->o2i_fin_seq = clib_net_to_host_u32 (tcp_seq_number);
- ses->state |= NAT44_SES_O2I_FIN;
- }
- if ((tcp_flags & TCP_FLAG_ACK) && (ses->state & NAT44_SES_I2O_FIN))
+ nat44_ed_tcp_flag_e flags =
+ tcp_flags & (TCP_FLAG_SYN | TCP_FLAG_FIN | TCP_FLAG_RST);
+
+ u8 old_state = ses->tcp_state;
+ ses->tcp_state = sm->tcp_state_change_table[ses->tcp_state][dir][flags];
+
+ if (old_state != ses->tcp_state)
{
- if (clib_net_to_host_u32 (tcp_ack_number) > ses->i2o_fin_seq)
- ses->state |= NAT44_SES_I2O_FIN_ACK;
- if (nat44_is_ses_closed (ses))
- { // if session is now closed, save the timestamp
- ses->tcp_closed_timestamp = now + sm->timeouts.tcp.transitory;
- ses->last_lru_update = now;
+ if (nat44_ed_tcp_is_established (ses->tcp_state))
+ {
+ if (NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O == old_state ||
+ NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I == old_state)
+ {
+ nat44_ed_session_reopen (thread_index, ses);
+ }
+ ses->lru_head_index = tsm->tcp_estab_lru_head_index;
}
+ else
+ {
+ if (NAT44_ED_TCP_STATE_ESTABLISHED == old_state)
+ { // need to update last heard otherwise session might get
+ // immediately timed out if it has been idle longer than
+ // transitory timeout
+ ses->last_heard = now;
+ }
+ ses->lru_head_index = tsm->tcp_trans_lru_head_index;
+ }
+ ses->last_lru_update = now;
+ clib_dlist_remove (tsm->lru_pool, ses->lru_index);
+ clib_dlist_addtail (tsm->lru_pool, ses->lru_head_index, ses->lru_index);
}
- // move the session to proper LRU
- if (ses->state)
- {
- ses->lru_head_index = tsm->tcp_trans_lru_head_index;
- }
- else
- {
- ses->lru_head_index = tsm->tcp_estab_lru_head_index;
- }
- clib_dlist_remove (tsm->lru_pool, ses->lru_index);
- clib_dlist_addtail (tsm->lru_pool, ses->lru_head_index, ses->lru_index);
+}
+
+always_inline void
+nat44_set_tcp_session_state_i2o (snat_main_t *sm, f64 now, snat_session_t *ses,
+ u8 tcp_flags, u32 thread_index)
+{
+ return nat44_set_tcp_session_state (sm, now, ses, tcp_flags, thread_index,
+ NAT44_ED_DIR_I2O);
+}
+
+always_inline void
+nat44_set_tcp_session_state_o2i (snat_main_t *sm, f64 now, snat_session_t *ses,
+ u8 tcp_flags, u32 thread_index)
+{
+ return nat44_set_tcp_session_state (sm, now, ses, tcp_flags, thread_index,
+ NAT44_ED_DIR_O2I);
}
always_inline void
nat44_session_update_counters (snat_session_t *s, f64 now, uword bytes,
u32 thread_index)
{
- s->last_heard = now;
+ if (NAT44_ED_TCP_STATE_RST_TRANS != s->tcp_state &&
+ NAT44_ED_TCP_STATE_FIN_TRANS != s->tcp_state &&
+ NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_I2O != s->tcp_state &&
+ NAT44_ED_TCP_STATE_FIN_REOPEN_SYN_O2I != s->tcp_state)
+ {
+ s->last_heard = now;
+ }
s->total_pkts++;
s->total_bytes += bytes;
}
diff --git a/src/plugins/nat/nat44-ed/nat44_ed_out2in.c b/src/plugins/nat/nat44-ed/nat44_ed_out2in.c
index 5ad57a17098..40a72122140 100644
--- a/src/plugins/nat/nat44-ed/nat44_ed_out2in.c
+++ b/src/plugins/nat/nat44-ed/nat44_ed_out2in.c
@@ -41,7 +41,6 @@ typedef enum
NAT_ED_SP_REASON_NO_REASON,
NAT_ED_SP_REASON_LOOKUP_FAILED,
NAT_ED_SP_REASON_VRF_EXPIRED,
- NAT_ED_SP_TCP_CLOSED,
NAT_ED_SP_SESS_EXPIRED,
} nat_slow_path_reason_e;
@@ -57,6 +56,7 @@ typedef struct
u8 is_slow_path;
u8 translation_via_i2of;
u8 lookup_skipped;
+ u8 tcp_state;
nat_slow_path_reason_e slow_path_reason;
} nat44_ed_out2in_trace_t;
@@ -72,8 +72,6 @@ format_slow_path_reason (u8 *s, va_list *args)
return format (s, "slow path because lookup failed");
case NAT_ED_SP_REASON_VRF_EXPIRED:
return format (s, "slow path because vrf expired");
- case NAT_ED_SP_TCP_CLOSED:
- return format (s, "slow path because tcp closed");
case NAT_ED_SP_SESS_EXPIRED:
return format (s, "slow path because session expired");
}
@@ -107,14 +105,19 @@ format_nat44_ed_out2in_trace (u8 * s, va_list * args)
{
if (t->lookup_skipped)
{
- s = format (s, "\n lookup skipped - cached session index used");
+ s = format (s, "\n lookup skipped - cached session index used");
}
else
{
s = format (s, "\n search key %U", format_ed_session_kvp,
&t->search_key);
}
- s = format (s, "\n %U", format_slow_path_reason, t->slow_path_reason);
+ s = format (s, "\n %U", format_slow_path_reason, t->slow_path_reason);
+ }
+ if (IP_PROTOCOL_TCP == t->i2of.match.proto)
+ {
+ s = format (s, "\n TCP state: %U", format_nat44_ed_tcp_state,
+ t->tcp_state);
}
return s;
@@ -645,10 +648,9 @@ create_bypass_for_fwd (snat_main_t *sm, vlib_buffer_t *b, snat_session_t *s,
if (ip->protocol == IP_PROTOCOL_TCP)
{
- tcp_header_t *tcp = ip4_next_header (ip);
- nat44_set_tcp_session_state_o2i (sm, now, s, tcp->flags,
- tcp->ack_number, tcp->seq_number,
- thread_index);
+ nat44_set_tcp_session_state_o2i (
+ sm, now, s, vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags,
+ thread_index);
}
/* Accounting */
@@ -883,23 +885,6 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
goto trace0;
}
- if (s0->tcp_closed_timestamp)
- {
- if (now >= s0->tcp_closed_timestamp)
- {
- // session is closed, go slow path, freed in slow path
- slow_path_reason = NAT_ED_SP_TCP_CLOSED;
- next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH;
- }
- else
- {
- // session in transitory timeout, drop
- b0->error = node->errors[NAT_OUT2IN_ED_ERROR_TCP_CLOSED];
- next[0] = NAT_NEXT_DROP;
- }
- goto trace0;
- }
-
// drop if session expired
u64 sess_timeout_time;
sess_timeout_time =
@@ -981,10 +966,6 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
nat44_set_tcp_session_state_o2i (sm, now, s0,
vnet_buffer (b0)->ip.
reass.icmp_type_or_tcp_flags,
- vnet_buffer (b0)->ip.
- reass.tcp_ack_number,
- vnet_buffer (b0)->ip.
- reass.tcp_seq_number,
thread_index);
break;
case IP_PROTOCOL_UDP:
@@ -1028,6 +1009,7 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm,
clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
t->translation_via_i2of = (&s0->i2o == f);
+ t->tcp_state = s0->tcp_state;
}
else
{
@@ -1170,13 +1152,6 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
s0 =
pool_elt_at_index (tsm->sessions,
ed_value_get_session_index (&value0));
-
- if (s0->tcp_closed_timestamp && now >= s0->tcp_closed_timestamp)
- {
- nat44_ed_free_session_data (sm, s0, thread_index, 0);
- nat_ed_session_delete (sm, s0, thread_index, 1);
- s0 = NULL;
- }
}
if (!s0)
@@ -1264,10 +1239,6 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
nat44_set_tcp_session_state_o2i (sm, now, s0,
vnet_buffer (b0)->ip.
reass.icmp_type_or_tcp_flags,
- vnet_buffer (b0)->ip.
- reass.tcp_ack_number,
- vnet_buffer (b0)->ip.
- reass.tcp_seq_number,
thread_index);
}
else
@@ -1300,6 +1271,7 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm,
t->session_index = s0 - tsm->sessions;
clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of));
clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if));
+ t->tcp_state = s0->tcp_state;
}
else
{
diff --git a/src/plugins/nat/nat44-ed/tcp_conn_track.rst b/src/plugins/nat/nat44-ed/tcp_conn_track.rst
new file mode 100644
index 00000000000..faf0dec8b06
--- /dev/null
+++ b/src/plugins/nat/nat44-ed/tcp_conn_track.rst
@@ -0,0 +1,65 @@
+NAT44ED TCP connection tracking
+===============================
+
+TCP connection tracking in endpoint-dependent NAT is based on RFC 7857
+and RFC 6146, which RFC 7857 references.
+
+See RFC 7857 for the original graph - our graph is slightly different,
+allowing creation of a new session while an old session is in transitory
+timeout after seeing FIN packets from both sides.
+
+After discussion on vpp-dev and with Andrew Yourtchenko we agreed that
+it's friendly behaviour to allow creating a new session while the old
+one is closed and in transitory timeout. The alternative would mean VPP
+insists that a connection with the same 5-tuple cannot be created while
+an old one is finished and timing out. There is no apparent reason why
+our change would break anything and we agreed that it could only help users.
+
+::
+
+
+ +------------transitory timeout----------------+
+ | |
+ | +-------------+ |
+ | session created---->+ CLOSED | |
+ | +-------------+ |
+ | | | |
++-----+ | SYN SYN |
+| v v IN2OUT OUT2IN |
+| +->session removed | | |
+| | ^ ^ ^ ^ ^ v v |
+| | | | | | | +-------+ +-------+ |
+| | | | | | +----transitory timeout---+SYN_I2O| |SYN_O2I+--+
+| | | | | | +---------+ |-------| |-------|
+| | | | | +-transitory---+RST_TRANS| | |
+| | | | | timeout +---------+ SYN SYN
+| | | | | | ^ OUT2IN IN2OUT
+| | | | | | | | |
+| | | | | | | v v
+| | | | | | | +-----------+
+| | | | | | +--RST----+ESTABLISHED+<-SYN IN2OUT-+
+| | | | | | +-----------+ |
+| | | | | +---data pkt-----^ | | | ^ |
+| | | | | | | | | |
+| | | | +----established timeout---------------+ | | | |
+| | | | | | | |
+| | | | +-----FIN IN2OUT---------+ | | |
+| | | | v | | |
+| | | | +-------+ +--FIN OUT2IN----+ | |
+| | | +--established---+FIN_I2O| | | |
+| | | timeout +-------+ v +-SYN OUT2IN-+ |
+| | | | +-------+ | |
+| | +----established-------------+FIN_O2I| +--------------+ |
+| | timeout | +-------+ |REOPEN_SYN_I2O| +--------------+
+| | | | +--------------+ |REOPEN_SYN_O2I|
+| | FIN FIN ^ | +--------------+
+| | OUT2IN IN2OUT | | ^ |
+| | | | | | | |
+| | v v | | | |
+| | +-------------+ | | | |
+| +--transitory timeout---+ FIN_TRANS +-SYN IN2OUT-+ | | |
+| +-------------+ | | |
+| | | | |
+| +--------SYN OUT2IN----|-----------+ |
+| v |
++------------------transitory timeout-------------------+<-------------+