diff options
author | Klement Sekera <ksekera@cisco.com> | 2020-12-15 18:47:05 +0100 |
---|---|---|
committer | Ole Trøan <otroan@employees.org> | 2021-01-18 08:36:26 +0000 |
commit | 4881cb4c6f0d9c6276eb7a45ed355f9fc3d729b3 (patch) | |
tree | 07959eb6fc99b88b30e6f81f4620d8d6c70110e2 /src | |
parent | 4a58e49cfe03150034a65e147a2ffe8d24391b86 (diff) |
nat: deal with flows instead of sessions
This change introduces the flow concept to endpoint-dependent NAT. Instead
of having a session and a plethora of special cases in code for e.g.
hairpinning, twice-nat and others, figure all this out and store it in
flow logic. Every flow has a match and a rewrite part. This unifies all
the NAT packet processing cases into one - match a flow and rewrite the
packet based on that flow. It also provides a cure for the hairpinning
dilemma where one part of the flow is on one worker and another on
a different one. These cases are also sped up by not requiring
destination address lookup every single time to be able to rewrite source
nat as this is now part of flow rewrite logic.
Type: improvement
Change-Id: Ib60c992e16792ea4d4129bc10202ebb99a73b5be
Signed-off-by: Klement Sekera <ksekera@cisco.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/plugins/nat/in2out_ed.c | 1202 | ||||
-rw-r--r-- | src/plugins/nat/nat.c | 602 | ||||
-rw-r--r-- | src/plugins/nat/nat.h | 178 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ei/nat44_ei_ha.h | 2 | ||||
-rw-r--r-- | src/plugins/nat/nat44-ei/nat44_ei_in2out.c | 8 | ||||
-rw-r--r-- | src/plugins/nat/nat44/ed_inlines.h | 116 | ||||
-rw-r--r-- | src/plugins/nat/nat44_classify.c | 31 | ||||
-rw-r--r-- | src/plugins/nat/nat44_cli.c | 4 | ||||
-rw-r--r-- | src/plugins/nat/nat44_hairpinning.c | 261 | ||||
-rw-r--r-- | src/plugins/nat/nat44_handoff.c | 13 | ||||
-rw-r--r-- | src/plugins/nat/nat_format.c | 15 | ||||
-rw-r--r-- | src/plugins/nat/nat_inlines.h | 125 | ||||
-rw-r--r-- | src/plugins/nat/out2in_ed.c | 1255 | ||||
-rw-r--r-- | src/plugins/nat/test/test_nat44_ed.py | 8 | ||||
-rw-r--r-- | src/vnet/buffer.h | 8 |
15 files changed, 1865 insertions, 1963 deletions
diff --git a/src/plugins/nat/in2out_ed.c b/src/plugins/nat/in2out_ed.c index ed9ad04bdb2..9dc68576fd4 100644 --- a/src/plugins/nat/in2out_ed.c +++ b/src/plugins/nat/in2out_ed.c @@ -48,7 +48,13 @@ typedef struct u32 sw_if_index; u32 next_index; u32 session_index; - u32 is_slow_path; + nat_translation_error_e translation_error; + nat_6t_flow_t i2of; + nat_6t_flow_t o2if; + clib_bihash_kv_16_8_t search_key; + u8 is_slow_path; + u8 translation_via_i2of; + u8 lookup_skipped; } nat_in2out_ed_trace_t; static u8 * @@ -65,144 +71,39 @@ format_nat_in2out_ed_trace (u8 * s, va_list * args) s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag, t->sw_if_index, t->next_index, t->session_index); - - return s; -} - -#ifndef CLIB_MARCH_VARIANT -int -nat44_i2o_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void *arg) -{ - snat_main_t *sm = &snat_main; - nat44_is_idle_session_ctx_t *ctx = arg; - snat_session_t *s; - u64 sess_timeout_time; - u8 proto; - u16 r_port, l_port; - ip4_address_t *l_addr, *r_addr; - u32 fib_index; - clib_bihash_kv_16_8_t ed_kv; - int i; - snat_address_t *a; - snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data, - ctx->thread_index); - - ASSERT (ctx->thread_index == ed_value_get_thread_index (kv)); - s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (kv)); - sess_timeout_time = s->last_heard + (f64) nat44_session_get_timeout (sm, s); - if (ctx->now >= sess_timeout_time) + if (~0 != t->session_index) { - if (is_fwd_bypass_session (s)) - goto delete; - - l_addr = &s->out2in.addr; - r_addr = &s->ext_host_addr; - fib_index = s->out2in.fib_index; - if (snat_is_unk_proto_session (s)) + s = format (s, ", translation result '%U' via %s", + format_nat_ed_translation_error, t->translation_error, + t->translation_via_i2of ? 
"i2of" : "o2if"); + s = format (s, "\n i2of %U", format_nat_6t_flow, &t->i2of); + s = format (s, "\n o2if %U", format_nat_6t_flow, &t->o2if); + } + if (!t->is_slow_path) + { + if (t->lookup_skipped) { - proto = s->in2out.port; - r_port = 0; - l_port = 0; + s = format (s, "\n lookup skipped - cached session index used"); } else { - proto = nat_proto_to_ip_proto (s->nat_proto); - l_port = s->out2in.port; - r_port = s->ext_host_port; - } - init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto); - if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &ed_kv, 0)) - nat_elog_warn ("out2in_ed key del failed"); - - if (snat_is_unk_proto_session (s)) - goto delete; - - nat_ipfix_logging_nat44_ses_delete (ctx->thread_index, - s->in2out.addr.as_u32, - s->out2in.addr.as_u32, - s->nat_proto, - s->in2out.port, - s->out2in.port, - s->in2out.fib_index); - - nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index, - &s->in2out.addr, s->in2out.port, - &s->ext_host_nat_addr, s->ext_host_nat_port, - &s->out2in.addr, s->out2in.port, - &s->ext_host_addr, s->ext_host_port, - s->nat_proto, is_twice_nat_session (s)); - - if (is_twice_nat_session (s)) - { - for (i = 0; i < vec_len (sm->twice_nat_addresses); i++) - { - // TODO FIXME this is obviously broken - which address should be - // freed here?! 
- a = sm->twice_nat_addresses + i; - if (a->addr.as_u32 == s->ext_host_nat_addr.as_u32) - { - snat_free_outside_address_and_port (sm->twice_nat_addresses, - ctx->thread_index, - &s->ext_host_nat_addr, - s->ext_host_nat_port, - s->nat_proto); - break; - } - } + s = format (s, "\n search key %U", format_ed_session_kvp, + &t->search_key); } - - if (snat_is_session_static (s)) - goto delete; - - snat_free_outside_address_and_port (sm->addresses, ctx->thread_index, - &s->out2in.addr, s->out2in.port, - s->nat_proto); - delete: - nat_ed_session_delete (sm, s, ctx->thread_index, 1); - return 1; } - return 0; -} -#endif - -static inline u32 -icmp_in2out_ed_slow_path (snat_main_t * sm, vlib_buffer_t * b0, - ip4_header_t * ip0, icmp46_header_t * icmp0, - u32 sw_if_index0, u32 rx_fib_index0, - vlib_node_runtime_t * node, u32 next0, f64 now, - u32 thread_index, snat_session_t ** p_s0) -{ - vlib_main_t *vm = vlib_get_main (); - - next0 = icmp_in2out (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, - next0, thread_index, p_s0, 0); - snat_session_t *s0 = *p_s0; - if (PREDICT_TRUE (next0 != NAT_NEXT_DROP && s0)) - { - /* Accounting */ - nat44_session_update_counters (s0, now, - vlib_buffer_length_in_chain - (vm, b0), thread_index); - /* Per-user LRU list maintenance */ - nat44_session_update_lru (sm, s0, thread_index); - } - return next0; + return s; } static int -nat_ed_alloc_addr_and_port (snat_main_t * sm, u32 rx_fib_index, - u32 nat_proto, u32 thread_index, - ip4_address_t r_addr, u16 r_port, u8 proto, - u16 port_per_thread, u32 snat_thread_index, - snat_session_t * s, - ip4_address_t * outside_addr, - u16 * outside_port, - clib_bihash_kv_16_8_t * out2in_ed_kv) +nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index, u32 nat_proto, + u32 thread_index, ip4_address_t r_addr, u16 r_port, + u8 proto, u16 port_per_thread, + u32 snat_thread_index, snat_session_t *s, + ip4_address_t *outside_addr, u16 *outside_port) { int i; snat_address_t *a, *ga = 0; - 
snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; const u16 port_thread_offset = (port_per_thread * snat_thread_index) + 1024; @@ -211,48 +112,50 @@ nat_ed_alloc_addr_and_port (snat_main_t * sm, u32 rx_fib_index, a = sm->addresses + i; switch (nat_proto) { -#define _(N, j, n, unused) \ - case NAT_PROTOCOL_##N: \ - if (a->fib_index == rx_fib_index) \ - { \ - /* first try port suggested by caller */ \ - u16 port = clib_net_to_host_u16 (*outside_port); \ - u16 port_offset = port - port_thread_offset; \ - if (port <= port_thread_offset || \ - port > port_thread_offset + port_per_thread) \ - { \ - /* need to pick a different port, suggested port doesn't fit in \ - * this thread's port range */ \ - port_offset = snat_random_port (0, port_per_thread - 1); \ - port = port_thread_offset + port_offset; \ - } \ - u16 attempts = ED_PORT_ALLOC_ATTEMPTS; \ - do \ - { \ - init_ed_kv (out2in_ed_kv, a->addr, clib_host_to_net_u16 (port), \ - r_addr, r_port, s->out2in.fib_index, proto, \ - thread_index, s - tsm->sessions); \ - int rv = clib_bihash_add_del_16_8 (&sm->out2in_ed, out2in_ed_kv, \ - 2 /* is_add */); \ - if (0 == rv) \ - { \ - ++a->busy_##n##_port_refcounts[port]; \ - a->busy_##n##_ports_per_thread[thread_index]++; \ - a->busy_##n##_ports++; \ - *outside_addr = a->addr; \ - *outside_port = clib_host_to_net_u16 (port); \ - return 0; \ - } \ - port_offset = snat_random_port (0, port_per_thread - 1); \ - port = port_thread_offset + port_offset; \ - --attempts; \ - } \ - while (attempts > 0); \ - } \ - else if (a->fib_index == ~0) \ - { \ - ga = a; \ - } \ +#define _(N, j, n, unused) \ + case NAT_PROTOCOL_##N: \ + if (a->fib_index == rx_fib_index) \ + { \ + s->o2i.match.daddr = a->addr; \ + /* first try port suggested by caller */ \ + u16 port = clib_net_to_host_u16 (*outside_port); \ + u16 port_offset = port - port_thread_offset; \ + if (port <= port_thread_offset || \ + port > port_thread_offset + port_per_thread) \ + { \ + /* need to pick a different 
port, suggested port doesn't fit in \ + * this thread's port range */ \ + port_offset = snat_random_port (0, port_per_thread - 1); \ + port = port_thread_offset + port_offset; \ + } \ + u16 attempts = ED_PORT_ALLOC_ATTEMPTS; \ + do \ + { \ + if (NAT_PROTOCOL_ICMP == nat_proto) \ + { \ + s->o2i.match.sport = clib_host_to_net_u16 (port); \ + } \ + s->o2i.match.dport = clib_host_to_net_u16 (port); \ + if (0 == \ + nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2)) \ + { \ + ++a->busy_##n##_port_refcounts[port]; \ + a->busy_##n##_ports_per_thread[thread_index]++; \ + a->busy_##n##_ports++; \ + *outside_addr = a->addr; \ + *outside_port = clib_host_to_net_u16 (port); \ + return 0; \ + } \ + port_offset = snat_random_port (0, port_per_thread - 1); \ + port = port_thread_offset + port_offset; \ + --attempts; \ + } \ + while (attempts > 0); \ + } \ + else if (a->fib_index == ~0) \ + { \ + ga = a; \ + } \ break; foreach_nat_protocol; @@ -311,28 +214,51 @@ nat_outside_fib_index_lookup (snat_main_t * sm, ip4_address_t addr) return ~0; } +static_always_inline int +nat44_ed_external_sm_lookup (snat_main_t *sm, ip4_address_t match_addr, + u16 match_port, nat_protocol_t match_protocol, + u32 match_fib_index, ip4_address_t *daddr, + u16 *dport) +{ + clib_bihash_kv_8_8_t kv, value; + init_nat_k (&kv, match_addr, match_port, match_fib_index, match_protocol); + if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value)) + { + /* Try address only mapping */ + init_nat_k (&kv, match_addr, 0, 0, 0); + if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, + &value)) + return 0; + } + + snat_static_mapping_t *m = + pool_elt_at_index (sm->static_mappings, value.value); + *daddr = m->local_addr; + if (dport) + { + /* Address only mapping doesn't change port */ + *dport = is_addr_only_static_mapping (m) ? 
match_port : m->local_port; + } + return 1; +} + static u32 -slow_path_ed (snat_main_t * sm, - vlib_buffer_t * b, - ip4_address_t l_addr, - ip4_address_t r_addr, - u16 l_port, - u16 r_port, - u8 proto, - u32 rx_fib_index, - snat_session_t ** sessionp, - vlib_node_runtime_t * node, u32 next, u32 thread_index, f64 now) +slow_path_ed (snat_main_t *sm, vlib_buffer_t *b, ip4_address_t l_addr, + ip4_address_t r_addr, u16 l_port, u16 r_port, u8 proto, + u32 rx_fib_index, snat_session_t **sessionp, + vlib_node_runtime_t *node, u32 next, u32 thread_index, f64 now) { snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; - clib_bihash_kv_16_8_t out2in_ed_kv; - nat44_is_idle_session_ctx_t ctx; ip4_address_t outside_addr; u16 outside_port; - u8 identity_nat; + u32 outside_fib_index; + u8 is_identity_nat; u32 nat_proto = ip_proto_to_nat_proto (proto); snat_session_t *s = NULL; lb_nat_type_t lb = 0; + ip4_address_t daddr = r_addr; + u16 dport = r_port; if (PREDICT_TRUE (nat_proto == NAT_PROTOCOL_TCP)) { @@ -358,43 +284,81 @@ slow_path_ed (snat_main_t * sm, } } + outside_fib_index = sm->outside_fib_index; + + switch (vec_len (sm->outside_fibs)) + { + case 0: + outside_fib_index = sm->outside_fib_index; + break; + case 1: + outside_fib_index = sm->outside_fibs[0].fib_index; + break; + default: + outside_fib_index = nat_outside_fib_index_lookup (sm, r_addr); + break; + } + ip4_address_t sm_addr; u16 sm_port; u32 sm_fib_index; /* First try to match static mapping by local address and port */ - if (snat_static_mapping_match - (sm, l_addr, l_port, rx_fib_index, nat_proto, &sm_addr, &sm_port, - &sm_fib_index, 0, 0, 0, &lb, 0, &identity_nat, 0)) + int is_sm; + if (snat_static_mapping_match (sm, l_addr, l_port, rx_fib_index, nat_proto, + &sm_addr, &sm_port, &sm_fib_index, 0, 0, 0, + &lb, 0, &is_identity_nat, 0)) + { + is_sm = 0; + } + else + { + is_sm = 1; + } + + if (PREDICT_FALSE (is_sm && is_identity_nat)) + { + *sessionp = NULL; + return next; + } + + s = 
nat_ed_session_alloc (sm, thread_index, now, proto); + ASSERT (s); + + if (!is_sm) { - s = nat_ed_session_alloc (sm, thread_index, now, proto); - ASSERT (s); s->in2out.addr = l_addr; s->in2out.port = l_port; s->nat_proto = nat_proto; s->in2out.fib_index = rx_fib_index; - s->out2in.fib_index = sm->outside_fib_index; + s->out2in.fib_index = outside_fib_index; + + // suggest using local port to allocation function + outside_port = l_port; - switch (vec_len (sm->outside_fibs)) + // hairpinning? + int is_hairpinning = nat44_ed_external_sm_lookup ( + sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport); + s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING; + + // destination addr/port updated with real values in + // nat_ed_alloc_addr_and_port + nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, daddr, 0, + s->out2in.fib_index, proto); + nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32); + if (NAT_PROTOCOL_ICMP == nat_proto) { - case 0: - s->out2in.fib_index = sm->outside_fib_index; - break; - case 1: - s->out2in.fib_index = sm->outside_fibs[0].fib_index; - break; - default: - s->out2in.fib_index = nat_outside_fib_index_lookup (sm, r_addr); - break; + nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port); } + else + { + nat_6t_flow_dport_rewrite_set (&s->o2i, l_port); + } + nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index); - /* Try to create dynamic translation */ - outside_port = l_port; // suggest using local port to allocation function - if (nat_ed_alloc_addr_and_port (sm, rx_fib_index, nat_proto, - thread_index, r_addr, r_port, proto, - sm->port_per_thread, - tsm->snat_thread_index, s, - &outside_addr, - &outside_port, &out2in_ed_kv)) + if (nat_ed_alloc_addr_and_port ( + sm, rx_fib_index, nat_proto, thread_index, daddr, dport, proto, + sm->port_per_thread, tsm->snat_thread_index, s, &outside_addr, + &outside_port)) { nat_elog_notice ("addresses exhausted"); b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS]; @@ -406,42 +370,40 
@@ slow_path_ed (snat_main_t * sm, } else { - if (PREDICT_FALSE (identity_nat)) - { - *sessionp = NULL; - return next; - } - s = nat_ed_session_alloc (sm, thread_index, now, proto); - ASSERT (s); - s->out2in.addr = sm_addr; - s->out2in.port = sm_port; + // static mapping + s->out2in.addr = outside_addr = sm_addr; + s->out2in.port = outside_port = sm_port; s->in2out.addr = l_addr; s->in2out.port = l_port; s->nat_proto = nat_proto; s->in2out.fib_index = rx_fib_index; - s->out2in.fib_index = sm->outside_fib_index; - switch (vec_len (sm->outside_fibs)) - { - case 0: - s->out2in.fib_index = sm->outside_fib_index; - break; - case 1: - s->out2in.fib_index = sm->outside_fibs[0].fib_index; - break; - default: - s->out2in.fib_index = nat_outside_fib_index_lookup (sm, r_addr); - break; - } - + s->out2in.fib_index = outside_fib_index; s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; - init_ed_kv (&out2in_ed_kv, sm_addr, sm_port, r_addr, r_port, - s->out2in.fib_index, proto, thread_index, - s - tsm->sessions); - if (clib_bihash_add_or_overwrite_stale_16_8 - (&sm->out2in_ed, &out2in_ed_kv, nat44_o2i_ed_is_idle_session_cb, - &ctx)) - nat_elog_notice ("out2in-ed key add failed"); + // hairpinning? 
+ int is_hairpinning = nat44_ed_external_sm_lookup ( + sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport); + s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING; + + if (NAT_PROTOCOL_ICMP == nat_proto) + { + nat_6t_o2i_flow_init (sm, thread_index, s, daddr, sm_port, sm_addr, + sm_port, s->out2in.fib_index, proto); + nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port); + } + else + { + nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, sm_addr, + sm_port, s->out2in.fib_index, proto); + nat_6t_flow_dport_rewrite_set (&s->o2i, l_port); + } + nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32); + nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index); + if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2)) + { + nat_elog_notice ("out2in key add failed"); + goto error; + } } if (lb) @@ -450,17 +412,26 @@ slow_path_ed (snat_main_t * sm, s->ext_host_addr = r_addr; s->ext_host_port = r_port; - clib_bihash_kv_16_8_t in2out_ed_kv; - init_ed_kv (&in2out_ed_kv, l_addr, l_port, r_addr, r_port, rx_fib_index, - proto, thread_index, s - tsm->sessions); - ctx.now = now; - ctx.thread_index = thread_index; - if (clib_bihash_add_or_overwrite_stale_16_8 (&tsm->in2out_ed, &in2out_ed_kv, - nat44_i2o_ed_is_idle_session_cb, - &ctx)) - nat_elog_notice ("in2out-ed key add failed"); + nat_6t_i2o_flow_init (sm, thread_index, s, l_addr, l_port, r_addr, r_port, + rx_fib_index, proto); + nat_6t_flow_saddr_rewrite_set (&s->i2o, outside_addr.as_u32); + nat_6t_flow_daddr_rewrite_set (&s->i2o, daddr.as_u32); + if (NAT_PROTOCOL_ICMP == nat_proto) + { + nat_6t_flow_icmp_id_rewrite_set (&s->i2o, outside_port); + } + else + { + nat_6t_flow_sport_rewrite_set (&s->i2o, outside_port); + nat_6t_flow_dport_rewrite_set (&s->i2o, dport); + } + nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index); - *sessionp = s; + if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1)) + { + nat_elog_notice ("in2out key add failed"); + goto error; + } /* log NAT event */ 
nat_ipfix_logging_nat44_ses_create (thread_index, @@ -479,7 +450,21 @@ slow_path_ed (snat_main_t * sm, per_vrf_sessions_register_session (s, thread_index); + *sessionp = s; return next; +error: + if (s) + { + if (!is_sm) + { + snat_free_outside_address_and_port (sm->addresses, thread_index, + &outside_addr, outside_port, + nat_proto); + } + nat_ed_session_delete (sm, s, thread_index, 1); + } + *sessionp = s = NULL; + return NAT_NEXT_DROP; } static_always_inline int @@ -494,7 +479,7 @@ nat44_ed_not_translate (snat_main_t * sm, vlib_node_runtime_t * node, udp->src_port, sm->outside_fib_index, ip->protocol); /* NAT packet aimed at external address if has active sessions */ - if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value)) + if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value)) { /* or is static mappings */ ip4_address_t placeholder_addr; @@ -530,8 +515,15 @@ nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip, if (ip->protocol == IP_PROTOCOL_ICMP) { - if (get_icmp_i2o_ed_key (b, ip, 0, ~0, ~0, 0, 0, 0, &kv)) + ip4_address_t lookup_saddr, lookup_daddr; + u16 lookup_sport, lookup_dport; + u8 lookup_protocol; + if (nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr, + &lookup_sport, &lookup_daddr, + &lookup_dport, &lookup_protocol)) return 0; + init_ed_k (&kv, lookup_saddr, lookup_sport, lookup_daddr, lookup_dport, + 0, lookup_protocol); } else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP) { @@ -545,12 +537,13 @@ nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip, ip->protocol); } - if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value)) + if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value)) { ASSERT (thread_index == ed_value_get_thread_index (&value)); s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value)); + if (is_fwd_bypass_session (s)) { if (ip->protocol == IP_PROTOCOL_TCP) @@ -588,7 +581,7 @@ nat44_ed_not_translate_output_feature (snat_main_t 
* sm, ip4_header_t * ip, /* src NAT check */ init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port, tx_fib_index, ip->protocol); - if (!clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value)) + if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value)) { ASSERT (thread_index == ed_value_get_thread_index (&value)); s = @@ -606,12 +599,13 @@ nat44_ed_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip, /* dst NAT check */ init_ed_k (&kv, ip->dst_address, dst_port, ip->src_address, src_port, rx_fib_index, ip->protocol); - if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value)) + if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value)) { ASSERT (thread_index == ed_value_get_thread_index (&value)); s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value)); + if (is_fwd_bypass_session (s)) return 0; @@ -629,141 +623,115 @@ nat44_ed_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip, return 0; } -#ifndef CLIB_MARCH_VARIANT -u32 -icmp_match_in2out_ed (snat_main_t * sm, vlib_node_runtime_t * node, - u32 thread_index, vlib_buffer_t * b, - ip4_header_t * ip, ip4_address_t * addr, - u16 * port, u32 * fib_index, nat_protocol_t * proto, - void *d, void *e, u8 * dont_translate) +static inline u32 +icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip, + icmp46_header_t *icmp, u32 sw_if_index, + u32 rx_fib_index, vlib_node_runtime_t *node, + u32 next, f64 now, u32 thread_index, + nat_protocol_t nat_proto, snat_session_t **s_p) { - u32 sw_if_index; - u32 rx_fib_index; - clib_bihash_kv_16_8_t kv, value; - u32 next = ~0; + vlib_main_t *vm = vlib_get_main (); + u16 checksum; int err; snat_session_t *s = NULL; - u16 l_port = 0, r_port = 0; // initialize to workaround gcc warning - vlib_main_t *vm = vlib_get_main (); - snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; - *dont_translate = 0; - f64 now = vlib_time_now (vm); - - sw_if_index = vnet_buffer 
(b)->sw_if_index[VLIB_RX]; - rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index); + u8 lookup_protocol = ip->protocol; + u16 lookup_sport, lookup_dport; + ip4_address_t lookup_saddr, lookup_daddr; - err = - get_icmp_i2o_ed_key (b, ip, rx_fib_index, ~0, ~0, proto, &l_port, - &r_port, &kv); + err = nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr, + &lookup_sport, &lookup_daddr, + &lookup_dport, &lookup_protocol); if (err != 0) { b->error = node->errors[err]; - next = NAT_NEXT_DROP; - goto out; + return NAT_NEXT_DROP; } - if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value)) + if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0) { - if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0) + if (PREDICT_FALSE (nat44_ed_not_translate_output_feature ( + sm, ip, lookup_sport, lookup_dport, thread_index, sw_if_index, + vnet_buffer (b)->sw_if_index[VLIB_TX], now))) { - if (PREDICT_FALSE - (nat44_ed_not_translate_output_feature - (sm, ip, l_port, r_port, thread_index, - sw_if_index, vnet_buffer (b)->sw_if_index[VLIB_TX], now))) - { - *dont_translate = 1; - goto out; - } + return next; } - else + } + else + { + if (PREDICT_FALSE (nat44_ed_not_translate (sm, node, sw_if_index, ip, + NAT_PROTOCOL_ICMP, + rx_fib_index, thread_index))) { - if (PREDICT_FALSE (nat44_ed_not_translate (sm, node, sw_if_index, - ip, NAT_PROTOCOL_ICMP, - rx_fib_index, - thread_index))) - { - *dont_translate = 1; - goto out; - } + return next; } + } - if (PREDICT_FALSE - (icmp_type_is_error_message - (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))) - { - b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE]; - next = NAT_NEXT_DROP; - goto out; - } + if (PREDICT_FALSE (icmp_type_is_error_message ( + vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))) + { + b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE]; + return NAT_NEXT_DROP; + } - next = - slow_path_ed (sm, b, ip->src_address, ip->dst_address, l_port, r_port, - ip->protocol, rx_fib_index, &s, node, next, - 
thread_index, vlib_time_now (vm)); + next = slow_path_ed (sm, b, ip->src_address, ip->dst_address, lookup_sport, + lookup_dport, ip->protocol, rx_fib_index, &s, node, + next, thread_index, vlib_time_now (vm)); - if (PREDICT_FALSE (next == NAT_NEXT_DROP)) - goto out; + if (NAT_NEXT_DROP == next) + goto out; - if (!s) - { - *dont_translate = 1; - goto out; - } - } - else + if (PREDICT_TRUE (!ip4_is_fragment (ip))) { - if (PREDICT_FALSE - (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags != - ICMP4_echo_request - && vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags != - ICMP4_echo_reply - && !icmp_type_is_error_message (vnet_buffer (b)->ip. - reass.icmp_type_or_tcp_flags))) + ip_csum_t sum = ip_incremental_checksum_buffer ( + vm, b, (u8 *) icmp - (u8 *) vlib_buffer_get_current (b), + ntohs (ip->length) - ip4_header_bytes (ip), 0); + checksum = ~ip_csum_fold (sum); + if (PREDICT_FALSE (checksum != 0 && checksum != 0xffff)) { - b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE]; next = NAT_NEXT_DROP; goto out; } - - ASSERT (thread_index == ed_value_get_thread_index (&value)); - s = - pool_elt_at_index (tsm->sessions, - ed_value_get_session_index (&value)); } + out: - if (s) + if (PREDICT_TRUE (next != NAT_NEXT_DROP && s)) { - *addr = s->out2in.addr; - *port = s->out2in.port; - *fib_index = s->out2in.fib_index; - } - if (d) - { - *(snat_session_t **) d = s; + /* Accounting */ + nat44_session_update_counters ( + s, now, vlib_buffer_length_in_chain (vm, b), thread_index); + /* Per-user LRU list maintenance */ + nat44_session_update_lru (sm, s, thread_index); } + *s_p = s; return next; } -#endif static snat_session_t * -nat44_ed_in2out_unknown_proto (snat_main_t * sm, - vlib_buffer_t * b, - ip4_header_t * ip, - u32 rx_fib_index, - u32 thread_index, - f64 now, - vlib_main_t * vm, vlib_node_runtime_t * node) +nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b, + ip4_header_t *ip, u32 rx_fib_index, + u32 thread_index, f64 now, + vlib_main_t *vm, + 
vlib_node_runtime_t *node) { clib_bihash_kv_8_8_t kv, value; clib_bihash_kv_16_8_t s_kv, s_value; - snat_static_mapping_t *m; - u32 old_addr, new_addr = 0; - ip_csum_t sum; + snat_static_mapping_t *m = NULL; snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; - snat_session_t *s; + snat_session_t *s = NULL; u32 outside_fib_index = sm->outside_fib_index; int i; - u8 is_sm = 0; + ip4_address_t new_src_addr = { 0 }; + ip4_address_t new_dst_addr = ip->dst_address; + + if (PREDICT_FALSE ( + nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index))) + { + b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED]; + nat_ipfix_logging_max_sessions (thread_index, + sm->max_translations_per_thread); + nat_elog_notice ("maximum sessions exceeded"); + return 0; + } switch (vec_len (sm->outside_fibs)) { @@ -777,112 +745,105 @@ nat44_ed_in2out_unknown_proto (snat_main_t * sm, outside_fib_index = nat_outside_fib_index_lookup (sm, ip->dst_address); break; } - old_addr = ip->src_address.as_u32; - init_ed_k (&s_kv, ip->src_address, 0, ip->dst_address, 0, rx_fib_index, - ip->protocol); + init_nat_k (&kv, ip->src_address, 0, rx_fib_index, 0); - if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &s_kv, &s_value)) + /* Try to find static mapping first */ + if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value)) { - ASSERT (thread_index == ed_value_get_thread_index (&s_value)); - s = - pool_elt_at_index (tsm->sessions, - ed_value_get_session_index (&s_value)); - new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32; + m = pool_elt_at_index (sm->static_mappings, value.value); + new_src_addr = m->external_addr; } else { - if (PREDICT_FALSE - (nat44_ed_maximum_sessions_exceeded - (sm, rx_fib_index, thread_index))) + pool_foreach (s, tsm->sessions) { - b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED]; - nat_ipfix_logging_max_sessions (thread_index, - sm->max_translations_per_thread); - nat_elog_notice ("maximum 
sessions exceeded"); - return 0; + if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32) + { + init_ed_k (&s_kv, s->out2in.addr, 0, ip->dst_address, 0, + outside_fib_index, ip->protocol); + if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value)) + { + new_src_addr = s->out2in.addr; + } + break; + } } - init_nat_k (&kv, ip->src_address, 0, rx_fib_index, 0); - - /* Try to find static mapping first */ - if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value)) - { - m = pool_elt_at_index (sm->static_mappings, value.value); - new_addr = ip->src_address.as_u32 = m->external_addr.as_u32; - is_sm = 1; - goto create_ses; - } - else + if (!new_src_addr.as_u32) { - /* *INDENT-OFF* */ - pool_foreach (s, tsm->sessions) { - if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32) - { - new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32; - - init_ed_k(&s_kv, s->out2in.addr, 0, ip->dst_address, 0, outside_fib_index, ip->protocol); - if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value)) - goto create_ses; - - break; - } - } - /* *INDENT-ON* */ - for (i = 0; i < vec_len (sm->addresses); i++) { init_ed_k (&s_kv, sm->addresses[i].addr, 0, ip->dst_address, 0, outside_fib_index, ip->protocol); - if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value)) + if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value)) { - new_addr = ip->src_address.as_u32 = - sm->addresses[i].addr.as_u32; - goto create_ses; + new_src_addr = sm->addresses[i].addr; } } - return 0; } + } - create_ses: - s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol); - if (!s) - { - b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED]; - nat_elog_warn ("create NAT session failed"); - return 0; - } + if (!new_src_addr.as_u32) + { + // could not allocate address for translation ... 
+ return 0; + } - s->ext_host_addr.as_u32 = ip->dst_address.as_u32; - s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO; - s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT; - s->out2in.addr.as_u32 = new_addr; - s->out2in.fib_index = outside_fib_index; - s->in2out.addr.as_u32 = old_addr; - s->in2out.fib_index = rx_fib_index; - s->in2out.port = s->out2in.port = ip->protocol; - if (is_sm) - s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; - - /* Add to lookup tables */ - init_ed_kv (&s_kv, s->in2out.addr, 0, ip->dst_address, 0, rx_fib_index, - ip->protocol, thread_index, s - tsm->sessions); - if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &s_kv, 1)) - nat_elog_notice ("in2out key add failed"); - - init_ed_kv (&s_kv, s->out2in.addr, 0, ip->dst_address, 0, - outside_fib_index, ip->protocol, thread_index, - s - tsm->sessions); - if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1)) - nat_elog_notice ("out2in key add failed"); - - per_vrf_sessions_register_session (s, thread_index); + s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol); + if (!s) + { + b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED]; + nat_elog_warn ("create NAT session failed"); + return 0; } - /* Update IP checksum */ - sum = ip->checksum; - sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address); - ip->checksum = ip_csum_fold (sum); + nat_6t_i2o_flow_init (sm, thread_index, s, ip->src_address, 0, + ip->dst_address, 0, rx_fib_index, ip->protocol); + nat_6t_flow_saddr_rewrite_set (&s->i2o, new_src_addr.as_u32); + nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index); + + // hairpinning? 
+ int is_hairpinning = + nat44_ed_external_sm_lookup (sm, ip->dst_address, 0, NAT_PROTOCOL_OTHER, + outside_fib_index, &new_dst_addr, NULL); + s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING; + + nat_6t_flow_daddr_rewrite_set (&s->i2o, new_dst_addr.as_u32); + nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index); + + nat_6t_o2i_flow_init (sm, thread_index, s, new_dst_addr, 0, new_src_addr, 0, + outside_fib_index, ip->protocol); + nat_6t_flow_saddr_rewrite_set (&s->o2i, ip->dst_address.as_u32); + nat_6t_flow_daddr_rewrite_set (&s->o2i, ip->src_address.as_u32); + nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index); + + s->ext_host_addr.as_u32 = ip->dst_address.as_u32; + s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO; + s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT; + s->out2in.addr.as_u32 = new_src_addr.as_u32; + s->out2in.fib_index = outside_fib_index; + s->in2out.addr.as_u32 = ip->src_address.as_u32; + s->in2out.fib_index = rx_fib_index; + s->in2out.port = s->out2in.port = ip->protocol; + if (m) + s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; + + if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1)) + { + nat_elog_notice ("in2out flow hash add failed"); + nat_ed_session_delete (sm, s, thread_index, 1); + return NULL; + } + + if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 1)) + { + nat_elog_notice ("out2in flow hash add failed"); + nat_ed_session_delete (sm, s, thread_index, 1); + return NULL; + } + + per_vrf_sessions_register_session (s, thread_index); /* Accounting */ nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b), @@ -890,13 +851,6 @@ nat44_ed_in2out_unknown_proto (snat_main_t * sm, /* Per-user LRU list maintenance */ nat44_session_update_lru (sm, s, thread_index); - /* Hairpinning */ - if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0) - nat44_ed_hairpinning_unknown_proto (sm, b, ip); - - if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0) - vnet_buffer (b)->sw_if_index[VLIB_TX] = outside_fib_index; - 
return s; } @@ -924,15 +878,17 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, while (n_left_from > 0) { vlib_buffer_t *b0; - u32 sw_if_index0, rx_fib_index0, proto0, new_addr0, old_addr0, - iph_offset0 = 0; - u16 old_port0, new_port0; + u32 sw_if_index0, rx_fib_index0, iph_offset0 = 0; + nat_protocol_t proto0; ip4_header_t *ip0; - udp_header_t *udp0; - tcp_header_t *tcp0; snat_session_t *s0 = 0; clib_bihash_kv_16_8_t kv0, value0; - ip_csum_t sum0; + nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS; + nat_6t_flow_t *f = 0; + ip4_address_t lookup_saddr, lookup_daddr; + u16 lookup_sport, lookup_dport; + u8 lookup_protocol; + int lookup_skipped = 0; b0 = *b; b++; @@ -973,16 +929,8 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, goto trace0; } - udp0 = ip4_next_header (ip0); - tcp0 = (tcp_header_t *) udp0; proto0 = ip_proto_to_nat_proto (ip0->protocol); - if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER)) - { - next[0] = def_slow; - goto trace0; - } - if (is_output_feature) { if (PREDICT_FALSE @@ -993,27 +941,79 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP)) { - next[0] = def_slow; - goto trace0; + if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_request && + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_reply && + !icmp_type_is_error_message ( + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags)) + { + b0->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE]; + next[0] = NAT_NEXT_DROP; + goto trace0; + } + int err = nat_get_icmp_session_lookup_values ( + b0, ip0, &lookup_saddr, &lookup_sport, &lookup_daddr, + &lookup_dport, &lookup_protocol); + if (err != 0) + { + b0->error = node->errors[err]; + next[0] = NAT_NEXT_DROP; + goto trace0; + } + } + else + { + lookup_protocol = ip0->protocol; + lookup_saddr = ip0->src_address; + lookup_daddr = ip0->dst_address; + lookup_sport = vnet_buffer (b0)->ip.reass.l4_src_port; 
+ lookup_dport = vnet_buffer (b0)->ip.reass.l4_dst_port; } - init_ed_k (&kv0, ip0->src_address, - vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address, - vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0, - ip0->protocol); + /* there might be a stashed index in vnet_buffer2 from handoff or + * classify node, see if it can be used */ + if (!pool_is_free_index (tsm->sessions, + vnet_buffer2 (b0)->nat.cached_session_index)) + { + s0 = pool_elt_at_index (tsm->sessions, + vnet_buffer2 (b0)->nat.cached_session_index); + if (PREDICT_TRUE ( + nat_6t_flow_match (&s0->i2o, b0, lookup_saddr, lookup_sport, + lookup_daddr, lookup_dport, lookup_protocol, + rx_fib_index0) + // for some hairpinning cases there are two "i2i" flows instead + // of i2o and o2i as both hosts are on inside + || (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING && + nat_6t_flow_match ( + &s0->o2i, b0, lookup_saddr, lookup_sport, lookup_daddr, + lookup_dport, lookup_protocol, rx_fib_index0)))) + { + /* yes, this is the droid we're looking for */ + lookup_skipped = 1; + goto skip_lookup; + } + s0 = NULL; + } + + init_ed_k (&kv0, ip0->src_address, lookup_sport, ip0->dst_address, + lookup_dport, rx_fib_index0, lookup_protocol); - // lookup for session - if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0)) + // lookup flow + if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0)) { - // session does not exist go slow path + // flow does not exist go slow path next[0] = def_slow; goto trace0; } + ASSERT (thread_index == ed_value_get_thread_index (&value0)); s0 = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value0)); + skip_lookup: + if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index))) { // session is closed, go slow path @@ -1054,96 +1054,58 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, b0->flags |= VNET_BUFFER_F_IS_NATED; - if (!is_output_feature) - vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index; - - old_addr0 = 
ip0->src_address.as_u32; - new_addr0 = ip0->src_address.as_u32 = s0->out2in.addr.as_u32; - sum0 = ip0->checksum; - sum0 = - ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - src_address); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - sum0 = - ip_csum_update (sum0, ip0->dst_address.as_u32, - s0->ext_host_addr.as_u32, ip4_header_t, - dst_address); - ip0->checksum = ip_csum_fold (sum0); + if (nat_6t_flow_match (&s0->i2o, b0, lookup_saddr, lookup_sport, + lookup_daddr, lookup_dport, lookup_protocol, + rx_fib_index0)) + { + f = &s0->i2o; + } + else if (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING && + nat_6t_flow_match (&s0->o2i, b0, lookup_saddr, lookup_sport, + lookup_daddr, lookup_dport, lookup_protocol, + rx_fib_index0)) + { + f = &s0->o2i; + } + else + { + translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH; + nat_free_session_data (sm, s0, thread_index, 0); + nat_ed_session_delete (sm, s0, thread_index, 1); + next[0] = NAT_NEXT_DROP; + goto trace0; + } - old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port; + if (NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate ( + sm, b0, ip0, f, proto0, is_output_feature))) + { + nat_free_session_data (sm, s0, thread_index, 0); + nat_ed_session_delete (sm, s0, thread_index, 1); + next[0] = NAT_NEXT_DROP; + goto trace0; + } - if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP)) + switch (proto0) { - if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) - { - new_port0 = udp0->src_port = s0->out2in.port; - sum0 = tcp0->checksum; - sum0 = - ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address); - sum0 = - ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, - length); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - sum0 = - ip_csum_update (sum0, ip0->dst_address.as_u32, - s0->ext_host_addr.as_u32, ip4_header_t, - dst_address); - sum0 = - ip_csum_update (sum0, - vnet_buffer (b0)->ip.reass.l4_dst_port, - s0->ext_host_port, ip4_header_t, length); - tcp0->dst_port = 
s0->ext_host_port; - ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; - } - mss_clamping (sm->mss_clamping, tcp0, &sum0); - tcp0->checksum = ip_csum_fold (sum0); - } + case NAT_PROTOCOL_TCP: vlib_increment_simple_counter (&sm->counters.fastpath.in2out_ed.tcp, thread_index, sw_if_index0, 1); nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index); - } - else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment - && udp0->checksum) - { - new_port0 = udp0->src_port = s0->out2in.port; - sum0 = udp0->checksum; - sum0 = - ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address); - sum0 = - ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, length); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - sum0 = - ip_csum_update (sum0, ip0->dst_address.as_u32, - s0->ext_host_addr.as_u32, ip4_header_t, - dst_address); - sum0 = - ip_csum_update (sum0, vnet_buffer (b0)->ip.reass.l4_dst_port, - s0->ext_host_port, ip4_header_t, length); - udp0->dst_port = s0->ext_host_port; - ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; - } - udp0->checksum = ip_csum_fold (sum0); + break; + case NAT_PROTOCOL_UDP: vlib_increment_simple_counter (&sm->counters.fastpath.in2out_ed.udp, thread_index, sw_if_index0, 1); - } - else - { - if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) - { - new_port0 = udp0->src_port = s0->out2in.port; - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - udp0->dst_port = s0->ext_host_port; - ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; - } - vlib_increment_simple_counter (&sm->counters.fastpath. 
- in2out_ed.udp, thread_index, - sw_if_index0, 1); - } + break; + case NAT_PROTOCOL_ICMP: + vlib_increment_simple_counter (&sm->counters.fastpath.in2out_ed.icmp, + thread_index, sw_if_index0, 1); + break; + case NAT_PROTOCOL_OTHER: + vlib_increment_simple_counter ( + &sm->counters.fastpath.in2out_ed.other, thread_index, sw_if_index0, + 1); + break; } /* Accounting */ @@ -1163,11 +1125,21 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, t->sw_if_index = sw_if_index0; t->next_index = next[0]; t->is_slow_path = 0; + t->translation_error = translation_error; + t->lookup_skipped = lookup_skipped; + clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key)); if (s0) - t->session_index = s0 - tsm->sessions; + { + t->session_index = s0 - tsm->sessions; + clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of)); + clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if)); + t->translation_via_i2of = (&s0->i2o == f); + } else - t->session_index = ~0; + { + t->session_index = ~0; + } } if (next[0] == NAT_NEXT_DROP) @@ -1208,16 +1180,14 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm, while (n_left_from > 0) { vlib_buffer_t *b0; - u32 sw_if_index0, rx_fib_index0, proto0, new_addr0, old_addr0, - iph_offset0 = 0; - u16 old_port0, new_port0; + u32 sw_if_index0, rx_fib_index0, iph_offset0 = 0; + nat_protocol_t proto0; ip4_header_t *ip0; udp_header_t *udp0; - tcp_header_t *tcp0; icmp46_header_t *icmp0; snat_session_t *s0 = 0; clib_bihash_kv_16_8_t kv0, value0; - ip_csum_t sum0; + int translation_error = NAT_ED_TRNSL_ERR_SUCCESS; b0 = *b; @@ -1244,18 +1214,23 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm, } udp0 = ip4_next_header (ip0); - tcp0 = (tcp_header_t *) udp0; icmp0 = (icmp46_header_t *) udp0; proto0 = ip_proto_to_nat_proto (ip0->protocol); if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER)) { - s0 = nat44_ed_in2out_unknown_proto (sm, b0, ip0, - rx_fib_index0, - thread_index, now, vm, node); + s0 = nat44_ed_in2out_slowpath_unknown_proto ( + sm, 
b0, ip0, rx_fib_index0, thread_index, now, vm, node); if (!s0) next[0] = NAT_NEXT_DROP; + if (NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate ( + sm, b0, ip0, &s0->i2o, proto0, is_output_feature))) + { + goto trace0; + } + vlib_increment_simple_counter (&sm->counters.slowpath. in2out_ed.other, thread_index, sw_if_index0, 1); @@ -1264,10 +1239,17 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm, if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP)) { - next[0] = - icmp_in2out_ed_slow_path (sm, b0, ip0, icmp0, sw_if_index0, - rx_fib_index0, node, next[0], now, - thread_index, &s0); + next[0] = icmp_in2out_ed_slow_path (sm, b0, ip0, icmp0, sw_if_index0, + rx_fib_index0, node, next[0], + now, thread_index, proto0, &s0); + if (NAT_NEXT_DROP != next[0] && s0 && + NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate ( + sm, b0, ip0, &s0->i2o, proto0, is_output_feature))) + { + goto trace0; + } + vlib_increment_simple_counter (&sm->counters.slowpath. 
in2out_ed.icmp, thread_index, sw_if_index0, 1); @@ -1278,7 +1260,7 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm, vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address, vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0, ip0->protocol); - if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0)) + if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0)) { ASSERT (thread_index == ed_value_get_thread_index (&value0)); s0 = @@ -1342,95 +1324,27 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm, b0->flags |= VNET_BUFFER_F_IS_NATED; - if (!is_output_feature) - vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index; - - old_addr0 = ip0->src_address.as_u32; - new_addr0 = ip0->src_address.as_u32 = s0->out2in.addr.as_u32; - sum0 = ip0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - src_address); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32, - s0->ext_host_addr.as_u32, ip4_header_t, - dst_address); - ip0->checksum = ip_csum_fold (sum0); - - old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port; + if (NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate ( + sm, b0, ip0, &s0->i2o, proto0, is_output_feature))) + { + nat_free_session_data (sm, s0, thread_index, 0); + nat_ed_session_delete (sm, s0, thread_index, 1); + s0 = NULL; + goto trace0; + } if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP)) { - if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) - { - new_port0 = udp0->src_port = s0->out2in.port; - sum0 = tcp0->checksum; - sum0 = - ip_csum_update (sum0, old_addr0, new_addr0, - ip4_header_t, dst_address); - sum0 = - ip_csum_update (sum0, old_port0, new_port0, - ip4_header_t, length); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - sum0 = - ip_csum_update (sum0, ip0->dst_address.as_u32, - s0->ext_host_addr.as_u32, - ip4_header_t, dst_address); - sum0 = - ip_csum_update (sum0, - vnet_buffer 
(b0)->ip.reass.l4_dst_port, - s0->ext_host_port, ip4_header_t, length); - tcp0->dst_port = s0->ext_host_port; - ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; - } - mss_clamping (sm->mss_clamping, tcp0, &sum0); - tcp0->checksum = ip_csum_fold (sum0); - } vlib_increment_simple_counter (&sm->counters.slowpath.in2out_ed.tcp, thread_index, sw_if_index0, 1); nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index); } - else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment - && udp0->checksum) + else { - new_port0 = udp0->src_port = s0->out2in.port; - sum0 = udp0->checksum; - sum0 = - ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address); - sum0 = - ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, length); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32, - s0->ext_host_addr.as_u32, - ip4_header_t, dst_address); - sum0 = - ip_csum_update (sum0, - vnet_buffer (b0)->ip.reass.l4_dst_port, - s0->ext_host_port, ip4_header_t, length); - udp0->dst_port = s0->ext_host_port; - ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; - } - udp0->checksum = ip_csum_fold (sum0); vlib_increment_simple_counter (&sm->counters.slowpath.in2out_ed.udp, thread_index, sw_if_index0, 1); } - else - { - if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) - { - new_port0 = udp0->src_port = s0->out2in.port; - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - udp0->dst_port = s0->ext_host_port; - ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; - } - vlib_increment_simple_counter (&sm->counters.slowpath. 
- in2out_ed.udp, thread_index, - sw_if_index0, 1); - } - } /* Accounting */ nat44_session_update_counters (s0, now, @@ -1448,11 +1362,21 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm, t->sw_if_index = sw_if_index0; t->next_index = next[0]; t->is_slow_path = 1; + t->translation_error = translation_error; + clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key)); if (s0) - t->session_index = s0 - tsm->sessions; + { + t->session_index = s0 - tsm->sessions; + clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of)); + clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if)); + t->translation_via_i2of = 1; + } + else - t->session_index = ~0; + { + t->session_index = ~0; + } } if (next[0] == NAT_NEXT_DROP) diff --git a/src/plugins/nat/nat.c b/src/plugins/nat/nat.c index 81af143ab2c..245689db45d 100644 --- a/src/plugins/nat/nat.c +++ b/src/plugins/nat/nat.c @@ -37,6 +37,7 @@ #include <nat/nat44-ei/nat44_ei.h> #include <vpp/app/version.h> +#include <nat/lib/nat_inlines.h> snat_main_t snat_main; @@ -152,11 +153,6 @@ VNET_FEATURE_INIT (ip4_snat_hairpin_dst, static) = { .node_name = "nat44-hairpin-dst", .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"), }; -VNET_FEATURE_INIT (ip4_nat44_ed_hairpin_dst, static) = { - .arc_name = "ip4-unicast", - .node_name = "nat44-ed-hairpin-dst", - .runs_after = VNET_FEATURES ("acl-plugin-in-ip4-fa","ip4-sv-reassembly-feature"), -}; /* Hook up output features */ VNET_FEATURE_INIT (ip4_snat_in2out_output, static) = { @@ -186,12 +182,6 @@ VNET_FEATURE_INIT (ip4_nat44_ed_in2out_output, static) = { .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"), .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"), }; -VNET_FEATURE_INIT (ip4_nat44_ed_hairpin_src, static) = { - .arc_name = "ip4-output", - .node_name = "nat44-ed-hairpin-src", - .runs_after = VNET_FEATURES ("ip4-sv-reassembly-output-feature"), - .runs_before = VNET_FEATURES ("acl-plugin-out-ip4-fa"), -}; /* Hook up ip4-local features */ 
VNET_FEATURE_INIT (ip4_nat_hairpinning, static) = @@ -200,13 +190,6 @@ VNET_FEATURE_INIT (ip4_nat_hairpinning, static) = .node_name = "nat44-hairpinning", .runs_before = VNET_FEATURES("ip4-local-end-of-arc"), }; -VNET_FEATURE_INIT (ip4_nat44_ed_hairpinning, static) = -{ - .arc_name = "ip4-local", - .node_name = "nat44-ed-hairpinning", - .runs_before = VNET_FEATURES("ip4-local-end-of-arc"), -}; - VLIB_PLUGIN_REGISTER () = { .version = VPP_BUILD_VER, @@ -275,13 +258,13 @@ format_ed_session_kvp (u8 * s, va_list * args) u32 fib_index; split_ed_kv (v, &l_addr, &r_addr, &proto, &fib_index, &l_port, &r_port); - s = - format (s, - "local %U:%d remote %U:%d proto %U fib %d thread-index %u session-index %u", - format_ip4_address, &l_addr, clib_net_to_host_u16 (l_port), - format_ip4_address, &r_addr, clib_net_to_host_u16 (r_port), - format_ip_protocol, proto, fib_index, - ed_value_get_session_index (v), ed_value_get_thread_index (v)); + s = format (s, + "local %U:%d remote %U:%d proto %U fib %d thread-index %u " + "session-index %u", + format_ip4_address, &l_addr, clib_net_to_host_u16 (l_port), + format_ip4_address, &r_addr, clib_net_to_host_u16 (r_port), + format_ip_protocol, proto, fib_index, + ed_value_get_thread_index (v), ed_value_get_session_index (v)); return s; } @@ -291,39 +274,22 @@ nat_free_session_data (snat_main_t * sm, snat_session_t * s, u32 thread_index, u8 is_ha) { clib_bihash_kv_8_8_t kv; - u8 proto; - u16 r_port, l_port; - ip4_address_t *l_addr, *r_addr; - u32 fib_index = 0; - clib_bihash_kv_16_8_t ed_kv; snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data, thread_index); if (is_ed_session (s)) { per_vrf_sessions_unregister_session (s, thread_index); + + if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 0)) + nat_elog_warn ("flow hash del failed"); + + if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 0)) + nat_elog_warn ("flow hash del failed"); } if (is_fwd_bypass_session (s)) { - if (snat_is_unk_proto_session (s)) - 
{ - init_ed_k (&ed_kv, s->in2out.addr, 0, s->ext_host_addr, 0, 0, - s->in2out.port); - } - else - { - l_port = s->in2out.port; - r_port = s->ext_host_port; - l_addr = &s->in2out.addr; - r_addr = &s->ext_host_addr; - proto = nat_proto_to_ip_proto (s->nat_proto); - fib_index = s->in2out.fib_index; - init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, - proto); - } - if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0)) - nat_elog_warn ("in2out_ed key del failed"); return; } @@ -333,36 +299,6 @@ nat_free_session_data (snat_main_t * sm, snat_session_t * s, u32 thread_index, if (is_affinity_sessions (s)) nat_affinity_unlock (s->ext_host_addr, s->out2in.addr, s->nat_proto, s->out2in.port); - l_addr = &s->out2in.addr; - r_addr = &s->ext_host_addr; - fib_index = s->out2in.fib_index; - if (snat_is_unk_proto_session (s)) - { - proto = s->in2out.port; - r_port = 0; - l_port = 0; - } - else - { - proto = nat_proto_to_ip_proto (s->nat_proto); - l_port = s->out2in.port; - r_port = s->ext_host_port; - } - init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto); - if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &ed_kv, 0)) - nat_elog_warn ("out2in_ed key del failed"); - l_addr = &s->in2out.addr; - fib_index = s->in2out.fib_index; - if (!snat_is_unk_proto_session (s)) - l_port = s->in2out.port; - if (is_twice_nat_session (s)) - { - r_addr = &s->ext_host_nat_addr; - r_port = s->ext_host_nat_port; - } - init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto); - if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0)) - nat_elog_warn ("in2out_ed key del failed"); if (!is_ha) nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index, @@ -1985,18 +1921,14 @@ feature_set: sw_if_index, 1, 0, 0); if (!is_inside) { - if (sm->endpoint_dependent) - vnet_feature_enable_disable ("ip4-local", - "nat44-ed-hairpinning", - sw_if_index, 1, 0, 0); - else - vnet_feature_enable_disable ("ip4-local", - "nat44-hairpinning", - sw_if_index, 1, 0, 0); - } - } - 
else - { + if (!sm->endpoint_dependent) + vnet_feature_enable_disable ("ip4-local", + "nat44-hairpinning", + sw_if_index, 1, 0, 0); + } + } + else + { int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 0); if (rv) return rv; @@ -2005,41 +1937,38 @@ feature_set: pool_put (sm->interfaces, i); if (is_inside) { - if (sm->endpoint_dependent) - vnet_feature_enable_disable ("ip4-local", - "nat44-ed-hairpinning", - sw_if_index, 0, 0, 0); - else - vnet_feature_enable_disable ("ip4-local", - "nat44-hairpinning", - sw_if_index, 0, 0, 0); - } - } - } - else - { - if ((nat_interface_is_inside(i) && is_inside) || - (nat_interface_is_outside(i) && !is_inside)) - return 0; + if (!sm->endpoint_dependent) + vnet_feature_enable_disable ("ip4-local", + "nat44-hairpinning", + sw_if_index, 0, 0, 0); + } + } + } + else + { + if ((nat_interface_is_inside (i) && is_inside) || + (nat_interface_is_outside (i) && !is_inside)) + return 0; - if (sm->num_workers > 1) - { - del_feature_name = !is_inside ? "nat44-in2out-worker-handoff" : - "nat44-out2in-worker-handoff"; - feature_name = "nat44-handoff-classify"; - } - else if (sm->endpoint_dependent) - { - del_feature_name = !is_inside ? "nat-pre-in2out" : - "nat-pre-out2in"; + if (sm->num_workers > 1) + { + del_feature_name = !is_inside ? "nat44-in2out-worker-handoff" : + "nat44-out2in-worker-handoff"; + feature_name = "nat44-handoff-classify"; + } + else if (sm->endpoint_dependent) + { + del_feature_name = + !is_inside ? "nat-pre-in2out" : "nat-pre-out2in"; - feature_name = "nat44-ed-classify"; - } - else - { - del_feature_name = !is_inside ? "nat44-in2out" : "nat44-out2in"; - feature_name = "nat44-classify"; - } + feature_name = "nat44-ed-classify"; + } + else + { + del_feature_name = + !is_inside ? 
"nat44-in2out" : "nat44-out2in"; + feature_name = "nat44-classify"; + } int rv = ip4_sv_reass_enable_disable_with_refcnt (sw_if_index, 1); if (rv) @@ -2050,17 +1979,14 @@ feature_set: sw_if_index, 1, 0, 0); if (!is_inside) { - if (sm->endpoint_dependent) - vnet_feature_enable_disable ("ip4-local", "nat44-ed-hairpinning", - sw_if_index, 0, 0, 0); - else - vnet_feature_enable_disable ("ip4-local", "nat44-hairpinning", - sw_if_index, 0, 0, 0); - } - goto set_flags; - } + if (!sm->endpoint_dependent) + vnet_feature_enable_disable ( + "ip4-local", "nat44-hairpinning", sw_if_index, 0, 0, 0); + } + goto set_flags; + } - goto fib; + goto fib; } } /* *INDENT-ON* */ @@ -2085,10 +2011,7 @@ feature_set: if (is_inside && !sm->out2in_dpo) { - if (sm->endpoint_dependent) - vnet_feature_enable_disable ("ip4-local", "nat44-ed-hairpinning", - sw_if_index, 1, 0, 0); - else + if (!sm->endpoint_dependent) vnet_feature_enable_disable ("ip4-local", "nat44-hairpinning", sw_if_index, 1, 0, 0); } @@ -2199,10 +2122,6 @@ feature_set: !is_del); if (rv) return rv; - vnet_feature_enable_disable ("ip4-unicast", "nat44-ed-hairpin-dst", - sw_if_index, !is_del, 0, 0); - vnet_feature_enable_disable ("ip4-output", "nat44-ed-hairpin-src", - sw_if_index, !is_del, 0, 0); } else { @@ -2575,12 +2494,6 @@ nat44_set_node_indexes (snat_main_t * sm, vlib_main_t * vm) sm->hairpin_dst_node_index = node->index; node = vlib_get_node_by_name (vm, (u8 *) "nat44-hairpin-src"); sm->hairpin_src_node_index = node->index; - node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-hairpinning"); - sm->ed_hairpinning_node_index = node->index; - node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-hairpin-dst"); - sm->ed_hairpin_dst_node_index = node->index; - node = vlib_get_node_by_name (vm, (u8 *) "nat44-ed-hairpin-src"); - sm->ed_hairpin_src_node_index = node->index; } #define nat_init_simple_counter(c, n, sn) \ @@ -2778,8 +2691,8 @@ nat44_ed_plugin_enable (nat44_config_t c) if (sm->pat) { - sm->icmp_match_in2out_cb = 
icmp_match_in2out_ed; - sm->icmp_match_out2in_cb = icmp_match_out2in_ed; + sm->icmp_match_in2out_cb = NULL; + sm->icmp_match_out2in_cb = NULL; } else { @@ -2907,8 +2820,6 @@ nat44_ed_plugin_disable () vec_free (sm->max_translations_per_fib); - nat_affinity_disable (); - nat44_ed_db_free (); nat44_addresses_free (&sm->addresses); @@ -3258,8 +3169,8 @@ nat44_ed_get_worker_in2out_cb (ip4_header_t *ip, u32 rx_fib_index, init_ed_k (&kv16, ip->src_address, udp->src_port, ip->dst_address, udp->dst_port, fib_index, ip->protocol); - if (PREDICT_TRUE (!clib_bihash_search_16_8 (&sm->out2in_ed, - &kv16, &value16))) + if (PREDICT_TRUE ( + !clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))) { tsm = vec_elt_at_index (sm->per_thread_data, @@ -3327,13 +3238,13 @@ nat44_ed_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip, init_ed_k (&kv16, ip->dst_address, udp->dst_port, ip->src_address, udp->src_port, rx_fib_index, ip->protocol); - if (PREDICT_TRUE (!clib_bihash_search_16_8 (&sm->out2in_ed, - &kv16, &value16))) + if (PREDICT_TRUE ( + !clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))) { tsm = vec_elt_at_index (sm->per_thread_data, ed_value_get_thread_index (&value16)); - vnet_buffer2 (b)->nat.ed_out2in_nat_session_index = + vnet_buffer2 (b)->nat.cached_session_index = ed_value_get_session_index (&value16); next_worker_index = sm->first_worker_index + tsm->thread_index; nat_elog_debug_handoff ("HANDOFF OUT2IN (session)", @@ -3347,10 +3258,17 @@ nat44_ed_get_worker_out2in_cb (vlib_buffer_t * b, ip4_header_t * ip, } else if (proto == NAT_PROTOCOL_ICMP) { - if (!get_icmp_o2i_ed_key (b, ip, rx_fib_index, ~0, ~0, 0, 0, 0, &kv16)) + ip4_address_t lookup_saddr, lookup_daddr; + u16 lookup_sport, lookup_dport; + u8 lookup_protocol; + if (!nat_get_icmp_session_lookup_values ( + b, ip, &lookup_saddr, &lookup_sport, &lookup_daddr, &lookup_dport, + &lookup_protocol)) { - if (PREDICT_TRUE (!clib_bihash_search_16_8 (&sm->out2in_ed, - &kv16, &value16))) + init_ed_k 
(&kv16, lookup_saddr, lookup_sport, lookup_daddr, + lookup_dport, rx_fib_index, lookup_protocol); + if (PREDICT_TRUE ( + !clib_bihash_search_16_8 (&sm->flow_hash, &kv16, &value16))) { tsm = vec_elt_at_index (sm->per_thread_data, @@ -3558,9 +3476,6 @@ nat44_ed_worker_db_init (snat_main_per_thread_data_t *tsm, u32 translations, tsm->unk_proto_lru_head_index = head - tsm->lru_pool; clib_dlist_init (tsm->lru_pool, tsm->unk_proto_lru_head_index); - clib_bihash_init_16_8 (&tsm->in2out_ed, "in2out-ed", translation_buckets, 0); - clib_bihash_set_kvp_format_fn_16_8 (&tsm->in2out_ed, format_ed_session_kvp); - // TODO: ED nat is not using these // before removal large refactor required pool_alloc (tsm->list_pool, translations); @@ -3569,6 +3484,17 @@ nat44_ed_worker_db_init (snat_main_per_thread_data_t *tsm, u32 translations, } static void +reinit_ed_flow_hash () +{ + snat_main_t *sm = &snat_main; + // we expect 2 flows per session, so multiply translation_buckets by 2 + clib_bihash_init_16_8 ( + &sm->flow_hash, "ed-flow-hash", + clib_max (1, sm->num_workers) * 2 * sm->translation_buckets, 0); + clib_bihash_set_kvp_format_fn_16_8 (&sm->flow_hash, format_ed_session_kvp); +} + +static void nat44_ed_db_init (u32 translations, u32 translation_buckets, u32 user_buckets) { snat_main_t *sm = &snat_main; @@ -3576,8 +3502,7 @@ nat44_ed_db_init (u32 translations, u32 translation_buckets, u32 user_buckets) u32 static_mapping_buckets = 1024; u32 static_mapping_memory_size = 64 << 20; - clib_bihash_init_16_8 (&sm->out2in_ed, "out2in-ed", translation_buckets, 0); - clib_bihash_set_kvp_format_fn_16_8 (&sm->out2in_ed, format_ed_session_kvp); + reinit_ed_flow_hash (); clib_bihash_init_8_8 (&sm->static_mapping_by_local, "static_mapping_by_local", static_mapping_buckets, @@ -3607,7 +3532,6 @@ nat44_ed_worker_db_free (snat_main_per_thread_data_t *tsm) pool_free (tsm->sessions); pool_free (tsm->lru_pool); - clib_bihash_free_16_8 (&tsm->in2out_ed); vec_free (tsm->per_vrf_sessions_vec); // TODO: 
resolve static mappings (put only to !ED) @@ -3623,7 +3547,7 @@ nat44_ed_db_free () snat_main_per_thread_data_t *tsm; pool_free (sm->static_mappings); - clib_bihash_free_16_8 (&sm->out2in_ed); + clib_bihash_free_16_8 (&sm->flow_hash); clib_bihash_free_8_8 (&sm->static_mapping_by_local); clib_bihash_free_8_8 (&sm->static_mapping_by_external); @@ -3642,11 +3566,7 @@ nat44_ed_sessions_clear () snat_main_t *sm = &snat_main; snat_main_per_thread_data_t *tsm; - clib_bihash_free_16_8 (&sm->out2in_ed); - clib_bihash_init_16_8 ( - &sm->out2in_ed, "out2in-ed", - clib_max (1, sm->num_workers) * sm->translation_buckets, 0); - clib_bihash_set_kvp_format_fn_16_8 (&sm->out2in_ed, format_ed_session_kvp); + reinit_ed_flow_hash (); if (sm->pat) { @@ -3896,7 +3816,6 @@ nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port, u32 vrf_id, int is_in) { ip4_header_t ip; - clib_bihash_16_8_t *t; clib_bihash_kv_16_8_t kv, value; u32 fib_index = fib_table_find (FIB_PROTOCOL_IP4, vrf_id); snat_session_t *s; @@ -3913,16 +3832,15 @@ nat44_del_ed_session (snat_main_t * sm, ip4_address_t * addr, u16 port, else tsm = vec_elt_at_index (sm->per_thread_data, sm->num_workers); - t = is_in ? 
&tsm->in2out_ed : &sm->out2in_ed; init_ed_k (&kv, *addr, port, *eh_addr, eh_port, fib_index, proto); - if (clib_bihash_search_16_8 (t, &kv, &value)) + if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value)) { return VNET_API_ERROR_NO_SUCH_ENTRY; } - if (pool_is_free_index (tsm->sessions, value.value)) + if (pool_is_free_index (tsm->sessions, ed_value_get_session_index (&value))) return VNET_API_ERROR_UNSPECIFIED; - s = pool_elt_at_index (tsm->sessions, value.value); + s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value)); nat_free_session_data (sm, s, tsm - sm->per_thread_data, 0); nat_ed_session_delete (sm, s, tsm - sm->per_thread_data, 1); return 0; @@ -3952,13 +3870,343 @@ VLIB_REGISTER_NODE (nat_default_node) = { [NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH] = "nat44-ed-in2out-output-slowpath", [NAT_NEXT_OUT2IN_ED_FAST_PATH] = "nat44-ed-out2in", [NAT_NEXT_OUT2IN_ED_SLOW_PATH] = "nat44-ed-out2in-slowpath", - [NAT_NEXT_OUT2IN_ED_HANDOFF] = "nat44-ed-out2in-handoff", [NAT_NEXT_IN2OUT_CLASSIFY] = "nat44-in2out-worker-handoff", [NAT_NEXT_OUT2IN_CLASSIFY] = "nat44-out2in-worker-handoff", }, }; /* *INDENT-ON* */ +void +nat_6t_l3_l4_csum_calc (nat_6t_flow_t *f) +{ + f->l3_csum_delta = 0; + f->l4_csum_delta = 0; + if (f->ops & NAT_FLOW_OP_SADDR_REWRITE && + f->rewrite.saddr.as_u32 != f->match.saddr.as_u32) + { + f->l3_csum_delta = + ip_csum_add_even (f->l3_csum_delta, f->rewrite.saddr.as_u32); + f->l3_csum_delta = + ip_csum_sub_even (f->l3_csum_delta, f->match.saddr.as_u32); + } + else + { + f->rewrite.saddr.as_u32 = f->match.saddr.as_u32; + } + if (f->ops & NAT_FLOW_OP_DADDR_REWRITE && + f->rewrite.daddr.as_u32 != f->match.daddr.as_u32) + { + f->l3_csum_delta = + ip_csum_add_even (f->l3_csum_delta, f->rewrite.daddr.as_u32); + f->l3_csum_delta = + ip_csum_sub_even (f->l3_csum_delta, f->match.daddr.as_u32); + } + else + { + f->rewrite.daddr.as_u32 = f->match.daddr.as_u32; + } + if (f->ops & NAT_FLOW_OP_SPORT_REWRITE && f->rewrite.sport != f->match.sport) 
+ { + f->l4_csum_delta = ip_csum_add_even (f->l4_csum_delta, f->rewrite.sport); + f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.sport); + } + else + { + f->rewrite.sport = f->match.sport; + } + if (f->ops & NAT_FLOW_OP_DPORT_REWRITE && f->rewrite.dport != f->match.dport) + { + f->l4_csum_delta = ip_csum_add_even (f->l4_csum_delta, f->rewrite.dport); + f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.dport); + } + else + { + f->rewrite.dport = f->match.dport; + } + if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE && + f->rewrite.icmp_id != f->match.icmp_id) + { + f->l4_csum_delta = + ip_csum_add_even (f->l4_csum_delta, f->rewrite.icmp_id); + f->l4_csum_delta = ip_csum_sub_even (f->l4_csum_delta, f->match.icmp_id); + } + else + { + f->rewrite.icmp_id = f->match.icmp_id; + } + if (f->ops & NAT_FLOW_OP_TXFIB_REWRITE) + { + } + else + { + f->rewrite.fib_index = f->match.fib_index; + } +} + +static_always_inline int nat_6t_flow_icmp_translate (snat_main_t *sm, + vlib_buffer_t *b, + ip4_header_t *ip, + nat_6t_flow_t *f); + +static_always_inline void +nat_6t_flow_ip4_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip, + nat_6t_flow_t *f, nat_protocol_t proto, + int is_icmp_inner_ip4) +{ + udp_header_t *udp = ip4_next_header (ip); + tcp_header_t *tcp = (tcp_header_t *) udp; + + if ((NAT_PROTOCOL_TCP == proto || NAT_PROTOCOL_UDP == proto) && + !vnet_buffer (b)->ip.reass.is_non_first_fragment) + { + if (!is_icmp_inner_ip4) + { // regular case + ip->src_address = f->rewrite.saddr; + ip->dst_address = f->rewrite.daddr; + udp->src_port = f->rewrite.sport; + udp->dst_port = f->rewrite.dport; + } + else + { // icmp inner ip4 - reversed saddr/daddr + ip->src_address = f->rewrite.daddr; + ip->dst_address = f->rewrite.saddr; + udp->src_port = f->rewrite.dport; + udp->dst_port = f->rewrite.sport; + } + + if (NAT_PROTOCOL_TCP == proto) + { + ip_csum_t tcp_sum = tcp->checksum; + tcp_sum = ip_csum_sub_even (tcp_sum, f->l3_csum_delta); + tcp_sum = 
ip_csum_sub_even (tcp_sum, f->l4_csum_delta); + mss_clamping (sm->mss_clamping, tcp, &tcp_sum); + tcp->checksum = ip_csum_fold (tcp_sum); + } + else if (proto == NAT_PROTOCOL_UDP && udp->checksum) + { + ip_csum_t udp_sum = udp->checksum; + udp_sum = ip_csum_sub_even (udp_sum, f->l3_csum_delta); + udp_sum = ip_csum_sub_even (udp_sum, f->l4_csum_delta); + udp->checksum = ip_csum_fold (udp_sum); + } + } + else + { + if (!is_icmp_inner_ip4) + { // regular case + ip->src_address = f->rewrite.saddr; + ip->dst_address = f->rewrite.daddr; + } + else + { // icmp inner ip4 - reversed saddr/daddr + ip->src_address = f->rewrite.daddr; + ip->dst_address = f->rewrite.saddr; + } + } + + ip_csum_t ip_sum = ip->checksum; + ip_sum = ip_csum_sub_even (ip_sum, f->l3_csum_delta); + ip->checksum = ip_csum_fold (ip_sum); + ASSERT (ip->checksum == ip4_header_checksum (ip)); +} + +static_always_inline int +nat_6t_flow_icmp_translate (snat_main_t *sm, vlib_buffer_t *b, + ip4_header_t *ip, nat_6t_flow_t *f) +{ + if (IP_PROTOCOL_ICMP != ip->protocol) + return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED; + + icmp46_header_t *icmp = ip4_next_header (ip); + icmp_echo_header_t *echo = (icmp_echo_header_t *) (icmp + 1); + + if ((!vnet_buffer (b)->ip.reass.is_non_first_fragment)) + { + if (icmp->checksum == 0) + icmp->checksum = 0xffff; + + if (!icmp_type_is_error_message (icmp->type)) + { + if ((f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE) && + (f->rewrite.icmp_id != echo->identifier)) + { + ip_csum_t sum = icmp->checksum; + sum = ip_csum_update (sum, echo->identifier, f->rewrite.icmp_id, + icmp_echo_header_t, + identifier /* changed member */); + echo->identifier = f->rewrite.icmp_id; + icmp->checksum = ip_csum_fold (sum); + } + } + else + { + // errors are not fragmented + ip4_header_t *inner_ip = (ip4_header_t *) (echo + 1); + + if (!ip4_header_checksum_is_valid (inner_ip)) + { + return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED; + } + + nat_protocol_t inner_proto = + ip_proto_to_nat_proto (inner_ip->protocol); + + 
ip_csum_t icmp_sum = icmp->checksum; + + switch (inner_proto) + { + case NAT_PROTOCOL_UDP: + case NAT_PROTOCOL_TCP: + nat_6t_flow_ip4_translate (sm, b, inner_ip, f, inner_proto, + 1 /* is_icmp_inner_ip4 */); + icmp_sum = ip_csum_sub_even (icmp_sum, f->l3_csum_delta); + icmp->checksum = ip_csum_fold (icmp_sum); + break; + case NAT_PROTOCOL_ICMP: + if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE) + { + icmp46_header_t *inner_icmp = ip4_next_header (inner_ip); + icmp_echo_header_t *inner_echo = + (icmp_echo_header_t *) (inner_icmp + 1); + if (f->rewrite.icmp_id != inner_echo->identifier) + { + ip_csum_t sum = icmp->checksum; + sum = ip_csum_update ( + sum, inner_echo->identifier, f->rewrite.icmp_id, + icmp_echo_header_t, identifier /* changed member */); + icmp->checksum = ip_csum_fold (sum); + ip_csum_t inner_sum = inner_icmp->checksum; /* inner header keeps its own running sum */ + inner_sum = ip_csum_update ( + inner_sum, inner_echo->identifier, f->rewrite.icmp_id, + icmp_echo_header_t, identifier /* changed member */); + inner_icmp->checksum = ip_csum_fold (inner_sum); + inner_echo->identifier = f->rewrite.icmp_id; + } + } + break; + default: + clib_warning ("unexpected NAT protocol value `%d'", inner_proto); + return NAT_ED_TRNSL_ERR_TRANSLATION_FAILED; + } + } + } + return NAT_ED_TRNSL_ERR_SUCCESS; +} + +nat_translation_error_e +nat_6t_flow_buf_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip, + nat_6t_flow_t *f, nat_protocol_t proto, + int is_output_feature) +{ + if (!is_output_feature && f->ops & NAT_FLOW_OP_TXFIB_REWRITE) + { + vnet_buffer (b)->sw_if_index[VLIB_TX] = f->rewrite.fib_index; + } + + nat_6t_flow_ip4_translate (sm, b, ip, f, proto, 0 /* is_icmp_inner_ip4 */); + + if (NAT_PROTOCOL_ICMP == proto) + { + return nat_6t_flow_icmp_translate (sm, b, ip, f); + } + + return NAT_ED_TRNSL_ERR_SUCCESS; +} + +u8 * +format_nat_6t (u8 *s, va_list *args) +{ + nat_6t_t *t = va_arg (*args, nat_6t_t *); + + s = format (s, "saddr %U sport %u daddr %U dport %u proto %U fib_idx %u", + format_ip4_address, 
t->saddr.as_u8, + clib_net_to_host_u16 (t->sport), format_ip4_address, + t->daddr.as_u8, clib_net_to_host_u16 (t->dport), + format_ip_protocol, t->proto, t->fib_index); + return s; +} + +u8 * +format_nat_ed_translation_error (u8 *s, va_list *args) +{ + nat_translation_error_e e = va_arg (*args, nat_translation_error_e); + + switch (e) + { + case NAT_ED_TRNSL_ERR_SUCCESS: + s = format (s, "success"); + break; + case NAT_ED_TRNSL_ERR_TRANSLATION_FAILED: + s = format (s, "translation-failed"); + break; + case NAT_ED_TRNSL_ERR_FLOW_MISMATCH: + s = format (s, "flow-mismatch"); + break; + } + return s; +} + +u8 * +format_nat_6t_flow (u8 *s, va_list *args) +{ + nat_6t_flow_t *f = va_arg (*args, nat_6t_flow_t *); + + s = format (s, "match: %U ", format_nat_6t, &f->match); + int r = 0; + if (f->ops & NAT_FLOW_OP_SADDR_REWRITE) + { + s = format (s, "rewrite: saddr %U ", format_ip4_address, + f->rewrite.saddr.as_u8); + r = 1; + } + if (f->ops & NAT_FLOW_OP_SPORT_REWRITE) + { + if (!r) + { + s = format (s, "rewrite: "); + r = 1; + } + s = format (s, "sport %u ", clib_net_to_host_u16 (f->rewrite.sport)); + } + if (f->ops & NAT_FLOW_OP_DADDR_REWRITE) + { + if (!r) + { + s = format (s, "rewrite: "); + r = 1; + } + s = format (s, "daddr %U ", format_ip4_address, f->rewrite.daddr.as_u8); + } + if (f->ops & NAT_FLOW_OP_DPORT_REWRITE) + { + if (!r) + { + s = format (s, "rewrite: "); + r = 1; + } + s = format (s, "dport %u ", clib_net_to_host_u16 (f->rewrite.dport)); + } + if (f->ops & NAT_FLOW_OP_ICMP_ID_REWRITE) + { + if (!r) + { + s = format (s, "rewrite: "); + r = 1; + } + s = format (s, "icmp-id %u ", clib_net_to_host_u16 (f->rewrite.icmp_id)); + } + if (f->ops & NAT_FLOW_OP_TXFIB_REWRITE) + { + if (!r) + { + s = format (s, "rewrite: "); + r = 1; + } + s = format (s, "txfib %u ", f->rewrite.fib_index); + } + return s; +} + /* * fd.io coding-style-patch-verification: ON * diff --git a/src/plugins/nat/nat.h b/src/plugins/nat/nat.h index 58883d491aa..7fa1ef79c3d 100644 --- 
a/src/plugins/nat/nat.h +++ b/src/plugins/nat/nat.h @@ -88,7 +88,6 @@ typedef enum NAT_NEXT_IN2OUT_ED_OUTPUT_SLOW_PATH, NAT_NEXT_OUT2IN_ED_FAST_PATH, NAT_NEXT_OUT2IN_ED_SLOW_PATH, - NAT_NEXT_OUT2IN_ED_HANDOFF, NAT_NEXT_IN2OUT_CLASSIFY, NAT_NEXT_OUT2IN_CLASSIFY, NAT_N_NEXT, @@ -163,29 +162,17 @@ typedef enum NAT_IN2OUT_ED_N_ERROR, } nat_in2out_ed_error_t; -#define foreach_nat44_handoff_error \ -_(CONGESTION_DROP, "congestion drop") \ -_(SAME_WORKER, "same worker") \ -_(DO_HANDOFF, "do handoff") - -typedef enum -{ -#define _(sym,str) NAT44_HANDOFF_ERROR_##sym, - foreach_nat44_handoff_error -#undef _ - NAT44_HANDOFF_N_ERROR, -} nat44_handoff_error_t; - -#define foreach_nat_out2in_ed_error \ -_(UNSUPPORTED_PROTOCOL, "unsupported protocol") \ -_(OUT_OF_PORTS, "out of ports") \ -_(BAD_ICMP_TYPE, "unsupported ICMP type") \ -_(NO_TRANSLATION, "no translation") \ -_(MAX_SESSIONS_EXCEEDED, "maximum sessions exceeded") \ -_(MAX_USER_SESS_EXCEEDED, "max user sessions exceeded") \ -_(CANNOT_CREATE_USER, "cannot create NAT user") \ -_(NON_SYN, "non-SYN packet try to create session") \ -_(TCP_CLOSED, "drops due to TCP in transitory timeout") +#define foreach_nat_out2in_ed_error \ + _ (UNSUPPORTED_PROTOCOL, "unsupported protocol") \ + _ (OUT_OF_PORTS, "out of ports") \ + _ (BAD_ICMP_TYPE, "unsupported ICMP type") \ + _ (NO_TRANSLATION, "no translation") \ + _ (MAX_SESSIONS_EXCEEDED, "maximum sessions exceeded") \ + _ (MAX_USER_SESS_EXCEEDED, "max user sessions exceeded") \ + _ (CANNOT_CREATE_USER, "cannot create NAT user") \ + _ (NON_SYN, "non-SYN packet try to create session") \ + _ (TCP_CLOSED, "drops due to TCP in transitory timeout") \ + _ (HASH_ADD_FAILED, "hash table add failed") typedef enum { @@ -206,14 +193,15 @@ typedef enum #define NAT44_SES_RST 64 /* Session flags */ -#define SNAT_SESSION_FLAG_STATIC_MAPPING 1 -#define SNAT_SESSION_FLAG_UNKNOWN_PROTO 2 -#define SNAT_SESSION_FLAG_LOAD_BALANCING 4 -#define SNAT_SESSION_FLAG_TWICE_NAT 8 -#define 
SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT 16 -#define SNAT_SESSION_FLAG_FWD_BYPASS 32 -#define SNAT_SESSION_FLAG_AFFINITY 64 -#define SNAT_SESSION_FLAG_EXACT_ADDRESS 128 +#define SNAT_SESSION_FLAG_STATIC_MAPPING (1 << 0) +#define SNAT_SESSION_FLAG_UNKNOWN_PROTO (1 << 1) +#define SNAT_SESSION_FLAG_LOAD_BALANCING (1 << 2) +#define SNAT_SESSION_FLAG_TWICE_NAT (1 << 3) +#define SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT (1 << 4) +#define SNAT_SESSION_FLAG_FWD_BYPASS (1 << 5) +#define SNAT_SESSION_FLAG_AFFINITY (1 << 6) +#define SNAT_SESSION_FLAG_EXACT_ADDRESS (1 << 7) +#define SNAT_SESSION_FLAG_HAIRPINNING (1 << 8) /* NAT interface flags */ #define NAT_INTERFACE_FLAG_IS_INSIDE 1 @@ -240,6 +228,72 @@ typedef CLIB_PACKED(struct }) per_vrf_sessions_t; /* *INDENT-ON* */ +typedef struct +{ + ip4_address_t saddr, daddr; + u32 fib_index; + u16 sport, dport; + u16 icmp_id; + u8 proto; +} nat_6t_t; + +typedef struct +{ +#define NAT_FLOW_OP_SADDR_REWRITE (1 << 1) +#define NAT_FLOW_OP_SPORT_REWRITE (1 << 2) +#define NAT_FLOW_OP_DADDR_REWRITE (1 << 3) +#define NAT_FLOW_OP_DPORT_REWRITE (1 << 4) +#define NAT_FLOW_OP_ICMP_ID_REWRITE (1 << 5) +#define NAT_FLOW_OP_TXFIB_REWRITE (1 << 6) + int ops; + nat_6t_t match; + nat_6t_t rewrite; + uword l3_csum_delta; + uword l4_csum_delta; +} nat_6t_flow_t; + +always_inline void +nat_6t_flow_saddr_rewrite_set (nat_6t_flow_t *f, u32 saddr) +{ + f->ops |= NAT_FLOW_OP_SADDR_REWRITE; + f->rewrite.saddr.as_u32 = saddr; +} + +always_inline void +nat_6t_flow_daddr_rewrite_set (nat_6t_flow_t *f, u32 daddr) +{ + f->ops |= NAT_FLOW_OP_DADDR_REWRITE; + f->rewrite.daddr.as_u32 = daddr; +} + +always_inline void +nat_6t_flow_sport_rewrite_set (nat_6t_flow_t *f, u32 sport) +{ + f->ops |= NAT_FLOW_OP_SPORT_REWRITE; + f->rewrite.sport = sport; +} + +always_inline void +nat_6t_flow_dport_rewrite_set (nat_6t_flow_t *f, u32 dport) +{ + f->ops |= NAT_FLOW_OP_DPORT_REWRITE; + f->rewrite.dport = dport; +} + +always_inline void +nat_6t_flow_txfib_rewrite_set (nat_6t_flow_t *f, 
u32 tx_fib_index) +{ + f->ops |= NAT_FLOW_OP_TXFIB_REWRITE; + f->rewrite.fib_index = tx_fib_index; +} + +always_inline void +nat_6t_flow_icmp_id_rewrite_set (nat_6t_flow_t *f, u16 id) +{ + f->ops |= NAT_FLOW_OP_ICMP_ID_REWRITE; + f->rewrite.icmp_id = id; +} + /* *INDENT-OFF* */ typedef CLIB_PACKED(struct { @@ -261,6 +315,9 @@ typedef CLIB_PACKED(struct nat_protocol_t nat_proto; + nat_6t_flow_t i2o; + nat_6t_flow_t o2i; + /* Flags */ u32 flags; @@ -439,9 +496,6 @@ typedef struct clib_bihash_8_8_t out2in; clib_bihash_8_8_t in2out; - /* Endpoint dependent sessions lookup tables */ - clib_bihash_16_8_t in2out_ed; - /* Find-a-user => src address lookup */ clib_bihash_8_8_t user_hash; @@ -536,8 +590,8 @@ typedef struct snat_main_s /* Static mapping pool */ snat_static_mapping_t *static_mappings; - /* Endpoint-dependent out2in mappings */ - clib_bihash_16_8_t out2in_ed; + /* Endpoint dependent lookup table */ + clib_bihash_16_8_t flow_hash; /* Interface pool */ snat_interface_t *interfaces; @@ -616,9 +670,6 @@ typedef struct snat_main_s u32 hairpinning_node_index; u32 hairpin_dst_node_index; u32 hairpin_src_node_index; - u32 ed_hairpinning_node_index; - u32 ed_hairpin_dst_node_index; - u32 ed_hairpin_src_node_index; nat44_config_t rconfig; //nat44_config_t cconfig; @@ -1103,18 +1154,6 @@ u32 icmp_match_out2in_slow (snat_main_t * sm, vlib_node_runtime_t * node, nat_protocol_t * proto, void *d, void *e, u8 * dont_translate); -/* ICMP endpoint-dependent session match functions */ -u32 icmp_match_out2in_ed (snat_main_t * sm, vlib_node_runtime_t * node, - u32 thread_index, vlib_buffer_t * b0, - ip4_header_t * ip0, ip4_address_t * addr, - u16 * port, u32 * fib_index, nat_protocol_t * proto, - void *d, void *e, u8 * dont_translate); -u32 icmp_match_in2out_ed (snat_main_t * sm, vlib_node_runtime_t * node, - u32 thread_index, vlib_buffer_t * b0, - ip4_header_t * ip0, ip4_address_t * addr, - u16 * port, u32 * fib_index, nat_protocol_t * proto, - void *d, void *e, u8 * 
dont_translate); - u32 icmp_in2out (snat_main_t * sm, vlib_buffer_t * b0, ip4_header_t * ip0, icmp46_header_t * icmp0, u32 sw_if_index0, u32 rx_fib_index0, vlib_node_runtime_t * node, u32 next0, u32 thread_index, @@ -1126,22 +1165,17 @@ u32 icmp_out2in (snat_main_t * sm, vlib_buffer_t * b0, ip4_header_t * ip0, void *d, void *e); /* hairpinning functions */ -u32 snat_icmp_hairpinning (snat_main_t * sm, vlib_buffer_t * b0, - ip4_header_t * ip0, icmp46_header_t * icmp0, - int is_ed); +u32 snat_icmp_hairpinning (snat_main_t *sm, vlib_buffer_t *b0, + ip4_header_t *ip0, icmp46_header_t *icmp0); + void nat_hairpinning_sm_unknown_proto (snat_main_t * sm, vlib_buffer_t * b, ip4_header_t * ip); -void nat44_ed_hairpinning_unknown_proto (snat_main_t * sm, vlib_buffer_t * b, - ip4_header_t * ip); -int snat_hairpinning (vlib_main_t * vm, vlib_node_runtime_t * node, - snat_main_t * sm, vlib_buffer_t * b0, - ip4_header_t * ip0, udp_header_t * udp0, - tcp_header_t * tcp0, u32 proto0, int is_ed, +int snat_hairpinning (vlib_main_t *vm, vlib_node_runtime_t *node, + snat_main_t *sm, vlib_buffer_t *b0, ip4_header_t *ip0, + udp_header_t *udp0, tcp_header_t *tcp0, u32 proto0, int do_trace); /* Call back functions for clib_bihash_add_or_overwrite_stale */ -int nat44_i2o_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void *arg); -int nat44_o2i_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void *arg); int nat44_i2o_is_idle_session_cb (clib_bihash_kv_8_8_t * kv, void *arg); int nat44_o2i_is_idle_session_cb (clib_bihash_kv_8_8_t * kv, void *arg); @@ -1496,6 +1530,24 @@ u32 nat_calc_bihash_buckets (u32 n_elts); void nat44_addresses_free (snat_address_t **addresses); +typedef enum +{ + NAT_ED_TRNSL_ERR_SUCCESS = 0, + NAT_ED_TRNSL_ERR_TRANSLATION_FAILED = 1, + NAT_ED_TRNSL_ERR_FLOW_MISMATCH = 2, +} nat_translation_error_e; + +nat_translation_error_e +nat_6t_flow_buf_translate (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip, + nat_6t_flow_t *f, nat_protocol_t proto, + int 
is_output_feature); + +void nat_6t_l3_l4_csum_calc (nat_6t_flow_t *f); + +format_function_t format_nat_ed_translation_error; +format_function_t format_nat_6t_flow; +format_function_t format_ed_session_kvp; + #endif /* __included_nat_h__ */ /* * fd.io coding-style-patch-verification: ON diff --git a/src/plugins/nat/nat44-ei/nat44_ei_ha.h b/src/plugins/nat/nat44-ei/nat44_ei_ha.h index c466d4c9288..5639c8d0239 100644 --- a/src/plugins/nat/nat44-ei/nat44_ei_ha.h +++ b/src/plugins/nat/nat44-ei/nat44_ei_ha.h @@ -22,6 +22,7 @@ #include <vnet/vnet.h> #include <vnet/ip/ip.h> +#include <nat/nat.h> /* Call back functions for received HA events on passive/failover */ typedef void (*nat_ha_sadd_cb_t) (ip4_address_t * in_addr, u16 in_port, @@ -30,6 +31,7 @@ typedef void (*nat_ha_sadd_cb_t) (ip4_address_t * in_addr, u16 in_port, ip4_address_t * ehn_addr, u16 ehn_port, u8 proto, u32 fib_index, u16 flags, u32 thread_index); + typedef void (*nat_ha_sdel_cb_t) (ip4_address_t * out_addr, u16 out_port, ip4_address_t * eh_addr, u16 eh_port, u8 proto, u32 fib_index, u32 thread_index); diff --git a/src/plugins/nat/nat44-ei/nat44_ei_in2out.c b/src/plugins/nat/nat44-ei/nat44_ei_in2out.c index 892518fff97..303c588d34e 100644 --- a/src/plugins/nat/nat44-ei/nat44_ei_in2out.c +++ b/src/plugins/nat/nat44-ei/nat44_ei_in2out.c @@ -785,8 +785,7 @@ icmp_in2out (snat_main_t * sm, if (vnet_buffer (b0)->sw_if_index[VLIB_TX] == ~0) { - if (0 != snat_icmp_hairpinning (sm, b0, ip0, icmp0, - sm->endpoint_dependent)) + if (0 != snat_icmp_hairpinning (sm, b0, ip0, icmp0)) vnet_buffer (b0)->sw_if_index[VLIB_TX] = fib_index; } @@ -1904,9 +1903,8 @@ VLIB_NODE_FN (snat_in2out_fast_node) (vlib_main_t * vm, } /* Hairpinning */ - is_hairpinning = - snat_hairpinning (vm, node, sm, b0, ip0, udp0, tcp0, proto0, 0, - 0 /* do_trace */ ); + is_hairpinning = snat_hairpinning (vm, node, sm, b0, ip0, udp0, tcp0, + proto0, 0 /* do_trace */); trace0: if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE) diff --git 
a/src/plugins/nat/nat44/ed_inlines.h b/src/plugins/nat/nat44/ed_inlines.h index 1b4df4d02fd..87de25e990b 100644 --- a/src/plugins/nat/nat44/ed_inlines.h +++ b/src/plugins/nat/nat44/ed_inlines.h @@ -51,6 +51,60 @@ nat_ed_lru_insert (snat_main_per_thread_data_t * tsm, return 1; } +static_always_inline void +nat_6t_flow_to_ed_k (clib_bihash_kv_16_8_t *kv, nat_6t_flow_t *f) +{ + init_ed_k (kv, f->match.saddr, f->match.sport, f->match.daddr, + f->match.dport, f->match.fib_index, f->match.proto); +} + +static_always_inline void +nat_6t_flow_to_ed_kv (clib_bihash_kv_16_8_t *kv, nat_6t_flow_t *f, + u32 thread_idx, u32 session_idx) +{ + init_ed_kv (kv, f->match.saddr, f->match.sport, f->match.daddr, + f->match.dport, f->match.fib_index, f->match.proto, thread_idx, + session_idx); +} + +static_always_inline int +nat_ed_ses_i2o_flow_hash_add_del (snat_main_t *sm, u32 thread_idx, + snat_session_t *s, int is_add) +{ + snat_main_per_thread_data_t *tsm = + vec_elt_at_index (sm->per_thread_data, thread_idx); + clib_bihash_kv_16_8_t kv; + if (0 == is_add) + { + nat_6t_flow_to_ed_k (&kv, &s->i2o); + } + else + { + nat_6t_flow_to_ed_kv (&kv, &s->i2o, thread_idx, s - tsm->sessions); + nat_6t_l3_l4_csum_calc (&s->i2o); + } + return clib_bihash_add_del_16_8 (&sm->flow_hash, &kv, is_add); +} + +static_always_inline int +nat_ed_ses_o2i_flow_hash_add_del (snat_main_t *sm, u32 thread_idx, + snat_session_t *s, int is_add) +{ + snat_main_per_thread_data_t *tsm = + vec_elt_at_index (sm->per_thread_data, thread_idx); + clib_bihash_kv_16_8_t kv; + if (0 == is_add) + { + nat_6t_flow_to_ed_k (&kv, &s->o2i); + } + else + { + nat_6t_flow_to_ed_kv (&kv, &s->o2i, thread_idx, s - tsm->sessions); + nat_6t_l3_l4_csum_calc (&s->o2i); + } + return clib_bihash_add_del_16_8 (&sm->flow_hash, &kv, is_add); +} + always_inline void nat_ed_session_delete (snat_main_t * sm, snat_session_t * ses, u32 thread_index, int lru_delete @@ -64,6 +118,10 @@ nat_ed_session_delete (snat_main_t * sm, snat_session_t * ses, 
clib_dlist_remove (tsm->lru_pool, ses->lru_index); } pool_put_index (tsm->lru_pool, ses->lru_index); + if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, ses, 0)) + nat_elog_warn ("flow hash del failed"); + if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, ses, 0)) + nat_elog_warn ("flow hash del failed"); pool_put (tsm->sessions, ses); vlib_set_simple_counter (&sm->total_sessions, thread_index, 0, pool_elts (tsm->sessions)); @@ -225,10 +283,10 @@ per_vrf_sessions_unregister_session (snat_session_t * s, u32 thread_index) per_vrf_sessions_t *per_vrf_sessions; ASSERT (s->per_vrf_sessions_index != ~0); - + tsm = vec_elt_at_index (sm->per_thread_data, thread_index); - per_vrf_sessions = vec_elt_at_index (tsm->per_vrf_sessions_vec, - s->per_vrf_sessions_index); + per_vrf_sessions = + vec_elt_at_index (tsm->per_vrf_sessions_vec, s->per_vrf_sessions_index); ASSERT (per_vrf_sessions->ses_count != 0); @@ -247,9 +305,57 @@ per_vrf_sessions_is_expired (snat_session_t * s, u32 thread_index) ASSERT (s->per_vrf_sessions_index != ~0); tsm = vec_elt_at_index (sm->per_thread_data, thread_index); - per_vrf_sessions = vec_elt_at_index (tsm->per_vrf_sessions_vec, - s->per_vrf_sessions_index); + per_vrf_sessions = + vec_elt_at_index (tsm->per_vrf_sessions_vec, s->per_vrf_sessions_index); return per_vrf_sessions->expired; } +static_always_inline void +nat_6t_flow_init (nat_6t_flow_t *f, u32 thread_idx, ip4_address_t saddr, + u16 sport, ip4_address_t daddr, u16 dport, u32 fib_index, + u8 proto, u32 session_idx) +{ + clib_memset (f, 0, sizeof (*f)); + f->match.saddr = saddr; + f->match.sport = sport; + f->match.daddr = daddr; + f->match.dport = dport; + f->match.proto = proto; + f->match.fib_index = fib_index; +} + +static_always_inline void +nat_6t_i2o_flow_init (snat_main_t *sm, u32 thread_idx, snat_session_t *s, + ip4_address_t saddr, u16 sport, ip4_address_t daddr, + u16 dport, u32 fib_index, u8 proto) +{ + snat_main_per_thread_data_t *tsm = + vec_elt_at_index 
(sm->per_thread_data, thread_idx); + nat_6t_flow_init (&s->i2o, thread_idx, saddr, sport, daddr, dport, fib_index, + proto, s - tsm->sessions); +} + +static_always_inline void +nat_6t_o2i_flow_init (snat_main_t *sm, u32 thread_idx, snat_session_t *s, + ip4_address_t saddr, u16 sport, ip4_address_t daddr, + u16 dport, u32 fib_index, u8 proto) +{ + snat_main_per_thread_data_t *tsm = + vec_elt_at_index (sm->per_thread_data, thread_idx); + nat_6t_flow_init (&s->o2i, thread_idx, saddr, sport, daddr, dport, fib_index, + proto, s - tsm->sessions); +} + +static_always_inline int +nat_6t_flow_match (nat_6t_flow_t *f, vlib_buffer_t *b, ip4_address_t saddr, + u16 sport, ip4_address_t daddr, u16 dport, u8 protocol, + u32 fib_index) +{ + return f->match.daddr.as_u32 == daddr.as_u32 && + f->match.dport == vnet_buffer (b)->ip.reass.l4_dst_port && + f->match.proto == protocol && f->match.fib_index == fib_index && + f->match.saddr.as_u32 == saddr.as_u32 && + f->match.sport == vnet_buffer (b)->ip.reass.l4_src_port; +} + #endif diff --git a/src/plugins/nat/nat44_classify.c b/src/plugins/nat/nat44_classify.c index 6cdb57721aa..85f8c64afd5 100644 --- a/src/plugins/nat/nat44_classify.c +++ b/src/plugins/nat/nat44_classify.c @@ -22,6 +22,7 @@ #include <vnet/fib/ip4_fib.h> #include <nat/nat.h> #include <nat/nat_inlines.h> +#include <nat/nat44/ed_inlines.h> #define foreach_nat44_classify_error \ _(NEXT_IN2OUT, "next in2out") \ @@ -294,8 +295,6 @@ nat44_ed_classify_node_fn_inline (vlib_main_t * vm, nat44_classify_next_t next_index; snat_main_t *sm = &snat_main; snat_static_mapping_t *m; - u32 thread_index = vm->thread_index; - snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; u32 next_in2out = 0, next_out2in = 0; from = vlib_frame_vector_args (frame); @@ -347,9 +346,31 @@ nat44_ed_classify_node_fn_inline (vlib_main_t * vm, vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0, ip0->protocol); /* process whole packet */ - if (!clib_bihash_search_16_8 - (&tsm->in2out_ed, 
&ed_kv0, &ed_value0)) - goto enqueue0; + if (!clib_bihash_search_16_8 (&sm->flow_hash, &ed_kv0, + &ed_value0)) + { + ASSERT (vm->thread_index == + ed_value_get_thread_index (&ed_value0)); + snat_main_per_thread_data_t *tsm = + &sm->per_thread_data[vm->thread_index]; + snat_session_t *s = pool_elt_at_index ( + tsm->sessions, ed_value_get_session_index (&ed_value0)); + clib_bihash_kv_16_8_t i2o_kv; + nat_6t_flow_to_ed_k (&i2o_kv, &s->i2o); + vnet_buffer2 (b0)->nat.cached_session_index = + ed_value_get_session_index (&ed_value0); + if (i2o_kv.key[0] == ed_kv0.key[0] && + i2o_kv.key[1] == ed_kv0.key[1]) + { + next0 = NAT_NEXT_IN2OUT_ED_FAST_PATH; + } + else + { + next0 = NAT_NEXT_OUT2IN_ED_FAST_PATH; + } + + goto enqueue0; + } /* session doesn't exist so continue in code */ } diff --git a/src/plugins/nat/nat44_cli.c b/src/plugins/nat/nat44_cli.c index adcf324850d..d1a08718ed7 100644 --- a/src/plugins/nat/nat44_cli.c +++ b/src/plugins/nat/nat44_cli.c @@ -300,7 +300,7 @@ nat44_show_hash_command_fn (vlib_main_t * vm, unformat_input_t * input, vlib_cli_output (vm, "%U", format_bihash_8_8, &sm->static_mapping_by_external, verbose); - vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->out2in_ed, verbose); + vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->flow_hash, verbose); vec_foreach_index (i, sm->per_thread_data) { tsm = vec_elt_at_index (sm->per_thread_data, i); @@ -308,7 +308,7 @@ nat44_show_hash_command_fn (vlib_main_t * vm, unformat_input_t * input, i, vlib_worker_threads[i].name); if (sm->endpoint_dependent) { - vlib_cli_output (vm, "%U", format_bihash_16_8, &tsm->in2out_ed, + vlib_cli_output (vm, "%U", format_bihash_16_8, &sm->flow_hash, verbose); } else diff --git a/src/plugins/nat/nat44_hairpinning.c b/src/plugins/nat/nat44_hairpinning.c index 9432f554246..37dfd7827f6 100644 --- a/src/plugins/nat/nat44_hairpinning.c +++ b/src/plugins/nat/nat44_hairpinning.c @@ -94,10 +94,9 @@ is_hairpinning (snat_main_t * sm, ip4_address_t * dst_addr) #ifndef 
CLIB_MARCH_VARIANT int -snat_hairpinning (vlib_main_t * vm, vlib_node_runtime_t * node, - snat_main_t * sm, vlib_buffer_t * b0, ip4_header_t * ip0, - udp_header_t * udp0, tcp_header_t * tcp0, u32 proto0, - int is_ed, int do_trace) +snat_hairpinning (vlib_main_t *vm, vlib_node_runtime_t *node, snat_main_t *sm, + vlib_buffer_t *b0, ip4_header_t *ip0, udp_header_t *udp0, + tcp_header_t *tcp0, u32 proto0, int do_trace) { snat_session_t *s0 = NULL; clib_bihash_kv_8_8_t kv0, value0; @@ -128,32 +127,17 @@ snat_hairpinning (vlib_main_t * vm, vlib_node_runtime_t * node, else ti = sm->num_workers; - if (is_ed) - { - clib_bihash_kv_16_8_t ed_kv, ed_value; - init_ed_k (&ed_kv, ip0->dst_address, udp0->dst_port, - ip0->src_address, udp0->src_port, sm->outside_fib_index, - ip0->protocol); - rv = clib_bihash_search_16_8 (&sm->out2in_ed, &ed_kv, &ed_value); - ASSERT (ti == ed_value_get_thread_index (&ed_value)); - si = ed_value_get_session_index (&ed_value); - } - else - { - - init_nat_k (&kv0, ip0->dst_address, udp0->dst_port, - sm->outside_fib_index, proto0); - rv = - clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, - &value0); - si = value0.value; - } + init_nat_k (&kv0, ip0->dst_address, udp0->dst_port, + sm->outside_fib_index, proto0); + rv = clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, + &value0); if (rv) { rv = 0; goto trace; } + si = value0.value; s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si); new_dst_addr0 = s0->in2out.addr.as_u32; new_dst_port0 = s0->in2out.port; @@ -237,9 +221,8 @@ trace: #ifndef CLIB_MARCH_VARIANT u32 -snat_icmp_hairpinning (snat_main_t * sm, - vlib_buffer_t * b0, - ip4_header_t * ip0, icmp46_header_t * icmp0, int is_ed) +snat_icmp_hairpinning (snat_main_t *sm, vlib_buffer_t *b0, ip4_header_t *ip0, + icmp46_header_t *icmp0) { clib_bihash_kv_8_8_t kv0, value0; u32 old_dst_addr0, new_dst_addr0; @@ -264,26 +247,12 @@ snat_icmp_hairpinning (snat_main_t * sm, if (protocol != NAT_PROTOCOL_TCP && protocol != 
NAT_PROTOCOL_UDP) return 1; - if (is_ed) - { - clib_bihash_kv_16_8_t ed_kv, ed_value; - init_ed_k (&ed_kv, ip0->dst_address, l4_header->src_port, - ip0->src_address, l4_header->dst_port, - sm->outside_fib_index, inner_ip0->protocol); - if (clib_bihash_search_16_8 (&sm->out2in_ed, &ed_kv, &ed_value)) - return 1; - ASSERT (ti == ed_value_get_thread_index (&ed_value)); - si = ed_value_get_session_index (&ed_value); - } - else - { - init_nat_k (&kv0, ip0->dst_address, l4_header->src_port, - sm->outside_fib_index, protocol); - if (clib_bihash_search_8_8 - (&sm->per_thread_data[ti].out2in, &kv0, &value0)) - return 1; - si = value0.value; - } + init_nat_k (&kv0, ip0->dst_address, l4_header->src_port, + sm->outside_fib_index, protocol); + if (clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, &kv0, + &value0)) + return 1; + si = value0.value; s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si); new_dst_addr0 = s0->in2out.addr.as_u32; vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; @@ -324,36 +293,29 @@ snat_icmp_hairpinning (snat_main_t * sm, if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv0, &value0)) { - if (!is_ed) + icmp_echo_header_t *echo0 = (icmp_echo_header_t *) (icmp0 + 1); + u16 icmp_id0 = echo0->identifier; + init_nat_k (&kv0, ip0->dst_address, icmp_id0, sm->outside_fib_index, + NAT_PROTOCOL_ICMP); + if (sm->num_workers > 1) + ti = + (clib_net_to_host_u16 (icmp_id0) - 1024) / sm->port_per_thread; + else + ti = sm->num_workers; + int rv = clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, + &kv0, &value0); + if (!rv) { - icmp_echo_header_t *echo0 = (icmp_echo_header_t *) (icmp0 + 1); - u16 icmp_id0 = echo0->identifier; - init_nat_k (&kv0, ip0->dst_address, icmp_id0, - sm->outside_fib_index, NAT_PROTOCOL_ICMP); - if (sm->num_workers > 1) - ti = - (clib_net_to_host_u16 (icmp_id0) - - 1024) / sm->port_per_thread; - else - ti = sm->num_workers; - int rv = - clib_bihash_search_8_8 (&sm->per_thread_data[ti].out2in, 
&kv0, - &value0); - if (!rv) - { - si = value0.value; - s0 = - pool_elt_at_index (sm->per_thread_data[ti].sessions, si); - new_dst_addr0 = s0->in2out.addr.as_u32; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = - s0->in2out.fib_index; - echo0->identifier = s0->in2out.port; - sum0 = icmp0->checksum; - sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port, - icmp_echo_header_t, identifier); - icmp0->checksum = ip_csum_fold (sum0); - goto change_addr; - } + si = value0.value; + s0 = pool_elt_at_index (sm->per_thread_data[ti].sessions, si); + new_dst_addr0 = s0->in2out.addr.as_u32; + vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; + echo0->identifier = s0->in2out.port; + sum0 = icmp0->checksum; + sum0 = ip_csum_update (sum0, icmp_id0, s0->in2out.port, + icmp_echo_header_t, identifier); + icmp0->checksum = ip_csum_fold (sum0); + goto change_addr; } return 1; @@ -407,58 +369,9 @@ nat_hairpinning_sm_unknown_proto (snat_main_t * sm, } #endif -#ifndef CLIB_MARCH_VARIANT -void -nat44_ed_hairpinning_unknown_proto (snat_main_t * sm, - vlib_buffer_t * b, ip4_header_t * ip) -{ - u32 old_addr, new_addr = 0, ti = 0; - clib_bihash_kv_8_8_t kv, value; - clib_bihash_kv_16_8_t s_kv, s_value; - snat_static_mapping_t *m; - ip_csum_t sum; - snat_session_t *s; - - if (sm->num_workers > 1) - ti = sm->worker_out2in_cb (b, ip, sm->outside_fib_index, 0); - else - ti = sm->num_workers; - - old_addr = ip->dst_address.as_u32; - init_ed_k (&s_kv, ip->dst_address, 0, ip->src_address, 0, - sm->outside_fib_index, ip->protocol); - if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value)) - { - init_nat_k (&kv, ip->dst_address, 0, 0, 0); - if (clib_bihash_search_8_8 - (&sm->static_mapping_by_external, &kv, &value)) - return; - - m = pool_elt_at_index (sm->static_mappings, value.value); - if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0) - vnet_buffer (b)->sw_if_index[VLIB_TX] = m->fib_index; - new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32; - } - else - { - ASSERT (ti == 
ed_value_get_thread_index (&s_value)); - s = - pool_elt_at_index (sm->per_thread_data[ti].sessions, - ed_value_get_session_index (&s_value)); - if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0) - vnet_buffer (b)->sw_if_index[VLIB_TX] = s->in2out.fib_index; - new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32; - } - sum = ip->checksum; - sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address); - ip->checksum = ip_csum_fold (sum); -} -#endif - static inline uword -nat44_hairpinning_fn_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, int is_ed) +nat44_hairpinning_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame) { u32 n_left_from, *from, *to_next; nat_hairpin_next_t next_index; @@ -507,9 +420,8 @@ nat44_hairpinning_fn_inline (vlib_main_t * vm, vnet_get_config_data (&cm->config_main, &b0->current_config_index, &next0, 0); - if (snat_hairpinning - (vm, node, sm, b0, ip0, udp0, tcp0, proto0, is_ed, - 1 /* do_trace */ )) + if (snat_hairpinning (vm, node, sm, b0, ip0, udp0, tcp0, proto0, + 1 /* do_trace */)) next0 = NAT_HAIRPIN_NEXT_LOOKUP; if (next0 != NAT_HAIRPIN_NEXT_DROP) @@ -535,7 +447,7 @@ VLIB_NODE_FN (nat44_hairpinning_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return nat44_hairpinning_fn_inline (vm, node, frame, 0); + return nat44_hairpinning_fn_inline (vm, node, frame); } /* *INDENT-OFF* */ @@ -552,31 +464,9 @@ VLIB_REGISTER_NODE (nat44_hairpinning_node) = { }; /* *INDENT-ON* */ -VLIB_NODE_FN (nat44_ed_hairpinning_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - return nat44_hairpinning_fn_inline (vm, node, frame, 1); -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (nat44_ed_hairpinning_node) = { - .name = "nat44-ed-hairpinning", - .vector_size = sizeof (u32), - .type = VLIB_NODE_TYPE_INTERNAL, - .format_trace = format_nat_hairpin_trace, - .n_next_nodes = NAT_HAIRPIN_N_NEXT, - .next_nodes = { - 
[NAT_HAIRPIN_NEXT_DROP] = "error-drop", - [NAT_HAIRPIN_NEXT_LOOKUP] = "ip4-lookup", - }, -}; -/* *INDENT-ON* */ - static inline uword -snat_hairpin_dst_fn_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, int is_ed) +snat_hairpin_dst_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame) { u32 n_left_from, *from, *to_next; nat_hairpin_next_t next_index; @@ -625,20 +515,17 @@ snat_hairpin_dst_fn_inline (vlib_main_t * vm, tcp_header_t *tcp0 = (tcp_header_t *) udp0; snat_hairpinning (vm, node, sm, b0, ip0, udp0, tcp0, proto0, - is_ed, 1 /* do_trace */ ); + 1 /* do_trace */); } else if (proto0 == NAT_PROTOCOL_ICMP) { icmp46_header_t *icmp0 = ip4_next_header (ip0); - snat_icmp_hairpinning (sm, b0, ip0, icmp0, is_ed); + snat_icmp_hairpinning (sm, b0, ip0, icmp0); } else { - if (is_ed) - nat44_ed_hairpinning_unknown_proto (sm, b0, ip0); - else - nat_hairpinning_sm_unknown_proto (sm, b0, ip0); + nat_hairpinning_sm_unknown_proto (sm, b0, ip0); } vnet_buffer (b0)->snat.flags = SNAT_FLAG_HAIRPINNING; @@ -668,7 +555,7 @@ VLIB_NODE_FN (snat_hairpin_dst_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return snat_hairpin_dst_fn_inline (vm, node, frame, 0); + return snat_hairpin_dst_fn_inline (vm, node, frame); } /* *INDENT-OFF* */ @@ -685,31 +572,9 @@ VLIB_REGISTER_NODE (snat_hairpin_dst_node) = { }; /* *INDENT-ON* */ -VLIB_NODE_FN (nat44_ed_hairpin_dst_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - return snat_hairpin_dst_fn_inline (vm, node, frame, 1); -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (nat44_ed_hairpin_dst_node) = { - .name = "nat44-ed-hairpin-dst", - .vector_size = sizeof (u32), - .type = VLIB_NODE_TYPE_INTERNAL, - .format_trace = format_nat_hairpin_trace, - .n_next_nodes = NAT_HAIRPIN_N_NEXT, - .next_nodes = { - [NAT_HAIRPIN_NEXT_DROP] = "error-drop", - [NAT_HAIRPIN_NEXT_LOOKUP] = "ip4-lookup", - }, -}; -/* *INDENT-ON* */ - static inline uword 
-snat_hairpin_src_fn_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, int is_ed) +snat_hairpin_src_fn_inline (vlib_main_t *vm, vlib_node_runtime_t *node, + vlib_frame_t *frame) { u32 n_left_from, *from, *to_next; snat_hairpin_src_next_t next_index; @@ -787,7 +652,7 @@ VLIB_NODE_FN (snat_hairpin_src_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) { - return snat_hairpin_src_fn_inline (vm, node, frame, 0); + return snat_hairpin_src_fn_inline (vm, node, frame); } /* *INDENT-OFF* */ @@ -805,28 +670,6 @@ VLIB_REGISTER_NODE (snat_hairpin_src_node) = { }; /* *INDENT-ON* */ -VLIB_NODE_FN (nat44_ed_hairpin_src_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - return snat_hairpin_src_fn_inline (vm, node, frame, 1); -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (nat44_ed_hairpin_src_node) = { - .name = "nat44-ed-hairpin-src", - .vector_size = sizeof (u32), - .type = VLIB_NODE_TYPE_INTERNAL, - .n_next_nodes = SNAT_HAIRPIN_SRC_N_NEXT, - .next_nodes = { - [SNAT_HAIRPIN_SRC_NEXT_DROP] = "error-drop", - [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT] = "nat44-ed-in2out-output", - [SNAT_HAIRPIN_SRC_NEXT_INTERFACE_OUTPUT] = "interface-output", - [SNAT_HAIRPIN_SRC_NEXT_SNAT_IN2OUT_WH] = "nat44-in2out-output-worker-handoff", - }, -}; -/* *INDENT-ON* */ - /* * fd.io coding-style-patch-verification: ON * diff --git a/src/plugins/nat/nat44_handoff.c b/src/plugins/nat/nat44_handoff.c index 651c8d1f4ac..8c1b967c020 100644 --- a/src/plugins/nat/nat44_handoff.c +++ b/src/plugins/nat/nat44_handoff.c @@ -33,6 +33,19 @@ typedef struct u8 output; } nat44_handoff_trace_t; +#define foreach_nat44_handoff_error \ + _ (CONGESTION_DROP, "congestion drop") \ + _ (SAME_WORKER, "same worker") \ + _ (DO_HANDOFF, "do handoff") + +typedef enum +{ +#define _(sym, str) NAT44_HANDOFF_ERROR_##sym, + foreach_nat44_handoff_error +#undef _ + NAT44_HANDOFF_N_ERROR, +} nat44_handoff_error_t; + static char *nat44_handoff_error_strings[] = 
{ #define _(sym,string) string, foreach_nat44_handoff_error diff --git a/src/plugins/nat/nat_format.c b/src/plugins/nat/nat_format.c index 90faeb96e9b..2fbd7498f49 100644 --- a/src/plugins/nat/nat_format.c +++ b/src/plugins/nat/nat_format.c @@ -121,9 +121,9 @@ format_snat_session (u8 * s, va_list * args) s = format (s, " i2o %U proto %u fib %u\n", format_ip4_address, &sess->in2out.addr, sess->in2out.port, sess->in2out.fib_index); - s = format (s, " o2i %U proto %u fib %u\n", - format_ip4_address, &sess->out2in.addr, - sess->out2in.port, sess->out2in.fib_index); + s = + format (s, " o2i %U proto %u fib %u\n", format_ip4_address, + &sess->out2in.addr, sess->out2in.port, sess->out2in.fib_index); } else { @@ -132,10 +132,9 @@ format_snat_session (u8 * s, va_list * args) format_nat_protocol, sess->nat_proto, clib_net_to_host_u16 (sess->in2out.port), sess->in2out.fib_index); - s = format (s, " o2i %U proto %U port %d fib %d\n", - format_ip4_address, &sess->out2in.addr, - format_nat_protocol, sess->nat_proto, - clib_net_to_host_u16 (sess->out2in.port), + s = format (s, " o2i %U proto %U port %d fib %d\n", + format_ip4_address, &sess->out2in.addr, format_nat_protocol, + sess->nat_proto, clib_net_to_host_u16 (sess->out2in.port), sess->out2in.fib_index); } if (is_ed_session (sess) || is_fwd_bypass_session (sess)) @@ -155,6 +154,8 @@ format_snat_session (u8 * s, va_list * args) format_ip4_address, &sess->ext_host_addr, clib_net_to_host_u16 (sess->ext_host_port)); } + s = format (s, " i2o flow: %U\n", format_nat_6t_flow, &sess->i2o); + s = format (s, " o2i flow: %U\n", format_nat_6t_flow, &sess->o2i); } s = format (s, " index %llu\n", sess - tsm->sessions); s = format (s, " last heard %.2f\n", sess->last_heard); diff --git a/src/plugins/nat/nat_inlines.h b/src/plugins/nat/nat_inlines.h index 401f1e5747f..3408e533f69 100644 --- a/src/plugins/nat/nat_inlines.h +++ b/src/plugins/nat/nat_inlines.h @@ -29,7 +29,7 @@ calc_nat_key (ip4_address_t addr, u16 port, u32 fib_index, u8 
proto) ASSERT (fib_index <= (1 << 14) - 1); ASSERT (proto <= (1 << 3) - 1); return (u64) addr.as_u32 << 32 | (u64) port << 16 | fib_index << 3 | - (proto & 0x7); + (proto & 0x7); } always_inline void @@ -518,15 +518,12 @@ split_ed_kv (clib_bihash_kv_16_8_t * kv, } static_always_inline int -get_icmp_i2o_ed_key (vlib_buffer_t * b, ip4_header_t * ip0, u32 rx_fib_index, - u32 thread_index, u32 session_index, - nat_protocol_t * nat_proto, u16 * l_port, u16 * r_port, - clib_bihash_kv_16_8_t * kv) +nat_get_icmp_session_lookup_values (vlib_buffer_t *b, ip4_header_t *ip0, + ip4_address_t *lookup_saddr, + u16 *lookup_sport, + ip4_address_t *lookup_daddr, + u16 *lookup_dport, u8 *lookup_protocol) { - u8 proto; - u16 _l_port, _r_port; - ip4_address_t *l_addr, *r_addr; - icmp46_header_t *icmp0; icmp_echo_header_t *echo0, *inner_echo0 = 0; ip4_header_t *inner_ip0 = 0; @@ -536,121 +533,43 @@ get_icmp_i2o_ed_key (vlib_buffer_t * b, ip4_header_t * ip0, u32 rx_fib_index, icmp0 = (icmp46_header_t *) ip4_next_header (ip0); echo0 = (icmp_echo_header_t *) (icmp0 + 1); - if (!icmp_type_is_error_message - (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)) - { - proto = IP_PROTOCOL_ICMP; - l_addr = &ip0->src_address; - r_addr = &ip0->dst_address; - _l_port = vnet_buffer (b)->ip.reass.l4_src_port; - _r_port = 0; - } - else - { - inner_ip0 = (ip4_header_t *) (echo0 + 1); - l4_header = ip4_next_header (inner_ip0); - proto = inner_ip0->protocol; - r_addr = &inner_ip0->src_address; - l_addr = &inner_ip0->dst_address; - switch (ip_proto_to_nat_proto (inner_ip0->protocol)) - { - case NAT_PROTOCOL_ICMP: - inner_icmp0 = (icmp46_header_t *) l4_header; - inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1); - _r_port = 0; - _l_port = inner_echo0->identifier; - break; - case NAT_PROTOCOL_UDP: - case NAT_PROTOCOL_TCP: - _l_port = ((tcp_udp_header_t *) l4_header)->dst_port; - _r_port = ((tcp_udp_header_t *) l4_header)->src_port; - break; - default: - return NAT_IN2OUT_ED_ERROR_UNSUPPORTED_PROTOCOL; - 
} - } - init_ed_kv (kv, *l_addr, _l_port, *r_addr, _r_port, rx_fib_index, proto, - thread_index, session_index); - if (nat_proto) - { - *nat_proto = ip_proto_to_nat_proto (proto); - } - if (l_port) - { - *l_port = _l_port; - } - if (r_port) - { - *r_port = _r_port; - } - return 0; -} - -static_always_inline int -get_icmp_o2i_ed_key (vlib_buffer_t * b, ip4_header_t * ip0, u32 rx_fib_index, - u32 thread_index, u32 session_index, - nat_protocol_t * nat_proto, u16 * l_port, u16 * r_port, - clib_bihash_kv_16_8_t * kv) -{ - icmp46_header_t *icmp0; - u8 proto; - ip4_address_t *l_addr, *r_addr; - u16 _l_port, _r_port; - icmp_echo_header_t *echo0, *inner_echo0 = 0; - ip4_header_t *inner_ip0; - void *l4_header = 0; - icmp46_header_t *inner_icmp0; - - icmp0 = (icmp46_header_t *) ip4_next_header (ip0); - echo0 = (icmp_echo_header_t *) (icmp0 + 1); + // avoid warning about unused variables in caller by setting to bogus values + *lookup_sport = 0; + *lookup_dport = 0; if (!icmp_type_is_error_message (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags)) { - proto = IP_PROTOCOL_ICMP; - l_addr = &ip0->dst_address; - r_addr = &ip0->src_address; - _l_port = vnet_buffer (b)->ip.reass.l4_src_port; - _r_port = 0; + *lookup_protocol = IP_PROTOCOL_ICMP; + lookup_saddr->as_u32 = ip0->src_address.as_u32; + *lookup_sport = vnet_buffer (b)->ip.reass.l4_src_port; + lookup_daddr->as_u32 = ip0->dst_address.as_u32; + *lookup_dport = vnet_buffer (b)->ip.reass.l4_dst_port; } else { inner_ip0 = (ip4_header_t *) (echo0 + 1); l4_header = ip4_next_header (inner_ip0); - proto = inner_ip0->protocol; - l_addr = &inner_ip0->src_address; - r_addr = &inner_ip0->dst_address; + *lookup_protocol = inner_ip0->protocol; + lookup_saddr->as_u32 = inner_ip0->dst_address.as_u32; + lookup_daddr->as_u32 = inner_ip0->src_address.as_u32; switch (ip_proto_to_nat_proto (inner_ip0->protocol)) { case NAT_PROTOCOL_ICMP: inner_icmp0 = (icmp46_header_t *) l4_header; inner_echo0 = (icmp_echo_header_t *) (inner_icmp0 + 1); - 
_l_port = inner_echo0->identifier; - _r_port = 0; + *lookup_sport = inner_echo0->identifier; + *lookup_dport = inner_echo0->identifier; break; case NAT_PROTOCOL_UDP: case NAT_PROTOCOL_TCP: - _l_port = ((tcp_udp_header_t *) l4_header)->src_port; - _r_port = ((tcp_udp_header_t *) l4_header)->dst_port; + *lookup_sport = ((tcp_udp_header_t *) l4_header)->dst_port; + *lookup_dport = ((tcp_udp_header_t *) l4_header)->src_port; break; default: - return -1; + return NAT_IN2OUT_ED_ERROR_UNSUPPORTED_PROTOCOL; } } - init_ed_kv (kv, *l_addr, _l_port, *r_addr, _r_port, rx_fib_index, proto, - thread_index, session_index); - if (nat_proto) - { - *nat_proto = ip_proto_to_nat_proto (proto); - } - if (l_port) - { - *l_port = _l_port; - } - if (r_port) - { - *r_port = _r_port; - } return 0; } diff --git a/src/plugins/nat/out2in_ed.c b/src/plugins/nat/out2in_ed.c index beb259eee33..d6beadc61bc 100644 --- a/src/plugins/nat/out2in_ed.c +++ b/src/plugins/nat/out2in_ed.c @@ -42,14 +42,15 @@ typedef struct u32 sw_if_index; u32 next_index; u32 session_index; - u32 is_slow_path; + nat_translation_error_e translation_error; + nat_6t_flow_t i2of; + nat_6t_flow_t o2if; + clib_bihash_kv_16_8_t search_key; + u8 is_slow_path; + u8 translation_via_i2of; + u8 lookup_skipped; } nat44_ed_out2in_trace_t; -typedef struct -{ - u16 thread_next; -} nat44_ed_out2in_handoff_trace_t; - static u8 * format_nat44_ed_out2in_trace (u8 * s, va_list * args) { @@ -64,130 +65,174 @@ format_nat44_ed_out2in_trace (u8 * s, va_list * args) s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag, t->sw_if_index, t->next_index, t->session_index); + if (~0 != t->session_index) + { + s = format (s, ", translation result '%U' via %s", + format_nat_ed_translation_error, t->translation_error, + t->translation_via_i2of ? 
"i2of" : "o2if"); + s = format (s, "\n i2of %U", format_nat_6t_flow, &t->i2of); + s = format (s, "\n o2if %U", format_nat_6t_flow, &t->o2if); + } + if (!t->is_slow_path) + { + if (t->lookup_skipped) + { + s = format (s, "\n lookup skipped - cached session index used"); + } + else + { + s = format (s, "\n search key %U", format_ed_session_kvp, + &t->search_key); + } + } return s; } +static int +next_src_nat (snat_main_t *sm, ip4_header_t *ip, u16 src_port, u16 dst_port, + u32 thread_index, u32 rx_fib_index) +{ + clib_bihash_kv_16_8_t kv, value; + + init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port, + rx_fib_index, ip->protocol); + if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value)) + return 1; + + return 0; +} + +static void create_bypass_for_fwd (snat_main_t *sm, vlib_buffer_t *b, + snat_session_t *s, ip4_header_t *ip, + u32 rx_fib_index, u32 thread_index); + +static snat_session_t *create_session_for_static_mapping_ed ( + snat_main_t *sm, vlib_buffer_t *b, ip4_address_t i2o_addr, u16 i2o_port, + u32 i2o_fib_index, ip4_address_t o2i_addr, u16 o2i_port, u32 o2i_fib_index, + nat_protocol_t nat_proto, vlib_node_runtime_t *node, u32 rx_fib_index, + u32 thread_index, twice_nat_type_t twice_nat, lb_nat_type_t lb_nat, f64 now, + snat_static_mapping_t *mapping); + static inline u32 -icmp_out2in_ed_slow_path (snat_main_t * sm, vlib_buffer_t * b0, - ip4_header_t * ip0, icmp46_header_t * icmp0, - u32 sw_if_index0, u32 rx_fib_index0, - vlib_node_runtime_t * node, u32 next0, f64 now, - u32 thread_index, snat_session_t ** p_s0) +icmp_out2in_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip, + icmp46_header_t *icmp, u32 sw_if_index, + u32 rx_fib_index, vlib_node_runtime_t *node, + u32 next, f64 now, u32 thread_index, + snat_session_t **s_p) { vlib_main_t *vm = vlib_get_main (); - next0 = icmp_out2in (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, - next0, thread_index, p_s0, 0); - snat_session_t *s0 = *p_s0; - if (PREDICT_TRUE 
(next0 != NAT_NEXT_DROP && s0)) + ip_csum_t sum; + u16 checksum; + + snat_session_t *s = 0; + u8 is_addr_only, identity_nat; + ip4_address_t sm_addr; + u16 sm_port; + u32 sm_fib_index; + snat_static_mapping_t *m; + u8 lookup_protocol; + ip4_address_t lookup_saddr, lookup_daddr; + u16 lookup_sport, lookup_dport; + + sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; + rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index); + + if (nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr, &lookup_sport, + &lookup_daddr, &lookup_dport, + &lookup_protocol)) { - /* Accounting */ - nat44_session_update_counters (s0, now, - vlib_buffer_length_in_chain - (vm, b0), thread_index); - /* Per-user LRU list maintenance */ - nat44_session_update_lru (sm, s0, thread_index); + b->error = node->errors[NAT_OUT2IN_ED_ERROR_UNSUPPORTED_PROTOCOL]; + next = NAT_NEXT_DROP; + goto out; } - return next0; -} - -#ifndef CLIB_MARCH_VARIANT -int -nat44_o2i_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void *arg) -{ - snat_main_t *sm = &snat_main; - nat44_is_idle_session_ctx_t *ctx = arg; - snat_session_t *s; - u64 sess_timeout_time; - u8 proto; - u16 r_port, l_port; - ip4_address_t *l_addr, *r_addr; - u32 fib_index; - clib_bihash_kv_16_8_t ed_kv; - int i; - //snat_address_t *a; - snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data, - ctx->thread_index); - s = pool_elt_at_index (tsm->sessions, kv->value); - sess_timeout_time = s->last_heard + (f64) nat44_session_get_timeout (sm, s); - if (ctx->now >= sess_timeout_time) + if (snat_static_mapping_match ( + sm, ip->dst_address, lookup_sport, rx_fib_index, + ip_proto_to_nat_proto (ip->protocol), &sm_addr, &sm_port, + &sm_fib_index, 1, &is_addr_only, 0, 0, 0, &identity_nat, &m)) { - l_addr = &s->in2out.addr; - r_addr = &s->ext_host_addr; - fib_index = s->in2out.fib_index; - if (snat_is_unk_proto_session (s)) + // static mapping not matched + if (!sm->forwarding_enabled) { - proto = s->in2out.port; - r_port = 
0; - l_port = 0; + /* Don't NAT packet aimed at the intfc address */ + if (!is_interface_addr (sm, node, sw_if_index, + ip->dst_address.as_u32)) + { + b->error = node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION]; + next = NAT_NEXT_DROP; + } } else { - proto = nat_proto_to_ip_proto (s->nat_proto); - l_port = s->in2out.port; - r_port = s->ext_host_port; - } - if (is_twice_nat_session (s)) - { - r_addr = &s->ext_host_nat_addr; - r_port = s->ext_host_nat_port; - } - init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto); - if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &ed_kv, 0)) - nat_elog_warn ("in2out_ed key del failed"); - - if (snat_is_unk_proto_session (s)) - goto delete; - - nat_ipfix_logging_nat44_ses_delete (ctx->thread_index, - s->in2out.addr.as_u32, - s->out2in.addr.as_u32, - s->nat_proto, - s->in2out.port, - s->out2in.port, - s->in2out.fib_index); - - nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index, - &s->in2out.addr, s->in2out.port, - &s->ext_host_nat_addr, s->ext_host_nat_port, - &s->out2in.addr, s->out2in.port, - &s->ext_host_addr, s->ext_host_port, - s->nat_proto, is_twice_nat_session (s)); - - if (is_twice_nat_session (s)) - { - for (i = 0; i < vec_len (sm->twice_nat_addresses); i++) + if (next_src_nat (sm, ip, lookup_sport, lookup_dport, thread_index, + rx_fib_index)) { - // FIXME TODO this is obviously wrong code ... needs fix! 
- // key.protocol = s->nat_proto; - // key.port = s->ext_host_nat_port; - // a = sm->twice_nat_addresses + i; - // if (a->addr.as_u32 == s->ext_host_nat_addr.as_u32) - // { - // snat_free_outside_address_and_port (sm->twice_nat_addresses, - // ctx->thread_index, - // &key); - // break; - // } + next = NAT_NEXT_IN2OUT_ED_FAST_PATH; + } + else + { + create_bypass_for_fwd (sm, b, s, ip, rx_fib_index, thread_index); } } + goto out; + } - if (snat_is_session_static (s)) - goto delete; + if (PREDICT_FALSE (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_reply && + (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_request || + !is_addr_only))) + { + b->error = node->errors[NAT_OUT2IN_ED_ERROR_BAD_ICMP_TYPE]; + next = NAT_NEXT_DROP; + goto out; + } - snat_free_outside_address_and_port (sm->addresses, ctx->thread_index, - &s->out2in.addr, s->out2in.port, - s->nat_proto); - delete: - nat_ed_session_delete (sm, s, ctx->thread_index, 1); - return 1; + if (PREDICT_FALSE (identity_nat)) + { + goto out; } - return 0; + /* Create session initiated by host from external network */ + s = create_session_for_static_mapping_ed ( + sm, b, sm_addr, sm_port, sm_fib_index, ip->dst_address, lookup_sport, + rx_fib_index, ip_proto_to_nat_proto (lookup_protocol), node, rx_fib_index, + thread_index, 0, 0, vlib_time_now (vm), m); + if (!s) + next = NAT_NEXT_DROP; + + if (PREDICT_TRUE (!ip4_is_fragment (ip))) + { + sum = ip_incremental_checksum_buffer ( + vm, b, (u8 *) icmp - (u8 *) vlib_buffer_get_current (b), + ntohs (ip->length) - ip4_header_bytes (ip), 0); + checksum = ~ip_csum_fold (sum); + if (checksum != 0 && checksum != 0xffff) + { + next = NAT_NEXT_DROP; + goto out; + } + } + + if (PREDICT_TRUE (next != NAT_NEXT_DROP && s)) + { + /* Accounting */ + nat44_session_update_counters ( + s, now, vlib_buffer_length_in_chain (vm, b), thread_index); + /* Per-user LRU list maintenance */ + nat44_session_update_lru (sm, s, thread_index); + } +out: + if 
(NAT_NEXT_DROP == next && s) + { + nat_ed_session_delete (sm, s, thread_index, 1); + s = 0; + } + *s_p = s; + return next; } -#endif // allocate exact address based on preference static_always_inline int @@ -317,28 +362,17 @@ nat44_ed_alloc_outside_addr_and_port (snat_address_t *addresses, u32 fib_index, } static snat_session_t * -create_session_for_static_mapping_ed (snat_main_t * sm, - vlib_buffer_t * b, - ip4_address_t i2o_addr, - u16 i2o_port, - u32 i2o_fib_index, - ip4_address_t o2i_addr, - u16 o2i_port, - u32 o2i_fib_index, - nat_protocol_t nat_proto, - vlib_node_runtime_t * node, - u32 rx_fib_index, - u32 thread_index, - twice_nat_type_t twice_nat, - lb_nat_type_t lb_nat, f64 now, - snat_static_mapping_t * mapping) +create_session_for_static_mapping_ed ( + snat_main_t *sm, vlib_buffer_t *b, ip4_address_t i2o_addr, u16 i2o_port, + u32 i2o_fib_index, ip4_address_t o2i_addr, u16 o2i_port, u32 o2i_fib_index, + nat_protocol_t nat_proto, vlib_node_runtime_t *node, u32 rx_fib_index, + u32 thread_index, twice_nat_type_t twice_nat, lb_nat_type_t lb_nat, f64 now, + snat_static_mapping_t *mapping) { snat_session_t *s; ip4_header_t *ip; udp_header_t *udp; snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; - clib_bihash_kv_16_8_t kv; - nat44_is_idle_session_ctx_t ctx; if (PREDICT_FALSE (nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index))) @@ -375,15 +409,29 @@ create_session_for_static_mapping_ed (snat_main_t * sm, s->in2out.fib_index = i2o_fib_index; s->nat_proto = nat_proto; - /* Add to lookup tables */ - init_ed_kv (&kv, o2i_addr, o2i_port, s->ext_host_addr, s->ext_host_port, - o2i_fib_index, ip->protocol, thread_index, s - tsm->sessions); - ctx.now = now; - ctx.thread_index = thread_index; - if (clib_bihash_add_or_overwrite_stale_16_8 (&sm->out2in_ed, &kv, - nat44_o2i_ed_is_idle_session_cb, - &ctx)) - nat_elog_notice ("out2in-ed key add failed"); + if (NAT_PROTOCOL_ICMP == nat_proto) + { + nat_6t_o2i_flow_init (sm, thread_index, 
s, s->ext_host_addr, o2i_port, + o2i_addr, o2i_port, o2i_fib_index, ip->protocol); + nat_6t_flow_icmp_id_rewrite_set (&s->o2i, i2o_port); + } + else + { + nat_6t_o2i_flow_init (sm, thread_index, s, s->ext_host_addr, + s->ext_host_port, o2i_addr, o2i_port, + o2i_fib_index, ip->protocol); + nat_6t_flow_dport_rewrite_set (&s->o2i, i2o_port); + } + nat_6t_flow_daddr_rewrite_set (&s->o2i, i2o_addr.as_u32); + nat_6t_flow_txfib_rewrite_set (&s->o2i, i2o_fib_index); + + if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 1)) + { + b->error = node->errors[NAT_OUT2IN_ED_ERROR_HASH_ADD_FAILED]; + nat_ed_session_delete (sm, s, thread_index, 1); + nat_elog_warn ("out2in flow hash add failed"); + return 0; + } if (twice_nat == TWICE_NAT || (twice_nat == TWICE_NAT_SELF && ip->src_address.as_u32 == i2o_addr.as_u32)) @@ -427,27 +475,80 @@ create_session_for_static_mapping_ed (snat_main_t * sm, if (rc) { b->error = node->errors[NAT_OUT2IN_ED_ERROR_OUT_OF_PORTS]; + if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 0)) + { + nat_elog_warn ("out2in flow hash del failed"); + } + snat_free_outside_address_and_port ( + sm->twice_nat_addresses, thread_index, &s->ext_host_nat_addr, + s->ext_host_nat_port, s->nat_proto); nat_ed_session_delete (sm, s, thread_index, 1); - if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &kv, 0)) - nat_elog_notice ("out2in-ed key del failed"); return 0; } s->flags |= SNAT_SESSION_FLAG_TWICE_NAT; - init_ed_kv (&kv, i2o_addr, i2o_port, s->ext_host_nat_addr, - s->ext_host_nat_port, i2o_fib_index, ip->protocol, - thread_index, s - tsm->sessions); + + nat_6t_flow_saddr_rewrite_set (&s->o2i, s->ext_host_nat_addr.as_u32); + if (NAT_PROTOCOL_ICMP == nat_proto) + { + nat_6t_flow_icmp_id_rewrite_set (&s->o2i, s->ext_host_nat_port); + } + else + { + nat_6t_flow_sport_rewrite_set (&s->o2i, s->ext_host_nat_port); + } + + nat_6t_l3_l4_csum_calc (&s->o2i); + + nat_6t_i2o_flow_init (sm, thread_index, s, i2o_addr, i2o_port, + s->ext_host_nat_addr, 
s->ext_host_nat_port, + i2o_fib_index, ip->protocol); + nat_6t_flow_daddr_rewrite_set (&s->i2o, s->ext_host_addr.as_u32); + if (NAT_PROTOCOL_ICMP == nat_proto) + { + nat_6t_flow_icmp_id_rewrite_set (&s->i2o, s->ext_host_port); + } + else + { + nat_6t_flow_dport_rewrite_set (&s->i2o, s->ext_host_port); + } + } + else + { + if (NAT_PROTOCOL_ICMP == nat_proto) + { + nat_6t_i2o_flow_init (sm, thread_index, s, i2o_addr, i2o_port, + s->ext_host_addr, i2o_port, i2o_fib_index, + ip->protocol); + } + else + { + nat_6t_i2o_flow_init (sm, thread_index, s, i2o_addr, i2o_port, + s->ext_host_addr, s->ext_host_port, + i2o_fib_index, ip->protocol); + } + } + + nat_6t_flow_saddr_rewrite_set (&s->i2o, o2i_addr.as_u32); + if (NAT_PROTOCOL_ICMP == nat_proto) + { + nat_6t_flow_icmp_id_rewrite_set (&s->i2o, o2i_port); } else { - init_ed_kv (&kv, i2o_addr, i2o_port, s->ext_host_addr, - s->ext_host_port, i2o_fib_index, ip->protocol, - thread_index, s - tsm->sessions); + nat_6t_flow_sport_rewrite_set (&s->i2o, o2i_port); + } + + if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1)) + { + nat_elog_notice ("in2out flow hash add failed"); + if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 0)) + { + nat_elog_warn ("out2in flow hash del failed"); + } + nat_ed_session_delete (sm, s, thread_index, 1); + return 0; } - if (clib_bihash_add_or_overwrite_stale_16_8 (&tsm->in2out_ed, &kv, - nat44_i2o_ed_is_idle_session_cb, - &ctx)) - nat_elog_notice ("in2out-ed key add failed"); nat_ipfix_logging_nat44_ses_create (thread_index, s->in2out.addr.as_u32, @@ -468,37 +569,24 @@ create_session_for_static_mapping_ed (snat_main_t * sm, return s; } -static int -next_src_nat (snat_main_t * sm, ip4_header_t * ip, u16 src_port, - u16 dst_port, u32 thread_index, u32 rx_fib_index) -{ - clib_bihash_kv_16_8_t kv, value; - snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; - - init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port, - rx_fib_index, ip->protocol); - if 
(!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value)) - return 1; - - return 0; -} - static void -create_bypass_for_fwd (snat_main_t * sm, vlib_buffer_t * b, ip4_header_t * ip, - u32 rx_fib_index, u32 thread_index) +create_bypass_for_fwd (snat_main_t *sm, vlib_buffer_t *b, snat_session_t *s, + ip4_header_t *ip, u32 rx_fib_index, u32 thread_index) { clib_bihash_kv_16_8_t kv, value; udp_header_t *udp; - snat_session_t *s = 0; snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; vlib_main_t *vm = vlib_get_main (); f64 now = vlib_time_now (vm); - u16 l_port, r_port; + u16 lookup_sport, lookup_dport; + u8 lookup_protocol; + ip4_address_t lookup_saddr, lookup_daddr; if (ip->protocol == IP_PROTOCOL_ICMP) { - if (get_icmp_o2i_ed_key - (b, ip, rx_fib_index, ~0, ~0, 0, &l_port, &r_port, &kv)) + if (nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr, + &lookup_sport, &lookup_daddr, + &lookup_dport, &lookup_protocol)) return; } else @@ -506,19 +594,23 @@ create_bypass_for_fwd (snat_main_t * sm, vlib_buffer_t * b, ip4_header_t * ip, if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP) { udp = ip4_next_header (ip); - l_port = udp->dst_port; - r_port = udp->src_port; + lookup_sport = udp->dst_port; + lookup_dport = udp->src_port; } else { - l_port = 0; - r_port = 0; + lookup_sport = 0; + lookup_dport = 0; } - init_ed_k (&kv, ip->dst_address, l_port, ip->src_address, r_port, - rx_fib_index, ip->protocol); + lookup_saddr.as_u32 = ip->dst_address.as_u32; + lookup_daddr.as_u32 = ip->src_address.as_u32; + lookup_protocol = ip->protocol; } - if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value)) + init_ed_k (&kv, lookup_saddr, lookup_sport, lookup_daddr, lookup_dport, + rx_fib_index, lookup_protocol); + + if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value)) { ASSERT (thread_index == ed_value_get_thread_index (&value)); s = @@ -550,10 +642,10 @@ create_bypass_for_fwd (snat_main_t * sm, vlib_buffer_t * b, ip4_header_t * ip, 
proto = ip_proto_to_nat_proto (ip->protocol); s->ext_host_addr = ip->src_address; - s->ext_host_port = r_port; + s->ext_host_port = lookup_dport; s->flags |= SNAT_SESSION_FLAG_FWD_BYPASS; s->out2in.addr = ip->dst_address; - s->out2in.port = l_port; + s->out2in.port = lookup_sport; s->nat_proto = proto; if (proto == NAT_PROTOCOL_OTHER) { @@ -565,9 +657,16 @@ create_bypass_for_fwd (snat_main_t * sm, vlib_buffer_t * b, ip4_header_t * ip, s->in2out.port = s->out2in.port; s->in2out.fib_index = s->out2in.fib_index; - kv.value = s - tsm->sessions; - if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &kv, 1)) - nat_elog_notice ("in2out_ed key add failed"); + nat_6t_i2o_flow_init (sm, thread_index, s, ip->dst_address, lookup_sport, + ip->src_address, lookup_dport, rx_fib_index, + ip->protocol); + nat_6t_flow_txfib_rewrite_set (&s->i2o, rx_fib_index); + if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1)) + { + nat_elog_notice ("in2out flow add failed"); + nat_ed_session_delete (sm, s, thread_index, 1); + return; + } per_vrf_sessions_register_session (s, thread_index); } @@ -586,261 +685,75 @@ create_bypass_for_fwd (snat_main_t * sm, vlib_buffer_t * b, ip4_header_t * ip, nat44_session_update_lru (sm, s, thread_index); } -static_always_inline int -create_bypass_for_fwd_worker (snat_main_t * sm, - vlib_buffer_t * b, ip4_header_t * ip, - u32 rx_fib_index, u32 thread_index) -{ - ip4_header_t tmp = { - .src_address = ip->dst_address, - }; - u32 index = sm->worker_in2out_cb (&tmp, rx_fib_index, 0); - - if (index != thread_index) - { - vnet_buffer2 (b)->nat.thread_next = index; - return 1; - } - - create_bypass_for_fwd (sm, b, ip, rx_fib_index, thread_index); - return 0; -} - -#ifndef CLIB_MARCH_VARIANT -u32 -icmp_match_out2in_ed (snat_main_t * sm, vlib_node_runtime_t * node, - u32 thread_index, vlib_buffer_t * b, - ip4_header_t * ip, ip4_address_t * addr, - u16 * port, u32 * fib_index, nat_protocol_t * proto, - void *d, void *e, u8 * dont_translate) +static snat_session_t * 
+nat44_ed_out2in_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b, + ip4_header_t *ip, u32 rx_fib_index, + u32 thread_index, f64 now, + vlib_main_t *vm, + vlib_node_runtime_t *node) { - u32 next = ~0, sw_if_index, rx_fib_index; - clib_bihash_kv_16_8_t kv, value; - snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; - snat_session_t *s = 0; - u8 is_addr_only, identity_nat; - u16 l_port, r_port; - vlib_main_t *vm = vlib_get_main (); - ip4_address_t sm_addr; - u16 sm_port; - u32 sm_fib_index; - *dont_translate = 0; + clib_bihash_kv_8_8_t kv, value; snat_static_mapping_t *m; + snat_session_t *s; - sw_if_index = vnet_buffer (b)->sw_if_index[VLIB_RX]; - rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index); - - if (get_icmp_o2i_ed_key - (b, ip, rx_fib_index, ~0, ~0, proto, &l_port, &r_port, &kv)) + if (PREDICT_FALSE ( + nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index))) { - b->error = node->errors[NAT_OUT2IN_ED_ERROR_UNSUPPORTED_PROTOCOL]; - next = NAT_NEXT_DROP; - goto out; + b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_SESSIONS_EXCEEDED]; + nat_elog_notice ("maximum sessions exceeded"); + return 0; } - if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value)) + init_nat_k (&kv, ip->dst_address, 0, 0, 0); + if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value)) { - if (snat_static_mapping_match - (sm, ip->dst_address, l_port, rx_fib_index, - ip_proto_to_nat_proto (ip->protocol), &sm_addr, &sm_port, - &sm_fib_index, 1, &is_addr_only, 0, 0, 0, &identity_nat, &m)) - { - // static mapping not matched - if (!sm->forwarding_enabled) - { - /* Don't NAT packet aimed at the intfc address */ - if (PREDICT_FALSE (is_interface_addr (sm, node, sw_if_index, - ip->dst_address.as_u32))) - { - *dont_translate = 1; - } - else - { - b->error = node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION]; - next = NAT_NEXT_DROP; - } - } - else - { - *dont_translate = 1; - if (next_src_nat (sm, ip, l_port, r_port, - 
thread_index, rx_fib_index)) - { - next = NAT_NEXT_IN2OUT_ED_FAST_PATH; - } - else - { - if (sm->num_workers > 1) - { - if (create_bypass_for_fwd_worker (sm, b, ip, - rx_fib_index, - thread_index)) - { - next = NAT_NEXT_OUT2IN_ED_HANDOFF; - } - } - else - { - create_bypass_for_fwd (sm, b, ip, rx_fib_index, - thread_index); - } - } - } - goto out; - } - - if (PREDICT_FALSE - (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags != - ICMP4_echo_reply - && (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags != - ICMP4_echo_request || !is_addr_only))) - { - b->error = node->errors[NAT_OUT2IN_ED_ERROR_BAD_ICMP_TYPE]; - next = NAT_NEXT_DROP; - goto out; - } - - if (PREDICT_FALSE (identity_nat)) - { - *dont_translate = 1; - goto out; - } - - /* Create session initiated by host from external network */ - s = - create_session_for_static_mapping_ed (sm, b, sm_addr, sm_port, - sm_fib_index, ip->dst_address, - l_port, rx_fib_index, *proto, - node, rx_fib_index, - thread_index, 0, 0, - vlib_time_now (vm), m); - if (!s) - next = NAT_NEXT_DROP; + b->error = node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION]; + return 0; } - else - { - if (PREDICT_FALSE - (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags != - ICMP4_echo_reply - && vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags != - ICMP4_echo_request - && !icmp_type_is_error_message (vnet_buffer (b)->ip. 
- reass.icmp_type_or_tcp_flags))) - { - b->error = node->errors[NAT_OUT2IN_ED_ERROR_BAD_ICMP_TYPE]; - next = NAT_NEXT_DROP; - goto out; - } - ASSERT (thread_index == ed_value_get_thread_index (&value)); - s = - pool_elt_at_index (tsm->sessions, - ed_value_get_session_index (&value)); - } -out: - if (s) + m = pool_elt_at_index (sm->static_mappings, value.value); + + /* Create a new session */ + s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol); + if (!s) { - *addr = s->in2out.addr; - *port = s->in2out.port; - *fib_index = s->in2out.fib_index; + b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_USER_SESS_EXCEEDED]; + nat_elog_warn ("create NAT session failed"); + return 0; } - if (d) - *(snat_session_t **) d = s; - return next; -} -#endif - -static snat_session_t * -nat44_ed_out2in_unknown_proto (snat_main_t * sm, - vlib_buffer_t * b, - ip4_header_t * ip, - u32 rx_fib_index, - u32 thread_index, - f64 now, - vlib_main_t * vm, vlib_node_runtime_t * node) -{ - clib_bihash_kv_8_8_t kv, value; - clib_bihash_kv_16_8_t s_kv, s_value; - snat_static_mapping_t *m; - u32 old_addr, new_addr; - ip_csum_t sum; - snat_session_t *s; - snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; - old_addr = ip->dst_address.as_u32; - - init_ed_k (&s_kv, ip->dst_address, 0, ip->src_address, 0, rx_fib_index, - ip->protocol); - - if (!clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value)) + s->ext_host_addr.as_u32 = ip->src_address.as_u32; + s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO; + s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; + s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT; + s->out2in.addr.as_u32 = ip->dst_address.as_u32; + s->out2in.fib_index = rx_fib_index; + s->in2out.addr.as_u32 = m->local_addr.as_u32; + s->in2out.fib_index = m->fib_index; + s->in2out.port = s->out2in.port = ip->protocol; + + nat_6t_o2i_flow_init (sm, thread_index, s, ip->dst_address, 0, + ip->src_address, 0, m->fib_index, ip->protocol); + nat_6t_flow_saddr_rewrite_set (&s->i2o, 
ip->dst_address.as_u32); + if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1)) { - ASSERT (thread_index == ed_value_get_thread_index (&s_value)); - s = - pool_elt_at_index (tsm->sessions, - ed_value_get_session_index (&s_value)); - new_addr = ip->dst_address.as_u32 = s->in2out.addr.as_u32; + nat_elog_notice ("in2out key add failed"); + nat_ed_session_delete (sm, s, thread_index, 1); + return NULL; } - else - { - if (PREDICT_FALSE - (nat44_ed_maximum_sessions_exceeded - (sm, rx_fib_index, thread_index))) - { - b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_SESSIONS_EXCEEDED]; - nat_elog_notice ("maximum sessions exceeded"); - return 0; - } - - init_nat_k (&kv, ip->dst_address, 0, 0, 0); - if (clib_bihash_search_8_8 - (&sm->static_mapping_by_external, &kv, &value)) - { - b->error = node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION]; - return 0; - } - - m = pool_elt_at_index (sm->static_mappings, value.value); - - new_addr = ip->dst_address.as_u32 = m->local_addr.as_u32; - /* Create a new session */ - s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol); - if (!s) - { - b->error = node->errors[NAT_OUT2IN_ED_ERROR_MAX_USER_SESS_EXCEEDED]; - nat_elog_warn ("create NAT session failed"); - return 0; - } - - s->ext_host_addr.as_u32 = ip->src_address.as_u32; - s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO; - s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; - s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT; - s->out2in.addr.as_u32 = old_addr; - s->out2in.fib_index = rx_fib_index; - s->in2out.addr.as_u32 = new_addr; - s->in2out.fib_index = m->fib_index; - s->in2out.port = s->out2in.port = ip->protocol; - - /* Add to lookup tables */ - s_kv.value = s - tsm->sessions; - if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1)) - nat_elog_notice ("out2in key add failed"); - - init_ed_kv (&s_kv, ip->dst_address, 0, ip->src_address, 0, m->fib_index, - ip->protocol, thread_index, s - tsm->sessions); - if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &s_kv, 1)) - 
nat_elog_notice ("in2out key add failed"); - - per_vrf_sessions_register_session (s, thread_index); + nat_6t_o2i_flow_init (sm, thread_index, s, ip->src_address, 0, + ip->dst_address, 0, rx_fib_index, ip->protocol); + nat_6t_flow_daddr_rewrite_set (&s->o2i, m->local_addr.as_u32); + nat_6t_flow_txfib_rewrite_set (&s->o2i, m->fib_index); + if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 1)) + { + nat_elog_notice ("out2in flow hash add failed"); + nat_ed_session_delete (sm, s, thread_index, 1); + return NULL; } - /* Update IP checksum */ - sum = ip->checksum; - sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, dst_address); - ip->checksum = ip_csum_fold (sum); - - vnet_buffer (b)->sw_if_index[VLIB_TX] = s->in2out.fib_index; + per_vrf_sessions_register_session (s, thread_index); /* Accounting */ nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b), @@ -873,18 +786,24 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm, while (n_left_from > 0) { vlib_buffer_t *b0; - u32 sw_if_index0, rx_fib_index0, proto0, old_addr0, new_addr0; - u16 old_port0, new_port0; + u32 sw_if_index0, rx_fib_index0; + nat_protocol_t proto0; ip4_header_t *ip0; - udp_header_t *udp0; - tcp_header_t *tcp0; snat_session_t *s0 = 0; clib_bihash_kv_16_8_t kv0, value0; - ip_csum_t sum0; + nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS; + nat_6t_flow_t *f = 0; + ip4_address_t lookup_saddr, lookup_daddr; + u16 lookup_sport, lookup_dport; + u8 lookup_protocol; + int lookup_skipped = 0; b0 = *b; b++; + lookup_sport = vnet_buffer (b0)->ip.reass.l4_src_port; + lookup_dport = vnet_buffer (b0)->ip.reass.l4_dst_port; + /* Prefetch next iteration. 
*/ if (PREDICT_TRUE (n_left_from >= 2)) { @@ -916,56 +835,68 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm, goto trace0; } - udp0 = ip4_next_header (ip0); - tcp0 = (tcp_header_t *) udp0; proto0 = ip_proto_to_nat_proto (ip0->protocol); - if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER)) + if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP)) { - next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH; - goto trace0; + if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_request && + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_reply && + !icmp_type_is_error_message ( + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags)) + { + b0->error = node->errors[NAT_OUT2IN_ED_ERROR_BAD_ICMP_TYPE]; + next[0] = NAT_NEXT_DROP; + goto trace0; + } + int err = nat_get_icmp_session_lookup_values ( + b0, ip0, &lookup_saddr, &lookup_sport, &lookup_daddr, + &lookup_dport, &lookup_protocol); + if (err != 0) + { + b0->error = node->errors[err]; + next[0] = NAT_NEXT_DROP; + goto trace0; + } } - - if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP)) + else { - next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH; - goto trace0; + lookup_saddr.as_u32 = ip0->src_address.as_u32; + lookup_daddr.as_u32 = ip0->dst_address.as_u32; + lookup_protocol = ip0->protocol; } - init_ed_k (&kv0, ip0->dst_address, - vnet_buffer (b0)->ip.reass.l4_dst_port, ip0->src_address, - vnet_buffer (b0)->ip.reass.l4_src_port, rx_fib_index0, - ip0->protocol); - - /* there is a stashed index in vnet_buffer2 from handoff node, - * see if we can use it */ - if (is_multi_worker - && - PREDICT_TRUE (!pool_is_free_index - (tsm->sessions, - vnet_buffer2 (b0)->nat.ed_out2in_nat_session_index))) + /* there might be a stashed index in vnet_buffer2 from handoff or + * classify node, see if it can be used */ + if (!pool_is_free_index (tsm->sessions, + vnet_buffer2 (b0)->nat.cached_session_index)) { s0 = pool_elt_at_index (tsm->sessions, - vnet_buffer2 (b0)-> - nat.ed_out2in_nat_session_index); - if (PREDICT_TRUE - 
(s0->out2in.addr.as_u32 == ip0->dst_address.as_u32 - && s0->out2in.port == vnet_buffer (b0)->ip.reass.l4_dst_port - && s0->nat_proto == ip_proto_to_nat_proto (ip0->protocol) - && s0->out2in.fib_index == rx_fib_index0 - && s0->ext_host_addr.as_u32 == ip0->src_address.as_u32 - && s0->ext_host_port == - vnet_buffer (b0)->ip.reass.l4_src_port)) + vnet_buffer2 (b0)->nat.cached_session_index); + if (PREDICT_TRUE ( + nat_6t_flow_match (&s0->o2i, b0, lookup_saddr, lookup_sport, + lookup_daddr, lookup_dport, lookup_protocol, + rx_fib_index0) || + (s0->flags & SNAT_SESSION_FLAG_TWICE_NAT && + nat_6t_flow_match (&s0->i2o, b0, lookup_saddr, lookup_sport, + lookup_daddr, lookup_dport, + lookup_protocol, rx_fib_index0)))) { /* yes, this is the droid we're looking for */ + lookup_skipped = 1; goto skip_lookup; } + s0 = NULL; } - // lookup for session - if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv0, &value0)) + init_ed_k (&kv0, lookup_saddr, lookup_sport, lookup_daddr, lookup_dport, + rx_fib_index0, lookup_protocol); + + // lookup flow + if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0)) { - // session does not exist go slow path + // flow does not exist go slow path next[0] = NAT_NEXT_OUT2IN_ED_SLOW_PATH; goto trace0; } @@ -973,7 +904,6 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm, s0 = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value0)); - skip_lookup: if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index))) @@ -1014,48 +944,72 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm, goto trace0; } - old_addr0 = ip0->dst_address.as_u32; - new_addr0 = ip0->dst_address.as_u32 = s0->in2out.addr.as_u32; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; - - sum0 = ip0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - sum0 = ip_csum_update (sum0, ip0->src_address.as_u32, - s0->ext_host_nat_addr.as_u32, 
ip4_header_t, - src_address); - ip0->checksum = ip_csum_fold (sum0); - - old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port; - - if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP)) + if (nat_6t_flow_match (&s0->o2i, b0, lookup_saddr, lookup_sport, + lookup_daddr, lookup_dport, lookup_protocol, + rx_fib_index0)) + { + f = &s0->o2i; + } + else if (s0->flags & SNAT_SESSION_FLAG_TWICE_NAT && + nat_6t_flow_match (&s0->i2o, b0, lookup_saddr, lookup_sport, + lookup_daddr, lookup_dport, lookup_protocol, + rx_fib_index0)) { - if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) + f = &s0->i2o; + } + else + { + /* + * Send DHCP packets to the ipv4 stack, or we won't + * be able to use dhcp client on the outside interface + */ + if (PREDICT_FALSE ( + proto0 == NAT_PROTOCOL_UDP && + (vnet_buffer (b0)->ip.reass.l4_dst_port == + clib_host_to_net_u16 (UDP_DST_PORT_dhcp_to_client)))) + { + goto trace0; + } + + if (!sm->forwarding_enabled) { - new_port0 = udp0->dst_port = s0->in2out.port; - sum0 = tcp0->checksum; - sum0 = - ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address); - sum0 = - ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, - length); - if (is_twice_nat_session (s0)) + b0->error = node->errors[NAT_OUT2IN_ED_ERROR_NO_TRANSLATION]; + next[0] = NAT_NEXT_DROP; + goto trace0; + } + else + { + if (nat_6t_flow_match (&s0->i2o, b0, lookup_saddr, lookup_sport, + lookup_daddr, lookup_dport, + lookup_protocol, rx_fib_index0)) { - sum0 = ip_csum_update (sum0, ip0->src_address.as_u32, - s0->ext_host_nat_addr.as_u32, - ip4_header_t, dst_address); - sum0 = - ip_csum_update (sum0, - vnet_buffer (b0)->ip.reass.l4_src_port, - s0->ext_host_nat_port, ip4_header_t, - length); - tcp0->src_port = s0->ext_host_nat_port; - ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32; + f = &s0->i2o; + } + else + { + // FIXME TODO bypass ??? 
+ // create_bypass_for_fwd (sm, b0, s0, ip0, rx_fib_index0, + // thread_index); + translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH; + nat_free_session_data (sm, s0, thread_index, 0); + nat_ed_session_delete (sm, s0, thread_index, 1); + next[0] = NAT_NEXT_DROP; + goto trace0; } - tcp0->checksum = ip_csum_fold (sum0); } + } + + if (NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate ( + sm, b0, ip0, f, proto0, 0 /* is_output_feature */))) + { + next[0] = NAT_NEXT_DROP; + goto trace0; + } + + switch (proto0) + { + case NAT_PROTOCOL_TCP: vlib_increment_simple_counter (&sm->counters.fastpath.out2in_ed.tcp, thread_index, sw_if_index0, 1); nat44_set_tcp_session_state_o2i (sm, now, s0, @@ -1066,46 +1020,20 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm, vnet_buffer (b0)->ip. reass.tcp_seq_number, thread_index); - } - else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment - && udp0->checksum) - { - new_port0 = udp0->dst_port = s0->in2out.port; - sum0 = udp0->checksum; - sum0 = - ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address); - sum0 = - ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, length); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - sum0 = - ip_csum_update (sum0, ip0->src_address.as_u32, - s0->ext_host_nat_addr.as_u32, ip4_header_t, - dst_address); - sum0 = - ip_csum_update (sum0, vnet_buffer (b0)->ip.reass.l4_src_port, - s0->ext_host_nat_port, ip4_header_t, length); - udp0->src_port = s0->ext_host_nat_port; - ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32; - } - udp0->checksum = ip_csum_fold (sum0); + break; + case NAT_PROTOCOL_UDP: vlib_increment_simple_counter (&sm->counters.fastpath.out2in_ed.udp, thread_index, sw_if_index0, 1); - } - else - { - if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) - { - new_port0 = udp0->dst_port = s0->in2out.port; - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - udp0->src_port = s0->ext_host_nat_port; - 
ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32; - } - } - vlib_increment_simple_counter (&sm->counters.fastpath.out2in_ed.udp, + break; + case NAT_PROTOCOL_ICMP: + vlib_increment_simple_counter (&sm->counters.fastpath.out2in_ed.icmp, thread_index, sw_if_index0, 1); + break; + case NAT_PROTOCOL_OTHER: + vlib_increment_simple_counter ( + &sm->counters.fastpath.out2in_ed.other, thread_index, sw_if_index0, + 1); + break; } /* Accounting */ @@ -1124,11 +1052,21 @@ nat44_ed_out2in_fast_path_node_fn_inline (vlib_main_t * vm, t->sw_if_index = sw_if_index0; t->next_index = next[0]; t->is_slow_path = 0; + t->translation_error = translation_error; + clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key)); + t->lookup_skipped = lookup_skipped; if (s0) - t->session_index = s0 - tsm->sessions; + { + t->session_index = s0 - tsm->sessions; + clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of)); + clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if)); + t->translation_via_i2of = (&s0->i2o == f); + } else - t->session_index = ~0; + { + t->session_index = ~0; + } } if (next[0] == NAT_NEXT_DROP) @@ -1169,21 +1107,20 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm, while (n_left_from > 0) { vlib_buffer_t *b0; - u32 sw_if_index0, rx_fib_index0, proto0, old_addr0, new_addr0; - u16 old_port0, new_port0; + u32 sw_if_index0, rx_fib_index0; + nat_protocol_t proto0; ip4_header_t *ip0; udp_header_t *udp0; - tcp_header_t *tcp0; icmp46_header_t *icmp0; snat_session_t *s0 = 0; clib_bihash_kv_16_8_t kv0, value0; - ip_csum_t sum0; lb_nat_type_t lb_nat0; twice_nat_type_t twice_nat0; u8 identity_nat0; ip4_address_t sm_addr; u16 sm_port; u32 sm_fib_index; + nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS; b0 = *b; next[0] = vnet_buffer2 (b0)->nat.arc_next; @@ -1206,20 +1143,26 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm, } udp0 = ip4_next_header (ip0); - tcp0 = (tcp_header_t *) udp0; icmp0 = (icmp46_header_t *) udp0; proto0 = 
ip_proto_to_nat_proto (ip0->protocol); if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER)) { - s0 = - nat44_ed_out2in_unknown_proto (sm, b0, ip0, rx_fib_index0, - thread_index, now, vm, node); + s0 = nat44_ed_out2in_slowpath_unknown_proto ( + sm, b0, ip0, rx_fib_index0, thread_index, now, vm, node); if (!sm->forwarding_enabled) { if (!s0) next[0] = NAT_NEXT_DROP; } + if (NAT_NEXT_DROP != next[0] && + NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate ( + sm, b0, ip0, &s0->o2i, proto0, 0 /* is_output_feature */))) + { + goto trace0; + } + vlib_increment_simple_counter (&sm->counters.slowpath. out2in_ed.other, thread_index, sw_if_index0, 1); @@ -1231,19 +1174,28 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm, next[0] = icmp_out2in_ed_slow_path (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, next[0], now, thread_index, &s0); + + if (NAT_NEXT_DROP != next[0] && s0 && + NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate ( + sm, b0, ip0, &s0->o2i, proto0, 0 /* is_output_feature */))) + { + goto trace0; + } + vlib_increment_simple_counter (&sm->counters.slowpath. 
out2in_ed.icmp, thread_index, sw_if_index0, 1); goto trace0; } - init_ed_k (&kv0, ip0->dst_address, - vnet_buffer (b0)->ip.reass.l4_dst_port, ip0->src_address, - vnet_buffer (b0)->ip.reass.l4_src_port, rx_fib_index0, + init_ed_k (&kv0, ip0->src_address, + vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address, + vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0, ip0->protocol); s0 = NULL; - if (!clib_bihash_search_16_8 (&sm->out2in_ed, &kv0, &value0)) + if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0)) { ASSERT (thread_index == ed_value_get_thread_index (&value0)); s0 = @@ -1298,18 +1250,8 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm, } else { - if ((sm->num_workers > 1) - && create_bypass_for_fwd_worker (sm, b0, ip0, - rx_fib_index0, - thread_index)) - { - next[0] = NAT_NEXT_OUT2IN_ED_HANDOFF; - } - else - { - create_bypass_for_fwd (sm, b0, ip0, rx_fib_index0, - thread_index); - } + create_bypass_for_fwd (sm, b0, s0, ip0, rx_fib_index0, + thread_index); } } goto trace0; @@ -1345,48 +1287,16 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm, } } - old_addr0 = ip0->dst_address.as_u32; - new_addr0 = ip0->dst_address.as_u32 = s0->in2out.addr.as_u32; - vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->in2out.fib_index; - - sum0 = ip0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - sum0 = ip_csum_update (sum0, ip0->src_address.as_u32, - s0->ext_host_nat_addr.as_u32, ip4_header_t, - src_address); - ip0->checksum = ip_csum_fold (sum0); - - old_port0 = vnet_buffer (b0)->ip.reass.l4_dst_port; + if (NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate ( + sm, b0, ip0, &s0->o2i, proto0, 0 /* is_output_feature */))) + { + next[0] = NAT_NEXT_DROP; + goto trace0; + } if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP)) { - if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) - { - new_port0 = udp0->dst_port = 
s0->in2out.port; - sum0 = tcp0->checksum; - sum0 = - ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address); - sum0 = - ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, - length); - if (is_twice_nat_session (s0)) - { - sum0 = ip_csum_update (sum0, ip0->src_address.as_u32, - s0->ext_host_nat_addr.as_u32, - ip4_header_t, dst_address); - sum0 = - ip_csum_update (sum0, - vnet_buffer (b0)->ip.reass.l4_src_port, - s0->ext_host_nat_port, ip4_header_t, - length); - tcp0->src_port = s0->ext_host_nat_port; - ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32; - } - tcp0->checksum = ip_csum_fold (sum0); - } vlib_increment_simple_counter (&sm->counters.slowpath.out2in_ed.tcp, thread_index, sw_if_index0, 1); nat44_set_tcp_session_state_o2i (sm, now, s0, @@ -1398,42 +1308,8 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm, reass.tcp_seq_number, thread_index); } - else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment - && udp0->checksum) - { - new_port0 = udp0->dst_port = s0->in2out.port; - sum0 = udp0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address); - sum0 = ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, - length); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - sum0 = ip_csum_update (sum0, ip0->src_address.as_u32, - s0->ext_host_nat_addr.as_u32, - ip4_header_t, dst_address); - sum0 = - ip_csum_update (sum0, - vnet_buffer (b0)->ip.reass.l4_src_port, - s0->ext_host_nat_port, ip4_header_t, length); - udp0->src_port = s0->ext_host_nat_port; - ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32; - } - udp0->checksum = ip_csum_fold (sum0); - vlib_increment_simple_counter (&sm->counters.slowpath.out2in_ed.udp, - thread_index, sw_if_index0, 1); - } else { - if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) - { - new_port0 = udp0->dst_port = s0->in2out.port; - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - udp0->src_port = s0->ext_host_nat_port; - 
ip0->src_address.as_u32 = s0->ext_host_nat_addr.as_u32; - } - } vlib_increment_simple_counter (&sm->counters.slowpath.out2in_ed.udp, thread_index, sw_if_index0, 1); } @@ -1454,11 +1330,19 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm, t->sw_if_index = sw_if_index0; t->next_index = next[0]; t->is_slow_path = 1; + t->translation_error = translation_error; + clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key)); if (s0) - t->session_index = s0 - tsm->sessions; + { + t->session_index = s0 - tsm->sessions; + clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of)); + clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if)); + } else - t->session_index = ~0; + { + t->session_index = ~0; + } } if (next[0] == NAT_NEXT_DROP) @@ -1479,88 +1363,6 @@ nat44_ed_out2in_slow_path_node_fn_inline (vlib_main_t * vm, return frame->n_vectors; } -static inline uword -nat_handoff_node_fn_inline (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame, u32 fq_index) -{ - u32 n_enq, n_left_from, *from; - - u16 thread_indices[VLIB_FRAME_SIZE], *ti = thread_indices; - vlib_buffer_t *bufs[VLIB_FRAME_SIZE], **b = bufs; - - from = vlib_frame_vector_args (frame); - n_left_from = frame->n_vectors; - - vlib_get_buffers (vm, from, b, n_left_from); - - while (n_left_from >= 4) - { - if (PREDICT_TRUE (n_left_from >= 8)) - { - vlib_prefetch_buffer_header (b[4], LOAD); - vlib_prefetch_buffer_header (b[5], LOAD); - vlib_prefetch_buffer_header (b[6], LOAD); - vlib_prefetch_buffer_header (b[7], LOAD); - CLIB_PREFETCH (&b[4]->data, CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (&b[5]->data, CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (&b[6]->data, CLIB_CACHE_LINE_BYTES, LOAD); - CLIB_PREFETCH (&b[7]->data, CLIB_CACHE_LINE_BYTES, LOAD); - } - - ti[0] = vnet_buffer2 (b[0])->nat.thread_next; - ti[1] = vnet_buffer2 (b[1])->nat.thread_next; - ti[2] = vnet_buffer2 (b[2])->nat.thread_next; - ti[3] = vnet_buffer2 (b[3])->nat.thread_next; - - b += 4; - ti += 4; - n_left_from -= 4; - } - - 
while (n_left_from > 0) - { - ti[0] = vnet_buffer2 (b[0])->nat.thread_next; - - b += 1; - ti += 1; - n_left_from -= 1; - } - - if (PREDICT_FALSE ((node->flags & VLIB_NODE_FLAG_TRACE))) - { - u32 i; - b = bufs; - ti = thread_indices; - - for (i = 0; i < frame->n_vectors; i++) - { - if (b[0]->flags & VLIB_BUFFER_IS_TRACED) - { - nat44_ed_out2in_handoff_trace_t *t = - vlib_add_trace (vm, node, b[0], sizeof (*t)); - t->thread_next = ti[0]; - b += 1; - ti += 1; - } - else - break; - } - } - - n_enq = vlib_buffer_enqueue_to_thread (vm, fq_index, from, thread_indices, - frame->n_vectors, 1); - - if (n_enq < frame->n_vectors) - { - vlib_node_increment_counter (vm, node->node_index, - NAT44_HANDOFF_ERROR_CONGESTION_DROP, - frame->n_vectors - n_enq); - } - - return frame->n_vectors; -} - VLIB_NODE_FN (nat44_ed_out2in_node) (vlib_main_t * vm, vlib_node_runtime_t * node, vlib_frame_t * frame) @@ -1609,35 +1411,6 @@ VLIB_REGISTER_NODE (nat44_ed_out2in_slowpath_node) = { /* *INDENT-ON* */ static u8 * -format_nat44_ed_out2in_handoff_trace (u8 * s, va_list * args) -{ - CLIB_UNUSED (vlib_main_t * vm) = va_arg (*args, vlib_main_t *); - CLIB_UNUSED (vlib_node_t * node) = va_arg (*args, vlib_node_t *); - nat44_ed_out2in_handoff_trace_t *t = - va_arg (*args, nat44_ed_out2in_handoff_trace_t *); - return format (s, "out2in ed handoff thread_next index %d", t->thread_next); -} - -VLIB_NODE_FN (nat44_ed_out2in_handoff_node) (vlib_main_t * vm, - vlib_node_runtime_t * node, - vlib_frame_t * frame) -{ - return nat_handoff_node_fn_inline (vm, node, frame, - snat_main.ed_out2in_node_index); -} - -/* *INDENT-OFF* */ -VLIB_REGISTER_NODE (nat44_ed_out2in_handoff_node) = { - .name = "nat44-ed-out2in-handoff", - .vector_size = sizeof (u32), - .sibling_of = "nat-default", - .format_trace = format_nat44_ed_out2in_handoff_trace, - .type = VLIB_NODE_TYPE_INTERNAL, - .n_errors = 0, -}; -/* *INDENT-ON* */ - -static u8 * format_nat_pre_trace (u8 * s, va_list * args) { CLIB_UNUSED (vlib_main_t * vm) = 
va_arg (*args, vlib_main_t *); diff --git a/src/plugins/nat/test/test_nat44_ed.py b/src/plugins/nat/test/test_nat44_ed.py index 0f4a7371048..3d8921790ee 100644 --- a/src/plugins/nat/test/test_nat44_ed.py +++ b/src/plugins/nat/test/test_nat44_ed.py @@ -2004,7 +2004,7 @@ class TestNAT44EDMW(TestNAT44ED): # out2in tc1 = self.get_stats_counter('/nat44/ed/out2in/fastpath/tcp') uc1 = self.get_stats_counter('/nat44/ed/out2in/fastpath/udp') - ic1 = self.get_stats_counter('/nat44/ed/out2in/slowpath/icmp') + ic1 = self.get_stats_counter('/nat44/ed/out2in/fastpath/icmp') dc1 = self.get_stats_counter('/nat44/ed/out2in/fastpath/drops') pkts = self.create_stream_out(self.pg1) @@ -2017,7 +2017,7 @@ class TestNAT44EDMW(TestNAT44ED): if_idx = self.pg1.sw_if_index tc2 = self.get_stats_counter('/nat44/ed/out2in/fastpath/tcp') uc2 = self.get_stats_counter('/nat44/ed/out2in/fastpath/udp') - ic2 = self.get_stats_counter('/nat44/ed/out2in/slowpath/icmp') + ic2 = self.get_stats_counter('/nat44/ed/out2in/fastpath/icmp') dc2 = self.get_stats_counter('/nat44/ed/out2in/fastpath/drops') self.assertEqual(tc2[if_idx] - tc1[if_idx], 2) @@ -3364,7 +3364,7 @@ class TestNAT44EDMW(TestNAT44ED): udpn = self.get_stats_counter( '/nat44/ed/out2in/fastpath/udp') icmpn = self.get_stats_counter( - '/nat44/ed/out2in/slowpath/icmp') + '/nat44/ed/out2in/fastpath/icmp') drops = self.get_stats_counter( '/nat44/ed/out2in/fastpath/drops') @@ -3383,7 +3383,7 @@ class TestNAT44EDMW(TestNAT44ED): '/nat44/ed/out2in/fastpath/udp') self.assertEqual(cnt[if_idx] - udpn[if_idx], 1) cnt = self.get_stats_counter( - '/nat44/ed/out2in/slowpath/icmp') + '/nat44/ed/out2in/fastpath/icmp') self.assertEqual(cnt[if_idx] - icmpn[if_idx], 1) cnt = self.get_stats_counter( '/nat44/ed/out2in/fastpath/drops') diff --git a/src/vnet/buffer.h b/src/vnet/buffer.h index 9e997b81c52..aae999620ac 100644 --- a/src/vnet/buffer.h +++ b/src/vnet/buffer.h @@ -452,12 +452,14 @@ typedef struct /* size of L4 prototol header */ u16 gso_l4_hdr_sz; + /* 
The union below has a u64 alignment, so this space is unused */ + u32 __unused2[1]; + struct { - u16 unused; - u16 thread_next; u32 arc_next; - u32 ed_out2in_nat_session_index; + /* cached session index from previous node */ + u32 cached_session_index; } nat; union |