diff options
author | Klement Sekera <ksekera@cisco.com> | 2020-12-15 18:47:05 +0100 |
---|---|---|
committer | Ole Trøan <otroan@employees.org> | 2021-01-18 08:36:26 +0000 |
commit | 4881cb4c6f0d9c6276eb7a45ed355f9fc3d729b3 (patch) | |
tree | 07959eb6fc99b88b30e6f81f4620d8d6c70110e2 /src/plugins/nat/in2out_ed.c | |
parent | 4a58e49cfe03150034a65e147a2ffe8d24391b86 (diff) |
nat: deal with flows instead of sessions
This change introduces the flow concept to endpoint-dependent NAT. Instead
of having a session and a plethora of special cases in code for e.g.
hairpinning, twice-nat and others, figure all this out and store it in
flow logic. Every flow has a match and a rewrite part. This unifies all
the NAT packet processing cases into one - match a flow and rewrite the
packet based on that flow. It also provides a cure for hairpinning
dilemma where one part of the flow is on one worker and another on
a different one. These cases are also sped up by not requiring
destination address lookup every single time to be able to rewrite source
nat as this is now part of flow rewrite logic.
Type: improvement
Change-Id: Ib60c992e16792ea4d4129bc10202ebb99a73b5be
Signed-off-by: Klement Sekera <ksekera@cisco.com>
Diffstat (limited to 'src/plugins/nat/in2out_ed.c')
-rw-r--r-- | src/plugins/nat/in2out_ed.c | 1202 |
1 files changed, 563 insertions, 639 deletions
diff --git a/src/plugins/nat/in2out_ed.c b/src/plugins/nat/in2out_ed.c index ed9ad04bdb2..9dc68576fd4 100644 --- a/src/plugins/nat/in2out_ed.c +++ b/src/plugins/nat/in2out_ed.c @@ -48,7 +48,13 @@ typedef struct u32 sw_if_index; u32 next_index; u32 session_index; - u32 is_slow_path; + nat_translation_error_e translation_error; + nat_6t_flow_t i2of; + nat_6t_flow_t o2if; + clib_bihash_kv_16_8_t search_key; + u8 is_slow_path; + u8 translation_via_i2of; + u8 lookup_skipped; } nat_in2out_ed_trace_t; static u8 * @@ -65,144 +71,39 @@ format_nat_in2out_ed_trace (u8 * s, va_list * args) s = format (s, "%s: sw_if_index %d, next index %d, session %d", tag, t->sw_if_index, t->next_index, t->session_index); - - return s; -} - -#ifndef CLIB_MARCH_VARIANT -int -nat44_i2o_ed_is_idle_session_cb (clib_bihash_kv_16_8_t * kv, void *arg) -{ - snat_main_t *sm = &snat_main; - nat44_is_idle_session_ctx_t *ctx = arg; - snat_session_t *s; - u64 sess_timeout_time; - u8 proto; - u16 r_port, l_port; - ip4_address_t *l_addr, *r_addr; - u32 fib_index; - clib_bihash_kv_16_8_t ed_kv; - int i; - snat_address_t *a; - snat_main_per_thread_data_t *tsm = vec_elt_at_index (sm->per_thread_data, - ctx->thread_index); - - ASSERT (ctx->thread_index == ed_value_get_thread_index (kv)); - s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (kv)); - sess_timeout_time = s->last_heard + (f64) nat44_session_get_timeout (sm, s); - if (ctx->now >= sess_timeout_time) + if (~0 != t->session_index) { - if (is_fwd_bypass_session (s)) - goto delete; - - l_addr = &s->out2in.addr; - r_addr = &s->ext_host_addr; - fib_index = s->out2in.fib_index; - if (snat_is_unk_proto_session (s)) + s = format (s, ", translation result '%U' via %s", + format_nat_ed_translation_error, t->translation_error, + t->translation_via_i2of ? 
"i2of" : "o2if"); + s = format (s, "\n i2of %U", format_nat_6t_flow, &t->i2of); + s = format (s, "\n o2if %U", format_nat_6t_flow, &t->o2if); + } + if (!t->is_slow_path) + { + if (t->lookup_skipped) { - proto = s->in2out.port; - r_port = 0; - l_port = 0; + s = format (s, "\n lookup skipped - cached session index used"); } else { - proto = nat_proto_to_ip_proto (s->nat_proto); - l_port = s->out2in.port; - r_port = s->ext_host_port; - } - init_ed_k (&ed_kv, *l_addr, l_port, *r_addr, r_port, fib_index, proto); - if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &ed_kv, 0)) - nat_elog_warn ("out2in_ed key del failed"); - - if (snat_is_unk_proto_session (s)) - goto delete; - - nat_ipfix_logging_nat44_ses_delete (ctx->thread_index, - s->in2out.addr.as_u32, - s->out2in.addr.as_u32, - s->nat_proto, - s->in2out.port, - s->out2in.port, - s->in2out.fib_index); - - nat_syslog_nat44_sdel (s->user_index, s->in2out.fib_index, - &s->in2out.addr, s->in2out.port, - &s->ext_host_nat_addr, s->ext_host_nat_port, - &s->out2in.addr, s->out2in.port, - &s->ext_host_addr, s->ext_host_port, - s->nat_proto, is_twice_nat_session (s)); - - if (is_twice_nat_session (s)) - { - for (i = 0; i < vec_len (sm->twice_nat_addresses); i++) - { - // TODO FIXME this is obviously broken - which address should be - // freed here?! 
- a = sm->twice_nat_addresses + i; - if (a->addr.as_u32 == s->ext_host_nat_addr.as_u32) - { - snat_free_outside_address_and_port (sm->twice_nat_addresses, - ctx->thread_index, - &s->ext_host_nat_addr, - s->ext_host_nat_port, - s->nat_proto); - break; - } - } + s = format (s, "\n search key %U", format_ed_session_kvp, + &t->search_key); } - - if (snat_is_session_static (s)) - goto delete; - - snat_free_outside_address_and_port (sm->addresses, ctx->thread_index, - &s->out2in.addr, s->out2in.port, - s->nat_proto); - delete: - nat_ed_session_delete (sm, s, ctx->thread_index, 1); - return 1; } - return 0; -} -#endif - -static inline u32 -icmp_in2out_ed_slow_path (snat_main_t * sm, vlib_buffer_t * b0, - ip4_header_t * ip0, icmp46_header_t * icmp0, - u32 sw_if_index0, u32 rx_fib_index0, - vlib_node_runtime_t * node, u32 next0, f64 now, - u32 thread_index, snat_session_t ** p_s0) -{ - vlib_main_t *vm = vlib_get_main (); - - next0 = icmp_in2out (sm, b0, ip0, icmp0, sw_if_index0, rx_fib_index0, node, - next0, thread_index, p_s0, 0); - snat_session_t *s0 = *p_s0; - if (PREDICT_TRUE (next0 != NAT_NEXT_DROP && s0)) - { - /* Accounting */ - nat44_session_update_counters (s0, now, - vlib_buffer_length_in_chain - (vm, b0), thread_index); - /* Per-user LRU list maintenance */ - nat44_session_update_lru (sm, s0, thread_index); - } - return next0; + return s; } static int -nat_ed_alloc_addr_and_port (snat_main_t * sm, u32 rx_fib_index, - u32 nat_proto, u32 thread_index, - ip4_address_t r_addr, u16 r_port, u8 proto, - u16 port_per_thread, u32 snat_thread_index, - snat_session_t * s, - ip4_address_t * outside_addr, - u16 * outside_port, - clib_bihash_kv_16_8_t * out2in_ed_kv) +nat_ed_alloc_addr_and_port (snat_main_t *sm, u32 rx_fib_index, u32 nat_proto, + u32 thread_index, ip4_address_t r_addr, u16 r_port, + u8 proto, u16 port_per_thread, + u32 snat_thread_index, snat_session_t *s, + ip4_address_t *outside_addr, u16 *outside_port) { int i; snat_address_t *a, *ga = 0; - 
snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; const u16 port_thread_offset = (port_per_thread * snat_thread_index) + 1024; @@ -211,48 +112,50 @@ nat_ed_alloc_addr_and_port (snat_main_t * sm, u32 rx_fib_index, a = sm->addresses + i; switch (nat_proto) { -#define _(N, j, n, unused) \ - case NAT_PROTOCOL_##N: \ - if (a->fib_index == rx_fib_index) \ - { \ - /* first try port suggested by caller */ \ - u16 port = clib_net_to_host_u16 (*outside_port); \ - u16 port_offset = port - port_thread_offset; \ - if (port <= port_thread_offset || \ - port > port_thread_offset + port_per_thread) \ - { \ - /* need to pick a different port, suggested port doesn't fit in \ - * this thread's port range */ \ - port_offset = snat_random_port (0, port_per_thread - 1); \ - port = port_thread_offset + port_offset; \ - } \ - u16 attempts = ED_PORT_ALLOC_ATTEMPTS; \ - do \ - { \ - init_ed_kv (out2in_ed_kv, a->addr, clib_host_to_net_u16 (port), \ - r_addr, r_port, s->out2in.fib_index, proto, \ - thread_index, s - tsm->sessions); \ - int rv = clib_bihash_add_del_16_8 (&sm->out2in_ed, out2in_ed_kv, \ - 2 /* is_add */); \ - if (0 == rv) \ - { \ - ++a->busy_##n##_port_refcounts[port]; \ - a->busy_##n##_ports_per_thread[thread_index]++; \ - a->busy_##n##_ports++; \ - *outside_addr = a->addr; \ - *outside_port = clib_host_to_net_u16 (port); \ - return 0; \ - } \ - port_offset = snat_random_port (0, port_per_thread - 1); \ - port = port_thread_offset + port_offset; \ - --attempts; \ - } \ - while (attempts > 0); \ - } \ - else if (a->fib_index == ~0) \ - { \ - ga = a; \ - } \ +#define _(N, j, n, unused) \ + case NAT_PROTOCOL_##N: \ + if (a->fib_index == rx_fib_index) \ + { \ + s->o2i.match.daddr = a->addr; \ + /* first try port suggested by caller */ \ + u16 port = clib_net_to_host_u16 (*outside_port); \ + u16 port_offset = port - port_thread_offset; \ + if (port <= port_thread_offset || \ + port > port_thread_offset + port_per_thread) \ + { \ + /* need to pick a different 
port, suggested port doesn't fit in \ + * this thread's port range */ \ + port_offset = snat_random_port (0, port_per_thread - 1); \ + port = port_thread_offset + port_offset; \ + } \ + u16 attempts = ED_PORT_ALLOC_ATTEMPTS; \ + do \ + { \ + if (NAT_PROTOCOL_ICMP == nat_proto) \ + { \ + s->o2i.match.sport = clib_host_to_net_u16 (port); \ + } \ + s->o2i.match.dport = clib_host_to_net_u16 (port); \ + if (0 == \ + nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2)) \ + { \ + ++a->busy_##n##_port_refcounts[port]; \ + a->busy_##n##_ports_per_thread[thread_index]++; \ + a->busy_##n##_ports++; \ + *outside_addr = a->addr; \ + *outside_port = clib_host_to_net_u16 (port); \ + return 0; \ + } \ + port_offset = snat_random_port (0, port_per_thread - 1); \ + port = port_thread_offset + port_offset; \ + --attempts; \ + } \ + while (attempts > 0); \ + } \ + else if (a->fib_index == ~0) \ + { \ + ga = a; \ + } \ break; foreach_nat_protocol; @@ -311,28 +214,51 @@ nat_outside_fib_index_lookup (snat_main_t * sm, ip4_address_t addr) return ~0; } +static_always_inline int +nat44_ed_external_sm_lookup (snat_main_t *sm, ip4_address_t match_addr, + u16 match_port, nat_protocol_t match_protocol, + u32 match_fib_index, ip4_address_t *daddr, + u16 *dport) +{ + clib_bihash_kv_8_8_t kv, value; + init_nat_k (&kv, match_addr, match_port, match_fib_index, match_protocol); + if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, &value)) + { + /* Try address only mapping */ + init_nat_k (&kv, match_addr, 0, 0, 0); + if (clib_bihash_search_8_8 (&sm->static_mapping_by_external, &kv, + &value)) + return 0; + } + + snat_static_mapping_t *m = + pool_elt_at_index (sm->static_mappings, value.value); + *daddr = m->local_addr; + if (dport) + { + /* Address only mapping doesn't change port */ + *dport = is_addr_only_static_mapping (m) ? 
match_port : m->local_port; + } + return 1; +} + static u32 -slow_path_ed (snat_main_t * sm, - vlib_buffer_t * b, - ip4_address_t l_addr, - ip4_address_t r_addr, - u16 l_port, - u16 r_port, - u8 proto, - u32 rx_fib_index, - snat_session_t ** sessionp, - vlib_node_runtime_t * node, u32 next, u32 thread_index, f64 now) +slow_path_ed (snat_main_t *sm, vlib_buffer_t *b, ip4_address_t l_addr, + ip4_address_t r_addr, u16 l_port, u16 r_port, u8 proto, + u32 rx_fib_index, snat_session_t **sessionp, + vlib_node_runtime_t *node, u32 next, u32 thread_index, f64 now) { snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; - clib_bihash_kv_16_8_t out2in_ed_kv; - nat44_is_idle_session_ctx_t ctx; ip4_address_t outside_addr; u16 outside_port; - u8 identity_nat; + u32 outside_fib_index; + u8 is_identity_nat; u32 nat_proto = ip_proto_to_nat_proto (proto); snat_session_t *s = NULL; lb_nat_type_t lb = 0; + ip4_address_t daddr = r_addr; + u16 dport = r_port; if (PREDICT_TRUE (nat_proto == NAT_PROTOCOL_TCP)) { @@ -358,43 +284,81 @@ slow_path_ed (snat_main_t * sm, } } + outside_fib_index = sm->outside_fib_index; + + switch (vec_len (sm->outside_fibs)) + { + case 0: + outside_fib_index = sm->outside_fib_index; + break; + case 1: + outside_fib_index = sm->outside_fibs[0].fib_index; + break; + default: + outside_fib_index = nat_outside_fib_index_lookup (sm, r_addr); + break; + } + ip4_address_t sm_addr; u16 sm_port; u32 sm_fib_index; /* First try to match static mapping by local address and port */ - if (snat_static_mapping_match - (sm, l_addr, l_port, rx_fib_index, nat_proto, &sm_addr, &sm_port, - &sm_fib_index, 0, 0, 0, &lb, 0, &identity_nat, 0)) + int is_sm; + if (snat_static_mapping_match (sm, l_addr, l_port, rx_fib_index, nat_proto, + &sm_addr, &sm_port, &sm_fib_index, 0, 0, 0, + &lb, 0, &is_identity_nat, 0)) + { + is_sm = 0; + } + else + { + is_sm = 1; + } + + if (PREDICT_FALSE (is_sm && is_identity_nat)) + { + *sessionp = NULL; + return next; + } + + s = 
nat_ed_session_alloc (sm, thread_index, now, proto); + ASSERT (s); + + if (!is_sm) { - s = nat_ed_session_alloc (sm, thread_index, now, proto); - ASSERT (s); s->in2out.addr = l_addr; s->in2out.port = l_port; s->nat_proto = nat_proto; s->in2out.fib_index = rx_fib_index; - s->out2in.fib_index = sm->outside_fib_index; + s->out2in.fib_index = outside_fib_index; + + // suggest using local port to allocation function + outside_port = l_port; - switch (vec_len (sm->outside_fibs)) + // hairpinning? + int is_hairpinning = nat44_ed_external_sm_lookup ( + sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport); + s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING; + + // destination addr/port updated with real values in + // nat_ed_alloc_addr_and_port + nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, daddr, 0, + s->out2in.fib_index, proto); + nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32); + if (NAT_PROTOCOL_ICMP == nat_proto) { - case 0: - s->out2in.fib_index = sm->outside_fib_index; - break; - case 1: - s->out2in.fib_index = sm->outside_fibs[0].fib_index; - break; - default: - s->out2in.fib_index = nat_outside_fib_index_lookup (sm, r_addr); - break; + nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port); } + else + { + nat_6t_flow_dport_rewrite_set (&s->o2i, l_port); + } + nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index); - /* Try to create dynamic translation */ - outside_port = l_port; // suggest using local port to allocation function - if (nat_ed_alloc_addr_and_port (sm, rx_fib_index, nat_proto, - thread_index, r_addr, r_port, proto, - sm->port_per_thread, - tsm->snat_thread_index, s, - &outside_addr, - &outside_port, &out2in_ed_kv)) + if (nat_ed_alloc_addr_and_port ( + sm, rx_fib_index, nat_proto, thread_index, daddr, dport, proto, + sm->port_per_thread, tsm->snat_thread_index, s, &outside_addr, + &outside_port)) { nat_elog_notice ("addresses exhausted"); b->error = node->errors[NAT_IN2OUT_ED_ERROR_OUT_OF_PORTS]; @@ -406,42 +370,40 
@@ slow_path_ed (snat_main_t * sm, } else { - if (PREDICT_FALSE (identity_nat)) - { - *sessionp = NULL; - return next; - } - s = nat_ed_session_alloc (sm, thread_index, now, proto); - ASSERT (s); - s->out2in.addr = sm_addr; - s->out2in.port = sm_port; + // static mapping + s->out2in.addr = outside_addr = sm_addr; + s->out2in.port = outside_port = sm_port; s->in2out.addr = l_addr; s->in2out.port = l_port; s->nat_proto = nat_proto; s->in2out.fib_index = rx_fib_index; - s->out2in.fib_index = sm->outside_fib_index; - switch (vec_len (sm->outside_fibs)) - { - case 0: - s->out2in.fib_index = sm->outside_fib_index; - break; - case 1: - s->out2in.fib_index = sm->outside_fibs[0].fib_index; - break; - default: - s->out2in.fib_index = nat_outside_fib_index_lookup (sm, r_addr); - break; - } - + s->out2in.fib_index = outside_fib_index; s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; - init_ed_kv (&out2in_ed_kv, sm_addr, sm_port, r_addr, r_port, - s->out2in.fib_index, proto, thread_index, - s - tsm->sessions); - if (clib_bihash_add_or_overwrite_stale_16_8 - (&sm->out2in_ed, &out2in_ed_kv, nat44_o2i_ed_is_idle_session_cb, - &ctx)) - nat_elog_notice ("out2in-ed key add failed"); + // hairpinning? 
+ int is_hairpinning = nat44_ed_external_sm_lookup ( + sm, r_addr, r_port, nat_proto, outside_fib_index, &daddr, &dport); + s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING; + + if (NAT_PROTOCOL_ICMP == nat_proto) + { + nat_6t_o2i_flow_init (sm, thread_index, s, daddr, sm_port, sm_addr, + sm_port, s->out2in.fib_index, proto); + nat_6t_flow_icmp_id_rewrite_set (&s->o2i, l_port); + } + else + { + nat_6t_o2i_flow_init (sm, thread_index, s, daddr, dport, sm_addr, + sm_port, s->out2in.fib_index, proto); + nat_6t_flow_dport_rewrite_set (&s->o2i, l_port); + } + nat_6t_flow_daddr_rewrite_set (&s->o2i, l_addr.as_u32); + nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index); + if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 2)) + { + nat_elog_notice ("out2in key add failed"); + goto error; + } } if (lb) @@ -450,17 +412,26 @@ slow_path_ed (snat_main_t * sm, s->ext_host_addr = r_addr; s->ext_host_port = r_port; - clib_bihash_kv_16_8_t in2out_ed_kv; - init_ed_kv (&in2out_ed_kv, l_addr, l_port, r_addr, r_port, rx_fib_index, - proto, thread_index, s - tsm->sessions); - ctx.now = now; - ctx.thread_index = thread_index; - if (clib_bihash_add_or_overwrite_stale_16_8 (&tsm->in2out_ed, &in2out_ed_kv, - nat44_i2o_ed_is_idle_session_cb, - &ctx)) - nat_elog_notice ("in2out-ed key add failed"); + nat_6t_i2o_flow_init (sm, thread_index, s, l_addr, l_port, r_addr, r_port, + rx_fib_index, proto); + nat_6t_flow_saddr_rewrite_set (&s->i2o, outside_addr.as_u32); + nat_6t_flow_daddr_rewrite_set (&s->i2o, daddr.as_u32); + if (NAT_PROTOCOL_ICMP == nat_proto) + { + nat_6t_flow_icmp_id_rewrite_set (&s->i2o, outside_port); + } + else + { + nat_6t_flow_sport_rewrite_set (&s->i2o, outside_port); + nat_6t_flow_dport_rewrite_set (&s->i2o, dport); + } + nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index); - *sessionp = s; + if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1)) + { + nat_elog_notice ("in2out key add failed"); + goto error; + } /* log NAT event */ 
nat_ipfix_logging_nat44_ses_create (thread_index, @@ -479,7 +450,21 @@ slow_path_ed (snat_main_t * sm, per_vrf_sessions_register_session (s, thread_index); + *sessionp = s; return next; +error: + if (s) + { + if (!is_sm) + { + snat_free_outside_address_and_port (sm->addresses, thread_index, + &outside_addr, outside_port, + nat_proto); + } + nat_ed_session_delete (sm, s, thread_index, 1); + } + *sessionp = s = NULL; + return NAT_NEXT_DROP; } static_always_inline int @@ -494,7 +479,7 @@ nat44_ed_not_translate (snat_main_t * sm, vlib_node_runtime_t * node, udp->src_port, sm->outside_fib_index, ip->protocol); /* NAT packet aimed at external address if has active sessions */ - if (clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value)) + if (clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value)) { /* or is static mappings */ ip4_address_t placeholder_addr; @@ -530,8 +515,15 @@ nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip, if (ip->protocol == IP_PROTOCOL_ICMP) { - if (get_icmp_i2o_ed_key (b, ip, 0, ~0, ~0, 0, 0, 0, &kv)) + ip4_address_t lookup_saddr, lookup_daddr; + u16 lookup_sport, lookup_dport; + u8 lookup_protocol; + if (nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr, + &lookup_sport, &lookup_daddr, + &lookup_dport, &lookup_protocol)) return 0; + init_ed_k (&kv, lookup_saddr, lookup_sport, lookup_daddr, lookup_dport, + 0, lookup_protocol); } else if (ip->protocol == IP_PROTOCOL_UDP || ip->protocol == IP_PROTOCOL_TCP) { @@ -545,12 +537,13 @@ nat_not_translate_output_feature_fwd (snat_main_t * sm, ip4_header_t * ip, ip->protocol); } - if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value)) + if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value)) { ASSERT (thread_index == ed_value_get_thread_index (&value)); s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value)); + if (is_fwd_bypass_session (s)) { if (ip->protocol == IP_PROTOCOL_TCP) @@ -588,7 +581,7 @@ nat44_ed_not_translate_output_feature (snat_main_t 
* sm, ip4_header_t * ip, /* src NAT check */ init_ed_k (&kv, ip->src_address, src_port, ip->dst_address, dst_port, tx_fib_index, ip->protocol); - if (!clib_bihash_search_16_8 (&sm->out2in_ed, &kv, &value)) + if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value)) { ASSERT (thread_index == ed_value_get_thread_index (&value)); s = @@ -606,12 +599,13 @@ nat44_ed_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip, /* dst NAT check */ init_ed_k (&kv, ip->dst_address, dst_port, ip->src_address, src_port, rx_fib_index, ip->protocol); - if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value)) + if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv, &value)) { ASSERT (thread_index == ed_value_get_thread_index (&value)); s = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value)); + if (is_fwd_bypass_session (s)) return 0; @@ -629,141 +623,115 @@ nat44_ed_not_translate_output_feature (snat_main_t * sm, ip4_header_t * ip, return 0; } -#ifndef CLIB_MARCH_VARIANT -u32 -icmp_match_in2out_ed (snat_main_t * sm, vlib_node_runtime_t * node, - u32 thread_index, vlib_buffer_t * b, - ip4_header_t * ip, ip4_address_t * addr, - u16 * port, u32 * fib_index, nat_protocol_t * proto, - void *d, void *e, u8 * dont_translate) +static inline u32 +icmp_in2out_ed_slow_path (snat_main_t *sm, vlib_buffer_t *b, ip4_header_t *ip, + icmp46_header_t *icmp, u32 sw_if_index, + u32 rx_fib_index, vlib_node_runtime_t *node, + u32 next, f64 now, u32 thread_index, + nat_protocol_t nat_proto, snat_session_t **s_p) { - u32 sw_if_index; - u32 rx_fib_index; - clib_bihash_kv_16_8_t kv, value; - u32 next = ~0; + vlib_main_t *vm = vlib_get_main (); + u16 checksum; int err; snat_session_t *s = NULL; - u16 l_port = 0, r_port = 0; // initialize to workaround gcc warning - vlib_main_t *vm = vlib_get_main (); - snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; - *dont_translate = 0; - f64 now = vlib_time_now (vm); - - sw_if_index = vnet_buffer 
(b)->sw_if_index[VLIB_RX]; - rx_fib_index = ip4_fib_table_get_index_for_sw_if_index (sw_if_index); + u8 lookup_protocol = ip->protocol; + u16 lookup_sport, lookup_dport; + ip4_address_t lookup_saddr, lookup_daddr; - err = - get_icmp_i2o_ed_key (b, ip, rx_fib_index, ~0, ~0, proto, &l_port, - &r_port, &kv); + err = nat_get_icmp_session_lookup_values (b, ip, &lookup_saddr, + &lookup_sport, &lookup_daddr, + &lookup_dport, &lookup_protocol); if (err != 0) { b->error = node->errors[err]; - next = NAT_NEXT_DROP; - goto out; + return NAT_NEXT_DROP; } - if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv, &value)) + if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0) { - if (vnet_buffer (b)->sw_if_index[VLIB_TX] != ~0) + if (PREDICT_FALSE (nat44_ed_not_translate_output_feature ( + sm, ip, lookup_sport, lookup_dport, thread_index, sw_if_index, + vnet_buffer (b)->sw_if_index[VLIB_TX], now))) { - if (PREDICT_FALSE - (nat44_ed_not_translate_output_feature - (sm, ip, l_port, r_port, thread_index, - sw_if_index, vnet_buffer (b)->sw_if_index[VLIB_TX], now))) - { - *dont_translate = 1; - goto out; - } + return next; } - else + } + else + { + if (PREDICT_FALSE (nat44_ed_not_translate (sm, node, sw_if_index, ip, + NAT_PROTOCOL_ICMP, + rx_fib_index, thread_index))) { - if (PREDICT_FALSE (nat44_ed_not_translate (sm, node, sw_if_index, - ip, NAT_PROTOCOL_ICMP, - rx_fib_index, - thread_index))) - { - *dont_translate = 1; - goto out; - } + return next; } + } - if (PREDICT_FALSE - (icmp_type_is_error_message - (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))) - { - b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE]; - next = NAT_NEXT_DROP; - goto out; - } + if (PREDICT_FALSE (icmp_type_is_error_message ( + vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags))) + { + b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE]; + return NAT_NEXT_DROP; + } - next = - slow_path_ed (sm, b, ip->src_address, ip->dst_address, l_port, r_port, - ip->protocol, rx_fib_index, &s, node, next, - 
thread_index, vlib_time_now (vm)); + next = slow_path_ed (sm, b, ip->src_address, ip->dst_address, lookup_sport, + lookup_dport, ip->protocol, rx_fib_index, &s, node, + next, thread_index, vlib_time_now (vm)); - if (PREDICT_FALSE (next == NAT_NEXT_DROP)) - goto out; + if (NAT_NEXT_DROP == next) + goto out; - if (!s) - { - *dont_translate = 1; - goto out; - } - } - else + if (PREDICT_TRUE (!ip4_is_fragment (ip))) { - if (PREDICT_FALSE - (vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags != - ICMP4_echo_request - && vnet_buffer (b)->ip.reass.icmp_type_or_tcp_flags != - ICMP4_echo_reply - && !icmp_type_is_error_message (vnet_buffer (b)->ip. - reass.icmp_type_or_tcp_flags))) + ip_csum_t sum = ip_incremental_checksum_buffer ( + vm, b, (u8 *) icmp - (u8 *) vlib_buffer_get_current (b), + ntohs (ip->length) - ip4_header_bytes (ip), 0); + checksum = ~ip_csum_fold (sum); + if (PREDICT_FALSE (checksum != 0 && checksum != 0xffff)) { - b->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE]; next = NAT_NEXT_DROP; goto out; } - - ASSERT (thread_index == ed_value_get_thread_index (&value)); - s = - pool_elt_at_index (tsm->sessions, - ed_value_get_session_index (&value)); } + out: - if (s) + if (PREDICT_TRUE (next != NAT_NEXT_DROP && s)) { - *addr = s->out2in.addr; - *port = s->out2in.port; - *fib_index = s->out2in.fib_index; - } - if (d) - { - *(snat_session_t **) d = s; + /* Accounting */ + nat44_session_update_counters ( + s, now, vlib_buffer_length_in_chain (vm, b), thread_index); + /* Per-user LRU list maintenance */ + nat44_session_update_lru (sm, s, thread_index); } + *s_p = s; return next; } -#endif static snat_session_t * -nat44_ed_in2out_unknown_proto (snat_main_t * sm, - vlib_buffer_t * b, - ip4_header_t * ip, - u32 rx_fib_index, - u32 thread_index, - f64 now, - vlib_main_t * vm, vlib_node_runtime_t * node) +nat44_ed_in2out_slowpath_unknown_proto (snat_main_t *sm, vlib_buffer_t *b, + ip4_header_t *ip, u32 rx_fib_index, + u32 thread_index, f64 now, + vlib_main_t *vm, + 
vlib_node_runtime_t *node) { clib_bihash_kv_8_8_t kv, value; clib_bihash_kv_16_8_t s_kv, s_value; - snat_static_mapping_t *m; - u32 old_addr, new_addr = 0; - ip_csum_t sum; + snat_static_mapping_t *m = NULL; snat_main_per_thread_data_t *tsm = &sm->per_thread_data[thread_index]; - snat_session_t *s; + snat_session_t *s = NULL; u32 outside_fib_index = sm->outside_fib_index; int i; - u8 is_sm = 0; + ip4_address_t new_src_addr = { 0 }; + ip4_address_t new_dst_addr = ip->dst_address; + + if (PREDICT_FALSE ( + nat44_ed_maximum_sessions_exceeded (sm, rx_fib_index, thread_index))) + { + b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED]; + nat_ipfix_logging_max_sessions (thread_index, + sm->max_translations_per_thread); + nat_elog_notice ("maximum sessions exceeded"); + return 0; + } switch (vec_len (sm->outside_fibs)) { @@ -777,112 +745,105 @@ nat44_ed_in2out_unknown_proto (snat_main_t * sm, outside_fib_index = nat_outside_fib_index_lookup (sm, ip->dst_address); break; } - old_addr = ip->src_address.as_u32; - init_ed_k (&s_kv, ip->src_address, 0, ip->dst_address, 0, rx_fib_index, - ip->protocol); + init_nat_k (&kv, ip->src_address, 0, rx_fib_index, 0); - if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &s_kv, &s_value)) + /* Try to find static mapping first */ + if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value)) { - ASSERT (thread_index == ed_value_get_thread_index (&s_value)); - s = - pool_elt_at_index (tsm->sessions, - ed_value_get_session_index (&s_value)); - new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32; + m = pool_elt_at_index (sm->static_mappings, value.value); + new_src_addr = m->external_addr; } else { - if (PREDICT_FALSE - (nat44_ed_maximum_sessions_exceeded - (sm, rx_fib_index, thread_index))) + pool_foreach (s, tsm->sessions) { - b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED]; - nat_ipfix_logging_max_sessions (thread_index, - sm->max_translations_per_thread); - nat_elog_notice ("maximum 
sessions exceeded"); - return 0; + if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32) + { + init_ed_k (&s_kv, s->out2in.addr, 0, ip->dst_address, 0, + outside_fib_index, ip->protocol); + if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value)) + { + new_src_addr = s->out2in.addr; + } + break; + } } - init_nat_k (&kv, ip->src_address, 0, rx_fib_index, 0); - - /* Try to find static mapping first */ - if (!clib_bihash_search_8_8 (&sm->static_mapping_by_local, &kv, &value)) - { - m = pool_elt_at_index (sm->static_mappings, value.value); - new_addr = ip->src_address.as_u32 = m->external_addr.as_u32; - is_sm = 1; - goto create_ses; - } - else + if (!new_src_addr.as_u32) { - /* *INDENT-OFF* */ - pool_foreach (s, tsm->sessions) { - if (s->ext_host_addr.as_u32 == ip->dst_address.as_u32) - { - new_addr = ip->src_address.as_u32 = s->out2in.addr.as_u32; - - init_ed_k(&s_kv, s->out2in.addr, 0, ip->dst_address, 0, outside_fib_index, ip->protocol); - if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value)) - goto create_ses; - - break; - } - } - /* *INDENT-ON* */ - for (i = 0; i < vec_len (sm->addresses); i++) { init_ed_k (&s_kv, sm->addresses[i].addr, 0, ip->dst_address, 0, outside_fib_index, ip->protocol); - if (clib_bihash_search_16_8 (&sm->out2in_ed, &s_kv, &s_value)) + if (clib_bihash_search_16_8 (&sm->flow_hash, &s_kv, &s_value)) { - new_addr = ip->src_address.as_u32 = - sm->addresses[i].addr.as_u32; - goto create_ses; + new_src_addr = sm->addresses[i].addr; } } - return 0; } + } - create_ses: - s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol); - if (!s) - { - b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED]; - nat_elog_warn ("create NAT session failed"); - return 0; - } + if (!new_src_addr.as_u32) + { + // could not allocate address for translation ... 
+ return 0; + } - s->ext_host_addr.as_u32 = ip->dst_address.as_u32; - s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO; - s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT; - s->out2in.addr.as_u32 = new_addr; - s->out2in.fib_index = outside_fib_index; - s->in2out.addr.as_u32 = old_addr; - s->in2out.fib_index = rx_fib_index; - s->in2out.port = s->out2in.port = ip->protocol; - if (is_sm) - s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; - - /* Add to lookup tables */ - init_ed_kv (&s_kv, s->in2out.addr, 0, ip->dst_address, 0, rx_fib_index, - ip->protocol, thread_index, s - tsm->sessions); - if (clib_bihash_add_del_16_8 (&tsm->in2out_ed, &s_kv, 1)) - nat_elog_notice ("in2out key add failed"); - - init_ed_kv (&s_kv, s->out2in.addr, 0, ip->dst_address, 0, - outside_fib_index, ip->protocol, thread_index, - s - tsm->sessions); - if (clib_bihash_add_del_16_8 (&sm->out2in_ed, &s_kv, 1)) - nat_elog_notice ("out2in key add failed"); - - per_vrf_sessions_register_session (s, thread_index); + s = nat_ed_session_alloc (sm, thread_index, now, ip->protocol); + if (!s) + { + b->error = node->errors[NAT_IN2OUT_ED_ERROR_MAX_SESSIONS_EXCEEDED]; + nat_elog_warn ("create NAT session failed"); + return 0; } - /* Update IP checksum */ - sum = ip->checksum; - sum = ip_csum_update (sum, old_addr, new_addr, ip4_header_t, src_address); - ip->checksum = ip_csum_fold (sum); + nat_6t_i2o_flow_init (sm, thread_index, s, ip->src_address, 0, + ip->dst_address, 0, rx_fib_index, ip->protocol); + nat_6t_flow_saddr_rewrite_set (&s->i2o, new_src_addr.as_u32); + nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index); + + // hairpinning? 
+ int is_hairpinning = + nat44_ed_external_sm_lookup (sm, ip->dst_address, 0, NAT_PROTOCOL_OTHER, + outside_fib_index, &new_dst_addr, NULL); + s->flags |= is_hairpinning * SNAT_SESSION_FLAG_HAIRPINNING; + + nat_6t_flow_daddr_rewrite_set (&s->i2o, new_dst_addr.as_u32); + nat_6t_flow_txfib_rewrite_set (&s->i2o, outside_fib_index); + + nat_6t_o2i_flow_init (sm, thread_index, s, new_dst_addr, 0, new_src_addr, 0, + outside_fib_index, ip->protocol); + nat_6t_flow_saddr_rewrite_set (&s->o2i, ip->dst_address.as_u32); + nat_6t_flow_daddr_rewrite_set (&s->o2i, ip->src_address.as_u32); + nat_6t_flow_txfib_rewrite_set (&s->o2i, rx_fib_index); + + s->ext_host_addr.as_u32 = ip->dst_address.as_u32; + s->flags |= SNAT_SESSION_FLAG_UNKNOWN_PROTO; + s->flags |= SNAT_SESSION_FLAG_ENDPOINT_DEPENDENT; + s->out2in.addr.as_u32 = new_src_addr.as_u32; + s->out2in.fib_index = outside_fib_index; + s->in2out.addr.as_u32 = ip->src_address.as_u32; + s->in2out.fib_index = rx_fib_index; + s->in2out.port = s->out2in.port = ip->protocol; + if (m) + s->flags |= SNAT_SESSION_FLAG_STATIC_MAPPING; + + if (nat_ed_ses_i2o_flow_hash_add_del (sm, thread_index, s, 1)) + { + nat_elog_notice ("in2out flow hash add failed"); + nat_ed_session_delete (sm, s, thread_index, 1); + return NULL; + } + + if (nat_ed_ses_o2i_flow_hash_add_del (sm, thread_index, s, 1)) + { + nat_elog_notice ("out2in flow hash add failed"); + nat_ed_session_delete (sm, s, thread_index, 1); + return NULL; + } + + per_vrf_sessions_register_session (s, thread_index); /* Accounting */ nat44_session_update_counters (s, now, vlib_buffer_length_in_chain (vm, b), @@ -890,13 +851,6 @@ nat44_ed_in2out_unknown_proto (snat_main_t * sm, /* Per-user LRU list maintenance */ nat44_session_update_lru (sm, s, thread_index); - /* Hairpinning */ - if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0) - nat44_ed_hairpinning_unknown_proto (sm, b, ip); - - if (vnet_buffer (b)->sw_if_index[VLIB_TX] == ~0) - vnet_buffer (b)->sw_if_index[VLIB_TX] = outside_fib_index; - 
return s; } @@ -924,15 +878,17 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, while (n_left_from > 0) { vlib_buffer_t *b0; - u32 sw_if_index0, rx_fib_index0, proto0, new_addr0, old_addr0, - iph_offset0 = 0; - u16 old_port0, new_port0; + u32 sw_if_index0, rx_fib_index0, iph_offset0 = 0; + nat_protocol_t proto0; ip4_header_t *ip0; - udp_header_t *udp0; - tcp_header_t *tcp0; snat_session_t *s0 = 0; clib_bihash_kv_16_8_t kv0, value0; - ip_csum_t sum0; + nat_translation_error_e translation_error = NAT_ED_TRNSL_ERR_SUCCESS; + nat_6t_flow_t *f = 0; + ip4_address_t lookup_saddr, lookup_daddr; + u16 lookup_sport, lookup_dport; + u8 lookup_protocol; + int lookup_skipped = 0; b0 = *b; b++; @@ -973,16 +929,8 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, goto trace0; } - udp0 = ip4_next_header (ip0); - tcp0 = (tcp_header_t *) udp0; proto0 = ip_proto_to_nat_proto (ip0->protocol); - if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER)) - { - next[0] = def_slow; - goto trace0; - } - if (is_output_feature) { if (PREDICT_FALSE @@ -993,27 +941,79 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP)) { - next[0] = def_slow; - goto trace0; + if (vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_request && + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags != + ICMP4_echo_reply && + !icmp_type_is_error_message ( + vnet_buffer (b0)->ip.reass.icmp_type_or_tcp_flags)) + { + b0->error = node->errors[NAT_IN2OUT_ED_ERROR_BAD_ICMP_TYPE]; + next[0] = NAT_NEXT_DROP; + goto trace0; + } + int err = nat_get_icmp_session_lookup_values ( + b0, ip0, &lookup_saddr, &lookup_sport, &lookup_daddr, + &lookup_dport, &lookup_protocol); + if (err != 0) + { + b0->error = node->errors[err]; + next[0] = NAT_NEXT_DROP; + goto trace0; + } + } + else + { + lookup_protocol = ip0->protocol; + lookup_saddr = ip0->src_address; + lookup_daddr = ip0->dst_address; + lookup_sport = vnet_buffer (b0)->ip.reass.l4_src_port; 
+ lookup_dport = vnet_buffer (b0)->ip.reass.l4_dst_port; } - init_ed_k (&kv0, ip0->src_address, - vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address, - vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0, - ip0->protocol); + /* there might be a stashed index in vnet_buffer2 from handoff or + * classify node, see if it can be used */ + if (!pool_is_free_index (tsm->sessions, + vnet_buffer2 (b0)->nat.cached_session_index)) + { + s0 = pool_elt_at_index (tsm->sessions, + vnet_buffer2 (b0)->nat.cached_session_index); + if (PREDICT_TRUE ( + nat_6t_flow_match (&s0->i2o, b0, lookup_saddr, lookup_sport, + lookup_daddr, lookup_dport, lookup_protocol, + rx_fib_index0) + // for some hairpinning cases there are two "i2i" flows instead + // of i2o and o2i as both hosts are on inside + || (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING && + nat_6t_flow_match ( + &s0->o2i, b0, lookup_saddr, lookup_sport, lookup_daddr, + lookup_dport, lookup_protocol, rx_fib_index0)))) + { + /* yes, this is the droid we're looking for */ + lookup_skipped = 1; + goto skip_lookup; + } + s0 = NULL; + } + + init_ed_k (&kv0, ip0->src_address, lookup_sport, ip0->dst_address, + lookup_dport, rx_fib_index0, lookup_protocol); - // lookup for session - if (clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0)) + // lookup flow + if (clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0)) { - // session does not exist go slow path + // flow does not exist go slow path next[0] = def_slow; goto trace0; } + ASSERT (thread_index == ed_value_get_thread_index (&value0)); s0 = pool_elt_at_index (tsm->sessions, ed_value_get_session_index (&value0)); + skip_lookup: + if (PREDICT_FALSE (per_vrf_sessions_is_expired (s0, thread_index))) { // session is closed, go slow path @@ -1054,96 +1054,58 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, b0->flags |= VNET_BUFFER_F_IS_NATED; - if (!is_output_feature) - vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index; - - old_addr0 = 
ip0->src_address.as_u32; - new_addr0 = ip0->src_address.as_u32 = s0->out2in.addr.as_u32; - sum0 = ip0->checksum; - sum0 = - ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - src_address); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - sum0 = - ip_csum_update (sum0, ip0->dst_address.as_u32, - s0->ext_host_addr.as_u32, ip4_header_t, - dst_address); - ip0->checksum = ip_csum_fold (sum0); + if (nat_6t_flow_match (&s0->i2o, b0, lookup_saddr, lookup_sport, + lookup_daddr, lookup_dport, lookup_protocol, + rx_fib_index0)) + { + f = &s0->i2o; + } + else if (s0->flags & SNAT_SESSION_FLAG_HAIRPINNING && + nat_6t_flow_match (&s0->o2i, b0, lookup_saddr, lookup_sport, + lookup_daddr, lookup_dport, lookup_protocol, + rx_fib_index0)) + { + f = &s0->o2i; + } + else + { + translation_error = NAT_ED_TRNSL_ERR_FLOW_MISMATCH; + nat_free_session_data (sm, s0, thread_index, 0); + nat_ed_session_delete (sm, s0, thread_index, 1); + next[0] = NAT_NEXT_DROP; + goto trace0; + } - old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port; + if (NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate ( + sm, b0, ip0, f, proto0, is_output_feature))) + { + nat_free_session_data (sm, s0, thread_index, 0); + nat_ed_session_delete (sm, s0, thread_index, 1); + next[0] = NAT_NEXT_DROP; + goto trace0; + } - if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP)) + switch (proto0) { - if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) - { - new_port0 = udp0->src_port = s0->out2in.port; - sum0 = tcp0->checksum; - sum0 = - ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address); - sum0 = - ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, - length); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - sum0 = - ip_csum_update (sum0, ip0->dst_address.as_u32, - s0->ext_host_addr.as_u32, ip4_header_t, - dst_address); - sum0 = - ip_csum_update (sum0, - vnet_buffer (b0)->ip.reass.l4_dst_port, - s0->ext_host_port, ip4_header_t, length); - tcp0->dst_port = 
s0->ext_host_port; - ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; - } - mss_clamping (sm->mss_clamping, tcp0, &sum0); - tcp0->checksum = ip_csum_fold (sum0); - } + case NAT_PROTOCOL_TCP: vlib_increment_simple_counter (&sm->counters.fastpath.in2out_ed.tcp, thread_index, sw_if_index0, 1); nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index); - } - else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment - && udp0->checksum) - { - new_port0 = udp0->src_port = s0->out2in.port; - sum0 = udp0->checksum; - sum0 = - ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address); - sum0 = - ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, length); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - sum0 = - ip_csum_update (sum0, ip0->dst_address.as_u32, - s0->ext_host_addr.as_u32, ip4_header_t, - dst_address); - sum0 = - ip_csum_update (sum0, vnet_buffer (b0)->ip.reass.l4_dst_port, - s0->ext_host_port, ip4_header_t, length); - udp0->dst_port = s0->ext_host_port; - ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; - } - udp0->checksum = ip_csum_fold (sum0); + break; + case NAT_PROTOCOL_UDP: vlib_increment_simple_counter (&sm->counters.fastpath.in2out_ed.udp, thread_index, sw_if_index0, 1); - } - else - { - if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) - { - new_port0 = udp0->src_port = s0->out2in.port; - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - udp0->dst_port = s0->ext_host_port; - ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; - } - vlib_increment_simple_counter (&sm->counters.fastpath. 
- in2out_ed.udp, thread_index, - sw_if_index0, 1); - } + break; + case NAT_PROTOCOL_ICMP: + vlib_increment_simple_counter (&sm->counters.fastpath.in2out_ed.icmp, + thread_index, sw_if_index0, 1); + break; + case NAT_PROTOCOL_OTHER: + vlib_increment_simple_counter ( + &sm->counters.fastpath.in2out_ed.other, thread_index, sw_if_index0, + 1); + break; } /* Accounting */ @@ -1163,11 +1125,21 @@ nat44_ed_in2out_fast_path_node_fn_inline (vlib_main_t * vm, t->sw_if_index = sw_if_index0; t->next_index = next[0]; t->is_slow_path = 0; + t->translation_error = translation_error; + t->lookup_skipped = lookup_skipped; + clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key)); if (s0) - t->session_index = s0 - tsm->sessions; + { + t->session_index = s0 - tsm->sessions; + clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of)); + clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if)); + t->translation_via_i2of = (&s0->i2o == f); + } else - t->session_index = ~0; + { + t->session_index = ~0; + } } if (next[0] == NAT_NEXT_DROP) @@ -1208,16 +1180,14 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm, while (n_left_from > 0) { vlib_buffer_t *b0; - u32 sw_if_index0, rx_fib_index0, proto0, new_addr0, old_addr0, - iph_offset0 = 0; - u16 old_port0, new_port0; + u32 sw_if_index0, rx_fib_index0, iph_offset0 = 0; + nat_protocol_t proto0; ip4_header_t *ip0; udp_header_t *udp0; - tcp_header_t *tcp0; icmp46_header_t *icmp0; snat_session_t *s0 = 0; clib_bihash_kv_16_8_t kv0, value0; - ip_csum_t sum0; + int translation_error = NAT_ED_TRNSL_ERR_SUCCESS; b0 = *b; @@ -1244,18 +1214,23 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm, } udp0 = ip4_next_header (ip0); - tcp0 = (tcp_header_t *) udp0; icmp0 = (icmp46_header_t *) udp0; proto0 = ip_proto_to_nat_proto (ip0->protocol); if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_OTHER)) { - s0 = nat44_ed_in2out_unknown_proto (sm, b0, ip0, - rx_fib_index0, - thread_index, now, vm, node); + s0 = nat44_ed_in2out_slowpath_unknown_proto ( + sm, 
b0, ip0, rx_fib_index0, thread_index, now, vm, node); if (!s0) next[0] = NAT_NEXT_DROP; + if (NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate ( + sm, b0, ip0, &s0->i2o, proto0, is_output_feature))) + { + goto trace0; + } + vlib_increment_simple_counter (&sm->counters.slowpath. in2out_ed.other, thread_index, sw_if_index0, 1); @@ -1264,10 +1239,17 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm, if (PREDICT_FALSE (proto0 == NAT_PROTOCOL_ICMP)) { - next[0] = - icmp_in2out_ed_slow_path (sm, b0, ip0, icmp0, sw_if_index0, - rx_fib_index0, node, next[0], now, - thread_index, &s0); + next[0] = icmp_in2out_ed_slow_path (sm, b0, ip0, icmp0, sw_if_index0, + rx_fib_index0, node, next[0], + now, thread_index, proto0, &s0); + if (NAT_NEXT_DROP != next[0] && s0 && + NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate ( + sm, b0, ip0, &s0->i2o, proto0, is_output_feature))) + { + goto trace0; + } + vlib_increment_simple_counter (&sm->counters.slowpath. 
in2out_ed.icmp, thread_index, sw_if_index0, 1); @@ -1278,7 +1260,7 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm, vnet_buffer (b0)->ip.reass.l4_src_port, ip0->dst_address, vnet_buffer (b0)->ip.reass.l4_dst_port, rx_fib_index0, ip0->protocol); - if (!clib_bihash_search_16_8 (&tsm->in2out_ed, &kv0, &value0)) + if (!clib_bihash_search_16_8 (&sm->flow_hash, &kv0, &value0)) { ASSERT (thread_index == ed_value_get_thread_index (&value0)); s0 = @@ -1342,95 +1324,27 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm, b0->flags |= VNET_BUFFER_F_IS_NATED; - if (!is_output_feature) - vnet_buffer (b0)->sw_if_index[VLIB_TX] = s0->out2in.fib_index; - - old_addr0 = ip0->src_address.as_u32; - new_addr0 = ip0->src_address.as_u32 = s0->out2in.addr.as_u32; - sum0 = ip0->checksum; - sum0 = ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - src_address); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32, - s0->ext_host_addr.as_u32, ip4_header_t, - dst_address); - ip0->checksum = ip_csum_fold (sum0); - - old_port0 = vnet_buffer (b0)->ip.reass.l4_src_port; + if (NAT_ED_TRNSL_ERR_SUCCESS != + (translation_error = nat_6t_flow_buf_translate ( + sm, b0, ip0, &s0->i2o, proto0, is_output_feature))) + { + nat_free_session_data (sm, s0, thread_index, 0); + nat_ed_session_delete (sm, s0, thread_index, 1); + s0 = NULL; + goto trace0; + } if (PREDICT_TRUE (proto0 == NAT_PROTOCOL_TCP)) { - if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) - { - new_port0 = udp0->src_port = s0->out2in.port; - sum0 = tcp0->checksum; - sum0 = - ip_csum_update (sum0, old_addr0, new_addr0, - ip4_header_t, dst_address); - sum0 = - ip_csum_update (sum0, old_port0, new_port0, - ip4_header_t, length); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - sum0 = - ip_csum_update (sum0, ip0->dst_address.as_u32, - s0->ext_host_addr.as_u32, - ip4_header_t, dst_address); - sum0 = - ip_csum_update (sum0, - vnet_buffer 
(b0)->ip.reass.l4_dst_port, - s0->ext_host_port, ip4_header_t, length); - tcp0->dst_port = s0->ext_host_port; - ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; - } - mss_clamping (sm->mss_clamping, tcp0, &sum0); - tcp0->checksum = ip_csum_fold (sum0); - } vlib_increment_simple_counter (&sm->counters.slowpath.in2out_ed.tcp, thread_index, sw_if_index0, 1); nat44_set_tcp_session_state_i2o (sm, now, s0, b0, thread_index); } - else if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment - && udp0->checksum) + else { - new_port0 = udp0->src_port = s0->out2in.port; - sum0 = udp0->checksum; - sum0 = - ip_csum_update (sum0, old_addr0, new_addr0, ip4_header_t, - dst_address); - sum0 = - ip_csum_update (sum0, old_port0, new_port0, ip4_header_t, length); - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - sum0 = ip_csum_update (sum0, ip0->dst_address.as_u32, - s0->ext_host_addr.as_u32, - ip4_header_t, dst_address); - sum0 = - ip_csum_update (sum0, - vnet_buffer (b0)->ip.reass.l4_dst_port, - s0->ext_host_port, ip4_header_t, length); - udp0->dst_port = s0->ext_host_port; - ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; - } - udp0->checksum = ip_csum_fold (sum0); vlib_increment_simple_counter (&sm->counters.slowpath.in2out_ed.udp, thread_index, sw_if_index0, 1); } - else - { - if (!vnet_buffer (b0)->ip.reass.is_non_first_fragment) - { - new_port0 = udp0->src_port = s0->out2in.port; - if (PREDICT_FALSE (is_twice_nat_session (s0))) - { - udp0->dst_port = s0->ext_host_port; - ip0->dst_address.as_u32 = s0->ext_host_addr.as_u32; - } - vlib_increment_simple_counter (&sm->counters.slowpath. 
- in2out_ed.udp, thread_index, - sw_if_index0, 1); - } - } /* Accounting */ nat44_session_update_counters (s0, now, @@ -1448,11 +1362,21 @@ nat44_ed_in2out_slow_path_node_fn_inline (vlib_main_t * vm, t->sw_if_index = sw_if_index0; t->next_index = next[0]; t->is_slow_path = 1; + t->translation_error = translation_error; + clib_memcpy (&t->search_key, &kv0, sizeof (t->search_key)); if (s0) - t->session_index = s0 - tsm->sessions; + { + t->session_index = s0 - tsm->sessions; + clib_memcpy (&t->i2of, &s0->i2o, sizeof (t->i2of)); + clib_memcpy (&t->o2if, &s0->o2i, sizeof (t->o2if)); + t->translation_via_i2of = 1; + } + else - t->session_index = ~0; + { + t->session_index = ~0; + } } if (next[0] == NAT_NEXT_DROP) |